#!/usr/bin/perl -w
# modlogrun.pl -- run modlogan onto a set of cronolog log files
#                 with optional splitting into vhosts
#
# Copyright 2003 Erich Schubert <erich@vitavonni.de>
#                written for Drinsama IT Services GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
use strict;
use File::stat;
use File::Basename;

my $modlogan='/usr/bin/sudo -u modlogan /usr/bin/modlogan';
my $logresbin='/usr/bin/jdresolve';
my $gzip='/bin/gzip';
my $logresolve=$logresbin.' --recursive --nostats --mask="unknown...%c"'.
               ' --database=/var/lib/jdresolve/jdresolve.db -';
my $vhostdir="vhosts";
##########################################################################

### predefinitions
sub mkdirpc($$);
sub chownref($$);
sub splitlog($$);

# command line parameters
my ($logdir,$vhostmode) = @ARGV;

# remember which vhosts do exist and need to be processed
my %vhosts;

# Some checks
if (! -d "$logdir") { die "Log-Direcotry not found!\n"; }
if (! -e "$logdir/modlogan.conf") { die "Configuration file not found!\n"; }

### resolve logs, starting with yesterdays log.
my $time=time();
while(1) {
	$time -= 60*60*24;
	my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time);
	$mon++; $year+=1900;

	# Generate file names.
	my $reslog=sprintf("%04d/%02d/resolved.log-%04d.%02d.%02d",$year,$mon,$year,$mon,$mday);
	my $acclog=sprintf("%04d/%02d/access.log-%04d.%02d.%02d",$year,$mon,$year,$mon,$mday);
	
	# Check if the log was already resolved
	if (-e "$logdir/$reslog" || -e "$logdir/$reslog.gz") {
		# it was resolved. This was the last log we have to try.
		last;
	}
	if (! -e "$logdir/$acclog" || -e "$logdir/$reslog.gz") {
		# If there was no log file the whole month (dir doesn't exist),
		# we won't search for older logs any more.
		if (! -d "$logdir/".sprintf("%04d/%02d",$year,$mon)) { last; }
		# single days without a log file are skipped this way.
		next;
	}
	print "Resolving logfile $acclog...\n";
	system("$logresolve < \"$logdir/$acclog\" > \"$logdir/$reslog\"");
	chownref("$logdir/$reslog","$logdir/$acclog");
	
	# Compress unresolved log file after resolving.
	system("$gzip \"$logdir/$acclog\"");
	
	# Do we need to split the logfile into vhosts?
	if ($vhostmode) {
		print "Splitting $reslog into vhosts...\n";
		my @this_vhosts = splitlog($logdir,$reslog);
		# Build a map of existing vhosts
		foreach my $vhost (@this_vhosts) {
			$vhosts{$vhost}=1;
		}
	}
}

### Configuration generation for vhosts.
foreach my $vhost (keys %vhosts) {
	# generate missing vhost modlogan config files...
	if (! -e "$logdir/$vhostdir/$vhost/modlogan.conf") {
		open(TEMPLATE,"<$logdir/$vhostdir/modlogan.conf.template");
		open(OUTPUT,">$logdir/$vhostdir/$vhost/modlogan.conf");
		my $hostnameesc=$vhost;
		$hostnameesc =~ s/\./\\./g; $hostnameesc =~ s/^www\./(www\\.)?/;
		while(<TEMPLATE>) {
			s/-HOSTNAME-/$vhost/g;
			s/-HOSTNAMEESC-/$hostnameesc/g;
			print OUTPUT $_;
		}
		close(OUTPUT);
		close(TEMPLATE);
	}
	# Directories will be create with the same permissions and owners
	# as the subdirs in the top level directory.
	if (! -d "$logdir/$vhostdir/$vhost/modlogan") {
		mkdirpc("$logdir/$vhostdir/$vhost/modlogan","$logdir/modlogan");
	}
	if (! -d "$logdir/$vhostdir/$vhost/modlogan-state") {
		mkdirpc("$logdir/$vhostdir/$vhost/modlogan-state","$logdir/modlogan-state");
	}
}

### the logs are resolved now, we can run modlogan.
while(1) {
	# count the time back up. Counting first is correct, because the last
	# file we tried did not exist.
	$time += 60*60*24;
	# don't process the current log.
	if ( (time() - $time) < 60*60*24 ) { last; }
	my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time);
	$mon++; $year+=1900;

	# calculate file name.
	my $reslog=sprintf("%04d/%02d/resolved.log-%04d.%02d.%02d",$year,$mon,$year,$mon,$mday);
	# process the log file.
	if (-e "$logdir/$reslog") {
		print "Processing $reslog...\n";
		system("$modlogan -c \"$logdir/modlogan.conf\" < \"$logdir/$reslog\"");
		# compress the log file afterwards
		system("$gzip \"$logdir/$reslog\"");
	}
	# for each vhost - if applicable - do the same.
	foreach my $vhost (keys %vhosts) {
		if (-e "$logdir/$vhostdir/$vhost/$reslog") {
			print "Processing $vhost: $reslog...\n";
			system("$modlogan -c \"$logdir/$vhostdir/$vhost/modlogan.conf\" < \"$logdir/$vhostdir/$vhost/$reslog\"");
			# compress the log file afterwards
			system("$gzip \"$logdir/$vhostdir/$vhost/$reslog\"");
		}
	}
}

exit 0;

#### Helper functions-

# chown $1 --reference=$2
sub chownref($$) {
	my ($newfile,$oldfile)=@_;
	my $stat=stat($oldfile);
	chmod $stat->mode, $newfile
	|| print STDERR "Couldn't chmod $newfile: $@\n";
	chown $stat->uid, $stat->gid, $newfile
	|| print STDERR "Couldn't chown $newfile: $@\n";
}

# makedir with --reference and --parents
sub mkdirpc($$) {
	my ($newdir, $olddir) = @_;
	my @newc=split "/",$newdir;
	my @oldc=split "/",$olddir;
	my ($nc,$oc)=($#newc,$#oldc);
	while($nc > 0 && $oc > 0) {
		if (-d join("/",@newc[0..$nc])) { last; }
		$nc-=1; $oc-=1;
	}
	while($nc < $#newc) {
		$nc++; $oc++;
		my $newdir=join("/",@newc[0..$nc]);
		mkdir $newdir;
		if ($oc > 0) { chownref($newdir, join("/",@oldc[0..$oc]) ); }
	}
}

# split a logfile into vhosts.
sub splitlog($$) {
	my %fd;
	my ($logdir,$filename)=@_;
	open(INFILE,"< $logdir/$filename")
	|| do { print STDERR "Couldn't open $logdir/$filename.\n"; return; };
	my $stat = stat("$logdir/$filename");
	while(my $log_line = <INFILE>) {
		$log_line =~ m/\s([^\s]+)\s*$/
		|| do { print STDERR "Ignored line: $log_line"; next; };
		my $vhost = $1;
		if ($vhost !~ m/^[a-z0-9\.\-]+$/) { print STDERR "Corrupted line in log file $logdir/$filename: $log_line"; }
		# do we need to open the log file?
		unless ($fd{$vhost}) {
			my $outfilename = "$logdir/$vhostdir/$vhost/$filename";
			# copy directory structure.
			mkdirpc(dirname($outfilename),dirname("$logdir/$filename"));
			my $needchown = ! -e "$outfilename";
			open $fd{$vhost}, ">>".$outfilename
			|| print STDERR "Fatal: couldn't open logfile $outfilename.\n";
			# Chown newly created log files to the same permissions as the
			# spplitted log file.
			if ($needchown) {
				chmod $stat->mode, $outfilename || print STDERR "Couldn't chmod: $@\n";
				chown $stat->uid, $stat->gid, $outfilename || print STDERR "Couldn't chown: $@\n";
			}
		}
		# perl doesn't like values from hashes as FD directly.
		my $fh = $fd{$vhost};
		print $fh $log_line;
	}
	# close the log file descriptors.
	my @vhosts=keys %fd;
	foreach my $vhost (@vhosts) {
		close $fd{$vhost};
		delete $fd{$vhost};
	}
	close(INFILE);
	# return the vhosts we did process this turn.
	return @vhosts;
}
