#!/usr/bin/perl
# Added by Debian package rules script
# Appends the MisterHouse library directories to the end of the module
# search path.  This is an ordinary run-time statement, so it only helps
# modules that are loaded at run time; every compile-time 'use' below has
# already been resolved by the time it executes.
push @INC, ("/usr/lib/perl5/misterhouse","/usr/share/perl5/misterhouse");
#---------------------------------------------------------------------------
#  File:
#     report_weblog
#
#  Last emacs update Time-stamp: <2003-09-21 21:42:53 winter>
#
#  Description:
#     See help text below
#
#  Author:
#      Bruce Winter    bruce@misterhouse.net 
#
#  Latest version:
#      http://misterhouse.net
#
#  Inspired by: 
#     http://www.itknowledge.com/tpj/programs/Issue_14/On_The_Fly_Plots/tally_hourly.pl
#
#  Requires Gnuplot, available from:
#     http://www.gnuplot.org
#
#  Change log:
#    07/27/99  Created.  CVS log at bottom.
#
#---------------------------------------------------------------------------

use strict;

my ($Pgm_Path, $Pgm_Name);
BEGIN {
                                # Split $0 at its last slash or backslash into directory and
                                # program name.  The greedy (.*) takes everything after the
                                # slash, so any file extension stays part of the name.
    ($Pgm_Path, $Pgm_Name) = $0 =~ /(.*)[\\\/](.*)\.?/;

                                # No directory part (or an empty name):  use the text up to
                                # the first dot as the name and the current directory as path
    unless ($Pgm_Name) {
        ($Pgm_Name) = $0 =~ /([^.]+)/;
        $Pgm_Path   = '.';
    }

                                # Make modules in ../lib/site, relative to the program, loadable
    eval "use lib '$Pgm_Path/../lib/site'";
}

my %parms;
use Getopt::Long;

                                # Parse the command line into %parms.  What is left in @ARGV
                                # afterwards is the list of logfiles to read.
my $options_ok = GetOptions(\%parms,
                            qw(h help),
                            qw(mailto=s mailserver=s ignore:s),
                            qw(size=s type=s),
                            qw(runid=s outdir=s),
                            qw(debug));

                                # Show the help text and stop if the options did not parse,
                                # if no logfile was named, or if help was requested
if (!$options_ok or !@ARGV or $parms{h} or $parms{help}) {
    print <<eof;

$Pgm_Name reads  MisterHouse and/or Apache web server logs and
generates report graphs and optionally email them.

Usage:

  $Pgm_Name [options] logfile(s)

    -h        => This help text
    -help     => This help text

    -ignore xyz     => A comma-delimited list of ip address to ignore.

    -size xyz       => How big to make the plot.  xyz=.5 gives about a 3 inch long plot

    -type  xyz      => Type of plot, gif or png.  Default is png.

    -mailto     xyz => Will email the charts to xyz
    -mailserver xyz => xyz is the SMTP host.  Default is localhost

    -runid      xyz => All graphs will have xyz as a prefix.  Default is blank.
    -outdir     xyz => All graphs be stored in directory xyz.  Default is .

  Examples:
    $Pgm_Name -mailto 'brucewinter\@home.net' /var/log/httpd/access_log
    $Pgm_Name -mailto winters\@home.net -mailserver 24.2.1.70 e:/mh/data/logs/server.1999_07.log
    $Pgm_Name -outdir /mh/data/logs -runid 1999_07 /mh/data/logs/server.1999_07.log

eof

    exit;
}

print "Running $Pgm_Name @ARGV\n";

                                # Globals:  hit tallies, filled in by read_data
my %hits;

                                # Fill in a default for every option that was not given
$parms{mailserver} ||= "localhost";
$parms{outdir}     ||= ".";
$parms{type}       ||= "png";
$parms{size}       ||= ".5";

                                # Every output file name starts with "outdir/" plus, if a
                                # runid was given, "runid_"
my $prefix = "$parms{outdir}/";
$prefix .= "$parms{runid}_" if $parms{runid};
print "Output files will be prefixed with $prefix\n";

#$parms{ignore} = 'house,dm,p90,c2,misterhouse,200.200.200.*' unless defined $parms{ignore};
my @ignore_list = split /,/, $parms{ignore};


                                # Tally the logs, plot the tallies, then optionally mail the plots
read_data(@ARGV);
generate_plots();
mail_report() if $parms{mailto};
print "All done\n";

                                # Parse logfiles and summarize data into .txt files
                                #  - @logfiles:  names of the Apache and/or MisterHouse logs to read
                                # Tallies hits into the file-level %hits hash (by client, hour,
                                # day, and day+hour) and then writes ${prefix}hour.txt,
                                # ${prefix}dayhour.txt, ${prefix}day.txt and ${prefix}client.txt
                                # as tab-separated tables for gnuplot.
                                # Dies if a logfile can not be opened or a table can not be written.
sub read_data {
    my (@logfiles) = @_;

    my $logfile_cnt = @logfiles;
    print "Reading $logfile_cnt logfiles ...\n";

                                # Each -ignore entry is used as an anchored regular expression.
                                # Compile them once here, rather than once per log line.
    my @ignore_res = map { qr/^$_$/ } @ignore_list;

    for my $logfile (@logfiles) {
                                # 3-arg open:  the name is always treated as a plain file name
        open (my $datain, '<', $logfile) or die "Error, could not open $logfile: $!\n";
        while (my $line = <$datain>) {
            my ($client, $day, $hour, $min) = _parse_log_line($line);
            next unless defined $client and defined $hour;

                                # Ignore local clients
            next if grep { $client =~ $_ } @ignore_res;

                                # Force numbers, so that '09' (Apache) and '9' (MisterHouse)
                                # end up in the same bucket when both log types are read
            $day  += 0;
            $hour += 0;

                                # Day plus fraction of the day, rounded to 2 decimals
            my $dayhour = $day + sprintf("%0.2f", $hour/24);

            $hits{client}{$client}++;
                                # Time of the latest hit seen for this client.  Zero padded,
                                # so that the string sort of client.txt is chronological.
            $hits{client_time}{$client} = sprintf("%02d-%02d-%02d", $day, $hour, $min);
            $hits{hour}{$hour}++;
            $hits{dayhour}{"$dayhour"}++;
            $hits{dayhour_client}{"$dayhour"}{$client}++;
            $hits{day}{$day}++;
            $hits{day_client}{$day}{$client}++;
            $hits{total}++;
        }
        close $datain;
    }

    $hits{total}       ||= 0;   # So the messages read '0 total hits' if nothing matched
    $hits{total_clients} = keys %{$hits{client}};
    $hits{total_days}    = keys %{$hits{day}};
    $hits{msg} = "Read $logfile_cnt logfiles, counted $hits{total} total hits from $hits{total_clients} clients in $hits{total_days} days";
    print $hits{msg}, "\n";

                                # hour.txt:  hour of day, hits
    _write_rows("${prefix}hour.txt",
                map  { [$_, $hits{hour}{$_}] }
                sort { $a <=> $b } keys %{$hits{hour}});

                                # dayhour.txt:  day.fraction, hits/10, number of clients.
                                # The keys are numbers, so they are sorted numerically.
    _write_rows("${prefix}dayhour.txt",
                map  { [$_, int($hits{dayhour}{$_}/10), scalar keys %{$hits{dayhour_client}{$_}}] }
                sort { $a <=> $b } keys %{$hits{dayhour}});

                                # day.txt:  day of month, hits/10, number of clients
    _write_rows("${prefix}day.txt",
                map  { [$_, int($hits{day}{$_}/10), scalar keys %{$hits{day_client}{$_}}] }
                sort { $a <=> $b } keys %{$hits{day}});

                                # client.txt:  time of last hit, hits, client.  Ordered by
                                # time of last hit, then by client name to keep ties stable.
    _write_rows("${prefix}client.txt",
                map  { [$hits{client_time}{$_}, $hits{client}{$_}, $_] }
                sort { $hits{client_time}{$a} cmp $hits{client_time}{$b} or $a cmp $b }
                keys %{$hits{client}});
}

                                # Pick the client and the time of the hit out of one log line.
                                #  - $line:  one line of an Apache or MisterHouse log
                                # Returns ($client, $day, $hour, $min), with $hour on the
                                # 24 hour clock, or an empty list if no format matches.
sub _parse_log_line {
    my ($line) = @_;

# Apache log example
#house - - [24/Jul/1999:17:59:18 -0500] "GET /mh/code/Bruce/weather_monitor.pl HTTP/1.1" 304 -
    if (my ($client, $day, $hour, $min) =
        $line =~ m!^(\S+) - - \[(\d+)/\w+/\d{4}:(\d+):(\d+):\d+ [\d+-]+\]!) {
        return ($client, $day, $hour, $min);
    }

                                # NOTE(review): the former 'alternate Apache' pattern is gone.
                                # It could only match when the date and the hour ran together
                                # without a space, and it stored a weekday name as the day.

# Misterhouse log examples, 12 hour clock
#Sat, Jul 31  9:14 AM http 127.0.0.1
#Sat, Jul 31  9:15 AM server_speak 200.200.200.5
#Fri, Nov  5  9:39 PM server_speak 200.200.200.5
    if (my ($day, $hour, $min, $ampm, $client) =
        $line =~ m!^\S+, +\S+ +(\d+) +(\d+):(\d+) (\S+) \S+ (\S+)!) {
        $hour += 12 if $ampm eq 'PM' and $hour != 12;
        $hour  = 0  if $ampm eq 'AM' and $hour == 12; # 12:xx AM is the first hour of the day
        return ($client, $day, $hour, $min);
    }

                                # weekday month day hh:mm:ss type client
                                # (no sample line on record; the shape is read off the pattern)
    if (my ($day, $hour, $min, $client) =
        $line =~ m!^\S+ +\S+ +(\d+) +(\d+):(\d+):\d+ +\S+ (\S+)!) {
        return ($client, $day, $hour, $min);
    }

# Misterhouse log examples, numeric date
#Fri 11/05/99 21:47:48 server_speak 200.200.200.5
#Sun 09/21/03 20:29:07 192.168.0.81 /ia5/images/wind.gif 
                                # The date is taken to be month/day/year, as in the samples.
    if (my ($day, $hour, $min, $first, $second) =
        $line =~ m!^\S+ +\d+/(\d+)/\d+ +(\d+):(\d+):\d+ +(\S+)(?: +(\S+))?!) {
                                # The current format is 'client url', the 1999 one was
                                # 'type client'.  Heuristic:  a second word that is not a
                                # url path (does not start with /) is taken as the client.
        my $client = (defined $second and $second !~ m!^/!) ? $second : $first;
        return ($client, $day, $hour, $min);
    }

    return;
}

                                # Write a table to $file, one tab-separated line per row.
                                #  - $file:  name of the file to create or overwrite
                                #  - @rows:  one array ref of column values per line
                                # Dies if the file can not be opened or written.
sub _write_rows {
    my ($file, @rows) = @_;

    open (my $dataout, '>', $file) or die "Error, could not open $file: $!\n";
    for my $row (@rows) {
        print {$dataout} join("\t", @$row), "\n";
    }
                                # Buffered write errors only show up when the file is closed
    close $dataout or die "Error, could not write $file: $!\n";
}

                                # Run gnuplot to generate plots
                                # Takes no arguments.  Writes the gnuplot commands to
                                # ${prefix}gnuplot.txt and runs gnuplot on that file, which
                                # plots the tables written by read_data into the hour, day
                                # and dayhour image files.
                                # Dies if the command file can not be written.  A gnuplot
                                # failure is reported, but does not stop the program.
sub generate_plots {

    print "Generating $parms{type} plots\n";

                                # Remove plots left over from an earlier run.  It is not an
                                # error if there are none, e.g. on the first run.
    for my $name (qw(hour day dayhour)) {
        my $plot = "${prefix}$name.$parms{type}";
        next unless -e $plot;
        unlink $plot or print "Error, could not delete $plot: $!\n";
    }

#   open (GP, "| gnuplot") || die "Couldn't open gnuplot: $!";
    open (my $gp, '>', "${prefix}gnuplot.txt") or die "Couldn't open gnuplot.txt: $!";
    print {$gp} <<eof;
#   set terminal postscript color
#   set terminal gif small size 320,240 interlace
#   set terminal gif small size 320,240 xffffff x000000 x777777  x0000ff xff0000 x00ff00
    set terminal $parms{type} small color
    set size $parms{size},$parms{size}
    set border
    set boxwidth
    set nogrid
#   set nokey
#   set nolabel
    set data style boxes
    set noxzeroaxis
    set noyzeroaxis
    set tics out
#   set ytics nomirror
    set ylabel "Hits" 0,0
    set xlabel "Day" 0,0

    set title
    set output "${prefix}hour.$parms{type}"
    set xlabel "Hour of Day" 0,0
    set xtics 0,2,23
#   set xrange [ -0.75 : 23.75]
    plot "${prefix}hour.txt" using 1:2 title "Hits by Hour of Day"

    set title "Hits per Day\\nTotal: $hits{total} hits from $hits{total_clients} clients in $hits{total_days} days"
    set output "${prefix}day.$parms{type}"
    set xlabel "Day" 0,0
    set xtics 0,2,31
    plot "${prefix}day.txt" using 1:2 title "Hits/10", "${prefix}day.txt" using 1:3 title "Clients"

    set title "Hits per Hour"
    set output "${prefix}dayhour.$parms{type}"
    set xtics 0,2,31
    set data style impulses
    plot "${prefix}dayhour.txt" using 1:2 title "Hits/10", "${prefix}dayhour.txt" using 1:3 title "Clients"

eof
    close $gp or die "Couldn't write gnuplot.txt: $!";

    my $pgm = '/usr/local/bin/gnuplot';
    $pgm = 'gnuplot' unless -e $pgm; # If not in standard spot, assume it is the path
    print "running: $pgm ${prefix}gnuplot.txt\n";

                                # List form of system:  no shell is involved, so blanks or
                                # shell characters in -outdir / -runid can do no harm
    my $status = system($pgm, "${prefix}gnuplot.txt");
    if ($status == -1) {
        print "Error, could not run $pgm: $!\n";
    }
    elsif ($status != 0) {
        print "Error, $pgm failed, exit status ", $status >> 8, " (wait status $status)\n";
    }

}

                                # Email report plots
                                # Takes no arguments.  Mails the dayhour and day plots, as
                                # attachments, to the address in $parms{mailto}.
                                # Only prints install hints and returns if MIME::Lite can
                                # not be loaded.  A failed send is reported, not fatal.
sub mail_report {
    unless (eval { require MIME::Lite; 1 }) {
        print "To use email, you need to install MIME::Lite\n";
        print " - linux: perl -MCPAN -eshell    install MIME::Lite\n";
        print " - windows: ppm -install MIME-Lite\n";
        return;
    }

                                # The message itself carries the dayhour plot ...
    my $date = localtime;
    my $message = MIME::Lite->new(From     => $Pgm_Name,
                                  To       => $parms{mailto},
                                  Subject  => "Web Stats Report for $date",
                                  Type     => "image/$parms{type}",
                                  Encoding => 'base64',
                                  Path     => "${prefix}dayhour.$parms{type}",
                                  Filename => "dayhour.$parms{type}");

                                # ... and the day plot is added as a second part
    $message->attach(Type     => "image/$parms{type}",
                     Encoding => 'base64',
                     Path     => "${prefix}day.$parms{type}",
                     Filename => "day.$parms{type}");

                                # -mailserver is only used on Windows.  Elsewhere the
                                # MIME::Lite default send method is left in place.
    if ($^O eq "MSWin32") {
        print "Using built in smtp code with server $parms{mailserver}\n";
        MIME::Lite->send('smtp', $parms{mailserver}, Timeout => 20);
    }

    print "Sending report to $parms{mailto}\n";
    $message->send or print "Error, could not send report to $parms{mailto}\n";
    return;
}
#
# $Log: report_weblog,v $
# Revision 1.12  2003/11/23 20:25:51  winter
#  - 2.84 release
#
# Revision 1.11  2003/06/01 21:54:40  winter
#  - 2.81 release
#
# Revision 1.10  2000/10/22 16:48:28  winter
# - 2.32 release
#
# Revision 1.9  2000/10/01 23:35:25  winter
# - 2.29 release
#
# Revision 1.8  2000/01/27 13:30:39  winter
# - update version number
#
# Revision 1.7  2000/01/02 23:41:59  winter
# - fixed help typo
#
# Revision 1.6  1999/11/20 15:53:01  winter
# - add -size and -type options.  Change -type default from gif to png.
#
# Revision 1.5  1999/11/08 02:25:56  winter
# - support new log format
#
# Revision 1.4  1999/09/27 03:14:34  winter
# - /10 on hit count.  Add total line to titles
#
# Revision 1.3  1999/08/30 00:22:11  winter
# - add full path to gnuplot, so it works from cron
#
# Revision 1.2  1999/08/01 01:20:55  winter
# *** empty log message ***
#
# Revision 1.1  1999/07/31 15:38:59  winter
# - created
#
#


