#!/usr/bin/perl
#---------------------------------------------------------------------------
#  File:
#     monitor_weblog
#
#  Last emacs update Time-stamp: <2003-04-30 08:13:53 winter>
#
#  Description:
#     See help text below
#
#  - If you want to monitor linux ipchains log data, use monitor_ipchainlog.pl
#  - If you want to monitor router traffic, use mh/code/bruce/monitor_router.pl
#  - If you want to monitor mh server web traffic, use monitor_server.pl
#
#  Author:
#     Bruce Winter   bruce@misterhouse.net
#
#  Latest version:
#     http://misterhouse/mh/bin/monitor_weblog
#
#  Change log:
#    07/25/99  Created.  CVS log at bottom.
#
#
#---------------------------------------------------------------------------

use strict;

                                # Command-line options are collected into %parms.
                                #  - mh_server and mh_port are described in the help text below
                                #    and read by speak_old, so they have to be declared here;
                                #    otherwise Getopt::Long rejects them as unknown options.
                                #  - mailto, runid, outdir and debug are accepted but not read
                                #    anywhere in this file.
my %parms;
use Getopt::Long;
my $options_ok = GetOptions(\%parms, "h", "help",
                            "mh_server=s", "mh_port=s",
                            "mailto=s", "ignore=s",
                            "runid=s", "outdir=s",
                            "debug");

                                # Print the usage text and quit on a bad option, on more than
                                # one log file argument, or when help was asked for
if (!$options_ok or @ARGV > 1 or $parms{h} or $parms{help}) {
    print<<eof;


monitor_weblog is a stand alone daemon that monitors a Apache web log file.
When the server logs hits, this code will summarize them and pass them
onto the MisterHouse mhsend_server.pl code via a tcp/ip socket.

Usage:

  monitor_weblog [options] logfile

    -h        => This help text
    -help     => This help text

    -mh_server xyz => The ip address of your MisterHouse box.
    -mh_port   xyz => The ip port you set mh.ini server_speak to.
    -ignore    xyz => A comma-delimited list of ip address to ignore.

  Examples:
    monitor_weblog -mh_server house -ignore 'house,10.0.0.1' /var/log/httpd/access_log

eof

    exit;
}

$parms{mh_server} ||= 'house';
$parms{mh_port}   ||= '8082';
$parms{ignore}    ||= 'house,dm,p90,c2,misterhouse';

                                # Turn the comma-delimited -ignore value into a lookup hash
                                # keyed by client name
my %ignore_list = map { ($_ => 1) } split(/,/, $parms{ignore});

                                # The log file is the one optional positional argument
my $log_file = shift(@ARGV) || "/var/log/httpd/access_log";
#log_file = "/usr/local/apache/logs/misterhouse.net.ws-access_log" unless $log_file;

                                # Open the log and remember where its current end is
my $log_pos = open_log($log_file);

print "Current log position=$log_pos\n";

                                # Per-client bookkeeping and the running hourly/daily counters
my %clients;
my %hits;
my ($hits_hour, $hits_day);
my ($clients_hour, $clients_day);
my $last_active;

                                # Each new client gets the next id: WebGuy1, WebGuy2, ...
my $id_cnt = 'WebGuy1';

my $last_hourly_report = 0;     # Epoch time the hourly summary was last handled
my $last_daily_report  = 0;     # Epoch time the daily  summary was last handled

                                # Main loop: every 2 seconds handle any periodic summary that is
                                # due, then process whatever was appended to the log since the
                                # previous pass.
while (1) {
    sleep 2;
    my $now = time;
    my ($sec, $min, $hour) = (localtime($now))[0,1,2];

                                # Hourly summary at 2 minutes past the hour, daily summary at 20:01.
                                #  - The windows are 5 seconds wide but the loop sleeps only 2, so
                                #    a window is normally seen more than once.  The last_*_report
                                #    times make sure each summary is spoken, and its counters are
                                #    cleared, only on the first pass through a window.
    if ($min == 2 and $sec < 5) {
        if ($now - $last_hourly_report > 60) {
            $last_hourly_report = $now;
            speak("rooms=all $hits_hour web hits from $clients_hour clients in the last hour") if $hits_hour > 5;
            $hits_hour    = 0;
            $clients_hour = 0;
        }
    }
    elsif ($hour == 20 and $min == 1 and $sec < 5) {
        if ($now - $last_daily_report > 60) {
            $last_daily_report = $now;
            speak("rooms=all Notice, there were $hits_day web hits from $clients_day clients in the last day") if $hits_day > 1;
            $hits_day    = 0;
            $clients_day = 0;
        }
    }
#   print '.';
    %hits = ();                 # Per-pass hit counts by client
    my $hits = 0;               # Hits seen on this pass, picks the sound below

                                # Restart log if it hasn't changed in a while
                                #  - This allows us to not get stuck following a rotated log file
#   $log_pos  = &open_log($log_file) if ($last_active + 3600*8) < time;
#   $log_pos  = &open_log($log_file) if ($hour == 3 and $min == 30 and $sec < 5) ;
    if ($hour == 5 and $min == 2 and $sec < 5) {
        speak("Restarting monitor weblog");
        exit;
    }

    seek(LOG, $log_pos, 0);     # Reset end of file flag
    while (my $line = <LOG>) {
                                # Pick the client out of a common-log-format line, e.g.
                                #   house - - [24/Jul/1999:17:59:18 -0500] "GET ..."
                                #  - The ident and user fields may hold names instead of '-'
        my ($client) = $line =~ m!^(\S+) \S+ \S+ \[\d+/\w+/\d{4}:\d+:\d+:\d+ [\d+-]+\]!;
        next unless defined $client;
        $last_active = time;

#       next if grep($client eq $_, @ignore_list);
        next if $ignore_list{$client};

                                # Keep only the last 3 qualifiers (e.g. xyz.proxy.aol.com -> proxy.aol.com)
                                #  - Dotted-quad addresses are kept whole; trimming them would drop
                                #    the first octet and lump unrelated addresses together
        if ($client !~ /^\d+(?:\.\d+){3}$/ and $client =~ /([^.]+(?:\.[^.]+){0,2})$/) {
            $client = $1;
        }

                                # A client never seen before counts as last seen at time 0
        my $time_since_last_visit = time - ($clients{$client}{time} || 0);
        $clients{$client}{time} = time;
        $clients{$client}{id} ||= $id_cnt++;
        $hits{$client}++;
        $hits++;
        $hits_hour++;
        $hits_day++;

                                # Announce clients that are new or have been away over an hour
        if ($time_since_last_visit > 3600) {
            $client =~ s/[\d\.]/ /g; # Get rid of digits and dots
            $client = 'unknown' if $client =~ /^ *$/;
#           &speak("rooms=all display=600 Web access from $client");
            speak("Web access from $client");
            $clients_hour++;
            $clients_day++;
        }
    }
    $log_pos = tell(LOG);       # The next pass resumes from here

                                # One sound per pass, chosen by how many hits it had
    if ($hits > 3) {
        speak('sound_beep1.wav');
    }
    elsif ($hits > 2) {
        speak('sound_gleep1.wav');
    }
    elsif ($hits) {
        speak('sound_glurp1.wav');
    }

#    my $msg;
#    for my $client (sort keys %hits) {
#        $msg .= $clients{$client}{id} . " has $hits{$client} hits. ";
#    }
#    &speak($msg) if $msg;
}


sub open_log {
                                # Open (or re-open) $log_file for reading on the global LOG handle
                                # and return the byte offset of its current end, so the caller
                                # only sees lines appended after this call.
                                #  - Dies if the file can not be opened or positioned.
    my ($log_file) = @_;
    $last_active = time;
    print "Opening log file $log_file\n";
    close LOG;                  # Harmless on the first call, when LOG is not open yet

                                # 3-arg open: the name is always treated as a plain file to read,
                                # even if it starts with '>' or ends with '|'
    open(LOG, '<', $log_file) or die "Error, could not open log file $log_file: $!\n";

                                # Jump straight to the end (whence 2) rather than reading every line
    seek(LOG, 0, 2) or die "Error, could not seek to end of log file $log_file: $!\n";
    return tell(LOG);           # On startup, point pointer to the tail of the file
}


                                # This works with the mhsend server
sub speak {
                                # Run the mhsend program found in the current working directory
                                # as 'mhsend -speak $text'.  Returns the status from system().
                                #  - $text can contain host names taken from the web log, so it is
                                #    passed as its own argument (list form of system, no shell).
                                #    Quotes or other shell characters in it can not break the command.
    my ($text) = @_;
    use IO::Socket;
    use Cwd;
    my $Pgm_Path = cwd();
    my $status = system("$Pgm_Path/mhsend", '-speak', $text);
    if ($status == -1) {
        print "monitor_weblog error, could not run $Pgm_Path/mhsend: $!\n";
    }
    elsif ($status != 0) {
        print "monitor_weblog error, $Pgm_Path/mhsend returned status $status\n";
    }
    return $status;
}
    
                                # This only works with the old speak_server.pl code
sub speak_old {
                                # Send $text, followed by a newline, over a tcp connection to
                                # $parms{mh_server}:$parms{mh_port}.  A failed connection is
                                # reported on stdout and otherwise ignored.
    my ($text) = @_;
    use IO::Socket;
    my $socket = IO::Socket::INET->new(
        PeerAddr => $parms{mh_server},
        PeerPort => $parms{mh_port},
        Proto    => 'tcp',
    );
    if (!$socket) {
        print "monitor_weblog error opening socket to $parms{mh_server} on port $parms{mh_port}: $!\n";
    }
    else {
        print $socket "$text\n";
        close $socket;
    }
}

#
# Example log entry:
#house - - [24/Jul/1999:17:59:18 -0500] "GET /mh/code/Bruce/weather_monitor.pl HTTP/1.1" 304 -
#

#
# $Log: monitor_weblog,v $
# Revision 1.14  2003/06/01 21:54:40  winter
#  - 2.81 release
#
# Revision 1.13  2000/12/21 18:54:14  winter
# - 2.38 release
#
# Revision 1.12  2000/08/19 01:20:42  winter
# - 2.27 release
#
# Revision 1.11  2000/05/06 16:39:05  winter
# - 2.15 release
#
# Revision 1.10  2000/04/09 18:03:19  winter
# - 2.13 release
#
# Revision 1.9  2000/02/20 04:47:54  winter
# -2.01 release
#
# Revision 1.8  2000/02/12 05:33:34  winter
# - commit lots of changes, in preparation for mh release 2.0
#
# Revision 1.7  2000/01/27 13:29:54  winter
# - update version number
#
# Revision 1.6  1999/11/08 02:14:08  winter
# - keep only last 3 qualifiers of domain name
#
# Revision 1.5  1999/10/09 20:35:40  winter
# - don't display new access
#
# Revision 1.4  1999/09/12 16:42:17  winter
# *** empty log message ***
#
# Revision 1.3  1999/08/30 00:21:44  winter
# - add ignore option.
#
# Revision 1.2  1999/08/01 01:20:33  winter
# - use hash in ignore_list
#
# Revision 1.1  1999/07/31 15:40:33  winter
# - created
#
#
