snapshot_stats.pl


#!/usr/bin/perl
# ---------------------------------------------------------------------------------------------
#
# snapshot_stats.pl --- Take a SnapShot of Popfile's Classification Stats
#
# This program authored by Scott W Leighton (helphand@pacbell.net)
# based upon the Popfile project, which is Copyrighted
# by John Graham-Cumming. The author hereby contributes this code
# to the Popfile project under the terms of the Popfile License
# Agreement.    /Scott W Leighton/  May 25, 2003
#
# History - May 25, 2003 - original release
#           May 26, 2003 - Minor cleanup, added some comments
#           May 27, 2003 - Added options for overriding separator and quoting
#           May 30, 2003 - Enhanced version check
#           June 22, 2003 - Major re-work for v 0.20.0 POPFile changes
#           June 29, 2003 - Made backward compatible with v 0.19.0
#
# Popfile and Components
# Copyright (c) 2001-2003 John Graham-Cumming
#
# ---------------------------------------------------------------------------------------------

use strict;


    # Registry of the loaded POPFile module objects, indexed as
    # $components{type}{name}; load_modules adds the entries
    my %components;
    # Local time in string form (localtime evaluated in scalar context)
    my $time = localtime;


# ---------------------------------------------------------------------------------------------
#
# load_modules
#
# Called to load specific POPFile loadable modules (implemented as .pm files with special
# comment on first line) in a specific subdirectory
#
# $directory          The directory to search for loadable modules
# $type               The 'type' of module being loaded (e.g. proxy, core, ui) which is used
#                     below when fixing up references between modules (e.g. proxy modules all
#                     need access to the classifier module)
# $module             The specific module name to be loaded.
#
# ---------------------------------------------------------------------------------------------

sub load_modules {

    my ( $directory, $type, $module ) = @_;

    # Returns the newly created module object on success and a false value when
    # the file cannot be opened or is not marked as a POPFile loadable module.

    my $path = $directory . '/' . $module;

    # A file that cannot be opened is skipped without complaint; the caller
    # decides what a missing component means
    open my $handle, '<', $path or return;
    my $first = <$handle>;
    close $handle;

    # Only files that announce themselves on their first line are loaded; an
    # empty file yields an undefined first line and is skipped as well
    return unless defined $first && $first =~ /^# POPFILE LOADABLE MODULE/;

    # NOTE(review): require looks this relative path up through @INC, and Perl
    # 5.26 and later no longer put '.' there by default - confirm how the
    # script is launched
    require $path;

    # Turn the file path into the package name: every directory separator
    # becomes '::' and the trailing .pm extension is dropped
    ( my $class = $path ) =~ s{/}{::}g;
    $class =~ s/\.pm\z//;

    my $mod  = $class->new();
    my $name = $mod->name();

    # File the object under the name the module reports for itself
    $components{$type}{$name} = $mod;

    return $mod;
}


#
# main
#

    # Take the snapshot time once and derive the readable form from that same
    # reading, so the UnixTimestamp and Timestamp columns describe one instant.
    # $time is the file-level variable declared near the top of the script.
    my $snap_time = time;
    $time = localtime($snap_time);

    #
    # Load the modules we'll be using
    #

    load_modules( 'POPFile',    'core',       'Configuration.pm' );
    load_modules( 'POPFile',    'core',       'Logger.pm' );
    load_modules( 'POPFile',    'core',       'MQ.pm' );
    load_modules( 'Classifier', 'classifier', 'Bayes.pm' );

    # Do not run if we are not on version 0.19.0 or higher. load_modules leaves
    # no entry behind for a module it could not load, so the configuration
    # object has to be checked for existence before any method is called on it;
    # otherwise an incompatible installation dies with "Can't call method"
    # instead of reaching the explanation in the else branch.

    my $config = $components{core}{config};

    if ( defined $config && $config->isa('POPFile::Module') && $config->can('parameter') ) {

        # The logger and the classifier are used unconditionally below, so
        # fail early and clearly if either of them did not load. The mq
        # module is optional and is only handed to modules that accept it.

        my $logger = $components{core}{logger};
        my $bayes  = $components{classifier}{bayes};

        die "Failed to load the logger module\n" unless defined $logger;
        die "Failed to load the bayes module\n"  unless defined $bayes;

        # link each of the objects with the configuration object and
        # the logger and the mq

        foreach my $type ( keys %components ) {
            foreach my $name ( keys %{ $components{$type} } ) {
                my $component = $components{$type}{$name};

                $component->configuration($config);
                $component->logger($logger) if ( $name ne 'logger' );
                $component->mq( $components{core}{mq} ) if ( $component->can('mq') );
            }
        }

        #
        # Tell each module to initialize itself
        #

        foreach my $type ( keys %components ) {
            foreach my $name ( keys %{ $components{$type} } ) {
                if ( $components{$type}{$name}->initialize() == 0 ) {
                    die "Failed to start while initializing the $name module\n";
                }
            }
        }

        # Ensure that a snapshot subdirectory exists to hold any error log
        # that logger might generate so we don't interfere with a
        # running POPFile. An already existing directory is fine; any other
        # failure is reported but does not stop the run.

        unless ( -d 'snapshot' ) {
            mkdir('snapshot') or warn "Unable to create the snapshot directory: $!\n";
        }

        # Set default quote and separator characters

        $config->parameter( 'csv_quote',     '' );
        $config->parameter( 'csv_separator', ',' );

        # Load in the Popfile configuration parameters, any configured
        # ones will override the initialized default values
        # NOTE: We are intentionally NOT saving this configuration
        # back to disk since the parameters we are allowing
        # for this program are NOT legal Popfile parameters.

        $config->load_configuration();

        # override the logdir and piddir so we don't mess with
        # the production ones

        $config->parameter( 'logger_logdir', 'snapshot/' );
        $config->parameter( 'config_piddir', 'snapshot/' );

        # Now grab any commandline parameters, they will override
        # the defaults and those in popfile.cfg. As a byproduct,
        # if the user overrides our csv_quote or csv_separator
        # parameter, this will pick it up for us.

        $config->parse_command_line();

        # force logger to recognize the new logdir before we startup
        # the modules. That way we will not inadvertently log to the
        # production POPFile log.

        $logger->service();

        # now that the configuration is established, tell each module
        # to start

        foreach my $type ( keys %components ) {
            foreach my $name ( keys %{ $components{$type} } ) {
                if ( $components{$type}{$name}->start() == 0 ) {
                    die "Failed to start while starting the $name module\n";
                }
            }
        }

        # The separator and quote settings are final at this point, so read
        # them once and use the same values for the header and every row

        my $separator = $config->parameter('csv_separator');
        my $quote     = $config->parameter('csv_quote');

        # Check for existing CSV file: rows are always appended, and the
        # header row is written first when the file is missing or empty
        # (append mode creates the file in that case).

        my $fn           = 'snapshot_stats.csv';
        my $needs_header = !( -s $fn );

        open my $csv, '>>', $fn or die "Unable to open ${fn} :$!\n";

        if ($needs_header) {
            print {$csv} join(
                $separator,
                wrap_in_quotes(
                    $quote,
                    qw ( BucketName
                         BucketColor
                         UnixTimestamp
                         Timestamp
                         BucketUniqueWords
                         BucketWordCount
                         BucketMailsClassified
                         BucketFalsePositives
                         BucketFalseNegatives
                         GlobalWordCount
                         GlobalDownloads
                         GlobalMessages
                         GlobalErrors
                         LastResetDate
                       )
                )
            ), "\n";
        }

        # Get the buckets, then iterate thru them and output the stats
        # for each bucket by appending to CSV file. The columns follow
        # the order of the header row above.

        my @buckets = $bayes->get_buckets();

        foreach my $bucket (@buckets) {
            print {$csv} join(
                $separator,
                wrap_in_quotes(
                    $quote,
                    (
                        $bucket,
                        $bayes->get_bucket_color($bucket),
                        $snap_time,
                        $time,
                        $bayes->get_bucket_unique_count($bucket),
                        $bayes->get_bucket_word_count($bucket),
                        $bayes->get_bucket_parameter( $bucket, 'count' ),
                        $bayes->get_bucket_parameter( $bucket, 'fpcount' ),
                        $bayes->get_bucket_parameter( $bucket, 'fncount' ),
                        $bayes->get_word_count(),
                        $config->parameter('GLOBAL_download_count'),
                        $config->parameter('GLOBAL_mcount'),
                        $config->parameter('GLOBAL_ecount'),
                        $config->parameter('html_last_reset')
                    )
                )
            ), "\n";
        }

        # Buffered write errors only surface when the handle is closed
        close $csv or die "Unable to close ${fn} :$!\n";

        #
        # Cleanup - Get rid of the popfile.pid file created by the configuration
        #           module.
        #

        unlink( $config->parameter('config_piddir') . 'popfile.pid' );

        # All Done

    } else {
        print "$0 is compatible only with Popfile version 0.19.0 or above\n";
    }


#
# Routine to wrap array values in quotes
#

sub wrap_in_quotes {

    # The first argument is the quote string; every remaining argument is
    # returned with that string placed directly before and after it. Nothing
    # inside the values is escaped. Called in scalar context, the result is
    # the number of values.

    my $quote_char = shift;

    return map { "$quote_char$_$quote_char" } @_;
}

1