snapshot_stats.pl


#!/usr/bin/perl
# ---------------------------------------------------------------------------------------------
#
# snapshot_stats.pl --- Take a SnapShot of Popfile's Classification Stats
#
# This program authored by Scott W Leighton (helphand@pacbell.net)
# based upon the Popfile project, which is Copyrighted
# by John Graham-Cumming. The author hereby contributes this code
# to the Popfile project under the terms of the Popfile License
# Agreement.    /Scott W Leighton/  January 25, 2004
#
# History - January 25, 2004 - complete rewrite for v 0.21.0
#           Mar  9, 2004 - Look for popfile.cfg in POPFILE_USER not POPFILE_ROOT
#           Mar 10, 2004 - Make sure ROOT and USER end in /
#
# Popfile and Components
# Copyright (c) 2001-2004 John Graham-Cumming
#
# ---------------------------------------------------------------------------------------------

use strict;
use DBI;
use Getopt::Long;


  #
  # Main
  #

  my %opts;

  GetOptions ("set=s%" => \%opts);

  my $csvquote = $opts{csv_quote} || '"';
  my $csvsep = $opts{csv_separator} || ',';
  my $user = $opts{user} || 1;

  my $time = localtime;
  my $root = $ENV{POPFILE_ROOT} || './';
  my $userroot = $ENV{POPFILE_USER} || './';
  $root =~ s/[\/\\]$//;
  $userroot =~ s/[\/\\]$//;
  $root .= '/';
  $userroot .= '/';

  my %config;

  if ( open CONFIG, '<' . $userroot .'popfile.cfg'  ) {
        while ( <CONFIG> ) {
            s/(\015|\012)//g;
            if ( /(\S+) (.+)/ ) {
                $config{$1}=$2;
            }
        }
        close CONFIG;
  } else {
     die "Unable to get POPFile's configuration from ${userroot}popfile.cfg : $!";
  }


  #
  #  Open the SQL database
  #

  my $dbname = $userroot . $config{bayes_database};
  my $dbconnect = $config{bayes_dbconnect};

  $dbconnect =~ s/\$dbname/$dbname/g;

  my $dbh = DBI->connect($dbconnect,
                         $config{bayes_dbuser},
                         $config{bayes_dbauth}) ||
                         die "$0 requires version 0.21.0 or higher of POPFile\n";


  #
  #  Define some global work areas
  #


   my $snap_time = time;


   # Check for existing CSV file, if present open in append mode
   # if not, then create it and output the header row.
   my $fn = 'snapshot_stats.csv';

   if (-s $fn) {
       open CSV, ">>$fn" or die "Unable to open ${fn} :$!\n";
   } else {
       open CSV, ">$fn" or die "Unable to open ${fn} :$!\n";
       print CSV join ( $csvsep,
                        wrap_in_quotes($csvquote,
                            qw ( BucketName
                                 BucketColor
                                 UnixTimestamp
                                 Timestamp
                                 BucketUniqueWords
                                 BucketWordCount
                                 BucketMailsClassified
                                 BucketFalsePositives
                                 BucketFalseNegatives
                                 GlobalWordCount
                                 GlobalDownloads
                                 GlobalMessages
                                 GlobalErrors
                                 LastResetDate
                               )
                               ));
       print CSV "\n";
   }


# Get the buckets, then iterate thru them and output the stats
# for each bucket by appending to CSV file

  # Get the buckets for this installation
  my %buckets;
  my @buckets = get_buckets();

  # calc global count
  my $globalcount=0;
  foreach my $bucket (@buckets) {
     print "checking bucket $bucket\n";
     $globalcount+=$buckets{$bucket}{'wordcount'};
  }

  foreach my $bucket (@buckets) {
        print CSV join ( $csvsep,
                     wrap_in_quotes($csvquote,
                       (
                        $bucket,
                        $buckets{$bucket}{color},
                        $snap_time,
                        $time,
                        $buckets{$bucket}{'uniquecount'},
                        $buckets{$bucket}{'wordcount'},
                        $buckets{$bucket}{'count'},
                        $buckets{$bucket}{'fpcount'},
                        $buckets{$bucket}{'fncount'},
                        $globalcount,
                        $config{'GLOBAL_download_count'},
                        $config{'GLOBAL_mcount'},
                        $config{'GLOBAL_ecount'},
                        $config{'html_last_reset'}
                       )
                       ));

        print CSV "\n";
    } 
        
    close CSV;


    # All Done

    exit(0);

#
# Routine to wrap array values in quotes
#

sub wrap_in_quotes {

   my ($default_quote, @list) = @_;
   my @newlist;

   for (@list) {
       push @newlist,$default_quote . $_ . $default_quote;
   }
   return @newlist;
}

sub get_buckets {

    my $sth=$dbh->prepare('select name, id, pseudo from buckets
                          where buckets.userid = ?;') || die $dbh->errstr;
    $sth->execute($user) || die $dbh->errstr;
    while (my $row = $sth->fetchrow_hashref) {
         $buckets{$row->{name}}{id}=$row->{id};
         $buckets{$row->{name}}{psuedo}=$row->{psuedo};
         #
         # get the wordcount for the bucket
         #
         my $sth2=$dbh->prepare('select sum(matrix.times) as btot,
                          count(matrix.id) as uniquecount
                          from matrix where matrix.bucketid = ?;') || die $dbh->errstr;
         $sth2->execute($row->{id}) || die $dbh->errstr;
         while (my $row2 = $sth2->fetchrow_hashref) {
             $buckets{$row->{name}}{wordcount}=$row2->{btot};
             $buckets{$row->{name}}{uniquecount}=$row2->{uniquecount};
         }
         #
         # get the color of the bucket
         #
         $sth2=$dbh->prepare("select
                          bucket_template.name as name,
                          bucket_params.val as val
                          from bucket_params
                          left join bucket_template on bucket_params.btid
                                = bucket_template.id
                          where bucket_params.bucketid = ?
                            ;") || die $dbh->errstr;
         $sth2->execute($row->{id}) || die $dbh->errstr;
         while (my $row2 = $sth2->fetchrow_hashref) {
             $buckets{$row->{name}}{$row2->{name}}=$row2->{val};
         }
    }
    return keys %buckets;
}


1