snapshot_stats.pl
#!/usr/bin/perl
# ---------------------------------------------------------------------------------------------
#
# snapshot_stats.pl --- Take a SnapShot of Popfile's Classification Stats
#
# This program authored by Scott W Leighton (helphand@pacbell.net)
# based upon the Popfile project, which is Copyrighted
# by John Graham-Cumming. The author hereby contributes this code
# to the Popfile project under the terms of the Popfile License
# Agreement. /Scott W Leighton/ January 25, 2004
#
# History - January 25, 2004 - complete rewrite for v 0.21.0
# Mar 9, 2004 - Look for popfile.cfg in POPFILE_USER not POPFILE_ROOT
# Mar 10, 2004 - Make sure ROOT and USER end in /
#
# Popfile and Components
# Copyright (c) 2001-2004 John Graham-Cumming
#
# ---------------------------------------------------------------------------------------------
use strict;
use DBI;
use Getopt::Long;
#
# Main
#
my %opts;
GetOptions ("set=s%" => \%opts);
my $csvquote = $opts{csv_quote} || '"';
my $csvsep = $opts{csv_separator} || ',';
my $user = $opts{user} || 1;
my $time = localtime;
my $root = $ENV{POPFILE_ROOT} || './';
my $userroot = $ENV{POPFILE_USER} || './';
$root =~ s/[\/\\]$//;
$userroot =~ s/[\/\\]$//;
$root .= '/';
$userroot .= '/';
my %config;
if ( open CONFIG, '<' . $userroot .'popfile.cfg' ) {
while ( <CONFIG> ) {
s/(\015|\012)//g;
if ( /(\S+) (.+)/ ) {
$config{$1}=$2;
}
}
close CONFIG;
} else {
die "Unable to get POPFile's configuration from ${userroot}popfile.cfg : $!";
}
#
# Open the SQL database
#
my $dbname = $userroot . $config{bayes_database};
my $dbconnect = $config{bayes_dbconnect};
$dbconnect =~ s/\$dbname/$dbname/g;
my $dbh = DBI->connect($dbconnect,
$config{bayes_dbuser},
$config{bayes_dbauth}) ||
die "$0 requires version 0.21.0 or higher of POPFile\n";
#
# Define some global work areas
#
my $snap_time = time;
# Check for existing CSV file, if present open in append mode
# if not, then create it and output the header row.
my $fn = 'snapshot_stats.csv';
if (-s $fn) {
open CSV, ">>$fn" or die "Unable to open ${fn} :$!\n";
} else {
open CSV, ">$fn" or die "Unable to open ${fn} :$!\n";
print CSV join ( $csvsep,
wrap_in_quotes($csvquote,
qw ( BucketName
BucketColor
UnixTimestamp
Timestamp
BucketUniqueWords
BucketWordCount
BucketMailsClassified
BucketFalsePositives
BucketFalseNegatives
GlobalWordCount
GlobalDownloads
GlobalMessages
GlobalErrors
LastResetDate
)
));
print CSV "\n";
}
# Get the buckets, then iterate thru them and output the stats
# for each bucket by appending to CSV file
# Get the buckets for this installation
my %buckets;
my @buckets = get_buckets();
# calc global count
my $globalcount=0;
foreach my $bucket (@buckets) {
print "checking bucket $bucket\n";
$globalcount+=$buckets{$bucket}{'wordcount'};
}
foreach my $bucket (@buckets) {
print CSV join ( $csvsep,
wrap_in_quotes($csvquote,
(
$bucket,
$buckets{$bucket}{color},
$snap_time,
$time,
$buckets{$bucket}{'uniquecount'},
$buckets{$bucket}{'wordcount'},
$buckets{$bucket}{'count'},
$buckets{$bucket}{'fpcount'},
$buckets{$bucket}{'fncount'},
$globalcount,
$config{'GLOBAL_download_count'},
$config{'GLOBAL_mcount'},
$config{'GLOBAL_ecount'},
$config{'html_last_reset'}
)
));
print CSV "\n";
}
close CSV;
# All Done
exit(0);
#
# Routine to wrap array values in quotes
#
sub wrap_in_quotes {
my ($default_quote, @list) = @_;
my @newlist;
for (@list) {
push @newlist,$default_quote . $_ . $default_quote;
}
return @newlist;
}
sub get_buckets {
my $sth=$dbh->prepare('select name, id, pseudo from buckets
where buckets.userid = ?;') || die $dbh->errstr;
$sth->execute($user) || die $dbh->errstr;
while (my $row = $sth->fetchrow_hashref) {
$buckets{$row->{name}}{id}=$row->{id};
$buckets{$row->{name}}{psuedo}=$row->{psuedo};
#
# get the wordcount for the bucket
#
my $sth2=$dbh->prepare('select sum(matrix.times) as btot,
count(matrix.id) as uniquecount
from matrix where matrix.bucketid = ?;') || die $dbh->errstr;
$sth2->execute($row->{id}) || die $dbh->errstr;
while (my $row2 = $sth2->fetchrow_hashref) {
$buckets{$row->{name}}{wordcount}=$row2->{btot};
$buckets{$row->{name}}{uniquecount}=$row2->{uniquecount};
}
#
# get the color of the bucket
#
$sth2=$dbh->prepare("select
bucket_template.name as name,
bucket_params.val as val
from bucket_params
left join bucket_template on bucket_params.btid
= bucket_template.id
where bucket_params.bucketid = ?
;") || die $dbh->errstr;
$sth2->execute($row->{id}) || die $dbh->errstr;
while (my $row2 = $sth2->fetchrow_hashref) {
$buckets{$row->{name}}{$row2->{name}}=$row2->{val};
}
}
return keys %buckets;
}