#!/usr/bin/perl
# ---------------------------------------------------------------------------------------------
#
# classify_eudora_mbx.pl --- Process Eudora MBX files using Popfile's
#                            Classifier::Bayes and emit pf_[bucketname].mbx
#                            files for use by Eudora.
#
# This program authored by Scott W Leighton (helphand@pacbell.net)
# based upon bayes.pl, a Popfile component, which is Copyrighted
# by John Graham-Cumming. The author hereby contributes this code
# to the Popfile project under the terms of the Popfile License
# Agreement.    /Scott W Leighton/  February 15, 2003
#
# Modified May 24, 2003 - added support for v 0.19.x
#
# Popfile and Classifier::Bayes
# Copyright (c) 2001-2003 John Graham-Cumming
#
# ---------------------------------------------------------------------------------------------
#
# Overview of what the script does:
#   1. Decides which classifier start-up sequence to use (0.18 or 0.19.x)
#      by probing for POPFile/Module.pm relative to the current directory.
#   2. Expands the single command-line argument into a list of mailbox files.
#   3. Splits each mailbox on Eudora separator lines ("From ???@??? ...").
#   4. Writes each message body to a temporary file, asks the classifier for
#      its bucket, and appends separator + body to pf_<bucket>.mbx.
#   The input mailbox is only read, never modified.

use strict;
use warnings;

use Classifier::Bayes;
use POPFile::Configuration;

# version check
#
# The 0.19.x start-up sequence is used when POPFile/Module.pm is present.
# Both spellings of the directory are probed: the module above is loaded as
# POPFile::Configuration, so on a case-sensitive filesystem only the
# "POPFile" spelling can exist, while the historical "Popfile" spelling is
# kept for backward compatibility.
my $version;
if ( -s "POPFile/Module.pm" or -s "Popfile/Module.pm" ) {
    $version = 19;
}
else {
    $version = 18;
}

# A message starts at a line beginning with this Eudora separator. It is
# anchored so that the same text quoted in the middle of a line does not
# split a message.
my $separator_re = qr/^From \?{3}\@\?{3} /;

# Scratch file handed to the classifier (created in the current directory).
my $tmp_name = "pf_message.tmp";

# main
if ( @ARGV == 1 ) {
    my $start_time = time;

    my $bayes;
    my $config;

    if ( $version == 18 ) {
        $bayes = Classifier::Bayes->new;
        if ( $bayes->initialize() == 0 ) {
            die "Failed to start while initializing the classifier module";
        }
        $bayes->{debug}           = 0;
        $bayes->{parser}->{debug} = 0;
        $bayes->load_word_matrix();
    }
    else {
        $bayes  = Classifier::Bayes->new;
        $config = POPFile::Configuration->new;

        # The call order below is kept exactly as in the original script.
        $bayes->configuration($config);
        $config->configuration($config);
        $config->initialize();
        $bayes->initialize();
        $config->load_configuration();

        # NOTE(review): the parameter name looks misspelled ("inclassified",
        # "probablity"). Whenever the lookup yields a false value the 0.5
        # fallback is what gets used - confirm the intended key against the
        # POPFile configuration before changing it.
        $bayes->{unclassified__} =
          $config->parameter("bayes_inclassified_probablity") || 0.5;
        $bayes->{debug}             = 0;
        $bayes->{parser__}->{debug} = 0;
        $bayes->start();

        # Disabled in the original script; kept for reference.
        #
        # my @dirs = glob $config->parameter("html_archive_dir") . "/*";
        #
        # setup dummy buckets
        #
        # foreach my $dir (@dirs) {
        #     my ($x, $bucket) = split /\//,$dir;
        #     $bayes->{total__}{$bucket}  = 100;
        #     $bayes->{colors__}{$bucket} = 'black';
        # }
    }

    # glob() splits its argument on whitespace, so an argument that names an
    # existing file is taken literally; anything else is treated as a pattern.
    my @files = -e $ARGV[0] ? ( $ARGV[0] ) : glob $ARGV[0];

    foreach my $file (@files) {
        process_mailbox( $bayes, $file );
    }

    my $end_time = time;
    print "Done, process took " . ( $end_time - $start_time ) . " seconds\n";
}
else {
    print "classify_eudora_mbx.pl - determine bucket for mails in mbx file and output pf_[bucketname].mbx files for classified mail. Original input Eudora mbx file left untouched.\n\n";
    print "Usage: classify_eudora_mbx.pl <mbxfile>\n";
    print "    <mbxfile>  filename of Eudora *.mbx file to classify\n";
}

# ---------------------------------------------------------------------------------------------
#
# process_mailbox
#
# Reads one mailbox file and hands every message found in it to
# store_message().
#
# $bayes    the classifier object
# $file     path of the mailbox file to read
#
# Lines that appear before the first separator line are ignored. Dies if
# the mailbox cannot be opened.
#
# ---------------------------------------------------------------------------------------------
sub process_mailbox {
    my ( $bayes, $file ) = @_;

    open my $in, '<', $file or die "Unable to open $file :$!";

    my $hdr;     # separator line of the message currently being collected
    my @body;    # lines of that message, separator excluded

    while ( my $line = <$in> ) {
        if ( $line =~ $separator_re ) {

            # A new separator closes the message collected so far.
            store_message( $bayes, $hdr, \@body ) if defined $hdr;
            $hdr  = $line;
            @body = ();
            next;
        }

        push @body, $line if defined $hdr;
    }

    # The last message has no following separator to close it.
    store_message( $bayes, $hdr, \@body ) if defined $hdr;

    close $in;
    return;
}

# ---------------------------------------------------------------------------------------------
#
# store_message
#
# Classifies a single message and appends it to pf_<bucket>.mbx in the
# current directory.
#
# $bayes    the classifier object
# $hdr      the separator line that introduced the message
# $body     reference to the list of body lines
#
# Dies if the scratch file cannot be written. A bucket mailbox that cannot
# be opened only produces a warning, so the remaining messages are still
# processed.
#
# ---------------------------------------------------------------------------------------------
sub store_message {
    my ( $bayes, $hdr, $body ) = @_;

    # The classifier works on a file, so the body goes to a scratch file.
    open my $msg, '>', $tmp_name or die "Unable to create $tmp_name :$!\n";
    binmode $msg;
    print {$msg} @{$body};
    close $msg or die "Unable to write $tmp_name :$!\n";

    my $bucket = $bayes->classify_file($tmp_name);

    # The buffered lines are exactly what was written to the scratch file,
    # so they are appended directly instead of being read back from it.
    my $out_name = "pf_${bucket}.mbx";
    if ( open my $to, '>>', $out_name ) {
        print {$to} $hdr;
        print {$to} @{$body};
        close $to or warn "Unable to finish writing $out_name :$!\n";
    }
    else {
        warn "Unable to append to $out_name :$!\n";
    }

    unlink $tmp_name;
    return;
}