#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use POSIX qw(strftime);
# ---------------------------------------------------------------------------
# Main program: parse the command line, read a mail log from STDIN, tally
# every spamd "clean message" / "identified spam" line through update_stats,
# then print the report with show_stats.
# ---------------------------------------------------------------------------

# Running sums over all users, keyed by "ham" / "spam"; each entry holds the
# 'count', 'score' and 'threshold' totals maintained by update_stats.
my %global_stats = ();

# The same sums per lower-cased recipient: $user_stats{$user}{ham|spam}{...}.
my %user_stats = ();

# Date prefix (strftime "%b %e" output) that a log line must start with, or
# undef when the whole log is to be processed.
my $filter = undef;

# Number of users show_stats lists when --limit is in effect.  The initial
# value is replaced by the --limit argument whenever one is given.
my $userlimit = 65534;

my ($opt_help, $opt_today, $opt_yesterday, $opt_limit);
my $options_ok = GetOptions(
    "help|h"      => \$opt_help,
    "today|t"     => \$opt_today,
    "yesterday|y" => \$opt_yesterday,
    "limit|l=i"   => \$opt_limit,
);

if (!$options_ok) {
    # Getopt::Long has already reported the offending option on STDERR.
    # Stop here instead of silently producing a report the user did not
    # ask for.
    print STDERR "Try '$0 --help' for usage information.\n";
    exit 1;
}

if ($opt_help) {
    print "Usage: $0 [options] </var/log/mail.info\n";
    print "Without any options the entire logfile is processed\n";
    print "Options can be any of the following:\n";
    printf("\t%-5s: Stats for todays date only.\n", "t");
    printf("\t%-5s: Stats for yesterdays date only.\n", "y");
    printf("\t%-5s: Stats for top n mail recipients.\n", "l n");
    exit 0;
}

if ($opt_today && $opt_yesterday) {
    print "You can't use the -t option and -y option together.\n";
    exit 1;
}

if (defined($opt_limit) && $opt_limit < 0) {
    # "=i" accepts a leading minus sign, but a negative row count is
    # meaningless.  A limit of 0 still means "no limit", as before.
    print STDERR "The -l option needs a positive number of users.\n";
    exit 1;
}

if ($opt_today) {
    $filter = strftime("%b %e", localtime);
}
elsif ($opt_yesterday) {
    # Step back 24 hours from local noon rather than from "now".  On the day
    # clocks are set back the day is 25 hours long, so late in the evening
    # "now minus 24 hours" is still today; noon minus 24 hours always falls
    # inside the previous calendar day.
    my @now = localtime;
    my $noon_today = POSIX::mktime(0, 0, 12, $now[3], $now[4], $now[5], 0, 0, -1);
    $noon_today = time() unless defined($noon_today);
    $filter = strftime("%b %e", localtime($noon_today - (24 * 60 * 60)));
}

if ($opt_limit) {
    $userlimit = $opt_limit;
}

# Compile the date filter once.  \Q...\E keeps any regex metacharacter in the
# formatted date (for example a "." in a month abbreviation) literal.
my $filter_re = defined($filter) ? qr/^\Q$filter\E/ : undef;

# Recognised log lines, tried in this order; the first pattern that matches
# decides how the line is counted.  Each pattern captures, in order, the
# score, the threshold and the recipient.  The two variants per kind differ
# only in whether "spamd" is followed by a bracketed id.
my @line_patterns = (
    [ "ham",  qr/spamd\[[0-9].*?\]: clean message \((.*?)\/(.*?)\) for (.*?):/ ],
    [ "ham",  qr/spamd: clean message \((.*?)\/(.*?)\) for (.*?):/ ],
    [ "spam", qr/spamd\[[0-9].*?\]: identified spam \((.*?)\/(.*?)\) for (.*?):/ ],
    [ "spam", qr/spamd: identified spam \((.*?)\/(.*?)\) for (.*?):/ ],
);

LINE:
while (defined(my $line = <STDIN>)) {
    next LINE if defined($filter_re) && $line !~ $filter_re;

    for my $pattern (@line_patterns) {
        my ($kind, $regex) = @{$pattern};

        # A list assignment in boolean context yields the number of
        # captures, so this is true exactly when the pattern matched.
        if (my ($score, $threshold, $recipient) = $line =~ $regex) {
            # The & call form is kept so the call is unaffected by any
            # prototype declared on update_stats further down the file.
            &update_stats($kind, $recipient, $score, $threshold);
            next LINE;
        }
    }
}

&show_stats();
exit 0;
# Print the statistics report to STDOUT.
#
# Takes no arguments.  Reads the file-level state set up by the main program
# and update_stats:
#   $opt_today / $opt_yesterday / $filter  - choose the report heading
#   %global_stats                          - overall ham/spam sums
#   %user_stats                            - per-user ham/spam sums
#   $opt_limit / $userlimit                - restrict the per-user tables
#
# Without a limit every user is listed alphabetically.  With a limit only the
# $userlimit users with the most messages (ham + spam) are listed, busiest
# first; ties are broken alphabetically so both per-user tables always show
# the same users in the same order.
sub show_stats()
{
    # --- Heading --------------------------------------------------------
    if ($opt_today) {
        print "SpamAssassin statistics for today ($filter)\n";
    }
    elsif ($opt_yesterday) {
        print "SpamAssassin statistics for yesterday ($filter)\n";
    }
    else {
        print "SpamAssassin statistics for entire logfile\n";
    }
    # draw_line is called with & throughout so the calls do not depend on
    # whatever prototype is declared for it later in the file.
    &draw_line(70);
    print "\n";

    # --- Global summary -------------------------------------------------
    # An entry only exists once update_stats has counted at least one
    # message of that kind, so both counts below are non-zero.
    if (exists($global_stats{'spam'}) && exists($global_stats{'ham'})) {
        my $ham   = $global_stats{'ham'};
        my $spam  = $global_stats{'spam'};
        my $total = $ham->{'count'} + $spam->{'count'};

        printf("%-30s %-10s %-10s %-10s\n", "Total messages:", "Ham:", "Spam:", "% Spam:");
        &draw_line(70);
        printf("%-30d %-10d %-10d %1.2f%%\n",
            $total, $ham->{'count'}, $spam->{'count'},
            100 * ($spam->{'count'} / $total));
        print "\n";
        printf("%-30s: %1.2f/%1.2f\n", "Average spam score",
            $spam->{'score'} / $spam->{'count'},
            $spam->{'threshold'} / $spam->{'count'});
        printf("%-30s: %1.2f/%1.2f\n", "Average ham score",
            $ham->{'score'} / $ham->{'count'},
            $ham->{'threshold'} / $ham->{'count'});
    }
    else {
        if (!exists($global_stats{'ham'})) {
            print "No ham (clean) messages found in logfile.\n";
        }
        if (!exists($global_stats{'spam'})) {
            print "No spam (identified) messages found in logfile.\n";
        }
        print "Due to the above, not enough information is available to calculate\nglobal statistics.\n";
    }

    # --- Decide once which users to list, and in which order ------------
    my %total_for;
    for my $username (keys %user_stats) {
        $total_for{$username} = $user_stats{$username}{'ham'}{'count'}
                              + $user_stats{$username}{'spam'}{'count'};
    }

    my @usernames;
    if ($opt_limit) {
        @usernames = sort { $total_for{$b} <=> $total_for{$a} or $a cmp $b }
                     keys %user_stats;
        # At least one row is always shown, even for a non-positive limit.
        my $keep = $userlimit < 1 ? 1 : $userlimit;
        splice(@usernames, $keep) if @usernames > $keep;
    }
    else {
        @usernames = sort keys %user_stats;
    }

    # --- Per-user message counts ----------------------------------------
    print "\n";
    printf("%-30s %-7s %-7s %-7s %-7s\n", "Username:", "Total:", "Ham:", "Spam:", "% Spam:");
    &draw_line(70);
    for my $username (@usernames) {
        my $ham_count  = $user_stats{$username}{'ham'}{'count'};
        my $spam_count = $user_stats{$username}{'spam'}{'count'};
        # A user entry is only created while counting a message, so the
        # total is never zero here.
        my $total = $total_for{$username};
        printf("%-30s %-7d %-7d %-7d %1.2f%%\n", $username,
            $total, $ham_count, $spam_count,
            100 * ($spam_count / $total));
    }

    # --- Per-user average scores ----------------------------------------
    print "\n";
    printf("%-30s %-20s %-20s\n", "Username:", "Avg. ham score:", "Avg. spam score:");
    &draw_line(70);
    for my $username (@usernames) {
        my $ham  = $user_stats{$username}{'ham'};
        my $spam = $user_stats{$username}{'spam'};

        my $ham_average  = "None";
        my $spam_average = "None";
        if ($ham->{'count'}) {
            $ham_average = sprintf("%1.2f/%1.2f",
                $ham->{'score'} / $ham->{'count'},
                $ham->{'threshold'} / $ham->{'count'});
        }
        # Guard on the message count, not on the score sum: a score sum of
        # exactly 0 is a valid average and must not be reported as "None".
        if ($spam->{'count'}) {
            $spam_average = sprintf("%1.2f/%1.2f",
                $spam->{'score'} / $spam->{'count'},
                $spam->{'threshold'} / $spam->{'count'});
        }
        printf("%-30s %-20s %-20s\n", $username, $ham_average, $spam_average);
    }
}
# Add one classified message to the running totals.
#
# Arguments (positional):
#   $stat      - which bucket to update; the log-parsing loop passes "ham"
#                or "spam"
#   $username  - recipient the message was for; folded to lower case so that
#                differently-cased spellings share one entry
#   $score     - score reported for the message, added to the 'score' sum
#   $threshold - threshold reported for the message, added to the
#                'threshold' sum
#
# Updates the file-level %global_stats and %user_stats hashes.  The first
# time a user is seen, both the "ham" and the "spam" bucket are created with
# zeroed sums, so report code can read either one without checking.
#
# No prototype is declared: the sub takes four arguments, and an empty "()"
# prototype would make any plain update_stats(...) call compiled after this
# definition a compile-time error.
sub update_stats
{
    my ($stat, $username, $score, $threshold) = @_;
    $username = lc($username);

    if (!exists($global_stats{$stat})) {
        $global_stats{$stat} = { 'count' => 0, 'score' => 0, 'threshold' => 0 };
    }
    $global_stats{$stat}{'count'}++;
    $global_stats{$stat}{'score'}     += $score;
    $global_stats{$stat}{'threshold'} += $threshold;

    if (!exists($user_stats{$username})) {
        $user_stats{$username} = {
            'ham'  => { 'count' => 0, 'score' => 0, 'threshold' => 0 },
            'spam' => { 'count' => 0, 'score' => 0, 'threshold' => 0 },
        };
    }
    $user_stats{$username}{$stat}{'count'}++;
    $user_stats{$username}{$stat}{'score'}     += $score;
    $user_stats{$username}{$stat}{'threshold'} += $threshold;

    return;
}
# Print a horizontal rule: $length dash characters followed by a newline,
# written to the currently selected output handle.
#
# Arguments:
#   $length - number of "-" characters to print
#
# No prototype is declared: the sub takes one argument, and an empty "()"
# prototype makes a plain draw_line(70) call a compile-time error when it
# follows this definition, and a "called too early to check prototype"
# warning under -w when it precedes it.
sub draw_line
{
    my ($length) = @_;
    print "-" x $length, "\n";
    return;
}