#!/usr/bin/perl
#
# extract_mail_stats
#
# Extract sendmail stats from syslog file, processing them and saving
# result that is later used by the 'report_mail_stats' script.
#
#
# Typical usage: extract_mail_stats
#
# flags:
# -d debug mode
# -e print strange lines to stderr
# -o <file> output file
# -l <file> log file
# -T print entries with a "to=<>" or "from=<>" (missing address)
# -m reduce addresses to the local mailbox name if possible
#
#
# Cron entry:
#
# 4 0 * * * /usr/local/etc/syswatch/bin/extract_mail_stats
#
#
# Load the shared SysWatch helpers.  Variables used in this file but never
# assigned here ($GET_DATE, $GREP, $SW_HOME, $SW_TMP, $program) are
# presumably set up by set_common_vars() - TODO confirm in sw_common.pl.
#
# Subroutines are called as "&name( ... )": the old "do name( ... )" call
# form is rejected by Perl 5.20 and later, while "&name( ... )" is accepted
# by every Perl version and is already used elsewhere in this file.
#
require( '/usr/local/etc/syswatch/bin/sw_common.pl' );
&set_common_vars();
$PrintStrangeLines = 1; # provisional: handle_args() resets it from the -e flag
#
# Some systems keep the mail log file here:
#
$TheRawSysLog = "/var/log/mail";
$TheRawSysLog = "/var/log/maillog" if( ! -e $TheRawSysLog ); # But could be here, too
$TheRawSysLog = "/var/adm/SYSLOG" if( ! -e $TheRawSysLog ); # But could be here, too
$TheDataDay = '-yesterday'; # default to yesterday
$TheDataOutputFile = '';
#
# Where data goes
#
$DATA_FILE_DIR = "/var/log/mailtraffic"; # dir containing data files
#
# Some log entries can be: "... from=<>, size=..."
# Use these strings as placeholders for the missing address.
#
$NO_FROM_USER = '<blank \'from\'>';
$NO_TO_USER = '<blank \'to\'>';
#
# Main part of program:
#
#   1. parse the command line
#   2. optionally load the table of local accounts (-m)
#   3. copy the day's sendmail lines to a temp file
#   4. accumulate the statistics from that temp file
#   5. write the statistics to the output file
#
&handle_args();
&hash_passwd() if $ReduceToLocalMailbox;
$data_file = &get_raw_data( $TheRawSysLog );
&gather_stats( $data_file );
unlink( $data_file ); # the temp copy is not needed any more
&output_stats( $TheDataOutputFile );
exit( 0 );
#
# -- end --
#
#
# handle_args
#
# Parse the command line (@ARGV) and set our internal flags.
#
# Recognised arguments:
#   -o <file>            output file (sets $TheDataOutputFile)
#   -l <file>            log file (sets $TheMailLog)
#   -yesterday | -today  which day to process (sets $TheDataDay)
#   -e -m -d -T          single-letter flags; they may be clustered ("-em")
#
# On return $ReduceToLocalMailbox, $PrintStrangeLines,
# $PrintLinesWithNoFromOrTo and $Debug mirror the m, e, T and d flags, and
# $TheDataOutputFile / $TheRawSysLog hold the files to use.
#
# An unknown flag prints the usage text and exits with status -1; giving
# the same flag twice dies.
#
sub handle_args
{
    local ( $arg );
    while( $ARGV[ 0 ] =~ /^-/ )
    {
        #
        # "shift( @ARGV )" rather than the Perl 4 "shift ARGV", which
        # Perl 5 refuses to compile.
        #
        $arg = shift( @ARGV );
        if( $arg eq '-o' )
        {
            $TheDataOutputFile = shift( @ARGV );
            next;
        }
        if( $arg eq '-l' )
        {
            $TheMailLog = shift( @ARGV );
            next;
        }
        if( $arg eq '-yesterday' || $arg eq '-today' )
        {
            $TheDataDay = $arg;
            next;
        }
        #
        # Anything else is a cluster of single-letter flags
        #
        $arg =~ s/^-//;
        foreach $flag ( split( //, $arg ) )
        {
            #
            # Plain substring test: using the flag as a regex would accept
            # "." as a valid flag and abort on characters such as "(".
            #
            if( index( 'emdT', $flag ) < 0 )
            {
                # print, not printf: the flag may itself be a "%"
                print STDERR "unknown flag: $flag\n";
                print "$program [flags]
-d debug mode
-e print strange lines to stderr
-o <file> output file (overrides \"" . &CalcOutputFile() . "\")
-l <file> log file (overrides \"" . &CalcLogFile() . "\")
-T print entries with a \"to=<>\" or \"from=<>\" (missing address)
-m reduce to local mbox is possible
";
                exit( -1 );
            }
            die "$0: '$flag' flag already set\n" if( $flags{ $flag }++ );
        }
    }
    $ReduceToLocalMailbox = $flags{ 'm' };
    $PrintStrangeLines = $flags{ 'e' };
    $PrintLinesWithNoFromOrTo = $flags{ 'T' };
    $Debug = $flags{ 'd' };
    print "Debug mode\n" if( $Debug );
    if( ! $TheDataOutputFile )
    {
        $TheDataOutputFile = &CalcOutputFile();
        print "Output file is $TheDataOutputFile\n" if( $Debug );
    }
    $TheRawSysLog = &CalcLogFile();
    print "SysLog = $TheRawSysLog\n" if( $Debug );
}
#
# CalcOutputFile()
#
# Return the name of the file the statistics are written to.  An output
# file that has already been chosen ($TheDataOutputFile) always wins;
# otherwise the name is built by mk_data_file_name() from two values that
# $GET_DATE reports for the day selected by $TheDataDay.
#
sub CalcOutputFile
{
    if( ! $TheDataOutputFile )
    {
        #
        # Nothing chosen yet: calculate the default
        #
        local ( $letter ) = `$GET_DATE $TheDataDay -monthLMod 2`;
        local ( $dom ) = `$GET_DATE $TheDataDay -day`;
        return &mk_data_file_name( $letter, $dom );
    }
    return $TheDataOutputFile;
}
#
# CalcLogFile()
#
# Return the log file to read.  In debug mode an existing "$SW_HOME/ml"
# replaces whatever $TheMailLog held.  A non-empty $TheMailLog is then
# preferred over the default in $TheRawSysLog.
#
sub CalcLogFile
{
    if( $Debug && -e "$SW_HOME/ml" )
    {
        $TheMailLog = "$SW_HOME/ml";
    }
    return $TheMailLog ? $TheMailLog : $TheRawSysLog;
}
#
# get_raw_data()
#
# Separate the raw data into a temp file.
#
# Takes the name of the system log and returns the name of a temp file
# ("$SW_TMP/eml.<pid>") holding only the sendmail lines of the day selected
# by $TheDataDay.  The caller is responsible for removing the temp file.
#
sub get_raw_data
{
    local ( $raw_sys_log ) = @_;
    #
    # Info is logged as:
    #
    # Jun 13 08:32:45 ptolemy sendmail[22383]: IAA22383: from=<opentpt@lis...
    # Jun 13 08:32:45 ptolemy sendmail[22384]: IAA22383: to=<matt@kamson.com>...
    #
    # So we need to extract only those lines from the general
    # log file
    #
    # NOTE(review): the date is used as-is inside the grep pattern, so this
    # assumes $GET_DATE prints no trailing newline - confirm.
    #
    local ( $search_date ) = `$GET_DATE $TheDataDay -month -space -spacePaddedDay`;
    #
    # Grep out the day's lines to a separate file:
    #
    local ( $days_mail_entries ) = "$SW_TMP/eml.$$";
    local ( $status ) = system( "$GREP \"^$search_date\" $raw_sys_log | $GREP sendmail > $days_mail_entries" );
    #
    # Assuming $GREP follows the usual grep convention, an exit status of 1
    # only means "no matching lines" (a quiet day).  Report anything worse,
    # or a command that was killed or could not be started, but carry on:
    # the caller fails by itself if the temp file is unusable.
    #
    if( ( $status & 255 ) || ( $status >> 8 ) > 1 )
    {
        print STDERR "$0: extracting mail entries from $raw_sys_log failed (status $status)\n";
    }
    return $days_mail_entries;
}
#
# mk_data_file_name()
#
# Construct the name of the data file.
# They are typically of the form: "/var/log/mailtraffic/A_day.24"
#
# Arguments:
#   $month_letter   '' or 'A', 'B'; when empty it is asked from $GET_DATE
#   $day            '-yesterday', '-today' or a day number 1, 2, 3, etc
#
# Returns "<dir>/<month_letter>_day.<two-digit day>", where <dir> is
# $DATA_FILE_DIR, or $SW_TMP in debug mode.  A day that is neither a
# positive number nor one of the two keywords prints a message and exits
# with status -1.
#
sub mk_data_file_name
{
    local ( $month_letter, # '' or 'A', 'B'
    $day ) = @_; # '-yesterday', '-today' or 1, 2, 3, etc
    local ( $fn, $day_num );
    $month_letter = `$GET_DATE -monthLMod 2` if( ! $month_letter );
    if( int( $day ) > 0 )
    {
        #
        # Zero-pad only real day numbers; a lone "0" or a stray single
        # character must fall through to the "Bad 'day'" check below
        # instead of becoming "00" or "0x".
        #
        $day_num = ( length( "$day" ) == 1 ) ? "0$day" : "$day";
    }
    elsif( $day eq '-yesterday' || $day eq '-today' )
    {
        $day_num = `$GET_DATE $day -zeroPaddedDay`;
    }
    if( ! $day_num )
    {
        print "Bad 'day': $day, (month_let=$month_letter)\n";
        exit -1;
    }
    local ( $dir ) = $DATA_FILE_DIR;
    $dir = $SW_TMP if( $Debug ); # keep debug output away from the real data
    $fn = "$dir/${month_letter}_day.$day_num";
    return $fn;
}
#
# gather_stats
#
# Read the file named by the argument line by line and classify every line.
# "from=" and "to=" lines are handed to parse_from_line() and
# parse_to_line(); the other kinds only bump one of the counters $NErrors,
# $NBlocked, $NBlockedExpired or $NUnblocked.  The order of the tests
# matters: the first matching branch wins.
#
# Dies if the file cannot be opened.
#
sub gather_stats
{
    local ( $mail_log_fn ) = @_;
    open( MAIL_LOG, $mail_log_fn ) || die "Can't open $mail_log_fn: $!\n";
    while( <MAIL_LOG> )
    {
        if( /lost input channel from/
        || /, stat=queued/
        || /stat=Service unavailable/ )
        {
            # ignore these
        }
        #
        # A "from" line?
        #
        # Jun 13 08:32:45 ptolemy sendmail[22383]: IAA22383: from=<opentpt@list.stairways.com>,
        # size=7978, class=-60, pri=145978, nrcpts=1, msgid=<35959685@cupid.Dartmouth.EDU>,
        # proto=ESMTP, relay=angstrom.metawire.com [198.147.96.73]
        #
        # Captures: hour, queue id, sender, size
        #
        elsif( / (..):\d\d:\d\d .*: ([A-Z]+\d+): from=(.*), size=(\d+)/ )
        {
            &parse_from_line( $1, $2, $3, $4 );
        }
        #
        # Some kind of error?
        #
        # Like:
        # Jun 17 08:41:34 bandit sendmail[15290]: IAA13746: to=<'sharden@ghproductions.com'>,
        # ctladdr=<lconners@cyberstudios.com> (10758/101), delay=00:00:03, xdelay=00:00:00,
        # mailer=smtp8, relay=ghproductions.com', stat=Host unknown (Name server:
        # ghproductions.com': host not found)
        #
        # This test comes before the "to" test so that failed deliveries
        # are counted as errors and not as traffic.
        #
        elsif( /stat=Host unknown/
        || /stat= Host unknown/
        || /: .ost not found/
        || /User unknown/
        || /user address required/
        || /could not send message for past \d+ hours/
        || /Connection reset by/
        || /stat=I\/O error: Error 0/
        || /collect: I\/O error on connection from/
        || /collect: premature EOM: Error 0/
        || /collect: unexpected close on connection from/
        || /timeout waiting for input from/
        || /Local configuration error/
        || /Remote protocol error/ )
        {
            print $_ if $PrintStrangeLines;
            $NErrors++;
        }
        #
        # A "to" line?
        #
        # Jun 13 08:32:45 ptolemy sendmail[22384]: IAA22383: to=<matt@kamson.com>,
        # delay=00:00:02, xdelay=00:00:00, mailer=local, stat=Sent
        #
        # Jun 13 06:02:36 ptolemy sendmail[21730]: GAA21728: to=devnull@metawire.com,
        # ctladdr=mailcollector (500/12), delay=00:00:36, xdelay=00:00:36, mailer=esmtp,
        # relay=mail.metawire.com. [198.147.96.73], stat=Sent (FAA05583 Message accepted
        # for delivery)
        #
        # Jun 17 00:08:46 bandit sendmail[9468]: PAA10510: to=<adamross@mail2.DELTANET.COM>,
        # delay=1+08:37:48, xdelay=00:00:00, mailer=smtp8, relay=mail2.deltanet.com.,
        # stat=Deferred: 418 <>... can't resolve your name, check your DNS
        #
        # Captures: queue id, everything after "to="
        #
        elsif( /: ([A-Z]+\d+): to=(.*)/ )
        {
            &parse_to_line( $1, $2 );
        }
        #
        # Don't count these as errors
        #
        # || /aliases/
        #
        elsif( /message-id/
        || /locked/
        || /alias database (auto|)rebuilt/
        || /rebuilding alias database/
        || /Authentication-Warning: .*: nuucp set sender to/ )
        {
            # print " Noted, didn't count line\n";
        }
        #
        # Is a "blocking" line?
        #
        elsif( / blocking: / )
        {
            if( / has expired/ )
            {
                $NBlockedExpired++;
            }
            else
            {
                $NBlocked++;
            }
        }
        #
        # Is a "blocking" debug line?
        #
        elsif( / blocking - / )
        {
            # ignore
        }
        #
        # or an "unblocking" line?
        #
        elsif( / unblocking: / )
        {
            $NUnblocked++;
        }
        else
        {
            #
            # Count as an error
            #
            $NErrors++;
            #
            # Jun 18 12:30:17 bandit sendmail[12994]: dropenvelope: MAA03138: q_flags = 6019,
            # paddr = <tony@sportspin.com>
            # Jun 18 14:55:57 bandit sendmail[14228]: dropenvelope: OAA09328: q_flags = 6019,
            # paddr = <thor@oz.com>
            #
            # These are counted as errors but never printed.
            #
            next if( /dropenvelope: .*: q_flags = 6019/ );
            #
            # Don't know what kind of line it is - print it out
            #
            print $_ if $PrintStrangeLines;
        }
    }
    close( MAIL_LOG );
}
#
# parse_from_line()
#
# Record one "from" line:
#
# Jun 13 08:32:45 ptolemy sendmail[22383]: IAA22383: from=<opentpt@list.stairways.com>,
# size=7978, class=-60, pri=145978, nrcpts=1, msgid=<35959685@cupid.Dartmouth.EDU>,
# proto=ESMTP, relay=angstrom.metawire.com [198.147.96.73]
#
# Arguments: hour ("08"), queue id ("IAA22383"), sender as logged
# ("<opentpt@list.stairways.com>") and message size in bytes.
#
# Updates $NMsgsFrom, $NErrors, %from_user_size, %from_user_count,
# %id_size (queue id -> size, read back by parse_to_line) and %Hourly.
#
sub parse_from_line
{
    local ( $hour, $id, $user, $size ) = @_;
    print "id=$id, user=$user" if( $DetailedDebug );
    $user =~ s/.*<(.*)>/$1/; # get rid of <>
    print "$user\n" if( $DetailedDebug );
    $user =~ tr/A-Z/a-z/; # canonical lc
    print "$user\n" if( $DetailedDebug );
    if( ! $user )
    {
        #
        # "from=<>": file it under the placeholder and count an error
        #
        $user = $NO_FROM_USER;
        print STDERR "No 'from' user: $_" if( $PrintLinesWithNoFromOrTo );
        $NErrors++;
    }
    if( $ReduceToLocalMailbox )
    {
        #
        # Use the bare mailbox name, but only when it is a known account
        #
        local ( $ouser ) = $user;
        $user = &strip( $user );
        $user = $ouser if ! $known{ $user };
    }
    #
    # First time we've run into this ID ?
    # Tested with defined(): a message logged with size=0 stores a false
    # value and would otherwise be counted again on every later sighting.
    #
    $NMsgsFrom++ if( ! defined( $id_size{ $id } ) );
    #
    # Record other info about it
    #
    $from_user_size { $user } += $size;
    $id_size { $id } = $size;
    $from_user_count{ $user }++;
    $Hourly { $hour }++;
}
#
# parse_to_line()
#
# Parse a "to=" line, recording the info about it.
# It is somewhat complicated because the commas that
# normally separate fields are also used to separate
# several addresses:
#
# to=<martha.josephson@ezi.com>,<cathy.anterasian@ezi.com>, delay=2+05:02:33, ...
# |________________________| |________________________| |______________|
# addr 1 addr 2
# fld 1 fld 2
#
# Arguments: the queue id and everything that followed "to=" on the line.
#
# Only lines whose status is Sent, Deferred or Queued are recorded.  For
# every recipient either the deferred counters ($NDeferred,
# %deferred_count, %deferred_msg) or the delivery counters ($NMsgsTo,
# %to_user_count, %to_user_size) are updated; the size comes from %id_size
# as stored by parse_from_line().
#
sub parse_to_line
{
    local ( $id, $rest ) = @_;
    local ( $statv, $to, $oto );
    if( $rest =~ /, stat=(Sent)/
    || $rest =~ /, stat=(Deferred)/
    || $rest =~ /, stat=(.ueued)/ )
    {
        $statv = $1;
    }
    else
    {
        # NOTE(review): this prints when NOT in debug mode - confirm that
        # the condition is intended and not inverted.
        print "Not a 'Sent', 'Deferred' or 'Queued' to= line: $rest\n" if( ! $Debug );
        return;
    }
    #
    # $id = NAA10522
    # $rest = <martha.josephson@ezi.com>,<cathy.anterasian@ezi.com>,
    # delay=2+05:02:33, mailer=esmtp, relay=nova.linked.net. [220.1.1.1],
    # stat=Deferred: No route to host
    #
    # or:
    #
    # $rest = matt@ptolemy.metawire.com, ctladdr=syswatch (1010/201),
    # delay=00:01:15, xdelay=00:01:15, mailer=smtp8,
    # relay=ptolemy-mail.metawire.com. [205.219.92.165],
    # stat=Deferred: No route to host
    #
    print " parsing rest...\n" if( $DetailedDebug );
    while( $rest )
    {
        #
        # Peel off the text up to the next comma
        #
        ($to, $rest) = split( /,/, $rest, 2 );
        print " split: to=$to, rest=$rest\n" if( $DetailedDebug );
        #
        # Have we split to the point where "addr" now is "xxx=yyy"?
        # If so then this is the next field and the address list is over.
        # (An address that itself contains "=" also stops the loop.)
        #
        last if( $to =~ /=/ );
        #
        # Remove enclosing "<" ">", if they are there
        #
        if( $to =~ /.*<(.*)>/ )
        {
            $to = $1;
        }
        print " to=$to\n" if( $DetailedDebug );
        if( $ReduceToLocalMailbox )
        {
            #
            # Use the bare mailbox name, but only when it is a known account
            #
            $oto = $to;
            print " $to\n" if( $Debug );
            $to = &strip( $to );
            print " $to\n" if( $Debug );
            $to = $oto if ! $known{ $to };
            print " $to\n" if( $Debug );
            print " $known{ $to }\n" if( $Debug );
        }
        $to =~ tr/A-Z/a-z/;
        if( ! $to )
        {
            #
            # "to=<>": file it under the placeholder and count an error
            #
            $to = $NO_TO_USER;
            print STDERR "No 'to' user: $_" if( $PrintLinesWithNoFromOrTo );
            $NErrors++;
        }
        if( $statv eq "Deferred" )
        {
            $NDeferred++;
            # the per-user count is only bumped the first time this
            # message id is seen deferred
            $deferred_count{ $to }++ if( $deferred_msg{ $id } == 0 );
            $deferred_msg { $id }++;
            print "Deferred: to=$to, #=$deferred_count{ $to }\n" if( $DetailedDebug );
        }
        #
        # $statv eq "Sent"
        # $statv eq "queued" || $statv eq "Queued" )
        else
        {
            print "Sent: to=$to\n" if( $DetailedDebug );
            $to_user_size { $to } += $id_size{ $id };
            $to_user_count{ $to }++;
            $NMsgsTo++;
        }
    }
    print " done parsing_to_line\n" if( $DetailedDebug );
}
#
# output_stats
#
# Write the gathered statistics to the file named by the argument
# (overwriting it).  The file holds, tab separated:
#
#   - one line per global counter (MsgsFrom, MsgsTo, Error lines,
#     Deferred, Blocked, BlockedExpired, Unblocked)
#   - one "Hourly" line with 24 values, for hours 00 to 23
#   - two "#" comment lines
#   - one line per address, busiest first:
#     who, msgs to, msgs from, bytes to, bytes from, deferred
#
# Counters that were never touched are written as 0.
# Dies if the file cannot be opened or closed.
#
sub output_stats
{
    local ( $output_file ) = @_;
    local ( $user, $hr, $ind, @hourly );
    open( OF, "> $output_file" ) || die "Can't open output $output_file: $!\n";
    print OF "MsgsFrom\t" . &numof( $NMsgsFrom ) . "\n";
    print OF "MsgsTo\t" . &numof( $NMsgsTo ) . "\n";
    print OF "Error lines\t" . &numof( $NErrors ) . "\n";
    print OF "Deferred\t" . &numof( $NDeferred ) . "\n";
    print OF "Blocked\t" . &numof( $NBlocked ) . "\n";
    print OF "BlockedExpired\t" . &numof( $NBlockedExpired ) . "\n";
    print OF "Unblocked\t" . &numof( $NUnblocked ) . "\n";
    #
    # %Hourly is keyed by the two-digit hour as it appears in the log
    #
    foreach $hr ( 0..23 )
    {
        $ind = ( $hr < 10 ) ? "0$hr" : "$hr";
        push( @hourly, &numof( $Hourly{ $ind } ) );
    }
    print OF "Hourly\t" . join( "\t", @hourly ) . "\n";
    print OF "#\n";
    print OF "# Who\tMsgs To\tMsgs From\tBytes To\tBytes From\tDeferred\n";
    #
    # Merge all together: %merged_count is the total activity per address
    # and is what bothsort() orders by.  The hashes are named with their
    # "%" sigil; the bare-name form "keys( to_user_count )" does not
    # compile under Perl 5.
    #
    foreach $user ( keys( %to_user_count ) )
    {
        $merged_count{ $user } = $to_user_count { $user };
    }
    foreach $user ( keys( %from_user_count ) )
    {
        $merged_count{ $user } += $from_user_count{ $user };
    }
    foreach $user ( keys( %deferred_count ) )
    {
        $merged_count{ $user } += $deferred_count { $user };
    }
    #
    # And print each to the file
    #
    foreach $user ( sort bothsort keys( %merged_count ) )
    {
        print OF "$user";
        print OF "\t" . &numof( $to_user_count { $user } );
        print OF "\t" . &numof( $from_user_count{ $user } );
        print OF "\t" . &numof( $to_user_size { $user } );
        print OF "\t" . &numof( $from_user_size { $user } );
        print OF "\t" . &numof( $deferred_count { $user } );
        print OF "\n";
    }
    #
    # Buffered write errors (e.g. a full disk) only show up here
    #
    close( OF ) || die "Can't close output $output_file: $!\n";
}
#
# bothsort
#
# Sort comparator (reads sort's $a and $b): orders addresses by their
# entry in %merged_count, largest first.
#
sub bothsort
{
    local ( $count_a, $count_b ) = ( $merged_count{ $a }, $merged_count{ $b } );
    $count_b - $count_a;
}
#
# numof
#
# Return the argument unchanged when it is true, and the string "0" when
# it is false (undefined, empty or zero), so that counters that were never
# touched still print as a number.
#
sub numof
{
    local ( $value ) = @_;
    return $value ? $value : "0";
}
#
# strip
#
# Reduce a mail address to a bare, lower-case name: everything from the
# first "@" on is dropped, then everything up to the last "!", then a
# parenthesised part together with the blanks in front of it.
#
sub strip
{
    local ( $addr ) = @_;
    $addr =~ tr/A-Z/a-z/;
    $addr =~ s/@.*//;
    $addr =~ s/.*!//;
    $addr =~ s/\s*\(.*\)//;
    return $addr;
}
#
# hash_passwd
#
# Fill %known with the accounts of this system: account name -> uid.
# The accounts are read from "ypcat passwd" when /bin/domainname reports a
# domain, and from /etc/passwd otherwise.
#
# Callers test "$known{ name }" for truth, so uid 0 is stored as the
# string 'zero' to keep such accounts "known".
#
# Dies if the password source cannot be opened.
#
sub hash_passwd
{
    $yp = '';
    if( -x '/bin/domainname' )
    {
        $yp = `/bin/domainname`;
        $yp =~ s/\s+$//; # trailing newline, if any
        #
        # Some systems print "(none)" when no domain is configured;
        # that must not select ypcat.
        #
        $yp = '' if( $yp eq '(none)' );
    }
    $passwd = $yp ? 'ypcat passwd |' : '/etc/passwd';
    open( PASSWD, $passwd ) || die "$program: can't open $passwd: $!\n";
    while( <PASSWD> )
    {
        #
        # name:password:uid:...  Skip lines that do not look like that;
        # after a failed match $1 and $2 would still hold the values of
        # the previous line.  Account names may contain "." and "-", and
        # the password field may be empty.
        #
        next unless( /^([\w.-]+):[^:]*:(\d+):/ );
        ($who, $uid) = ($1, $2);
        $uid = 'zero' if( $uid == 0 );
        $known{ $who } = $uid;
    }
    close( PASSWD );
}
|