# <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =head1 NAME Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam) =head1 SYNOPSIS my $spamtest = Mail::SpamAssassin->new({ 'rules_filename' => '/etc/spamassassin.rules', 'userprefs_filename' => $ENV{HOME}.'/.spamassassin/user_prefs' }); my $mail = $spamtest->parse(); my $status = $spamtest->check ($mail); my $rewritten_mail; if ($status->is_spam()) { $rewritten_mail = $status->rewrite_mail (); } ... =head1 DESCRIPTION The Mail::SpamAssassin C method returns an object of this class. This object encapsulates all the per-message state. =head1 METHODS =over 4 =cut package Mail::SpamAssassin::PerMsgStatus; use strict; use warnings; use re 'taint'; use Errno qw(ENOENT); use Time::HiRes qw(time); use Encode; use Mail::SpamAssassin::Constants qw(:sa :ip); use Mail::SpamAssassin::AsyncLoop; use Mail::SpamAssassin::Conf; use Mail::SpamAssassin::Util qw(untaint_var base64_encode idn_to_ascii uri_list_canonicalize reverse_ip_address is_fqdn_valid parse_header_addresses); use Mail::SpamAssassin::Timeout; use Mail::SpamAssassin::Logger; our @ISA = qw(); # methods defined by the compiled ruleset; deleted in finish() our @TEMPORARY_METHODS; # methods defined by register_plugin_eval_glue(); deleted in finish() our %TEMPORARY_EVAL_GLUE_METHODS; ########################################################################### our %common_tags; BEGIN { %common_tags = ( YESNO => sub { my $pms = shift; $pms->_get_tag_value_for_yesno(@_); }, YESNOCAPS => sub { my $pms = shift; uc $pms->_get_tag_value_for_yesno(@_); }, SCORE => sub { my $pms = shift; $pms->_get_tag_value_for_score(@_); }, HITS => sub { my $pms = shift; $pms->_get_tag_value_for_score(@_); }, REQD => sub { my $pms = shift; $pms->_get_tag_value_for_required_score(@_); }, VERSION => \&Mail::SpamAssassin::Version, SUBVERSION => sub { $Mail::SpamAssassin::SUB_VERSION }, RULESVERSION => sub { my $pms = shift; my $conf = $pms->{conf}; my @fnames; @fnames = keys %{$conf->{update_version}} if $conf->{update_version}; @fnames = sort @fnames if @fnames > 1; join(',', map($conf->{update_version}{$_}, @fnames)); }, HOSTNAME => sub { my $pms = shift; $pms->{conf}->{report_hostname} || Mail::SpamAssassin::Util::fq_hostname(); }, REMOTEHOSTNAME => sub { my $pms = shift; $pms->{tag_data}->{'REMOTEHOSTNAME'} || "localhost"; }, REMOTEHOSTADDR => sub { my $pms = shift; $pms->{tag_data}->{'REMOTEHOSTADDR'} || "127.0.0.1"; }, FIRSTTRUSTEDIP => sub { my $pms = shift; my $lasthop = $pms->{msg}->{metadata}->{relays_trusted}->[-1]; $lasthop ? $lasthop->{ip} : ''; }, FIRSTTRUSTEDREVIP => sub { my $pms = shift; my $lasthop = $pms->{msg}->{metadata}->{relays_trusted}->[-1]; $lasthop ? reverse_ip_address($lasthop->{ip}) : ''; }, LASTEXTERNALIP => sub { my $pms = shift; my $lasthop = $pms->{msg}->{metadata}->{relays_external}->[0]; $lasthop ? $lasthop->{ip} : ''; }, LASTEXTERNALREVIP => sub { my $pms = shift; my $lasthop = $pms->{msg}->{metadata}->{relays_external}->[0]; $lasthop ? reverse_ip_address($lasthop->{ip}) : ''; }, LASTEXTERNALRDNS => sub { my $pms = shift; my $lasthop = $pms->{msg}->{metadata}->{relays_external}->[0]; $lasthop ? $lasthop->{rdns} : ''; }, LASTEXTERNALHELO => sub { my $pms = shift; my $lasthop = $pms->{msg}->{metadata}->{relays_external}->[0]; $lasthop ? $lasthop->{helo} : ''; }, CONTACTADDRESS => sub { my $pms = shift; $pms->{conf}->{report_contact}; }, BAYES => sub { my $pms = shift; defined $pms->{bayes_score} ? sprintf("%3.4f", $pms->{bayes_score}) : "0.5"; }, DATE => sub { Mail::SpamAssassin::Util::time_to_rfc822_date() }, STARS => sub { my $pms = shift; my $arg = (shift || "*"); my $length = int($pms->{score}); $length = 50 if $length > 50; # avoid a perl 5.21 warning: "Negative repeat count does nothing" $length > 0 ? $arg x $length : ''; }, AUTOLEARN => sub { my $pms = shift; $pms->get_autolearn_status(); }, AUTOLEARNSCORE => sub { my $pms = shift; $pms->get_autolearn_points(); }, TESTS => sub { my $pms = shift; my $arg = (shift || ','); join($arg, sort @{$pms->{test_names_hit}}) || "none"; }, SUBTESTS => sub { my $pms = shift; my $arg = (shift || ','); join($arg, sort @{$pms->{subtest_names_hit}}) || "none"; }, SUBTESTSCOLLAPSED => sub { my $pms = shift; my $arg = (shift || ','); my (@subtests) = $pms->get_names_of_subtests_hit("collapsed"); join($arg, sort @subtests) || "none"; }, TESTSSCORES => sub { my $pms = shift; my $arg = (shift || ","); my $scores = $pms->{conf}->{scores}; join($arg, map($_ . "=" . ($scores->{$_} || '0'), sort @{$pms->{test_names_hit}})) || "none"; }, PREVIEW => sub { my $pms = shift; $pms->get_content_preview(); }, REPORT => sub { my $pms = shift; "\n" . ($pms->{tag_data}->{REPORT} || ""); }, SUBJPREFIX => sub { my $pms = shift; ($pms->{tag_data}->{SUBJPREFIX} || ""); }, HEADER => sub { my $pms = shift; my $hdr = shift; return '' if !$hdr; $pms->get($hdr, ''); }, TIMING => sub { my $pms = shift; $pms->{main}->timer_report(); }, ADDEDHEADERHAM => sub { my $pms = shift; $pms->_get_added_headers('headers_ham'); }, ADDEDHEADERSPAM => sub { my $pms = shift; $pms->_get_added_headers('headers_spam'); }, ADDEDHEADER => sub { my $pms = shift; $pms->_get_added_headers( $pms->{is_spam} ? 'headers_spam' : 'headers_ham'); }, ); } my $IP_ADDRESS = IP_ADDRESS; sub new { my $class = shift; $class = ref($class) || $class; my ($main, $msg, $opts) = @_; my $self = { 'main' => $main, 'msg' => $msg, 'score' => 0, 'test_log_msgs' => { }, # deprecated since 4.0, renamed to test_logs to prevent conflicts 'test_logs' => { }, 'test_names_hit' => [ ], 'subtest_names_hit' => [ ], 'spamd_result_log_items' => [ ], 'tests_already_hit' => { }, 'get_cache' => { }, 'tag_data' => { }, 'rule_errors' => 0, 'disable_auto_learning' => 0, 'auto_learn_status' => undef, 'auto_learn_force_status' => undef, 'conf' => $main->{conf}, 'async' => Mail::SpamAssassin::AsyncLoop->new($main), 'master_deadline' => $msg->{master_deadline}, # dflt inherited from msg 'deadline_exceeded' => 0, # time limit exceeded, skipping further tests 'tmpfiles' => { }, 'uri_detail_list' => { }, 'subjprefix' => undef, }; dbg("check: pms new, time limit in %.3f s", $self->{master_deadline} - time) if $self->{master_deadline}; if (defined $opts && $opts->{disable_auto_learning}) { $self->{disable_auto_learning} = 1; } # used with "mass-check --loghits" if ($self->{main}->{save_pattern_hits}) { $self->{save_pattern_hits} = 1; $self->{pattern_hits} = { }; } delete $self->{should_log_rule_hits}; my $dbgcache = would_log('dbg', 'rules'); if ($dbgcache || $self->{save_pattern_hits}) { $self->{should_log_rule_hits} = 1; } # known valid tags that might not get their entry in pms->{tag_data} # in some circumstances my $tag_data_ref = $self->{tag_data}; foreach (qw(SUMMARY REPORT SUBJPREFIX RBL)) { $tag_data_ref->{$_} = '' } foreach (qw(ASN ASNCIDR AWL AWLMEAN AWLCOUNT AWLPRESCORE DCCB DCCR EXTRACTTEXTCHARS EXTRACTTEXTWORDS EXTRACTTEXTTOOLS EXTRACTTEXTTYPES EXTRACTTEXTEXTENSIONS EXTRACTTEXTFLAGS EXTRACTTEXTURIS DCCREP PYZOR DKIMIDENTITY DKIMDOMAIN DKIMSELECTOR BAYESTC BAYESTCLEARNED BAYESTCSPAMMY BAYESTCHAMMY HAMMYTOKENS SPAMMYTOKENS TOKENSUMMARY)) { $tag_data_ref->{$_} = undef; # exist, but undefined } bless ($self, $class); $self; } sub DESTROY { my ($self) = shift; # best practices: prevent potential calls to eval and to system routines # in code of a DESTROY method from clobbering global variables $@ and $! local($@,$!); # keep outer error handling unaffected by DESTROY # Bug 5808 - cleanup tmpfiles foreach my $fn (keys %{$self->{tmpfiles}}) { unlink($fn) or dbg("check: cannot unlink $fn: $!"); } } ########################################################################### =item $status-Echeck () Runs the SpamAssassin rules against the message pointed to by the object. =cut sub check { my ($self) = shift; my $master_deadline = $self->{master_deadline}; if (!$master_deadline) { $self->check_timed(@_); } else { my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline }); my $err = $t->run(sub { $self->check_timed(@_) }); if (time > $master_deadline && !$self->{deadline_exceeded}) { info("check: exceeded time limit in pms check"); $self->{deadline_exceeded} = 1; } } } sub check_timed { my ($self) = @_; local ($_); $self->{learned_points} = 0; $self->{body_only_points} = 0; $self->{head_only_points} = 0; $self->{score} = 0; # flush any old stale DNS responses $self->{main}->{resolver}->flush_responses(); # clear NetSet cache before every check to prevent it growing too large foreach my $nset_name (qw(internal_networks trusted_networks msa_networks)) { my $netset = $self->{conf}->{$nset_name}; $netset->ditch_cache() if $netset; } $self->{main}->call_plugins ("check_start", { permsgstatus => $self }); # in order of slowness; fastest first, slowest last. # we do ALL the tests, even if a spam triggers lots of them early on. # this lets us see ludicrously spammish mails (score: 40) etc., which # we can then immediately submit to spamblocking services. # # TODO: change this to do welcomelist/blocklists first? probably a plan # NOTE: definitely need AWL stuff last, for regression-to-mean of score # TVD: we may want to do more than just clearing out the headers, but ... $self->{msg}->delete_header('X-Spam-.*'); # Resident Mail::SpamAssassin code will possibly never change score # sets, even if bayes becomes available. So we should do a quick check # to see if we should go from {0,1} to {2,3}. We of course don't need # to do this switch if we're already using bayes ... ;) my $set = $self->{conf}->get_score_set(); if (($set & 2) == 0 && $self->{main}->{bayes_scanner} && $self->{main}->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules}) { dbg("check: scoreset $set but bayes is available, switching scoresets"); $self->{conf}->set_score_set ($set|2); } dbg("check: using scoreset $set in M:S:Pms"); # The primary check functionality occurs via a plugin call. For more # information, please see: Mail::SpamAssassin::Plugin::Check if (!$self->{main}->call_plugins ("check_main", { permsgstatus => $self })) { # did anything happen? if not, this is fatal if (!$self->{main}->have_plugin("check_main")) { die "check: no loaded plugin implements 'check_main': cannot scan!\n". "Check that the necessary '.pre' files are in the config directory.\n". "At a minimum, v320.pre loads the Check plugin which is required.\n"; } } # delete temporary storage and memory allocation used during checking $self->delete_fulltext_tmpfile(); # now that we've finished checking the mail, clear out this cache # to avoid unforeseen side-effects. $self->{get_cache} = { }; # Round the score to 3 decimal places to avoid rounding issues # We assume required_score to be properly rounded already. # add 0 to force it back to numeric representation instead of string. $self->{score} = (sprintf "%0.3f", $self->{score}) + 0; dbg("check: is spam? score=".$self->{score}. " required=".$self->{conf}->{required_score}); dbg("check: tests=".$self->get_names_of_tests_hit()); dbg("check: subtests=".$self->get_names_of_subtests_hit("dbg")); $self->{is_spam} = $self->is_spam(); $self->{main}->{resolver}->bgabort(); $self->{main}->call_plugins ("check_end", { permsgstatus => $self }); 1; } # Called from Check.pm after Plugins check_cleanup calls # Cleanup and finish things before learning/rewrites etc # TODO: document? sub check_cleanup { my ($self) = shift; # Create subjprefix if (defined $self->{subjprefix}) { $self->{tag_data}->{SUBJPREFIX} = $self->{subjprefix}; } # Create reports $self->{tag_data}->{REPORT} = ''; $self->{tag_data}->{SUMMARY} = ''; my $test_logs = $self->{test_logs}; my $scores = $self->{conf}->{scores}; foreach my $rule (@{$self->{test_names_hit}}) { my $score = $scores->{$rule}; my $area = $test_logs->{$rule}->{area} || ''; my $desc = $test_logs->{$rule}->{desc} || ''; if ($score >= 10 || $score <= -10) { $score = sprintf("%4.0f", $score); } else { $score = sprintf("%4.1f", $score); } my $terse = ''; my $long = ''; if (defined $test_logs->{$rule}->{msg}) { my @msgs; if (($self->{conf}->{tflags}->{$rule}||'') =~ /\bnolog\b/) { push(@msgs, '*REDACTED*'); } else { @msgs = @{$test_logs->{$rule}->{msg}}; } local $1; foreach my $msg (@msgs) { while ($msg =~ s/^(.{30,48})\s//) { $terse .= sprintf ("[%s]\n", $1); if (length($1) > 47) { $long .= sprintf ("%78s\n", "[$1]"); } else { $long .= sprintf ("%27s [%s]\n", "", $1); } } $terse .= sprintf ("[%s]\n", $msg); if (length($msg) > 47) { $long .= sprintf ("%78s\n", "[$msg]"); } else { $long .= sprintf ("%27s [%s]\n", "", $msg); } } } $self->{tag_data}->{REPORT} .= sprintf ("* %s %s %s%s\n%s", $score, $rule, $area, $self->_wrap_desc($desc, 4+length($rule)+length($score)+length($area), "* "), ($terse ? "* " . $terse : '')); $self->{tag_data}->{SUMMARY} .= sprintf ("%s %-22s %s%s\n%s", $score, $rule, $area, $self->_wrap_desc($desc, 3+length($rule)+length($score)+length($area), " " x 28), $long); } } ########################################################################### =item $status-Elearn() After a mail message has been checked, this method can be called. If the score is outside a certain range around the threshold, ie. if the message is judged more-or-less definitely spam or definitely non-spam, it will be fed into SpamAssassin's learning systems (currently the naive Bayesian classifier), so that future similar mails will be caught. =cut sub learn { my ($self) = shift; my $master_deadline = $self->{master_deadline}; if (!$master_deadline) { $self->learn_timed(@_); } else { my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline }); my $err = $t->run(sub { $self->learn_timed(@_) }); if (time > $master_deadline && !$self->{deadline_exceeded}) { info("learn: exceeded time limit in pms learn"); $self->{deadline_exceeded} = 1; } } } sub learn_timed { my ($self) = @_; if (!$self->{conf}->{bayes_auto_learn} || !$self->{conf}->{use_bayes} || $self->{disable_auto_learning}) { $self->{auto_learn_status} = "disabled"; return; } my ($isspam, $force_autolearn, $force_autolearn_names, $arrayref); $arrayref = $self->{main}->call_plugins ("autolearn_discriminator", { permsgstatus => $self }); $isspam = $arrayref->[0]; $force_autolearn = $arrayref->[1]; $force_autolearn_names = $arrayref->[2]; #AUTOLEARN_FORCE FLAG INFORMATION if (defined $force_autolearn and $force_autolearn > 0) { $self->{auto_learn_force_status} = "yes"; if (defined $force_autolearn_names) { $self->{auto_learn_force_status} .= " ($force_autolearn_names)"; } } else { $self->{auto_learn_force_status} = "no"; } if (!defined $isspam) { $self->{auto_learn_status} = 'no'; return; } my $timer = $self->{main}->time_method("learn"); $self->{main}->call_plugins ("autolearn", { permsgstatus => $self, isspam => $isspam }); # bug 3704: temporarily override learn's ability to re-learn a message my $orig_learner = $self->{main}->init_learner({ "no_relearn" => 1 }); my $eval_stat; eval { my $learnstatus = $self->{main}->learn ($self->{msg}, undef, $isspam, 0); if ($learnstatus->did_learn()) { $self->{auto_learn_status} = $isspam ? "spam" : "ham"; } # This must wait until the did_learn call. $learnstatus->finish(); $self->{main}->finish_learner(); # for now if (exists $self->{main}->{bayes_scanner}) { $self->{main}->{bayes_scanner}->force_close(); } 1; } or do { $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; }; # reset learner options to their original values $self->{main}->init_learner($orig_learner); if (defined $eval_stat) { dbg("learn: auto-learning failed: $eval_stat"); $self->{auto_learn_status} = "failed"; } } =item $score = $status-Eget_autolearn_points() Return the message's score as computed for auto-learning. Certain tests are ignored: - rules with tflags set to 'learn' (the Bayesian rules) - rules with tflags set to 'userconf' (user welcome/block-listing rules, etc) - rules with tflags set to 'noautolearn' Also note that auto-learning occurs using scores from either scoreset 0 or 1, depending on what scoreset is used during message check. It is likely that the message check and auto-learn scores will be different. =cut sub get_autolearn_points { my ($self) = @_; $self->_get_autolearn_points(); return $self->{autolearn_points}; } =item $score = $status-Eget_head_only_points() Return the message's score as computed for auto-learning, ignoring all rules except for header-based ones. =cut sub get_head_only_points { my ($self) = @_; $self->_get_autolearn_points(); return $self->{head_only_points}; } =item $score = $status-Eget_learned_points() Return the message's score as computed for auto-learning, ignoring all rules except for learning-based ones. =cut sub get_learned_points { my ($self) = @_; $self->_get_autolearn_points(); return $self->{learned_points}; } =item $score = $status-Eget_body_only_points() Return the message's score as computed for auto-learning, ignoring all rules except for body-based ones. =cut sub get_body_only_points { my ($self) = @_; $self->_get_autolearn_points(); return $self->{body_only_points}; } =item $score = $status-Eget_autolearn_force_status() Return whether a message's score included any rules that are flagged as autolearn_force. =cut sub get_autolearn_force_status { my ($self) = @_; $self->_get_autolearn_points(); return $self->{autolearn_force}; } =item $rule_names = $status-Eget_autolearn_force_names() Return a list of comma separated list of rule names if a message's score included any rules that are flagged as autolearn_force. =cut sub get_autolearn_force_names { my ($self) = @_; my ($names); $self->_get_autolearn_points(); $names = $self->{autolearn_force_names}; if (defined $names) { #remove trailing comma $names =~ s/,$//; } else { $names = ""; } return $names; } sub _get_autolearn_testtype { my ($self, $test) = @_; return '' unless defined $test; return 'head' if $test == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS || $test == $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS; return 'body' if $test == $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS || $test == $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS || $test == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS || $test == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS || $test == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS || $test == $Mail::SpamAssassin::Conf::TYPE_URI_EVALS; return 'meta' if $test == $Mail::SpamAssassin::Conf::TYPE_META_TESTS; return ''; } sub _get_autolearn_points { my ($self) = @_; return if (exists $self->{autolearn_points}); # ensure it only gets computed once, even if we return early $self->{autolearn_points} = 0; my $conf = $self->{conf}; # This function needs to use use sum($score[scoreset % 2]) not just {score}. # otherwise we shift what we autolearn on and it gets really weird. - tvd my $orig_scoreset = $conf->get_score_set(); my $new_scoreset = $orig_scoreset; my $scores = $conf->{scores}; if (($orig_scoreset & 2) == 0) { # we don't need to recompute dbg("learn: auto-learn: currently using scoreset $orig_scoreset"); } else { $new_scoreset = $orig_scoreset & ~2; dbg("learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset"); $scores = $conf->{scoreset}->[$new_scoreset]; } my $tflags = $conf->{tflags}; my $points = 0; # Just in case this function is called multiple times, clear out the # previous calculated values $self->{learned_points} = 0; $self->{body_only_points} = 0; $self->{head_only_points} = 0; $self->{autolearn_force} = 0; foreach my $test (@{$self->{test_names_hit}}) { my $force_type = ''; # According to the documentation, noautolearn, userconf, and learn # rules are ignored for autolearning. if (exists $tflags->{$test}) { next if $tflags->{$test} =~ /\bnoautolearn\b/; next if $tflags->{$test} =~ /\buserconf\b/; # Keep track of the learn points for an additional autolearn check. # Use the original scoreset since it'll be 0 in sets 0 and 1. if ($tflags->{$test} =~ /\blearn\b/) { # we're guaranteed that the score will be defined $self->{learned_points} += $conf->{scoreset}->[$orig_scoreset]->{$test}; next; } #IF ANY RULES ARE AUTOLEARN FORCE, SET THAT FLAG if ($tflags->{$test} =~ /\bautolearn_force\b/) { $self->{autolearn_force}++; #ADD RULE NAME TO LIST $self->{autolearn_force_names}.="$test,"; } # Bug 7907 local $1; if ($tflags->{$test} =~ /\bautolearn_(body|header)\b/) { $force_type = $1; } } # ignore tests with 0 score (or undefined) in this scoreset next if !$scores->{$test}; # Go ahead and add points to the proper locations # Changed logic because in testing, I was getting both head and body. Bug 5503 # Cleanup logic, Bug 7905/7906 my $type = $self->_get_autolearn_testtype($conf->{test_types}->{$test}); if ($force_type eq 'header' || ($force_type eq '' && $type eq 'head')) { $self->{head_only_points} += $scores->{$test}; dbg("learn: auto-learn: adding header points $scores->{$test} ($test)"); } elsif ($force_type eq 'body' || ($force_type eq '' && $type eq 'body')) { $self->{body_only_points} += $scores->{$test}; dbg("learn: auto-learn: adding body points $scores->{$test} ($test)"); } elsif ($type eq 'meta') { if ($conf->{meta_dependencies}->{$test}) { my $dep_head = 0; my $dep_body = 0; foreach my $deptest (@{$conf->{meta_dependencies}->{$test}}) { my $deptype = $self->_get_autolearn_testtype($conf->{test_types}->{$deptest}); if ($deptype eq 'head') { $dep_head++; } elsif ($deptype eq 'body') { $dep_body++; } } if ($dep_head || $dep_body) { my $dep_total = $dep_head + $dep_body; my $p_head = sprintf "%0.3f", $scores->{$test} * ($dep_head / $dep_total); my $p_body = sprintf "%0.3f", $scores->{$test} * ($dep_body / $dep_total); $self->{head_only_points} += $p_head; $self->{body_only_points} += $p_body; dbg("learn: auto-learn: adding $p_head header and $p_body body points, $dep_head/$dep_body ratio ($test)"); } else { dbg("learn: auto-learn: not considered as header or body points, no header/body deps ($test)"); } } else { dbg("learn: auto-learn: not considered as header or body points, no meta deps ($test)"); } } else { dbg("learn: auto-learn: not considered as header or body points, ignored ruletype ($test)"); } $points += $scores->{$test}; } # Figure out the final value we'll use for autolearning $points = (sprintf "%0.3f", $points) + 0; dbg("learn: auto-learn: message score: ".$self->{score}.", computed score for autolearn: $points"); $self->{autolearn_points} = $points; } ########################################################################### =item $isspam = $status-Eis_spam () After a mail message has been checked, this method can be called. It will return 1 for mail determined likely to be spam, 0 if it does not seem spam-like. =cut sub is_spam { my ($self) = @_; # changed to test this so sub-tests can ask "is_spam" during a run return ($self->{score} >= $self->{conf}->{required_score}); } ########################################################################### =item $list = $status-Eget_names_of_tests_hit () After a mail message has been checked, this method can be called. It will return a comma-separated string, listing all the symbolic test names of the tests which were triggered by the mail. =cut sub get_names_of_tests_hit { my ($self) = @_; return join(',', sort @{$self->{test_names_hit}}); } =item $list = $status-Eget_names_of_tests_hit_with_scores_hash () After a mail message has been checked, this method can be called. It will return a pointer to a hash for rule & score pairs for all the symbolic test names and individual scores of the tests which were triggered by the mail. =cut sub get_names_of_tests_hit_with_scores_hash { my ($self) = @_; #BASED ON CODE FOR TESTSSCORES TAG my $scores = $self->{conf}->{scores}; my %testsscores; $testsscores{$_} = $scores->{$_} || '0' for @{$self->{test_names_hit}}; return \%testsscores; } =item $list = $status-Eget_names_of_tests_hit_with_scores () After a mail message has been checked, this method can be called. It will return a comma-separated string of rule=score pairs for all the symbolic test names and individual scores of the tests which were triggered by the mail. =cut sub get_names_of_tests_hit_with_scores { my ($self) = @_; #BASED ON CODE FOR TESTSSCORES TAG my $scores = $self->{conf}->{scores}; return join(',', map($_ . '=' . ($scores->{$_} || '0'), sort @{$self->{test_names_hit}})) || "none"; } ########################################################################### =item $list = $status-Eget_names_of_subtests_hit () After a mail message has been checked, this method can be called. It will return a comma-separated string, listing all the symbolic test names of the meta-rule sub-tests which were triggered by the mail. Sub-tests are the normally-hidden rules, which score 0 and have names beginning with two underscores, used in meta rules. If a parameter of collapsed or dbg is passed, the output will be a condensed array of sub-tests with multiple hits reduced to one entry. If the parameter of dbg is passed, the output will be a condensed string of sub-tests with multiple hits reduced to one entry with the number of hits in parentheses. Some information is also added at the end regarding the multiple hits. =cut sub get_names_of_subtests_hit { my ($self, $mode) = @_; if (defined $mode && ($mode eq 'dbg' || $mode eq 'collapsed')) { # This routine prints only one instance of a subrule hit with a count of how many times it hit if greater than 1 my $total_hits = scalar(@{$self->{subtest_names_hit}}); return '' if !$total_hits; my %subtest_names_hit; $subtest_names_hit{$_}++ foreach @{$self->{subtest_names_hit}}; my @subtests = sort keys %subtest_names_hit; my $deduplicated_hits = scalar(@subtests); my @result; foreach my $rule (@subtests) { if ($subtest_names_hit{$rule} > 1) { push @result, "$rule($subtest_names_hit{$rule})"; } else { push @result, $rule; } } if ($mode eq 'dbg') { return join(',', @result)." (Total Subtest Hits: $total_hits / Deduplicated Total Hits: $deduplicated_hits)"; } else { return join(',', @result); } } else { # Return the simpler string with duplicates and commas return join(',', sort @{$self->{subtest_names_hit}}); } } ########################################################################### =item $num = $status-Eget_score () After a mail message has been checked, this method can be called. It will return the message's score. =cut sub get_score { my ($self) = @_; return $self->{score}; } # left as backward compatibility sub get_hits { my ($self) = @_; return $self->{score}; } ########################################################################### =item $num = $status-Eget_required_score () After a mail message has been checked, this method can be called. It will return the score required for a mail to be considered spam. =cut sub get_required_score { my ($self) = @_; return $self->{conf}->{required_score}; } # left as backward compatibility sub get_required_hits { my ($self) = @_; return $self->{conf}->{required_score}; } ########################################################################### =item $num = $status-Eget_autolearn_status () After a mail message has been checked, this method can be called. It will return one of the following strings depending on whether the mail was auto-learned or not: "ham", "no", "spam", "disabled", "failed", "unavailable". It also returns is flagged with auto_learn_force, it will also include the status and the rules hit. For example: "autolearn_force=yes (AUTOLEARNTEST_BODY)" =cut sub get_autolearn_status { my ($self) = @_; my ($status) = $self->{auto_learn_status} || "unavailable"; if (defined $self->{auto_learn_force_status}) { $status .= " autolearn_force=".$self->{auto_learn_force_status}; } return $status; } ########################################################################### =item $report = $status-Eget_report () Deliver a "spam report" on the checked mail message. This contains details of how many spam detection rules it triggered. The report is returned as a multi-line string, with the lines separated by C<\n> characters. =cut sub get_report { my ($self) = @_; if (!exists $self->{'report'}) { my $report; my $timer = $self->{main}->time_method("get_report"); $report = $self->{conf}->{report_template}; $report ||= '(no report template found)'; $report = $self->_replace_tags($report); $report =~ s/\n*$/\n\n/s; $self->{report} = $report; } return $self->{report}; } ########################################################################### =item $preview = $status-Eget_content_preview () Give a "preview" of the content. This is returned as a multi-line string, with the lines separated by C<\n> characters, containing a fully-decoded, safe, plain-text sample of the first few lines of the message body. =cut sub get_content_preview { my ($self) = @_; my $str = ''; my @ary = @{$self->get_decoded_stripped_body_text_array()}; shift @ary; # drop the subject line my $numlines = 3; while (length ($str) < 200 && @ary && $numlines-- > 0) { $str .= shift @ary; } # in case the last line was huge, trim it back to around 200 chars local $1; $str =~ s/^(.{200}).+$/$1 [...]/gm; chomp ($str); $str .= "\n"; # now, some tidy-ups that make things look a bit prettier $str =~ s/-----Original Message-----.*$//gm; $str =~ s/This is a multi-part message in MIME format\.//gs; $str =~ s/[-_*.]{10,}//gs; $str =~ s/\s+/ /gs; # add "Content preview:" ourselves, so that the text aligns # correctly with the template -- then trim it off. We don't # have to get this *exactly* right, but it's nicer if we # make a bit of an effort ;) $str = Mail::SpamAssassin::Util::wrap($str, " ", "Content preview: ", 75, 1); $str =~ s/^Content preview:\s+//gs; return $str; } ########################################################################### =item $msg = $status-Eget_message() Return the object representing the message being scanned. =cut sub get_message { my ($self) = @_; return $self->{msg}; } ########################################################################### =item $status-Erewrite_mail () Rewrite the mail message. This will at minimum add headers, and at maximum MIME-encapsulate the message text, to reflect its spam or not-spam status. The function will return a scalar of the rewritten message. The actual modifications depend on the configuration (see C for more information). The possible modifications are as follows: =over 4 =item To:, From: and Subject: modification on spam mails Depending on the configuration, the To: and From: lines can have a user-defined RFC 2822 comment appended for spam mail. The subject line may have a user-defined string prepended to it for spam mail. =item X-Spam-* headers for all mails Depending on the configuration, zero or more headers with names beginning with C will be added to mail depending on whether it is spam or ham. =item spam message with report_safe If report_safe is set to true (1), then spam messages are encapsulated into their own message/rfc822 MIME attachment without any modifications being made. If report_safe is set to false (0), then the message will only have the above headers added/modified. =back =cut sub rewrite_mail { my ($self) = @_; my $timer = $self->{main}->time_method("rewrite_mail"); my $msg = $self->{msg}->get_mbox_separator() || ''; if ($self->{is_spam} && $self->{conf}->{report_safe}) { $msg .= $self->rewrite_report_safe(); } else { $msg .= $self->rewrite_no_report_safe(); } return $msg; } # Make the line endings in the passed string reference appropriate # for the original mail. Callers must note bug 5250: don't rewrite # the message body, since that will corrupt 8bit attachments/MIME parts. # sub _fixup_report_line_endings { my ($self, $strref) = @_; if ($self->{msg}->{line_ending} ne "\n") { $$strref =~ s/\r?\n/$self->{msg}->{line_ending}/gs; } } sub _get_added_headers { my ($self, $which) = @_; my $str = ''; # use string appends to put this back together -- I finally benchmarked it. # join() is 56% of the speed of just using string appends. ;) foreach my $hf_ref (@{$self->{conf}->{$which}}) { my($hfname, $hfbody) = @$hf_ref; my $line = $self->_process_header($hfname,$hfbody); $line = $self->mime_encode_header($line); $str .= "X-Spam-$hfname: $line\n"; } return $str; }; # rewrite the message in report_safe mode # should not be called directly, use rewrite_mail instead # sub rewrite_report_safe { my ($self) = @_; my $tag; # This is the original message. We do not want to make any modifications so # we may recover it if necessary. It will be put into the new message as a # message/rfc822 MIME part. my $original = $self->{msg}->get_pristine(); # This is the new message. my $newmsg = ''; # the character set of a report my $report_charset = $self->{conf}->{report_charset} || "UTF-8"; # the SpamAssassin report my $report = $self->get_report(); if (!utf8::is_utf8($report)) { # already in octets } else { # encode to octets if (uc $report_charset eq 'UTF-8') { dbg("check: encoding report to $report_charset"); utf8::encode($report); # very fast } else { dbg("check: encoding report to $report_charset. Slow, to be avoided!"); $report = Encode::encode($report_charset, $report); # slow } } # get original headers, "pristine" if we can do it my $from = $self->{msg}->get_pristine_header("From"); my $to = $self->{msg}->get_pristine_header("To"); my $cc = $self->{msg}->get_pristine_header("Cc"); my $subject = $self->{msg}->get_pristine_header("Subject"); my $msgid = $self->{msg}->get_pristine_header('Message-Id'); my $date = $self->{msg}->get_pristine_header("Date"); # It'd be nice to do this with a foreach loop, but with only three # possibilities right now, it's easier not to... if (defined $self->{conf}->{rewrite_header}->{Subject}) { # Add a prefix to the subject if needed $subject = "\n" if !defined $subject; if (defined $self->{subjprefix}) { $tag = $self->_replace_tags($self->{subjprefix}); $tag =~ s/\n/ /gs; $subject = $tag . $subject; } # Add a **SPAM** prefix $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{Subject}); $tag =~ s/\n/ /gs; # strip tag's newlines $subject =~ s/^(?:\Q${tag}\E )?/${tag} /g; # For some reason the tag may already be there!? } if (defined $self->{conf}->{rewrite_header}->{To}) { $to = "\n" if !defined $to; my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{To}); $tag =~ s/\n/ /gs; # strip tag's newlines $to =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/; } if (defined $self->{conf}->{rewrite_header}->{From}) { $from = "\n" if !defined $from; my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{From}); $tag =~ s/\n+//gs; # strip tag's newlines $from =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/; } # add report headers to message $newmsg .= "From: $from" if defined $from; $newmsg .= "To: $to" if defined $to; $newmsg .= "Cc: $cc" if defined $cc; $newmsg .= "Subject: $subject" if defined $subject; $newmsg .= "Date: $date" if defined $date; $newmsg .= "Message-Id: $msgid" if defined $msgid; $newmsg .= $self->_get_added_headers('headers_spam'); if (defined $self->{conf}->{report_safe_copy_headers}) { my %already_added = map { $_ => 1 } qw/from to cc subject date message-id/; foreach my $hdr (@{$self->{conf}->{report_safe_copy_headers}}) { next if exists $already_added{lc $hdr}; my @hdrtext = $self->{msg}->get_pristine_header($hdr); $already_added{lc $hdr}++; if (lc $hdr eq "received") { # add Received at the top ... my $rhdr = ""; foreach (@hdrtext) { $rhdr .= "$hdr: $_"; } $newmsg = "$rhdr$newmsg"; } else { foreach (@hdrtext) { $newmsg .= "$hdr: $_"; } } } } # jm: add a SpamAssassin Received header to note markup time etc. # emulates the fetchmail style. # tvd: do this after report_safe_copy_headers so Received will be done correctly $newmsg = "Received: from localhost by " . Mail::SpamAssassin::Util::fq_hostname() . "\n" . "\twith SpamAssassin (version " . Mail::SpamAssassin::Version() . ");\n" . "\t" . Mail::SpamAssassin::Util::time_to_rfc822_date() . "\n" . $newmsg; # MIME boundary my $boundary = "----------=_" . sprintf("%08X.%08X",time,int(rand(2 ** 32))); # ensure it's unique, so we can't be attacked this way while ($original =~ /^\Q${boundary}\E(?:--)?$/m) { $boundary .= "/".sprintf("%08X",int(rand(2 ** 32))); } # determine whether Content-Disposition should be "attachment" or "inline" my $disposition; my $ct = $self->{msg}->get_header("Content-Type"); if (defined $ct && $ct ne '' && $ct !~ m{text/plain}i) { $disposition = "attachment"; $report .= $self->_replace_tags($self->{conf}->{unsafe_report_template}); # if we wanted to defang the attachment, this would be the place } else { $disposition = "inline"; } my $type = "message/rfc822"; $type = "text/plain" if $self->{conf}->{report_safe} > 1; my $description = $self->{conf}->{'encapsulated_content_description'}; # Note: the message should end in blank line since mbox format wants # blank line at end and messages may be concatenated! In addition, the # x-spam-type parameter is fixed since we will use it later to recognize # original messages that can be extracted. $newmsg .= <<"EOM"; MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="$boundary" This is a multi-part message in MIME format. --$boundary Content-Type: text/plain; charset=$report_charset Content-Disposition: inline Content-Transfer-Encoding: 8bit $report --$boundary Content-Type: $type; x-spam-type=original Content-Description: $description Content-Disposition: $disposition Content-Transfer-Encoding: 8bit EOM my $newmsgtrailer = "\n--$boundary--\n\n"; # now fix line endings in both headers, report_safe body parts, # and new MIME boundaries and structure $self->_fixup_report_line_endings(\$newmsg); $self->_fixup_report_line_endings(\$newmsgtrailer); $newmsg .= $original.$newmsgtrailer; return $newmsg; } # rewrite the message in non-report_safe mode (just headers) # should not be called directly, use rewrite_mail instead # sub rewrite_no_report_safe { my ($self) = @_; my $ntag; my $pref_subject = 0; # put the pristine headers into an array # skip the X-Spam- headers, but allow the X-Spam-Prev headers to remain. # since there may be a missing header/body # my @pristine_headers = split(/^/m, $self->{msg}->get_pristine_header()); for (my $line = 0; $line <= $#pristine_headers; $line++) { next unless ($pristine_headers[$line] =~ /^X-Spam-(?!Prev-)/i); splice @pristine_headers, $line, 1 while ($pristine_headers[$line] =~ /^(?:X-Spam-(?!Prev-)|[ \t])/i); $line--; } my $separator = ''; if (@pristine_headers && $pristine_headers[$#pristine_headers] =~ /^\s*$/) { $separator = pop @pristine_headers; } my $addition = 'headers_ham'; if($self->{is_spam}) { # special-case: Subject lines. ensure one exists, if we're # supposed to mark it up. my $created_subject = 0; my $subject = $self->{msg}->get_pristine_header('Subject'); if (!defined($subject) && $self->{is_spam} && exists $self->{conf}->{rewrite_header}->{'Subject'}) { push(@pristine_headers, "Subject: \n"); $created_subject = 1; } # Deal with header rewriting foreach (@pristine_headers) { # if we're not going to do a rewrite, skip this header! next if (!/^(From|Subject|To):/i); my $hdr = ucfirst(lc($1)); next if (!defined $self->{conf}->{rewrite_header}->{$hdr}); # pop the original version onto the end of the header array if ($created_subject) { push(@pristine_headers, "X-Spam-Prev-Subject: (nonexistent)\n"); } else { push(@pristine_headers, "X-Spam-Prev-$_"); } # Figure out the rewrite piece my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{$hdr}); $tag =~ s/\n/ /gs; # The tag should be a comment for this header ... $tag = "($tag)" if ($hdr =~ /^(?:From|To)$/); if (defined $self->{subjprefix}) { $ntag = $self->_replace_tags($self->{subjprefix}); $ntag =~ s/\n/ /gs; $ntag =~ s/\s+$//; local $1; s/^([^:]+:)[ \t]*(?:\Q${ntag}\E )?/$1 ${ntag} /i; } s/^([^:]+:)[ \t]*(?:\Q${tag}\E )?/$1 ${tag} /i; } $addition = 'headers_spam'; } else { # special-case: Subject lines. ensure one exists, if we're # supposed to mark it up. my $created_subject = 0; my $subject = $self->{msg}->get_pristine_header('Subject'); if (!defined($subject) && exists $self->{conf}->{rewrite_header}->{'Subject'}) { push(@pristine_headers, "Subject: \n"); $created_subject = 1; } # Deal with header rewriting foreach (@pristine_headers) { # if we're not going to do a rewrite, skip this header! next if (!/^(Subject):/i); my $hdr = ucfirst(lc($1)); next if (!defined $self->{conf}->{rewrite_header}->{$hdr}); if (defined $self->{subjprefix}) { $ntag = $self->_replace_tags($self->{subjprefix}); $ntag =~ s/\n/ /gs; $ntag =~ s/\s+$//; local $1; s/^([^:]+:)[ \t]*(?:\Q${ntag}\E )?/$1 ${ntag} /i; } } } # Break the pristine header set into two blocks; $new_hdrs_pre is the stuff # that we want to ensure comes before any SpamAssassin markup headers, # like the Return-Path header (see bug 3409). # # all the rest of the message headers (as left in @pristine_headers), is # to be placed after the SpamAssassin markup hdrs. Once one of those headers # is seen, all further headers go into that set; it's assumed that it's an # old copy of the header, or attempted spoofing, if it crops up halfway # through the headers. my $new_hdrs_pre = ''; if (@pristine_headers && $pristine_headers[0] =~ /^Return-Path:/i) { $new_hdrs_pre .= shift(@pristine_headers); while (@pristine_headers && $pristine_headers[0] =~ /^[ \t]/) { $new_hdrs_pre .= shift(@pristine_headers); } } $new_hdrs_pre .= $self->_get_added_headers($addition); # fix up line endings appropriately my $newmsg = $new_hdrs_pre . join('',@pristine_headers) . $separator; $self->_fixup_report_line_endings(\$newmsg); return $newmsg.$self->{msg}->get_pristine_body(); } # encode a header field body into ASCII as per RFC 2047 # sub mime_encode_header { my ($self, $text) = @_; utf8::encode($text) if utf8::is_utf8($text); my $result = ''; for my $line (split(/^/, $text)) { if ($line =~ /^[\x09\x20-\x7E]*\r?\n\z/s) { $result .= $line; # no need for encoding } else { my $prefix = ''; my $suffix = ''; local $1; if ($line =~ s/( (?: ^ | [ \t] ) [\x09\x20-\x7E]* (?: \r?\n )? ) \z//xs) { $suffix = $1; } elsif ($line =~ s/(\r?\n)\z//s) { $suffix = $1; } if ($line =~ s/^ ( [\x09\x20-\x7E]* (?: [ \t] | \z ) )//xs) { $prefix = $1; } if ($line eq '') { $result .= $prefix . $suffix; } else { my $qp_enc_count = $line =~ tr/=?_\x00-\x1F\x7F-\xFF//; if (length($line) + $qp_enc_count*2 <= 4 * int(length($line)+2)/3) { # RFC 2047: Upper case should be used for hex digits A through F $line =~ s{ ( [=?_\x00-\x20\x7F-\xFF] ) } { $1 eq ' ' ? '_' : sprintf("=%02X", ord $1) }xges; $result .= $prefix . '=?UTF-8?Q?' . $line; } else { $result .= $prefix . '=?UTF-8?B?' . base64_encode($line); } $result .= '?=' . $suffix; } } } dbg("markup: mime_encode_header: %s", $result); return $result; } sub _process_header { my ($self, $hdr_name, $hdr_data) = @_; $hdr_data = $self->_replace_tags($hdr_data); # as octets $hdr_data =~ s/(?:\r?\n)+$//; # make sure there are no trailing newlines ... if ($self->{conf}->{fold_headers}) { if ($hdr_data =~ /\n/) { $hdr_data =~ s/\s*\n\s*/\n\t/g; return $hdr_data; } else { # use '!!' instead of ': ' so it doesn't wrap on the space my $hdr = "X-Spam-$hdr_name!!$hdr_data"; $hdr = Mail::SpamAssassin::Util::wrap($hdr, "\t", "", 79, 0, '(?<=[\s,])'); # make sure there are no blank lines in headers # buggy wrap might not prefix blank lines with \t, so use \s* (bug 7672) $hdr =~ s/^\s*\n//gm; return (split (/!!/, $hdr, 2))[1]; # just return the data part } } else { $hdr_data =~ s/\n/ /g; # Can't have newlines in headers, unless folded return $hdr_data; } } sub _replace_tags { my $self = shift; my $text = shift; # default to leaving the original string in place, if we cannot find # a tag for it (bug 4793) local($1); $text =~ s{_([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)*(?:\(.*?\))?)_}{ my $tag = $1; my $result; if ($tag =~ /^ADDEDHEADER(?:HAM|SPAM|)\z/) { # Bug 6278: break infinite recursion through _get_added_headers and # _get_tag on an attempt to use such tag in add_header template } else { $result = $self->get_tag_raw($tag); if (!ref $result) { utf8::encode($result) if utf8::is_utf8($result); } elsif (ref $result eq 'ARRAY') { my @values = @$result; # avoid modifying referenced array for (@values) { utf8::encode($_) if utf8::is_utf8($_) } $result = join(' ', @values); } } defined $result ? $result : "_${tag}_"; }ge; return $text; } ########################################################################### # public API for plugins =item $status-Eaction_depends_on_tags($tags, $code, @args) Enqueue the supplied subroutine reference C<$code>, to become runnable when all the specified tags become available. The C<$tags> may be a simple scalar - a tag name, or a listref of tag names. The subroutine C<&$code> when called will be passed a C object as its first argument, followed by the supplied (optional) list C<@args> . =cut sub action_depends_on_tags { my($self, $tags, $code, @args) = @_; ref $code eq 'CODE' or die "action_depends_on_tags: argument must be a subroutine ref"; # tag names on which the given action depends my @dep_tags = !ref $tags ? $tags : @$tags; # uppercase tag, but not args, f.e. HEADER(foo) local($1,$2); foreach (@dep_tags) { if (/^ ([^\(]+) (\(.*)? $/x) { $_ = uc($1).(defined $2 ? $2 : ''); } } # list dependency tag names which are not already satisfied my @blocking_tags; foreach (@dep_tags) { my $data = $self->get_tag($_); if (!defined $data || $data eq '') { push @blocking_tags, $_; } } if (!@blocking_tags) { dbg("check: tagrun - tag %s was ready, runnable immediately: %s", join(', ',@dep_tags), join(', ',$code,@args)); &$code($self, @args); } else { # @{$self->{tagrun_subs}} list of all submitted subroutines # @{$self->{tagrun_actions}{$tag}} bitmask of action indices blocked by tag # $self->{tagrun_tagscnt}[$action_ind] count of tags still pending # store action details, obtain its index push(@{$self->{tagrun_subs}}, [$code,@args]); my $action_ind = $#{$self->{tagrun_subs}}; $self->{tagrun_tagscnt}[$action_ind] = scalar @blocking_tags; $self->{tagrun_actions}{$_}[$action_ind] = 1 for @blocking_tags; dbg("check: tagrun - action %s blocking on tags %s", $action_ind, join(', ',@blocking_tags)); } } # tag_is_ready() will be called by set_tag(), indicating that a given # tag just received its value, possibly unblocking an action routine # as declared by action_depends_on_tags(). # # Well-behaving plugins should call set_tag() once when a tag is fully # assembled and ready. Multiple calls to set the same tag value are handled # gracefully, but may result in premature activation of a pending action. # Setting tag values by plugins should not be done directly but only through # the public API set_tag(), otherwise a pending action release may be missed. # sub tag_is_ready { my($self, $tag) = @_; $tag = uc $tag; if (would_log('dbg', 'check')) { my $tag_val = $self->{tag_data}->{$tag}; dbg("check: tagrun - tag %s is now ready, value: %s", $tag, !defined $tag_val ? '' : ref $tag_val ne 'ARRAY' ? $tag_val : 'ARY:[' . join(',',@$tag_val) . ']' ); } if (ref $self->{tagrun_actions}{$tag}) { # any action blocking on this tag? my $action_ind = 0; foreach my $action_pending (@{$self->{tagrun_actions}{$tag}}) { if ($action_pending) { $self->{tagrun_actions}{$tag}[$action_ind] = 0; if ($self->{tagrun_tagscnt}[$action_ind] <= 0) { # should not happen, warn and ignore warn "tagrun error: count for $action_ind is ". $self->{tagrun_tagscnt}[$action_ind]."\n"; } elsif (! --($self->{tagrun_tagscnt}[$action_ind])) { my($code,@args) = @{$self->{tagrun_subs}[$action_ind]}; dbg("check: tagrun - tag %s unblocking the action %s: %s", $tag, $action_ind, join(', ',$code,@args)); &$code($self, @args); } } $action_ind++; } } } # debugging aid: show actions that are still pending, waiting for their # tags to receive a value # sub report_unsatisfied_actions { my($self) = @_; my @tags; @tags = keys %{$self->{tagrun_actions}} if ref $self->{tagrun_actions}; for my $tag (@tags) { my @pending_actions = grep($self->{tagrun_actions}{$tag}[$_], (0 .. $#{$self->{tagrun_actions}{$tag}})); dbg("check: tagrun - tag %s is still blocking action %s", $tag, join(', ', @pending_actions)) if @pending_actions; } } =item $status-Eset_tag($tagname, $value) Set a template tag, as used in C, report templates, etc. This API is intended for use by plugins. Tag names will be converted to an all-uppercase representation internally. Tag names must consist only of [A-Z0-9_] characters and must not contain consecutive underscores. Also the name must not start or end in an underscore, as that is the template tagging format. C<$value> can be a simple scalar (string or number), or a reference to an array, in which case the public method get_tag will join array elements using a space as a separator, returning a single string for backward compatibility. C<$value> can also be a subroutine reference, which will be evaluated each time the template is expanded. The first argument passed by get_tag to a called subroutine will be a PerMsgStatus object (this module's object), followed by optional arguments provided by a caller to get_tag. Note that perl supports closures, which means that variables set in the caller's scope can be accessed inside this C. For example: my $text = "hello world!"; $status->set_tag("FOO", sub { my $pms = shift; return $text; }); See C's C