proxmox-spamassassin/upstream/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
Stoiko Ivanov ae52237fd8 update SpamAssassin to 4.0.0
generated by make update-upstream

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
2023-03-13 21:13:17 +01:00

321 lines
9.1 KiB
Perl

# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
=head1 NAME
MIMEHeader - perform regexp tests against MIME headers
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::MIMEHeader
mimeheader NAME_OF_RULE Content-Id =~ /foo/
=head1 DESCRIPTION
This plugin allows regexp rules to be written against MIME headers in the
message.
=head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
=over 4
=item mimeheader NAME_OF_RULE Header-Name =~ /pattern/modifiers
Specify a rule. C<NAME_OF_RULE> is the name of the rule to be used,
C<Header-Name> is the name of the MIME header to check, and
C</pattern/modifiers> is the Perl regular expression to match against this.
Note that in a message of multiple parts, each header will be checked
against the pattern separately. In other words, if multiple parts
have a 'Content-Type' header, each header's value will be tested
individually as a separate string.
Header names are considered case-insensitive.
The header values are normally cleaned up a little; for example, whitespace
around the newline character in "folded" headers will be replaced with a single
space. Append C<:raw> to the header name to retrieve the raw, undecoded value,
including pristine whitespace, instead.
=item tflags NAME_OF_RULE range=x-y
Match only from specific MIME parts, indexed in the order they are parsed.
Part 1 = main message headers. Part 2 = next part etc.
range=1 (match only main headers, not any subparts)
range=2- (match any subparts, but not the main headers)
range=-3 (match only first three parts, including main headers)
range=2-3 (match only first two subparts)
=item tflags NAME_OF_RULE concat
Concatenate the header values from all MIME parts (after applying any
C<range> restriction) into a single string for matching. This allows a single
regex to match headers across multiple parts. Normally the pattern is tested
against each MIME part individually.
=back
=cut
package Mail::SpamAssassin::Plugin::MIMEHeader;
use strict;
use warnings;
# use bytes;
use re 'taint';
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::Logger;
use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
use Mail::SpamAssassin::Constants qw(:sa);
# Inherit from the generic SpamAssassin plugin base class.
our @ISA = qw(Mail::SpamAssassin::Plugin);
# Fully-qualified names of the eval stubs generated by set_config();
# finish_tests() undefines each of them and then empties this list.
our @TEMPORARY_METHODS;
# Pattern used by the 'mimeheader' command to pull the rule name off the
# front of the configuration value (RULENAME_RE is presumably exported by
# Mail::SpamAssassin::Constants via the :sa tag -- confirm).
my $RULENAME_RE = RULENAME_RE;
# ---------------------------------------------------------------------------
# constructor
# Construct the plugin and register its configuration command.
#
#   $invocant - class name, or an existing object whose class is reused
#   $main     - main SpamAssassin object; its {conf} is handed to set_config()
#
# Returns the new plugin object, blessed into the resolved class.
sub new {
  my ($invocant, $main) = @_;

  # Accept both Class->new(...) and $object->new(...).
  my $package = ref($invocant) || $invocant;

  # Let the parent class build the object, then make sure it is blessed
  # into our own package.
  my $plugin = bless($package->SUPER::new($main), $package);

  $plugin->set_config($main->{conf});
  return $plugin;
}
# ---------------------------------------------------------------------------
# Register the 'mimeheader' configuration command with the config parser.
#
#   $self - this plugin object
#   $conf - configuration object; the command table is handed to
#           $conf->{parser}->register_commands()
#
# The command's 'code' callback parses one line of the form
#   mimeheader NAME_OF_RULE Header-Name =~ /pattern/modifiers
# stores the parsed test under {mimeheader_tests}{NAME_OF_RULE}, and generates
# a tiny eval stub named _mimeheader_eval_NAME_OF_RULE that forwards to
# eval_hook_called().
sub set_config {
my($self, $conf) = @_;
my @cmds;
my $pluginobj = $self; # allow use inside the closure below
push (@cmds, {
setting => 'mimeheader',
is_priv => 1,
code => sub {
# NOTE: this $self shadows the plugin object; it is whatever the config
# parser passes as first argument (presumably the Conf object, since
# eval_hook_called() later reads {mimeheader_tests} from $pms->{conf}).
my ($self, $key, $value, $line) = @_;
local ($1,$2,$3);
# Strip the leading rule name off $value; what remains is
# "Header-Name op /pattern/".
if ($value !~ s/^(${RULENAME_RE})\s+//) {
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
}
my $rulename = untaint_var($1);
if ($value eq '') {
return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
}
# Take :raw to hdrname!
# $1 = header name (optionally followed by ":raw" or a bare ":"),
# $2 = operator (=~ or !~), $3 = everything after the operator.
if ($value !~ /^([^:\s]+(?:\:(?:raw)?)?)\s*([=!]~)\s*(.+)$/) {
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
}
my $hdrname = $1;
my $negated = $2 eq '!~' ? 1 : 0;
my $pattern = $3;
# Drop a trailing bare colon ("Content-Type:"); a ":raw" suffix is kept and
# is stripped later by eval_hook_called().
$hdrname =~ s/:$//;
# Optional trailing "[if-unset: VALUE]" gives the string to test when a part
# lacks the header; it defaults to the empty string.
my $if_unset = '';
if ($pattern =~ s/\s+\[if-unset:\s+(.+)\]$//) {
$if_unset = $1;
}
# compile_regexp() returns the compiled pattern, or a false value plus an
# error message.
my ($rec, $err) = compile_regexp($pattern, 1);
if (!$rec) {
info("mimeheader: invalid regexp for $rulename '$pattern': $err");
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
}
$self->{mimeheader_tests}->{$rulename} = {
hdr => $hdrname,
negated => $negated,
if_unset => $if_unset,
pattern => $rec
};
# now here's a hack; generate a fake eval rule function to
# call this rule's _real_ code!
# TODO: we should have a more elegant way for new rule types to
# be defined
my $evalfn = "_mimeheader_eval_$rulename";
# don't redefine the subroutine if it already exists!
# this causes lots of annoying warnings and such during things like
# "make test".
# (The test definition stored above has already been replaced at this point;
# only the registration steps below are skipped.)
return if (defined &{'Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn});
$self->{parser}->add_test($rulename, $evalfn."()",
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
# Support named regex captures
$self->{parser}->parse_captures($rulename, $rec);
# evalfn/rulename safe, sanitized by $RULENAME_RE
# The generated stub passes its first two arguments plus the literal rule
# name on to eval_hook_called().
my $evalcode = '
sub Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn.' {
$_[0]->eval_hook_called($_[1], q{'.$rulename.'});
}
';
# Appending '; 1' makes the string eval return true on success, so the
# "or do" branch runs only when compiling the stub failed.
eval
$evalcode . '; 1'
or do {
my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
warn "mimeheader: plugin error: $eval_stat\n";
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
};
$pluginobj->register_eval_rule($evalfn);
# Remember the stub so finish_tests() can undefine it again. This push is
# the callback's last statement, so its result is the callback's return value.
push @TEMPORARY_METHODS, "Mail::SpamAssassin::Plugin::MIMEHeader::${evalfn}";
}
});
$conf->{parser}->register_commands(\@cmds);
}
# ---------------------------------------------------------------------------
# Evaluate one mimeheader rule against the MIME parts of a message.  Called
# through the _mimeheader_eval_<rulename> stubs generated by set_config().
#
#   $pobj     - plugin object (not used in the body)
#   $pms      - per-message status object; supplies {conf} and {msg}
#   $rulename - key into $conf->{mimeheader_tests} and $conf->{tflags}
#
# Recognised tflags: range=x-y, concat, multiple, maxhits=N.
#
# Returns 0 as soon as a _check() call returns a true value.  Otherwise
# returns 1 for a negated rule and 0 for a normal one.
sub eval_hook_called {
  my ($pobj, $pms, $rulename) = @_;

  my $conf     = $pms->{conf};
  my $test     = $conf->{mimeheader_tests}->{$rulename};
  my $tflags   = $conf->{tflags}->{$rulename} || '';
  my $regex    = $test->{pattern};
  my $inverted = $test->{negated};

  # A ":raw" suffix on the header name selects the raw header accessors.
  my $header   = $test->{hdr};
  my $want_raw = ($header =~ s/:raw$//) ? 1 : 0;

  # Part window: "x-y", "x-", "-y" or a single "x" (parts count from 1).
  my ($first_part, $last_part) = (0, 1000);
  if ($tflags =~ /(?:^|\s)range=(\d+)?(-)?(\d+)?(?:\s|$)/) {
    my ($lo, $dash, $hi) = ($1, $2, $3);
    if (defined $dash) {
      $first_part = $lo if defined $lo;
      $last_part  = $hi if defined $hi;
    }
    elsif (defined $lo) {
      $first_part = $last_part = $lo;
    }
  }

  my $concat = $tflags =~ /\bconcat\b/;

  # An explicit maxhits=N wins; otherwise "multiple" allows up to 1000 hits
  # and the default is a single hit.
  my $maxhits = 1;
  if ($tflags =~ /\bmaxhits=(\d+)\b/) {
    $maxhits = $1;
  }
  elsif ($tflags =~ /\bmultiple\b/) {
    $maxhits = 1000;
  }

  my $joined  = '';
  my $part_no = 0;
  foreach my $part ($pms->{msg}->find_parts(qr/./)) {
    $part_no++;
    last if $part_no > $last_part;
    next if $part_no < $first_part;

    my $text = $header eq 'ALL' ? $part->get_all_headers($want_raw, 0)
             : $want_raw        ? $part->raw_header($header)
             :                    $part->get_header($header);
    $text = $test->{if_unset} unless defined $text;

    if ($concat) {
      # Collect newline-terminated values now; they are matched once, below.
      $text .= "\n" if $text !~ /\n$/;
      $joined .= $text;
      next;
    }

    return 0
      if _check($pms, $rulename, $text, $regex, $inverted, $maxhits, "part $part_no");
  }

  return 0
    if $concat
    && _check($pms, $rulename, $joined, $regex, $inverted, $maxhits, 'concat');

  return 0 unless $inverted;

  dbg("mimeheader: ran rule $rulename ======> got hit: \"<negative match>\"");
  return 1;
}
# Match $pattern against $value and record the resulting hits on $pms.
#
#   $pms      - per-message status object; got_hit() and set_captures() are
#               called on it
#   $rulename - rule credited with each hit
#   $value    - header text to test
#   $pattern  - compiled regexp
#   $negated  - true for '!~' rules
#   $maxhits  - stop after this many recorded hits
#   $desc     - label for the debug message ("part N" or "concat")
#
# Returns the number of matches seen.  For a negated rule nothing is recorded
# on $pms: the sub returns 1 as soon as the pattern matches (so the caller
# knows the '!~' rule must not fire) and 0 when it does not match.
sub _check {
  my ($pms, $rulename, $value, $pattern, $negated, $maxhits, $desc) = @_;

  my $hits = 0;
  my %captures;
  while ($value =~ /$pattern/gp) {
    # Count the match before bailing out for negated rules; otherwise a
    # matching pattern would be reported as "0 matches" and the negated rule
    # would fire regardless of the header contents.
    $hits++;
    last if $negated;

    if (%-) {
      foreach my $cname (keys %-) {
        # Groups that did not participate in the match are undef in %-;
        # skip them together with empty captures.
        push @{$captures{$cname}},
          grep { defined $_ && $_ ne "" } @{$-{$cname}};
      }
    }

    # /p guarantees ${^MATCH} is set after a successful match.
    my $match = ${^MATCH};
    $pms->got_hit($rulename, '', ruletype => 'eval');
    dbg("mimeheader: ran rule $rulename ======> got hit: \"$match\" ($desc)");
    last if $hits >= $maxhits;
  }
  $pms->set_captures(\%captures) if %captures;
  return $hits;
}
# ---------------------------------------------------------------------------
# Undefine every eval stub recorded in @TEMPORARY_METHODS and then empty the
# list.  Neither $self nor $params is used in the body.
sub finish_tests {
  my ($self, $params) = @_;

  # Each entry is a fully-qualified sub name pushed by set_config().
  undef &{$_} for @TEMPORARY_METHODS;

  @TEMPORARY_METHODS = ();      # clear for next time
}
# ---------------------------------------------------------------------------
# Capability flags: each sub simply returns 1 to advertise that this version
# of the plugin supports the named feature.

# Supports ALL header query (Bug 5582)
sub has_all_header      { return 1; }
# Supports tflags range=x-y
sub has_tflags_range    { return 1; }
# Supports tflags concat
sub has_tflags_concat   { return 1; }
# Supports tflags multiple
sub has_tflags_multiple { return 1; }
# Supports named regex captures (Bug 7992)
sub has_capture_rules   { return 1; }
1;