mirror of
https://git.proxmox.com/git/proxmox-spamassassin
synced 2025-04-28 16:01:29 +00:00
555 lines
16 KiB
Perl
555 lines
16 KiB
Perl
# <@LICENSE>
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at:
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# </@LICENSE>
|
|
|
|
=head1 NAME
|
|
|
|
URILocalBL - blocklist URIs using local information (ISP names, address lists, and country codes)
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
This plugin creates some new rule test types, such as "uri_block_cc",
|
|
"uri_block_cidr", and "uri_block_isp". These rules apply to the URIs
|
|
found in the HTML portion of a message, i.e. E<lt>a href=...E<gt> markup.
|
|
|
|
loadplugin Mail::SpamAssassin::Plugin::URILocalBL
|
|
|
|
Why local blocklisting? There are a few excellent, effective, and
|
|
well-maintained DNSBL's out there. But they have several drawbacks:
|
|
|
|
=over 2
|
|
|
|
=item * blocklists can cover tens of thousands of entries, and you can't select which ones you use;
|
|
|
|
=item * verifying that it's correctly configured can be non-trivial;
|
|
|
|
=item * new blocklisting entries may take a while to be detected and entered, so it's not instantaneous.
|
|
|
|
=back
|
|
|
|
Sometimes all you want is a quick, easy, and very surgical blocklisting of
|
|
a particular site or a particular ISP. This plugin is defined for that
|
|
exact usage case.
|
|
|
|
=head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
|
|
|
|
The format for defining a rule is as follows:
|
|
|
|
uri_block_cc SYMBOLIC_TEST_NAME cc1 cc2 cc3 cc4 ..
|
|
uri_block_cc SYMBOLIC_TEST_NAME !cc1 !cc2 ..
|
|
|
|
or:
|
|
|
|
uri_block_cont SYMBOLIC_TEST_NAME co1 co2 co3 co4 ..
|
|
uri_block_cont SYMBOLIC_TEST_NAME !co1 !co2 ..
|
|
|
|
or:
|
|
|
|
uri_block_cidr SYMBOLIC_TEST_NAME a.a.a.a b.b.b.b/cc
|
|
|
|
or:
|
|
|
|
uri_block_isp SYMBOLIC_TEST_NAME "Data Rancid" McCarrier Phishers-r-Us
|
|
|
|
Example rule for matching a URI in China:
|
|
|
|
uri_block_cc TEST1 cn
|
|
|
|
If you specify list of negations, such rule will match ANY country except
|
|
the listed ones (Finland, Sweden):
|
|
|
|
uri_block_cc TEST1 !fi !se
|
|
|
|
Continents uri_block_cont works exactly the same as uri_block_cc.
|
|
|
|
This would block the URL http://www.baidu.com/index.htm. Similarly, to
|
|
match a Spam-haven netblock:
|
|
|
|
uri_block_cidr TEST2 65.181.64.0/18
|
|
|
|
would match a netblock where several phishing sites were recently hosted.
|
|
|
|
And to block all CIDR blocks registered to an ISP, one might use:
|
|
|
|
uri_block_isp TEST3 "Data Rancid" ColoCrossing
|
|
|
|
Quote ISP names containing spaces.
|
|
|
|
Lastly, if there's a country that you want to block but there's an explicit
|
|
host you wish to exempt from that blocklist, you can use:
|
|
|
|
uri_block_exclude TEST1 www.baidu.com
|
|
|
|
if you wish to exempt URL's referring to this host. The same syntax is
|
|
applicable to CIDR and ISP blocks as well.
|
|
|
|
=head1 DEPENDENCIES
|
|
|
|
The Country-Code based filtering can use any Mail::SpamAssassin::GeoDB
|
|
supported module like MaxMind::DB::Reader (GeoIP2) or Geo::IP. ISP based
|
|
filtering might require a paid subscription database like GeoIPISP.
|
|
|
|
=cut
|
|
|
|
package Mail::SpamAssassin::Plugin::URILocalBL;
|
|
use Mail::SpamAssassin::Plugin;
|
|
use Mail::SpamAssassin::Constants qw(:ip :sa);
|
|
use Mail::SpamAssassin::Util qw(untaint_var idn_to_ascii);
|
|
use Mail::SpamAssassin::NetSet;
|
|
|
|
use Socket;
|
|
|
|
use strict;
|
|
use warnings;
|
|
# use bytes;
|
|
use re 'taint';
|
|
|
|
our @ISA = qw(Mail::SpamAssassin::Plugin);
|
|
|
|
sub dbg { my $msg = shift; Mail::SpamAssassin::Plugin::dbg ("URILocalBL: $msg", @_); }
|
|
|
|
my $IP_ADDRESS = IP_ADDRESS;
|
|
my $RULENAME_RE = RULENAME_RE;
|
|
|
|
# constructor
|
|
sub new {
|
|
my $class = shift;
|
|
my $mailsaobject = shift;
|
|
|
|
# some boilerplate...
|
|
$class = ref($class) || $class;
|
|
my $self = $class->SUPER::new($mailsaobject);
|
|
bless ($self, $class);
|
|
|
|
$self->register_eval_rule("check_uri_local_bl");
|
|
$self->set_config($mailsaobject->{conf});
|
|
|
|
# we need GeoDB country/isp
|
|
$self->{main}->{geodb_wanted}->{country} = 1;
|
|
$self->{main}->{geodb_wanted}->{isp} = 1;
|
|
|
|
return $self;
|
|
}
|
|
|
|
sub set_config {
|
|
my ($self, $conf) = @_;
|
|
my @cmds;
|
|
|
|
push (@cmds, {
|
|
setting => 'uri_block_cc',
|
|
type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
|
|
is_priv => 1,
|
|
code => sub {
|
|
my ($self, $key, $value, $line) = @_;
|
|
|
|
if ($value !~ /^(${RULENAME_RE})\s+(.+?)\s*$/) {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
my $name = $1;
|
|
my $args = $2;
|
|
my @added;
|
|
|
|
foreach my $cc (split(/\s+/, uc($args))) {
|
|
# this should match all country codes including satellite providers
|
|
if ($cc =~ /^((\!)?([a-z][a-z0-9]))$/i) {
|
|
if (defined $2) {
|
|
$self->{urilocalbl}->{$name}{countries_neg} = 1;
|
|
$self->{urilocalbl}->{$name}{countries}{$3} = 0;
|
|
} else {
|
|
$self->{urilocalbl}->{$name}{countries}{$3} = 1;
|
|
}
|
|
push @added, $1;
|
|
} else {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
}
|
|
|
|
my %checkneg = map { $_ => 1 } values %{$self->{urilocalbl}->{$name}{countries}};
|
|
if (scalar keys %checkneg > 1) {
|
|
dbg("config: uri_block_cc $name failed: trying to combine negations and non-negations");
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
|
|
dbg("config: uri_block_cc $name added: ".join(' ', @added));
|
|
$self->{parser}->add_test($name, 'check_uri_local_bl()',
|
|
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
|
|
$self->{parser}->{conf}->{priority}->{$name} = -100;
|
|
}
|
|
});
|
|
|
|
push (@cmds, {
|
|
setting => 'uri_block_cont',
|
|
type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
|
|
is_priv => 1,
|
|
code => sub {
|
|
my ($self, $key, $value, $line) = @_;
|
|
|
|
if ($value !~ /^(${RULENAME_RE})\s+(.+?)\s*$/) {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
my $name = $1;
|
|
my $args = $2;
|
|
my @added;
|
|
|
|
foreach my $cc (split(/\s+/, uc($args))) {
|
|
# this should match all continent codes
|
|
if ($cc =~ /^((\!)?([a-z]{2}))$/i) {
|
|
if (defined $2) {
|
|
$self->{urilocalbl}->{$name}{continents_neg} = 1;
|
|
$self->{urilocalbl}->{$name}{continents}{$3} = 0;
|
|
} else {
|
|
$self->{urilocalbl}->{$name}{continents}{$3} = 1;
|
|
}
|
|
push @added, $1;
|
|
} else {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
}
|
|
|
|
my %checkneg = map { $_ => 1 } values %{$self->{urilocalbl}->{$name}{continents}};
|
|
if (scalar keys %checkneg > 1) {
|
|
dbg("config: uri_block_cont $name failed: trying to combine negations and non-negations");
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
|
|
dbg("config: uri_block_cont $name added: ".join(' ', @added));
|
|
$self->{parser}->add_test($name, 'check_uri_local_bl()',
|
|
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
|
|
$self->{parser}->{conf}->{priority}->{$name} = -100;
|
|
}
|
|
});
|
|
|
|
push (@cmds, {
|
|
setting => 'uri_block_isp',
|
|
type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
|
|
is_priv => 1,
|
|
code => sub {
|
|
my ($self, $key, $value, $line) = @_;
|
|
|
|
if ($value !~ /^(${RULENAME_RE})\s+(.+?)\s*$/) {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
my $name = $1;
|
|
my $args = $2;
|
|
my @added;
|
|
|
|
# gather up possibly quoted strings
|
|
while ($args =~ /("[^"]*"|(?<!")\S+(?!"))/g) {
|
|
my $isp = $1;
|
|
$isp =~ s/"//g;
|
|
my $ispkey = uc($isp); $ispkey =~ s/\s+//gs;
|
|
$self->{urilocalbl}->{$name}{isps}{$ispkey} = $isp;
|
|
push @added, "\"$isp\"";
|
|
}
|
|
|
|
if (!defined $self->{urilocalbl}->{$name}{isps}) {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
|
|
dbg("config: uri_block_isp $name added: ". join(', ', @added));
|
|
$self->{parser}->add_test($name, 'check_uri_local_bl()',
|
|
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
|
|
$self->{parser}->{conf}->{priority}->{$name} = -100;
|
|
}
|
|
});
|
|
|
|
push (@cmds, {
|
|
setting => 'uri_block_cidr',
|
|
type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
|
|
is_priv => 1,
|
|
code => sub {
|
|
my ($self, $key, $value, $line) = @_;
|
|
|
|
if ($value !~ /^(${RULENAME_RE})\s+(.+?)\s*$/) {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
my $name = $1;
|
|
my $args = $2;
|
|
|
|
foreach my $addr (split(/\s+/, $args)) {
|
|
if ($addr =~ m!^$IP_ADDRESS(?:/\d{1,3})?$!o) {
|
|
$self->{urilocalbl}->{$name}{cidr}{$addr} = 1;
|
|
} else {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
}
|
|
|
|
$self->{parser}->add_test($name, 'check_uri_local_bl()',
|
|
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
|
|
$self->{parser}->{conf}->{priority}->{$name} = -100;
|
|
}
|
|
});
|
|
|
|
push (@cmds, {
|
|
setting => 'uri_block_exclude',
|
|
type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
|
|
is_priv => 1,
|
|
code => sub {
|
|
my ($self, $key, $value, $line) = @_;
|
|
|
|
if ($value !~ /^(${RULENAME_RE})\s+(.+?)\s*$/) {
|
|
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
|
|
}
|
|
my $name = $1;
|
|
my $args = $2;
|
|
|
|
foreach my $arg (split(/\s+/, $args)) {
|
|
$self->{urilocalbl}->{$name}{exclusions}{lc($arg)} = 1;
|
|
}
|
|
|
|
$self->{parser}->add_test($name, 'check_uri_local_bl()',
|
|
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
|
|
$self->{parser}->{conf}->{priority}->{$name} = -100;
|
|
}
|
|
});
|
|
|
|
$conf->{parser}->register_commands(\@cmds);
|
|
}
|
|
|
|
sub finish_parsing_end {
|
|
my ($self, $opts) = @_;
|
|
|
|
my $conf = $opts->{conf};
|
|
|
|
# compile cidrs now
|
|
foreach my $rulename (keys %{$conf->{urilocalbl}}) {
|
|
my $ruleconf = $conf->{urilocalbl}->{$rulename};
|
|
next if defined $ruleconf->{netset};
|
|
next if !defined $ruleconf->{cidr};
|
|
my $netset = Mail::SpamAssassin::NetSet->new($rulename);
|
|
foreach my $addr (keys %{$ruleconf->{cidr}}) {
|
|
if ($netset->add_cidr($addr)) {
|
|
dbg("config: uri_block_cidr $rulename added: $addr");
|
|
} else {
|
|
dbg("config: uri_block_cidr $rulename add failed: $addr");
|
|
}
|
|
}
|
|
if ($netset->get_num_nets()) {
|
|
$ruleconf->{netset} = $netset;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub check_uri_local_bl {
|
|
my ($self, $pms) = @_;
|
|
|
|
return 0 if $self->{urilocalbl_disabled};
|
|
|
|
if (!$self->{main}->{geodb} ||
|
|
(!$self->{main}->{geodb}->can('country') &&
|
|
!$self->{main}->{geodb}->can('isp'))) {
|
|
dbg("plugin disabled, GeoDB country/isp not available");
|
|
$self->{urilocalbl_disabled} = 1;
|
|
return 0;
|
|
}
|
|
|
|
my $rulename = $pms->get_current_eval_rule_name();
|
|
my $ruleconf = $pms->{conf}->{urilocalbl}->{$rulename};
|
|
|
|
dbg("running $rulename");
|
|
|
|
my %found_hosts;
|
|
|
|
foreach my $info (values %{$pms->get_uri_detail_list()}) {
|
|
next unless $info->{hosts};
|
|
|
|
# look for W3 links only
|
|
next unless defined $info->{types}->{a} || defined $info->{types}->{parsed};
|
|
|
|
my %hosts = %{$info->{hosts}}; # evade hash reset by copy
|
|
while (my($host, $domain) = each %hosts) {
|
|
if (defined $ruleconf->{exclusions}{lc($domain)}) {
|
|
dbg("excluded $host, domain $domain matches");
|
|
next;
|
|
}
|
|
elsif ($host =~ IS_IP_ADDRESS) {
|
|
if ($self->_check_host($pms, $rulename, $host, [$host])) {
|
|
# if hit, rule is done
|
|
return 0;
|
|
}
|
|
} else {
|
|
# do host lookups only after all IPs are checked, since they
|
|
# don't need resolving..
|
|
$found_hosts{$host} = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0 unless %found_hosts;
|
|
|
|
# bail out now if dns not available
|
|
return 0 if !$pms->is_dns_available();
|
|
|
|
my $queries;
|
|
foreach my $host (keys %found_hosts) {
|
|
$host = idn_to_ascii($host);
|
|
dbg("launching A/AAAA lookup for $host");
|
|
# launch dns
|
|
my $ret = $pms->{async}->bgsend_and_start_lookup($host, 'A', undef,
|
|
{ rulename => $rulename, host => $host, type => 'URILocalBL' },
|
|
sub { my($ent, $pkt) = @_; $self->_finish_lookup($pms, $ent, $pkt); },
|
|
master_deadline => $pms->{master_deadline}
|
|
);
|
|
$queries++ if defined $ret;
|
|
# also IPv6 if database supports
|
|
if ($self->{main}->{geodb}->can('country_v6')) {
|
|
$ret = $pms->{async}->bgsend_and_start_lookup($host, 'AAAA', undef,
|
|
{ rulename => $rulename, host => $host, type => 'URILocalBL' },
|
|
sub { my($ent, $pkt) = @_; $self->_finish_lookup($pms, $ent, $pkt); },
|
|
master_deadline => $pms->{master_deadline}
|
|
);
|
|
$queries++ if defined $ret;
|
|
}
|
|
}
|
|
|
|
return 0 if !$queries; # no query started
|
|
return; # return undef for async status
|
|
}
|
|
|
|
sub _finish_lookup {
|
|
my ($self, $pms, $ent, $pkt) = @_;
|
|
|
|
my $rulename = $ent->{rulename};
|
|
my $host = $ent->{host};
|
|
|
|
# Skip duplicate A / AAAA matches
|
|
return if $pms->{urilocalbl_finished}->{$rulename};
|
|
|
|
if (!$pkt) {
|
|
# $pkt will be undef if the DNS query was aborted (e.g. timed out)
|
|
dbg("host lookup failed: $rulename $host");
|
|
return;
|
|
}
|
|
|
|
$pms->rule_ready($rulename); # mark rule ready for metas
|
|
|
|
my @answer = $pkt->answer;
|
|
my @addrs;
|
|
foreach my $rr (@answer) {
|
|
if ($rr->type eq 'A' || $rr->type eq 'AAAA') {
|
|
push @addrs, $rr->address;
|
|
}
|
|
}
|
|
|
|
if (@addrs) {
|
|
if ($self->_check_host($pms, $rulename, $host, \@addrs)) {
|
|
$pms->{urilocalbl_finished}->{$rulename} = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub _check_host {
|
|
my ($self, $pms, $rulename, $host, $addrs) = @_;
|
|
|
|
my $ruleconf = $pms->{conf}->{urilocalbl}->{$rulename};
|
|
my $geodb = $self->{main}->{geodb};
|
|
|
|
if ($host ne $addrs->[0]) {
|
|
dbg("resolved $host: ".join(', ', @$addrs));
|
|
}
|
|
|
|
foreach my $ip (@$addrs) {
|
|
if (defined $ruleconf->{exclusions}{$ip}) {
|
|
dbg("excluded $host, IP $ip matches");
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (defined $ruleconf->{countries}) {
|
|
my $neg = defined $ruleconf->{countries_neg};
|
|
my $testcc = join(' ', sort keys %{$ruleconf->{countries}});
|
|
if ($neg) {
|
|
dbg("checking $host for any country except: $testcc");
|
|
} else {
|
|
dbg("checking $host for countries: $testcc");
|
|
}
|
|
foreach my $ip (@$addrs) {
|
|
my $cc = $geodb->get_country($ip);
|
|
if ( (!$neg && defined $ruleconf->{countries}{$cc}) ||
|
|
($neg && !defined $ruleconf->{countries}{$cc}) ) {
|
|
dbg("$host ($ip) country $cc - HIT");
|
|
$pms->test_log("Host: $host in country $cc", $rulename);
|
|
$pms->got_hit($rulename, "");
|
|
return 1;
|
|
} else {
|
|
dbg("$host ($ip) country $cc - ".($neg ? "excluded" : "no match"));
|
|
}
|
|
}
|
|
}
|
|
|
|
if (defined $ruleconf->{continents}) {
|
|
my $neg = defined $ruleconf->{continents_neg};
|
|
my $testcont = join(' ', sort keys %{$ruleconf->{continents}});
|
|
if ($neg) {
|
|
dbg("checking $host for any continent except: $testcont");
|
|
} else {
|
|
dbg("checking $host for continents: $testcont");
|
|
}
|
|
foreach my $ip (@$addrs) {
|
|
my $cc = $geodb->get_continent($ip);
|
|
if ( (!$neg && defined $ruleconf->{continents}{$cc}) ||
|
|
($neg && !defined $ruleconf->{continents}{$cc}) ) {
|
|
dbg("$host ($ip) continent $cc - HIT");
|
|
$pms->test_log("Host: $host in continent $cc", $rulename);
|
|
$pms->got_hit($rulename, "");
|
|
return 1;
|
|
} else {
|
|
dbg("$host ($ip) continent $cc - ".($neg ? "excluded" : "no match"));
|
|
}
|
|
}
|
|
}
|
|
|
|
if (defined $ruleconf->{isps}) {
|
|
if ($geodb->can('isp')) {
|
|
my $testisp = join(', ', map {"\"$_\""} sort values %{$ruleconf->{isps}});
|
|
dbg("checking $host for isps: $testisp");
|
|
|
|
foreach my $ip (@$addrs) {
|
|
my $isp = $geodb->get_isp($ip);
|
|
next unless defined $isp;
|
|
my $ispkey = uc($isp); $ispkey =~ s/\s+//gs;
|
|
if (defined $ruleconf->{isps}{$ispkey}) {
|
|
dbg("$host ($ip) isp \"$isp\" - HIT");
|
|
$pms->test_log("Host: $host in isp $isp", $rulename);
|
|
$pms->got_hit($rulename, "");
|
|
return 1;
|
|
} else {
|
|
dbg("$host ($ip) isp $isp - no match");
|
|
}
|
|
}
|
|
} else {
|
|
dbg("skipping ISP check, GeoDB database not loaded");
|
|
}
|
|
}
|
|
|
|
if (defined $ruleconf->{netset}) {
|
|
foreach my $ip (@$addrs) {
|
|
if ($ruleconf->{netset}->contains_ip($ip)) {
|
|
dbg("$host ($ip) matches cidr - HIT");
|
|
$pms->test_log("Host: $host in cidr", $rulename);
|
|
$pms->got_hit($rulename, "");
|
|
return 1;
|
|
} else {
|
|
dbg("$host ($ip) not matching cidr");
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
1;
|