mirror of
https://git.proxmox.com/git/proxmox-spamassassin
synced 2025-08-06 17:52:00 +00:00
220 lines
5.9 KiB
Perl
Executable File
220 lines
5.9 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
#
|
|
# <@LICENSE>
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at:
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# </@LICENSE>
|
|
|
|
use strict;
|
|
|
|
# usage()
#
# Abort the program with the command-line help text.  The message ends in a
# newline, so die() does not append "at FILE line N" to it.  Never returns.
sub usage {
    die <<'END_USAGE';

parse-rules-for-masses: parse the SpamAssassin rules files for mass-checks,
evolving, and frequency analysis

usage: ./parse-rules-for-masses [-d rulesdir] [-o outputfile] [-s scoreset] [-x]

rulesdir defaults to ../rules
outputfile defaults to ./tmp/rules.pl
scoreset defaults to 0
-x do not include test rules files (ie 70_*)
END_USAGE
}
|
|
|
|
use Getopt::Long;
|
|
use Data::Dumper;
|
|
|
|
# Command-line state.  These stay package globals ("our") because
# readrules() reads $skip_test_rules and $scoreset directly.
our (@rulesdirs, $outputfile, $scoreset, $skip_test_rules);

# Getopt::Long traps a die() raised inside an option callback and turns it
# into a warning, so calling usage() from the "help" handler would print the
# text and then carry on.  Record the request in a flag instead, and also
# stop when GetOptions reports a parse failure (unknown or malformed option).
my $want_help;
my $parsed_ok = GetOptions(
    "d=s"      => \@rulesdirs,
    "o=s"      => \$outputfile,
    "s=i"      => \$scoreset,
    "x"        => \$skip_test_rules,
    "help|h|?" => \$want_help,
);
usage() if $want_help || !$parsed_ok;

# Defaults for anything not given on the command line.
@rulesdirs = ("../rules") unless @rulesdirs;

if (!defined $outputfile) {
    $outputfile = "./tmp/rules.pl";
    mkdir("tmp", 0755);    # result deliberately ignored (it may already exist)
}

$scoreset = 0 if !defined $scoreset;

# $rules maps rule name => hash of attributes; the special key "_scoreset"
# records which score set the data was generated for.
my $rules = { };
$rules->{_scoreset} = $scoreset;
readrules(@rulesdirs);

# $scores is a flat rule name => score view of the same data.
my $scores = { };
foreach my $key (keys %{$rules}) {
    next if $key eq '_scoreset';
    $scores->{$key} = $rules->{$key}->{score};
}

writerules($outputfile);
exit;
|
|
|
|
# readrules(@dirs)
#
# Parse every *.cf file matched by "DIR/*.cf" for each directory in @dirs
# (files in sorted order within a directory) and record what is found in the
# file-level $rules hash, keyed by rule name.  Once all directories have
# been read, the collected entries are post-processed: names without a rule
# definition are dropped, missing scores get a default, and the "mutable"
# flag is settled.
#
# Reads the globals $skip_test_rules and $scoreset.  Files that cannot be
# opened are reported with a warning and skipped.
sub readrules {
    foreach my $indir (@_) {
        foreach my $file (sort glob("$indir/*.cf")) {
            # -x: leave out test rules files.  Note that the pattern matches
            # "70_" anywhere in the path, not only at the start of the name.
            next if $skip_test_rules && $file =~ /70_/;
            _read_rules_file($file);
        }
    }

    _finalize_rules();
    return;
}

# _read_rules_file($file)
#
# Parse one rules file line by line, merging rule definitions, descriptions,
# tflags and scores into $rules.
sub _read_rules_file {
    my ($file) = @_;

    my $fh;
    if (!open($fh, '<', $file)) {
        warn "cannot read $file: $!\n";
        return;
    }

    my $scores_mutable = 1;    # always start off mutable in each file

    while (my $line = <$fh>) {
        # these markers appear in comments, so deal with them before comment
        # stripping takes place
        if ($line =~ /<\/gen:mutable>/i) {
            $scores_mutable = 0;
        }
        elsif ($line =~ /<gen:mutable>/i) {
            $scores_mutable = 1;
        }

        # drop comments and surrounding whitespace; skip what is then empty
        $line =~ s/#.*$//g;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next if $line =~ /^$/;

        # TODO: this could be overwriting stuff
        my $lang = '';
        if ($line =~ s/^lang\s+(\S+)\s+//) {
            $lang = $1;
        }

        if ($line =~ /^(header|rawbody|body|full|uri|askdns|meta|mimeheader|reuse)\s+(\S+)\s+(.*)$/) {
            my ($type, $name, $val) = ($1, $2, $3);

            # "reuse" should be skipped if we already have a rule
            next if $type eq 'reuse' && exists $rules->{$name}->{type};

            my $rule = ($rules->{$name} ||= { });
            $rule->{type}      = $type;
            $rule->{lang}      = $lang;
            $rule->{issubrule} = ($name =~ /^__/) ? '1' : '0';
            $rule->{tflags}    = '';    # (re)defining a rule resets its tflags
            $rule->{eval}      = ($val =~ /\beval:(\w+)/) ? $1 : '0';
            if ($type eq 'meta') {
                # every word in the expression that is not a plain number
                my @depends = grep { !/^\d+$/ } ($val =~ m/(\w+)/g);
                push @{ $rule->{depends} }, @depends;
            }
            $rule->{code} = $val;
        }
        elsif ($line =~ /^describe\s+(\S+)\s+(.+)$/) {
            my ($name, $text) = ($1, $2);
            my $rule = ($rules->{$name} ||= { });
            if ($lang) {
                # a "lang" description never replaces an existing one
                $rule->{describe} ||= $text;
            }
            else {
                $rule->{describe} = $text;
            }
        }
        elsif ($line =~ /^tflags\s+(\S+)\s+(.+)$/) {
            my ($name, $flags) = ($1, $2);
            my $rule = ($rules->{$name} ||= { });
            $rule->{tflags} = $flags;
        }
        elsif ($line =~ /^score\s+(\S+)\s+(.+)$/) {
            my ($name, $score) = ($1, $2);
            my $rule = ($rules->{$name} ||= { });
            if ($score =~ /\s/) {    # there are multiple scores
                ($score) = (split(/\s+/, $score))[$scoreset];
            }
            $rule->{score}   = $score;
            $rule->{mutable} = $scores_mutable;
        }
    }

    close $fh;
    return;
}

# _finalize_rules()
#
# Post-process everything collected in $rules: drop entries that never got a
# rule definition, fill in default scores, and decide mutability.
sub _finalize_rules {
    foreach my $name (keys %{$rules}) {
        next if $name eq '_scoreset';

        my $rule = $rules->{$name};
        if (!defined $rule->{type}) {
            delete $rules->{$name};    # no rule definition -> no rule
            next;
        }

        my $tflags = $rule->{tflags};

        if (!defined $rule->{score}) {
            # default is 1.0, or 0.01 for T_ test rules; negated for "nice"
            my $def = ($name =~ /^T_/) ? 0.01 : 1.0;
            $rule->{score} = ($tflags =~ /\bnice\b/) ? -$def : $def;
            $rule->{no_score_found} = 1;
        }

        # ignore net rules in set 0 or set 2
        if ($tflags =~ /\bnet\b/ && ($scoreset & 1) == 0) {
            $rule->{mutable} = 0;
            $rule->{score}   = 0;
        }

        # ignore bayes ("learn") rules in set 0 or set 1
        if ($tflags =~ /\blearn\b/ && ($scoreset & 2) == 0) {
            $rule->{mutable} = 0;
            $rule->{score}   = 0;
        }

        # if a rule didn't have a score specified, assume it's mutable
        if (!defined $rule->{mutable}) {
            $rule->{mutable} = 1;
        }

        # T_ test rules are never mutable (their default score is 0.01);
        # this works well for release mass-checks, at least
        if ($name =~ /^T_/) {
            $rule->{mutable} = 0;
        }
        elsif ($name eq 'AWL') {    # ignore entirely
            $rule->{mutable} = 0;
            $rule->{score}   = 0;
        }
    }
    return;
}
|
|
|
|
# writerules($outfile)
#
# Write the file-level $rules and $scores structures to $outfile as Perl
# source: a "# dumped at <date>" comment, a Data::Dumper dump that assigns
# %rules and %scores, and a trailing "1;".  Missing parent directories of
# $outfile are created first.
#
# Dies if the file cannot be opened or if closing it reports a write error.
sub writerules {
    my $outfile = shift;

    require File::Basename;
    require File::Path;
    require POSIX;

    # Create the parent directory without going through a shell, so paths
    # containing spaces or shell metacharacters are handled.  Errors are
    # collected rather than thrown: if the directory is still missing, the
    # open() below fails and reports it.
    File::Path::make_path(File::Basename::dirname($outfile),
                          { error => \my $mkdir_errors });

    open(my $out, '>', $outfile) or die "cannot write to $outfile: $!";

    # same layout as date(1) prints by default, without spawning a process
    my $stamp = POSIX::strftime("%a %b %e %H:%M:%S %Z %Y", localtime);
    print {$out} "# dumped at " . $stamp . "\n\n";

    # Purity makes the dump safe to eval even with nested/shared references
    local $Data::Dumper::Purity = 1;
    print {$out} Data::Dumper->Dump([$rules, $scores], ['*rules', '*scores']);

    print {$out} "1;";
    # buffered write errors only surface at close
    close $out or die "cannot write to $outfile: $!";
    return;
}
|
|
|