mirror of
https://git.proxmox.com/git/proxmox-spamassassin
synced 2025-08-16 17:17:39 +00:00
1278 lines
36 KiB
Perl
Executable File
1278 lines
36 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
#
|
|
# build/mkrules -- compile the SpamAssassin rules into installable form
|
|
#
|
|
# <@LICENSE>
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at:
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# </@LICENSE>
|
|
|
|
# This is an implementation of
|
|
# http://wiki.apache.org/spamassassin/RulesProjPromotion
|
|
|
|
# Abort the script with a one-line synopsis of the command-line options.
# Never returns: the synopsis is raised through die().
sub usage {
  my $synopsis =
      "build/mkrules [--src srcdir] [--exit_on_no_src] [--out outputdir]\n";
  die $synopsis;
}
|
|
|
|
# Keywords that introduce an actual rule definition (as opposed to a setting
# that merely refers to a rule by name).  The order of the alternatives is
# kept as-is.
my $RULE_DEFINE_KEYWORDS_RE = do {
  my $alternatives = join('|', qw(
    header rawbody body full uri
    meta mimeheader urirhssub uridnsbl
  ));
  qr{$alternatives}x;
};

# Every keyword whose first argument is a rule name: the defining keywords
# above plus the per-rule settings.
my $RULE_KEYWORDS_RE = do {
  my $settings = join('|', qw(describe tflags reuse score));
  qr{${RULE_DEFINE_KEYWORDS_RE}|$settings}x;
};
|
|
|
|
my $fail_message = "";
|
|
|
|
use strict;
|
|
use File::Find;
|
|
use File::Copy;
|
|
use File::Basename;
|
|
use Getopt::Long;
|
|
|
|
# use SpamAssassin classes directly, so we can lint rules
|
|
# as we go
|
|
use lib 'lib';
|
|
use Mail::SpamAssassin;
|
|
|
|
our ( @opt_srcs, $opt_out, $opt_sandboxout, $opt_manifest,
      $opt_manifestskip, $opt_listpromotable, $opt_active,
      $opt_activeout, $default_file_header,
      $opt_rulemetadata, $opt_exit_on_no_src);

# Options given without an explicit destination are stored by Getopt::Long
# in the matching $opt_* package variables declared above.  A parse failure
# (unknown option, missing value) now aborts with the synopsis instead of
# being silently ignored.
GetOptions("src=s" => \@opt_srcs,
    "out=s",
    "sandboxout=s",
    "activeout=s",
    "active=s",
    "manifest=s",
    "manifestskip=s",
    "rulemetadata=s",
    "exit_on_no_src",
  ) or usage();

# no --src given: fall back to whichever default source dirs exist
if (!@opt_srcs) {
  foreach my $default_src ('rulescode', 'rulesrc') {
    push(@opt_srcs, $default_src) if (-d $default_src);
  }
}

# default locations for the optional input files, used only if present
if (!$opt_manifest && -f "MANIFEST") {
  $opt_manifest = "MANIFEST";
}

if (!$opt_manifestskip && -f "MANIFEST.SKIP") {
  $opt_manifestskip = "MANIFEST.SKIP";
}

if (!$opt_active && -f "rules/active.list") {
  $opt_active = "rules/active.list";
}

# --exit_on_no_src: a missing source tree is not an error, just stop quietly
if ($opt_exit_on_no_src) {
  my $foundone = 0;
  foreach my $src (@opt_srcs) {
    if (-d $src) { $foundone++; last; }
  }

  if (!$foundone) {
    print "no source directory found: exiting\n";
    exit 0;
  }
}
# else
die "no src" unless (@opt_srcs >= 1);

my $promolist;

die "no out" unless ($opt_out);
die "unreadable out" unless (-d $opt_out);
# guard against an undefined $opt_active (no --active and no default file),
# which would otherwise trigger an "uninitialized value" warning first
die "unreadable active" unless (defined $opt_active && -f $opt_active);

$opt_sandboxout ||= "70_sandbox.cf";
$opt_activeout  ||= "72_active.cf";

# source files that need compilation, and their targets
my $needs_compile = { };
my $found_output = { };
my $current_src;
my $newest_src_mtime = 0;
my $newest_out_mtime = 0;

$default_file_header = join('', <DATA>);
compile_utf8ify_function();

# scan every source dir; src_wanted() fills $needs_compile and tracks the
# newest source mtime
foreach my $src (@opt_srcs) {
  if (!-d $src) {
    warn "WARNING: unreadable src '$src'\n";
    next;
  }
  $current_src = $src;
  File::Find::find ({
          wanted => \&src_wanted,
          no_chdir => 1
        }, $src);
}

# get mtimes of output files; we can be sure that all
# output is under the "opt_out" dir, so recurse there
File::Find::find ({
        wanted => \&out_wanted,
        no_chdir => 1
      }, $opt_out);

# we must rebuild if a compiled .pm is missing, too
my $found_all_pm_files = 1;
foreach my $f (keys %{$needs_compile}) {
  next unless ($f =~ /\.pm$/i);
  if (!exists $found_output->{basename($f)}) {
    $found_all_pm_files = 0;
  }
}

# check mtime on the active.list file, too
{
  my @st = stat $opt_active;
  if ($st[9] && $st[9] > $newest_src_mtime) {
    $newest_src_mtime = $st[9];
  }
}

# check mtimes, and also require that the two required output files
# really do exist
if ($newest_src_mtime && $newest_out_mtime
    && $newest_src_mtime < $newest_out_mtime
    && -f $opt_out.'/'.$opt_sandboxout
    && -f $opt_out.'/'.$opt_activeout
    && $found_all_pm_files)
{
  print "mkrules: no rules updated\n";
  exit 0;
}

my $rules = { };

my $file_manifest = { };
my $file_manifest_skip = [ ];
if ($opt_manifest) {
  read_manifest($opt_manifest);
}
if ($opt_manifestskip) {
  read_manifest_skip($opt_manifestskip);
}

my $active_rules = { };
read_active($opt_active);

# context for the rules compiler
my $seen_rules = { };
my $renamed_rules = { };
my $output_files = { };
my $output_file_text = { };
my $files_to_lint = { };
my $entries_for_rule_name = { };

# $COMMENTS is a "catch-all" "name", for lines that appear after the last line
# that refers to a rule by name.  Those lines are not published by themselves;
# they'll be published to all pubfiles found in the file.
#
# It's assumed they are comments, because they generally are, but could be all
# sorts of unparseable lines.
my $COMMENTS = '!comments!';

# another "fake name" for lines that should always be published.  They'll
# be published to the non-sandbox file.
my $ALWAYS_PUBLISH = '!always_publish!';

read_all_rules($needs_compile);
read_rules_from_output_dir();
compile_output_files();
lint_output_files();
write_output_files();

# mkrules.t relies on the script exiting cleanly Bug #7302 and Bug #7692
exit if ($ENV{'TEST_ACTIVE'}) ;

# any lint failure recorded along the way makes the whole run fail
die "$fail_message" if ( $fail_message =~ m/./) ;
exit;
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Lint every output buffer registered in $files_to_lint.  A buffer that fails
# lint is reported, recorded in $fail_message, and blanked out.
sub lint_output_files {
  for my $outfile (keys %$files_to_lint) {
    my $lint_input = join("\n",
        "file start $outfile",
        $output_file_text->{$outfile},
        "file end $outfile");

    next if (lint_rule_text($lint_input) == 0);

    warn "\nERROR: LINT FAILED, suppressing output: $outfile\n\n";
    $fail_message .= "ERROR: LINT FAILED, suppressing output: $outfile\n";

    # Keep the hash entry (with empty text) rather than deleting it, so the
    # MANIFEST-listed output file is still written; otherwise
    # 'make distcheck'/'disttest' would fail on the missing file.
    $output_file_text->{$outfile} = '';
  }
}
|
|
|
|
# Run SpamAssassin's lint over a chunk of rule configuration text.
#
# $text is appended to a small fixed preamble and passed to a fresh
# Mail::SpamAssassin object as its config_text.  Returns the number of lint
# messages flagged as errors; 0 means the text is good.
sub lint_rule_text {
  my ($text) = @_;

  # ensure we turn off slow/optional stuff for linting, but keep the essentials
  my $pretext = q{
    loadplugin Mail::SpamAssassin::Plugin::Check
    loadplugin Mail::SpamAssassin::Plugin::URIDNSBL
    util_rb_tld com   # skip "need to run sa-update" warn
    use_bayes 0
  };

  my %sa_args = (
    rules_filename => "./rules",
    # debug => 1,
    local_tests_only => 1,
    dont_copy_prefs => 1,
    config_text => $pretext.$text
  );
  my $linter = Mail::SpamAssassin->new(\%sa_args);

  my $error_count = 0;
  $linter->{lint_callback} = sub {
    my %info = @_;

    # these two kinds of message are ignored here
    return if ($info{msg} =~ /
          (?:score\sset\sfor\snon-existent|description\sexists)
        /x);

    warn "lint: $info{msg}";
    $error_count++ if ($info{iserror});
  };

  $linter->lint_rules();
  $linter->finish();
  return $error_count;      # 0 means good
}
|
|
|
|
# File::Find callback for the source trees (invoked with no_chdir => 1).
#
# Tracks the newest mtime seen in $newest_src_mtime and registers every
# eligible source file in $needs_compile, keyed by its path, with:
#   f        => source path
#   t        => target path directly under $opt_out
#   dir      => directory part relative to $current_src (with trailing slash)
#   filename => last path component
sub src_wanted {
  my $path = $File::Find::name;

  # record stat times of directories, too, to catch file additions/removals
  # in the source tree
  my @st = stat $path;
  if ($st[9] && $st[9] > $newest_src_mtime) {
    $newest_src_mtime = $st[9];
  }

  # only files from now on, though
  return if (!-f $path);

  # limit what will be copied from sandboxes.  (The original matched the
  # second pattern against $_, which equals $path under no_chdir.)
  return if ($path =~ /\bsandbox\b/ && $path !~ /(?:\d.*\.cf|\.pm)$/i);

  # don't use generated scores; they can be out of sync with what is currently
  # in the sandboxes or the most current active.list file at any given time
  return if ($path =~ /\bscores\b/);

  # a bit of sanity please - no svn metadata ;)
  return if ($path =~ /\.svn/);

  # strip the source-dir prefix; \Q..\E keeps regex metacharacters in the
  # directory name from being interpreted as a pattern
  my $dir = $path;
  $dir =~ s/^\Q${current_src}\E[\/\\\:]//s;

  # split off the final path component; only trust $1 if the match succeeded
  return unless ($dir =~ s/([^\/\\\:]+)$//);
  my $filename = $1;

  my $f = "$current_src/$dir$filename";
  my $t = "$opt_out/$filename";

  $needs_compile->{$f} = {
    f => $f,
    t => $t,
    dir => $dir,
    filename => $filename
  };
}
|
|
|
|
# File::Find callback for the output directory (invoked with no_chdir => 1).
#
# Considers only .cf and .pm files: tracks the newest mtime in
# $newest_out_mtime and records the basename of every .pm file found in
# $found_output.
sub out_wanted {
  my $path = $File::Find::name;
  return unless (-f $path);
  return if ($path =~ /\.svn/);
  return unless ($path =~ /\.(?:cf|pm)$/i);

  my @st = stat $path;
  if ($st[9] && $st[9] > $newest_out_mtime) {
    $newest_out_mtime = $st[9];
  }

  return unless ($path =~ /\.pm$/i);

  # Only the last path component is needed.  The original first stripped a
  # "$current_src" prefix, but that variable names a *source* dir (and may be
  # undefined), so it does not belong here; the resulting filename is the same.
  if ($path =~ /([^\/\\\:]+)$/) {
    $found_output->{$1} = 1;
  }
}
|
|
|
|
# Compile all the source files found by the src_wanted() sub, in sorted
# order so that the order of precedence makes sense.
#
# $sources is a hashref keyed by source path; each value is an entry hash
# with the keys f, t, dir and filename, as built by src_wanted().
sub read_all_rules {
  my ($sources) = @_;

  # deal with the perl modules first, so that later linting w/ loadplugin will
  # work appropriately: ".pm" sorts before ".cf", which sorts before files
  # with any other extension; ties are broken by path.
  my @ordered = sort {
    my ($ae) = $a =~ /\.(cf|pm)$/;
    my ($be) = $b =~ /\.(cf|pm)$/;
    # other extensions yield undef; use '' to avoid warnings under -w
    $ae = '' unless defined $ae;
    $be = '' unless defined $be;
    ($be cmp $ae) || ($a cmp $b);
  } keys %$sources;

  foreach my $f (@ordered) {
    # use the hash we were given rather than reaching for the global
    my $entry = $sources->{$f};
    my $t = $entry->{t};

    # TODO: dependency checking optimization?  For now every file is
    # rebuilt on every run, regardless of mtimes.

    if ($entry->{filename} =~ /\.pm$/) {
      plugin_file_compile($entry);
    }
    elsif ($entry->{dir} =~ /sandbox/) {
      rule_file_compile($f, $t, $entry->{filename},
                  { issandbox => 1 });
    }
    elsif ($entry->{dir} =~ /scores/) {
      rule_file_compile($f, $t, $entry->{filename},
                  { issandbox => 1, isscores => 1 });
    }
    elsif ($entry->{dir} =~ /extra/) {
      # 'extra' rulesets; not built by default (TODO)
      next;
    }
    else {
      # rules in "core" and "lang" are always copied
      rule_file_compile($f, $t, $entry->{filename}, { });
    }
  }
}
|
|
|
|
###########################################################################
|
|
|
|
# Rules are compiled from source dir to output dir.
|
|
#
|
|
# Rules in "rules/active.list" are promoted to "72_active.cf"; rules not
|
|
# listed there are relegated to "70_sandbox.cf". There is code to allow
|
|
# other filenames to be selected from the rulesrc .cf file, but I'm not
|
|
# sure if it works anymore ;)
|
|
#
|
|
# Rules will be autorenamed, if there's a collision between a new rule name and
|
|
# one that's already been output by the compiler in another source file. The
|
|
# autorenaming is very simple -- portions of the current source path are
|
|
# appended to the rule name, sanitised.
|
|
|
|
# Parse one rules source file and merge what it contains into $rules.
#
#   $f        - path of the source file to read
#   $t        - target path (not used inside this sub)
#   $filename - basename of the source file (passed on to
#               copy_to_output_buffers)
#   $flags    - hashref; {issandbox} and {isscores} describe where the file
#               came from
#
# Each line is classified as a rule line, a pubfile/publish directive, a
# conditional (if/ifplugin/else/endif), require_version, loadplugin, or
# "anything else", and accumulated under a block name in $rules.  Afterwards
# the blocks are handed to copy_to_output_buffers() and marked as seen.
# Dies if the file cannot be opened.
sub rule_file_compile {
  my ($f, $t, $filename, $flags) = @_;
  my $issandbox = $flags->{issandbox};
  my $isscores = $flags->{isscores};

  # three-arg open on a lexical handle: the filename is never interpreted as
  # a mode, and the handle cannot clash with other users of a bareword "IN"
  open (my $in, '<', $f) or die "cannot read $f: $!";

  # a fast parser for the config file format; don't need the
  # full deal here, and it must be fast, since it's run on every
  # "make" invocation

  my $rule_order = [ ];

  my $lastrule = $COMMENTS;

  if (!defined $rules->{$ALWAYS_PUBLISH}) {
    $rules->{$ALWAYS_PUBLISH} = rule_entry_create();
  }

  # zero or more "ifplugin" or "if" scopes
  my @current_conditionals = ();
  my $current_comments = '';

  while (<$in>) {
    my $orig = $_;

    s/#.*$//g; s/^\s+//; s/\s+$//;

    # drop comments/blank lines from output
    next if (/^$/);

    # save "lang" declarations (the prefix is stripped from $_ for parsing;
    # $orig still carries it)
    my $lang = '';
    if (s/^lang\s+(\S+)\s+//) {
      $lang = $1;
    }

    if (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/)
    {
      # rule definitions
      my $type = $1;
      my $name = $2;
      my $val = $3;

      my $origname = $name;
      if ($issandbox) {
        $name = sandbox_rule_name_avoid_collisions($name, $f);
      }
      my $origname_w_T_prefix = $name;
      # non-sandbox rules always use the same name

      if (scalar @current_conditionals) {
        # ensure the current conditionals are used in the block name;
        # this ensures that we scope alternative (#ifdef-style) dupe
        # rule definitions in their own ifplugin scopes
        $name .= " ".join("", @current_conditionals);
        $name =~ s/\s+/ /gs; $name =~ s/ $//;
      }

      # track this as a rule-entry block for that rule name
      # (and its T_ prefixed variant, if relevant)
      push @{$entries_for_rule_name->{$origname}}, $name;
      push @{$entries_for_rule_name->{$origname_w_T_prefix}}, $name;

      # comment "score" lines for sandbox rules (bug 5558)
      # use generated scores, though, if the rule is active
      if ($type eq 'score' && $issandbox &&
          !($isscores && $active_rules->{$name}))
      {
        $orig =~ s/^/#/g;
      }

      if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }

      $rules->{$name}->{issandbox} = $issandbox;
      $rules->{$name}->{isscores} = $isscores;
      $rules->{$name}->{origname} = $origname;
      $rules->{$name}->{origname_w_T_prefix} = $origname_w_T_prefix;
      $rules->{$name}->{cond} = [@current_conditionals];
      $rules->{$name}->{text} .= $current_comments . $orig;
      $rules->{$name}->{plugin_dependencies} = {};

      # note if the conditional is a plugin reference, as we need to
      # ensure that "loadplugin" lines stay in the same place
      foreach my $c (@current_conditionals) {
        if ($c =~ /^ifplugin\s+(\S+)/) {
          $rules->{$name}->{plugin_dependencies}->{$1} = 1;
        } elsif ($c =~ /^if.*plugin/) {
          while ($c =~ /plugin\s*\(\s*(\S+)\s*\)/g) {
            $rules->{$name}->{plugin_dependencies}->{$1} = 1;
          }
        }
      }

      # note if we found the rule defn or not.  if we did not,
      # that means the rule was a code-tied rule, which should always
      # have its descriptions/scores/etc. published in "active".
      if ($type =~ /^${RULE_DEFINE_KEYWORDS_RE}$/x) {
        $rules->{$name}->{found_definition} = 1;
        $rules->{$name}->{srcfile} = $f;
        $rules->{$name}->{code} = $orig;
      }
      elsif ($type eq 'tflags') {
        # userconf rules are always published in "active"
        if ($val =~ /\buserconf\b/) {
          $rules->{$name}->{forceactive} = 1;
        }

        # record for rulemetadata code
        $val =~ s/\s+/ /gs;
        if ($rules->{$name}->{tflags}) {
          $rules->{$name}->{tflags} .= ' '.$val;
        } else {
          $rules->{$name}->{tflags} = $val;
        }
      }

      $current_comments = '';

      $lastrule = $name;
      push (@$rule_order, $name);
    }
    elsif (/^
        (pubfile|publish)
        \s+(\S+)\s*(.*?)$
      /x)
    {
      # preprocessor directives
      my $command = $1;
      my $name = $2;
      my $val = $3;

      my $origname = $name;

      # note: if we call sandbox_rule_name_avoid_collisions(), it'll
      # rename to 'T_RULENAME' -- which is exactly what we're trying
      # to avoid in 'publish RULENAME' lines!  so don't call it here.

      if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
      $rules->{$name}->{origname} = $origname;
      $rules->{$name}->{origname_w_T_prefix} = $origname;

      if ($command eq 'publish') {
        # the 'publish' command defaults to "1", unless it explicitly
        # is set to "0".  iow: publish RULE_NAME [(0 | 1)]  [default: 1]
        if (!defined $val || $val eq '') { $val = '1'; }
      }
      elsif ($command eq 'pubfile') {
        if (!filename_in_manifest($opt_out.'/'.$val)) {
          warn "$val: WARNING: not listed in manifest file, using default\n";
          next;         # don't set 'pubfile' below
        }
      }

      $rules->{$name}->{$command} = $val;

      # if we see "publish NAMEOFRULE", that means the rule is
      # considered active
      if ($rules->{$name}->{publish}) {
        $rules->{$name}->{forceactive} = 1;
      }
    }
    elsif (/^
        (if|ifplugin)
        \s+(.*?)$
      /x)
    {
      # the raw line (newline included) is what gets re-emitted later
      push @current_conditionals, $orig;
    }
    elsif (/^else\b/x)
    {
      if (!scalar @current_conditionals) {
        warn "WARNING: 'else' without 'if'/'ifplugin' conditional\n";
      } else {
        my $cond = invert_conditional(pop @current_conditionals);
        push @current_conditionals, $cond;
      }
    }
    elsif (/^endif\b/x)
    {
      if (!scalar @current_conditionals) {
        warn "WARNING: 'endif' without 'if'/'ifplugin' conditional\n";
      } else {
        pop @current_conditionals;
      }
    }
    elsif (/^require_version\s*(\S+)\b/) {
      # silently ignored.  TODO? (meh)
    }
    elsif (/^loadplugin\s*(\S+)\b/) {
      my $name = 'loadplugin_'.$1;

      unless ($rules->{$name}) {
        $rules->{$name} = rule_entry_create();
        $rules->{$name}->{origname} = $name;
        $rules->{$name}->{origname_w_T_prefix} = $name;
        $rules->{$name}->{issandbox} = $issandbox;
        $rules->{$name}->{iscommand} = 1;
      }

      if (/^loadplugin\s*\S+\s+(\S+)/) {
        # the plugin file is looked up relative to the rules file
        my $fname = $1;
        my $fpath = dirname($f)."/".$fname;

        if (!-f $fpath) {
          warn "$f: WARNING: plugin code file '$fpath' not found, line ignored: $orig";
          next;
        }

        if ($fpath =~ /sandbox/i) {
          # Since this is a sandbox plugin, force its output to the sandbox area.
          $rules->{$name}->{sandbox_plugin} = 1;
        }

        # If a 'loadplugin' line is found, and the plugin .pm is not listed in
        # the MANIFEST file, this will mean that the .pm will not be copied
        # during "make dist".  This causes failures during "make disttest",
        # since the file does not exist.
        #
        # However, we do want to preserve these lines in the 'rules' dir, for
        # use during development -- without requiring that the .pm's be put
        # into MANIFEST -- ie. before the plugin is considered release-ready,
        # ie. sandbox plugins.
        #
        # fix: make it a "tryplugin" line instead; these are ignored if the
        # target file is nonexistent.

        if (!filename_in_manifest($opt_out.'/'.$fname)) {
          warn "$f: WARNING: '$opt_out/$fname' not listed in manifest file, making 'tryplugin': $orig";
          $orig =~ s/^\s*loadplugin\b/tryplugin/;
        }
      }

      $rules->{$name}->{text} .= $orig;
      unshift (@$rule_order, $name);
    }
    else {
      # an unhandled configuration line; "redirector_pattern",
      # "report", something like that.  This should be sent to
      # the active.cf output (or sandbox if it appeared in a sandbox
      # input file).

      # use the first word of the line as a key
      my $name = $_;
      /^\s*(\S+)/ and $name = $1;
      $name =~ s/\s+/ /gs;

      my $forceactive = 1;

      # always send 'test' lines to the sandbox files
      if (/^test\s*/) {
        $forceactive = 0;

        $name = $_;   # ensure we don't drag rules with us though!
        $name =~ s/\s+/ /gs;
      }

      # inside a conditional, the conditionals themselves become the key
      if (scalar @current_conditionals) {
        $name = join("", @current_conditionals);
        $name =~ s/\s+/ /gs; $name =~ s/ $//;
      }

      if ($issandbox) {
        $name .= "_sandbox";
      }

      unless ($rules->{$name}) {
        $rules->{$name} = rule_entry_create();
        $rules->{$name}->{origname} = $name;
        $rules->{$name}->{origname_w_T_prefix} = $name;
      }
      $rules->{$name}->{cond} = [@current_conditionals];
      $rules->{$name}->{issandbox} = $issandbox;
      $rules->{$name}->{forceactive} = $forceactive;
      $rules->{$name}->{iscommand} = 1;

      # TODO: bug 6241: 'replace_rules' should be handled ok, but isn't

      # warn "unknown line in rules file '$f', saving to default: $orig";

      $rules->{$name}->{text} .= $orig;
      unshift (@$rule_order, $name);
    }
  }
  close $in;

  # NOTE(review): nothing in this sub ever appends to $current_comments, so
  # this branch looks unreachable as written.
  if ($current_comments) {
    $rules->{$COMMENTS}->{text} .= $current_comments;
  }

  # now append all the found text to the output file buffers
  copy_to_output_buffers($rule_order, $issandbox, $f, $filename);

  # ok; file complete.  now mark all those rules as "seen"; future
  # refs to those rule names will trigger an autorename.
  foreach my $name (@$rule_order) {
    $seen_rules->{$name} = 1;
  }
}
|
|
|
|
# Only does anything when rulemetadata is being generated (--rulemetadata).
# Reads every "NN_name.cf" file already present in the output dir, except
# the active and sandbox output files themselves, via read_output_file().
sub read_rules_from_output_dir {
  return unless ($opt_rulemetadata);

  my @numbered_cf = grep { /\d\d_\S+\.cf$/ } glob("$opt_out/*.cf");

  foreach my $cf (@numbered_cf) {
    next if (pubfile_is_activeout($cf));
    next if (pubfile_is_sandboxout($cf));
    read_output_file($cf);
  }
}
|
|
|
|
# Read an existing .cf file from the output dir and record, for each rule
# line found in it, the accumulated tflags and (for rule-defining keywords)
# the source file and the original line in $rules.  Used for rulemetadata
# generation only.
#
# An unreadable file is reported with a warning and skipped.
sub read_output_file {
  my ($file) = @_;

  # three-arg open on a lexical handle; bail out on failure instead of
  # going on to read from a handle that was never opened
  my $in;
  if (!open ($in, '<', $file)) {
    warn "cannot read $file: $!";
    return;
  }

  while (<$in>) {
    my $orig = $_;

    s/#.*$//g; s/^\s+//; s/\s+$//;

    # drop comments/blank lines from output
    next if (/^$/);

    # strip a leading "lang xx" declaration before parsing the line
    s/^lang\s+(\S+)\s+//;

    next unless (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/);

    # rule definitions
    my $type = $1;
    my $name = $2;
    my $val = $3;

    # note: we only want to do this if --rulemetadata is in use!
    if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }

    if ($type eq 'tflags') {
      # several tflags lines for one rule are joined with a single space
      $val =~ s/\s+/ /gs;
      if ($rules->{$name}->{tflags}) {
        $rules->{$name}->{tflags} .= ' '.$val;
      } else {
        $rules->{$name}->{tflags} = $val;
      }
    }

    if ($type =~ /^${RULE_DEFINE_KEYWORDS_RE}$/x) {
      $rules->{$name}->{srcfile} = $file;
      $rules->{$name}->{code} = $orig;
    }
  }
  close $in;
}
|
|
|
|
# Decide, for each block name in @$rule_order, which output file it goes to,
# and build its final output text.
#
# For every entry with text this sets {output_file} and {output_text} in
# $rules, registers the target file in $output_files, and finally prints a
# per-source-file summary line.  $issandbox and $filename are accepted but
# not used here; $f is only used in the summary.
sub copy_to_output_buffers {
  my ($rule_order, $issandbox, $f, $filename) = @_;

  my $sandbox_path = $opt_out.'/'.$opt_sandboxout;
  my $active_path  = $opt_out.'/'.$opt_activeout;

  # always output these two files, even if they're empty!
  for my $standard ($sandbox_path, $active_path) {
    $output_files->{$standard} = { header => $default_file_header };
  }

  my %handled;
  my %copied = (active => 0, other => 0);

  foreach my $name (@$rule_order) {
    # only do each rule once, please ;)
    next if ($handled{$name}++);

    my $entry = ($rules->{$name} ||= {});
    my $text = $entry->{text};
    next unless ($text);      # nothing to write!

    my $pubfile = pubfile_for_rule($rules, $entry->{origname_w_T_prefix});
    my $is_active = pubfile_is_activeout($pubfile) ? 1 : 0;

    my $cond = $entry->{cond};
    if (!$cond) {
      $entry->{output_text} = $text;
    }
    else {
      foreach my $pluginclass (keys %{$entry->{plugin_dependencies}}) {
        my $loader = $rules->{"loadplugin_".($pluginclass || "")};
        next unless ($loader);

        # if the plugin is a sandbox plugin, ensure it's not
        # sent to the active file
        if ($loader->{sandbox_plugin}) {
          $pubfile = $sandbox_path;
          $is_active = 0;
        }

        # either way, ensure the "loadplugin" line, if there is one,
        # goes to the same file
        $loader->{output_file} = $pubfile;
      }

      # wrap the text in its conditionals, with as many "endif"s as there
      # are nested conditions
      my $opening = join("", @{$cond});
      my $closing = "endif\n" x (scalar @{$cond});
      $entry->{output_text} = "\n" . $opening . $text . $closing;
    }

    # note the target file
    $entry->{output_file} = $pubfile;
    $output_files->{$pubfile} = { header => $default_file_header };

    $copied{ $is_active ? 'active' : 'other' }++;
  }

  print "$f: $copied{active} active rules, $copied{other} other\n";
}
|
|
|
|
# Return the path of the output file that the rule block $name belongs in:
# the active file or the sandbox file under $opt_out.
sub pubfile_for_rule {
  my ($rules, $name) = @_;

  my $active_path  = $opt_out.'/'.$opt_activeout;
  my $sandbox_path = $opt_out.'/'.$opt_sandboxout;
  my $entry = ($rules->{$name} ||= {});

  # "publish NAMEOFRULE" => send it to active
  return $active_path if ($entry->{publish});

  # listed in active.list, or forced active
  return $active_path if ($active_rules->{$name} || $entry->{forceactive});

  # a rule-related setting in reference to an unknown rule, which is neither
  # a command line nor a generated score
  my $refers_to_unknown_rule = !$entry->{found_definition}
                            && !$entry->{iscommand}
                            && !$entry->{isscores};
  return $active_path if ($refers_to_unknown_rule);

  return $sandbox_path if ($entry->{issandbox});

  warn "oops? inactive rule, non-sandbox, shouldn't be possible anymore";
  return $sandbox_path;
}
|
|
|
|
# "Compile" a plugin: copy the raw perl module from $entry->{f} to
# $entry->{t}.  Does nothing when $opt_listpromotable is set.
#
# The module cannot be renamed to avoid conflicts, since that would break
# the loadplugin lines referring to it.  An existing target is always
# overwritten, in case the input has changed.
sub plugin_file_compile {
  my ($entry) = @_;

  return if $opt_listpromotable;

  my ($from, $to) = @{$entry}{qw(f t)};
  copy($from, $to) or warn "Couldn't copy ".$from.": $!";
}
|
|
|
|
###########################################################################
|
|
|
|
# Assemble the text of every output file from the per-rule output text in
# $rules, queue all files for linting, and (with --rulemetadata) write the
# rule metadata XML file.  Dies if the metadata file cannot be written.
sub compile_output_files {
  my $always = $rules->{$ALWAYS_PUBLISH}->{output_text};

  # create all known output files
  foreach my $file (keys %$output_files) {
    $output_file_text->{$file} = $output_files->{$file}->{header};

    if ($always && pubfile_is_activeout($file)) {
      $output_file_text->{$file} .= $always;
    }
  }

  # this is a horrible kluge.
  # at this point in the game, we've lost the ordered list of rules, so the
  # loadplugin lines have no guarantee that they'll be loaded before the rules
  # that require them.  so we kluge the sort to always have loadplugin lines
  # appear at the very top of the array so we know they'll be listed before
  # anything else.
  #
  # The comparator ranks both sides, so two loadplugin entries compare
  # consistently (by name) and the resulting order is deterministic.
  my @rulenames = sort {
    my $a_rank = ($a =~ /^loadplugin_/) ? 0 : 1;
    my $b_rank = ($b =~ /^loadplugin_/) ? 0 : 1;
    ($a_rank <=> $b_rank) || ($a cmp $b);
  } keys %$rules;

  # go through the rules looking for meta subrules we
  # may have forgotten; this happens if a non-subrule is
  # listed in active.list, the subrules will not be!  fix them
  # to appear in the same output file as the master rule.
  foreach my $rule (@rulenames) {
    fix_up_rule_dependencies($rule);
  }

  # now repeat, just for rules in the active set; their dependencies should
  # always be likewise promoted into the active set, overriding the prev step.
  foreach my $rule (@rulenames) {
    my $pubfile = $rules->{$rule}->{output_file};
    next unless ($pubfile && pubfile_is_activeout($pubfile));
    fix_up_rule_dependencies($rule);
  }

  my $rulemd = '';

  # output the known rules that are not meta subrules.
  foreach my $rule (@rulenames) {
    $rulemd .= get_rulemetadata_string($rule);  # all metadata strings

    next if ($rule =~ /^__/);
    my $pubfile = $rules->{$rule}->{output_file};
    my $text = $rules->{$rule}->{output_text};
    next unless defined ($text);
    next if (_omit_nopublish_rule($rule, $pubfile));

    $output_file_text->{$pubfile} .= "##{ $rule\n".
                            $text.
                            "##} ".$rule."\n\n";
  }

  # now output all subrules (in a slightly more compact form)
  foreach my $rule (@rulenames) {
    next unless ($rule =~ /^__/);
    my $pubfile = $rules->{$rule}->{output_file};
    my $text = $rules->{$rule}->{output_text};
    next unless defined ($text);
    next if (_omit_nopublish_rule($rule, $pubfile));

    $output_file_text->{$pubfile} .= $text;
  }

  # finally, finish off all output files
  foreach my $file (keys %$output_files) {
    # and get them lint-checked!
    $files_to_lint->{$file} = 1;
  }

  if ($opt_rulemetadata) {
    open (my $md, '>', $opt_rulemetadata)
          or die "cannot write rulemd to $opt_rulemetadata: $!";
    print {$md} "<?xml version='1.0' encoding='UTF-8'?>\n",
          "<rulemds>", $rulemd, "</rulemds>\n";
    close $md or die "cannot close rulemd to $opt_rulemetadata: $!";
  }
}

# Returns true (after printing a notice) if $rule is headed for the active
# output file but carries 'tflags nopublish', and so must be left out.
#
# DOS - bug 6297 - HACK: this will probably screw up meta rules that do
# something like '&& !$rule'.
sub _omit_nopublish_rule {
  my ($rule, $pubfile) = @_;

  my $entry = $rules->{$rule};
  return 0 unless (pubfile_is_activeout($pubfile)
      && exists $entry->{tflags}
      && $entry->{tflags} =~ /\bnopublish\b/);

  print "omitting rule $rule due to tflags nopublish (tflags $entry->{tflags})\n";
  return 1;
}
|
|
|
|
# Define utf8ify() at runtime.  If the Encode module can be loaded, utf8ify()
# returns its argument encoded as UTF-8; otherwise a stub is installed that
# dies when called.  This keeps Encode optional -- it is only wanted for the
# rulemetadata XML output.
sub compile_utf8ify_function {
  my $encode_impl = '
    sub utf8ify { use Encode; return Encode::encode("UTF-8", $_[0]); } 1;
  ';
  my $stub_impl = '
    sub utf8ify { die "unimplemented -- Encode module required!" } 1;
  ';

  # the first eval fails (returns false) if "use Encode" cannot be compiled
  return if (eval $encode_impl);
  eval $stub_impl;
}
|
|
|
|
# Build the <rulemetadata> XML fragment for one rule: its name, source file,
# source file mtime and tflags.  Returns '' unless --rulemetadata is in use.
#
# Text values are XML-escaped, since block names in $rules can be arbitrary
# configuration lines.  <code> blocks are deliberately not emitted (they
# bloat the XML badly, to 800KB, and make it very slow to parse later), so
# the rule code is no longer prepared here either; that also means this sub
# does not need utf8ify() / the Encode module.
sub get_rulemetadata_string {
  my ($rule) = @_;

  return '' unless ($opt_rulemetadata);

  my $xml_escape = sub {
    my ($str) = @_;
    $str =~ s/&/&amp;/g;
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    return $str;
  };

  # if we found a rule definition with a T_ prefix, use that data.
  # "exists" keeps the lookup from autovivifying an empty T_ entry.
  my $name = $rule;
  my $t_name = "T_".$rule;
  if (!$rules->{$name}->{srcfile}
      && exists $rules->{$t_name} && $rules->{$t_name}->{srcfile})
  {
    $name = $t_name;
  }

  my $srcfile = $rules->{$name}->{srcfile} || '';
  my $mod = 0;
  if ($srcfile) {
    my @s = stat $srcfile;
    if (@s) { $mod = $s[9]; }     # stays 0 if the file cannot be stat'ed
  }

  my $tf = $rules->{$name}->{tflags} || '';

  return "<rulemetadata>".
        "<name>".$xml_escape->($rule)."</name>".
        "<src>".$xml_escape->($srcfile)."</src>".
        "<srcmtime>$mod</srcmtime>".
        "<tf>".$xml_escape->($tf)."</tf>".
        "</rulemetadata>\n";
}
|
|
|
|
# For every "meta" line in the output text of $rule, look at each word on
# that line; any rule block registered under that word is moved to $rule's
# output file -- unless that block is already in the active file.
sub fix_up_rule_dependencies {
  my $rule = shift;

  my $target_file = $rules->{$rule}->{output_file};
  my $body = $rules->{$rule}->{output_text};
  return unless $body;

  while ($body =~ /^\s*meta\s+(.*)$/mg) {
    my $meta_line = $1;

    while ($meta_line =~ /\b([_A-Za-z0-9]+)\b/g) {
      my $dep = $1;

      # deal with rules that changed name from "FOO" to "T_FOO"
      sed_renamed_rule_names(\$dep);

      # we may not always have entries for $dep, if the rule was from a
      # non-sandbox source; in that case the loop below simply does nothing
      foreach my $dep_block (@{$entries_for_rule_name->{$dep}}) {
        my $dep_entry = $rules->{$dep_block};
        next unless ($dep_entry && $dep_entry->{output_file});

        # don't do this if the subrule would be moved *out* of the
        # active file!
        next if (pubfile_is_activeout($dep_entry->{output_file}));

        # force that subrule to output in the same pubfile
        $dep_entry->{output_file} = $target_file;
      }
    }
  }
}
|
|
|
|
# Returns 1 if the given output filename ends with the active-rules output
# filename ($opt_activeout), starting at a word boundary; 0 otherwise
# (including for an empty or undefined filename).
sub pubfile_is_activeout {
  my ($pubfile) = @_;
  return 0 unless $pubfile;
  return ($pubfile =~ /\b\Q$opt_activeout\E$/) ? 1 : 0;
}
|
|
|
|
# Returns 1 if the given output filename ends with the sandbox output
# filename ($opt_sandboxout), starting at a word boundary; 0 otherwise
# (including for an empty or undefined filename).
sub pubfile_is_sandboxout {
  my ($pubfile) = @_;
  return 0 unless $pubfile;
  return ($pubfile =~ /\b\Q$opt_sandboxout\E$/) ? 1 : 0;
}
|
|
|
|
# Write every file listed in $output_files to disk.
#
# For each output filename (in sorted order):
#  - an existing regular file of that name is removed first;
#  - a warning is printed if the name is not covered by the manifest;
#  - the text collected in $output_file_text is written, after renamed rule
#    names have been substituted into it;
#  - if there is no text, an empty file is still created so that the
#    MANIFEST entry is satisfied.
#
# Dies (with the OS error text) if a file cannot be removed, opened,
# written or closed.
sub write_output_files {
  foreach my $pubfile (sort keys %$output_files) {
    if (-f $pubfile) {
      unlink $pubfile
          or die "cannot remove output file '$pubfile': $!";
    }

    if (!filename_in_manifest($pubfile)) {
      warn "$pubfile: WARNING: not listed in manifest file\n";
    }

    my $text = $output_file_text->{$pubfile};
    if ($text) {
      sed_renamed_rule_names(\$text);
    }
    else {
      print "$pubfile: no rules promoted\n";
    }

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as a mode, and the handle cannot leak into other code.
    open (my $out, '>', $pubfile)
        or die "cannot write to output file '$pubfile': $!";

    if ($text) {
      print {$out} $text
          or die "cannot write to output file '$pubfile': $!";
      # print "$pubfile: written\n";       # too noisy
    }

    # buffered write errors only surface here, so close must be checked
    close $out
        or die "cannot close output file '$pubfile': $!";
  }
}
|
|
|
|
###########################################################################
|
|
|
|
# Returns a reference to a new rule entry hash: empty 'text' and a
# 'publish' flag of 0.  Each call returns a distinct hash.
sub rule_entry_create {
  my %entry = (
    publish => 0,
    text    => '',
  );
  return \%entry;
}
|
|
|
|
###########################################################################
|
|
|
|
# Pick the name under which a sandbox rule will be output.
#
#   $rule  the rule name as written in the source file
#   $path  the path of that source file; used to build a unique suffix
#
# The name is returned unchanged when listing promotable rules, when the
# rule is listed in $active_rules, or when it is flagged 'forceactive'.
# Otherwise:
#  - a name without a "T_" or "__" prefix becomes "T_<name>";
#  - a prefixed name not yet present in $seen_rules is kept as it is;
#  - a prefixed name already present in $seen_rules gets the sanitized
#    source path appended, and a warning is issued.
# New names are recorded in $renamed_rules (new name => original name).
sub sandbox_rule_name_avoid_collisions {
  my ($rule, $path) = @_;

  if ($opt_listpromotable) { return $rule; }
  if ($active_rules->{$rule}) { return $rule; }
  if ($rules->{$rule}->{forceactive}) { return $rule; }

  my $has_prefix = ($rule =~ /^(?:T_|__)/);

  # already prefixed and not seen before: nothing to do
  if ($has_prefix && !exists $seen_rules->{$rule}) {
    return $rule;
  }

  my ($newname, $why, $noisy);
  if ($has_prefix) {
    # turn the source path into something usable inside a rule name
    my $suffix = $path;
    $suffix =~ s/[^A-Za-z0-9]+/_/gs;
    $suffix =~ s/_+/_/gs;
    $suffix =~ s/^_//;
    $suffix =~ s/_$//;

    $newname = $rule.'_'.$suffix;
    $why = "collision with existing rule";
    $noisy = 1;
  }
  else {
    $newname = "T_".$rule;
    $why = "missing T_ prefix";
    $noisy = 0;
  }

  # only the first rename to a given new name is recorded (and reported)
  unless ($renamed_rules->{$newname}) {
    $renamed_rules->{$newname} = $rule;
    warn "WARNING: $rule: renamed as $newname due to $why\n" if $noisy;
  }

  return $newname;
}
|
|
|
|
# Replace, in place, every whole-word occurrence of a renamed rule's
# original name with its new name.
#
#   $textref  reference to the string to modify
#
# $renamed_rules maps new name => original name.  The renames are applied
# in sorted order of the new names so that the result does not depend on
# Perl's randomized hash ordering when renames overlap or chain.  The
# original name is quoted so that it is always matched literally.
sub sed_renamed_rule_names {
  my ($textref) = @_;
  foreach my $new (sort keys %{$renamed_rules}) {
    my $rule = $renamed_rules->{$new};
    $$textref =~ s/\b\Q${rule}\E\b/${new}/gs;
  }
}
|
|
|
|
###########################################################################
|
|
|
|
# Build the conditional line to use for the 'else' part of a conditional.
#
#   $cond  the original "ifplugin ..." or "if ..." line
#
# Returns a newline-terminated "if" line expressing the negated condition:
#   "ifplugin Foo"  =>  "if !plugin(Foo)\n"
#   "if (expr)"     =>  "if !((expr))\n"
# If $cond cannot be parsed, a warning is issued and "if 0\n" is returned.
# The fallback is newline-terminated like the other two results, so that
# whatever follows it does not end up on the same line.
sub invert_conditional {
  my $cond = shift;

  if ($cond =~ /^ \s* ifplugin \s+ (.*?) $/x) {
    return "if !plugin($1)\n";
  }
  if ($cond =~ /^ \s* if \s+ (.*?) $/x) {
    return "if !($1)\n";
  }

  warn "WARNING: cannot parse '$cond' for 'else'\n";
  return "if 0\n";
}
|
|
|
|
###########################################################################
|
|
|
|
# Read the manifest file $fname.  Each line passed on by
# parse_line_delimited_config_file() (available in $_) has its leading and
# trailing whitespace removed and is then recorded as a key of
# $file_manifest.
sub read_manifest {
  my ($fname) = @_;

  my $record_entry = sub {
    if (/^\s*(.*?)\s*$/) {
      $file_manifest->{$1} = 1;
    }
  };

  parse_line_delimited_config_file($fname, $record_entry);
}
|
|
|
|
# Read the manifest "skip" file $fname.  Each line passed on by
# parse_line_delimited_config_file() (available in $_) has its leading and
# trailing whitespace removed, is compiled as a regular expression and
# appended to $file_manifest_skip.
#
# Blank lines are ignored: compiled as-is they would yield an empty
# pattern, which matches any filename and would make every file look as if
# it were covered by the manifest.
sub read_manifest_skip {
  my ($fname) = @_;
  parse_line_delimited_config_file($fname, sub {
    return unless /^\s*(.*?)\s*$/;
    my $pattern = $1;
    return if $pattern eq '';
    push (@{$file_manifest_skip}, qr/$pattern/);
  });
}
|
|
|
|
# Read the list of active rules from $fname.  For each line passed on by
# parse_line_delimited_config_file() (available in $_), the leading run of
# non-whitespace characters, if any, is recorded as a key of $active_rules.
sub read_active {
  my ($fname) = @_;

  my $record_rule = sub {
    if (/^(\S+)/) {
      $active_rules->{$1} = 1;
    }
  };

  parse_line_delimited_config_file($fname, $record_rule);
}
|
|
|
|
# Returns 1 if $fname is accounted for by the manifest data, i.e. it is
# either a key of $file_manifest or it matches one of the regular
# expressions in $file_manifest_skip.  Returns 0 otherwise.
sub filename_in_manifest {
  my $fname = shift;

  if ($file_manifest->{$fname}) {
    return 1;
  }

  for my $pattern (@{$file_manifest_skip}) {
    if ($fname =~ $pattern) {
      return 1;
    }
  }

  return 0;
}
|
|
|
|
# Read the file $fname line by line and invoke $callback once per line.
#
#   $fname     path of the file to read
#   $callback  code reference, called without arguments; the current line
#              (including its line terminator) is available in $_
#
# Lines starting with '#' are skipped.  If the file cannot be opened, a
# warning including the OS error is issued and the callback is never
# called.  The caller's $_ is left untouched.
sub parse_line_delimited_config_file {
  my ($fname, $callback) = @_;

  # The read loop below assigns to the global $_; localize it so that a
  # caller which is itself using $_ does not have its value overwritten.
  local $_;

  # Three-argument open: $fname is always treated as a plain filename,
  # never as a mode or a command.
  if (!open (my $in, '<', $fname)) {
    warn "cannot read $fname: $!\n";
  } else {
    while (<$in>) {
      next if /^#/;
      $callback->();
    }
    close $in;
  }
}
|
|
|
|
|
|
__DATA__
|
|
# SpamAssassin rules file
|
|
#
|
|
# Please don't modify this file as your changes will be overwritten with
|
|
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
|
|
# See 'perldoc Mail::SpamAssassin::Conf' for details.
|
|
#
|
|
# <@LICENSE>
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at:
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# </@LICENSE>
|
|
#
|
|
###########################################################################
|
|
|
|
require_version @@VERSION@@
|
|
|