mirror of
https://git.proxmox.com/git/proxmox-spamassassin
synced 2025-04-28 17:53:08 +00:00
2370 lines
58 KiB
Perl
2370 lines
58 KiB
Perl
# <@LICENSE>
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at:
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# </@LICENSE>
|
|
|
|
=head1 NAME
|
|
|
|
Mail::SpamAssassin::BayesStore::SQL - SQL Bayesian Storage Module Implementation
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This module implements a SQL based bayesian storage module. It's compatible
|
|
with SQLite and possibly other standard SQL servers.
|
|
|
|
Do not use this for MySQL/MariaDB or PgSQL, they have their own specific
|
|
modules.
|
|
|
|
=cut
|
|
|
|
package Mail::SpamAssassin::BayesStore::SQL;
|
|
|
|
use strict;
|
|
use warnings;
|
|
# use bytes;
|
|
use re 'taint';
|
|
use Errno qw(EBADF);
|
|
use Digest::SHA qw(sha1);
|
|
|
|
use Mail::SpamAssassin::BayesStore;
|
|
use Mail::SpamAssassin::Logger;
|
|
|
|
our @ISA = qw( Mail::SpamAssassin::BayesStore );
|
|
|
|
use constant HAS_DBI => eval { require DBI; };
|
|
|
|
=head1 METHODS
|
|
|
|
=head2 new
|
|
|
|
public class (Mail::SpamAssassin::BayesStore::SQL) new (Mail::Spamassassin::Plugin::Bayes $bayes)
|
|
|
|
Description:
|
|
This methods creates a new instance of the Mail::SpamAssassin::BayesStore::SQL
|
|
object. It expects to be passed an instance of the Mail::SpamAssassin:Bayes
|
|
object which is passed into the Mail::SpamAssassin::BayesStore parent object.
|
|
|
|
This method sets up the database connection and determines the username to
|
|
use in queries.
|
|
|
|
=cut
|
|
|
|
sub new {
|
|
my $class = shift;
|
|
$class = ref($class) || $class;
|
|
|
|
my $self = $class->SUPER::new(@_);
|
|
|
|
$self->{supported_db_version} = 3;
|
|
$self->{db_writable_p} = 0;
|
|
|
|
if (!$self->{bayes}->{conf}->{bayes_sql_dsn}) {
|
|
dbg("bayes: invalid config, must set bayes_sql_dsn config variable\n");
|
|
return;
|
|
}
|
|
|
|
$self->{_dsn} = $self->{bayes}->{conf}->{bayes_sql_dsn};
|
|
$self->{_dbuser} = $self->{bayes}->{conf}->{bayes_sql_username};
|
|
$self->{_dbpass} = $self->{bayes}->{conf}->{bayes_sql_password};
|
|
|
|
$self->{_dbh} = undef;
|
|
|
|
unless (HAS_DBI) {
|
|
dbg("bayes: unable to connect to database: DBI module not available: $!");
|
|
}
|
|
|
|
if ($self->{bayes}->{conf}->{bayes_sql_override_username}) {
|
|
$self->{_username} = $self->{bayes}->{conf}->{bayes_sql_override_username};
|
|
}
|
|
else {
|
|
$self->{_username} = $self->{bayes}->{main}->{username};
|
|
|
|
# Need to make sure that a username is set, so just in case there is
|
|
# no username set in main, set one here.
|
|
if (!defined $self->{_username} || $self->{_username} eq '') {
|
|
$self->{_username} = "GLOBALBAYES";
|
|
}
|
|
}
|
|
dbg("bayes: using username: ".$self->{_username});
|
|
|
|
return $self;
|
|
}
|
|
|
|
=head2 tie_db_readonly
|
|
|
|
public instance (Boolean) tie_db_readonly ();
|
|
|
|
Description:
|
|
This method ensures that the database connection is properly setup
|
|
and working. If necessary it will initialize a user's bayes variables
|
|
so that they can begin using the database immediately.
|
|
|
|
=cut
|
|
|
|
sub tie_db_readonly {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (HAS_DBI);
|
|
|
|
if ($self->{_dbh}) {
|
|
# already connected, but connection has now become readonly
|
|
$self->{db_writable_p} = 0;
|
|
return 1;
|
|
}
|
|
|
|
my $main = $self->{bayes}->{main};
|
|
my $timer_tie_ro = $main->time_method('b_tie_ro');
|
|
|
|
$self->read_db_configs();
|
|
|
|
return 0 unless ($self->_connect_db());
|
|
|
|
my $db_ver = $self->_get_db_version();
|
|
$self->{db_version} = $db_ver;
|
|
dbg("bayes: found bayes db version ".$self->{db_version});
|
|
|
|
if ( $db_ver != $self->DB_VERSION ) {
|
|
warn("bayes: database version $db_ver is different than we understand (".$self->DB_VERSION."), aborting!");
|
|
$self->untie_db();
|
|
return 0;
|
|
}
|
|
|
|
unless ($self->_initialize_db(0)) {
|
|
dbg("bayes: unable to initialize database for ".$self->{_username}." user, aborting!");
|
|
$self->untie_db();
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 tie_db_writable
|
|
|
|
public instance (Boolean) tie_db_writable ()
|
|
|
|
Description:
|
|
This method ensures that the database connection is properly setup
|
|
and working. If necessary it will initialize a users bayes variables
|
|
so that they can begin using the database immediately.
|
|
|
|
=cut
|
|
|
|
sub tie_db_writable {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (HAS_DBI);
|
|
|
|
if ($self->{_dbh}) {
|
|
# already connected, but now it will be writable
|
|
$self->{db_writable_p} = 1;
|
|
return 1;
|
|
}
|
|
|
|
my $main = $self->{bayes}->{main};
|
|
my $timer_tie_rw = $main->time_method('b_tie_rw');
|
|
|
|
$self->read_db_configs();
|
|
|
|
return 0 unless ($self->_connect_db());
|
|
|
|
my $db_ver = $self->_get_db_version();
|
|
$self->{db_version} = $db_ver;
|
|
dbg("bayes: found bayes db version ".$self->{db_version});
|
|
|
|
if ( $db_ver != $self->DB_VERSION ) {
|
|
warn("bayes: database version $db_ver is different than we understand (".$self->DB_VERSION."), aborting!");
|
|
$self->untie_db();
|
|
return 0;
|
|
}
|
|
|
|
unless ($self->_initialize_db(1)) {
|
|
dbg("bayes: unable to initialize database for ".$self->{_username}." user, aborting!");
|
|
|
|
$self->untie_db();
|
|
return 0;
|
|
}
|
|
|
|
$self->{db_writable_p} = 1;
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
=head2 untie_db
|
|
|
|
public instance () untie_db ()
|
|
|
|
Description:
|
|
Disconnects from an SQL server.
|
|
|
|
=cut
|
|
|
|
sub untie_db {
|
|
my ($self) = @_;
|
|
|
|
return unless (defined($self->{_dbh}));
|
|
|
|
$self->{db_writable_p} = 0;
|
|
|
|
$self->{_dbh}->disconnect();
|
|
$self->{_dbh} = undef;
|
|
}
|
|
|
|
=head2 calculate_expire_delta
|
|
|
|
public instance (%) calculate_expire_delta (Integer $newest_atime,
|
|
Integer $start,
|
|
Integer $max_expire_mult)
|
|
|
|
Description:
|
|
This method performs a calculation on the data to determine the optimum
|
|
atime for token expiration.
|
|
|
|
=cut
|
|
|
|
sub calculate_expire_delta {
|
|
my ($self, $newest_atime, $start, $max_expire_mult) = @_;
|
|
|
|
my %delta; # use a hash since an array is going to be very sparse
|
|
|
|
return %delta unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "SELECT count(*)
|
|
FROM bayes_token
|
|
WHERE id = ?
|
|
AND atime < ?";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: calculate_expire_delta: SQL Error: ".$self->{_dbh}->errstr());
|
|
return %delta;
|
|
}
|
|
|
|
for (my $i = 1; $i <= $max_expire_mult; $i<<=1) {
|
|
my $rc = $sth->execute($self->{_userid}, $newest_atime - $start * $i);
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: calculate_expire_delta: SQL error: ".$self->{_dbh}->errstr());
|
|
return;
|
|
}
|
|
|
|
my ($count) = $sth->fetchrow_array();
|
|
|
|
$delta{$i} = $count;
|
|
}
|
|
$sth->finish();
|
|
|
|
return %delta;
|
|
}
|
|
|
|
=head2 token_expiration
|
|
|
|
public instance (Integer, Integer,
|
|
Integer, Integer) token_expiration(\% $opts,
|
|
Integer $newdelta,
|
|
@ @vars)
|
|
|
|
Description:
|
|
This method performs the database specific expiration of tokens based on
|
|
the passed in C<$newdelta> and C<@vars>.
|
|
|
|
=cut
|
|
|
|
sub token_expiration {
|
|
my ($self, $opts, $newdelta, @vars) = @_;
|
|
|
|
my $num_hapaxes;
|
|
my $num_lowfreq;
|
|
my $deleted;
|
|
|
|
# Figure out how old is too old...
|
|
my $too_old = $vars[10] - $newdelta; # tooold = newest - delta
|
|
|
|
# if token atime > newest, reset to newest ...
|
|
my $sql = "UPDATE bayes_token SET atime = ?
|
|
WHERE id = ?
|
|
AND atime > ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $vars[10], $self->{_userid}, $vars[10]);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
|
|
$deleted = 0;
|
|
goto token_expiration_final;
|
|
}
|
|
|
|
# Check to make sure the expire won't remove too many tokens
|
|
$sql = "SELECT count(token) FROM bayes_token
|
|
WHERE id = ?
|
|
AND atime < ?";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
|
|
$deleted = 0;
|
|
goto token_expiration_final;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid}, $too_old);
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
|
|
$deleted = 0;
|
|
goto token_expiration_final;
|
|
}
|
|
|
|
my ($count) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
# Sanity check: if we expired too many tokens, abort!
|
|
if ($vars[3] - $count < 100000) {
|
|
dbg("bayes: token expiration would expire too many tokens, aborting");
|
|
# set these appropriately so the next expire pass does the first pass
|
|
$deleted = 0;
|
|
$newdelta = 0;
|
|
}
|
|
else {
|
|
# Do the expire
|
|
$sql = "DELETE from bayes_token
|
|
WHERE id = ?
|
|
AND atime < ?";
|
|
|
|
$rows = $self->{_dbh}->do($sql, undef, $self->{_userid}, $too_old);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
|
|
$deleted = 0;
|
|
goto token_expiration_final;
|
|
}
|
|
|
|
$deleted = ($rows eq '0E0') ? 0 : $rows;
|
|
}
|
|
|
|
# Update the magic tokens as appropriate
|
|
$sql = "UPDATE bayes_vars SET token_count = token_count - ?,
|
|
last_expire = ?,
|
|
last_atime_delta = ?,
|
|
last_expire_reduce = ?
|
|
WHERE id = ?";
|
|
|
|
$rows = $self->{_dbh}->do($sql, undef, $deleted, time(), $newdelta, $deleted, $self->{_userid});
|
|
|
|
unless (defined($rows)) {
|
|
# Very bad, we actually deleted the tokens, but were unable to update
|
|
# bayes_vars with the new data.
|
|
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
|
|
dbg("bayes: bayes database now in inconsistent state, suggest a backup/restore");
|
|
goto token_expiration_final;
|
|
}
|
|
|
|
# If we didn't remove any tokens, the oldest token age wouldn't have changed
|
|
if ($deleted) {
|
|
# Now let's update the oldest_token_age value, shouldn't need to worry
|
|
# about newest_token_age. There is a slight race condition here, but the
|
|
# chance is small that we'll insert a new token with such an old atime
|
|
my $oldest_token_age = $self->_get_oldest_token_age();
|
|
|
|
$sql = "UPDATE bayes_vars SET oldest_token_age = ? WHERE id = ?";
|
|
|
|
$rows = $self->{_dbh}->do($sql, undef, $oldest_token_age, $self->{_userid});
|
|
|
|
unless (defined($rows)) {
|
|
# not much more we can do here, so just warn the user and bail out
|
|
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
|
|
# yeah I know it's the next thing anyway, but here in case someone adds
|
|
# additional code below this block
|
|
goto token_expiration_final;
|
|
}
|
|
}
|
|
|
|
token_expiration_final:
|
|
my $kept = $vars[3] - $deleted;
|
|
|
|
$num_hapaxes = $self->_get_num_hapaxes() if ($opts->{verbose});
|
|
$num_lowfreq = $self->_get_num_lowfreq() if ($opts->{verbose});
|
|
|
|
# Call untie_db() first so we unlock correctly etc. first
|
|
$self->untie_db();
|
|
|
|
return ($kept, $deleted, $num_hapaxes, $num_lowfreq);
|
|
}
|
|
|
|
=head2 sync_due
|
|
|
|
public instance (Boolean) sync_due ()
|
|
|
|
Description:
|
|
This method determines if a database sync is currently required.
|
|
|
|
Unused for SQL based implementation.
|
|
|
|
=cut
|
|
|
|
sub sync_due {
|
|
my ($self) = @_;
|
|
|
|
return 0;
|
|
}
|
|
|
|
=head2 seen_get
|
|
|
|
public instance (String) seen_get (string $msgid)
|
|
|
|
Description:
|
|
This method retrieves the stored value, if any, for C<$msgid>. The return value
|
|
is the stored string ('s' for spam and 'h' for ham) or undef if C<$msgid> is not
|
|
found.
|
|
|
|
=cut
|
|
|
|
sub seen_get {
|
|
my ($self, $msgid) = @_;
|
|
|
|
return unless defined($self->{_dbh});
|
|
|
|
my $sql = "SELECT flag FROM bayes_seen
|
|
WHERE id = ?
|
|
AND msgid = ?";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: seen_get: SQL Error: ".$self->{_dbh}->errstr());
|
|
return;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid}, $msgid);
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: seen_get: SQL error: ".$self->{_dbh}->errstr());
|
|
return;
|
|
}
|
|
|
|
my ($flag) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
return $flag;
|
|
}
|
|
|
|
=head2 seen_put
|
|
|
|
public (Boolean) seen_put (string $msgid, char $flag)
|
|
|
|
Description:
|
|
This method records C<$msgid> as the type given by C<$flag>. C<$flag> is one of
|
|
two values 's' for spam and 'h' for ham.
|
|
|
|
=cut
|
|
|
|
sub seen_put {
|
|
my ($self, $msgid, $flag) = @_;
|
|
|
|
return 0 if (!$msgid);
|
|
return 0 if (!$flag);
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "INSERT INTO bayes_seen (id, msgid, flag)
|
|
VALUES (?,?,?)";
|
|
|
|
my $rows = $self->{_dbh}->do($sql,
|
|
undef,
|
|
$self->{_userid}, $msgid, $flag);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: seen_put: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
dbg("bayes: seen ($msgid) put");
|
|
return 1;
|
|
}
|
|
|
|
=head2 seen_delete
|
|
|
|
public instance (Boolean) seen_delete (string $msgid)
|
|
|
|
Description:
|
|
This method removes C<$msgid> from the database.
|
|
|
|
=cut
|
|
|
|
sub seen_delete {
|
|
my ($self, $msgid) = @_;
|
|
|
|
return 0 if (!$msgid);
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "DELETE FROM bayes_seen
|
|
WHERE id = ?
|
|
AND msgid = ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql,
|
|
undef,
|
|
$self->{_userid}, $msgid);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: seen_delete: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 get_storage_variables
|
|
|
|
public instance (@) get_storage_variables ()
|
|
|
|
Description:
|
|
This method retrieves the various administrative variables used by
|
|
the Bayes process and database.
|
|
|
|
The values returned in the array are in the following order:
|
|
|
|
0: scan count base
|
|
|
|
1: number of spam
|
|
|
|
2: number of ham
|
|
|
|
3: number of tokens in db
|
|
|
|
4: last expire atime
|
|
|
|
5: oldest token in db atime
|
|
|
|
6: db version value
|
|
|
|
7: last journal sync
|
|
|
|
8: last atime delta
|
|
|
|
9: last expire reduction count
|
|
|
|
10: newest token in db atime
|
|
|
|
=cut
|
|
|
|
sub get_storage_variables {
|
|
my ($self) = @_;
|
|
my @values;
|
|
|
|
return (0,0,0,0,0,0,0,0,0,0,0) unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "SELECT spam_count, ham_count, token_count, last_expire,
|
|
last_atime_delta, last_expire_reduce, oldest_token_age,
|
|
newest_token_age
|
|
FROM bayes_vars
|
|
WHERE id = ?";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: get_storage_variables: SQL error: ".$self->{_dbh}->errstr());
|
|
return (0,0,0,0,0,0,0,0,0,0,0);
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: get_storage_variables: SQL error: ".$self->{_dbh}->errstr());
|
|
return (0,0,0,0,0,0,0,0,0,0,0);
|
|
}
|
|
|
|
my ($spam_count, $ham_count, $token_count,
|
|
$last_expire, $last_atime_delta, $last_expire_reduce,
|
|
$oldest_token_age, $newest_token_age) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
my $db_ver = $self->DB_VERSION;
|
|
|
|
@values = (
|
|
0,
|
|
$spam_count,
|
|
$ham_count,
|
|
$token_count,
|
|
$last_expire,
|
|
$oldest_token_age,
|
|
$db_ver,
|
|
0, # we do not do journal syncs
|
|
$last_atime_delta,
|
|
$last_expire_reduce,
|
|
$newest_token_age
|
|
);
|
|
|
|
return @values;
|
|
}
|
|
|
|
=head2 dump_db_toks
|
|
|
|
public instance () dump_db_toks (String $template, String $regex, Array @vars)
|
|
|
|
Description:
|
|
This method loops over all tokens, computing the probability for the token and then
|
|
printing it out according to the passed in token.
|
|
|
|
=cut
|
|
|
|
sub dump_db_toks {
|
|
my ($self, $template, $regex, @vars) = @_;
|
|
|
|
return unless (defined($self->{_dbh}));
|
|
|
|
# 0/0 tokens don't count, but in theory we shouldn't have any
|
|
my $token_select = $self->_token_select_string();
|
|
|
|
my $sql = "SELECT $token_select, spam_count, ham_count, atime
|
|
FROM bayes_token
|
|
WHERE id = ?
|
|
AND (spam_count > 0 OR ham_count > 0)";
|
|
|
|
my $sth = $self->{_dbh}->prepare($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: dump_db_toks: SQL error: ".$self->{_dbh}->errstr());
|
|
return;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: dump_db_toks: SQL error: ".$self->{_dbh}->errstr());
|
|
return;
|
|
}
|
|
|
|
while (my ($token, $spam_count, $ham_count, $atime) = $sth->fetchrow_array()) {
|
|
my $prob = $self->{bayes}->_compute_prob_for_token($token, $vars[1], $vars[2],
|
|
$spam_count, $ham_count);
|
|
$prob ||= 0.5;
|
|
|
|
my $encoded_token = unpack("H*", $token);
|
|
|
|
printf $template,$prob,$spam_count,$ham_count,$atime,$encoded_token
|
|
or die "Error writing tokens: $!";
|
|
}
|
|
|
|
$sth->finish();
|
|
|
|
return;
|
|
}
|
|
|
|
=head2 set_last_expire
|
|
|
|
public instance (Boolean) set_last_expire (Integer $time)
|
|
|
|
Description:
|
|
This method sets the last expire time.
|
|
|
|
=cut
|
|
|
|
sub set_last_expire {
|
|
my ($self, $time) = @_;
|
|
|
|
return 0 unless (defined($time));
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "UPDATE bayes_vars SET last_expire = ? WHERE id = ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql,
|
|
undef,
|
|
$time,
|
|
$self->{_userid});
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: set_last_expire: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 get_running_expire_tok
|
|
|
|
public instance (String $time) get_running_expire_tok ()
|
|
|
|
Description:
|
|
This method determines if an expire is currently running and returns
|
|
the last time set.
|
|
|
|
There can be multiple times, so we just pull the greatest (most recent)
|
|
value.
|
|
|
|
=cut
|
|
|
|
sub get_running_expire_tok {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "SELECT max(runtime) from bayes_expire WHERE id = ?";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: get_running_expire_tok: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: get_running_expire_tok: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my ($runtime) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
return $runtime;
|
|
}
|
|
|
|
=head2 set_running_expire_tok
|
|
|
|
public instance (String $time) set_running_expire_tok ()
|
|
|
|
Description:
|
|
This method sets the time that an expire starts running.
|
|
|
|
=cut
|
|
|
|
sub set_running_expire_tok {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "INSERT INTO bayes_expire (id,runtime) VALUES (?,?)";
|
|
|
|
my $time = time();
|
|
|
|
my $rows = $self->{_dbh}->do($sql,
|
|
undef,
|
|
$self->{_userid}, $time);
|
|
unless (defined($rows)) {
|
|
dbg("bayes: set_running_expire_tok: SQL error: ".$self->{_dbh}->errstr());
|
|
return;
|
|
}
|
|
|
|
return $time;
|
|
}
|
|
|
|
=head2 remove_running_expire_tok
|
|
|
|
public instance (Boolean) remove_running_expire_tok ()
|
|
|
|
Description:
|
|
This method removes the row in the database that indicates that
|
|
and expire is currently running.
|
|
|
|
=cut
|
|
|
|
sub remove_running_expire_tok {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "DELETE from bayes_expire
|
|
WHERE id = ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $self->{_userid});
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: remove_running_expire_tok: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 tok_get
|
|
|
|
public instance (Integer, Integer, Integer) tok_get (String $token)
|
|
|
|
Description:
|
|
This method retrieves a specified token (C<$token>) from the database
|
|
and returns it's spam_count, ham_count and last access time.
|
|
|
|
=cut
|
|
|
|
sub tok_get {
|
|
my ($self, $token) = @_;
|
|
|
|
return (0,0,0) unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "SELECT spam_count, ham_count, atime
|
|
FROM bayes_token
|
|
WHERE id = ?
|
|
AND token = ?";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: tok_get: SQL error: ".$self->{_dbh}->errstr());
|
|
return (0,0,0);
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid}, $token);
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: tok_get: SQL error: ".$self->{_dbh}->errstr());
|
|
return (0,0,0);
|
|
}
|
|
|
|
my ($spam_count, $ham_count, $atime) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
$spam_count = 0 if (!$spam_count || $spam_count < 0);
|
|
$ham_count = 0 if (!$ham_count || $ham_count < 0);
|
|
$atime = 0 if (!$atime);
|
|
|
|
return ($spam_count, $ham_count, $atime)
|
|
}
|
|
|
|
=head2 tok_get_all
|
|
|
|
public instance (\@) tok_get (@ $tokens)
|
|
|
|
Description:
|
|
This method retrieves the specified tokens (C<$tokens>) from storage and returns
|
|
an array ref of arrays spam count, ham count and last access time.
|
|
|
|
=cut
|
|
|
|
sub tok_get_all {
|
|
my ($self, @tokens) = @_;
|
|
|
|
return [] unless (defined($self->{_dbh}));
|
|
|
|
my $token_list_size = scalar(@tokens);
|
|
dbg("bayes: tok_get_all: token count: $token_list_size");
|
|
my @tok_results;
|
|
|
|
my $search_index = 0;
|
|
my $results_index = 0;
|
|
my $bunch_end;
|
|
|
|
my $token_select = $self->_token_select_string();
|
|
|
|
my $multi_sql = "SELECT $token_select, spam_count, ham_count, atime
|
|
FROM bayes_token
|
|
WHERE id = ?
|
|
AND token IN ";
|
|
|
|
# fetch tokens in bunches of 100 until there are <= 100 left, then just fetch the rest
|
|
while ($token_list_size > $search_index) {
|
|
my $bunch_size;
|
|
if ($token_list_size - $search_index > 100) {
|
|
$bunch_size = 100;
|
|
}
|
|
else {
|
|
$bunch_size = $token_list_size - $search_index;
|
|
}
|
|
while ($token_list_size - $search_index >= $bunch_size) {
|
|
my @bindings;
|
|
my $bindcount;
|
|
my $in_str = '(';
|
|
|
|
$bunch_end = $search_index + $bunch_size;
|
|
for ( ; $search_index < $bunch_end; $search_index++) {
|
|
$in_str .= '?,';
|
|
push(@bindings, $tokens[$search_index]);
|
|
}
|
|
chop $in_str;
|
|
$in_str .= ')';
|
|
|
|
my $dynamic_sql = $multi_sql . $in_str;
|
|
|
|
my $sth = $self->{_dbh}->prepare($dynamic_sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: tok_get_all: SQL error: ".$self->{_dbh}->errstr());
|
|
return [];
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid}, @bindings);
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: tok_get_all: SQL error: ".$self->{_dbh}->errstr());
|
|
return [];
|
|
}
|
|
|
|
my $results = $sth->fetchall_arrayref();
|
|
|
|
$sth->finish();
|
|
|
|
foreach my $result (@{$results}) {
|
|
# Make sure that spam_count and ham_count are not negative
|
|
$result->[1] = 0 if (!$result->[1] || $result->[1] < 0);
|
|
$result->[2] = 0 if (!$result->[2] || $result->[2] < 0);
|
|
# Make sure that atime has a value
|
|
$result->[3] = 0 if (!$result->[3]);
|
|
$tok_results[$results_index++] = $result;
|
|
}
|
|
}
|
|
}
|
|
|
|
return \@tok_results;
|
|
}
|
|
|
|
=head2 tok_count_change
|
|
|
|
public instance (Boolean) tok_count_change (Integer $spam_count,
|
|
Integer $ham_count,
|
|
String $token,
|
|
String $atime)
|
|
|
|
Description:
|
|
This method takes a C<$spam_count> and C<$ham_count> and adds it to
|
|
C<$tok> along with updating C<$tok>s atime with C<$atime>.
|
|
|
|
=cut
|
|
|
|
sub tok_count_change {
|
|
my ($self, $spam_count, $ham_count, $token, $atime) = @_;
|
|
|
|
$atime = 0 unless defined $atime;
|
|
|
|
$self->_put_token($token, $spam_count, $ham_count, $atime);
|
|
}
|
|
|
|
=head2 multi_tok_count_change
|
|
|
|
public instance (Boolean) multi_tok_count_change (Integer $spam_count,
|
|
Integer $ham_count,
|
|
\% $tokens,
|
|
String $atime)
|
|
|
|
Description:
|
|
This method takes a C<$spam_count> and C<$ham_count> and adds it to all
|
|
of the tokens in the C<$tokens> hash ref along with updating each token's
|
|
atime with C<$atime>.
|
|
|
|
=cut
|
|
|
|
sub multi_tok_count_change {
|
|
my ($self, $spam_count, $ham_count, $tokens, $atime) = @_;
|
|
|
|
$atime = 0 unless defined $atime;
|
|
|
|
$self->_put_tokens($tokens, $spam_count, $ham_count, $atime);
|
|
}
|
|
|
|
=head2 nspam_nham_get
|
|
|
|
public instance ($spam_count, $ham_count) nspam_nham_get ()
|
|
|
|
Description:
|
|
This method retrieves the total number of spam and the total number of
|
|
ham learned.
|
|
|
|
=cut
|
|
|
|
sub nspam_nham_get {
|
|
my ($self) = @_;
|
|
|
|
return (0,0) unless (defined($self->{_dbh}));
|
|
|
|
my @vars = $self->get_storage_variables();
|
|
|
|
return ($vars[1] || 0, $vars[2] || 0);
|
|
}
|
|
|
|
=head2 nspam_nham_change
|
|
|
|
public instance (Boolean) nspam_nham_change (Integer $num_spam,
|
|
Integer $num_ham)
|
|
|
|
Description:
|
|
This method updates the number of spam and the number of ham in the database.
|
|
|
|
=cut
|
|
|
|
sub nspam_nham_change {
|
|
my ($self, $num_spam, $num_ham) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql;
|
|
my @bindings;
|
|
|
|
if ($num_spam != 0 && $num_ham != 0) {
|
|
$sql = "UPDATE bayes_vars
|
|
SET spam_count = spam_count + ?,
|
|
ham_count = ham_count + ?
|
|
WHERE id = ?";
|
|
@bindings = ($num_spam, $num_ham, $self->{_userid});
|
|
}
|
|
elsif ($num_spam != 0) {
|
|
$sql = "UPDATE bayes_vars
|
|
SET spam_count = spam_count + ?
|
|
WHERE id = ?";
|
|
@bindings = ($num_spam, $self->{_userid});
|
|
}
|
|
elsif ($num_ham != 0) {
|
|
$sql = "UPDATE bayes_vars
|
|
SET ham_count = ham_count + ?
|
|
WHERE id = ?";
|
|
@bindings = ($num_ham, $self->{_userid});
|
|
}
|
|
else {
|
|
# For some reason called with no delta, it's ok though so just return
|
|
dbg("bayes: nspam_nham_change: Called with no delta on spam or ham");
|
|
return 1;
|
|
}
|
|
|
|
my $rows = $self->{_dbh}->do($sql,
|
|
undef,
|
|
@bindings);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: nspam_nham_change: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 tok_touch
|
|
|
|
public instance (Boolean) tok_touch (String $token,
|
|
String $atime)
|
|
|
|
Description:
|
|
This method updates the given tokens (C<$token>) atime.
|
|
|
|
The assumption is that the token already exists in the database.
|
|
|
|
=cut
|
|
|
|
sub tok_touch {
|
|
my ($self, $token, $atime) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
# shortcut, will only update atime for the token if the atime is less than
|
|
# what we are updating to
|
|
my $sql = "UPDATE bayes_token
|
|
SET atime = ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND atime < ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid},
|
|
$token, $atime);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: tok_touch: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
# if we didn't update a row then no need to update newest_token_age
|
|
return 1 if ($rows eq '0E0');
|
|
|
|
# need to check newest_token_age
|
|
# no need to check oldest_token_age since we would only update if the
|
|
# atime was newer than what is in the database
|
|
$sql = "UPDATE bayes_vars
|
|
SET newest_token_age = ?
|
|
WHERE id = ?
|
|
AND newest_token_age < ?";
|
|
|
|
$rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: tok_touch: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 tok_touch_all
|
|
|
|
public instance (Boolean) tok_touch (\@ $tokens
|
|
String $atime)
|
|
|
|
Description:
|
|
This method does a mass update of the given list of tokens C<$tokens>, if the existing token
|
|
atime is < C<$atime>.
|
|
|
|
The assumption is that the tokens already exist in the database.
|
|
|
|
We should never be touching more than N_SIGNIFICANT_TOKENS, so we can make
|
|
some assumptions about how to handle the data (ie no need to batch like we
|
|
do in tok_get_all)
|
|
|
|
=cut
|
|
|
|
sub tok_touch_all {
|
|
my ($self, $tokens, $atime) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
return 1 unless (scalar(@{$tokens}));
|
|
|
|
my $sql = "UPDATE bayes_token SET atime = ? WHERE id = ? AND token IN (";
|
|
|
|
my @bindings = ($atime, $self->{_userid});
|
|
foreach my $token (@{$tokens}) {
|
|
$sql .= "?,";
|
|
push(@bindings, $token);
|
|
}
|
|
chop($sql); # get rid of trailing ,
|
|
|
|
$sql .= ") AND atime < ?";
|
|
push(@bindings, $atime);
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, @bindings);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: tok_touch_all: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
# if we didn't update a row then no need to update newest_token_age
|
|
return 1 if ($rows eq '0E0');
|
|
|
|
# need to check newest_token_age
|
|
# no need to check oldest_token_age since we would only update if the
|
|
# atime was newer than what is in the database
|
|
$sql = "UPDATE bayes_vars
|
|
SET newest_token_age = ?
|
|
WHERE id = ?
|
|
AND newest_token_age < ?";
|
|
|
|
$rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: tok_touch_all: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 cleanup
|
|
|
|
public instance (Boolean) cleanup ()
|
|
|
|
Description:
|
|
This method performs any cleanup necessary before moving onto the next
|
|
operation.
|
|
|
|
=cut
|
|
|
|
sub cleanup {
|
|
my ($self) = @_;
|
|
|
|
|
|
return 1 unless ($self->{needs_cleanup});
|
|
|
|
# cleanup was needed, go ahead and clear the cleanup flag
|
|
$self->{needs_cleanup} = 0;
|
|
|
|
my $sql = "DELETE from bayes_token
|
|
WHERE id = ?
|
|
AND spam_count = 0
|
|
AND ham_count = 0";
|
|
|
|
my $toks_deleted = $self->{_dbh}->do($sql, undef, $self->{_userid});
|
|
|
|
unless (defined($toks_deleted)) {
|
|
dbg("bayes: cleanup: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
# check to see if any tokens where deleted
|
|
return 1 if ($toks_deleted eq '0E0');
|
|
|
|
$sql = "UPDATE bayes_vars SET token_count = token_count - $toks_deleted
|
|
WHERE id = ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $self->{_userid});
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: cleanup: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 get_magic_re
|
|
|
|
public instance get_magic_re (String)
|
|
|
|
Description:
|
|
This method returns a regexp which indicates a magic token.
|
|
|
|
Unused in SQL implementation.
|
|
|
|
=cut
|
|
|
|
sub get_magic_re {
|
|
my ($self) = @_;
|
|
undef;
|
|
}
|
|
|
|
=head2 sync
|
|
|
|
public instance (Boolean) sync (\% $opts)
|
|
|
|
Description:
|
|
This method performs a sync of the database
|
|
|
|
=cut
|
|
|
|
sub sync {
|
|
my ($self, $opts) = @_;
|
|
|
|
# Not used for this implementation
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 perform_upgrade
|
|
|
|
public instance (Boolean) perform_upgrade (\% $opts);
|
|
|
|
Description:
|
|
Performs an upgrade of the database from one version to another, not
|
|
currently used in this implementation.
|
|
|
|
=cut
|
|
|
|
sub perform_upgrade {
|
|
my ($self) = @_;
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 clear_database
|
|
|
|
public instance (Boolean) clear_database ()
|
|
|
|
Description:
|
|
This method deletes all records for a particular user.
|
|
|
|
Callers should be aware that any errors returned by this method
|
|
could cause the database to be inconsistent for the given user.
|
|
|
|
=cut
|
|
|
|
sub clear_database {
|
|
my ($self) = @_;
|
|
|
|
# We want to open readonly first, because if they don't already have
|
|
# a db entry, we want to avoid creating one, just to delete it in a few secs
|
|
if ($self->tie_db_readonly()) {
|
|
# Ok, they must have had a db entry, so now make the connection writable
|
|
$self->tie_db_writable();
|
|
}
|
|
else {
|
|
# If we were unable to create a readonly connection then they must
|
|
# not have a db entry, so no need to clear.
|
|
# But it should be considered a success.
|
|
return 1;
|
|
}
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $rows = $self->{_dbh}->do("DELETE FROM bayes_vars WHERE id = ?",
|
|
undef,
|
|
$self->{_userid});
|
|
unless (defined($rows)) {
|
|
dbg("bayes: SQL error removing user (bayes_vars) data: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
$rows = $self->{_dbh}->do("DELETE FROM bayes_seen WHERE id = ?",
|
|
undef,
|
|
$self->{_userid});
|
|
unless (defined($rows)) {
|
|
dbg("bayes: SQL error removing seen data: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
$rows = $self->{_dbh}->do("DELETE FROM bayes_token WHERE id = ?",
|
|
undef,
|
|
$self->{_userid});
|
|
unless (defined($rows)) {
|
|
dbg("bayes: SQL error removing token data: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 backup_database
|
|
|
|
public instance (Boolean) backup_database ()
|
|
|
|
Description:
|
|
This method will dump the users database in a machine readable format.
|
|
|
|
=cut
|
|
|
|
sub backup_database {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless ($self->tie_db_readonly());
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my @vars = $self->get_storage_variables();
|
|
|
|
my $num_spam = $vars[1] || 0;
|
|
my $num_ham = $vars[2] || 0;
|
|
|
|
print "v\t$vars[6]\tdb_version # this must be the first line!!!\n"
|
|
or die "Error writing: $!";
|
|
print "v\t$num_spam\tnum_spam\n" or die "Error writing: $!";
|
|
print "v\t$num_ham\tnum_nonspam\n" or die "Error writing: $!";
|
|
|
|
my $token_select = $self->_token_select_string();
|
|
|
|
my $token_sql = "SELECT spam_count, ham_count, atime, $token_select
|
|
FROM bayes_token
|
|
WHERE id = ?
|
|
AND (spam_count > 0 OR ham_count > 0)";
|
|
|
|
my $seen_sql = "SELECT flag, msgid
|
|
FROM bayes_seen
|
|
WHERE id = ?";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($token_sql);
|
|
|
|
unless (defined ($sth)) {
|
|
dbg("bayes: backup_database: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: backup_database: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
while (my @values = $sth->fetchrow_array()) {
|
|
$values[3] = unpack("H*", $values[3]);
|
|
print "t\t" . join("\t", @values) . "\n"
|
|
or die "Error writing: $!";
|
|
}
|
|
|
|
$sth->finish();
|
|
|
|
$sth = $self->{_dbh}->prepare_cached($seen_sql);
|
|
|
|
unless (defined ($sth)) {
|
|
dbg("bayes: backup_database: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
$rc = $sth->execute($self->{_userid});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: backup_database: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
while (my @values = $sth->fetchrow_array()) {
|
|
print "s\t" . join("\t",@values) . "\n" or die "Error writing: $!";
|
|
}
|
|
|
|
$sth->finish();
|
|
|
|
$self->untie_db();
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 restore_database
|
|
|
|
public instance (Boolean) restore_database (String $filename, Boolean $showdots)
|
|
|
|
Description:
|
|
This method restores a database from the given filename, C<$filename>.
|
|
|
|
Callers should be aware that any errors returned by this method
|
|
could causes the database to be inconsistent for the given user.
|
|
|
|
=cut
|
|
|
|
sub restore_database {
|
|
my ($self, $filename, $showdots) = @_;
|
|
|
|
local *DUMPFILE;
|
|
if (!open(DUMPFILE, '<', $filename)) {
|
|
dbg("bayes: unable to open backup file $filename: $!");
|
|
return 0;
|
|
}
|
|
|
|
# This is the critical phase (moving sql around), so don't allow it
|
|
# to be interrupted.
|
|
local $SIG{'INT'} = 'IGNORE';
|
|
local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
|
|
local $SIG{'TERM'} = 'IGNORE';
|
|
|
|
unless ($self->clear_database()) {
|
|
return 0;
|
|
}
|
|
|
|
# we need to go ahead close the db connection so we can then open it up
|
|
# in a fresh state after clearing
|
|
$self->untie_db();
|
|
|
|
unless ($self->tie_db_writable()) {
|
|
return 0;
|
|
}
|
|
|
|
my $token_count = 0;
|
|
my $db_version;
|
|
my $num_spam;
|
|
my $num_ham;
|
|
my $error_p = 0;
|
|
my $line_count = 0;
|
|
|
|
my $line = <DUMPFILE>;
|
|
defined $line or die "Error reading dump file: $!";
|
|
$line_count++;
|
|
# We require the database version line to be the first in the file so we can
|
|
# figure out how to properly deal with the file. If it is not the first
|
|
# line then fail
|
|
if ($line =~ m/^v\s+(\d+)\s+db_version/) {
|
|
$db_version = $1;
|
|
}
|
|
else {
|
|
dbg("bayes: database version must be the first line in the backup file, correct and re-run");
|
|
return 0;
|
|
}
|
|
|
|
unless ($db_version == 2 || $db_version == 3) {
|
|
warn("bayes: database version $db_version is unsupported, must be version 2 or 3");
|
|
return 0;
|
|
}
|
|
|
|
my $token_error_count = 0;
|
|
my $seen_error_count = 0;
|
|
|
|
for ($!=0; defined($line=<DUMPFILE>); $!=0) {
|
|
chomp($line);
|
|
$line_count++;
|
|
|
|
if ($line_count % 1000 == 0) {
|
|
print STDERR "." if ($showdots);
|
|
}
|
|
|
|
if ($line =~ /^v\s+/) { # variable line
|
|
my @parsed_line = split(/\s+/, $line, 3);
|
|
my $value = $parsed_line[1] + 0;
|
|
if ($parsed_line[2] eq 'num_spam') {
|
|
$num_spam = $value;
|
|
}
|
|
elsif ($parsed_line[2] eq 'num_nonspam') {
|
|
$num_ham = $value;
|
|
}
|
|
else {
|
|
dbg("bayes: restore_database: skipping unknown line: $line");
|
|
}
|
|
}
|
|
elsif ($line =~ /^t\s+/) { # token line
|
|
my @parsed_line = split(/\s+/, $line, 5);
|
|
my $spam_count = $parsed_line[1] + 0;
|
|
my $ham_count = $parsed_line[2] + 0;
|
|
my $atime = $parsed_line[3] + 0;
|
|
my $token = $parsed_line[4];
|
|
|
|
my $token_warn_p = 0;
|
|
my @warnings;
|
|
|
|
if ($spam_count < 0) {
|
|
$spam_count = 0;
|
|
push(@warnings, 'spam count < 0, resetting');
|
|
$token_warn_p = 1;
|
|
}
|
|
if ($ham_count < 0) {
|
|
$ham_count = 0;
|
|
push(@warnings, 'ham count < 0, resetting');
|
|
$token_warn_p = 1;
|
|
}
|
|
|
|
if ($spam_count == 0 && $ham_count == 0) {
|
|
dbg("bayes: token has zero spam and ham count, skipping");
|
|
next;
|
|
}
|
|
|
|
if ($atime > time()) {
|
|
$atime = time();
|
|
push(@warnings, 'atime > current time, resetting');
|
|
$token_warn_p = 1;
|
|
}
|
|
|
|
if ($token_warn_p) {
|
|
dbg("bayes: token ($token) has the following warnings:\n".join("\n",@warnings));
|
|
}
|
|
|
|
if ($db_version < 3) {
|
|
# versions < 3 use plain text tokens, so we need to convert to hash
|
|
$token = substr(sha1($token), -5);
|
|
}
|
|
else {
|
|
# turn unpacked binary token back into binary value
|
|
$token = pack("H*",$token);
|
|
}
|
|
|
|
unless ($self->_put_token($token, $spam_count, $ham_count, $atime)) {
|
|
dbg("bayes: error inserting token for line: $line");
|
|
$token_error_count++;
|
|
}
|
|
$token_count++;
|
|
}
|
|
elsif ($line =~ /^s\s+/) { # seen line
|
|
my @parsed_line = split(/\s+/, $line, 3);
|
|
my $flag = $parsed_line[1];
|
|
my $msgid = $parsed_line[2];
|
|
|
|
unless ($flag eq 'h' || $flag eq 's') {
|
|
dbg("bayes: unknown seen flag ($flag) for line: $line, skipping");
|
|
next;
|
|
}
|
|
|
|
unless ($msgid) {
|
|
dbg("bayes: blank msgid for line: $line, skipping");
|
|
next;
|
|
}
|
|
|
|
unless ($self->seen_put($msgid, $flag)) {
|
|
dbg("bayes: error inserting msgid in seen table for line: $line");
|
|
$seen_error_count++;
|
|
}
|
|
}
|
|
else {
|
|
dbg("bayes: skipping unknown line: $line");
|
|
next;
|
|
}
|
|
|
|
if ($token_error_count >= 20) {
|
|
warn "bayes: encountered too many errors (20) while parsing token line, reverting to empty database and exiting\n";
|
|
$self->clear_database();
|
|
return 0;
|
|
}
|
|
|
|
if ($seen_error_count >= 20) {
|
|
warn "bayes: encountered too many errors (20) while parsing seen lines, reverting to empty database and exiting\n";
|
|
$self->clear_database();
|
|
return 0;
|
|
}
|
|
}
|
|
defined $line || $!==0 or
|
|
$!==EBADF ? dbg("bayes: error reading dump file: $!")
|
|
: die "error reading dump file: $!";
|
|
close(DUMPFILE) or die "Can't close dump file: $!";
|
|
|
|
print STDERR "\n" if ($showdots);
|
|
|
|
unless (defined($num_spam)) {
|
|
dbg("bayes: unable to find num spam, please check file");
|
|
$error_p = 1;
|
|
}
|
|
|
|
unless (defined($num_ham)) {
|
|
dbg("bayes: unable to find num ham, please check file");
|
|
$error_p = 1;
|
|
}
|
|
|
|
if ($error_p) {
|
|
dbg("bayes: error(s) while attempting to load $filename, clearing database, correct and re-run");
|
|
$self->clear_database();
|
|
return 0;
|
|
}
|
|
|
|
if ($num_spam || $num_ham) {
|
|
unless ($self->nspam_nham_change($num_spam, $num_ham)) {
|
|
dbg("bayes: error updating num spam and num ham, clearing database");
|
|
$self->clear_database();
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
dbg("bayes: parsed $line_count lines");
|
|
dbg("bayes: created database with $token_count tokens based on $num_spam spam messages and $num_ham ham messages");
|
|
|
|
$self->untie_db();
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 db_readable
|
|
|
|
public instance (Boolean) db_readable()
|
|
|
|
Description:
|
|
This method returns a boolean value indicating if the database is in a
|
|
readable state.
|
|
|
|
=cut
|
|
|
|
sub db_readable {
|
|
my ($self) = @_;
|
|
|
|
# if there's a database handle, we can read...
|
|
return defined $self->{_dbh};
|
|
}
|
|
|
|
=head2 db_writable
|
|
|
|
public instance (Boolean) db_writeable()
|
|
|
|
Description:
|
|
This method returns a boolean value indicating if the database is in a
|
|
writable state.
|
|
|
|
=cut
|
|
|
|
sub db_writable {
|
|
my ($self) = @_;
|
|
|
|
return (defined $self->{_dbh} && $self->{db_writable_p})
|
|
}
|
|
|
|
=head1 Private Methods
|
|
|
|
=head2 _connect_db
|
|
|
|
private instance (Boolean) _connect_db ()
|
|
|
|
Description:
|
|
This method connects to the SQL database.
|
|
|
|
=cut
|
|
|
|
sub _connect_db {
|
|
my ($self) = @_;
|
|
|
|
$self->{_dbh} = undef;
|
|
|
|
# Turn off PrintError and explicitly set AutoCommit to off
|
|
my $dbh = DBI->connect($self->{_dsn}, $self->{_dbuser}, $self->{_dbpass},
|
|
{'PrintError' => 0, 'AutoCommit' => 1});
|
|
|
|
if (!$dbh) {
|
|
dbg("bayes: unable to connect to database: ".DBI->errstr());
|
|
return 0;
|
|
}
|
|
else {
|
|
dbg("bayes: database connection established");
|
|
}
|
|
|
|
# SQLite PRAGMA attributes - here for tests, see bug 8033
|
|
if ($self->{_dsn} =~ /^dbi:SQLite:.*?;(.+)/i) {
|
|
foreach my $attr (split(/;/, $1)) {
|
|
$dbh->do("PRAGMA $attr");
|
|
}
|
|
}
|
|
|
|
$self->{_dbh} = $dbh;
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 _get_db_version
|
|
|
|
private instance (Integer) _get_db_version ()
|
|
|
|
Description:
|
|
Gets the current version of the database from the special global vars
|
|
tables.
|
|
|
|
=cut
|
|
|
|
sub _get_db_version {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
return ($self->{_db_version_cache}) if (defined($self->{_db_version_cache}));
|
|
|
|
my $sql = "SELECT value FROM bayes_global_vars WHERE variable = 'VERSION'";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: _get_db_version: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my $rc = $sth->execute();
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: _get_db_version: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my ($version) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
$self->{_db_version_cache} = $version;
|
|
|
|
return $version;
|
|
}
|
|
|
|
=head2 _initialize_db
|
|
|
|
private instance (Boolean) _initialize_db ()
|
|
|
|
Description:
|
|
This method will check to see if a user has had their bayes variables
|
|
initialized. If not then it will perform this initialization.
|
|
|
|
=cut
|
|
|
|
sub _initialize_db {
|
|
my ($self, $create_entry_p) = @_;
|
|
|
|
return 0 if !defined $self->{_dbh};
|
|
return 0 if !defined $self->{_username} || $self->{_username} eq '';
|
|
|
|
# Check to see if we should call the services_authorized_for_username plugin
|
|
# hook to see if this user is allowed/able to use bayes. If not, do nothing
|
|
# and return 0.
|
|
if ($self->{bayes}->{conf}->{bayes_sql_username_authorized}) {
|
|
my $services = { 'bayessql' => 0 };
|
|
$self->{bayes}->{main}->call_plugins("services_allowed_for_username",
|
|
{ services => $services,
|
|
username => $self->{_username},
|
|
conf => $self->{bayes}->{conf},
|
|
});
|
|
|
|
unless ($services->{bayessql}) {
|
|
dbg("bayes: username not allowed by services_allowed_for_username plugin call");
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
my $sqlselect = "SELECT id FROM bayes_vars WHERE username = ?";
|
|
|
|
my $sthselect = $self->{_dbh}->prepare_cached($sqlselect);
|
|
|
|
unless (defined($sthselect)) {
|
|
dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my $rc = $sthselect->execute($self->{_username});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my ($id) = $sthselect->fetchrow_array();
|
|
|
|
if ($id) {
|
|
$self->{_userid} = $id;
|
|
dbg("bayes: Using userid: ".$self->{_userid});
|
|
$sthselect->finish();
|
|
return 1;
|
|
}
|
|
|
|
# Do not create an entry for this user unless we were specifically asked to
|
|
return 0 unless ($create_entry_p);
|
|
|
|
# For now let the database setup the other variables as defaults
|
|
my $sqlinsert = "INSERT INTO bayes_vars (username) VALUES (?)";
|
|
|
|
my $rows = $self->{_dbh}->do($sqlinsert,
|
|
undef,
|
|
$self->{_username});
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
# Now we need to figure out what id we inserted them at, in a perfect
|
|
# world the database driver would handle this for us (ie mysql_insert_id)
|
|
# but this is far from a perfect world, however since in theory we only
|
|
# ever do this once it's ok to take the hit
|
|
$rc = $sthselect->execute($self->{_username});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
($id) = $sthselect->fetchrow_array();
|
|
|
|
$sthselect->finish();
|
|
|
|
if ($id) {
|
|
$self->{_userid} = $id;
|
|
dbg("bayes: using userid: ".$self->{_userid});
|
|
return 1;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 _put_token
|
|
|
|
private instance (Boolean) _put_token (string $token,
|
|
integer $spam_count,
|
|
integer $ham_count,
|
|
string $atime)
|
|
|
|
Description:
|
|
This method performs the work of either inserting or updating a token in
|
|
the database.
|
|
|
|
=cut
|
|
|
|
sub _put_token {
|
|
my ($self, $token, $spam_count, $ham_count, $atime) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
$spam_count ||= 0;
|
|
$ham_count ||= 0;
|
|
|
|
if ($spam_count == 0 && $ham_count == 0) {
|
|
return 1;
|
|
}
|
|
|
|
my ($existing_spam_count,
|
|
$existing_ham_count,
|
|
$existing_atime) = $self->tok_get($token);
|
|
|
|
if (!$existing_atime) {
|
|
|
|
# You can't create a new entry for a token with a negative count, so just return
|
|
# if we are unable to find an entry.
|
|
return 1 if ($spam_count < 0 || $ham_count < 0);
|
|
|
|
my $sql = "INSERT INTO bayes_token
|
|
(id, token, spam_count, ham_count, atime)
|
|
VALUES (?,?,?,?,?)";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid},
|
|
$token,
|
|
$spam_count,
|
|
$ham_count,
|
|
$atime);
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
$sth->finish();
|
|
|
|
$sql = "UPDATE bayes_vars SET token_count = token_count + 1
|
|
WHERE id = ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $self->{_userid});
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
$sql = "UPDATE bayes_vars SET newest_token_age = ?
|
|
WHERE id = ? AND newest_token_age < ?";
|
|
|
|
$rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
if ($rows eq '0E0') {
|
|
# no need to update oldest_token_age if we updated newest_token_age
|
|
|
|
$sql = "UPDATE bayes_vars SET oldest_token_age = ?
|
|
WHERE id = ? AND oldest_token_age > ?";
|
|
|
|
$rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
|
|
if ($spam_count < 0 || $ham_count < 0) {
|
|
# we only need to cleanup when we subtract counts for a token and the
|
|
# counts may have both reached 0
|
|
# XXX - future optimization, since we have the existing spam/ham counts
|
|
# we can make an educated guess on if the count would reach 0, for
|
|
# instance, if we are decreasing spam_count but spam_count is currently
|
|
# > 1000, then there is no possible way this update or any others that
|
|
# might currently be happening could reduce that value to 0, so there
|
|
# would be no need to set the needs_cleanup flag
|
|
$self->{needs_cleanup} = 1;
|
|
}
|
|
|
|
my $update_atime_p = 1;
|
|
my $updated_atime_p = 0;
|
|
|
|
# if the existing atime is already >= the one we are going to set, then
|
|
# don't bother
|
|
$update_atime_p = 0 if ($existing_atime >= $atime);
|
|
|
|
# These SQL statements include as part of the WHERE clause something like
|
|
# "AND spam_count + ? >= 0" or "AND ham_count + ? >= 0". This is to keep
|
|
# the count from going negative.
|
|
|
|
if ($spam_count) {
|
|
my $sql;
|
|
my @args;
|
|
if ($update_atime_p) {
|
|
$sql = "UPDATE bayes_token
|
|
SET spam_count = spam_count + ?,
|
|
atime = ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND spam_count + ? >= 0";
|
|
@args = ($spam_count, $atime, $self->{_userid}, $token, $spam_count);
|
|
$updated_atime_p = 1; # note the fact that we did do it
|
|
}
|
|
else {
|
|
$sql = "UPDATE bayes_token
|
|
SET spam_count = spam_count + ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND spam_count + ? >= 0";
|
|
@args = ($spam_count, $self->{_userid}, $token, $spam_count);
|
|
}
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, @args);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if ($ham_count) {
|
|
my $sql;
|
|
my @args;
|
|
if ($update_atime_p && !$updated_atime_p) {
|
|
$sql = "UPDATE bayes_token
|
|
SET ham_count = ham_count + ?,
|
|
atime = ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND ham_count + ? >= 0";
|
|
@args = ($ham_count, $atime, $self->{_userid}, $token, $ham_count);
|
|
$updated_atime_p = 1; # note the fact that we did do it
|
|
}
|
|
else {
|
|
$sql = "UPDATE bayes_token
|
|
SET ham_count = ham_count + ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND ham_count + ? >= 0";
|
|
@args = ($ham_count, $self->{_userid}, $token, $ham_count);
|
|
}
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, @args);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if ($updated_atime_p) {
|
|
# we updated the atime, so we need to check and update bayes_vars
|
|
# we only need to worry about newest_token_age since we would have
|
|
# only updated the atime if it was > the previous value
|
|
my $sql = "UPDATE bayes_vars SET newest_token_age = ?
|
|
WHERE id = ? AND newest_token_age < ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 _put_tokens
|
|
|
|
private instance (Boolean) _put_tokens (\% $tokens,
|
|
integer $spam_count,
|
|
integer $ham_count,
|
|
string $atime)
|
|
|
|
Description:
|
|
This method performs the work of either inserting or updating tokens in
|
|
the database.
|
|
|
|
=cut
|
|
|
|
sub _put_tokens {
|
|
my ($self, $tokens, $spam_count, $ham_count, $atime) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
$spam_count ||= 0;
|
|
$ham_count ||= 0;
|
|
|
|
if ($spam_count == 0 && $ham_count == 0) {
|
|
return 1;
|
|
}
|
|
|
|
my $atime_updated_p = 0;
|
|
my $atime_inserted_p = 0;
|
|
my $new_tokens = 0;
|
|
|
|
my $insertsql = "INSERT INTO bayes_token
|
|
(id, token, spam_count, ham_count, atime)
|
|
VALUES (?,?,?,?,?)";
|
|
|
|
my $insertsth = $self->{_dbh}->prepare_cached($insertsql);
|
|
|
|
unless (defined($insertsth)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
foreach my $token (keys %{$tokens}) {
|
|
my ($existing_spam_count,
|
|
$existing_ham_count,
|
|
$existing_atime) = $self->tok_get($token);
|
|
|
|
if (!$existing_atime) {
|
|
|
|
# You can't create a new entry for a token with a negative count, so
|
|
# just skip to the next one if we are unable to find an entry.
|
|
next if ($spam_count < 0 || $ham_count < 0);
|
|
|
|
|
|
my $rc = $insertsth->execute($self->{_userid},
|
|
$token,
|
|
$spam_count,
|
|
$ham_count,
|
|
$atime);
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
next;
|
|
}
|
|
|
|
$insertsth->finish();
|
|
|
|
$atime_inserted_p = 1;
|
|
$new_tokens++;
|
|
}
|
|
else {
|
|
|
|
if ($spam_count < 0 || $ham_count < 0) {
|
|
# we only need to cleanup when we subtract counts for a token and the
|
|
# counts may have both reached 0
|
|
# XXX - future optimization, since we have the existing spam/ham counts
|
|
# we can make an educated guess on if the count would reach 0, for
|
|
# instance, if we are decreasing spam_count but spam_count is currently
|
|
# > 1000, then there is no possible way this update or any others that
|
|
# might currently be happening could reduce that value to 0, so there
|
|
# would be no need to set the needs_cleanup flag
|
|
$self->{needs_cleanup} = 1;
|
|
}
|
|
|
|
my $update_atime_p = 1;
|
|
|
|
# if the existing atime is already >= the one we are going to set, then
|
|
# don't bother
|
|
$update_atime_p = 0 if ($existing_atime >= $atime);
|
|
|
|
# These SQL statements include as part of the WHERE clause something like
|
|
# "AND spam_count + ? >= 0" or "AND ham_count + ? >= 0". This is to keep
|
|
# the count from going negative.
|
|
|
|
if ($spam_count) {
|
|
my $sql;
|
|
my @args;
|
|
if ($update_atime_p) {
|
|
$sql = "UPDATE bayes_token
|
|
SET spam_count = spam_count + ?,
|
|
atime = ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND spam_count + ? >= 0";
|
|
@args = ($spam_count, $atime, $self->{_userid}, $token, $spam_count);
|
|
$atime_updated_p = 1;
|
|
}
|
|
else {
|
|
$sql = "UPDATE bayes_token
|
|
SET spam_count = spam_count + ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND spam_count + ? >= 0";
|
|
@args = ($spam_count, $self->{_userid}, $token, $spam_count);
|
|
}
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, @args);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
}
|
|
}
|
|
|
|
if ($ham_count) {
|
|
my $sql;
|
|
my @args;
|
|
# if $spam_count then we already updated the atime
|
|
if ($update_atime_p && !$spam_count) {
|
|
$sql = "UPDATE bayes_token
|
|
SET ham_count = ham_count + ?,
|
|
atime = ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND ham_count + ? >= 0";
|
|
@args = ($ham_count, $atime, $self->{_userid}, $token, $ham_count);
|
|
$atime_updated_p = 1;
|
|
}
|
|
else {
|
|
$sql = "UPDATE bayes_token
|
|
SET ham_count = ham_count + ?
|
|
WHERE id = ?
|
|
AND token = ?
|
|
AND ham_count + ? >= 0";
|
|
@args = ($ham_count, $self->{_userid}, $token, $ham_count);
|
|
}
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, @args);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ($new_tokens) {
|
|
my $sql = "UPDATE bayes_vars SET token_count = token_count + ?
|
|
WHERE id = ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $new_tokens, $self->{_userid});
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
}
|
|
}
|
|
|
|
if ($atime_updated_p || $atime_inserted_p) {
|
|
# we updated the atime, so we need to check and update bayes_vars
|
|
# we only need to worry about newest_token_age since we would have
|
|
# only updated the atime if it was > the previous value
|
|
my $sql = "UPDATE bayes_vars SET newest_token_age = ?
|
|
WHERE id = ? AND newest_token_age < ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
}
|
|
}
|
|
|
|
|
|
# If we inserted then we might need to update oldest_token_age
|
|
# but if we already updated newest_token_age then there is no need
|
|
|
|
if ($atime_inserted_p) {
|
|
my $sql = "UPDATE bayes_vars SET oldest_token_age = ?
|
|
WHERE id = ? AND oldest_token_age > ?";
|
|
|
|
my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
|
|
|
|
unless (defined($rows)) {
|
|
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 _get_oldest_token_age
|
|
|
|
private instance (Integer) _get_oldest_token_age ()
|
|
|
|
Description:
|
|
This method finds the atime of the oldest token in the database.
|
|
|
|
The use of min(atime) in the SQL is ugly and but really the most efficient
|
|
way of getting the oldest_token_age after we've done a mass expire. It should
|
|
only be called at expire time.
|
|
|
|
=cut
|
|
|
|
sub _get_oldest_token_age {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "SELECT min(atime) FROM bayes_token
|
|
WHERE id = ?";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: _get_oldest_token_age: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: _get_oldest_token_age: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my ($atime) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
return $atime;
|
|
}
|
|
|
|
|
|
=head2 _get_num_hapaxes
|
|
|
|
private instance (Integer) _get_num_hapaxes ()
|
|
|
|
Description:
|
|
This method gets the total number of hapaxes (spam_count + ham_count == 1) in
|
|
the token database for a user.
|
|
|
|
=cut
|
|
|
|
sub _get_num_hapaxes {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "SELECT count(*)
|
|
FROM bayes_token
|
|
WHERE id = ?
|
|
AND spam_count + ham_count = 1";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: _get_num_hapaxes: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: _get_num_hapaxes: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
|
|
my ($num_hapaxes) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
return $num_hapaxes;
|
|
}
|
|
|
|
=head2 _get_num_lowfreq
|
|
|
|
private instance (Integer) _get_num_lowfreq ()
|
|
|
|
Description:
|
|
This method gets the total number of lowfreq tokens (spam_count < 8 and
|
|
ham_count < 8) in the token database for a user
|
|
|
|
=cut
|
|
|
|
sub _get_num_lowfreq {
|
|
my ($self) = @_;
|
|
|
|
return 0 unless (defined($self->{_dbh}));
|
|
|
|
my $sql = "SELECT count(*)
|
|
FROM bayes_token
|
|
WHERE id = ?
|
|
AND (spam_count >= 0 AND spam_count < 8)
|
|
AND (ham_count >= 0 AND ham_count < 8)
|
|
AND spam_count + ham_count != 1";
|
|
|
|
my $sth = $self->{_dbh}->prepare_cached($sql);
|
|
|
|
unless (defined($sth)) {
|
|
dbg("bayes: _get_num_lowfreq: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my $rc = $sth->execute($self->{_userid});
|
|
|
|
unless ($rc) {
|
|
dbg("bayes: _get_num_lowfreq: SQL error: ".$self->{_dbh}->errstr());
|
|
return 0;
|
|
}
|
|
|
|
my ($num_lowfreq) = $sth->fetchrow_array();
|
|
|
|
$sth->finish();
|
|
|
|
return $num_lowfreq;
|
|
}
|
|
|
|
=head2 _token_select_string
|
|
|
|
private instance (String) _token_select_string
|
|
|
|
Description:
|
|
This method returns the string to be used in SELECT statements to represent
|
|
the token column.
|
|
|
|
The default is to use the SUBSTR function to pad the token out to 5 characters.
|
|
|
|
=cut
|
|
|
|
sub _token_select_string {
|
|
# Use SQLite compatible RPAD alternative
|
|
return "SUBSTR(token || ' ', 1, 5)";
|
|
}
|
|
|
|
sub sa_die { Mail::SpamAssassin::sa_die(@_); }
|
|
|
|
1;
|