Tools: make file-locking aware of external exception sources

Previously an external exception (eg. caused by a SIGARLM in a code which is already inside a run_with_timeout() call) could happen in various places where we did not properly this situation. For instance after calling $lock_func() but before reaching the cleanup code. In this case a lock was leaked. Additionally the code was broken in that it used perl's automatic hash creation side effect ($a->{x}->{y} implicitly initializing $a->{x} with an empty hash when it did not exist). The effect was that if our own time out was triggered after the initial check for an existing file handle inside $lock_func() happened (extremely rare since perl would have to be running insanely slow), the cleanup did: if (my $fh = $lock_handles->{$$}->{$filename}->{fh}) { This recreated $lock_handles->{$$}->{$filename} as an empty hash. A subsequent call to lock_file_full() will think a file descriptor already exists because the check simply used: if (!$lock_handles->{$$}->{$filename}) { While this could have been a one-line fix for this one particular case, we'd still not be taking external timeouts into account causing the first issue described above.
2025-07-26 22:31:42 +00:00 · 2017-05-11 12:40:02 +02:00 · 2017-05-11 12:40:02 +02:00 · d9f86d0d87
commit d9f86d0d87
parent 2d63b598e1
3 changed files with 218 additions and 40 deletions
--- a/src/PVE/Tools.pm
+++ b/src/PVE/Tools.pm
@ -25,6 +25,7 @@ use Time::HiRes qw(usleep gettimeofday tv_interval alarm);
 use Net::DBus qw(dbus_uint32 dbus_uint64);
 use Net::DBus::Callback;
 use Net::DBus::Reactor;
 use Scalar::Util 'weaken';
 # avoid warning when parsing long hex values with hex()
 no warnings 'portable'; # Support for 64-bit ints required
@ -122,7 +123,12 @@ sub run_with_timeout {
 }
 # flock: we use one file handle per process, so lock file
-# can be called multiple times and succeeds for the same process.
+# can be nested multiple times and succeeds for the same process.
 #
 # Since this is the only way we lock now and we don't have the old
 # 'lock(); code(); unlock();' pattern anymore we do not actually need to
 # count how deep we're nesting. Therefore this hash now stores a weak reference
 # to a boolean telling us whether we already have a lock.
 my $lock_handles =  {};
@ -133,58 +139,67 @@ sub lock_file_full {
    my $mode = $shared ? LOCK_SH : LOCK_EX;
-    my $lock_func = sub {
+    my $lockhash = ($lock_handles->{$$} //= {});
        if (!$lock_handles->{$$}->{$filename}) {
 	    my $fh = new IO::File(">>$filename") ||
 		die "can't open file - $!\n";
 	    $lock_handles->{$$}->{$filename} = { fh => $fh, refcount => 0};
        }
-        if (!flock($lock_handles->{$$}->{$filename}->{fh}, $mode|LOCK_NB)) {
+    # Returns a locked file handle.
-            print STDERR "trying to acquire lock...";
+    my $get_locked_file = sub {
 	my $fh = IO::File->new(">>$filename")
 	    or die "can't open file - $!\n";
 	if (!flock($fh, $mode|LOCK_NB)) {
 	    print STDERR "trying to acquire lock...";
 	    my $success;
 	    while(1) {
-		$success = flock($lock_handles->{$$}->{$filename}->{fh}, $mode);
+		$success = flock($fh, $mode);
 		# try again on EINTR (see bug #273)
 		if ($success || ($! != EINTR)) {
 		    last;
 		}
 	    }
-            if (!$success) {
+	    if (!$success) {
-                print STDERR " failed\n";
+		print STDERR " failed\n";
-                die "can't acquire lock '$filename' - $!\n";
+		die "can't acquire lock '$filename' - $!\n";
-            }
+	    }
-            print STDERR " OK\n";
+	    print STDERR " OK\n";
-        }
+	}
-	$lock_handles->{$$}->{$filename}->{refcount}++;
+
 	return $fh;
    };
    my $res;
-
+    my $checkptr = $lockhash->{$filename};
-    eval { run_with_timeout($timeout, $lock_func); };
+    my $check = 0; # This must not go out of scope before running the code.
-    my $err = $@;
+    my $local_fh; # This must stay local
-    if ($err) {
+    if (!$checkptr || !$$checkptr) {
-	$err = "can't lock file '$filename' - $err";
+	# We cannot create a weak reference in a single atomic step, so we first
-    } else {
+	# create a false-value, then create a reference to it, then weaken it,
-	eval { $res = &$code(@param) };
+	# and after successfully locking the file we change the boolean value.
-	$err = $@;
+	#
-    }
+	# The reason for this is that if an outer SIGALRM throws an exception
-
+	# between creating the reference and weakening it, a subsequent call to
-    if (my $fh = $lock_handles->{$$}->{$filename}->{fh}) {
+	# lock_file_full() will see a leftover full reference to a valid
-	my $refcount = --$lock_handles->{$$}->{$filename}->{refcount};
+	# variable. This variable must be 0 in order for said call to attempt to
-	if ($refcount <= 0) {
+	# lock the file anew.
-	    $lock_handles->{$$}->{$filename} = undef;
+	#
-	    close ($fh);
+	# An externally triggered exception elsewhere in the code will cause the
 	# weak reference to become 'undef', and since the file handle is only
 	# stored in the local scope in $local_fh, the file will be closed by
 	# perl's cleanup routines as well.
 	#
 	# This still assumes that an IO::File handle can properly deal with such
 	# exceptions thrown during its own destruction, but that's up to perls
 	# guts now.
 	$lockhash->{$filename} = \$check;
 	weaken $lockhash->{$filename};
 	$local_fh = eval { run_with_timeout($timeout, $get_locked_file) };
 	if ($@) {
 	    $@ = "can't lock file '$filename' - $@";
 	    return undef;
 	}
 	$check = 1;
    }
-
+    $res = eval { &$code(@param); };
-    if ($err) {
+    return undef if $@;
        $@ = $err;
        return undef;
    }
    $@ = undef;
    return $res;
 }
--- a/test/Makefile
+++ b/test/Makefile
@ -6,6 +6,7 @@ all:
 check:
 	for d in $(SUBDIRS); do $(MAKE) -C $$d check; done
 	./lock_file.pl
 install: check
 distclean: clean
--- a/test/lock_file.pl
+++ b/test/lock_file.pl
@ -0,0 +1,162 @@
 #!/usr/bin/perl
 use lib '../src';
 use strict;
 use warnings;
 use Socket;
 use POSIX (); # don't import assert()
 use PVE::Tools 'lock_file_full';
 my $name = "test.lockfile.$$-";
 END {
 	system("rm $name*");
 };
 # Utilities:
 sub forked($$) {
    my ($code1, $code2) = @_;
    pipe(my $except_r, my $except_w) or die "pipe: $!\n";
    my $pid = fork();
    die "fork failed: $!\n" if !defined($pid);
    if ($pid == 0) {
 	close($except_r);
 	eval { $code1->() };
 	if ($@) {
 	    print {$except_w} $@;
 	    $except_w->flush();
 	    POSIX::_exit(1);
 	}
 	POSIX::_exit(0);
    }
    close($except_w);
    eval { $code2->() };
    my $err = $@;
    if ($err) {
 	kill(15, $pid);
    } else {
 	my $err = do { local $/ = undef; <$except_r> };
    }
    die "interrupted\n" if waitpid($pid, 0) != $pid;
    die $err if $err;
    # Check exit code:
    my $status = POSIX::WEXITSTATUS($?);
    if ($? == -1) {
 	die "failed to execute\n";
    } elsif (POSIX::WIFSIGNALED($?)) {
 	my $sig = POSIX::WTERMSIG($?);
 	die "got signal $sig\n";
    } elsif ($status != 0) {
 	die "exit code $status\n";
    }
 }
 # Book-keeping:
 my %_ran;
 sub new {
 	%_ran = ();
 }
 sub ran {
 	my ($what) = @_;
 	$_ran{$what} = 1;
 }
 sub assert {
 	my ($what) = @_;
 	die "code didn't run: $what\n" if !$_ran{$what};
 }
 sub assert_not {
 	my ($what) = @_;
 	die "code shouldn't have run: $what\n" if $_ran{$what};
 }
 # Regular lock:
 new();
 lock_file_full($name, 10, 0, sub { ran('single lock') });
 assert('single lock');
 # Lock multiple times in a row:
 new();
 lock_file_full($name, 10, 0, sub { ran('lock A') });
 assert('lock A');
 lock_file_full($name, 10, 0, sub { ran('lock B') });
 assert('lock B');
 # Nested lock:
 new();
 lock_file_full($name, 10, 0, sub {
 	ran('lock A');
 	lock_file_full($name, 10, 0, sub { ran('lock B') });
 	assert('lock B');
 	ran('lock C');
 });
 assert('lock A');
 assert('lock B');
 assert('lock C');
 # Independent locks:
 new();
 lock_file_full($name, 10, 0, sub {
 	ran('lock A');
 	# locks file "${name}2"
 	lock_file_full($name.2, 10, 0, sub { ran('lock B') });
 	assert('lock B');
 	ran('lock C');
 });
 assert('lock A');
 assert('lock B');
 assert('lock C');
 # Does it actually lock? (shared=0)
 # Can we get two simultaneous shared locks? (shared=1)
 sub forktest1($) {
    my ($shared) = @_;
    new();
    # socket pair for synchronization
    socketpair(my $fmain, my $fother, AF_UNIX, SOCK_STREAM, PF_UNSPEC)
 	or die "socketpair(): $!\n";
    forked sub {
 	    # other side
 	    close($fmain);
 	    my $line;
 	    lock_file_full($name, 60, $shared, sub {
 		ran('other side');
 		# tell parent we've acquired the lock
 		print {$fother} "1\n";
 		$fother->flush();
 		# wait for parent to be done trying to lock
 		$line = <$fother>;
 	    });
 	    die $@ if $@;
 	    die "parent failed\n" if !$line || $line ne "2\n";
 	    assert('other side');
    }, sub {
 	    # main process
 	    # Wait for our child to lock:
 	    close($fother);
 	    my $line = <$fmain>;
 	    die "child failed to acquire a lock\n" if !$line || $line ne "1\n";
 	    lock_file_full($name, 1, $shared, sub {
 		ran('local side');
 	    });
 	    if ($shared) {
 		assert('local side');
 	    } else {
 		assert_not('local side');
 	    }
 	    print {$fmain} "2\n";
 	    $fmain->flush();
    };
    close($fmain);
 }
 forktest1(0);
 forktest1(1);
 print "Ok\n"; # Line-terminate the 'trying to acquire lock' message(s)