diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs index ef2b982c..c7f7ed53 100644 --- a/pbs-datastore/src/backup_info.rs +++ b/pbs-datastore/src/backup_info.rs @@ -3,9 +3,11 @@ use std::os::unix::io::RawFd; use std::path::PathBuf; use std::sync::Arc; -use anyhow::{bail, format_err, Error}; +use anyhow::{bail, format_err, Context, Error}; -use proxmox_sys::fs::{lock_dir_noblock, replace_file, CreateOptions}; +use proxmox_sys::fs::{ + lock_dir_noblock, lock_dir_noblock_shared, replace_file, CreateOptions, DirLockGuard, +}; use pbs_api_types::{ Authid, BackupGroupDeleteStats, BackupNamespace, BackupType, GroupFilter, VerifyState, @@ -199,9 +201,10 @@ impl BackupGroup { /// Returns `BackupGroupDeleteStats`, containing the number of deleted snapshots /// and number of protected snaphsots, which therefore were not removed. pub fn destroy(&self) -> Result { + let _guard = self + .lock() + .with_context(|| format!("while destroying group '{self:?}'"))?; let path = self.full_group_path(); - let _guard = - proxmox_sys::fs::lock_dir_noblock(&path, "backup group", "possible running backup")?; log::info!("removing backup group {:?}", path); let mut delete_stats = BackupGroupDeleteStats::default(); @@ -237,6 +240,15 @@ impl BackupGroup { self.store .set_owner(&self.ns, self.as_ref(), auth_id, force) } + + /// Lock a group exclusively + pub fn lock(&self) -> Result { + lock_dir_noblock( + &self.full_group_path(), + "backup group", + "possible running backup", + ) + } } impl AsRef for BackupGroup { @@ -442,15 +454,33 @@ impl BackupDir { .map_err(|err| format_err!("unable to acquire manifest lock {:?} - {}", &path, err)) } + /// Lock this snapshot exclusively + pub fn lock(&self) -> Result { + lock_dir_noblock( + &self.full_path(), + "snapshot", + "backup is running or snapshot is in use", + ) + } + + /// Acquire a shared lock on this snapshot + pub fn lock_shared(&self) -> Result { + lock_dir_noblock_shared( + &self.full_path(), + "snapshot", + "backup is running or snapshot is in use, could not acquire shared lock", + ) + } + /// Destroy the whole snapshot, bails if it's protected /// /// Setting `force` to true skips locking and thus ignores if the backup is currently in use. pub fn destroy(&self, force: bool) -> Result<(), Error> { - let full_path = self.full_path(); - let (_guard, _manifest_guard); if !force { - _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?; + _guard = self + .lock() + .with_context(|| format!("while destroying snapshot '{self:?}'"))?; _manifest_guard = self.lock_manifest()?; } @@ -458,6 +488,7 @@ impl BackupDir { bail!("cannot remove protected snapshot"); // use special error type? } + let full_path = self.full_path(); log::info!("removing backup snapshot {:?}", full_path); std::fs::remove_dir_all(&full_path).map_err(|err| { format_err!("removing backup snapshot {:?} failed - {}", full_path, err,) diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index a6a91ca7..1cbd0038 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -5,7 +5,7 @@ use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; use std::sync::{Arc, LazyLock, Mutex}; -use anyhow::{bail, format_err, Error}; +use anyhow::{bail, format_err, Context, Error}; use nix::unistd::{unlinkat, UnlinkatFlags}; use tracing::{info, warn}; @@ -13,8 +13,8 @@ use proxmox_human_byte::HumanByte; use proxmox_schema::ApiType; use proxmox_sys::error::SysError; +use proxmox_sys::fs::DirLockGuard; use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions}; -use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard}; use proxmox_sys::linux::procfs::MountInfo; use proxmox_sys::process_locker::ProcessLockSharedGuard; use proxmox_worker_task::WorkerTaskContext; @@ -774,41 +774,35 @@ impl DataStore { /// /// This also acquires an exclusive lock on the directory and returns the lock guard. pub fn create_locked_backup_group( - &self, + self: &Arc, ns: &BackupNamespace, backup_group: &pbs_api_types::BackupGroup, auth_id: &Authid, ) -> Result<(Authid, DirLockGuard), Error> { - // create intermediate path first: - let mut full_path = self.base_path(); - for ns in ns.components() { - full_path.push("ns"); - full_path.push(ns); - } - full_path.push(backup_group.ty.as_str()); - std::fs::create_dir_all(&full_path)?; + let backup_group = self.backup_group(ns.clone(), backup_group.clone()); - full_path.push(&backup_group.id); + // create intermediate path first + let full_path = backup_group.full_group_path(); - // create the last component now + std::fs::create_dir_all(full_path.parent().ok_or_else(|| { + format_err!("could not construct parent path for group {backup_group:?}") + })?)?; + + // now create the group, this allows us to check whether it existed before match std::fs::create_dir(&full_path) { Ok(_) => { - let guard = lock_dir_noblock( - &full_path, - "backup group", - "another backup is already running", - )?; - self.set_owner(ns, backup_group, auth_id, false)?; - let owner = self.get_owner(ns, backup_group)?; // just to be sure + let guard = backup_group.lock().with_context(|| { + format!("while creating new locked backup group '{backup_group:?}'") + })?; + self.set_owner(ns, backup_group.group(), auth_id, false)?; + let owner = self.get_owner(ns, backup_group.group())?; // just to be sure Ok((owner, guard)) } Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => { - let guard = lock_dir_noblock( - &full_path, - "backup group", - "another backup is already running", - )?; - let owner = self.get_owner(ns, backup_group)?; // just to be sure + let guard = backup_group.lock().with_context(|| { + format!("while creating locked backup group '{backup_group:?}'") + })?; + let owner = self.get_owner(ns, backup_group.group())?; // just to be sure Ok((owner, guard)) } Err(err) => bail!("unable to create backup group {:?} - {}", full_path, err), @@ -819,29 +813,25 @@ impl DataStore { /// /// The BackupGroup directory needs to exist. pub fn create_locked_backup_dir( - &self, + self: &Arc, ns: &BackupNamespace, backup_dir: &pbs_api_types::BackupDir, ) -> Result<(PathBuf, bool, DirLockGuard), Error> { - let full_path = self.snapshot_path(ns, backup_dir); - let relative_path = full_path.strip_prefix(self.base_path()).map_err(|err| { - format_err!( - "failed to produce correct path for backup {backup_dir} in namespace {ns}: {err}" - ) - })?; + let backup_dir = self.backup_dir(ns.clone(), backup_dir.clone())?; + let relative_path = backup_dir.relative_path(); - let lock = || { - lock_dir_noblock( - &full_path, - "snapshot", - "internal error - tried creating snapshot that's already in use", - ) - }; - - match std::fs::create_dir(&full_path) { - Ok(_) => Ok((relative_path.to_owned(), true, lock()?)), + match std::fs::create_dir(backup_dir.full_path()) { + Ok(_) => { + let guard = backup_dir.lock().with_context(|| { + format!("while creating new locked snapshot '{backup_dir:?}'") + })?; + Ok((relative_path, true, guard)) + } Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => { - Ok((relative_path.to_owned(), false, lock()?)) + let guard = backup_dir + .lock() + .with_context(|| format!("while creating locked snapshot '{backup_dir:?}'"))?; + Ok((relative_path, false, guard)) } Err(e) => Err(e.into()), } @@ -1305,7 +1295,9 @@ impl DataStore { bail!("snapshot {} does not exist!", backup_dir.dir()); } - let _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?; + let _guard = backup_dir.lock().with_context(|| { + format!("while updating the protection status of snapshot '{backup_dir:?}'") + })?; let protected_path = backup_dir.protected_file(); if protection { diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs index 99ae1180..edff19ef 100644 --- a/pbs-datastore/src/snapshot_reader.rs +++ b/pbs-datastore/src/snapshot_reader.rs @@ -4,11 +4,9 @@ use std::path::Path; use std::rc::Rc; use std::sync::Arc; -use anyhow::{bail, Error}; +use anyhow::{bail, Context, Error}; use nix::dir::Dir; -use proxmox_sys::fs::lock_dir_noblock_shared; - use pbs_api_types::{ print_store_and_ns, ArchiveType, BackupNamespace, Operation, CLIENT_LOG_BLOB_NAME, MANIFEST_BLOB_NAME, @@ -48,8 +46,9 @@ impl SnapshotReader { bail!("snapshot {} does not exist!", snapshot.dir()); } - let locked_dir = - lock_dir_noblock_shared(&snapshot_path, "snapshot", "locked by another operation")?; + let locked_dir = snapshot + .lock_shared() + .with_context(|| format!("while trying to read snapshot '{snapshot:?}'"))?; let datastore_name = datastore.name().to_string(); let manifest = match snapshot.load_manifest() { diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs index 99d885e2..7943b576 100644 --- a/src/api2/backup/environment.rs +++ b/src/api2/backup/environment.rs @@ -1,4 +1,4 @@ -use anyhow::{bail, format_err, Error}; +use anyhow::{bail, format_err, Context, Error}; use nix::dir::Dir; use std::collections::HashMap; use std::sync::{Arc, Mutex}; @@ -8,7 +8,7 @@ use ::serde::Serialize; use serde_json::{json, Value}; use proxmox_router::{RpcEnvironment, RpcEnvironmentType}; -use proxmox_sys::fs::{lock_dir_noblock_shared, replace_file, CreateOptions}; +use proxmox_sys::fs::{replace_file, CreateOptions}; use pbs_api_types::Authid; use pbs_datastore::backup_info::{BackupDir, BackupInfo}; @@ -645,12 +645,12 @@ impl BackupEnvironment { // Downgrade to shared lock, the backup itself is finished drop(excl_snap_lock); - let snap_lock = lock_dir_noblock_shared( - &self.backup_dir.full_path(), - "snapshot", - "snapshot is already locked by another operation", - )?; - + let snap_lock = self.backup_dir.lock_shared().with_context(|| { + format!( + "while trying to verify snapshot '{:?}' after completion", + self.backup_dir + ) + })?; let worker_id = format!( "{}:{}/{}/{:08X}", self.datastore.name(), diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs index efc97a1f..344c80d4 100644 --- a/src/api2/backup/mod.rs +++ b/src/api2/backup/mod.rs @@ -1,6 +1,6 @@ //! Backup protocol (HTTP2 upgrade) -use anyhow::{bail, format_err, Error}; +use anyhow::{bail, format_err, Context, Error}; use futures::*; use hex::FromHex; use hyper::header::{HeaderValue, CONNECTION, UPGRADE}; @@ -17,7 +17,6 @@ use proxmox_router::{ }; use proxmox_schema::*; use proxmox_sortable_macro::sortable; -use proxmox_sys::fs::lock_dir_noblock_shared; use pbs_api_types::{ ArchiveType, Authid, BackupNamespace, BackupType, Operation, VerifyState, @@ -186,12 +185,10 @@ fn upgrade_to_backup_protocol( } // lock last snapshot to prevent forgetting/pruning it during backup - let full_path = last.backup_dir.full_path(); - Some(lock_dir_noblock_shared( - &full_path, - "snapshot", - "base snapshot is already locked by another operation", - )?) + let guard = last.backup_dir + .lock_shared() + .with_context(|| format!("while locking last snapshot during backup '{last:?}'"))?; + Some(guard) } else { None }; diff --git a/src/api2/reader/mod.rs b/src/api2/reader/mod.rs index 1713f182..cc791299 100644 --- a/src/api2/reader/mod.rs +++ b/src/api2/reader/mod.rs @@ -1,6 +1,6 @@ //! Backup reader/restore protocol (HTTP2 upgrade) -use anyhow::{bail, format_err, Error}; +use anyhow::{bail, format_err, Context, Error}; use futures::*; use hex::FromHex; use hyper::header::{self, HeaderValue, CONNECTION, UPGRADE}; @@ -16,7 +16,6 @@ use proxmox_router::{ }; use proxmox_schema::{BooleanSchema, ObjectSchema}; use proxmox_sortable_macro::sortable; -use proxmox_sys::fs::lock_dir_noblock_shared; use pbs_api_types::{ ArchiveType, Authid, Operation, BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA, @@ -129,11 +128,9 @@ fn upgrade_to_backup_reader_protocol( bail!("snapshot {} does not exist.", backup_dir.dir()); } - let _guard = lock_dir_noblock_shared( - &backup_dir.full_path(), - "snapshot", - "locked by another operation", - )?; + let _guard = backup_dir + .lock_shared() + .with_context(|| format!("while reading snapshot '{backup_dir:?}'"))?; let path = datastore.base_path(); diff --git a/src/backup/verify.rs b/src/backup/verify.rs index f652c262..10a64fda 100644 --- a/src/backup/verify.rs +++ b/src/backup/verify.rs @@ -4,10 +4,9 @@ use std::sync::{Arc, Mutex}; use std::time::Instant; use anyhow::{bail, Error}; -use nix::dir::Dir; +use proxmox_sys::fs::DirLockGuard; use tracing::{error, info, warn}; -use proxmox_sys::fs::lock_dir_noblock_shared; use proxmox_worker_task::WorkerTaskContext; use pbs_api_types::{ @@ -307,11 +306,8 @@ pub fn verify_backup_dir( return Ok(true); } - let snap_lock = lock_dir_noblock_shared( - &backup_dir.full_path(), - "snapshot", - "locked by another operation", - ); + let snap_lock = backup_dir.lock_shared(); + match snap_lock { Ok(snap_lock) => { verify_backup_dir_with_lock(verify_worker, backup_dir, upid, filter, snap_lock) @@ -334,7 +330,7 @@ pub fn verify_backup_dir_with_lock( backup_dir: &BackupDir, upid: UPID, filter: Option<&dyn Fn(&BackupManifest) -> bool>, - _snap_lock: Dir, + _snap_lock: DirLockGuard, ) -> Result { let datastore_name = verify_worker.datastore.name(); let backup_dir_name = backup_dir.dir(); diff --git a/src/server/sync.rs b/src/server/sync.rs index 4dd46c5a..b6852aff 100644 --- a/src/server/sync.rs +++ b/src/server/sync.rs @@ -10,6 +10,7 @@ use std::time::Duration; use anyhow::{bail, format_err, Context, Error}; use futures::{future::FutureExt, select}; use hyper::http::StatusCode; +use proxmox_sys::fs::DirLockGuard; use serde_json::json; use tracing::{info, warn}; @@ -105,7 +106,7 @@ pub(crate) struct RemoteSourceReader { } pub(crate) struct LocalSourceReader { - pub(crate) _dir_lock: Arc>, + pub(crate) _dir_lock: Arc>, pub(crate) path: PathBuf, pub(crate) datastore: Arc, } @@ -478,13 +479,11 @@ impl SyncSource for LocalSource { dir: &BackupDir, ) -> Result, Error> { let dir = self.store.backup_dir(ns.clone(), dir.clone())?; - let dir_lock = proxmox_sys::fs::lock_dir_noblock_shared( - &dir.full_path(), - "snapshot", - "locked by another operation", - )?; + let guard = dir + .lock() + .with_context(|| format!("while reading snapshot '{dir:?}' for a sync job"))?; Ok(Arc::new(LocalSourceReader { - _dir_lock: Arc::new(Mutex::new(dir_lock)), + _dir_lock: Arc::new(Mutex::new(guard)), path: dir.full_path(), datastore: dir.datastore().clone(), }))