diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index e1a0aacc..74a54d02 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -579,6 +579,7 @@ pub fn verify(
         move |worker| {
             let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024*16)));
             let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
+            let filter = |_backup_info: &BackupInfo| { true };
 
             let failed_dirs = if let Some(backup_dir) = backup_dir {
                 let mut res = Vec::new();
@@ -602,10 +603,11 @@ pub fn verify(
                     None,
                     worker.clone(),
                     worker.upid(),
+                    &filter,
                 )?;
                 failed_dirs
             } else {
-                verify_all_backups(datastore, worker.clone(), worker.upid())?
+                verify_all_backups(datastore, worker.clone(), worker.upid(), &filter)?
             };
             if failed_dirs.len() > 0 {
                 worker.log("Failed to verify following snapshots:");
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 2103da40..b09ad173 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -404,7 +404,7 @@ pub fn verify_backup_dir_with_lock(
 /// Returns
 /// - Ok((count, failed_dirs)) where failed_dirs had verification errors
 /// - Err(_) if task was aborted
-pub fn verify_backup_group(
+pub fn verify_backup_group<F: Fn(&BackupInfo) -> bool>(
     datastore: Arc<DataStore>,
     group: &BackupGroup,
     verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
@@ -412,6 +412,7 @@ pub fn verify_backup_group(
     progress: Option<(usize, usize)>, // (done, snapshot_count)
     worker: Arc<dyn TaskState + Send + Sync>,
     upid: &UPID,
+    filter: &F,
 ) -> Result<(usize, Vec<String>), Error> {
 
     let mut errors = Vec::new();
@@ -437,6 +438,17 @@ pub fn verify_backup_group(
     BackupInfo::sort_list(&mut list, false); // newest first
     for info in list {
         count += 1;
+
+        if filter(&info) == false {
+            task_log!(
+                worker,
+                "SKIPPED: verify {}:{} (already verified)",
+                datastore.name(),
+                info.backup_dir,
+            );
+            continue;
+        }
+
         if !verify_backup_dir(
             datastore.clone(),
             &info.backup_dir,
@@ -470,10 +482,11 @@
 /// Returns
 /// - Ok(failed_dirs) where failed_dirs had verification errors
 /// - Err(_) if task was aborted
-pub fn verify_all_backups(
+pub fn verify_all_backups<F: Fn(&BackupInfo) -> bool>(
     datastore: Arc<DataStore>,
     worker: Arc<dyn TaskState + Send + Sync>,
     upid: &UPID,
+    filter: &F,
 ) -> Result<Vec<String>, Error> {
 
     let mut errors = Vec::new();
@@ -518,6 +531,7 @@
             Some((done, snapshot_count)),
             worker.clone(),
             upid,
+            filter,
         )?;
         errors.append(&mut group_errors);
 
diff --git a/src/server/email_notifications.rs b/src/server/email_notifications.rs
index bad9f09f..be25e24f 100644
--- a/src/server/email_notifications.rs
+++ b/src/server/email_notifications.rs
@@ -57,7 +57,11 @@ const VERIFY_ERR_TEMPLATE: &str = r###"
 
 Job ID:    {{job.id}}
 Datastore: {{job.store}}
 
-Verification failed: {{error}}
+Verification failed on these snapshots:
+
+{{#each errors}}
+  {{this}}
+{{/each}}
 
 "###;
@@ -150,27 +154,31 @@ pub fn send_gc_status(
 
 pub fn send_verify_status(
     email: &str,
     job: VerificationJobConfig,
-    result: &Result<(), Error>,
+    result: &Result<Vec<String>, Error>,
 ) -> Result<(), Error> {
 
     let text = match result {
-        Ok(()) => {
+        Ok(errors) if errors.is_empty() => {
             let data = json!({ "job": job });
             HANDLEBARS.render("verify_ok_template", &data)?
         }
-        Err(err) => {
-            let data = json!({ "job": job, "error": err.to_string() });
+        Ok(errors) => {
+            let data = json!({ "job": job, "errors": errors });
             HANDLEBARS.render("verify_err_template", &data)?
         }
+        Err(_) => {
+            // aborted job - do not send any email
+            return Ok(());
+        }
     };
 
     let subject = match result {
-        Ok(()) => format!(
+        Ok(errors) if errors.is_empty() => format!(
             "Verify Datastore '{}' successful",
             job.store,
         ),
-        Err(_) => format!(
+        _ => format!(
             "Verify Datastore '{}' failed",
             job.store,
         ),
diff --git a/src/server/verify_job.rs b/src/server/verify_job.rs
index 064fb2b7..e5d932d4 100644
--- a/src/server/verify_job.rs
+++ b/src/server/verify_job.rs
@@ -1,7 +1,4 @@
-use std::collections::HashSet;
-use std::sync::{Arc, Mutex};
-
-use anyhow::{bail, Error};
+use anyhow::{format_err, Error};
 
 use crate::{
     server::WorkerTask,
@@ -11,7 +8,7 @@ use crate::{
     backup::{
         DataStore,
         BackupInfo,
-        verify_backup_dir,
+        verify_all_backups,
    },
     task_log,
 };
@@ -23,28 +20,36 @@ pub fn do_verification_job(
     userid: &Userid,
     schedule: Option<String>,
 ) -> Result<String, Error> {
+
     let datastore = DataStore::lookup_datastore(&verification_job.store)?;
 
-    let mut backups_to_verify = BackupInfo::list_backups(&datastore.base_path())?;
-    if verification_job.ignore_verified.unwrap_or(true) {
-        backups_to_verify.retain(|backup_info| {
-            let manifest = match datastore.load_manifest(&backup_info.backup_dir) {
-                Ok((manifest, _)) => manifest,
-                Err(_) => return false,
-            };
+    let datastore2 = datastore.clone();
 
-            let raw_verify_state = manifest.unprotected["verify_state"].clone();
-            let last_state = match serde_json::from_value::<SnapshotVerifyState>(raw_verify_state) {
-                Ok(last_state) => last_state,
-                Err(_) => return true,
-            };
+    let outdated_after = verification_job.outdated_after.clone();
+    let ignore_verified = verification_job.ignore_verified.unwrap_or(true);
 
-            let now = proxmox::tools::time::epoch_i64();
-            let days_since_last_verify = (now - last_state.upid.starttime) / 86400;
-            verification_job.outdated_after.is_some()
-                && days_since_last_verify > verification_job.outdated_after.unwrap()
-        })
-    }
+    let filter = move |backup_info: &BackupInfo| {
+        if !ignore_verified {
+            return true;
+        }
+        let manifest = match datastore2.load_manifest(&backup_info.backup_dir) {
+            Ok((manifest, _)) => manifest,
+            Err(_) => return false,
+        };
+
+        let raw_verify_state = manifest.unprotected["verify_state"].clone();
+        let last_state = match serde_json::from_value::<SnapshotVerifyState>(raw_verify_state) {
+            Ok(last_state) => last_state,
+            Err(_) => return true,
+        };
+
+        let now = proxmox::tools::time::epoch_i64();
+        let days_since_last_verify = (now - last_state.upid.starttime) / 86400;
+
+        outdated_after
+            .map(|v| days_since_last_verify > v)
+            .unwrap_or(true)
+    };
 
     let email = crate::server::lookup_user_email(userid);
 
@@ -59,42 +64,18 @@ pub fn do_verification_job(
             job.start(&worker.upid().to_string())?;
 
             task_log!(worker,"Starting datastore verify job '{}'", job_id);
-            task_log!(worker,"verifying {} backups", backups_to_verify.len());
             if let Some(event_str) = schedule {
                 task_log!(worker,"task triggered by schedule '{}'", event_str);
             }
 
-            let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024 * 16)));
-            let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
-            let result = proxmox::try_block!({
-                let mut failed_dirs: Vec<String> = Vec::new();
+            let result = verify_all_backups(datastore, worker.clone(), worker.upid(), &filter);
+            let job_result = match result {
+                Ok(ref errors) if errors.is_empty() => Ok(()),
+                Ok(_) => Err(format_err!("verification failed - please check the log for details")),
+                Err(_) => Err(format_err!("verification failed - job aborted")),
+            };
 
-                for backup_info in backups_to_verify {
-                    let verification_result = verify_backup_dir(
-                        datastore.clone(),
-                        &backup_info.backup_dir,
-                        verified_chunks.clone(),
-                        corrupt_chunks.clone(),
-                        worker.clone(),
-                        worker.upid().clone()
-                    );
-
-                    if let Ok(false) = verification_result {
-                        failed_dirs.push(backup_info.backup_dir.to_string());
-                    } // otherwise successful or aborted
-                }
-
-                if !failed_dirs.is_empty() {
-                    task_log!(worker,"Failed to verify following snapshots:",);
-                    for dir in failed_dirs {
-                        task_log!(worker, "\t{}", dir)
-                    }
-                    bail!("verification failed - please check the log for details");
-                }
-                Ok(())
-            });
-
-            let status = worker.create_state(&result);
+            let status = worker.create_state(&job_result);
 
             match job.finish(status) {
                 Err(err) => eprintln!(
@@ -111,7 +92,7 @@ pub fn do_verification_job(
                 }
             }
 
-            result
+            job_result
         },
     )?;
     Ok(upid_str)
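
Not part of the patch itself: the series threads a generic `filter: &F` with `F: Fn(&BackupInfo) -> bool` through verify_backup_group()/verify_all_backups(), so each caller decides per snapshot whether to verify or skip (the API handler passes an always-true closure, the scheduled job an age-based one). Below is a minimal, standalone sketch of that shape; SnapshotInfo, verify_all and the epoch values are hypothetical stand-ins, not proxmox-backup types.

// Standalone illustration of the filter-closure pattern; all names are invented.

struct SnapshotInfo {
    name: String,
    last_verified_epoch: Option<i64>, // None = never verified
}

// Generic over the filter, like verify_all_backups<F: Fn(&BackupInfo) -> bool>.
// Returns the names of snapshots that failed verification.
fn verify_all<F: Fn(&SnapshotInfo) -> bool>(snapshots: &[SnapshotInfo], filter: &F) -> Vec<String> {
    let mut failed = Vec::new();
    for info in snapshots {
        if !filter(info) {
            println!("SKIPPED: {} (already verified)", info.name);
            continue;
        }
        // stand-in for the real chunk verification
        let ok = true;
        if !ok {
            failed.push(info.name.clone());
        } else {
            println!("verified {}", info.name);
        }
    }
    failed
}

fn main() {
    let now = 1_700_000_000i64;                // "current" epoch, hard-coded for the example
    let outdated_after: Option<i64> = Some(30); // days, like the job config option
    let ignore_verified = true;

    // Mirrors the job filter in the patch: verify when never verified, when the
    // last verification is older than outdated_after, or when outdated_after is unset.
    let filter = move |info: &SnapshotInfo| {
        if !ignore_verified {
            return true;
        }
        match info.last_verified_epoch {
            None => true,
            Some(t) => {
                let days_since_last_verify = (now - t) / 86400;
                outdated_after
                    .map(|v| days_since_last_verify > v)
                    .unwrap_or(true)
            }
        }
    };

    let snapshots = vec![
        SnapshotInfo { name: "vm/100/2020-10-01".into(), last_verified_epoch: Some(now - 5 * 86400) },
        SnapshotInfo { name: "vm/100/2020-08-01".into(), last_verified_epoch: None },
    ];
    let failed = verify_all(&snapshots, &filter);
    assert!(failed.is_empty());
}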
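Also not part of the patch: the VERIFY_ERR_TEMPLATE change relies on handlebars' {{#each}} iterating a JSON array, which is why send_verify_status() now takes a Result<Vec<String>, Error> and passes the vector as "errors". A small sketch of how that template renders, assuming the handlebars and serde_json crates and made-up job/snapshot values:

// Renders the new-style error template against sample data; the job id,
// store name and snapshot paths below are invented for illustration.

use handlebars::Handlebars;
use serde_json::json;

const VERIFY_ERR_TEMPLATE: &str = r###"
Job ID:    {{job.id}}
Datastore: {{job.store}}

Verification failed on these snapshots:

{{#each errors}}
  {{this}}
{{/each}}
"###;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut hb = Handlebars::new();
    hb.register_template_string("verify_err_template", VERIFY_ERR_TEMPLATE)?;

    // "errors" plays the role of the Vec<String> returned by verify_all_backups().
    let data = json!({
        "job": { "id": "daily-verify", "store": "store1" },
        "errors": [
            "vm/100/2020-10-01T00:00:00Z",
            "ct/101/2020-10-02T00:00:00Z"
        ]
    });

    println!("{}", hb.render("verify_err_template", &data)?);
    Ok(())
}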