fix #4315: jobs: modify GroupFilter so include/exclude is tracked

After some discussion I canged the include/exclude behavior to first run
all include filter and after that all exclude filter (rather then
allowing to alternate inbetween). This is done by splitting them into 2
lists, running include first.

A lot of discussion happened how edge cases should be handled and we
came to following conclusion:

no include filter + no exclude filter => include all
some include filter + no exclude filter => filter as always
no include filter +  some exclude filter => include all then exclude

Since a GroupFilter now also features an behavior, the Struct has been
renamed To GroupType (since simply type is a keyword). The new
GroupFilter now has a behaviour as a flag 'is_exclude'.

I considered calling it 'is_include' but a reader later then might not
know what the opposite of 'include' is (do not include?  deactivate?). I
also considered making a new enum 'behaviour' but since there are only 2
values I considered it over engeneered.

Signed-off-by: Philipp Hufnagl <p.hufnagl@proxmox.com>
This commit is contained in:
Philipp Hufnagl 2024-01-02 12:06:52 +01:00 committed by Wolfgang Bumiller
parent 6b0d7f3d32
commit 59c9273698
4 changed files with 100 additions and 72 deletions

View File

@ -10,9 +10,9 @@ use proxmox_schema::{
}; };
use crate::{ use crate::{
Authid, CryptMode, Fingerprint, MaintenanceMode, Userid, DATASTORE_NOTIFY_STRING_SCHEMA, Authid, CryptMode, Fingerprint, GroupFilter, MaintenanceMode, Userid,
GC_SCHEDULE_SCHEMA, PROXMOX_SAFE_ID_FORMAT, PRUNE_SCHEDULE_SCHEMA, SHA256_HEX_REGEX, DATASTORE_NOTIFY_STRING_SCHEMA, GC_SCHEDULE_SCHEMA, PROXMOX_SAFE_ID_FORMAT,
SINGLE_LINE_COMMENT_SCHEMA, UPID, PRUNE_SCHEDULE_SCHEMA, SHA256_HEX_REGEX, SINGLE_LINE_COMMENT_SCHEMA, UPID,
}; };
const_regex! { const_regex! {
@ -843,19 +843,37 @@ impl BackupGroup {
} }
pub fn matches(&self, filter: &crate::GroupFilter) -> bool { pub fn matches(&self, filter: &crate::GroupFilter) -> bool {
use crate::GroupFilter; use crate::FilterType;
match &filter.filter_type {
match filter { FilterType::Group(backup_group) => {
GroupFilter::Group(backup_group) => {
match backup_group.parse::<BackupGroup>() { match backup_group.parse::<BackupGroup>() {
Ok(group) => *self == group, Ok(group) => *self == group,
Err(_) => false, // shouldn't happen if value is schema-checked Err(_) => false, // shouldn't happen if value is schema-checked
} }
} }
GroupFilter::BackupType(ty) => self.ty == *ty, FilterType::BackupType(ty) => self.ty == *ty,
GroupFilter::Regex(regex) => regex.is_match(&self.to_string()), FilterType::Regex(regex) => regex.is_match(&self.to_string()),
} }
} }
pub fn apply_filters(&self, filters: &[GroupFilter]) -> bool {
// since there will only be view filter in the list, an extra iteration to get the umber of
// include filter should not be an issue
let is_included = if filters.iter().filter(|f| !f.is_exclude).count() == 0 {
true
} else {
filters
.iter()
.filter(|f| !f.is_exclude)
.any(|filter| self.matches(filter))
};
is_included
&& !filters
.iter()
.filter(|f| f.is_exclude)
.any(|filter| self.matches(filter))
}
} }
impl AsRef<BackupGroup> for BackupGroup { impl AsRef<BackupGroup> for BackupGroup {

View File

@ -388,7 +388,7 @@ pub struct TapeBackupJobStatus {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
/// Filter for matching `BackupGroup`s, for use with `BackupGroup::filter`. /// Filter for matching `BackupGroup`s, for use with `BackupGroup::filter`.
pub enum GroupFilter { pub enum FilterType {
/// BackupGroup type - either `vm`, `ct`, or `host`. /// BackupGroup type - either `vm`, `ct`, or `host`.
BackupType(BackupType), BackupType(BackupType),
/// Full identifier of BackupGroup, including type /// Full identifier of BackupGroup, including type
@ -397,7 +397,7 @@ pub enum GroupFilter {
Regex(Regex), Regex(Regex),
} }
impl PartialEq for GroupFilter { impl PartialEq for FilterType {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
match (self, other) { match (self, other) {
(Self::BackupType(a), Self::BackupType(b)) => a == b, (Self::BackupType(a), Self::BackupType(b)) => a == b,
@ -408,27 +408,52 @@ impl PartialEq for GroupFilter {
} }
} }
#[derive(Clone, Debug)]
pub struct GroupFilter {
pub is_exclude: bool,
pub filter_type: FilterType,
}
impl PartialEq for GroupFilter {
fn eq(&self, other: &Self) -> bool {
self.filter_type == other.filter_type && self.is_exclude == other.is_exclude
}
}
impl Eq for GroupFilter {}
impl std::str::FromStr for GroupFilter { impl std::str::FromStr for GroupFilter {
type Err = anyhow::Error; type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.split_once(':') { let (is_exclude, type_str) = match s.split_once(':') {
Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| GroupFilter::Group(value.to_string())), Some(("include", value)) => (false, value),
Some(("type", value)) => Ok(GroupFilter::BackupType(value.parse()?)), Some(("exclude", value)) => (true, value),
Some(("regex", value)) => Ok(GroupFilter::Regex(Regex::new(value)?)), _ => (false, s),
};
let filter_type = match type_str.split_once(':') {
Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| FilterType::Group(value.to_string())),
Some(("type", value)) => Ok(FilterType::BackupType(value.parse()?)),
Some(("regex", value)) => Ok(FilterType::Regex(Regex::new(value)?)),
Some((ty, _value)) => Err(format_err!("expected 'group', 'type' or 'regex' prefix, got '{}'", ty)), Some((ty, _value)) => Err(format_err!("expected 'group', 'type' or 'regex' prefix, got '{}'", ty)),
None => Err(format_err!("input doesn't match expected format '<group:GROUP||type:<vm|ct|host>|regex:REGEX>'")), None => Err(format_err!("input doesn't match expected format '<group:GROUP||type:<vm|ct|host>|regex:REGEX>'")),
}.map_err(|err| format_err!("'{}' - {}", s, err)) }?;
Ok(GroupFilter {
is_exclude,
filter_type,
})
} }
} }
// used for serializing below, caution! // used for serializing below, caution!
impl std::fmt::Display for GroupFilter { impl std::fmt::Display for GroupFilter {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { let exclude = if self.is_exclude { "exclude:" } else { "" };
GroupFilter::BackupType(backup_type) => write!(f, "type:{}", backup_type), match &self.filter_type {
GroupFilter::Group(backup_group) => write!(f, "group:{}", backup_group), FilterType::BackupType(backup_type) => write!(f, "{}type:{}", exclude, backup_type),
GroupFilter::Regex(regex) => write!(f, "regex:{}", regex.as_str()), FilterType::Group(backup_group) => write!(f, "{}group:{}", exclude, backup_group),
FilterType::Regex(regex) => write!(f, "{}regex:{}", exclude, regex.as_str()),
} }
} }
} }
@ -441,9 +466,9 @@ fn verify_group_filter(input: &str) -> Result<(), anyhow::Error> {
} }
pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new( pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
"Group filter based on group identifier ('group:GROUP'), group type ('type:<vm|ct|host>'), or regex ('regex:RE').") "Group filter based on group identifier ('group:GROUP'), group type ('type:<vm|ct|host>'), or regex ('regex:RE'). Can be inverted by adding 'exclude:' before.")
.format(&ApiStringFormat::VerifyFn(verify_group_filter)) .format(&ApiStringFormat::VerifyFn(verify_group_filter))
.type_text("<type:<vm|ct|host>|group:GROUP|regex:RE>") .type_text("[<exclude:|include:>]<type:<vm|ct|host>|group:GROUP|regex:RE>")
.schema(); .schema();
pub const GROUP_FILTER_LIST_SCHEMA: Schema = pub const GROUP_FILTER_LIST_SCHEMA: Schema =

View File

@ -9,13 +9,13 @@ use proxmox_schema::api;
use proxmox_sys::{task_log, task_warn, WorkerTaskContext}; use proxmox_sys::{task_log, task_warn, WorkerTaskContext};
use pbs_api_types::{ use pbs_api_types::{
print_ns_and_snapshot, print_store_and_ns, Authid, GroupFilter, MediaPoolConfig, Operation, print_ns_and_snapshot, print_store_and_ns, Authid, MediaPoolConfig, Operation,
TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, Userid, JOB_ID_SCHEMA, TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, Userid, JOB_ID_SCHEMA,
PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA, PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA,
}; };
use pbs_config::CachedUserInfo; use pbs_config::CachedUserInfo;
use pbs_datastore::backup_info::{BackupDir, BackupGroup, BackupInfo}; use pbs_datastore::backup_info::{BackupDir, BackupInfo};
use pbs_datastore::{DataStore, StoreProgress}; use pbs_datastore::{DataStore, StoreProgress};
use proxmox_rest_server::WorkerTask; use proxmox_rest_server::WorkerTask;
@ -411,31 +411,24 @@ fn backup_worker(
group_list.sort_unstable_by(|a, b| a.group().cmp(b.group())); group_list.sort_unstable_by(|a, b| a.group().cmp(b.group()));
let (group_list, group_count) = if let Some(group_filters) = &setup.group_filter { let group_count_full = group_list.len();
let filter_fn = |group: &BackupGroup, group_filters: &[GroupFilter]| {
group_filters.iter().any(|filter| group.matches(filter))
};
let group_count_full = group_list.len(); let group_list = match &setup.group_filter {
let list: Vec<BackupGroup> = group_list Some(f) => group_list
.into_iter() .into_iter()
.filter(|group| filter_fn(group, group_filters)) .filter(|group| group.group().apply_filters(f))
.collect(); .collect(),
let group_count = list.len(); None => group_list,
task_log!(
worker,
"found {} groups (out of {} total)",
group_count,
group_count_full
);
(list, group_count)
} else {
let group_count = group_list.len();
task_log!(worker, "found {} groups", group_count);
(group_list, group_count)
}; };
let mut progress = StoreProgress::new(group_count as u64); task_log!(
worker,
"found {} groups (out of {} total)",
group_list.len(),
group_count_full
);
let mut progress = StoreProgress::new(group_list.len() as u64);
let latest_only = setup.latest_only.unwrap_or(false); let latest_only = setup.latest_only.unwrap_or(false);

View File

@ -486,7 +486,7 @@ pub(crate) struct PullParameters {
/// How many levels of sub-namespaces to pull (0 == no recursion, None == maximum recursion) /// How many levels of sub-namespaces to pull (0 == no recursion, None == maximum recursion)
max_depth: Option<usize>, max_depth: Option<usize>,
/// Filters for reducing the pull scope /// Filters for reducing the pull scope
group_filter: Option<Vec<GroupFilter>>, group_filter: Vec<GroupFilter>,
/// How many snapshots should be transferred at most (taking the newest N snapshots) /// How many snapshots should be transferred at most (taking the newest N snapshots)
transfer_last: Option<usize>, transfer_last: Option<usize>,
} }
@ -539,6 +539,11 @@ impl PullParameters {
ns, ns,
}; };
let group_filter = match group_filter {
Some(f) => f,
None => Vec::<GroupFilter>::new(),
};
Ok(Self { Ok(Self {
source, source,
target, target,
@ -1358,7 +1363,6 @@ pub(crate) async fn pull_ns(
) -> Result<(StoreProgress, bool), Error> { ) -> Result<(StoreProgress, bool), Error> {
let mut list: Vec<BackupGroup> = params.source.list_groups(namespace, &params.owner).await?; let mut list: Vec<BackupGroup> = params.source.list_groups(namespace, &params.owner).await?;
let total_count = list.len();
list.sort_unstable_by(|a, b| { list.sort_unstable_by(|a, b| {
let type_order = a.ty.cmp(&b.ty); let type_order = a.ty.cmp(&b.ty);
if type_order == std::cmp::Ordering::Equal { if type_order == std::cmp::Ordering::Equal {
@ -1368,27 +1372,17 @@ pub(crate) async fn pull_ns(
} }
}); });
let apply_filters = |group: &BackupGroup, filters: &[GroupFilter]| -> bool { let unfiltered_count = list.len();
filters.iter().any(|filter| group.matches(filter)) let list: Vec<BackupGroup> = list
}; .into_iter()
.filter(|group| group.apply_filters(&params.group_filter))
let list = if let Some(ref group_filter) = &params.group_filter { .collect();
let unfiltered_count = list.len(); task_log!(
let list: Vec<BackupGroup> = list worker,
.into_iter() "found {} groups to sync (out of {} total)",
.filter(|group| apply_filters(group, group_filter)) list.len(),
.collect(); unfiltered_count
task_log!( );
worker,
"found {} groups to sync (out of {} total)",
list.len(),
unfiltered_count
);
list
} else {
task_log!(worker, "found {} groups to sync", total_count);
list
};
let mut errors = false; let mut errors = false;
@ -1457,10 +1451,8 @@ pub(crate) async fn pull_ns(
if check_backup_owner(&owner, &params.owner).is_err() { if check_backup_owner(&owner, &params.owner).is_err() {
continue; continue;
} }
if let Some(ref group_filter) = &params.group_filter { if !local_group.apply_filters(&params.group_filter) {
if !apply_filters(local_group, group_filter) { continue;
continue;
}
} }
task_log!(worker, "delete vanished group '{local_group}'",); task_log!(worker, "delete vanished group '{local_group}'",);
match params match params