mirror of
https://git.proxmox.com/git/proxmox-backup
synced 2025-07-27 09:12:19 +00:00
api: add /status/metrics API
This one is modelled exactly after the one in PVE (there it is available under /cluster/metrics/export). The returned data format is quite simple, being an array of metric records, each including a value, a metric name, an id to identify the object (e.g. datastore/foo, host), a timestamp and a type ('gauge', 'derive', ...). The latter property makes the format self-describing and aids the metric collector in choosing a representation for storing the metric data. [ ... { "metric": "cpu_avg1", "value": 0.12, "timestamp": 170053205, "id": "host", "type": "gauge" }, ... ] In terms of permissions, the new endpoint requires Sys.Audit on /system/status for metrics of the 'host' object, and Datastore.Audit on /datastore/{store} for 'datastore/{store}' metric objects. Via the 'history' and 'start-time' parameters one can query the last 30 minutes of metric history. If these parameters are not provided, only the most recent metric generation is returned. Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
This commit is contained in:
parent
da12adb1f9
commit
c804763bdf
73
src/api2/status/metrics.rs
Normal file
73
src/api2/status/metrics.rs
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
use anyhow::Error;
|
||||||
|
use pbs_api_types::{Authid, MetricDataPoint, Metrics, PRIV_DATASTORE_AUDIT, PRIV_SYS_AUDIT};
|
||||||
|
use pbs_config::CachedUserInfo;
|
||||||
|
use proxmox_router::{Permission, Router, RpcEnvironment};
|
||||||
|
use proxmox_schema::api;
|
||||||
|
|
||||||
|
use crate::server::metric_collection::pull_metrics;
|
||||||
|
|
||||||
|
/// Router for this module: registers `API_METHOD_GET_METRICS` as the `GET` handler.
pub const ROUTER: Router = Router::new().get(&API_METHOD_GET_METRICS);
|
||||||
|
|
||||||
|
#[api(
|
||||||
|
input: {
|
||||||
|
properties: {
|
||||||
|
"start-time": {
|
||||||
|
optional: true,
|
||||||
|
default: 0,
|
||||||
|
description: "Only return values with a timestamp > start-time. Only has an effect if 'history' is also set",
|
||||||
|
},
|
||||||
|
"history": {
|
||||||
|
optional: true,
|
||||||
|
default: false,
|
||||||
|
description: "Include historic values (last 30 minutes)",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
access: {
|
||||||
|
description: "Users need Sys.Audit on /system/status for host metrics and Datastore.Audit on /datastore/{store} for datastore metrics",
|
||||||
|
permission: &Permission::Anybody,
|
||||||
|
},
|
||||||
|
)]
|
||||||
|
/// Return backup server metrics.
|
||||||
|
pub fn get_metrics(
|
||||||
|
start_time: i64,
|
||||||
|
history: bool,
|
||||||
|
rpcenv: &mut dyn RpcEnvironment,
|
||||||
|
) -> Result<Metrics, Error> {
|
||||||
|
let metrics = if history {
|
||||||
|
pull_metrics::get_all_metrics(start_time)?
|
||||||
|
} else {
|
||||||
|
pull_metrics::get_most_recent_metrics()?
|
||||||
|
};
|
||||||
|
|
||||||
|
let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
|
||||||
|
let user_info = CachedUserInfo::new()?;
|
||||||
|
|
||||||
|
let filter_by_privs = |point: &MetricDataPoint| {
|
||||||
|
let elements: Vec<&str> = point.id.as_str().split('/').collect();
|
||||||
|
|
||||||
|
match elements.as_slice() {
|
||||||
|
["host"] => {
|
||||||
|
let user_privs =
|
||||||
|
CachedUserInfo::lookup_privs(&user_info, &auth_id, &["system", "status"]);
|
||||||
|
(user_privs & PRIV_SYS_AUDIT) != 0
|
||||||
|
}
|
||||||
|
["datastore", datastore_id] => {
|
||||||
|
let user_privs = CachedUserInfo::lookup_privs(
|
||||||
|
&user_info,
|
||||||
|
&auth_id,
|
||||||
|
&["datastore", datastore_id],
|
||||||
|
);
|
||||||
|
(user_privs & PRIV_DATASTORE_AUDIT) != 0
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
log::error!("invalid metric object id: {}", point.id);
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Metrics {
|
||||||
|
data: metrics.into_iter().filter(filter_by_privs).collect(),
|
||||||
|
})
|
||||||
|
}
|
@ -7,6 +7,7 @@ use proxmox_router::list_subdirs_api_method;
|
|||||||
use proxmox_router::{ApiMethod, Permission, Router, RpcEnvironment, SubdirMap};
|
use proxmox_router::{ApiMethod, Permission, Router, RpcEnvironment, SubdirMap};
|
||||||
use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
|
use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
|
||||||
use proxmox_schema::api;
|
use proxmox_schema::api;
|
||||||
|
use proxmox_sortable_macro::sortable;
|
||||||
|
|
||||||
use pbs_api_types::{
|
use pbs_api_types::{
|
||||||
Authid, DataStoreStatusListItem, Operation, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
|
Authid, DataStoreStatusListItem, Operation, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
|
||||||
@ -20,6 +21,8 @@ use crate::tools::statistics::linear_regression;
|
|||||||
|
|
||||||
use crate::backup::can_access_any_namespace;
|
use crate::backup::can_access_any_namespace;
|
||||||
|
|
||||||
|
pub mod metrics;
|
||||||
|
|
||||||
#[api(
|
#[api(
|
||||||
returns: {
|
returns: {
|
||||||
description: "Lists the Status of the Datastores.",
|
description: "Lists the Status of the Datastores.",
|
||||||
@ -137,10 +140,14 @@ pub async fn datastore_status(
|
|||||||
Ok(list)
|
Ok(list)
|
||||||
}
|
}
|
||||||
|
|
||||||
const SUBDIRS: SubdirMap = &[(
|
#[sortable]
|
||||||
"datastore-usage",
|
const SUBDIRS: SubdirMap = &sorted!([
|
||||||
&Router::new().get(&API_METHOD_DATASTORE_STATUS),
|
(
|
||||||
)];
|
"datastore-usage",
|
||||||
|
&Router::new().get(&API_METHOD_DATASTORE_STATUS),
|
||||||
|
),
|
||||||
|
("metrics", &metrics::ROUTER),
|
||||||
|
]);
|
||||||
|
|
||||||
pub const ROUTER: Router = Router::new()
|
pub const ROUTER: Router = Router::new()
|
||||||
.get(&list_subdirs_api_method!(SUBDIRS))
|
.get(&list_subdirs_api_method!(SUBDIRS))
|
@ -17,8 +17,8 @@ use proxmox_sys::{
|
|||||||
use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage};
|
use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage};
|
||||||
|
|
||||||
mod metric_server;
|
mod metric_server;
|
||||||
mod pull_metrics;
|
pub(crate) mod pull_metrics;
|
||||||
pub mod rrd;
|
pub(crate) mod rrd;
|
||||||
|
|
||||||
const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(10);
|
const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(10);
|
||||||
|
|
||||||
|
@ -39,6 +39,51 @@ pub(super) fn init() -> Result<(), Error> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return most recent metrics
|
||||||
|
///
|
||||||
|
/// If the metric collection loop has no produced any metrics yet, an empty
|
||||||
|
/// `Vec` is returned. Returns an error if the cache could not be accessed.
|
||||||
|
pub fn get_most_recent_metrics() -> Result<Vec<MetricDataPoint>, Error> {
|
||||||
|
let cached_datapoints: Option<MetricDataPoints> = get_cache()?.get()?;
|
||||||
|
let mut points = cached_datapoints.map(|r| r.datapoints).unwrap_or_default();
|
||||||
|
|
||||||
|
points.sort_unstable_by_key(|p| p.timestamp);
|
||||||
|
|
||||||
|
Ok(points)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return all cached metrics with a `timestamp > start_time`
|
||||||
|
///
|
||||||
|
/// If the metric collection loop has no produced any metrics yet, an empty
|
||||||
|
/// `Vec` is returned. Returns an error if the cache could not be accessed.
|
||||||
|
pub fn get_all_metrics(start_time: i64) -> Result<Vec<MetricDataPoint>, Error> {
|
||||||
|
let now = proxmox_time::epoch_i64();
|
||||||
|
|
||||||
|
let delta = now - start_time;
|
||||||
|
|
||||||
|
if delta < 0 {
|
||||||
|
// start-time in the future, no metrics for you
|
||||||
|
return Ok(Vec::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let generations = delta / (METRIC_COLLECTION_INTERVAL.as_secs() as i64);
|
||||||
|
let generations = generations.clamp(0, STORED_METRIC_GENERATIONS as i64);
|
||||||
|
|
||||||
|
let cached_datapoints: Vec<MetricDataPoints> = get_cache()?.get_last(generations as u32)?;
|
||||||
|
|
||||||
|
let mut points = Vec::new();
|
||||||
|
|
||||||
|
for gen in cached_datapoints {
|
||||||
|
if gen.timestamp > start_time {
|
||||||
|
points.extend(gen.datapoints);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
points.sort_unstable_by_key(|p| p.timestamp);
|
||||||
|
|
||||||
|
Ok(points)
|
||||||
|
}
|
||||||
|
|
||||||
/// Convert `DiskStat` `HostStat` into a universal metric data point and cache
|
/// Convert `DiskStat` `HostStat` into a universal metric data point and cache
|
||||||
/// them for a later retrieval.
|
/// them for a later retrieval.
|
||||||
pub(super) fn update_metrics(
|
pub(super) fn update_metrics(
|
||||||
|
Loading…
Reference in New Issue
Block a user