mirror of
https://git.proxmox.com/git/proxmox-backup
synced 2025-07-27 12:27:23 +00:00
api: add /status/metrics API
This one is modelled exactly as the one in PVE (there it is available under /cluster/metrics/export). The returned data format is quite simple, being an array of metric records, including a value, a metric name, an id to identify the object (e.g. datastore/foo, host), a timestamp and a type ('gauge', 'derive', ...). The latter property makes the format self-describing and aids the metric collector in choosing a representation for storing the metric data. [ ... { "metric": "cpu_avg1", "value": 0.12, "timestamp": 170053205, "id": "host", "type": "gauge" }, ... ] In terms of permissions, the new endpoint requires Sys.Audit on /system/status for metrics of the 'host' object, and Datastore.Audit on /datastore/{store} for 'datastore/{store}' metric objects. Via the 'history' and 'start-time' parameters one can query the last 30mins of metric history. If these parameters are not provided, only the most recent metric generation is returned. Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
This commit is contained in:
parent
da12adb1f9
commit
c804763bdf
73
src/api2/status/metrics.rs
Normal file
73
src/api2/status/metrics.rs
Normal file
@ -0,0 +1,73 @@
|
||||
use anyhow::Error;
|
||||
use pbs_api_types::{Authid, MetricDataPoint, Metrics, PRIV_DATASTORE_AUDIT, PRIV_SYS_AUDIT};
|
||||
use pbs_config::CachedUserInfo;
|
||||
use proxmox_router::{Permission, Router, RpcEnvironment};
|
||||
use proxmox_schema::api;
|
||||
|
||||
use crate::server::metric_collection::pull_metrics;
|
||||
|
||||
/// Router for this module: only the `GET` method is registered, served by
/// `API_METHOD_GET_METRICS`.
// NOTE(review): `API_METHOD_GET_METRICS` is presumably generated by the `#[api]`
// attribute on `get_metrics` - confirm.
pub const ROUTER: Router = Router::new().get(&API_METHOD_GET_METRICS);
|
||||
|
||||
#[api(
|
||||
input: {
|
||||
properties: {
|
||||
"start-time": {
|
||||
optional: true,
|
||||
default: 0,
|
||||
description: "Only return values with a timestamp > start-time. Only has an effect if 'history' is also set",
|
||||
},
|
||||
"history": {
|
||||
optional: true,
|
||||
default: false,
|
||||
description: "Include historic values (last 30 minutes)",
|
||||
}
|
||||
},
|
||||
},
|
||||
access: {
|
||||
description: "Users need Sys.Audit on /system/status for host metrics and Datastore.Audit on /datastore/{store} for datastore metrics",
|
||||
permission: &Permission::Anybody,
|
||||
},
|
||||
)]
|
||||
/// Return backup server metrics.
|
||||
pub fn get_metrics(
|
||||
start_time: i64,
|
||||
history: bool,
|
||||
rpcenv: &mut dyn RpcEnvironment,
|
||||
) -> Result<Metrics, Error> {
|
||||
let metrics = if history {
|
||||
pull_metrics::get_all_metrics(start_time)?
|
||||
} else {
|
||||
pull_metrics::get_most_recent_metrics()?
|
||||
};
|
||||
|
||||
let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
|
||||
let user_info = CachedUserInfo::new()?;
|
||||
|
||||
let filter_by_privs = |point: &MetricDataPoint| {
|
||||
let elements: Vec<&str> = point.id.as_str().split('/').collect();
|
||||
|
||||
match elements.as_slice() {
|
||||
["host"] => {
|
||||
let user_privs =
|
||||
CachedUserInfo::lookup_privs(&user_info, &auth_id, &["system", "status"]);
|
||||
(user_privs & PRIV_SYS_AUDIT) != 0
|
||||
}
|
||||
["datastore", datastore_id] => {
|
||||
let user_privs = CachedUserInfo::lookup_privs(
|
||||
&user_info,
|
||||
&auth_id,
|
||||
&["datastore", datastore_id],
|
||||
);
|
||||
(user_privs & PRIV_DATASTORE_AUDIT) != 0
|
||||
}
|
||||
_ => {
|
||||
log::error!("invalid metric object id: {}", point.id);
|
||||
false
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Metrics {
|
||||
data: metrics.into_iter().filter(filter_by_privs).collect(),
|
||||
})
|
||||
}
|
@ -7,6 +7,7 @@ use proxmox_router::list_subdirs_api_method;
|
||||
use proxmox_router::{ApiMethod, Permission, Router, RpcEnvironment, SubdirMap};
|
||||
use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
|
||||
use proxmox_schema::api;
|
||||
use proxmox_sortable_macro::sortable;
|
||||
|
||||
use pbs_api_types::{
|
||||
Authid, DataStoreStatusListItem, Operation, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
|
||||
@ -20,6 +21,8 @@ use crate::tools::statistics::linear_regression;
|
||||
|
||||
use crate::backup::can_access_any_namespace;
|
||||
|
||||
pub mod metrics;
|
||||
|
||||
#[api(
|
||||
returns: {
|
||||
description: "Lists the Status of the Datastores.",
|
||||
@ -137,10 +140,14 @@ pub async fn datastore_status(
|
||||
Ok(list)
|
||||
}
|
||||
|
||||
const SUBDIRS: SubdirMap = &[(
|
||||
"datastore-usage",
|
||||
&Router::new().get(&API_METHOD_DATASTORE_STATUS),
|
||||
)];
|
||||
#[sortable]
|
||||
const SUBDIRS: SubdirMap = &sorted!([
|
||||
(
|
||||
"datastore-usage",
|
||||
&Router::new().get(&API_METHOD_DATASTORE_STATUS),
|
||||
),
|
||||
("metrics", &metrics::ROUTER),
|
||||
]);
|
||||
|
||||
pub const ROUTER: Router = Router::new()
|
||||
.get(&list_subdirs_api_method!(SUBDIRS))
|
@ -17,8 +17,8 @@ use proxmox_sys::{
|
||||
use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage};
|
||||
|
||||
mod metric_server;
|
||||
mod pull_metrics;
|
||||
pub mod rrd;
|
||||
pub(crate) mod pull_metrics;
|
||||
pub(crate) mod rrd;
|
||||
|
||||
const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(10);
|
||||
|
||||
|
@ -39,6 +39,51 @@ pub(super) fn init() -> Result<(), Error> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return most recent metrics
|
||||
///
|
||||
/// If the metric collection loop has no produced any metrics yet, an empty
|
||||
/// `Vec` is returned. Returns an error if the cache could not be accessed.
|
||||
pub fn get_most_recent_metrics() -> Result<Vec<MetricDataPoint>, Error> {
|
||||
let cached_datapoints: Option<MetricDataPoints> = get_cache()?.get()?;
|
||||
let mut points = cached_datapoints.map(|r| r.datapoints).unwrap_or_default();
|
||||
|
||||
points.sort_unstable_by_key(|p| p.timestamp);
|
||||
|
||||
Ok(points)
|
||||
}
|
||||
|
||||
/// Return all cached metrics with a `timestamp > start_time`
|
||||
///
|
||||
/// If the metric collection loop has no produced any metrics yet, an empty
|
||||
/// `Vec` is returned. Returns an error if the cache could not be accessed.
|
||||
pub fn get_all_metrics(start_time: i64) -> Result<Vec<MetricDataPoint>, Error> {
|
||||
let now = proxmox_time::epoch_i64();
|
||||
|
||||
let delta = now - start_time;
|
||||
|
||||
if delta < 0 {
|
||||
// start-time in the future, no metrics for you
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let generations = delta / (METRIC_COLLECTION_INTERVAL.as_secs() as i64);
|
||||
let generations = generations.clamp(0, STORED_METRIC_GENERATIONS as i64);
|
||||
|
||||
let cached_datapoints: Vec<MetricDataPoints> = get_cache()?.get_last(generations as u32)?;
|
||||
|
||||
let mut points = Vec::new();
|
||||
|
||||
for gen in cached_datapoints {
|
||||
if gen.timestamp > start_time {
|
||||
points.extend(gen.datapoints);
|
||||
}
|
||||
}
|
||||
|
||||
points.sort_unstable_by_key(|p| p.timestamp);
|
||||
|
||||
Ok(points)
|
||||
}
|
||||
|
||||
/// Convert `DiskStat` and `HostStat` into universal metric data points and cache
|
||||
/// them for a later retrieval.
|
||||
pub(super) fn update_metrics(
|
||||
|
Loading…
Reference in New Issue
Block a user