Mirror of https://git.proxmox.com/git/ceph.git (synced 2025-04-28 12:39:22 +00:00)

import source of Ceph Squid 19.2.0 release

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>

parent 27765aed33
commit f78120f944
ceph/.github/CODEOWNERS (vendored, 1 change)

@@ -132,6 +132,7 @@ README* @ceph/doc-writers
/src/test/run-rbd* @ceph/rbd
/src/test/test_rbd* @ceph/rbd
/src/tools/rbd* @ceph/rbd
/systemd/ceph-rbd-mirror* @ceph/rbd
/systemd/rbdmap.service.in @ceph/rbd
/udev/50-rbd.rules @ceph/rbd
ceph/.github/labeler.yml (vendored, 54 changes)

@@ -207,21 +207,60 @@ CI:
  - .github/**

rbd:
  - doc/dev/rbd*
  - doc/man/8/ceph-rbdnamer.rst
  - doc/man/8/rbd*
  - doc/rbd/**
  - doc/start/quick-rbd.rst
  - examples/librbd/**
  - examples/rbd-replay/**
  - qa/rbd/**
  - qa/run_xfstests*
  - qa/suites/krbd/**
  - qa/suites/rbd/**
  - qa/tasks/ceph_iscsi_client.py
  - qa/tasks/metadata.yaml
  - qa/tasks/qemu.py
  - qa/tasks/rbd*
  - qa/tasks/userdata*
  - qa/workunits/cls/test_cls_journal.sh
  - qa/workunits/cls/test_cls_lock.sh
  - qa/workunits/cls/test_cls_rbd.sh
  - qa/workunits/rbd/**
  - qa/workunits/windows/**
  - src/ceph-rbdnamer
  - src/cls/journal/**
  - src/cls/lock/**
  - src/cls/rbd/**
  - src/common/options/rbd*
  - src/etc-rbdmap
  - src/include/krbd.h
  - src/include/rbd*
  - src/include/rbd/**
  - src/journal/**
  - src/krbd.cc
  - src/librbd/**
  - src/ocf/**
  - src/pybind/mgr/rbd_support/**
  - src/pybind/rbd/**
  - src/rbd*
  - src/rbd*/**
  - src/test/cli/rbd/**
  - src/test/cli-integration/rbd/**
  - src/test/cls_journal/**
  - src/test/cls_lock/**
  - src/test/cls_rbd/**
  - src/test/journal/**
  - src/test/librbd/**
  - src/test/rbd_mirror/**
  - src/tools/rbd/**
  - src/tools/rbd_ggate/**
  - src/tools/rbd_mirror/**
  - src/tools/rbd_nbd/**
  - src/tools/rbd_wnbd/**
  - src/test/pybind/test_rbd.py
  - src/test/rbd*
  - src/test/rbd*/**
  - src/test/run-rbd*
  - src/test/test_rbd*
  - src/tools/rbd*/**
  - systemd/ceph-rbd-mirror*
  - systemd/rbdmap.service.in
  - udev/50-rbd.rules

rgw:
  - qa/suites/rgw/**

@@ -248,8 +287,7 @@ ceph-volume:
  - src/python-common/ceph/deployment/drive_selection/**

tests:
  - qa/tasks/**
  - qa/workunits/**
  - qa/**
  - src/test/**

nfs:
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.16)

project(ceph
  VERSION 19.1.0
  VERSION 19.2.0
  LANGUAGES CXX C ASM)

foreach(policy CMP0127 CMP0135)

@@ -308,11 +308,11 @@ endif()
option(WITH_BLUEFS "libbluefs library" OFF)

CMAKE_DEPENDENT_OPTION(WITH_QATLIB "Enable QAT with qatlib" ON
  "NOT CMAKE_SYSTEM_PROCESSOR MATCHES aarch64" OFF)
  "CMAKE_SYSTEM_PROCESSOR MATCHES amd64|x86_64|AMD64" OFF)
option(WITH_SYSTEM_QATLIB "Use system packages for qatlib" OFF)
option(WITH_QATDRV "Enable QAT with out-of-tree driver" OFF)
CMAKE_DEPENDENT_OPTION(WITH_QATZIP "Enable QATzip" ON
  "NOT CMAKE_SYSTEM_PROCESSOR MATCHES aarch64" OFF)
  "CMAKE_SYSTEM_PROCESSOR MATCHES amd64|x86_64|AMD64" OFF)
option(WITH_SYSTEM_QATZIP "Use system packages for QATzip" OFF)

if(WITH_QATDRV)

@@ -608,7 +608,7 @@ option(PG_DEBUG_REFS "PG Ref debugging is enabled" OFF)

option(WITH_TESTS "enable the build of ceph-test package scripts/binaries" ON)
set(UNIT_TESTS_BUILT ${WITH_TESTS})
set(CEPH_TEST_TIMEOUT 3600 CACHE STRING
set(CEPH_TEST_TIMEOUT 7200 CACHE STRING
  "Maximum time before a CTest gets killed" )

# fio
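The hunks above change two build defaults: the QAT options are now tied to an x86-64 target instead of merely excluding aarch64, and the default CTest timeout is doubled from 3600 s to 7200 s. A hedged sketch of a configure run that exercises these cache options (the source and build directory paths are invented; check `cmake -LH` for the authoritative option list):

    # assumes an x86-64 build host and an out-of-tree build directory
    cmake -S ceph -B build \
        -DWITH_QATLIB=ON -DWITH_QATZIP=ON \
        -DCEPH_TEST_TIMEOUT=7200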
@@ -30,6 +30,8 @@
  a large buildup of session metadata resulting in the MDS going read-only due to
  the RADOS operation exceeding the size threshold. `mds_session_metadata_threshold`
  config controls the maximum size that a (encoded) session metadata can grow.
* CephFS: A new "mds last-seen" command is available for querying the last time
  an MDS was in the FSMap, subject to a pruning threshold.
* CephFS: For clusters with multiple CephFS file systems, all the snap-schedule
  commands now expect the '--fs' argument.
* CephFS: The period specifier ``m`` now implies minutes and the period specifier

@@ -77,6 +79,8 @@
  config option. Previously, they would ignore invalid or missing realms and
  go on to load a zone/zonegroup in a different realm. If startup fails with
  a "failed to load realm" error, fix or remove the ``rgw_realm`` option.
* rgw: The radosgw-admin commands ``realm create`` and ``realm pull`` no
  longer set the default realm without ``--default``.
* CephFS: Running the command "ceph fs authorize" for an existing entity now
  upgrades the entity's capabilities instead of printing an error. It can now
  also change read/write permissions in a capability that the entity already
@@ -158,13 +162,17 @@ CephFS: Disallow delegating preallocated inode ranges to clients. Config
  notifications to topics owned by other users. A new configuration parameter:
  ``rgw_topic_require_publish_policy`` can be enabled to deny ``sns:Publish``
  permissions unless explicitly granted by topic policy.
* RGW: Fixed an issue with persistent notifications where changes made to a
  topic's parameters while notifications were already queued were not reflected
  in those notifications. If a user set up a topic with an incorrect
  configuration (e.g. endpoint address) that caused delivery to the broker to
  fail, the incorrect topic attribute can now be corrected and the new
  configuration will be used on the next delivery retry.
* RBD: The option ``--image-id`` has been added to `rbd children` CLI command,
  so it can be run for images in the trash.
* PG dump: The default output of `ceph pg dump --format json` has changed. The
  default json format produces a rather massive output in large clusters and
  isn't scalable. So we have removed the 'network_ping_times' section from
  the output. Details in the tracker: https://tracker.ceph.com/issues/57460

* CephFS: The `subvolume snapshot clone` command now depends on the config option
  `snapshot_clone_no_wait` which is used to reject the clone operation when
  all the cloner threads are busy. This config option is enabled by default which means
@@ -173,6 +181,56 @@ CephFS: Disallow delegating preallocated inode ranges to clients. Config
  `ceph config get mgr mgr/volumes/snapshot_clone_no_wait`
  and it can be disabled by using:
  `ceph config set mgr mgr/volumes/snapshot_clone_no_wait false`
* RBD: `RBD_IMAGE_OPTION_CLONE_FORMAT` option has been exposed in Python
  bindings via `clone_format` optional parameter to `clone`, `deep_copy` and
  `migration_prepare` methods.
* RBD: `RBD_IMAGE_OPTION_FLATTEN` option has been exposed in Python bindings via
  `flatten` optional parameter to `deep_copy` and `migration_prepare` methods.

* CephFS: fixes to the implementation of the ``root_squash`` mechanism enabled
  via cephx ``mds`` caps on a client credential require a new client feature
  bit, ``client_mds_auth_caps``. Clients using credentials with ``root_squash``
  without this feature will trigger the MDS to raise a HEALTH_ERR on the
  cluster, MDS_CLIENTS_BROKEN_ROOTSQUASH. See the documentation on this warning
  and the new feature bit for more information.

* cls_cxx_gather is marked as deprecated.
* CephFS: cephfs-journal-tool is guarded against running on an online file system.
  The 'cephfs-journal-tool --rank <fs_name>:<mds_rank> journal reset' and
  'cephfs-journal-tool --rank <fs_name>:<mds_rank> journal reset --force'
  commands require '--yes-i-really-really-mean-it'.

* Dashboard: Rearranged Navigation Layout: The navigation layout has been reorganized
  for improved usability and easier access to key features.
* Dashboard: CephFS Improvements
  * Support for managing CephFS snapshots and clones, as well as snapshot schedule
    management
  * Manage authorization capabilities for CephFS resources
  * Helpers on mounting a CephFS volume
* Dashboard: RGW Improvements
  * Support for managing bucket policies
  * Add/Remove bucket tags
  * ACL Management
  * Several UI/UX Improvements to the bucket form
* Monitoring: Grafana dashboards are now loaded into the container at runtime rather than
  building a grafana image with the grafana dashboards. Official Ceph grafana images
  can be found in quay.io/ceph/grafana
* Monitoring: RGW S3 Analytics: A new Grafana dashboard is now available, enabling you to
  visualize per bucket and user analytics data, including total GETs, PUTs, Deletes,
  Copies, and list metrics.
* RBD: `Image::access_timestamp` and `Image::modify_timestamp` Python APIs now
  return timestamps in UTC.
* RBD: Support for cloning from non-user type snapshots is added. This is
  intended primarily as a building block for cloning new groups from group
  snapshots created with `rbd group snap create` command, but has also been
  exposed via the new `--snap-id` option for `rbd clone` command.
* RBD: The output of `rbd snap ls --all` command now includes the original
  type for trashed snapshots.

* CephFS: Command "ceph mds fail" and "ceph fs fail" now requires a
  confirmation flag when some MDSs exhibit health warning MDS_TRIM or
  MDS_CACHE_OVERSIZED. This is to prevent accidental MDS failover causing
  further delays in recovery.
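Two of the behaviour changes above are driven purely by CLI or config knobs. A short sketch of how an operator might check them on a Squid cluster (the `snapshot_clone_no_wait` commands are taken verbatim from the note above; the image spec passed to `rbd snap ls` is a made-up example):

    # confirm whether busy cloner threads will reject new clone requests
    ceph config get mgr mgr/volumes/snapshot_clone_no_wait
    # fall back to the previous queueing behaviour
    ceph config set mgr mgr/volumes/snapshot_clone_no_wait false
    # trashed snapshots now report their original type
    rbd snap ls --all rbd/myimage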
>=18.0.0

@@ -353,11 +411,20 @@ CephFS: Disallow delegating preallocated inode ranges to clients. Config
  than the number mentioned against the config tunable `mds_max_snaps_per_dir`
  so that a new snapshot can be created and retained during the next schedule
  run.
* cephfs: Running the command "ceph fs authorize" for an existing entity now
  upgrades the entity's capabilities instead of printing an error. It can now
  also change read/write permissions in a capability that the entity already
  holds. If the capability passed by user is same as one of the capabilities
  that the entity already holds, idempotency is maintained.
* `ceph config dump --format <json|xml>` output will display the localized
  option names instead of their normalized versions. For example,
  "mgr/prometheus/x/server_port" will be displayed instead of
  "mgr/prometheus/server_port". This matches the output of the non pretty-print
  formatted version of the command.
* CEPHFS: The MDS config option name "mds_kill_skip_replaying_inotable" is easily
  confused with "mds_inject_skip_replaying_inotable", so it has been renamed to
  "mds_kill_after_journal_logs_flushed".


>=17.2.1
@@ -181,7 +181,7 @@
# main package definition
#################################################################################
Name: ceph
Version: 19.1.0
Version: 19.2.0
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2

@@ -197,7 +197,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
Group: System/Filesystems
%endif
URL: http://ceph.com/
Source0: %{?_remote_tarball_prefix}ceph-19.1.0.tar.bz2
Source0: %{?_remote_tarball_prefix}ceph-19.2.0.tar.bz2
%if 0%{?suse_version}
# _insert_obs_source_lines_here
ExclusiveArch: x86_64 aarch64 ppc64le s390x riscv64

@@ -689,6 +689,7 @@ BuildArch: noarch
Group: System/Filesystems
%endif
Requires: python%{python3_pkgversion}-bcrypt
Requires: python%{python3_pkgversion}-packaging
Requires: python%{python3_pkgversion}-pecan
Requires: python%{python3_pkgversion}-pyOpenSSL
Requires: python%{python3_pkgversion}-requests

@@ -1334,7 +1335,7 @@ This package provides a Ceph hardware monitoring agent.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-19.1.0
%autosetup -p1 -n ceph-19.2.0

%build
# Disable lto on systems that do not support symver attribute

@@ -1634,6 +1635,7 @@ rm -rf %{_vpath_builddir}
%if %{with lttng}
%{_libdir}/libos_tp.so*
%{_libdir}/libosd_tp.so*
%{_libdir}/libmgr_op_tp.so*
%endif
%config(noreplace) %{_sysconfdir}/logrotate.d/ceph
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}

@@ -2681,6 +2683,5 @@ exit 0
%dir %{python3_sitelib}/ceph_node_proxy
%{python3_sitelib}/ceph_node_proxy/*
%{python3_sitelib}/ceph_node_proxy-*
#%{_mandir}/man8/ceph-node-proxy.8*

%changelog
@@ -689,6 +689,7 @@ BuildArch: noarch
Group: System/Filesystems
%endif
Requires: python%{python3_pkgversion}-bcrypt
Requires: python%{python3_pkgversion}-packaging
Requires: python%{python3_pkgversion}-pecan
Requires: python%{python3_pkgversion}-pyOpenSSL
Requires: python%{python3_pkgversion}-requests

@@ -1634,6 +1635,7 @@ rm -rf %{_vpath_builddir}
%if %{with lttng}
%{_libdir}/libos_tp.so*
%{_libdir}/libosd_tp.so*
%{_libdir}/libmgr_op_tp.so*
%endif
%config(noreplace) %{_sysconfdir}/logrotate.d/ceph
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}

@@ -2681,6 +2683,5 @@ exit 0
%dir %{python3_sitelib}/ceph_node_proxy
%{python3_sitelib}/ceph_node_proxy/*
%{python3_sitelib}/ceph_node_proxy-*
#%{_mandir}/man8/ceph-node-proxy.8*

%changelog
@@ -1,3 +1,15 @@
ceph (19.2.0-1) stable; urgency=medium

  * New upstream release

 -- Ceph Release Team <ceph-maintainers@ceph.io>  Wed, 18 Sep 2024 16:27:48 +0000

ceph (19.1.1-1) rc; urgency=medium

  * New upstream release

 -- Ceph Release Team <ceph-maintainers@ceph.io>  Tue, 20 Aug 2024 12:51:10 -0400

ceph (19.1.0-1) rc; urgency=medium

  * New upstream release
@@ -19,9 +19,27 @@ function(add_ceph_test test_name test_path)
    PATH=${CMAKE_RUNTIME_OUTPUT_DIRECTORY}:${CMAKE_SOURCE_DIR}/src:$ENV{PATH}
    PYTHONPATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/cython_modules/lib.3:${CMAKE_SOURCE_DIR}/src/pybind
    CEPH_BUILD_VIRTUALENV=${CEPH_BUILD_VIRTUALENV})
  # none of the tests should take more than 1 hour to complete
  set_property(TEST ${test_name}
    PROPERTY TIMEOUT ${CEPH_TEST_TIMEOUT})
  # Crimson seastar unit tests always run with --smp N, starting N threads, one per core,
  # on CPU cores [0, N). When many of them run in parallel, the first N cores are shared
  # while the remaining cores sit idle, so a lot of CPU capacity is wasted.
  # Using the CTest resource allocation feature
  # (https://cmake.org/cmake/help/latest/manual/ctest.1.html#resource-allocation),
  # ctest can assign CPU-core resources to the Crimson seastar unit tests.
  # Three steps enable the feature:
  # Step 1: Generate a resource specification file describing the available resources,
  #         $(nproc) CPUs with ids 0 to $(nproc) - 1.
  # Step 2: Set the RESOURCE_GROUPS property on the test to "${smp_count},cpus:1".
  # Step 3: Read the CTEST_RESOURCE_GROUP_* environment variables and set the seastar
  #         smp options while running the test.
  list(FIND ARGV "--smp" smp_pos)
  if(smp_pos GREATER -1)
    if(smp_pos EQUAL ARGC)
      message(FATAL_ERROR "${test_name} --smp requires an argument")
    endif()
    math(EXPR i "${smp_pos} + 1")
    list(GET ARGV ${i} smp_count)
    set_property(TEST ${test_name}
      PROPERTY RESOURCE_GROUPS "${smp_count},cpus:1")
  endif()
endfunction()

option(WITH_GTEST_PARALLEL "Enable running gtest based tests in parallel" OFF)
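The hunk above only covers step 2 (setting RESOURCE_GROUPS); the person running the tests still has to supply the resource specification file from step 1. A hedged sketch of such a run, with a made-up spec file describing 8 single-slot CPUs (the JSON layout follows the CTest resource-spec schema as I understand it):

    cat > resources.json <<'EOF'
    {"version": {"major": 1, "minor": 0},
     "local": [{"cpus": [{"id": "0", "slots": 1}, {"id": "1", "slots": 1},
                         {"id": "2", "slots": 1}, {"id": "3", "slots": 1},
                         {"id": "4", "slots": 1}, {"id": "5", "slots": 1},
                         {"id": "6", "slots": 1}, {"id": "7", "slots": 1}]}]}
    EOF
    ctest --resource-spec-file resources.json -j 8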
@@ -162,7 +162,7 @@ function(do_build_boost root_dir version)
    set(boost_version 1.82.0)
    set(boost_sha256 a6e1ab9b0860e6a2881dd7b21fe9f737a095e5f33a3a874afc6a345228597ee6)
    string(REPLACE "." "_" boost_version_underscore ${boost_version} )
    string(JOIN " " boost_url
    list(APPEND boost_url
      https://boostorg.jfrog.io/artifactory/main/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2
      https://download.ceph.com/qa/boost_${boost_version_underscore}.tar.bz2)
    set(source_dir
@@ -57,6 +57,9 @@ string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${Sanitizers_COMPILE_OPTIONS}")
set(CMAKE_REQUIRED_LIBRARIES ${Sanitizers_COMPILE_OPTIONS})
check_cxx_source_compiles("int main() {}"
  Sanitizers_ARE_SUPPORTED)

file (READ ${CMAKE_CURRENT_LIST_DIR}/code_tests/Sanitizers_fiber_test.cc _sanitizers_fiber_test_code)
check_cxx_source_compiles ("${_sanitizers_fiber_test_code}" Sanitizers_FIBER_SUPPORT)
cmake_pop_check_state()

include(FindPackageHandleStandardArgs)
ceph/cmake/modules/code_tests/Sanitizers_fiber_test.cc (new file, 11 lines)

@@ -0,0 +1,11 @@
#include <cstddef>

extern "C" {
void __sanitizer_start_switch_fiber(void**, const void*, size_t);
void __sanitizer_finish_switch_fiber(void*, const void**, size_t*);
}

int main() {
  __sanitizer_start_switch_fiber(nullptr, nullptr, 0);
  __sanitizer_finish_switch_fiber(nullptr, nullptr, nullptr);
}
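The new file is a compile-and-link probe: the cmake hunk above feeds it to check_cxx_source_compiles() with the sanitizer flags applied and only sets Sanitizers_FIBER_SUPPORT if the fiber-switching hooks resolve. Roughly the same check can be reproduced by hand; the specific sanitizer flag below is an assumption (use whichever sanitizer the build actually enables):

    c++ -fsanitize=address ceph/cmake/modules/code_tests/Sanitizers_fiber_test.cc \
        -o /dev/null && echo "fiber annotations available"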
@@ -1,5 +1,6 @@
natsort
CherryPy
packaging
pecan
werkzeug
requests
@@ -83,11 +83,13 @@ Build-Depends: automake,
 libndctl-dev (>= 63) <pkg.ceph.pmdk>,
 libpmem-dev <pkg.ceph.pmdk>,
 libpmemobj-dev (>= 1.8) <pkg.ceph.pmdk>,
 libprotobuf-dev <pkg.ceph.crimson>,
 ninja-build,
 nlohmann-json3-dev,
 patch,
 pkg-config,
 prometheus <pkg.ceph.check>,
 protobuf-compiler <pkg.ceph.crimson>,
 python3-all-dev,
 python3-cherrypy3,
 python3-natsort,
@@ -26,7 +26,7 @@ if [ -r /etc/os-release ]; then
            PYBUILD="3.11"
        fi
        ;;
    rocky|rhel|centos)
    almalinux|rocky|rhel|centos)
        MAJOR_VER=$(echo "$VERSION_ID" | sed -e 's/\..*$//')
        if [ "$MAJOR_VER" -ge "9" ] ; then
            PYBUILD="3.9"
@@ -739,7 +739,8 @@ of ``K+M`` so that each chunk is stored in an OSD in the acting set. The rank of
the chunk is stored as an attribute of the object.

For instance an erasure coded pool can be created to use five OSDs (``K+M = 5``) and
sustain the loss of two of them (``M = 2``).
sustain the loss of two of them (``M = 2``). Data may be unavailable until (``K+1``)
shards are restored.
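To make the arithmetic in the revised sentence concrete: with ``K+M = 5`` and ``M = 2`` there are ``K = 3`` data chunks, so reads may stall until ``K+1 = 4`` shards are available again. A hedged sketch of creating such a pool (profile and pool names are invented; the command forms are the standard erasure-code-profile and pool-create ones):

    ceph osd erasure-code-profile set k3m2 k=3 m=2
    ceph osd pool create ecpool erasure k3m2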
Reading and Writing Encoded Chunks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -303,7 +303,10 @@ Setting the initial CRUSH location of host
==========================================

Hosts can contain a ``location`` identifier which will instruct cephadm to
create a new CRUSH host located in the specified hierarchy.
create a new CRUSH host bucket located in the specified hierarchy.
You can specify more than one element of the tree when doing so (for
instance if you want to ensure that the rack that a host is being
added to is also added to the default bucket), for example:

.. code-block:: yaml

@@ -311,6 +314,7 @@ create a new CRUSH host located in the specified hierarchy.
    hostname: node-00
    addr: 192.168.0.10
    location:
      root: default
      rack: rack1

.. note::
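A hedged sketch of how the host spec shown in this hunk might be applied in practice; the ``service_type: host`` line and the file name are assumptions, since the hunk only shows part of the YAML:

    cat > node-00.yaml <<'EOF'
    service_type: host     # assumed; not shown in the hunk above
    hostname: node-00
    addr: 192.168.0.10
    location:
      root: default
      rack: rack1
    EOF
    ceph orch apply -i node-00.yaml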
@@ -262,8 +262,8 @@ Using custom images
~~~~~~~~~~~~~~~~~~~

It is possible to install or upgrade monitoring components based on other
images. To do so, the name of the image to be used needs to be stored in the
configuration first. The following configuration options are available.
images. The ID of the image that you plan to use must be stored in the
configuration. The following configuration options are available:

- ``container_image_prometheus``
- ``container_image_grafana``

@@ -279,51 +279,53 @@ configuration first. The following configuration options are available.
- ``container_image_jaeger_collector``
- ``container_image_jaeger_query``

Custom images can be set with the ``ceph config`` command

.. code-block:: bash

   ceph config set mgr mgr/cephadm/<option_name> <value>

For example

.. code-block:: bash

   ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1

If there were already running monitoring stack daemon(s) of the type whose
image you've changed, you must redeploy the daemon(s) in order to have them
actually use the new image.

For example, if you had changed the prometheus image
Custom images can be set with the ``ceph config`` command. To set custom images, run a command of the following form:

.. prompt:: bash #

   ceph orch redeploy prometheus
   ceph config set mgr mgr/cephadm/<option_name> <value>

For example:

.. prompt:: bash #

   ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1

If you were already running monitoring stack daemon(s) of the same image type
that you changed, then you must redeploy the daemon(s) in order to make them
use the new image.

For example, if you changed the Prometheus image, you would have to run the
following command in order to pick up the changes:

.. prompt:: bash #

   ceph orch redeploy prometheus


.. note::

   By setting a custom image, the default value will be overridden (but not
   overwritten). The default value changes when updates become available.
   By setting a custom image, you will not be able to update the component
   you have set the custom image for automatically. You will need to
   manually update the configuration (image name and tag) to be able to
   install updates.
   overwritten). The default value will change when an update becomes
   available. If you set a custom image, you will not be able automatically
   to update the component you have modified with the custom image. You will
   need to manually update the configuration (that includes the image name
   and the tag) to be able to install updates.

   If you choose to go with the recommendations instead, you can reset the
   custom image you have set before. After that, the default value will be
   used again. Use ``ceph config rm`` to reset the configuration option
   If you choose to accept the recommendations, you can reset the custom
   image that you have set before. If you do this, the default value will be
   used again. Use ``ceph config rm`` to reset the configuration option, in
   a command of the following form:

   .. code-block:: bash
   .. prompt:: bash #

      ceph config rm mgr mgr/cephadm/<option_name>

   For example
   For example:

   .. code-block:: bash
   .. prompt:: bash #

      ceph config rm mgr mgr/cephadm/container_image_prometheus

See also :ref:`cephadm-airgap`.
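The rewritten passage boils down to a three-step workflow. A compact recap using the exact commands quoted above (the Prometheus image tag is just the example value from the text):

    ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1
    ceph orch redeploy prometheus
    # later, to return to the default image
    ceph config rm mgr mgr/cephadm/container_image_prometheus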
@@ -1,7 +1,6 @@
***********
OSD Service
***********
.. _device management: ../rados/operations/devices
.. _libstoragemgmt: https://github.com/libstorage/libstoragemgmt

List Devices

@@ -79,7 +78,7 @@ like this:

In this example, libstoragemgmt has confirmed the health of the drives and the ability to
interact with the Identification and Fault LEDs on the drive enclosures. For further
information about interacting with these LEDs, refer to `device management`_.
information about interacting with these LEDs, refer to :ref:`devices`.

.. note::
    The current release of `libstoragemgmt`_ (1.8.8) supports SCSI, SAS, and SATA based
@@ -193,7 +193,7 @@ you need. For example, the following command upgrades to a development build:

.. prompt:: bash #

   ceph orch upgrade start --image quay.io/ceph-ci/ceph:recent-git-branch-name
   ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:recent-git-branch-name

For more information about available container images, see :ref:`containers`.
@@ -115,4 +115,11 @@ the following method.

    $ sudo rm -rf /var/lib/ceph/mds/ceph-${id}


.. note:: When an active MDS either has health warning MDS_TRIM or
   MDS_CACHE_OVERSIZED, confirmation flag (--yes-i-really-mean-it)
   needs to be passed, else the command will fail. It is not recommended to
   restart an MDS which has these warnings since slow recovery at restart may
   lead to more problems.

.. _MDS Config Reference: ../mds-config-ref
@@ -193,7 +193,11 @@ file system and MDS daemons down, use the ``ceph fs fail`` command:

::

    ceph fs fail <fs_name>
    ceph fs fail <fs_name> {--yes-i-really-mean-it}

.. note:: Note that confirmation flag is optional because it is only required
   when the MDS is active and has health warning MDS_TRIM or
   MDS_CACHE_OVERSIZED.

This command sets a file system flag to prevent standbys from
activating on the file system (the ``joinable`` flag).

@@ -210,7 +214,11 @@ respawn as standbys. The file system will be left in a degraded state.

::

    # For all ranks, 0-N:
    ceph mds fail <fs_name>:<n>
    ceph mds fail <fs_name>:<n> {--yes-i-really-mean-it}

.. note:: Note that confirmation flag is optional because it is only required
   when the MDS is active and has health warning MDS_TRIM or
   MDS_CACHE_OVERSIZED.

Once all ranks are inactive, the file system may also be deleted or left in
this state for other purposes (perhaps disaster recovery).

@@ -272,6 +280,17 @@ Mark the file system rank as repaired. Unlike the name suggests, this command
does not change a MDS; it manipulates the file system rank which has been
marked damaged.

::

    ceph mds last-seen <name>

Learn when the MDS named ``name`` was last in the FSMap. The JSON output
includes the epoch the MDS was last seen. Historical information is limited by
the following ``mon`` configuration:


.. confval:: mon_fsmap_prune_threshold

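A hedged sketch of the three commands covered by this hunk, with ``cephfs`` standing in for a real file system name and the MDS daemon name invented; the confirmation flag is only needed while MDS_TRIM or MDS_CACHE_OVERSIZED is raised:

    ceph fs fail cephfs --yes-i-really-mean-it
    ceph mds fail cephfs:0 --yes-i-really-mean-it
    ceph mds last-seen cephfs.node-01.xyzabc
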
Required Client Features
------------------------

@@ -298,31 +317,47 @@ Clients that are missing newly added features will be evicted automatically.

Here are the current CephFS features and first release they came out:

+------------------+--------------+-----------------+
| Feature          | Ceph release | Upstream Kernel |
+==================+==============+=================+
| jewel            | jewel        | 4.5             |
+------------------+--------------+-----------------+
| kraken           | kraken       | 4.13            |
+------------------+--------------+-----------------+
| luminous         | luminous     | 4.13            |
+------------------+--------------+-----------------+
| mimic            | mimic        | 4.19            |
+------------------+--------------+-----------------+
| reply_encoding   | nautilus     | 5.1             |
+------------------+--------------+-----------------+
| reclaim_client   | nautilus     | N/A             |
+------------------+--------------+-----------------+
| lazy_caps_wanted | nautilus     | 5.1             |
+------------------+--------------+-----------------+
| multi_reconnect  | nautilus     | 5.1             |
+------------------+--------------+-----------------+
| deleg_ino        | octopus      | 5.6             |
+------------------+--------------+-----------------+
| metric_collect   | pacific      | N/A             |
+------------------+--------------+-----------------+
| alternate_name   | pacific      | PLANNED         |
+------------------+--------------+-----------------+

+----------------------------+--------------+-----------------+
| Feature                    | Ceph release | Upstream Kernel |
+============================+==============+=================+
| jewel                      | jewel        | 4.5             |
+----------------------------+--------------+-----------------+
| kraken                     | kraken       | 4.13            |
+----------------------------+--------------+-----------------+
| luminous                   | luminous     | 4.13            |
+----------------------------+--------------+-----------------+
| mimic                      | mimic        | 4.19            |
+----------------------------+--------------+-----------------+
| reply_encoding             | nautilus     | 5.1             |
+----------------------------+--------------+-----------------+
| reclaim_client             | nautilus     | N/A             |
+----------------------------+--------------+-----------------+
| lazy_caps_wanted           | nautilus     | 5.1             |
+----------------------------+--------------+-----------------+
| multi_reconnect            | nautilus     | 5.1             |
+----------------------------+--------------+-----------------+
| deleg_ino                  | octopus      | 5.6             |
+----------------------------+--------------+-----------------+
| metric_collect             | pacific      | N/A             |
+----------------------------+--------------+-----------------+
| alternate_name             | pacific      | 6.5             |
+----------------------------+--------------+-----------------+
| notify_session_state       | quincy       | 5.19            |
+----------------------------+--------------+-----------------+
| op_getvxattr               | quincy       | 6.0             |
+----------------------------+--------------+-----------------+
| 32bits_retry_fwd           | reef         | 6.6             |
+----------------------------+--------------+-----------------+
| new_snaprealm_info         | reef         | UNKNOWN         |
+----------------------------+--------------+-----------------+
| has_owner_uidgid           | reef         | 6.6             |
+----------------------------+--------------+-----------------+
| client_mds_auth_caps       | squid+bp     | PLANNED         |
+----------------------------+--------------+-----------------+

..
  Comment: use `git describe --tags --abbrev=0 <commit>` to lookup release


CephFS Feature Descriptions

@@ -380,6 +415,15 @@ Clients can send performance metric to MDS if MDS support this feature.

Clients can set and understand "alternate names" for directory entries. This is
to be used for encrypted file name support.

::

    client_mds_auth_caps

To effectively implement ``root_squash`` in a client's ``mds`` caps, the client
must understand that it is enforcing ``root_squash`` and other cap metadata.
Clients without this feature are in danger of dropping updates to files. It is
recommended to set this feature bit.


Global settings
---------------
@@ -15,7 +15,8 @@ examining, modifying, and extracting data from journals.

This tool is **dangerous** because it directly modifies internal
data structures of the file system. Make backups, be careful, and
seek expert advice. If you are unsure, do not run this tool.
seek expert advice. If you are unsure, do not run this tool. As a
precaution, cephfs-journal-tool doesn't work on an active filesystem.

Syntax
------
@@ -28,11 +28,10 @@ This restriction impacts *only* the filesystem hierarchy, or, in other words,
the metadata tree that is managed by the MDS. Clients will still be able to
access the underlying file data in RADOS directly. To segregate clients fully,
isolate untrusted clients in their own RADOS namespace. You can place a
client's filesystem subtree in a particular namespace using `file layouts`_ and
then restrict their RADOS access to that namespace using `OSD capabilities`_
client's filesystem subtree in a particular namespace using :ref:`file
layouts<file-layouts>` and then restrict their RADOS access to that namespace
using :ref:`OSD capabilities<modify-user-capabilities>`.

.. _file layouts: ./file-layouts
.. _OSD capabilities: ../rados/operations/user-management/#authorization-capabilities
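A hedged sketch of the isolation pattern described above: pin an untrusted client's subtree to its own RADOS namespace via a file layout, then scope that client's OSD capability to the namespace. The mount path, pool, namespace, and client names are all invented:

    setfattr -n ceph.dir.layout.pool_namespace -v untrusted /mnt/cephfs/untrusted-project
    ceph auth caps client.untrusted \
        mds 'allow rw path=/untrusted-project' \
        mon 'allow r' \
        osd 'allow rw pool=cephfs_data namespace=untrusted'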
Syntax
------
@@ -68,9 +68,9 @@ truncate it like so:

::

    cephfs-journal-tool [--rank=N] journal reset
    cephfs-journal-tool [--rank=<fs_name>:{mds-rank|all}] journal reset --yes-i-really-really-mean-it

Specify the MDS rank using the ``--rank`` option when the file system has/had
Specify the filesystem and the MDS rank using the ``--rank`` option when the file system has/had
multiple active MDS.

.. warning::

@@ -135,7 +135,7 @@ objects.
    # InoTable
    cephfs-table-tool 0 reset inode
    # Journal
    cephfs-journal-tool --rank=0 journal reset
    cephfs-journal-tool --rank=<fs_name>:0 journal reset --yes-i-really-really-mean-it
    # Root inodes ("/" and MDS directory)
    cephfs-data-scan init

@@ -253,7 +253,7 @@ Next, we will create the initial metadata for the fs:

    cephfs-table-tool cephfs_recovery:0 reset session
    cephfs-table-tool cephfs_recovery:0 reset snap
    cephfs-table-tool cephfs_recovery:0 reset inode
    cephfs-journal-tool --rank cephfs_recovery:0 journal reset --force
    cephfs-journal-tool --rank cephfs_recovery:0 journal reset --force --yes-i-really-really-mean-it

Now perform the recovery of the metadata pool from the data pool:
@@ -20,11 +20,11 @@ abstractions:
  subvolumes. Used to effect policies (e.g., :doc:`/cephfs/file-layouts`)
  across a set of subvolumes

Some possible use-cases for the export abstractions:
Possible use-cases for the export abstractions:

* FS subvolumes used as Manila shares or CSI volumes

* FS subvolume groups used as Manila share groups
* FS-subvolume groups used as Manila share groups

Requirements
------------

@@ -46,9 +46,9 @@ Create a volume by running the following command:

    ceph fs volume create <vol_name> [placement]

This creates a CephFS file system and its data and metadata pools. It can also
deploy MDS daemons for the filesystem using a ceph-mgr orchestrator module (for
example Rook). See :doc:`/mgr/orchestrator`.
This creates a CephFS file system and its data and metadata pools. This command
can also deploy MDS daemons for the filesystem using a Ceph Manager orchestrator
module (for example Rook). See :doc:`/mgr/orchestrator`.

``<vol_name>`` is the volume name (an arbitrary string). ``[placement]`` is an
optional string that specifies the :ref:`orchestrator-cli-placement-spec` for

@@ -64,13 +64,13 @@ To remove a volume, run the following command:

    ceph fs volume rm <vol_name> [--yes-i-really-mean-it]

This removes a file system and its data and metadata pools. It also tries to
remove MDS daemons using the enabled ceph-mgr orchestrator module.
This command removes a file system and its data and metadata pools. It also
tries to remove MDS daemons using the enabled Ceph Manager orchestrator module.

.. note:: After volume deletion, it is recommended to restart `ceph-mgr`
   if a new file system is created on the same cluster and subvolume interface
   is being used. Please see https://tracker.ceph.com/issues/49605#note-5
   for more details.
.. note:: After volume deletion, we recommend restarting `ceph-mgr` if a new
   file system is created on the same cluster and the subvolume interface is
   being used. See https://tracker.ceph.com/issues/49605#note-5 for more
   details.

List volumes by running the following command:
@@ -86,17 +86,17 @@ Rename a volume by running the following command:

Renaming a volume can be an expensive operation that requires the following:

- Renaming the orchestrator-managed MDS service to match the <new_vol_name>.
  This involves launching a MDS service with ``<new_vol_name>`` and bringing
  down the MDS service with ``<vol_name>``.
- Renaming the file system matching ``<vol_name>`` to ``<new_vol_name>``.
- Changing the application tags on the data and metadata pools of the file system
  to ``<new_vol_name>``.
- Renaming the orchestrator-managed MDS service to match the
  ``<new_vol_name>``. This involves launching a MDS service with
  ``<new_vol_name>`` and bringing down the MDS service with ``<vol_name>``.
- Renaming the file system from ``<vol_name>`` to ``<new_vol_name>``.
- Changing the application tags on the data and metadata pools of the file
  system to ``<new_vol_name>``.
- Renaming the metadata and data pools of the file system.

The CephX IDs that are authorized for ``<vol_name>`` must be reauthorized for
``<new_vol_name>``. Any ongoing operations of the clients using these IDs may
be disrupted. Ensure that mirroring is disabled on the volume.
``<new_vol_name>``. Any ongoing operations of the clients that are using these
IDs may be disrupted. Ensure that mirroring is disabled on the volume.

To fetch the information of a CephFS volume, run the following command:

@@ -104,7 +104,8 @@ To fetch the information of a CephFS volume, run the following command:

    ceph fs volume info vol_name [--human_readable]

The ``--human_readable`` flag shows used and available pool capacities in KB/MB/GB.
The ``--human_readable`` flag shows used and available pool capacities in
KB/MB/GB.

The output format is JSON and contains fields as follows:
@@ -159,7 +160,7 @@ Create a subvolume group by running the following command:

The command succeeds even if the subvolume group already exists.

When creating a subvolume group you can specify its data pool layout (see
When you create a subvolume group, you can specify its data pool layout (see
:doc:`/cephfs/file-layouts`), uid, gid, file mode in octal numerals, and
size in bytes. The size of the subvolume group is specified by setting
a quota on it (see :doc:`/cephfs/quota`). By default, the subvolume group

@@ -173,11 +174,11 @@ Remove a subvolume group by running a command of the following form:

    ceph fs subvolumegroup rm <vol_name> <group_name> [--force]

The removal of a subvolume group fails if the subvolume group is not empty or
is non-existent. The ``--force`` flag allows the non-existent "subvolume group remove
command" to succeed.
is non-existent. The ``--force`` flag allows the command to succeed when its
argument is a non-existent subvolume group.

Fetch the absolute path of a subvolume group by running a command of the following form:
Fetch the absolute path of a subvolume group by running a command of the
following form:

.. prompt:: bash #

@@ -192,7 +193,8 @@ List subvolume groups by running a command of the following form:

.. note:: Subvolume group snapshot feature is no longer supported in mainline CephFS (existing group
   snapshots can still be listed and deleted)

Fetch the metadata of a subvolume group by running a command of the following form:
Fetch the metadata of a subvolume group by running a command of the following
form:

.. prompt:: bash #

@@ -200,9 +202,13 @@ Fetch the metadata of a subvolume group by running a command of the following fo

The output format is JSON and contains fields as follows:

* ``atime``: access time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
* ``mtime``: modification time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
* ``ctime``: change time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
* ``atime``: access time of the subvolume group path in the format ``YYYY-MM-DD
  HH:MM:SS``
* ``mtime``: time of the most recent modification of the subvolume group path
  in the format
  ``YYYY-MM-DD HH:MM:SS``
* ``ctime``: time of the most recent change of the subvolume group path in the
  format ``YYYY-MM-DD HH:MM:SS``
* ``uid``: uid of the subvolume group path
* ``gid``: gid of the subvolume group path
* ``mode``: mode of the subvolume group path

@@ -213,7 +219,8 @@ The output format is JSON and contains fields as follows:
* ``created_at``: creation time of the subvolume group in the format "YYYY-MM-DD HH:MM:SS"
* ``data_pool``: data pool to which the subvolume group belongs

Check the presence of any subvolume group by running a command of the following form:
Check for the presence of a given subvolume group by running a command of the
following form:

.. prompt:: bash #

@@ -221,13 +228,13 @@ Check the presence of any subvolume group by running a command of the following

The ``exist`` command outputs:

* "subvolumegroup exists": if any subvolumegroup is present
* "no subvolumegroup exists": if no subvolumegroup is present
* ``subvolumegroup exists``: if any subvolumegroup is present
* ``no subvolumegroup exists``: if no subvolumegroup is present

.. note:: This command checks for the presence of custom groups and not
   presence of the default one. To validate the emptiness of the volume, a
   subvolumegroup existence check alone is not sufficient. Subvolume existence
   also needs to be checked as there might be subvolumes in the default group.
   presence of the default one. A subvolumegroup-existence check alone is not
   sufficient to validate the emptiness of the volume. Subvolume existence must
   also be checked, as there might be subvolumes in the default group.

Resize a subvolume group by running a command of the following form:

@@ -235,21 +242,22 @@ Resize a subvolume group by running a command of the following form:

    ceph fs subvolumegroup resize <vol_name> <group_name> <new_size> [--no_shrink]

The command resizes the subvolume group quota, using the size specified by
This command resizes the subvolume group quota, using the size specified by
``new_size``. The ``--no_shrink`` flag prevents the subvolume group from
shrinking below the current used size.

The subvolume group may be resized to an infinite size by passing ``inf`` or
``infinite`` as the ``new_size``.
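A hedged end-to-end sketch of the subvolume group commands discussed in this hunk; the volume and group names are invented, and the command forms follow the templates quoted above:

    ceph fs subvolumegroup create vol1 group1
    ceph fs subvolumegroup getpath vol1 group1
    ceph fs subvolumegroup resize vol1 group1 inf
    ceph fs subvolumegroup exist vol1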
Remove a snapshot of a subvolume group by running a command of the following form:
Remove a snapshot of a subvolume group by running a command of the following
form:

.. prompt:: bash #

   ceph fs subvolumegroup snapshot rm <vol_name> <group_name> <snap_name> [--force]

Supplying the ``--force`` flag allows the command to succeed when it would otherwise
fail due to the nonexistence of the snapshot.
Supplying the ``--force`` flag allows the command to succeed when it would
otherwise fail due to the nonexistence of the snapshot.

List snapshots of a subvolume group by running a command of the following form:
@@ -261,7 +269,10 @@ List snapshots of a subvolume group by running a command of the following form:

FS Subvolumes
-------------

Create a subvolume using:
Creating a subvolume
~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to create a subvolume:

.. prompt:: bash #

@@ -270,131 +281,184 @@ Create a subvolume using:

The command succeeds even if the subvolume already exists.

When creating a subvolume you can specify its subvolume group, data pool layout,
uid, gid, file mode in octal numerals, and size in bytes. The size of the subvolume is
specified by setting a quota on it (see :doc:`/cephfs/quota`). The subvolume can be
created in a separate RADOS namespace by specifying --namespace-isolated option. By
default a subvolume is created within the default subvolume group, and with an octal file
mode '755', uid of its subvolume group, gid of its subvolume group, data pool layout of
its parent directory and no size limit.
When creating a subvolume, you can specify its subvolume group, data pool
layout, uid, gid, file mode in octal numerals, and size in bytes. The size of
the subvolume is specified by setting a quota on it (see :doc:`/cephfs/quota`).
The subvolume can be created in a separate RADOS namespace by specifying the
``--namespace-isolated`` option. By default, a subvolume is created within the
default subvolume group with an octal file mode of ``755``, a uid of its
subvolume group, a gid of its subvolume group, a data pool layout of its parent
directory, and no size limit.

Remove a subvolume using:
Removing a subvolume
~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to remove a subvolume:

.. prompt:: bash #

   ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name>] [--force] [--retain-snapshots]

The command removes the subvolume and its contents. It does this in two steps.
First, it moves the subvolume to a trash folder, and then asynchronously purges
its contents.
This command removes the subvolume and its contents. This is done in two steps.
First, the subvolume is moved to a trash folder. Second, the contents of that
trash folder are purged asynchronously.

The removal of a subvolume fails if it has snapshots, or is non-existent.
'--force' flag allows the non-existent subvolume remove command to succeed.
Subvolume removal fails if the subvolume has snapshots or is non-existent. The
``--force`` flag allows the "non-existent subvolume remove" command to succeed.

A subvolume can be removed retaining existing snapshots of the subvolume using the
'--retain-snapshots' option. If snapshots are retained, the subvolume is considered
empty for all operations not involving the retained snapshots.
To remove a subvolume while retaining snapshots of the subvolume, use the
``--retain-snapshots`` flag. If snapshots associated with a given subvolume are
retained, then the subvolume is considered empty for all operations that do not
involve the retained snapshots.

.. note:: Snapshot retained subvolumes can be recreated using 'ceph fs subvolume create'
.. note:: Snapshot-retained subvolumes can be recreated using ``ceph fs
   subvolume create``.

.. note:: Retained snapshots can be used as a clone source to recreate the subvolume, or clone to a newer subvolume.
.. note:: Retained snapshots can be used as clone sources for recreating the
   subvolume or for cloning to a newer subvolume.

Resize a subvolume using:
Resizing a subvolume
~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to resize a subvolume:

.. prompt:: bash #

   ceph fs subvolume resize <vol_name> <subvol_name> <new_size> [--group_name <subvol_group_name>] [--no_shrink]

The command resizes the subvolume quota using the size specified by ``new_size``.
The ``--no_shrink`` flag prevents the subvolume from shrinking below the current used size of the subvolume.
This command resizes the subvolume quota, using the size specified by
``new_size``. The ``--no_shrink`` flag prevents the subvolume from shrinking
below the current "used size" of the subvolume.

The subvolume can be resized to an unlimited (but sparse) logical size by passing ``inf`` or ``infinite`` as ``new_size``.
The subvolume can be resized to an unlimited (but sparse) logical size by
passing ``inf`` or ``infinite`` as ``<new_size>``.
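A hedged sketch tying the create/resize/remove templates above together; all names and the 10 GiB quota are invented:

    ceph fs subvolume create vol1 subvol1 --group_name group1 --mode 755 --namespace-isolated
    ceph fs subvolume resize vol1 subvol1 10737418240 --group_name group1 --no_shrink
    ceph fs subvolume rm vol1 subvol1 --group_name group1 --retain-snapshots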
Authorize cephx auth IDs, the read/read-write access to fs subvolumes:
Authorizing CephX auth IDs
~~~~~~~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to authorize CephX auth IDs. This provides
the read/read-write access to file system subvolumes:

.. prompt:: bash #

   ceph fs subvolume authorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>] [--access_level=<access_level>]

The ``access_level`` takes ``r`` or ``rw`` as value.
The ``<access_level>`` option takes either ``r`` or ``rw`` as a value.

Deauthorize cephx auth IDs, the read/read-write access to fs subvolumes:
De-authorizing CephX auth IDs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to deauthorize CephX auth IDs. This removes
the read/read-write access to file system subvolumes:

.. prompt:: bash #

   ceph fs subvolume deauthorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]

List cephx auth IDs authorized to access fs subvolume:
Listing CephX auth IDs
~~~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to list CephX auth IDs authorized to access
the file system subvolume:

.. prompt:: bash #

   ceph fs subvolume authorized_list <vol_name> <sub_name> [--group_name=<group_name>]

Evict fs clients based on auth ID and subvolume mounted:
Evicting File System Clients (Auth ID)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to evict file system clients based on the
auth ID and the subvolume mounted:

.. prompt:: bash #

   ceph fs subvolume evict <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]

Fetch the absolute path of a subvolume using:
Fetching the Absolute Path of a Subvolume
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to fetch the absolute path of a subvolume:

.. prompt:: bash #

   ceph fs subvolume getpath <vol_name> <subvol_name> [--group_name <subvol_group_name>]

Fetch the information of a subvolume using:
Fetching a Subvolume's Information
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Use a command of the following form to fetch a subvolume's information:

.. prompt:: bash #

   ceph fs subvolume info <vol_name> <subvol_name> [--group_name <subvol_group_name>]

The output format is JSON and contains fields as follows.
The output format is JSON and contains the following fields.

* ``atime``: access time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS"
* ``mtime``: modification time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS"
* ``ctime``: change time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS"
* ``atime``: access time of the subvolume path in the format ``YYYY-MM-DD
  HH:MM:SS``
* ``mtime``: modification time of the subvolume path in the format ``YYYY-MM-DD
  HH:MM:SS``
* ``ctime``: change time of the subvolume path in the format ``YYYY-MM-DD
  HH:MM:SS``
* ``uid``: uid of the subvolume path
* ``gid``: gid of the subvolume path
* ``mode``: mode of the subvolume path
* ``mon_addrs``: list of monitor addresses
* ``bytes_pcent``: quota used in percentage if quota is set, else displays ``undefined``
* ``bytes_quota``: quota size in bytes if quota is set, else displays ``infinite``
* ``bytes_pcent``: quota used in percentage if quota is set; else displays
  ``undefined``
* ``bytes_quota``: quota size in bytes if quota is set; else displays
  ``infinite``
* ``bytes_used``: current used size of the subvolume in bytes
* ``created_at``: creation time of the subvolume in the format "YYYY-MM-DD HH:MM:SS"
* ``created_at``: creation time of the subvolume in the format ``YYYY-MM-DD
  HH:MM:SS``
* ``data_pool``: data pool to which the subvolume belongs
* ``path``: absolute path of a subvolume
* ``type``: subvolume type indicating whether it's clone or subvolume
* ``type``: subvolume type, indicating whether it is ``clone`` or ``subvolume``
* ``pool_namespace``: RADOS namespace of the subvolume
* ``features``: features supported by the subvolume
* ``state``: current state of the subvolume

If a subvolume has been removed retaining its snapshots, the output contains only fields as follows.
If a subvolume has been removed but its snapshots have been retained, the
output contains only the following fields.

* ``type``: subvolume type indicating whether it's clone or subvolume
* ``type``: subvolume type indicating whether it is ``clone`` or ``subvolume``
* ``features``: features supported by the subvolume
* ``state``: current state of the subvolume

A subvolume's ``features`` are based on the internal version of the subvolume and are
a subset of the following:
A subvolume's ``features`` are based on the internal version of the subvolume
and are a subset of the following:

* ``snapshot-clone``: supports cloning using a subvolumes snapshot as the source
* ``snapshot-autoprotect``: supports automatically protecting snapshots, that are active clone sources, from deletion
* ``snapshot-retention``: supports removing subvolume contents, retaining any existing snapshots
* ``snapshot-clone``: supports cloning using a subvolume's snapshot as the
  source
* ``snapshot-autoprotect``: supports automatically protecting snapshots from
  deletion if they are active clone sources
* ``snapshot-retention``: supports removing subvolume contents, retaining any
  existing snapshots

A subvolume's ``state`` is based on the current state of the subvolume and contains one of the following values.
A subvolume's ``state`` is based on the current state of the subvolume and
contains one of the following values.

* ``complete``: subvolume is ready for all operations
* ``snapshot-retained``: subvolume is removed but its snapshots are retained
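A short sketch of inspecting an existing subvolume with the commands from this hunk; the command forms are quoted from the documentation above, while the volume, subvolume, and group names are invented:

    ceph fs subvolume getpath vol1 subvol1 --group_name group1
    ceph fs subvolume info vol1 subvol1 --group_name group1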
List subvolumes using:
|
||||
Listing Subvolumes
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to list subvolumes:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume ls <vol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: subvolumes that are removed but have snapshots retained, are also listed.
|
||||
.. note:: Subvolumes that have been removed but have snapshots retained are
|
||||
also listed.
|
||||
|
||||
Check the presence of any subvolume using:
|
||||
Checking for the Presence of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to check for the presence of a given
|
||||
subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -402,10 +466,14 @@ Check the presence of any subvolume using:
|
||||
|
||||
These are the possible results of the ``exist`` command:
|
||||
|
||||
* ``subvolume exists``: if any subvolume of given group_name is present
|
||||
* ``no subvolume exists``: if no subvolume of given group_name is present
|
||||
* ``subvolume exists``: if any subvolume of given ``group_name`` is present
|
||||
* ``no subvolume exists``: if no subvolume of given ``group_name`` is present
|
||||
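For example (a sketch; the volume name ``cephfs`` and the group name ``csi``
are hypothetical):

.. prompt:: bash #

   ceph fs subvolume exist cephfs --group_name csi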
|
||||
Set custom metadata on the subvolume as a key-value pair using:
|
||||
Setting Custom Metadata On a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to set custom metadata on the subvolume as
|
||||
a key-value pair:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -413,67 +481,100 @@ Set custom metadata on the subvolume as a key-value pair using:
|
||||
|
||||
.. note:: If the key_name already exists then the old value will get replaced by the new value.
|
||||
|
||||
.. note:: key_name and value should be a string of ASCII characters (as specified in python's string.printable). key_name is case-insensitive and always stored in lower case.
|
||||
.. note:: ``key_name`` and ``value`` should be strings of ASCII characters (as
|
||||
specified in Python's ``string.printable``). ``key_name`` is
|
||||
case-insensitive and always stored in lower case.
|
||||
|
||||
.. note:: Custom metadata on a subvolume is not preserved when snapshotting the subvolume, and hence, is also not preserved when cloning the subvolume snapshot.
|
||||
.. note:: Custom metadata on a subvolume is not preserved when snapshotting the
|
||||
subvolume, and is therefore also not preserved when cloning the subvolume
|
||||
snapshot.
|
||||
|
||||
Get custom metadata set on the subvolume using the metadata key:
|
||||
Getting The Custom Metadata Set of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to get the custom metadata set on the
|
||||
subvolume using the metadata key:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume metadata get <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>]
|
||||
|
||||
List custom metadata (key-value pairs) set on the subvolume using:
|
||||
Listing The Custom Metadata Set of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to list custom metadata (key-value pairs)
|
||||
set on the subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume metadata ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove custom metadata set on the subvolume using the metadata key:
|
||||
Removing a Custom Metadata Set from a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to remove custom metadata set on the
|
||||
subvolume using the metadata key:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume metadata rm <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed that would otherwise
|
||||
fail if the metadata key did not exist.
|
||||
Using the ``--force`` flag allows the command to succeed when it would
|
||||
otherwise fail (if the metadata key does not exist).
|
||||
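A short end-to-end sketch of the metadata commands described above, using a
hypothetical volume ``cephfs``, subvolume ``subvol1``, and key ``owner``:

.. prompt:: bash #

   ceph fs subvolume metadata set cephfs subvol1 owner team-a
   ceph fs subvolume metadata get cephfs subvol1 owner
   ceph fs subvolume metadata ls cephfs subvol1
   ceph fs subvolume metadata rm cephfs subvol1 owner --force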
|
||||
Create a snapshot of a subvolume using:
|
||||
Creating a Snapshot of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to create a snapshot of a subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot create <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove a snapshot of a subvolume using:
|
||||
|
||||
Removing a Snapshot of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to remove a snapshot of a subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed that would otherwise
|
||||
fail if the snapshot did not exist.
|
||||
Using the ``--force`` flag allows the command to succeed when it would
|
||||
otherwise fail (if the snapshot does not exist).
|
||||
|
||||
.. note:: If the last snapshot within a snapshot-retained subvolume is removed, the subvolume is also removed.
|
||||
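For example (hypothetical names), creating and then removing a snapshot:

.. prompt:: bash #

   ceph fs subvolume snapshot create cephfs subvol1 snap1
   ceph fs subvolume snapshot rm cephfs subvol1 snap1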
|
||||
List snapshots of a subvolume using:
|
||||
Listing the Snapshots of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to list the snapshots of a subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Fetch the information of a snapshot using:
|
||||
Fetching a Snapshot's Information
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to fetch a snapshot's information:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot info <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The output format is JSON and contains fields as follows.
|
||||
The output format is JSON and contains the following fields.
|
||||
|
||||
* ``created_at``: creation time of the snapshot in the format "YYYY-MM-DD HH:MM:SS:ffffff"
|
||||
* ``created_at``: creation time of the snapshot in the format ``YYYY-MM-DD
|
||||
HH:MM:SS:ffffff``
|
||||
* ``data_pool``: data pool to which the snapshot belongs
|
||||
* ``has_pending_clones``: ``yes`` if snapshot clone is in progress, otherwise ``no``
|
||||
* ``pending_clones``: list of in-progress or pending clones and their target group if any exist, otherwise this field is not shown
|
||||
* ``orphan_clones_count``: count of orphan clones if the snapshot has orphan clones, otherwise this field is not shown
|
||||
* ``has_pending_clones``: ``yes`` if snapshot clone is in progress, otherwise
|
||||
``no``
|
||||
* ``pending_clones``: list of in-progress or pending clones and their target
|
||||
groups if any exist; otherwise this field is not shown
|
||||
* ``orphan_clones_count``: count of orphan clones if the snapshot has orphan
|
||||
clones, otherwise this field is not shown
|
||||
|
||||
Sample output when snapshot clones are in progress or pending:
|
||||
|
||||
@ -516,50 +617,74 @@ Sample output when no snapshot clone is in progress or pending:
|
||||
"has_pending_clones": "no"
|
||||
}
|
||||
|
||||
Set custom key-value metadata on the snapshot by running:
|
||||
Setting Custom Key-Value Pair Metadata on a Snapshot
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to set custom key-value metadata on the
|
||||
snapshot:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot metadata set <vol_name> <subvol_name> <snap_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: If the key_name already exists then the old value will get replaced by the new value.
|
||||
.. note:: If the ``key_name`` already exists, then the old value will be replaced
|
||||
by the new value.
|
||||
|
||||
.. note:: The key_name and value should be a strings of ASCII characters (as specified in Python's ``string.printable``). The key_name is case-insensitive and always stored in lowercase.
|
||||
.. note:: The ``key_name`` and value should be strings of ASCII characters
|
||||
(as specified in Python's ``string.printable``). The ``key_name`` is
|
||||
case-insensitive and always stored in lowercase.
|
||||
|
||||
.. note:: Custom metadata on a snapshot is not preserved when snapshotting the subvolume, and hence is also not preserved when cloning the subvolume snapshot.
|
||||
.. note:: Custom metadata on a snapshot is not preserved when snapshotting the
|
||||
subvolume, and is therefore not preserved when cloning the subvolume
|
||||
snapshot.
|
||||
|
||||
Get custom metadata set on the snapshot using the metadata key:
|
||||
Getting Custom Metadata That Has Been Set on a Snapshot
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to get custom metadata that has been set on
|
||||
the snapshot using the metadata key:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot metadata get <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>]
|
||||
|
||||
List custom metadata (key-value pairs) set on the snapshot using:
|
||||
Listing Custom Metadata That Has Been Set on a Snapshot
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to list custom metadata (key-value pairs)
|
||||
set on the snapshot:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot metadata ls <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove custom metadata set on the snapshot using the metadata key:
|
||||
Removing Custom Metadata from a Snapshot
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to remove custom metadata set on the
|
||||
snapshot using the metadata key:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot metadata rm <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed that would otherwise
|
||||
fail if the metadata key did not exist.
|
||||
Using the ``--force`` flag allows the command to succeed when it would otherwise
|
||||
fail (if the metadata key does not exist).
|
||||
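As with subvolume metadata, a short sketch using hypothetical names:

.. prompt:: bash #

   ceph fs subvolume snapshot metadata set cephfs subvol1 snap1 owner team-a
   ceph fs subvolume snapshot metadata ls cephfs subvol1 snap1
   ceph fs subvolume snapshot metadata rm cephfs subvol1 snap1 owner --force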
|
||||
Cloning Snapshots
|
||||
-----------------
|
||||
|
||||
Subvolumes can be created by cloning subvolume snapshots. Cloning is an asynchronous operation that copies
|
||||
data from a snapshot to a subvolume. Due to this bulk copying, cloning is inefficient for very large
|
||||
data sets.
|
||||
Subvolumes can be created by cloning subvolume snapshots. Cloning is an
|
||||
asynchronous operation that copies data from a snapshot to a subvolume. Because
|
||||
cloning is an operation that involves bulk copying, it is slow for
|
||||
very large data sets.
|
||||
|
||||
.. note:: Removing a snapshot (source subvolume) would fail if there are pending or in progress clone operations.
|
||||
.. note:: Removing a snapshot (source subvolume) fails when there are
|
||||
pending or in-progress clone operations.
|
||||
|
||||
Protecting snapshots prior to cloning was a prerequisite in the Nautilus release, and the commands to protect/unprotect
|
||||
snapshots were introduced for this purpose. This prerequisite, and hence the commands to protect/unprotect, is being
|
||||
Protecting snapshots prior to cloning was a prerequisite in the Nautilus
|
||||
release. Commands that made possible the protection and unprotection of
|
||||
snapshots were introduced for this purpose. This prerequisite is being
|
||||
deprecated and may be removed in a future release.
|
||||
|
||||
The commands being deprecated are:
|
||||
@ -573,7 +698,7 @@ The commands being deprecated are:
|
||||
|
||||
.. note:: Use the ``subvolume info`` command to fetch subvolume metadata regarding supported ``features`` to help decide if protect/unprotect of snapshots is required, based on the availability of the ``snapshot-autoprotect`` feature.
|
||||
|
||||
To initiate a clone operation use:
|
||||
Run a command of the following form to initiate a clone operation:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -581,25 +706,30 @@ To initiate a clone operation use:
|
||||
|
||||
.. note:: The ``subvolume snapshot clone`` command depends upon the above-mentioned config option ``snapshot_clone_no_wait``.
|
||||
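The current value of that option can be checked, for example, with:

.. prompt:: bash #

   ceph config get mgr mgr/volumes/snapshot_clone_no_wait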
|
||||
If a snapshot (source subvolume) is a part of non-default group, the group name needs to be specified:
|
||||
Run a command of the following form when a snapshot (source subvolume) is a
|
||||
part of a non-default group. Note that the group name needs to be specified:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --group_name <subvol_group_name>
|
||||
|
||||
Cloned subvolumes can be a part of a different group than the source snapshot (by default, cloned subvolumes are created in default group). To clone to a particular group use:
|
||||
Cloned subvolumes can be a part of a different group than the source snapshot
|
||||
(by default, cloned subvolumes are created in the default group). Run a command of
|
||||
the following form to clone to a particular group:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --target_group_name <subvol_group_name>
|
||||
|
||||
Similar to specifying a pool layout when creating a subvolume, pool layout can be specified when creating a cloned subvolume. To create a cloned subvolume with a specific pool layout use:
|
||||
Pool layout can be specified when creating a cloned subvolume in a way that is
|
||||
similar to specifying a pool layout when creating a subvolume. Run a command of
|
||||
the following form to create a cloned subvolume with a specific pool layout:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --pool_layout <pool_layout>
|
||||
|
||||
To check the status of a clone operation use:
|
||||
Run a command of the following form to check the status of a clone operation:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -665,11 +795,14 @@ Here is an example of a ``failed`` clone:
|
||||
}
|
||||
}
|
||||
|
||||
(NOTE: since ``subvol1`` is in the default group, the ``source`` object's ``clone status`` does not include the group name)
|
||||
.. note:: Because ``subvol1`` is in the default group, the ``source`` object's
|
||||
``clone status`` does not include the group name.
|
||||
|
||||
.. note:: Cloned subvolumes are accessible only after the clone operation has successfully completed.
|
||||
.. note:: Cloned subvolumes are accessible only after the clone operation has
|
||||
successfully completed.
|
||||
|
||||
After a successful clone operation, ``clone status`` will look like the below:
|
||||
After a successful clone operation, ``clone status`` will look like the
|
||||
following:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -685,23 +818,28 @@ After a successful clone operation, ``clone status`` will look like the below:
|
||||
|
||||
If a clone operation is unsuccessful, the ``state`` value will be ``failed``.
|
||||
|
||||
To retry a failed clone operation, the incomplete clone must be deleted and the clone operation must be issued again.
|
||||
To delete a partial clone use:
|
||||
To retry a failed clone operation, the incomplete clone must be deleted and the
|
||||
clone operation must be issued again.
|
||||
|
||||
Run a command of the following form to delete a partial clone:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume rm <vol_name> <clone_name> [--group_name <group_name>] --force
|
||||
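For example, a complete retry might look like the following sketch
(hypothetical names, with the clone in the default group):

.. prompt:: bash #

   ceph fs subvolume rm cephfs clone1 --force
   ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1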
|
||||
.. note:: Cloning synchronizes only directories, regular files and symbolic links. Inode timestamps (access and
|
||||
modification times) are synchronized up to seconds granularity.
|
||||
.. note:: Cloning synchronizes only directories, regular files and symbolic
|
||||
links. Inode timestamps (access and modification times) are synchronized up
|
||||
to a second's granularity.
|
||||
|
||||
An ``in-progress`` or a ``pending`` clone operation may be canceled. To cancel a clone operation use the ``clone cancel`` command:
|
||||
An ``in-progress`` or a ``pending`` clone operation may be canceled. To cancel
|
||||
a clone operation use the ``clone cancel`` command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs clone cancel <vol_name> <clone_name> [--group_name <group_name>]
|
||||
|
||||
On successful cancellation, the cloned subvolume is moved to the ``canceled`` state:
|
||||
On successful cancellation, the cloned subvolume is moved to the ``canceled``
|
||||
state:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -722,7 +860,8 @@ On successful cancellation, the cloned subvolume is moved to the ``canceled`` st
|
||||
}
|
||||
}
|
||||
|
||||
.. note:: The canceled cloned may be deleted by supplying the ``--force`` option to the `fs subvolume rm` command.
|
||||
.. note:: Delete the canceled clone by supplying the ``--force`` option to the
|
||||
``fs subvolume rm`` command.
|
||||
|
||||
Configurables
|
||||
~~~~~~~~~~~~~
|
||||
@ -733,17 +872,20 @@ Configure the maximum number of concurrent clone operations. The default is 4:
|
||||
|
||||
ceph config set mgr mgr/volumes/max_concurrent_clones <value>
|
||||
|
||||
Configure the snapshot_clone_no_wait option :
|
||||
Configure the ``snapshot_clone_no_wait`` option:
|
||||
|
||||
The ``snapshot_clone_no_wait`` config option is used to reject clone creation requests when cloner threads
|
||||
(which can be configured using above option i.e. ``max_concurrent_clones``) are not available.
|
||||
It is enabled by default i.e. the value set is True, whereas it can be configured by using below command.
|
||||
The ``snapshot_clone_no_wait`` config option is used to reject clone-creation
|
||||
requests when cloner threads (which can be configured using the above option,
|
||||
``max_concurrent_clones``) are not available. It is enabled by
|
||||
default. This means that the value is set to ``True``, but it can be configured
|
||||
by using the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/volumes/snapshot_clone_no_wait <bool>
|
||||
|
||||
The current value of ``snapshot_clone_no_wait`` can be fetched by using below command.
|
||||
The current value of ``snapshot_clone_no_wait`` can be fetched by running the
|
||||
following command.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -760,62 +902,73 @@ to policies. This can distribute load across MDS ranks in predictable and
|
||||
stable ways. Review :ref:`cephfs-pinning` and :ref:`cephfs-ephemeral-pinning`
|
||||
for details on how pinning works.
|
||||
|
||||
Pinning is configured by:
|
||||
Run a command of the following form to configure pinning for subvolume groups:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolumegroup pin <vol_name> <group_name> <pin_type> <pin_setting>
|
||||
|
||||
or for subvolumes:
|
||||
Run a command of the following form to configure pinning for subvolumes:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume pin <vol_name> <group_name> <pin_type> <pin_setting>
|
||||
|
||||
Typically you will want to set subvolume group pins. The ``pin_type`` may be
|
||||
one of ``export``, ``distributed``, or ``random``. The ``pin_setting``
|
||||
corresponds to the extended attributed "value" as in the pinning documentation
|
||||
referenced above.
|
||||
Under most circumstances, you will want to set subvolume group pins. The
|
||||
``pin_type`` may be ``export``, ``distributed``, or ``random``. The
|
||||
``pin_setting`` corresponds to the extended attribute "value" as in the
|
||||
pinning documentation referenced above.
|
||||
|
||||
So, for example, setting a distributed pinning strategy on a subvolume group:
|
||||
Here is an example of setting a distributed pinning strategy on a subvolume
|
||||
group:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolumegroup pin cephfilesystem-a csi distributed 1
|
||||
|
||||
Will enable distributed subtree partitioning policy for the "csi" subvolume
|
||||
group. This will cause every subvolume within the group to be automatically
|
||||
This enables the distributed subtree partitioning policy for the "csi" subvolume
|
||||
group. This will cause every subvolume within the group to be automatically
|
||||
pinned to one of the available ranks on the file system.
|
||||
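Using the same example names, an ``export`` pin that ties the group's subtree
to rank 0 would look like this (a sketch):

.. prompt:: bash #

   ceph fs subvolumegroup pin cephfilesystem-a csi export 0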
|
||||
Subvolume quiesce
|
||||
-----------------
|
||||
|
||||
It may be needed to pause IO to a set of subvolumes of a given volume (file system).
|
||||
A good example of such case is a consistent snapshot spanning multiple subvolumes.
|
||||
The task arises often in an orchestrated environment such as Kubernetes, where a single deployed application
|
||||
can work with many mounted subvolumes across several hosts. When a snapshot of such a system is needed,
|
||||
the application may not find the result consistent unless the snapshots were taken
|
||||
during an active write pause.
|
||||
.. note:: The information in this section applies only to Squid and later
|
||||
releases of Ceph.
|
||||
|
||||
The `volumes` plugin provides a tool to initiate and await such a pause across a set of subvolumes:
|
||||
CephFS snapshots do not provide strong-consistency guarantees in cases involving writes
|
||||
performed by multiple clients, which makes consistent backups and disaster recovery a serious
|
||||
challenge for distributed applications. Even in a case where an application uses
|
||||
file system flushes to synchronize checkpoints across its distributed components, there is
|
||||
no guarantee that all acknowledged writes will be part of a given snapshot.
|
||||
|
||||
The subvolume quiesce feature has been developed to provide enterprise-level consistency guarantees
|
||||
for multi-client applications that work with one or more subvolumes. The feature makes it possible to pause IO
|
||||
to a set of subvolumes of a given volume (file system). Enforcing such a pause across all clients makes
|
||||
it possible to guarantee that any persistent checkpoints reached by the application before the pause
|
||||
will be recoverable from the snapshots made during the pause.
|
||||
|
||||
The `volumes` plugin provides a CLI to initiate and await the pause for a set of subvolumes.
|
||||
This pause is called a `quiesce`, which is also used as the command name:
|
||||
|
||||
.. prompt:: bash $ auto
|
||||
|
||||
$ ceph fs quiesce <vol_name> --set-id myset1 <[group_name/]sub_name...> --await
|
||||
# perform actions while the IO pause is active, like taking snapshots
|
||||
$ ceph fs quiesce <vol_name> --set-id myset1 --release --await
|
||||
# if successful, all members of the set were confirmed as still in pause and released from such
|
||||
# if successful, all members of the set were confirmed as still paused and released
|
||||
|
||||
The ``quiesce`` functionality is itself based on a lower level QuiesceDb service provided by the MDS
|
||||
The ``fs quiesce`` functionality is based on a lower level ``quiesce db`` service provided by the MDS
|
||||
daemons, which operates at a file system path granularity.
|
||||
The `volumes` plugin merely maps the subvolume names to their corresponding paths on the given file system
|
||||
and then issues the appropriate quiesce command to the MDS. You can learn more about the feature in the developer guides.
|
||||
and then issues the corresponding ``quiesce db`` command to the MDS. You can learn more about the low-level service
|
||||
in the developer guides.
|
||||
|
||||
Operations
|
||||
~~~~~~~~~~
|
||||
|
||||
The IO pause (referred to as `quiesce`) is requested for a group of one or more subvolumes (i.e. paths in a filesystem).
|
||||
The group is referred to as "quiesce set", and every quiesce set must have a unique string id to interact with.
|
||||
The quiesce can be requested for a set of one or more subvolumes (i.e. paths in a filesystem).
|
||||
This set is referred to as `quiesce set`. Every quiesce set is identified by a unique `set id`.
|
||||
A quiesce set can be manipulated in the following ways:
|
||||
|
||||
* **include** one or more subvolumes - quiesce set members
|
||||
@ -1010,15 +1163,41 @@ Note that the commands above are all non-blocking. If we want to wait for the qu
|
||||
to reach the `QUIESCED` state, we should await it at some point. ``--await`` can be given
|
||||
along with other arguments to let the system know our intention.
|
||||
|
||||
Technically, there are two types of await: `quiesce await` and `release await`. The former
|
||||
is the default, and the latter can only be achieved with ``--release`` present in the argument list.
|
||||
To avoid confision, it is not permitted to issue a `quiesce await` when the set is already `RELEASING`
|
||||
or `RELEASED`. Trying to ``--release`` a set that is not `QUIESCED` is an ``EPERM`` error as well, regardless
|
||||
There are two types of await: `quiesce await` and `release await`. The former is the default,
|
||||
and the latter can only be achieved with ``--release`` present in the argument list.
|
||||
To avoid confusion, it is not permitted to issue a `quiesce await` when the set is not `QUIESCING`.
|
||||
Trying to ``--release`` a set that is not `QUIESCED` is an ``EPERM`` error as well, regardless
|
||||
of whether await is requested alongside. However, it's not an error to `release await`
|
||||
an already released set, or to `quiesce await` a `QUIESCED` one.
|
||||
an already released set, or to `quiesce await` a `QUIESCED` one - those are successful no-ops.
|
||||
|
||||
When awaiting, one may also specify a maximum duration that they would like this await request to block for,
|
||||
not affecting the two intrinsic timeouts discussed above. If the target awaited state isn't reached
|
||||
Since a set is awaited after the application of the ``--await``-augmented command, the await operation
|
||||
may mask a successful result with its own error. A good example is trying to cancel-await a set:
|
||||
|
||||
.. prompt:: bash $ auto
|
||||
|
||||
$ ceph fs quiesce fs1 --set-id set1 --cancel --await
|
||||
{
|
||||
// ...
|
||||
"sets": {
|
||||
"set1": {
|
||||
// ...
|
||||
"state": {
|
||||
"name": "CANCELED",
|
||||
"age": 0
|
||||
},
|
||||
// ...
|
||||
}
|
||||
}
|
||||
}
|
||||
Error EPERM:
|
||||
|
||||
Although ``--cancel`` will succeed synchronously for a set in an active state, awaiting a canceled
|
||||
set is not permitted, hence this call will result in an ``EPERM``. This is deliberately different from
|
||||
returning an ``EINVAL`` error, denoting an error on the user's side, to simplify the system's behavior
|
||||
when ``--await`` is requested. As a result, it's also a simpler model for the user to work with.
|
||||
|
||||
When awaiting, one may specify a maximum duration that they would like this await request to block for,
|
||||
orthogonally to the two intrinsic set timeouts discussed above. If the target awaited state isn't reached
|
||||
within the specified duration, then ``EINPROGRESS`` is returned. For that, one should use the argument
|
||||
``--await-for=<seconds>``. One could think of ``--await`` as equivalent to ``--await-for=Infinity``.
|
||||
While it doesn't make sense to specify both arguments, it is not considered an error. If
|
||||
|
@ -252,3 +252,20 @@ other daemons, please see :ref:`health-checks`.
|
||||
dirty data for cap revokes). If ``defer_client_eviction_on_laggy_osds`` is
|
||||
set to true (default true), client eviction will not take place and thus
|
||||
this health warning will be generated.
|
||||
|
||||
``MDS_CLIENTS_BROKEN_ROOTSQUASH``
|
||||
---------------------------------
|
||||
Message
|
||||
"X client(s) with broken root_squash implementation (MDS_CLIENTS_BROKEN_ROOTSQUASH)"
|
||||
|
||||
Description
|
||||
A bug was discovered in root_squash which would potentially lose changes made by a
|
||||
client restricted with root_squash caps. The fix required a change to the protocol
|
||||
and a client upgrade is required.
|
||||
|
||||
This is a HEALTH_ERR warning because of the danger of inconsistency and lost
|
||||
data. It is recommended to either upgrade your clients, discontinue using
|
||||
root_squash in the interim, or silence the warning if desired.
|
||||
|
||||
To evict and permanently block broken clients from connecting to the
|
||||
cluster, set the ``required_client_feature`` bit ``client_mds_auth_caps``.
|
||||
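For example (a sketch; the file system name ``cephfs`` is hypothetical), the
warning can be muted, or the feature bit can be required, as follows:

.. prompt:: bash #

   ceph health mute MDS_CLIENTS_BROKEN_ROOTSQUASH
   ceph fs required_client_features cephfs add client_mds_auth_caps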
|
@ -10,14 +10,13 @@ a state-of-the-art, multi-use, highly available, and performant file store for
|
||||
a variety of applications, including traditional use-cases like shared home
|
||||
directories, HPC scratch space, and distributed workflow shared storage.
|
||||
|
||||
CephFS achieves these goals through the use of some novel architectural
|
||||
choices. Notably, file metadata is stored in a separate RADOS pool from file
|
||||
data and served via a resizable cluster of *Metadata Servers*, or **MDS**,
|
||||
which may scale to support higher throughput metadata workloads. Clients of
|
||||
the file system have direct access to RADOS for reading and writing file data
|
||||
blocks. For this reason, workloads may linearly scale with the size of the
|
||||
underlying RADOS object store; that is, there is no gateway or broker mediating
|
||||
data I/O for clients.
|
||||
CephFS achieves these goals through novel architectural choices. Notably, file
|
||||
metadata is stored in a RADOS pool separate from file data and is served via a
|
||||
resizable cluster of *Metadata Servers*, or **MDS**\es, which scale to support
|
||||
higher-throughput workloads. Clients of the file system have direct access to
|
||||
RADOS for reading and writing file data blocks. This makes it possible for
|
||||
workloads to scale linearly with the size of the underlying RADOS object store.
|
||||
There is no gateway or broker that mediates data I/O for clients.
|
||||
|
||||
Access to data is coordinated through the cluster of MDS which serve as
|
||||
authorities for the state of the distributed metadata cache cooperatively
|
||||
@ -193,6 +192,7 @@ Developer Guides
|
||||
Client's Capabilities <capabilities>
|
||||
Java and Python bindings <api/index>
|
||||
Mantle <mantle>
|
||||
Metrics <metrics>
|
||||
|
||||
|
||||
.. raw:: html
|
||||
|
@ -6,20 +6,25 @@ Mantle
|
||||
Mantle is for research and development of metadata balancer algorithms,
|
||||
not for use on production CephFS clusters.
|
||||
|
||||
Multiple, active MDSs can migrate directories to balance metadata load. The
|
||||
policies for when, where, and how much to migrate are hard-coded into the
|
||||
metadata balancing module. Mantle is a programmable metadata balancer built
|
||||
into the MDS. The idea is to protect the mechanisms for balancing load
|
||||
(migration, replication, fragmentation) but stub out the balancing policies
|
||||
using Lua. Mantle is based on [1] but the current implementation does *NOT*
|
||||
have the following features from that paper:
|
||||
Mantle is a programmable metadata balancer that is built into the MDS.
|
||||
|
||||
By default (without Mantle), multiple, active MDSs can migrate directories to
|
||||
balance metadata load. The policies for when, where, and how much to migrate
|
||||
are hard-coded into the metadata balancing module.
|
||||
|
||||
Mantle works by protecting the mechanisms for balancing load (migration,
|
||||
replication, fragmentation) while suppressing the balancing policies using Lua.
|
||||
Mantle is based on [1] but the current implementation does *NOT* have the
|
||||
following features from that paper:
|
||||
|
||||
1. Balancing API: in the paper, the user fills in when, where, how much, and
|
||||
load calculation policies; currently, Mantle only requires that Lua policies
|
||||
return a table of target loads (e.g., how much load to send to each MDS)
|
||||
2. "How much" hook: in the paper, there was a hook that let the user control
|
||||
the fragment selector policy; currently, Mantle does not have this hook
|
||||
3. Instantaneous CPU utilization as a metric
|
||||
load calculation policies. Currently, Mantle requires only that Lua policies
|
||||
return a table of target loads (for example, how much load to send to each
|
||||
MDS)
|
||||
2. The "how much" hook: in the paper, there was a hook that allowed the user to
|
||||
control the "fragment selector policy". Currently, Mantle does not have this
|
||||
hook.
|
||||
3. "Instantaneous CPU utilization" as a metric.
|
||||
|
||||
[1] Supercomputing '15 Paper:
|
||||
http://sc15.supercomputing.org/schedule/event_detail-evid=pap168.html
|
||||
@ -30,10 +35,11 @@ Quickstart with vstart
|
||||
.. warning::
|
||||
|
||||
Developing balancers with vstart is difficult because running all daemons
|
||||
and clients on one node can overload the system. Let it run for a while, even
|
||||
though you will likely see a bunch of lost heartbeat and laggy MDS warnings.
|
||||
Most of the time this guide will work but sometimes all MDSs lock up and you
|
||||
cannot actually see them spill. It is much better to run this on a cluster.
|
||||
and clients on one node can overload the system. Let the system run for a
|
||||
while, even though there will likely be many lost heartbeat warnings and
|
||||
many laggy MDS warnings. In most cases this guide will work, but sometimes
|
||||
when developing with vstart all MDSs will lock up and you cannot actually
|
||||
see them spill. It is better to run this on a multi-node cluster.
|
||||
|
||||
As a prerequisite, we assume you have installed `mdtest
|
||||
<https://sourceforge.net/projects/mdtest/>`_ or pulled the `Docker image
|
||||
|
@ -57,7 +57,7 @@
|
||||
.. confval:: mds_kill_link_at
|
||||
.. confval:: mds_kill_rename_at
|
||||
.. confval:: mds_inject_skip_replaying_inotable
|
||||
.. confval:: mds_kill_skip_replaying_inotable
|
||||
.. confval:: mds_kill_after_journal_logs_flushed
|
||||
.. confval:: mds_wipe_sessions
|
||||
.. confval:: mds_wipe_ino_prealloc
|
||||
.. confval:: mds_skip_ino
|
||||
|
132
ceph/doc/cephfs/metrics.rst
Normal file
132
ceph/doc/cephfs/metrics.rst
Normal file
@ -0,0 +1,132 @@
|
||||
.. _cephfs_metrics:
|
||||
|
||||
Metrics
|
||||
=======
|
||||
|
||||
CephFS uses :ref:`Perf Counters` to track metrics. The counters can be labeled (:ref:`Labeled Perf Counters`).
|
||||
|
||||
Client Metrics
|
||||
--------------
|
||||
|
||||
CephFS exports client metrics as :ref:`Labeled Perf Counters`, which can be used to monitor client performance. The following client metrics are exported.
|
||||
|
||||
.. list-table:: Client Metrics
|
||||
:widths: 25 25 75
|
||||
:header-rows: 1
|
||||
|
||||
* - Name
|
||||
- Type
|
||||
- Description
|
||||
* - num_clients
|
||||
- Gauge
|
||||
- Number of client sessions
|
||||
* - cap_hits
|
||||
- Gauge
|
||||
- Percentage of file capability hits over total number of caps
|
||||
* - cap_miss
|
||||
- Gauge
|
||||
- Percentage of file capability misses over total number of caps
|
||||
* - avg_read_latency
|
||||
- Gauge
|
||||
- Mean value of the read latencies
|
||||
* - avg_write_latency
|
||||
- Gauge
|
||||
- Mean value of the write latencies
|
||||
* - avg_metadata_latency
|
||||
- Gauge
|
||||
- Mean value of the metadata latencies
|
||||
* - dentry_lease_hits
|
||||
- Gauge
|
||||
- Percentage of dentry lease hits handed out over the total dentry lease requests
|
||||
* - dentry_lease_miss
|
||||
- Gauge
|
||||
- Percentage of dentry lease misses handed out over the total dentry lease requests
|
||||
* - opened_files
|
||||
- Gauge
|
||||
- Number of opened files
|
||||
* - opened_inodes
|
||||
- Gauge
|
||||
- Number of opened inodes
|
||||
* - pinned_icaps
|
||||
- Gauge
|
||||
- Number of pinned Inode Caps
|
||||
* - total_inodes
|
||||
- Gauge
|
||||
- Total number of Inodes
|
||||
* - total_read_ops
|
||||
- Gauge
|
||||
Total number of read operations generated by all processes
|
||||
* - total_read_size
|
||||
- Gauge
|
||||
Number of bytes read in input/output operations generated by all processes
|
||||
* - total_write_ops
|
||||
- Gauge
|
||||
Total number of write operations generated by all processes
|
||||
* - total_write_size
|
||||
- Gauge
|
||||
- Number of bytes written in input/output operations generated by all processes
|
||||
|
||||
Getting Metrics
|
||||
===============
|
||||
|
||||
The metrics can be scraped from the MDS admin socket, as well as by using the ``tell`` interface. The ``mds_client_metrics-<fsname>`` section in the output of the ``counter dump`` command displays the metrics for each client, as shown below::
|
||||
|
||||
"mds_client_metrics": [
|
||||
{
|
||||
"labels": {
|
||||
"fs_name": "<fsname>",
|
||||
"id": "14213"
|
||||
},
|
||||
"counters": {
|
||||
"num_clients": 2
|
||||
}
|
||||
}
|
||||
],
|
||||
"mds_client_metrics-<fsname>": [
|
||||
{
|
||||
"labels": {
|
||||
"client": "client.0",
|
||||
"rank": "0"
|
||||
},
|
||||
"counters": {
|
||||
"cap_hits": 5149,
|
||||
"cap_miss": 1,
|
||||
"avg_read_latency": 0.000000000,
|
||||
"avg_write_latency": 0.000000000,
|
||||
"avg_metadata_latency": 0.000000000,
|
||||
"dentry_lease_hits": 0,
|
||||
"dentry_lease_miss": 0,
|
||||
"opened_files": 1,
|
||||
"opened_inodes": 2,
|
||||
"pinned_icaps": 2,
|
||||
"total_inodes": 2,
|
||||
"total_read_ops": 0,
|
||||
"total_read_size": 0,
|
||||
"total_write_ops": 4836,
|
||||
"total_write_size": 633864192
|
||||
}
|
||||
},
|
||||
{
|
||||
"labels": {
|
||||
"client": "client.1",
|
||||
"rank": "0"
|
||||
},
|
||||
"counters": {
|
||||
"cap_hits": 3375,
|
||||
"cap_miss": 8,
|
||||
"avg_read_latency": 0.000000000,
|
||||
"avg_write_latency": 0.000000000,
|
||||
"avg_metadata_latency": 0.000000000,
|
||||
"dentry_lease_hits": 0,
|
||||
"dentry_lease_miss": 0,
|
||||
"opened_files": 1,
|
||||
"opened_inodes": 2,
|
||||
"pinned_icaps": 2,
|
||||
"total_inodes": 2,
|
||||
"total_read_ops": 0,
|
||||
"total_read_size": 0,
|
||||
"total_write_ops": 3169,
|
||||
"total_write_size": 415367168
|
||||
}
|
||||
}
|
||||
]
|
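For example (a sketch; substitute your file system name and rank), the same
labeled counters can be dumped through the ``tell`` interface:

.. prompt:: bash #

   ceph tell mds.<fsname>:0 counter dump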
@ -1,11 +1,10 @@
|
||||
Mount CephFS: Prerequisites
|
||||
===========================
|
||||
|
||||
You can use CephFS by mounting it to your local filesystem or by using
|
||||
`cephfs-shell`_. Mounting CephFS requires superuser privileges to trim
|
||||
dentries by issuing a remount of itself. CephFS can be mounted
|
||||
`using kernel`_ as well as `using FUSE`_. Both have their own
|
||||
advantages. Read the following section to understand more about both of
|
||||
You can use CephFS by mounting the file system on a machine or by using
|
||||
:ref:`cephfs-shell <cephfs-shell>`. A system mount can be performed using `the
|
||||
kernel driver`_ as well as `the FUSE driver`_. Both have their own advantages
|
||||
and disadvantages. Read the following section to understand more about both of
|
||||
these ways to mount CephFS.
|
||||
|
||||
For Windows CephFS mounts, please check the `ceph-dokan`_ page.
|
||||
@ -69,7 +68,7 @@ Ceph MON resides.
|
||||
individually, please check respective mount documents.
|
||||
|
||||
.. _Client Authentication: ../client-auth
|
||||
.. _cephfs-shell: ../cephfs-shell
|
||||
.. _using kernel: ../mount-using-kernel-driver
|
||||
.. _using FUSE: ../mount-using-fuse
|
||||
.. _cephfs-shell: ../cephfs-shell
|
||||
.. _the kernel driver: ../mount-using-kernel-driver
|
||||
.. _the FUSE driver: ../mount-using-fuse
|
||||
.. _ceph-dokan: ../ceph-dokan
|
||||
|
@ -12,8 +12,11 @@ Prerequisites
|
||||
Go through the prerequisites required by both kernel and FUSE mounts,
|
||||
on the `Mount CephFS: Prerequisites`_ page.
|
||||
|
||||
.. note:: Mounting CephFS using FUSE requires superuser privileges to trim dentries
|
||||
by issuing a remount of itself.
|
||||
.. note:: Mounting CephFS using FUSE requires superuser privileges (sudo/root).
|
||||
The libfuse interface does not provide a mechanism to trim cache entries in the
|
||||
kernel so a remount (``mount(2)``) system call is required to force the kernel
|
||||
to drop the cached metadata. ``ceph-fuse`` issues these remount system calls
|
||||
periodically in response to cache pressure in the MDS or due to metadata cache revocations.
|
||||
|
||||
Synopsis
|
||||
========
|
||||
|
@ -20,16 +20,18 @@ Complete General Prerequisites
|
||||
Go through the prerequisites required by both kernel and FUSE mounts,
|
||||
on the `Mount CephFS: Prerequisites`_ page.
|
||||
|
||||
Is mount helper is present?
|
||||
---------------------------
|
||||
Is the mount helper present?
|
||||
----------------------------
|
||||
The ``mount.ceph`` helper is installed by Ceph packages. The helper passes the
|
||||
monitor address(es) and CephX user keyrings automatically saving the Ceph
|
||||
admin the effort to pass these details explicitly while mounting CephFS. In
|
||||
case the helper is not present on the client machine, CephFS can still be
|
||||
mounted using kernel but by passing these details explicitly to the ``mount``
|
||||
command. To check whether it is present on your system, do::
|
||||
monitor address(es) and CephX user keyrings, saving the Ceph admin the effort
|
||||
of passing these details explicitly while mounting CephFS. If the helper is not
|
||||
present on the client machine, CephFS can still be mounted using the kernel
|
||||
driver, but only by passing these details explicitly to the ``mount`` command.
|
||||
To check whether ``mount.ceph`` is present on your system, run the following command:
|
||||
|
||||
stat /sbin/mount.ceph
|
||||
.. prompt:: bash #
|
||||
|
||||
stat /sbin/mount.ceph
|
||||
|
||||
Which Kernel Version?
|
||||
---------------------
|
||||
|
@ -246,44 +246,47 @@ value to `-1`.
|
||||
Dynamic Subtree Partitioning
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
CephFS has long had a dynamic metadata blanacer (sometimes called the "default
|
||||
CephFS has long had a dynamic metadata balancer (sometimes called the "default
|
||||
balancer") which can split or merge subtrees while placing them on "colder" MDS
|
||||
ranks. Moving the metadata around can improve overall file system throughput
|
||||
ranks. Moving the metadata in this way improves overall file system throughput
|
||||
and cache size.
|
||||
|
||||
However, the balancer has suffered from problem with efficiency and performance
|
||||
so it is by default turned off. This is to avoid an administrator "turning on
|
||||
multimds" by increasing the ``max_mds`` setting and then finding the balancer
|
||||
has made a mess of the cluster performance (reverting is straightforward but
|
||||
can take time).
|
||||
However, the balancer is sometimes inefficient or slow, so by default it is
|
||||
turned off. This is to avoid an administrator "turning on multimds" by
|
||||
increasing the ``max_mds`` setting only to find that the balancer has made a
|
||||
mess of the cluster performance (reverting from this messy state of affairs is
|
||||
straightforward but can take time).
|
||||
|
||||
The setting to turn on the balancer is:
|
||||
To turn on the balancer, run a command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs set <fs_name> balance_automate true
|
||||
|
||||
Turning on the balancer should only be done with appropriate configuration,
|
||||
such as with the ``bal_rank_mask`` setting (described below). Careful
|
||||
monitoring of the file system performance and MDS is advised.
|
||||
Turn on the balancer only with an appropriate configuration, such as a
|
||||
configuration that includes the ``bal_rank_mask`` setting (described
|
||||
:ref:`below <bal-rank-mask>`).
|
||||
|
||||
Careful monitoring of the file system performance and MDS is advised.
|
||||
|
||||
|
||||
Dynamic subtree partitioning with Balancer on specific ranks
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The CephFS file system provides the ``bal_rank_mask`` option to enable the balancer
|
||||
to dynamically rebalance subtrees within particular active MDS ranks. This
|
||||
allows administrators to employ both the dynamic subtree partitioning and
|
||||
static pining schemes in different active MDS ranks so that metadata loads
|
||||
are optimized based on user demand. For instance, in realistic cloud
|
||||
storage environments, where a lot of subvolumes are allotted to multiple
|
||||
computing nodes (e.g., VMs and containers), some subvolumes that require
|
||||
high performance are managed by static partitioning, whereas most subvolumes
|
||||
that experience a moderate workload are managed by the balancer. As the balancer
|
||||
evenly spreads the metadata workload to all active MDS ranks, performance of
|
||||
static pinned subvolumes inevitably may be affected or degraded. If this option
|
||||
is enabled, subtrees managed by the balancer are not affected by
|
||||
static pinned subtrees.
|
||||
.. _bal-rank-mask:
|
||||
|
||||
The CephFS file system provides the ``bal_rank_mask`` option to enable the
|
||||
balancer to dynamically rebalance subtrees within particular active MDS ranks.
|
||||
This allows administrators to employ both the dynamic subtree partitioning and
|
||||
static pinning schemes in different active MDS ranks so that metadata loads are
|
||||
optimized based on user demand. For instance, in realistic cloud storage
|
||||
environments, where a lot of subvolumes are allotted to multiple computing
|
||||
nodes (e.g., VMs and containers), some subvolumes that require high performance
|
||||
are managed by static partitioning, whereas most subvolumes that experience a
|
||||
moderate workload are managed by the balancer. As the balancer evenly spreads
|
||||
the metadata workload to all active MDS ranks, performance of statically pinned
|
||||
subvolumes inevitably may be affected or degraded. If this option is enabled,
|
||||
subtrees managed by the balancer are not affected by statically pinned subtrees.
|
||||
|
||||
This option can be configured with the ``ceph fs set`` command. For example:
|
||||
|
||||
|
@ -31,7 +31,7 @@ Snapshot schedules are identified by path, their repeat interval and their start
|
||||
time. The
|
||||
repeat interval defines the time between two subsequent snapshots. It is
|
||||
specified by a number and a period multiplier, one of `h(our)`, `d(ay)`,
|
||||
`w(eek)`, `M(onth)` and `Y(ear)`. E.g. a repeat interval of `12h` specifies one
|
||||
`w(eek)`, `M(onth)` and `y(ear)`. E.g. a repeat interval of `12h` specifies one
|
||||
snapshot every 12 hours.
|
||||
The start time is specified as a time string (more details about passing times
|
||||
below). By default
|
||||
@ -53,7 +53,7 @@ The semantics are that a spec will ensure `<number>` snapshots are kept that are
|
||||
at least `<time period>` apart. For Example `7d` means the user wants to keep 7
|
||||
snapshots that are at least one day (but potentially longer) apart from each other.
|
||||
The following time periods are recognized: `h(our)`, `d(ay)`, `w(eek)`, `M(onth)`,
|
||||
`Y(ear)` and `n`. The latter is a special modifier where e.g. `10n` means keep
|
||||
`y(ear)` and `n`. The latter is a special modifier where e.g. `10n` means keep
|
||||
the last 10 snapshots regardless of timing.
|
||||
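For instance (a sketch, assuming the default file system and the root path
``/``), a policy that keeps 7 daily snapshots plus the 10 most recent
snapshots could be added with:

.. prompt:: bash #

   ceph fs snap-schedule retention add / 7d10n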
|
||||
All subcommands take an optional `fs` argument to specify paths in
|
||||
|
@ -16,8 +16,7 @@ Building Crimson
|
||||
Crimson is not enabled by default. Enable it at build time by running::
|
||||
|
||||
$ WITH_SEASTAR=true ./install-deps.sh
|
||||
$ mkdir build && cd build
|
||||
$ cmake -DWITH_SEASTAR=ON ..
|
||||
$ ./do_cmake.sh -DWITH_SEASTAR=ON
|
||||
|
||||
Please note, `ASan`_ is enabled by default if Crimson is built from a source
|
||||
cloned using ``git``.
|
||||
@ -28,7 +27,7 @@ Testing crimson with cephadm
|
||||
===============================
|
||||
|
||||
The Ceph CI/CD pipeline builds containers with
|
||||
``crimson-osd`` subsitituted for ``ceph-osd``.
|
||||
``crimson-osd`` substituted for ``ceph-osd``.
|
||||
|
||||
Once a branch at commit <sha1> has been built and is available in
|
||||
``shaman``, you can deploy it using the cephadm instructions outlined
|
||||
@ -44,8 +43,99 @@ use a Crimson build:
|
||||
You'll likely need to supply the ``--allow-mismatched-release`` flag to
|
||||
use a non-release branch.
|
||||
|
||||
Additionally, prior to deploying OSDs, you'll need enable Crimson to
|
||||
direct the default pools to be created as Crimson pools. From the cephadm shell run:
|
||||
Configure Crimson with Bluestore
|
||||
================================
|
||||
|
||||
As Bluestore is not a Crimson native `object store backend`_,
|
||||
deploying Crimson with Bluestore as the back end requires setting
|
||||
one of the two following configuration options:
|
||||
|
||||
.. note::
|
||||
|
||||
#. These two options, along with ``crimson_alien_op_num_threads``,
|
||||
can't be changed after deployment.
|
||||
#. `vstart.sh`_ sets these options using the ``--crimson-smp`` flag.
|
||||
|
||||
|
||||
1) ``crimson_seastar_num_threads``
|
||||
|
||||
In order to allow easier cluster deployments, this option can be used
|
||||
instead of setting the CPU mask manually for each OSD.
|
||||
|
||||
It's recommended that the **number of OSDs on each host** multiplied by
|
||||
``crimson_seastar_num_threads`` be less than the node's number of CPU
|
||||
cores (``nproc``).
|
||||
|
||||
For example, for deploying two nodes with eight CPU cores and two OSDs each:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
conf:
|
||||
# Global to all OSDs
|
||||
osd:
|
||||
crimson seastar num threads: 3
|
||||
|
||||
.. note::
|
||||
|
||||
#. For optimal performance ``crimson_seastar_cpu_cores`` should be set instead.
|
||||
|
||||
2) ``crimson_seastar_cpu_cores`` and ``crimson_alien_thread_cpu_cores``.
|
||||
|
||||
Explicitly set the CPU core allocation for each ``crimson-osd``
|
||||
and for the BlueStore back end. It's recommended that these CPU sets be mutually exclusive.
|
||||
|
||||
For example, for deploying two nodes with eight CPU cores and two OSDs each:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
conf:
|
||||
# Both nodes
|
||||
osd:
|
||||
crimson alien thread cpu cores: 6-7
|
||||
|
||||
# First node
|
||||
osd.0:
|
||||
crimson seastar cpu cores: 0-2
|
||||
osd.1:
|
||||
crimson seastar cpu cores: 3-5
|
||||
|
||||
# Second node
|
||||
osd.2:
|
||||
crimson seastar cpu cores: 0-2
|
||||
osd.3:
|
||||
crimson seastar cpu cores: 3-5
|
||||
|
||||
For a single node with eight CPU cores and three OSDs:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
conf:
|
||||
osd:
|
||||
crimson alien thread cpu cores: 6-7
|
||||
osd.0:
|
||||
crimson seastar cpu cores: 0-1
|
||||
osd.1:
|
||||
crimson seastar cpu cores: 2-3
|
||||
osd.2:
|
||||
crimson seastar cpu cores: 4-5
|
||||
|
||||
Running Crimson
|
||||
===============
|
||||
|
||||
.. note::
|
||||
Crimson is in a tech preview stage.
|
||||
As you might expect, Crimson does not yet have as extensive a feature set as does ``ceph-osd``.
|
||||
Malfunctions including crashes and data loss are to be expected.
|
||||
|
||||
Enabling Crimson
|
||||
================
|
||||
|
||||
After building Crimson and starting your cluster, but prior to deploying OSDs, you'll need to
|
||||
`Configure Crimson with Bluestore`_ and enable Crimson to
|
||||
direct the default pools to be created as Crimson pools. Proceed by running the following commands:
|
||||
|
||||
.. note::
|
||||
`vstart.sh`_ enables Crimson automatically when `--crimson` is used.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -53,9 +143,7 @@ direct the default pools to be created as Crimson pools. From the cephadm shell
|
||||
ceph osd set-allow-crimson --yes-i-really-mean-it
|
||||
ceph config set mon osd_pool_default_crimson true
|
||||
|
||||
The first command enables the ``crimson`` experimental feature. Crimson
|
||||
is highly experimental, and malfunctions including crashes
|
||||
and data loss are to be expected.
|
||||
The first command enables the ``crimson`` experimental feature.
|
||||
|
||||
The second enables the ``allow_crimson`` OSDMap flag. The monitor will
|
||||
not allow ``crimson-osd`` to boot without that flag.
|
||||
@ -64,13 +152,82 @@ The last causes pools to be created by default with the ``crimson`` flag.
|
||||
Crimson pools are restricted to operations supported by Crimson.
|
||||
``Crimson-osd`` won't instantiate PGs from non-Crimson pools.
|
||||
|
||||
Running Crimson
|
||||
===============
|
||||
vstart.sh
|
||||
=========
|
||||
|
||||
As you might expect, Crimson does not yet have as extensive a feature set as does ``ceph-osd``.
|
||||
The following options can be used with ``vstart.sh``.
|
||||
|
||||
object store backend
|
||||
--------------------
|
||||
``--crimson``
|
||||
Start ``crimson-osd`` instead of ``ceph-osd``.
|
||||
|
||||
``--nodaemon``
|
||||
Do not daemonize the service.
|
||||
|
||||
``--redirect-output``
|
||||
Redirect the ``stdout`` and ``stderr`` to ``out/$type.$num.stdout``.
|
||||
|
||||
``--osd-args``
|
||||
Pass extra command line options to ``crimson-osd`` or ``ceph-osd``.
|
||||
This is useful for passing Seastar options to ``crimson-osd``. For
|
||||
example, one can supply ``--osd-args "--memory 2G"`` to set the amount of
|
||||
memory to use. Please refer to the output of::
|
||||
|
||||
crimson-osd --help-seastar
|
||||
|
||||
for additional Seastar-specific command line options.
|
||||
|
||||
``--crimson-smp``
|
||||
The number of cores to use for each OSD.
|
||||
If BlueStore is used, the balance of available cores
|
||||
(as determined by `nproc`) will be assigned to the object store.
|
||||
|
||||
``--bluestore``
|
||||
Use the alienized BlueStore as the object store backend. This is the default (see the `object store backend`_ section below for more details).
|
||||
|
||||
``--cyanstore``
|
||||
Use CyanStore as the object store backend.
|
||||
|
||||
``--memstore``
|
||||
Use the alienized MemStore as the object store backend.
|
||||
|
||||
``--seastore``
|
||||
Use SeaStore as the back end object store.
|
||||
|
||||
``--seastore-devs``
|
||||
Specify the block device used by SeaStore.
|
||||
|
||||
``--seastore-secondary-devs``
|
||||
Optional. SeaStore supports multiple devices. Enable this feature by
|
||||
passing the block device to this option.
|
||||
|
||||
``--seastore-secondary-devs-type``
|
||||
Optional. Specify the type of secondary devices. When the secondary
|
||||
device is slower than the main device passed to ``--seastore-devs``, the cold
|
||||
data in the faster device will be evicted to the slower devices over time.
|
||||
Valid types include ``HDD``, ``SSD`` (default), ``ZNS``, and ``RANDOM_BLOCK_SSD``.
|
||||
Note that secondary devices should not be faster than the main device.
|
||||
|
||||
To start a cluster with a single Crimson node, run::
|
||||
|
||||
$ MGR=1 MON=1 OSD=1 MDS=0 RGW=0 ../src/vstart.sh \
|
||||
--without-dashboard --bluestore --crimson \
|
||||
--redirect-output
|
||||
|
||||
Another SeaStore example::
|
||||
|
||||
$ MGR=1 MON=1 OSD=1 MDS=0 RGW=0 ../src/vstart.sh -n -x \
|
||||
--without-dashboard --seastore \
|
||||
--crimson --redirect-output \
|
||||
--seastore-devs /dev/sda \
|
||||
--seastore-secondary-devs /dev/sdb \
|
||||
--seastore-secondary-devs-type HDD
|
||||
|
||||
Stop this ``vstart`` cluster by running::
|
||||
|
||||
$ ../src/stop.sh --crimson
|
||||
|
||||
Object Store Backend
|
||||
====================
|
||||
|
||||
At the moment, ``crimson-osd`` offers both native and alienized object store
|
||||
backends. The native object store backends perform IO using the SeaStar reactor.
|
||||
@ -91,7 +248,7 @@ managed by the Seastar framework. They are:
|
||||
|
||||
.. describe:: memstore
|
||||
|
||||
The memory backed object store
|
||||
The memory backend object store
|
||||
|
||||
.. describe:: bluestore
|
||||
|
||||
@ -109,7 +266,7 @@ a replica of the thread that called `fork()`_. Tackling this problem in Crimson
|
||||
would unnecessarily complicate the code.
|
||||
|
||||
Since supported GNU/Linux distributions use ``systemd``, which is able to
|
||||
daemonize the application, there is no need to daemonize ourselves.
|
||||
daemonize processes, there is no need to daemonize ourselves.
|
||||
Those using sysvinit can use ``start-stop-daemon`` to daemonize ``crimson-osd``.
|
||||
If this does not work out, a helper utility may be devised.
|
||||
|
||||
@ -143,93 +300,19 @@ does not send log messages directly to a specified ``log_file``. It writes
|
||||
the logging messages to stdout and/or syslog. This behavior can be
|
||||
changed using ``--log-to-stdout`` and ``--log-to-syslog`` command line
|
||||
options. By default, ``log-to-stdout`` is enabled, and ``--log-to-syslog`` is disabled.
|
||||
|
||||
|
||||
vstart.sh
|
||||
---------
|
||||
|
||||
The following options can be used with ``vstart.sh``.
|
||||
|
||||
``--crimson``
|
||||
Start ``crimson-osd`` instead of ``ceph-osd``.
|
||||
|
||||
``--nodaemon``
|
||||
Do not daemonize the service.
|
||||
|
||||
``--redirect-output``
|
||||
Redirect the ``stdout`` and ``stderr`` to ``out/$type.$num.stdout``.
|
||||
|
||||
``--osd-args``
|
||||
Pass extra command line options to ``crimson-osd`` or ``ceph-osd``.
|
||||
This is useful for passing Seastar options to ``crimson-osd``. For
|
||||
example, one can supply ``--osd-args "--memory 2G"`` to set the amount of
|
||||
memory to use. Please refer to the output of::
|
||||
|
||||
crimson-osd --help-seastar
|
||||
|
||||
for additional Seastar-specific command line options.
|
||||
|
||||
``--cyanstore``
|
||||
Use CyanStore as the object store backend.
|
||||
|
||||
``--bluestore``
|
||||
Use the alienized BlueStore as the object store backend. This is the default.
|
||||
|
||||
``--memstore``
|
||||
Use the alienized MemStore as the object store backend.
|
||||
|
||||
``--seastore``
|
||||
Use SeaStore as the back end object store.
|
||||
|
||||
``--seastore-devs``
|
||||
Specify the block device used by SeaStore.
|
||||
|
||||
``--seastore-secondary-devs``
|
||||
Optional. SeaStore supports multiple devices. Enable this feature by
|
||||
passing the block device to this option.
|
||||
|
||||
``--seastore-secondary-devs-type``
|
||||
Optional. Specify the type of secondary devices. When the secondary
|
||||
device is slower than main device passed to ``--seastore-devs``, the cold
|
||||
data in faster device will be evicted to the slower devices over time.
|
||||
Valid types include ``HDD``, ``SSD`` (default), ``ZNS``, and ``RANDOM_BLOCK_SSD``.
Note that secondary devices should not be faster than the main device.
|
||||
|
||||
To start a cluster with a single Crimson node, run::
|
||||
|
||||
$ MGR=1 MON=1 OSD=1 MDS=0 RGW=0 ../src/vstart.sh -n -x \
|
||||
--without-dashboard --cyanstore \
|
||||
--crimson --redirect-output \
|
||||
--osd-args "--memory 4G"
|
||||
|
||||
Here we assign 4 GiB memory and a single thread running on core-0 to ``crimson-osd``.
|
||||
|
||||
Another SeaStore example::
|
||||
|
||||
$ MGR=1 MON=1 OSD=1 MDS=0 RGW=0 ../src/vstart.sh -n -x \
|
||||
--without-dashboard --seastore \
|
||||
--crimson --redirect-output \
|
||||
--seastore-devs /dev/sda \
|
||||
--seastore-secondary-devs /dev/sdb \
|
||||
--seastore-secondary-devs-type HDD
|
||||
|
||||
Stop this ``vstart`` cluster by running::
|
||||
|
||||
$ ../src/stop.sh --crimson
|
||||
|
||||
Metrics and Tracing
|
||||
===================
|
||||
|
||||
Crimson offers three ways to report stats and metrics.
|
||||
|
||||
pg stats reported to mgr
|
||||
PG stats reported to mgr
|
||||
------------------------
|
||||
|
||||
Crimson collects the per-pg, per-pool, and per-osd stats in a `MPGStats`
|
||||
message which is sent to the Ceph Managers. Manager modules can query
|
||||
them using the `MgrModule.get()` method.
|
||||
|
||||
asock command
|
||||
Asock command
|
||||
-------------
|
||||
|
||||
An admin socket command is offered for dumping metrics::
|
||||
@ -252,7 +335,7 @@ see `Prometheus`_ for more details.
|
||||
Profiling Crimson
|
||||
=================
|
||||
|
||||
fio
|
||||
Fio
|
||||
---
|
||||
|
||||
``crimson-store-nbd`` exposes configurable ``FuturizedStore`` internals as an
|
||||
@ -424,7 +507,7 @@ When a Seastar application crashes, it leaves us with a backtrace of addresses,
|
||||
The ``seastar-addr2line`` utility provided by Seastar can be used to map these
|
||||
addresses to functions. The script expects input on ``stdin``,
|
||||
so we need to copy and paste the above addresses, then send EOF by inputting
|
||||
``control-D`` in the terminal. One might use ``echo`` or ``cat`` instead`::
|
||||
``control-D`` in the terminal. One might use ``echo`` or ``cat`` instead::
|
||||
|
||||
$ ../src/seastar/scripts/seastar-addr2line -e bin/crimson-osd
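
For example, instead of pasting interactively, the addresses can be piped in
directly (a sketch only; the addresses below are placeholders, not real
offsets)::

  $ echo '0x47a0f2 0x47b1d3 0x47c2e4' | \
        ../src/seastar/scripts/seastar-addr2line -e bin/crimson-osd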
|
||||
|
||||
@ -475,3 +558,10 @@ addresses in the backtrace::
|
||||
[root@3deb50a8ad51 ~]# dnf install -q -y file
|
||||
[root@3deb50a8ad51 ~]# python3 seastar-addr2line -e /usr/bin/crimson-osd
|
||||
# paste the backtrace here
|
||||
|
||||
Code Walkthroughs
|
||||
=================
|
||||
|
||||
* `Ceph Code Walkthroughs: Crimson <https://www.youtube.com/watch?v=rtkrHk6grsg>`_
|
||||
|
||||
* `Ceph Code Walkthroughs: SeaStore <https://www.youtube.com/watch?v=0rr5oWDE2Ck>`_
|
||||
|
@ -32,8 +32,8 @@ The following chart illustrates the basic Ceph development workflow:
|
||||
|
||||
This page assumes that you are a new contributor with an idea for a bugfix or
|
||||
an enhancement, but you do not know how to proceed. Watch the `Getting Started
|
||||
with Ceph Development <https://www.youtube.com/watch?v=t5UIehZ1oLs>`_ video for
|
||||
a practical summary of this workflow.
|
||||
with Ceph Development <https://www.youtube.com/watch?v=t5UIehZ1oLs>`_ video (1
|
||||
hour 15 minutes) for a practical summary of this workflow.
|
||||
|
||||
Updating the tracker
|
||||
--------------------
|
||||
@ -63,8 +63,8 @@ Ceph Workflow Overview
|
||||
|
||||
Three repositories are involved in the Ceph workflow. They are:
|
||||
|
||||
1. The upstream repository (ceph/ceph)
|
||||
2. Your fork of the upstream repository (your_github_id/ceph)
|
||||
1. The upstream repository (``ceph/ceph``)
|
||||
2. Your fork of the upstream repository (``your_github_id/ceph``)
|
||||
3. Your local working copy of the repository (on your workstation)
|
||||
|
||||
The procedure for making changes to the Ceph repository is as follows:
|
||||
@ -133,14 +133,14 @@ Configuring Your Local Environment
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The commands in this section configure your local git environment so that it
|
||||
generates "Signed-off-by:" tags. These commands also set up your local
|
||||
generates ``Signed-off-by:`` tags. These commands also set up your local
|
||||
environment so that it can stay synchronized with the upstream repository.
|
||||
|
||||
These commands are necessary only during the initial setup of your local
|
||||
working copy. Another way to say that is "These commands are necessary
|
||||
only the first time that you are working with the Ceph repository. They are,
|
||||
however, unavoidable, and if you fail to run them then you will not be able
|
||||
to work on the Ceph repository.".
|
||||
The commands in this section are necessary only during the initial setup of
|
||||
your local working copy. This means that these commands are necessary only the
|
||||
first time that you are working with the Ceph repository. They are, however,
|
||||
unavoidable, and if you fail to run them then you will not be able to work on
|
||||
the Ceph repository.
|
||||
|
||||
1. Configure your local git environment with your name and email address.
|
||||
|
||||
@ -180,12 +180,12 @@ at the moment that you cloned it, but the upstream repo
|
||||
that it was forked from is not frozen in time: the upstream repo is still being
|
||||
updated by other contributors.
|
||||
|
||||
Because upstream main is continually receiving updates from other
|
||||
contributors, your fork will drift farther and farther from the state of the
|
||||
upstream repo when you cloned it.
|
||||
Because upstream main is continually receiving updates from other contributors,
|
||||
over time your fork will drift farther and farther from the state of the
|
||||
upstream repository as it was when you cloned it.
|
||||
|
||||
Keep your fork's ``main`` branch synchronized with upstream main to reduce drift
|
||||
between your fork's main branch and the upstream main branch.
|
||||
Keep your fork's ``main`` branch synchronized with upstream main to reduce
|
||||
drift between your fork's main branch and the upstream main branch.
|
||||
|
||||
Here are the commands for keeping your fork synchronized with the
|
||||
upstream repository:
|
||||
@ -216,15 +216,15 @@ Create a branch for your bugfix:
|
||||
git checkout -b fix_1
|
||||
git push -u origin fix_1
|
||||
|
||||
The first command (git checkout main) makes sure that the bugfix branch
|
||||
The first command (``git checkout main``) makes sure that the bugfix branch
|
||||
"fix_1" is created from the most recent state of the main branch of the
|
||||
upstream repository.
|
||||
|
||||
The second command (git checkout -b fix_1) creates a "bugfix branch" called
|
||||
The second command (``git checkout -b fix_1``) creates a "bugfix branch" called
|
||||
"fix_1" in your local working copy of the repository. The changes that you make
|
||||
in order to fix the bug will be committed to this branch.
|
||||
|
||||
The third command (git push -u origin fix_1) pushes the bugfix branch from
|
||||
The third command (``git push -u origin fix_1``) pushes the bugfix branch from
|
||||
your local working repository to your fork of the upstream repository.
|
||||
|
||||
.. _fixing_bug_locally:
|
||||
@ -243,15 +243,17 @@ Fixing the bug in the local working copy
|
||||
#. **Fixing the bug itself**
|
||||
|
||||
This guide cannot tell you how to fix the bug that you have chosen to fix.
|
||||
This guide assumes that you know what required improvement, and that you
|
||||
know what to do to provide that improvement.
|
||||
This guide assumes that you have identified an area that required
|
||||
improvement, and that you know how to make that improvement.
|
||||
|
||||
It might be that your fix is simple and requires only minimal testing. But
|
||||
that's unlikely. It is more likely that the process of fixing your bug will
|
||||
be iterative and will involve trial, error, skill, and patience.
|
||||
It might be that your fix is simple and that it requires only minimal
|
||||
testing. But that's unlikely unless you're updating only documentation. It
|
||||
is more likely that the process of fixing your bug will require several
|
||||
rounds of testing. The testing process is likely to be iterative and will
|
||||
involve trial, error, skill, and patience.
|
||||
|
||||
For a detailed discussion of the tools available for validating bugfixes,
|
||||
see the chapters on testing.
|
||||
see :ref:`the sections that discuss testing <dev-testing-unit-tests>`.
|
||||
|
||||
Pushing the Fix to Your Fork
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -261,9 +263,9 @@ believe that it works.
|
||||
|
||||
#. Commit the changes to your local working copy.
|
||||
|
||||
Commit the changes to the `fix_1` branch of your local working copy by using
|
||||
the ``--signoff`` option (here represented as the `s` portion of the `-as`
|
||||
flag):
|
||||
Commit the changes to the ``fix_1`` branch of your local working copy by
|
||||
using the ``--signoff`` option (here represented as the ``s`` portion of the
|
||||
``-as`` flag):
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -273,8 +275,8 @@ believe that it works.
|
||||
|
||||
#. Push the changes to your fork:
|
||||
|
||||
Push the changes from the `fix_1` branch of your local working copy to the
|
||||
`fix_1` branch of your fork of the upstream repository:
|
||||
Push the changes from the ``fix_1`` branch of your local working copy to the
|
||||
``fix_1`` branch of your fork of the upstream repository:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -306,7 +308,7 @@ believe that it works.
|
||||
|
||||
origin git@github.com:username/ceph.git (push)
|
||||
|
||||
provide the information that "origin" is the name of your fork of the
|
||||
provide the information that ``origin`` is the name of your fork of the
|
||||
Ceph repository.
|
||||
|
||||
|
||||
@ -333,7 +335,7 @@ the `Git Commit Good Practice`_ article at the `OpenStack Project Wiki`_.
|
||||
.. _`Git Commit Good Practice`: https://wiki.openstack.org/wiki/GitCommitMessages
|
||||
.. _`OpenStack Project Wiki`: https://wiki.openstack.org/wiki/Main_Page
|
||||
|
||||
See also our own `Submitting Patches
|
||||
See also Ceph's own `Submitting Patches
|
||||
<https://github.com/ceph/ceph/blob/main/SubmittingPatches.rst>`_ document.
|
||||
|
||||
After your pull request (PR) has been opened, update the :ref:`issue-tracker`
|
||||
@ -347,24 +349,25 @@ Understanding Automated PR validation
|
||||
|
||||
When you create or update your PR, the Ceph project's `Continuous Integration
|
||||
(CI) <https://en.wikipedia.org/wiki/Continuous_integration>`_ infrastructure
|
||||
automatically tests it. At the time of this writing (May 2022), the automated
|
||||
CI testing included many tests. These five are among them:
|
||||
automatically tests it. Here are just some of the automated tests that are
|
||||
performed on your PR:
|
||||
|
||||
#. a test to check that the commits are properly signed (see :ref:`submitting-patches`):
|
||||
#. a test to check that the commits are properly signed (see
|
||||
:ref:`submitting-patches`):
|
||||
#. a test to check that the documentation builds
|
||||
#. a test to check that the submodules are unmodified
|
||||
#. a test to check that the API is in order
|
||||
#. a :ref:`make check<make-check>` test
|
||||
|
||||
Additional tests may be run depending on which files your PR modifies.
|
||||
Additional tests may be run, depending on which files your PR modifies.
|
||||
|
||||
The :ref:`make check<make-check>` test builds the PR and runs it through a
|
||||
battery of tests. These tests run on servers that are operated by the Ceph
|
||||
Continuous Integration (CI) team. When the tests have completed their run, the
|
||||
result is shown on GitHub in the pull request itself.
|
||||
|
||||
Test your modifications before you open a PR. Refer to the chapters
|
||||
on testing for details.
|
||||
Test your modifications before you open a PR. Refer to :ref:`the sections on
|
||||
testing <dev-testing-unit-tests>` for details.
|
||||
|
||||
Notes on PR make check test
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -399,7 +402,7 @@ Integration tests AKA ceph-qa-suite
|
||||
-----------------------------------
|
||||
|
||||
It may be necessary to test your fix on real Ceph clusters that run on physical
|
||||
or virtual hardware. Tests designed for this purpose live in the `ceph/qa
|
||||
or virtual hardware. Tests designed for this purpose reside in the `ceph/qa
|
||||
sub-directory`_ and are run via the `teuthology framework`_.
|
||||
|
||||
.. _`ceph/qa sub-directory`: https://github.com/ceph/ceph/tree/main/qa/
|
||||
@ -410,12 +413,16 @@ The Ceph community has access to the `Sepia lab
|
||||
<https://wiki.sepia.ceph.com/doku.php>`_ where `integration tests`_ can be run
|
||||
on physical hardware.
|
||||
|
||||
Other contributors might add tags like `needs-qa` to your PR. This allows PRs
|
||||
Other contributors might add tags like ``needs-qa`` to your PR. This allows PRs
|
||||
to be merged into a single branch and then efficiently tested together.
|
||||
Teuthology test suites can take hours (and even days in some cases) to
|
||||
complete, so batching tests reduces contention for resources and saves a lot of
|
||||
Teuthology test suites can take hours (and, in some cases, days) to
|
||||
complete, so batching tests reduces contention for resources and saves
|
||||
time.
|
||||
|
||||
If your code change has any effect on upgrades, add the
|
||||
``needs-upgrade-testing`` label. This indicates that an upgrade testing suite
|
||||
should be scheduled.
|
||||
|
||||
To request access to the Sepia lab, start `here
|
||||
<https://wiki.sepia.ceph.com/doku.php?id=vpnaccess>`_.
|
||||
|
||||
@ -427,10 +434,11 @@ tests`_ chapter.
|
||||
Code review
|
||||
-----------
|
||||
|
||||
Once your bugfix has been thoroughly tested, or even during this process,
|
||||
it will be subjected to code review by other developers. This typically
|
||||
takes the form of comments in the PR itself, but can be supplemented
|
||||
by discussions on :ref:`irc` and the :ref:`mailing-list`.
|
||||
After your bugfix has been thoroughly tested--and sometimes even during the
|
||||
testing--it will be subjected to code review by other developers. This
|
||||
typically takes the form of comments in the PR itself, but can be supplemented
|
||||
by discussions on :ref:`irc`, or on :ref:`Slack <ceph-slack>` or on the
|
||||
:ref:`mailing-list`.
|
||||
|
||||
Amending your PR
|
||||
----------------
|
||||
@ -439,24 +447,24 @@ While your PR is going through testing and `Code Review`_, you can
|
||||
modify it at any time by editing files in your local branch.
|
||||
|
||||
After updates are committed locally (to the ``fix_1`` branch in our
|
||||
example), they need to be pushed to GitHub so they appear in the PR.
|
||||
example), they must be pushed to GitHub in order to appear in the PR.
|
||||
|
||||
Modifying the PR is done by adding commits to the ``fix_1`` branch upon
|
||||
which it is based, often followed by rebasing to modify the branch's git
|
||||
history. See `this tutorial
|
||||
<https://www.atlassian.com/git/tutorials/rewriting-history>`_ for a good
|
||||
introduction to rebasing. When you are done with your modifications, you
|
||||
will need to force push your branch with:
|
||||
Modifying the PR is done by adding commits to the ``fix_1`` branch upon which
|
||||
it is based, often followed by rebasing to modify the branch's git history. See
|
||||
`this tutorial <https://www.atlassian.com/git/tutorials/rewriting-history>`_
|
||||
for an introduction to rebasing. When you are done with your modifications, you
|
||||
will need to force push your branch by running a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git push --force origin fix_1
|
||||
|
||||
Why do we take these extra steps instead of simply adding additional commits
|
||||
the PR? It is best practice for a PR to consist of a single commit; this
|
||||
makes for clean history, eases peer review of your changes, and facilitates
|
||||
merges. In rare circumstances it also makes it easier to cleanly revert
|
||||
changes.
|
||||
Why do we take these extra steps instead of simply adding additional commits to
|
||||
the PR? It is best practice for a PR to consist of a single commit; this makes
|
||||
it possible to maintain a clean history, it simplifies peer review of your
|
||||
changes, and it makes merging your PR easier. In the unlikely event that your
|
||||
PR has to be reverted, having a single commit associated with that PR makes the
|
||||
process of reverting it easier.
|
||||
|
||||
Merging
|
||||
-------
|
||||
@ -468,7 +476,7 @@ to change the :ref:`issue-tracker` status to "Resolved". Some issues may be
|
||||
flagged for backporting, in which case the status should be changed to
|
||||
"Pending Backport" (see the :ref:`backporting` chapter for details).
|
||||
|
||||
See also :ref:`merging` for more information on merging.
|
||||
See :ref:`merging` for more information on merging.
|
||||
|
||||
Proper Merge Commit Format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -481,37 +489,37 @@ This is the most basic form of a merge commit::
|
||||
|
||||
This consists of two parts:
|
||||
|
||||
#. The title of the commit / PR to be merged.
|
||||
#. The title of the commit to be merged.
|
||||
#. The name and email address of the reviewer. Enclose the reviewer's email
|
||||
address in angle brackets.
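
For instance, a merge commit that follows this format might look like the
following (the component prefix, title, name, and email address here are all
placeholders)::

  osd/scrub: fix off-by-one error in chunk boundary selection

  Reviewed-by: Jane Doe <jane.doe@example.com>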
|
||||
|
||||
Using a browser extension to auto-fill the merge message
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Using a browser extension to auto-fill the merge message
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
If you use a browser for merging GitHub PRs, the easiest way to fill in
|
||||
If you use a browser to merge GitHub PRs, the easiest way to fill in
|
||||
the merge message is with the `"Ceph GitHub Helper Extension"
|
||||
<https://github.com/tspmelo/ceph-github-helper>`_ (available for `Chrome
|
||||
<https://chrome.google.com/webstore/detail/ceph-github-helper/ikpfebikkeabmdnccbimlomheocpgkmn>`_
|
||||
and `Firefox <https://addons.mozilla.org/en-US/firefox/addon/ceph-github-helper/>`_).
|
||||
|
||||
After enabling this extension, if you go to a GitHub PR page, a vertical helper
|
||||
will be displayed at the top-right corner. If you click on the user silhouette button
|
||||
the merge message input will be automatically populated.
|
||||
will be displayed at the top-right corner. If you click on the user silhouette
|
||||
button the merge message input will be automatically populated.
|
||||
|
||||
Using .githubmap to Find a Reviewer's Email Address
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
If you cannot find the email address of the reviewer on his or her GitHub
|
||||
page, you can look it up in the **.githubmap** file, which can be found in
|
||||
the repository at **/ceph/.githubmap**.
|
||||
If you cannot find the email address of the reviewer on his or her GitHub page,
|
||||
you can look it up in the ``.githubmap`` file, which can be found in the
|
||||
repository at ``/ceph/.githubmap``.
|
||||
|
||||
Using "git log" to find a Reviewer's Email Address
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
If you cannot find a reviewer's email address by using the above methods, you
|
||||
can search the git log for their email address. Reviewers are likely to have
|
||||
committed something before. If they have made previous contributions, the git
|
||||
committed something before. If they have made previous contributions, the git
|
||||
log will probably contain their email address.
|
||||
|
||||
Use the following command
|
||||
Use the following command:
|
||||
|
||||
.. prompt:: bash [branch-under-review]$
|
||||
|
||||
@ -521,9 +529,9 @@ Using ptl-tool to Generate Merge Commits
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Another method of generating merge commits involves using Patrick Donnelly's
|
||||
**ptl-tool** pull commits. This tool can be found at
|
||||
**/ceph/src/script/ptl-tool.py**. Merge commits that have been generated by
|
||||
the **ptl-tool** have the following form::
|
||||
``ptl-tool`` to pull commits. This tool can be found at
|
||||
``/ceph/src/script/ptl-tool.py``. Merge commits that have been generated by the
|
||||
``ptl-tool`` have the following form::
|
||||
|
||||
Merge PR #36257 into main
|
||||
* refs/pull/36257/head:
|
||||
@ -547,7 +555,8 @@ push`` command, you will see the following error message:
|
||||
git push --set-upstream origin {x}
|
||||
|
||||
To set up git to automatically create the upstream branch that corresponds to
|
||||
the branch in your local working copy, run this command from within the
|
||||
the branch in your local working copy (without having to add the option
|
||||
``--set-upstream origin x`` every time), run this command from within the
|
||||
``ceph/`` directory:
|
||||
|
||||
.. prompt:: bash $
|
||||
@ -569,7 +578,7 @@ Deleting a Branch Remotely
|
||||
|
||||
To delete the branch named ``remoteBranchName`` from the remote upstream branch
|
||||
(which is also your fork of ``ceph/ceph``, as described in :ref:`forking`), run
|
||||
a command of this form:
|
||||
a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -580,7 +589,8 @@ Searching a File Longitudinally for a String
|
||||
|
||||
To search for the commit that introduced a given string (in this example, that
|
||||
string is ``foo``) into a given file (in this example, that file is
|
||||
``file.rst``), run a command of this form:
|
||||
``file.rst``), use the ``-S <string>`` option. Run a command of the following
|
||||
form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
|
@ -2423,8 +2423,10 @@ also manually invoked: ``tox -e openapi-check``.
|
||||
If that checker failed, it means that the current Pull Request is modifying the
|
||||
Ceph API and therefore:
|
||||
|
||||
#. The versioned OpenAPI specification should be updated explicitly: ``tox -e openapi-fix``.
|
||||
#. The team @ceph/api will be requested for reviews (this is automated via GitHub CODEOWNERS), in order to asses the impact of changes.
|
||||
#. The versioned OpenAPI specification should be updated explicitly: ``tox -e
|
||||
openapi-fix``.
|
||||
#. The team @ceph/api will be requested for reviews (this is automated via
|
||||
GitHub CODEOWNERS), in order to assess the impact of changes.
|
||||
|
||||
Additionally, Sphinx documentation can be generated from the OpenAPI
|
||||
specification with ``tox -e openapi-doc``.
|
||||
|
@ -8,25 +8,14 @@ Leads
|
||||
-----
|
||||
|
||||
The Ceph project was created by Sage Weil and is led by the Ceph Leadership
|
||||
Team (CLT). In addition, each major project component has its own lead. The
|
||||
following table shows all the leads and their nicks on `GitHub`_:
|
||||
Team (CLT). Each major component of the Ceph project has its own lead. The
|
||||
`team list`_ on the Ceph community website shows all the leads and their nicks
|
||||
on `GitHub`_:
|
||||
|
||||
.. _github: https://github.com/
|
||||
.. _team list: https://ceph.io/en/community/team
|
||||
|
||||
========= ================== =============
|
||||
Scope Lead GitHub nick
|
||||
========= ================== =============
|
||||
RADOS Radoslaw Zarzynski rzarzynski
|
||||
RGW Casey Bodley cbodley
|
||||
RGW Matt Benjamin mattbenjamin
|
||||
RBD Ilya Dryomov dis
|
||||
CephFS Venky Shankar vshankar
|
||||
Dashboard Nizamudeen A nizamial09
|
||||
Build/Ops Ken Dreyer ktdreyer
|
||||
Docs Zac Dover zdover23
|
||||
========= ================== =============
|
||||
|
||||
The Ceph-specific acronyms in the table are explained in
|
||||
Ceph-specific acronyms in the table of leads are explained in
|
||||
:doc:`/architecture`.
|
||||
|
||||
History
|
||||
@ -87,6 +76,8 @@ click on `New issue`_.
|
||||
.. _`jump to the Ceph project`: http://tracker.ceph.com/projects/ceph
|
||||
.. _`New issue`: http://tracker.ceph.com/projects/ceph/issues/new
|
||||
|
||||
.. _ceph-slack:
|
||||
|
||||
Slack
|
||||
-----
|
||||
|
||||
|
@ -52,12 +52,35 @@ Running your first test
|
||||
The Python tests in Ceph repository can be executed on your local machine
|
||||
using `vstart_runner.py`_. To do that, you'd need `teuthology`_ installed::
|
||||
|
||||
$ git clone https://github.com/ceph/teuthology
|
||||
$ cd teuthology
|
||||
$ ./bootstrap install
|
||||
|
||||
This will create a virtual environment named ``virtualenv`` in the root of the
teuthology repository and install teuthology in it.
|
||||
|
||||
You can also install teuthology with ``pip`` in a custom virtual environment,
letting ``pip`` fetch the `teuthology`_ repository directly with ``git``::
|
||||
|
||||
$ virtualenv --python=python3 venv
|
||||
$ source venv/bin/activate
|
||||
$ pip install 'setuptools >= 12'
|
||||
$ pip install teuthology[test]@git+https://github.com/ceph/teuthology
|
||||
$ deactivate
|
||||
|
||||
If for some unforeseen reason the above approaches do not work (maybe the
bootstrap script fails due to a bug, or you cannot download teuthology at the
moment), teuthology can be installed manually from a copy of the teuthology
repository already present on your machine::
|
||||
|
||||
$ cd teuthology
|
||||
$ virtualenv -p python3 venv
|
||||
$ source venv/bin/activate
|
||||
$ pip install -r requirements.txt
|
||||
$ pip install .
|
||||
$ deactivate
|
||||
|
||||
The above steps install teuthology in a virtual environment. Before running
a test locally, build Ceph successfully from source (refer to
:doc:`/install/build-ceph`) and then run::
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _dev-testing-unit-tests:
|
||||
|
||||
Testing - unit tests
|
||||
====================
|
||||
|
||||
|
@ -30,36 +30,35 @@ by a programmer by implementing the ``encode`` and ``decode`` methods.
|
||||
|
||||
Principles for format change
|
||||
----------------------------
|
||||
It is not unusual that the format of serialization changes. This
|
||||
process requires careful attention from during both development
|
||||
It is not unusual for the format of serialization to change. This
|
||||
process requires careful attention both during development
|
||||
and review.
|
||||
|
||||
The general rule is that a decoder must understand what had been
|
||||
encoded by an encoder. Most of the problems come from ensuring
|
||||
that compatibility continues between old decoders and new encoders
|
||||
as well as new decoders and old decoders. One should assume
|
||||
that -- if not otherwise derogated -- any mix (old/new) is
|
||||
possible in a cluster. There are 2 main reasons for that:
|
||||
The general rule is that a decoder must understand what has been encoded by an
|
||||
encoder. Most difficulties arise during the process of ensuring the continuity
|
||||
of compatibility of old decoders with new encoders, and ensuring the continuity
|
||||
of compatibility of new decoders with old encoders. One should assume -- if not
|
||||
otherwise specified -- that any mix of old and new is possible in a cluster.
|
||||
There are two primary concerns:
|
||||
|
||||
1. Upgrades. Although there are recommendations related to the order
|
||||
of entity types (mons/osds/clients), it is not mandatory and
|
||||
no assumption should be made about it.
|
||||
2. Huge variability of client versions. It was always the case
|
||||
that kernel (and thus kernel clients) upgrades are decoupled
|
||||
from Ceph upgrades. Moreover, proliferation of containerization
|
||||
bring the variability even to e.g. ``librbd`` -- now user space
|
||||
libraries live on the container own.
|
||||
1. **Upgrades.** Although there are recommendations related to the order of
|
||||
entity types (mons/OSDs/clients), it is not mandatory and no assumption
|
||||
should be made.
|
||||
2. **Huge variability of client versions.** It has always been the case that
|
||||
kernel upgrades (and thus kernel clients) are decoupled from Ceph upgrades.
|
||||
Containerization brings variability even to ``librbd`` -- now user space
|
||||
libraries live in the container itself.
|
||||
|
||||
With this being said, there are few rules limiting the degree
|
||||
of interoperability between dencoders:
|
||||
There are a few rules limiting the degree of interoperability between
|
||||
dencoders:
|
||||
|
||||
* ``n-2`` for dencoding between daemons,
|
||||
* ``n-3`` hard requirement for client-involved scenarios,
|
||||
* ``n-3..`` soft requirements for clinet-involved scenarios. Ideally
|
||||
every client should be able to talk any version of daemons.
|
||||
* ``n-3`` hard requirement for client scenarios,
|
||||
* ``n-3..`` soft requirement for client scenarios. Ideally every client should
|
||||
be able to talk to any version of daemons.
|
||||
|
||||
As the underlying reasons are the same, the rules dencoders
|
||||
follow are virtually the same as for deprecations of our features
|
||||
As the underlying reasons are the same, the rules that dencoders
|
||||
follow are nearly the same as the rules for deprecations of our features
|
||||
bits. See the ``Notes on deprecation`` in ``src/include/ceph_features.h``.
|
||||
|
||||
Frameworks
|
||||
@ -163,7 +162,7 @@ macro.
|
||||
The append-extendability of our dencoders is a result of the forward
|
||||
compatibility that the ``ENCODE_START`` and ``DECODE_FINISH`` macros bring.
|
||||
|
||||
They are implementing extendibility facilities. An encoder, when filling
|
||||
They are implementing extensibility facilities. An encoder, when filling
|
||||
the bufferlist, prepends three fields: version of the current format,
|
||||
minimal version of a decoder compatible with it and the total size of
|
||||
all encoded fields.
|
||||
|
@ -20,12 +20,10 @@ development-mode Ceph cluster::
|
||||
|
||||
The ``dev@ceph.io`` list is for discussion about the development of Ceph,
|
||||
its interoperability with other technology, and the operations of the
|
||||
project itself. Subscribe by sending a message to ``dev-request@ceph.io``
|
||||
with the line::
|
||||
project itself. Subscribe by sending a message to ``dev-join@ceph.io``
|
||||
with the word `subscribe` in the subject.
|
||||
|
||||
subscribe ceph-devel
|
||||
|
||||
in the body of the message.
|
||||
Alternatively you can visit https://lists.ceph.io and register.
|
||||
|
||||
The ceph-devel@vger.kernel.org list is for discussion
|
||||
and patch review for the Linux kernel Ceph client component.
|
||||
|
@ -207,6 +207,8 @@ The actual dump is similar to the schema, except that average values are grouped
|
||||
Labeled Perf Counters
|
||||
---------------------
|
||||
|
||||
.. note:: Labeled perf counters were introduced in the Reef release of Ceph.
|
||||
|
||||
A Ceph daemon has the ability to emit a set of perf counter instances with varying labels. These counters are intended for visualizing specific metrics in 3rd party tools like Prometheus and Grafana.
|
||||
|
||||
For example, the below counters show the number of put requests for different users on different buckets::
|
||||
|
@ -105,6 +105,7 @@ Code cleanup
|
||||
`ceph_release_t::*`)
|
||||
- [ ] search code for `require_osd_release`
|
||||
- [ ] search code for `min_mon_release`
|
||||
- [ ] check include/denc.h if DENC_START macro still needs reference to squid
|
||||
|
||||
QA suite
|
||||
--------
|
||||
@ -158,6 +159,6 @@ First release candidate
|
||||
First stable release
|
||||
====================
|
||||
|
||||
- [ ] src/ceph_release: change type `stable`
|
||||
- [x] src/ceph_release: change type `stable`
|
||||
- [ ] generate new object corpus for encoding/decoding tests - see :doc:`corpus`
|
||||
- [ ] src/cephadm/cephadmlib/constants.py: update `LATEST_STABLE_RELEASE`
|
||||
|
@ -106,11 +106,13 @@ NOTE: if for some reason the build has to be restarted (for example if one distr
|
||||
+-------------------+--------------------------------------------------+
|
||||
| Release | Distro Codemap |
|
||||
+===================+==================================================+
|
||||
| pacific (16.X.X) | ``focal bionic centos8 buster bullseye`` |
|
||||
| pacific (16.X.X) | ``focal bionic buster bullseye`` |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| quincy (17.X.X) | ``focal centos8 centos9 bullseye`` |
|
||||
| quincy (17.X.X) | ``jammy focal centos9 bullseye`` |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| reef (18.X.X) | ``jammy focal centos8 centos9 windows bookworm`` |
|
||||
| reef (18.X.X) | ``jammy focal centos9 windows bookworm`` |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| squid (19.X.X) | ``jammy centos9 windows bookworm`` |
|
||||
+-------------------+--------------------------------------------------+
|
||||
|
||||
5. Click ``Build``.
|
||||
@ -185,11 +187,11 @@ See `the Ceph Tracker wiki page that explains how to write the release notes <ht
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
sign-rpms octopus
|
||||
sign-rpms ceph octopus
|
||||
|
||||
Example::
|
||||
|
||||
$ sign-rpms octopus
|
||||
$ sign-rpms ceph octopus
|
||||
Checking packages in: /opt/repos/ceph/octopus-15.2.17/centos/7
|
||||
signing: /opt/repos/ceph/octopus-15.2.17/centos/7/SRPMS/ceph-release-1-1.el7.src.rpm
|
||||
/opt/repos/ceph/octopus-15.2.17/centos/7/SRPMS/ceph-release-1-1.el7.src.rpm:
|
||||
@ -203,7 +205,13 @@ See `the Ceph Tracker wiki page that explains how to write the release notes <ht
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sync-push octopus
|
||||
sync-push ceph octopus
|
||||
|
||||
This leaves the packages in a password-protected prerelease area
|
||||
at https://download.ceph.com/prerelease/ceph. Verify them from there.
|
||||
When they have been verified and are ready for release, move the directories to
the release directory (that is, run ``mv <whatever you're promoting> ../..``).
|
||||
|
||||
|
||||
5. Build Containers
|
||||
===================
|
||||
|
@ -90,6 +90,11 @@
|
||||
object store, RADOS. See :ref:`CephFS Architecture
|
||||
<arch-cephfs>` for more details.
|
||||
|
||||
:ref:`ceph-fuse <man-ceph-fuse>`
|
||||
:ref:`ceph-fuse <man-ceph-fuse>` is a FUSE ("**F**\ilesystem in
|
||||
**USE**\rspace") client for CephFS. ceph-fuse mounts a Ceph FS
|
||||
at a specified mount point.
|
||||
|
||||
Ceph Interim Release
|
||||
See :term:`Releases`.
|
||||
|
||||
@ -326,6 +331,14 @@
|
||||
<https://ceph.io/assets/pdfs/CawthonKeyValueStore.pdf>`_ (17
|
||||
pages).
|
||||
|
||||
OpenStack Swift
|
||||
In the context of Ceph, OpenStack Swift is one of the two APIs
|
||||
supported by the Ceph Object Store. The other API supported by
|
||||
the Ceph Object Store is S3.
|
||||
|
||||
See `the OpenStack Storage API overview page
|
||||
<https://docs.openstack.org/swift/latest/api/object_api_v1_overview.html>`_.
|
||||
|
||||
OSD
|
||||
Probably :term:`Ceph OSD`, but not necessarily. Sometimes
|
||||
(especially in older correspondence, and especially in
|
||||
@ -384,6 +397,12 @@
|
||||
Firefly (v. 0.80). See :ref:`Primary Affinity
|
||||
<rados_ops_primary_affinity>`.
|
||||
|
||||
:ref:`Prometheus <mgr-prometheus>`
|
||||
An open-source monitoring and alerting toolkit. Ceph offers a
|
||||
:ref:`"Prometheus module" <mgr-prometheus>`, which provides a
|
||||
Prometheus exporter that passes performance counters from a
|
||||
collection point in ``ceph-mgr`` to Prometheus.
|
||||
|
||||
Quorum
|
||||
Quorum is the state that exists when a majority of the
|
||||
:ref:`Monitors<arch_monitor>` in the cluster are ``up``. A
|
||||
@ -452,6 +471,14 @@
|
||||
provides a gateway to both the Amazon S3 RESTful API and the
|
||||
OpenStack Swift API.
|
||||
|
||||
S3
|
||||
In the context of Ceph, S3 is one of the two APIs supported by
|
||||
the Ceph Object Store. The other API supported by the Ceph
|
||||
Object Store is OpenStack Swift.
|
||||
|
||||
See `the Amazon S3 overview page
|
||||
<https://aws.amazon.com/s3/>`_.
|
||||
|
||||
scrubs
|
||||
|
||||
The processes by which Ceph ensures data integrity. During the
|
||||
@ -488,6 +515,9 @@
|
||||
which will exit upon completion (it is not intended to
|
||||
daemonize)
|
||||
|
||||
Swift
|
||||
See :term:`OpenStack Swift`.
|
||||
|
||||
Teuthology
|
||||
The collection of software that performs scripted tests on Ceph.
|
||||
|
||||
|
@ -101,7 +101,7 @@ about Ceph, see our `Architecture`_ section.
|
||||
:maxdepth: 3
|
||||
:hidden:
|
||||
|
||||
start/intro
|
||||
start/index
|
||||
install/index
|
||||
cephadm/index
|
||||
rados/index
|
||||
|
@ -1,5 +1,7 @@
|
||||
:orphan:
|
||||
|
||||
.. _man-ceph-fuse:
|
||||
|
||||
=========================================
|
||||
ceph-fuse -- FUSE-based client for ceph
|
||||
=========================================
|
||||
|
@ -1631,13 +1631,13 @@ Usage::
|
||||
Options
|
||||
=======
|
||||
|
||||
.. option:: -i infile
|
||||
.. option:: -i infile, --in-file=infile
|
||||
|
||||
will specify an input file to be passed along as a payload with the
|
||||
command to the monitor cluster. This is only used for specific
|
||||
monitor commands.
|
||||
|
||||
.. option:: -o outfile
|
||||
.. option:: -o outfile, --out-file=outfile
|
||||
|
||||
will write any payload returned by the monitor cluster with its
|
||||
reply to outfile. Only specific monitor commands (e.g. osd getmap)
|
||||
|
@ -1,5 +1,7 @@
|
||||
:orphan:
|
||||
|
||||
.. _cephfs-shell:
|
||||
|
||||
===================================================
|
||||
cephfs-shell -- Shell-like tool talking with CephFS
|
||||
===================================================
|
||||
|
@ -6,33 +6,29 @@ Ceph Dashboard
|
||||
Overview
|
||||
--------
|
||||
|
||||
The Ceph Dashboard is a built-in web-based Ceph management and monitoring
|
||||
application through which you can inspect and administer various aspects
|
||||
and resources within the cluster. It is implemented as a :ref:`ceph-manager-daemon` module.
|
||||
The Ceph Dashboard is a web-based Ceph management-and-monitoring tool that can
|
||||
be used to inspect and administer resources in the cluster. It is implemented
|
||||
as a :ref:`ceph-manager-daemon` module.
|
||||
|
||||
The original Ceph Dashboard that was shipped with Ceph Luminous started
|
||||
out as a simple read-only view into run-time information and performance
|
||||
data of Ceph clusters. It used a very simple architecture to achieve the
|
||||
original goal. However, there was growing demand for richer web-based
|
||||
management capabilities, to make it easier to administer Ceph for users that
|
||||
prefer a WebUI over the CLI.
|
||||
The original Ceph Dashboard shipped with Ceph Luminous and was a simple
|
||||
read-only view into the run-time information and performance data of Ceph
|
||||
clusters. It had a simple architecture. However, demand grew for richer,
|
||||
web-based management capabilities for users who prefer a WebUI over the CLI.
|
||||
|
||||
The new :term:`Ceph Dashboard` module adds web-based monitoring and
|
||||
administration to the Ceph Manager. The architecture and functionality of this new
|
||||
module are derived from
|
||||
and inspired by the `openATTIC Ceph management and monitoring tool
|
||||
<https://openattic.org/>`_. Development is actively driven by the
|
||||
openATTIC team at `SUSE <https://www.suse.com/>`_, with support from
|
||||
companies including `Red Hat <https://redhat.com/>`_ and members of the Ceph
|
||||
community.
|
||||
The :term:`Ceph Dashboard` module adds web-based monitoring and administration
|
||||
to the Ceph Manager. The architecture and functionality of this new module are
|
||||
derived from the `openATTIC Ceph management and monitoring tool
|
||||
<https://openattic.org/>`_. Development was originally driven by the openATTIC
|
||||
team at `SUSE <https://www.suse.com/>`_, with support from members of the Ceph
|
||||
community and from companies including `Red Hat <https://redhat.com/>`_.
|
||||
|
||||
The dashboard module's backend code uses the CherryPy framework and implements
|
||||
a custom REST API. The WebUI implementation is based on
|
||||
Angular/TypeScript and includes both functionality from the original dashboard
|
||||
and new features originally developed for the standalone version
|
||||
of openATTIC. The Ceph Dashboard module is implemented as an
|
||||
application that provides a graphical representation of information and statistics
|
||||
through a web server hosted by ``ceph-mgr``.
|
||||
The dashboard module's backend code uses the CherryPy framework, and implements
|
||||
a custom REST API. The WebUI implementation is based on Angular/TypeScript and
|
||||
includes both functionality from the original dashboard and new features
|
||||
originally developed for the standalone version of openATTIC. The Ceph
|
||||
Dashboard module is implemented as an application that provides a graphical
|
||||
representation of information and statistics through a web server hosted by
|
||||
``ceph-mgr``.
|
||||
|
||||
Feature Overview
|
||||
^^^^^^^^^^^^^^^^
|
||||
@ -1243,19 +1239,29 @@ code of standby dashboards. To do so you need to run the command:
|
||||
Resolve IP address to hostname before redirect
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The redirect from a standby to the active dashboard is done via the IP
|
||||
address. This is done because resolving IP addresses to hostnames can be error
|
||||
prone in containerized environments. It is also the reason why the option is
|
||||
Redirection from a standby dashboard to the active dashboard is done via the
|
||||
manager's IP address, not via the manager's hostname. In virtualized
|
||||
environments, IP-address-based redirection reduces the incidence of error as
|
||||
compared to hostname-based resolution. Because of the increased risk of error
|
||||
due to hostname-based resolution, the option for hostname resolution is
|
||||
disabled by default.
|
||||
|
||||
However, in some situations it might be helpful to redirect via the hostname.
|
||||
For example if the configured TLS certificate matches only the hostnames. To
|
||||
activate the redirection via the hostname run the following command::
|
||||
For example, if the configured TLS certificate matches only the hostnames and
|
||||
not the IP addresses of those hosts, hostname redirection would be preferable.
|
||||
|
||||
$ ceph config set mgr mgr/dashboard/redirect_resolve_ip_addr True
|
||||
To activate redirection from standby dashboards to active dashboards via the
|
||||
manager's hostname, run the following command:
|
||||
|
||||
You can disable it again by::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph config set mgr mgr/dashboard/redirect_resolve_ip_addr False
|
||||
ceph config set mgr mgr/dashboard/redirect_resolve_ip_addr True
|
||||
|
||||
Disable hostname redirection by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/dashboard/redirect_resolve_ip_addr False
|
||||
|
||||
.. warning::
|
||||
|
||||
|
@ -56,7 +56,7 @@ distributions, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo yum install librados2-devel
|
||||
sudo yum install librados2-devel libradospp-devel
|
||||
|
||||
Once you install ``librados`` for developers, you can find the required
|
||||
headers for C/C++ under ``/usr/include/rados``:
|
||||
|
@ -21,6 +21,13 @@ which is configured by the *mon_dns_srv_name* configuration directive.
|
||||
|
||||
.. confval:: mon_dns_srv_name
|
||||
|
||||
.. note:: Instead of using a DNS search domain, it is possible to manually
|
||||
designate the search domain by passing the search domain's name followed by
|
||||
an underscore to ``mon_dns_srv_name``. The syntax for this is
|
||||
``<service-name>_<upper-level-domain>``. For example, passing
|
||||
``ceph-mon_example.com`` will direct Ceph to look for the ``SRV`` record at
|
||||
``_ceph-mon._tcp.example.com``.
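
As an illustration only (the domain is a placeholder, and the option must be
visible to every client and daemon that performs monitor discovery), the
setting could be added to ``ceph.conf``::

  $ cat >> /etc/ceph/ceph.conf <<'EOF'
  [global]
          mon_dns_srv_name = ceph-mon_example.com
  EOF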
|
||||
|
||||
Example
|
||||
-------
|
||||
When the DNS search domain is set to *example.com* a DNS zone file might contain the following elements.
|
||||
|
@ -273,6 +273,7 @@ and subnets for the public network. You may specifically assign static IP
|
||||
addresses or override ``public_network`` settings using the ``public_addr``
|
||||
setting for a specific daemon.
|
||||
|
||||
.. confval:: public_network_interface
|
||||
.. confval:: public_network
|
||||
.. confval:: public_addr
|
||||
|
||||
@ -285,6 +286,7 @@ specifically assign static IP addresses or override ``cluster_network``
|
||||
settings using the ``cluster_addr`` setting for specific OSD daemons.
|
||||
|
||||
|
||||
.. confval:: cluster_network_interface
|
||||
.. confval:: cluster_network
|
||||
.. confval:: cluster_addr
|
||||
|
||||
|
@ -102,9 +102,9 @@ This hook is passed several arguments (see below). The hook outputs a single
|
||||
line to ``stdout`` that contains the CRUSH location description. The arguments
|
||||
resemble the following:::
|
||||
|
||||
--cluster CLUSTER --id ID --type TYPE
|
||||
--id ID --type TYPE
|
||||
|
||||
Here the cluster name is typically ``ceph``, the ``id`` is the daemon
|
||||
Here the ``id`` is the daemon
|
||||
identifier or (in the case of OSDs) the OSD number, and the daemon type is
|
||||
``osd``, ``mds``, ``mgr``, or ``mon``.
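
For illustration, a minimal hook might ignore its arguments and emit a static
location for the node it runs on (a sketch only; the bucket names are
placeholders and must match buckets that exist, or can be created, in your
CRUSH hierarchy, and the script path is whatever you configure via
``crush_location_hook``)::

  #!/bin/sh
  # Print a single CRUSH location line on stdout; the arguments
  # (--id ID --type TYPE) are accepted but not used here.
  echo "host=$(hostname -s) rack=rack1 root=default"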
|
||||
|
||||
|
@ -39,6 +39,8 @@ CRUSH algorithm.
|
||||
erasure-code
|
||||
cache-tiering
|
||||
placement-groups
|
||||
pg-states
|
||||
pg-concepts
|
||||
upmap
|
||||
read-balancer
|
||||
balancer
|
||||
|
@ -130,6 +130,58 @@ your CRUSH map. This procedure shows how to do this.
|
||||
step emit
|
||||
}
|
||||
|
||||
.. warning:: If a CRUSH rule is defined for a stretch mode cluster and the
|
||||
rule has multiple "takes" in it, then ``MAX AVAIL`` for the pools
|
||||
associated with the CRUSH rule will report that the available size is all
|
||||
of the available space from the datacenter, not the available space for
|
||||
the pools associated with the CRUSH rule.
|
||||
|
||||
For example, consider a cluster with two CRUSH rules, ``stretch_rule`` and
|
||||
``stretch_replicated_rule``::
|
||||
|
||||
rule stretch_rule {
|
||||
id 1
|
||||
type replicated
|
||||
step take DC1
|
||||
step chooseleaf firstn 2 type host
|
||||
step emit
|
||||
step take DC2
|
||||
step chooseleaf firstn 2 type host
|
||||
step emit
|
||||
}
|
||||
|
||||
rule stretch_replicated_rule {
|
||||
id 2
|
||||
type replicated
|
||||
step take default
|
||||
step choose firstn 0 type datacenter
|
||||
step chooseleaf firstn 2 type host
|
||||
step emit
|
||||
}
|
||||
|
||||
In the above example, ``stretch_rule`` will report an incorrect value for
|
||||
``MAX AVAIL``. ``stretch_replicated_rule`` will report the correct value.
|
||||
This is because ``stretch_rule`` is defined in such a way that
|
||||
``PGMap::get_rule_avail`` considers only the available size of a single
|
||||
data center, and not (as would be correct) the total available size from
|
||||
both datacenters.
|
||||
|
||||
Here is a workaround. Instead of defining the stretch rule as defined in
|
||||
the ``stretch_rule`` function above, define it as follows::
|
||||
|
||||
rule stretch_rule {
|
||||
id 2
|
||||
type replicated
|
||||
step take default
|
||||
step choose firstn 0 type datacenter
|
||||
step chooseleaf firstn 2 type host
|
||||
step emit
|
||||
}
|
||||
|
||||
See https://tracker.ceph.com/issues/56650 for more detail on this workaround.
|
||||
|
||||
*The above procedure was developed in May and June of 2024 by Prashant Dhange.*
|
||||
|
||||
#. Inject the CRUSH map to make the rule available to the cluster:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
@ -133,10 +133,14 @@ Understanding mon_status
|
||||
|
||||
The status of a Monitor (as reported by the ``ceph tell mon.X mon_status``
|
||||
command) can be obtained via the admin socket. The ``ceph tell mon.X
|
||||
mon_status`` command outputs a great deal of information about the monitor
|
||||
mon_status`` command outputs a great deal of information about the monitor
|
||||
(including the information found in the output of the ``quorum_status``
|
||||
command).
|
||||
|
||||
.. note:: The command ``ceph tell mon.X mon_status`` is not meant to be input
|
||||
literally. The ``X`` portion of ``mon.X`` is meant to be replaced with a
|
||||
value specific to your Ceph cluster when you run the command.
|
||||
|
||||
To understand this command's output, let us consider the following example, in
|
||||
which we see the output of ``ceph tell mon.c mon_status``::
|
||||
|
||||
@ -165,24 +169,24 @@ which we see the output of ``ceph tell mon.c mon_status``::
|
||||
"name": "c",
|
||||
"addr": "127.0.0.1:6795\/0"}]}}
|
||||
|
||||
This output reports that there are three monitors in the monmap (*a*, *b*, and
|
||||
*c*), that quorum is formed by only two monitors, and that *c* is in quorum as
|
||||
a *peon*.
|
||||
This output reports that there are three monitors in the monmap (``a``, ``b``,
|
||||
and ``c``), that quorum is formed by only two monitors, and that ``c`` is a
|
||||
``peon``.
|
||||
|
||||
**Which monitor is out of quorum?**
|
||||
|
||||
The answer is **a** (that is, ``mon.a``). ``mon.a`` is out of quorum.
|
||||
The answer is ``a`` (that is, ``mon.a``). ``mon.a`` is out of quorum.
|
||||
|
||||
**How do we know, in this example, that mon.a is out of quorum?**
|
||||
|
||||
We know that ``mon.a`` is out of quorum because it has rank 0, and Monitors
|
||||
with rank 0 are by definition out of quorum.
|
||||
We know that ``mon.a`` is out of quorum because it has rank ``0``, and rank
``0`` is not present in the ``quorum`` set.
|
||||
|
||||
If we examine the ``quorum`` set, we can see that there are clearly two
|
||||
monitors in the set: *1* and *2*. But these are not monitor names. They are
|
||||
monitor ranks, as established in the current ``monmap``. The ``quorum`` set
|
||||
does not include the monitor that has rank 0, and according to the ``monmap``
|
||||
that monitor is ``mon.a``.
|
||||
monitors in the set: ``1`` and ``2``. But these are not monitor names. They
|
||||
are monitor ranks, as established in the current ``monmap``. The ``quorum``
|
||||
set does not include the monitor that has rank ``0``, and according to the
|
||||
``monmap`` that monitor is ``mon.a``.
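
To see the same information by monitor name rather than by rank, the quorum
membership can also be listed directly (the use of ``jq`` here is optional and
assumes it is installed; the output shown corresponds to the example above)::

  $ ceph quorum_status -f json-pretty | jq '.quorum_names'
  [
    "b",
    "c"
  ]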
|
||||
|
||||
**How are monitor ranks determined?**
|
||||
|
||||
@ -192,7 +196,7 @@ a *peon*.
|
||||
case, because ``127.0.0.1:6789`` (``mon.a``) is numerically less than the
|
||||
other two ``IP:PORT`` combinations (which are ``127.0.0.1:6790`` for "Monitor
|
||||
b" and ``127.0.0.1:6795`` for "Monitor c"), ``mon.a`` has the highest rank:
|
||||
namely, rank 0.
|
||||
namely, rank ``0``.
|
||||
|
||||
|
||||
Most Common Monitor Issues
|
||||
@ -316,6 +320,12 @@ detail`` returns a message similar to the following::
|
||||
|
||||
**What does it mean when a Monitor's state is ``leader`` or ``peon``?**
|
||||
|
||||
During normal Ceph operations when the cluster is in the ``HEALTH_OK`` state,
|
||||
one monitor in the Ceph cluster is in the ``leader`` state and the rest of
|
||||
the monitors are in the ``peon`` state. The state of a given monitor can be
|
||||
determined by examining the value of the state key returned by the command
|
||||
``ceph tell <mon_name> mon_status``.
|
||||
|
||||
If ``ceph health detail`` shows that the Monitor is in the ``leader`` state
|
||||
or in the ``peon`` state, it is likely that clock skew is present. Follow the
|
||||
instructions in `Clock Skews`_. If you have followed those instructions and
|
||||
@ -388,7 +398,11 @@ B. **Inject a monmap into the monitor.**
|
||||
|
||||
In this example, the ID of the stopped Monitor is ``ID-FOO``.
|
||||
|
||||
#. Stop the Monitor into which the ``monmap`` will be injected.
|
||||
#. Stop the Monitor into which the ``monmap`` will be injected:
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
service ceph -a stop mon.{mon-id}
|
||||
|
||||
#. Inject the monmap into the stopped Monitor:
|
||||
|
||||
|
@ -655,7 +655,7 @@ Events from the OSD as it processes ops:
|
||||
is now being performed.
|
||||
- ``waiting for subops from``: The op has been sent to replica OSDs.
|
||||
|
||||
Events from ```Filestore```:
|
||||
Events from ``Filestore``:
|
||||
|
||||
- ``commit_queued_for_journal_write``: The op has been given to the FileStore.
|
||||
- ``write_thread_in_journal_buffer``: The op is in the journal's buffer and is waiting
|
||||
@ -667,7 +667,7 @@ Events from the OSD after data has been given to underlying storage:
|
||||
|
||||
- ``op_commit``: The op has been committed (that is, written to journal) by the
|
||||
primary OSD.
|
||||
- ``op_applied``: The op has been `write()'en
|
||||
- ``op_applied``: The op has been `written with write()
|
||||
<https://www.freebsd.org/cgi/man.cgi?write(2)>`_ to the backing FS (that is,
|
||||
applied in memory but not flushed out to disk) on the primary.
|
||||
- ``sub_op_applied``: ``op_applied``, but for a replica's "subop".
|
||||
@ -676,8 +676,9 @@ Events from the OSD after data has been given to underlying storage:
|
||||
hears about the above, but for a particular replica (i.e. ``<X>``).
|
||||
- ``commit_sent``: We sent a reply back to the client (or primary OSD, for sub ops).
|
||||
|
||||
Some of these events may appear redundant, but they cross important boundaries
|
||||
in the internal code (such as passing data across locks into new threads).
|
||||
Although some of these events may appear redundant, they cross important
|
||||
boundaries in the internal code (such as passing data across locks into new
|
||||
threads).
|
||||
|
||||
|
||||
Flapping OSDs
|
||||
|
@ -36,60 +36,87 @@ For example, one may use s3cmd to set or delete a policy thus::
|
||||
Limitations
|
||||
===========
|
||||
|
||||
Currently, we support only the following actions:
|
||||
.. note:: This list of S3 actions is accurate only for the Squid release of
|
||||
Ceph. If you are using a different release of Ceph, the list of supported S3
|
||||
actions will be different.
|
||||
|
||||
- s3:AbortMultipartUpload
|
||||
- s3:CreateBucket
|
||||
- s3:DeleteBucketPolicy
|
||||
- s3:DeleteBucket
|
||||
- s3:DeleteBucketWebsite
|
||||
- s3:DeleteObject
|
||||
- s3:DeleteObjectVersion
|
||||
- s3:DeleteReplicationConfiguration
|
||||
- s3:GetAccelerateConfiguration
|
||||
- s3:GetBucketAcl
|
||||
- s3:GetBucketCORS
|
||||
- s3:GetBucketLocation
|
||||
- s3:GetBucketLogging
|
||||
- s3:GetBucketNotification
|
||||
- s3:GetBucketPolicy
|
||||
- s3:GetBucketRequestPayment
|
||||
- s3:GetBucketTagging
|
||||
- s3:GetBucketVersioning
|
||||
- s3:GetBucketWebsite
|
||||
- s3:GetLifecycleConfiguration
|
||||
- s3:GetObjectAcl
|
||||
- s3:GetObject
|
||||
- s3:GetObjectTorrent
|
||||
- s3:GetObjectVersionAcl
|
||||
- s3:GetObjectVersion
|
||||
- s3:GetObjectVersionTorrent
|
||||
- s3:GetReplicationConfiguration
|
||||
- s3:IPAddress
|
||||
- s3:NotIpAddress
|
||||
- s3:ListAllMyBuckets
|
||||
- s3:ListBucketMultipartUploads
|
||||
- s3:ListBucket
|
||||
- s3:ListBucketVersions
|
||||
- s3:ListMultipartUploadParts
|
||||
- s3:PutAccelerateConfiguration
|
||||
- s3:PutBucketAcl
|
||||
- s3:PutBucketCORS
|
||||
- s3:PutBucketLogging
|
||||
- s3:PutBucketNotification
|
||||
- s3:PutBucketPolicy
|
||||
- s3:PutBucketRequestPayment
|
||||
- s3:PutBucketTagging
|
||||
- s3:PutBucketVersioning
|
||||
- s3:PutBucketWebsite
|
||||
- s3:PutLifecycleConfiguration
|
||||
- s3:PutObjectAcl
|
||||
- s3:PutObject
|
||||
- s3:PutObjectVersionAcl
|
||||
- s3:PutReplicationConfiguration
|
||||
- s3:RestoreObject
|
||||
In Squid only the following actions are supported (a sample policy that uses a few of these actions follows the list):
|
||||
|
||||
We do not yet support setting policies on users, groups, or roles.
|
||||
- ``s3:GetObject``
|
||||
- ``s3:GetObjectVersion``
|
||||
- ``s3:PutObject``
|
||||
- ``s3:GetObjectAcl``
|
||||
- ``s3:GetObjectVersionAcl``
|
||||
- ``s3:PutObjectAcl``
|
||||
- ``s3:PutObjectVersionAcl``
|
||||
- ``s3:DeleteObject``
|
||||
- ``s3:DeleteObjectVersion``
|
||||
- ``s3:ListMultipartUploadParts``
|
||||
- ``s3:AbortMultipartUpload``
|
||||
- ``s3:GetObjectTorrent``
|
||||
- ``s3:GetObjectVersionTorrent``
|
||||
- ``s3:RestoreObject``
|
||||
- ``s3:CreateBucket``
|
||||
- ``s3:DeleteBucket``
|
||||
- ``s3:ListBucket``
|
||||
- ``s3:ListBucketVersions``
|
||||
- ``s3:ListAllMyBuckets``
|
||||
- ``s3:ListBucketMultipartUploads``
|
||||
- ``s3:GetAccelerateConfiguration``
|
||||
- ``s3:PutAccelerateConfiguration``
|
||||
- ``s3:GetBucketAcl``
|
||||
- ``s3:PutBucketAcl``
|
||||
- ``s3:GetBucketOwnershipControls``
|
||||
- ``s3:PutBucketOwnershipControls``
|
||||
- ``s3:GetBucketCORS``
|
||||
- ``s3:PutBucketCORS``
|
||||
- ``s3:GetBucketVersioning``
|
||||
- ``s3:PutBucketVersioning``
|
||||
- ``s3:GetBucketRequestPayment``
|
||||
- ``s3:PutBucketRequestPayment``
|
||||
- ``s3:GetBucketLocation``
|
||||
- ``s3:GetBucketPolicy``
|
||||
- ``s3:DeleteBucketPolicy``
|
||||
- ``s3:PutBucketPolicy``
|
||||
- ``s3:GetBucketNotification``
|
||||
- ``s3:PutBucketNotification``
|
||||
- ``s3:GetBucketLogging``
|
||||
- ``s3:PutBucketLogging``
|
||||
- ``s3:GetBucketTagging``
|
||||
- ``s3:PutBucketTagging``
|
||||
- ``s3:GetBucketWebsite``
|
||||
- ``s3:PutBucketWebsite``
|
||||
- ``s3:DeleteBucketWebsite``
|
||||
- ``s3:GetLifecycleConfiguration``
|
||||
- ``s3:PutLifecycleConfiguration``
|
||||
- ``s3:PutReplicationConfiguration``
|
||||
- ``s3:GetReplicationConfiguration``
|
||||
- ``s3:DeleteReplicationConfiguration``
|
||||
- ``s3:GetObjectTagging``
|
||||
- ``s3:PutObjectTagging``
|
||||
- ``s3:DeleteObjectTagging``
|
||||
- ``s3:GetObjectVersionTagging``
|
||||
- ``s3:PutObjectVersionTagging``
|
||||
- ``s3:DeleteObjectVersionTagging``
|
||||
- ``s3:PutBucketObjectLockConfiguration``
|
||||
- ``s3:GetBucketObjectLockConfiguration``
|
||||
- ``s3:PutObjectRetention``
|
||||
- ``s3:GetObjectRetention``
|
||||
- ``s3:PutObjectLegalHold``
|
||||
- ``s3:GetObjectLegalHold``
|
||||
- ``s3:BypassGovernanceRetention``
|
||||
- ``s3:GetBucketPolicyStatus``
|
||||
- ``s3:PutPublicAccessBlock``
|
||||
- ``s3:GetPublicAccessBlock``
|
||||
- ``s3:DeletePublicAccessBlock``
|
||||
- ``s3:GetBucketPublicAccessBlock``
|
||||
- ``s3:PutBucketPublicAccessBlock``
|
||||
- ``s3:DeleteBucketPublicAccessBlock``
|
||||
- ``s3:GetBucketEncryption``
|
||||
- ``s3:PutBucketEncryption``
|
||||
- ``s3:DescribeJob``
|
||||
- ``s3:objectlambdaGetObject``
|
||||
- ``s3:objectlambdaListBucket``
|
||||
|
||||
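As a minimal sketch (the bucket ``happybucket``, the tenant ``usfolks``, and the user ``fred`` are placeholders), a policy that grants a few of the actions listed above can be written to a file and applied with ``s3cmd``::

$ cat > examplepol.json <<'EOF'
{
  "Version": "2012-10-17",
  "Statement": [{
    "Effect": "Allow",
    "Principal": {"AWS": ["arn:aws:iam::usfolks:user/fred"]},
    "Action": ["s3:GetObject", "s3:ListBucket"],
    "Resource": ["arn:aws:s3:::happybucket", "arn:aws:s3:::happybucket/*"]
  }]
}
EOF
$ s3cmd setpolicy examplepol.json s3://happybucket
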
We use the RGW ‘tenant’ identifier in place of the Amazon twelve-digit
|
||||
account ID. In the future we may allow you to assign an account ID to
|
||||
|
@ -28,8 +28,8 @@ Storage Classes
|
||||
|
||||
.. versionadded:: Nautilus
|
||||
|
||||
Storage classes are used to customize the placement of object data. S3 Bucket
|
||||
Lifecycle rules can automate the transition of objects between storage classes.
|
||||
Storage classes specify the placement of object data. S3 Bucket
|
||||
Lifecycle (LC) rules can automate the transition of objects between storage classes.
|
||||
|
||||
Storage classes are defined in terms of placement targets. Each zonegroup
|
||||
placement target lists its available storage classes with an initial class
|
||||
@ -125,7 +125,7 @@ Then provide the zone placement info for that target:
|
||||
--data-extra-pool default.rgw.temporary.non-ec
|
||||
|
||||
.. note:: With default placement target settings, RGW stores an object's first data chunk in the RADOS "head" object along
|
||||
with xattr metadata. The `--placement-inline-data=false` flag may be passed with the `zone placement add` or
|
||||
with XATTR metadata. The `--placement-inline-data=false` flag may be passed with the `zone placement add` or
|
||||
`zone placement modify` commands to change this behavior for new objects stored on the target.
|
||||
When data is stored inline (default), it may provide an advantage for read/write workloads since the first chunk of
|
||||
an object's data can be retrieved/stored in a single librados call along with object metadata. On the other hand, a
|
||||
@ -139,7 +139,7 @@ Then provide the zone placement info for that target:
|
||||
Adding a Storage Class
|
||||
----------------------
|
||||
|
||||
To add a new storage class named ``GLACIER`` to the ``default-placement`` target,
|
||||
To add a new storage class named ``STANDARD_IA`` to the ``default-placement`` target,
|
||||
start by adding it to the zonegroup:
|
||||
|
||||
::
|
||||
@ -147,7 +147,7 @@ start by adding it to the zonegroup:
|
||||
$ radosgw-admin zonegroup placement add \
|
||||
--rgw-zonegroup default \
|
||||
--placement-id default-placement \
|
||||
--storage-class GLACIER
|
||||
--storage-class STANDARD_IA
|
||||
|
||||
Then provide the zone placement info for that storage class:
|
||||
|
||||
@ -156,7 +156,7 @@ Then provide the zone placement info for that storage class:
|
||||
$ radosgw-admin zone placement add \
|
||||
--rgw-zone default \
|
||||
--placement-id default-placement \
|
||||
--storage-class GLACIER \
|
||||
--storage-class STANDARD_IA \
|
||||
--data-pool default.rgw.glacier.data \
|
||||
--compression lz4
|
||||
|
||||
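To confirm that the new storage class was registered (a sketch; the JSON output differs between deployments), inspect the placement entries of the zonegroup and zone::

$ radosgw-admin zonegroup get
$ radosgw-admin zone get
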
@ -252,12 +252,19 @@ name in an HTTP header with the request. The S3 protocol uses the
|
||||
``X-Amz-Storage-Class`` header, while the Swift protocol uses the
|
||||
``X-Object-Storage-Class`` header.
|
||||
|
||||
When using AWS S3 SDKs such as ``boto3``, it is important that non-default
|
||||
storage class names match those provided by AWS S3, or else the SDK
|
||||
will drop the request and raise an exception.
|
||||
|
||||
S3 Object Lifecycle Management can then be used to move object data between
|
||||
storage classes using ``Transition`` actions.
|
||||
|
||||
When using AWS S3 SDKs such as ``boto3``, it is important that
|
||||
storage class names match those provided by AWS S3, or else the SDK
|
||||
will drop the request and raise an exception. Moreover, some S3 clients
|
||||
and libraries expect AWS-specific behavior when a storage class named
|
||||
or prefixed with ``GLACIER`` is used and thus will fail when accessing
|
||||
Ceph RGW services. For this reason we advise that other storage class
|
||||
names be used with Ceph, including ``INTELLIGENT-TIERING``, ``STANDARD_IA``,
|
||||
``REDUCED_REDUNDANCY``, and ``ONEZONE_IA``. Custom storage class names like
|
||||
``CHEAPNDEEP`` are accepted by Ceph but might not be by some clients and
|
||||
libraries.
|
||||
|
||||
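As a minimal sketch (the endpoint URL, bucket, and object are placeholders, and the ``STANDARD_IA`` class configured above is assumed to exist), an S3 client such as the ``aws`` CLI sets the storage-class header for you::

$ aws --endpoint-url http://rgw.example.com:8080 \
s3 cp ./object.bin s3://testbucket/object.bin \
--storage-class STANDARD_IA
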
.. _`Pools`: ../pools
|
||||
.. _`Multisite Configuration`: ../multisite
|
||||
|
@ -8,46 +8,32 @@ user. RGW supports canned ACLs.
|
||||
|
||||
Authentication
|
||||
--------------
|
||||
Authenticating a request requires including an access key and a Hash-based
|
||||
Message Authentication Code (HMAC) in the request before it is sent to the
|
||||
RGW server. RGW uses an S3-compatible authentication approach.
|
||||
Requests are authenticated with AWS Signatures which are derived from the
|
||||
user's credentials (S3 access key and secret key).
|
||||
|
||||
::
|
||||
Most S3 clients and AWS SDKs will generate these signatures for you, given the
|
||||
necessary credentials. When issuing raw HTTP requests, these signatures must be
|
||||
added manually.
|
||||
|
||||
HTTP/1.1
|
||||
PUT /buckets/bucket/object.mpeg
|
||||
Host: cname.domain.com
|
||||
Date: Mon, 2 Jan 2012 00:01:01 +0000
|
||||
Content-Encoding: mpeg
|
||||
Content-Length: 9999999
|
||||
AWS Signature v4
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Authorization: AWS {access-key}:{hash-of-header-and-secret}
|
||||
Please refer to the official documentation in `Authenticating Requests (AWS Signature Version 4)`_.
|
||||
|
||||
In the foregoing example, replace ``{access-key}`` with the value for your access
|
||||
key ID followed by a colon (``:``). Replace ``{hash-of-header-and-secret}`` with
|
||||
a hash of the header string and the secret corresponding to the access key ID.
|
||||
The following values of the ``x-amz-content-sha256`` request header are supported:
|
||||
|
||||
To generate the hash of the header string and secret, you must:
|
||||
* Actual payload checksum value
|
||||
* ``UNSIGNED-PAYLOAD``
|
||||
* ``STREAMING-UNSIGNED-PAYLOAD-TRAILER``
|
||||
* ``STREAMING-AWS4-HMAC-SHA256-PAYLOAD``
|
||||
* ``STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER``
|
||||
|
||||
#. Get the value of the header string.
|
||||
#. Normalize the request header string into canonical form.
|
||||
#. Generate an HMAC using a SHA-1 hashing algorithm.
|
||||
See `RFC 2104`_ and `HMAC`_ for details.
|
||||
#. Encode the ``hmac`` result as base-64.
|
||||
AWS Signature v2
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
To normalize the header into canonical form:
|
||||
Please refer to the official documentation in `Authenticating Requests (AWS Signature Version 2)`_.
|
||||
|
||||
#. Get all fields beginning with ``x-amz-``.
|
||||
#. Ensure that the fields are all lowercase.
|
||||
#. Sort the fields lexicographically.
|
||||
#. Combine multiple instances of the same field name into a
|
||||
single field and separate the field values with a comma.
|
||||
#. Replace white space and line breaks in field values with a single space.
|
||||
#. Remove white space before and after colons.
|
||||
#. Append a new line after each field.
|
||||
#. Merge the fields back into the header.
|
||||
|
||||
Replace the ``{hash-of-header-and-secret}`` with the base-64 encoded HMAC string.
|
||||
.. note:: While v2 signatures have been deprecated in AWS, RGW continues to support them.
|
||||
|
||||
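In practice the signature is rarely computed by hand. A minimal sketch (the endpoint URL and credentials are placeholders) using the ``aws`` CLI, which signs every request with Signature v4::

$ export AWS_ACCESS_KEY_ID={access-key}
$ export AWS_SECRET_ACCESS_KEY={secret-key}
$ aws --endpoint-url http://rgw.example.com:8080 s3api list-buckets
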
Authentication against OpenStack Keystone
|
||||
-----------------------------------------
|
||||
@ -231,5 +217,5 @@ play. This is one of the many reasons that you should use S3 bucket
|
||||
policies rather than S3 ACLs when possible.
|
||||
|
||||
|
||||
.. _RFC 2104: http://www.ietf.org/rfc/rfc2104.txt
|
||||
.. _HMAC: https://en.wikipedia.org/wiki/HMAC
|
||||
.. _Authenticating Requests (AWS Signature Version 4): https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html
|
||||
.. _Authenticating requests (AWS signature version 2): https://docs.aws.amazon.com/AmazonS3/latest/userguide/auth-request-sig-v2.html
|
||||
|
@ -11,4 +11,4 @@ network for the gateway. For hardware recommendations, see
|
||||
|
||||
.. note:: On the NVMe-oF gateway, the memory footprint is a function of the
|
||||
number of mapped RBD images and can grow to be large. Plan memory
|
||||
requirements accordingly based on the number RBD images to be mapped.
|
||||
requirements accordingly based on the number of RBD images to be mapped.
|
||||
|
@ -25,10 +25,10 @@ Explanation
|
||||
The Ceph NVMe-oF gateway is both an NVMe-oF target and a Ceph client. Think of
|
||||
it as a "translator" between Ceph's RBD interface and the NVME-oF protocol. The
|
||||
Ceph NVMe-oF gateway can run on a standalone node or be colocated with other
|
||||
daemons, for example on a Ceph Object Store Disk (OSD) node. When colocating
|
||||
the Ceph NVMe-oF gateway with other daemons, ensure that sufficient CPU and
|
||||
memory are available. The steps below explain how to install and configure the
|
||||
Ceph NVMe/TCP gateway for basic operation.
|
||||
daemons, for example on an OSD node. When colocating the Ceph NVMe-oF gateway
|
||||
with other daemons, ensure that sufficient CPU and memory are available.
|
||||
The steps below explain how to install and configure the Ceph NVMe/TCP gateway
|
||||
for basic operation.
|
||||
|
||||
|
||||
Installation
|
||||
@ -52,7 +52,7 @@ Complete the following steps to install the Ceph NVME-oF gateway:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply nvmeof NVME-OF_POOL_NAME --placment="host01, host02"
|
||||
ceph orch apply nvmeof NVME-OF_POOL_NAME --placement="host01, host02"
|
||||
|
||||
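A minimal end-to-end sketch (the pool name ``nvmeof_pool`` and the host names are placeholders) that creates and initializes the pool before deploying the gateway service:

.. prompt:: bash #

ceph osd pool create nvmeof_pool
rbd pool init nvmeof_pool
ceph orch apply nvmeof nvmeof_pool --placement="host01, host02"
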
Configuration
|
||||
=============
|
||||
|
@ -60,6 +60,10 @@ Exclusive locking is mostly transparent to the user:
|
||||
exclusive lock. This is exposed by the ``--exclusive`` option of the ``rbd
|
||||
device map`` command.
|
||||
|
||||
.. note::
|
||||
The ``exclusive-lock`` feature is incompatible with RBD advisory locks
|
||||
(the ``rbd lock add`` and ``rbd lock rm`` commands).
|
||||
|
||||
|
||||
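A minimal sketch (pool ``rbdpool`` and image ``img1`` are placeholders) of mapping an image so that it holds the exclusive lock and does not hand it over to other clients:

.. prompt:: bash #

rbd device map --exclusive rbdpool/img1
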
Blocklisting
|
||||
============
|
||||
|
102
ceph/doc/start/beginners-guide.rst
Normal file
@ -0,0 +1,102 @@
|
||||
==========================
|
||||
Beginner's Guide
|
||||
==========================
|
||||
|
||||
The purpose of this Beginner's Guide is to make Ceph comprehensible.
|
||||
|
||||
Ceph is a clustered and distributed storage manager. If that's too cryptic,
|
||||
then just think of Ceph as a computer program that stores data and uses a
|
||||
network to make sure that there is a backup copy of the data.
|
||||
|
||||
Storage Interfaces
|
||||
------------------
|
||||
|
||||
Ceph offers several "storage interfaces", which is another
|
||||
way of saying "ways of storing data". These storage interfaces include:
|
||||
- CephFS (a file system)
|
||||
- RBD (block devices)
|
||||
- RADOS (an object store).
|
||||
|
||||
Deep down, though, all three of these are backed by the same RADOS object store. CephFS
|
||||
and RBD simply present that storage as a file system and as block devices.
|
||||
|
||||
Storage Manager: What is It?
|
||||
----------------------------
|
||||
|
||||
Ceph is a clustered and distributed storage manager that offers data
|
||||
redundancy. This sentence might be too cryptic for first-time readers of the
|
||||
Ceph Beginner's Guide, so let's explain all of the terms in it:
|
||||
|
||||
- **Storage Manager.** Ceph is a storage manager. This means that Ceph is
|
||||
software that helps storage resources store data. Storage resources come in
|
||||
several forms: hard disk drives (HDD), solid-state drives (SSD), magnetic
|
||||
tape, floppy disks, punched tape, Hollerith-style punch cards, and magnetic
|
||||
drum memory are all forms of storage resources. In this beginner's guide,
|
||||
we'll focus on hard disk drives (HDD) and solid-state drives (SSD).
|
||||
- **Clustered storage manager.** Ceph is a clustered storage manager. That
|
||||
means that the storage manager is installed not just on a single machine but on
|
||||
several machines that work together as a system.
|
||||
- **Distributed storage manager.** Ceph is a clustered and distributed storage
|
||||
manager. That means that the data that is stored and the infrastructure that
|
||||
supports it are spread across multiple machines and are not centralized in a
|
||||
single machine. To better understand what distributed means in this context,
|
||||
it might be helpful to describe what it is not: it is not a system like iSCSI,
|
||||
which is a system that exposes a single logical disk over the network in a
|
||||
1:1 (one-to-one) mapping.
|
||||
- **Data Redundancy.** Having a second copy of your data somewhere.
|
||||
|
||||
Ceph Monitor
|
||||
------------
|
||||
|
||||
The Ceph Monitor is one of the daemons essential to the functioning of a Ceph
|
||||
cluster. Monitors know the location of all the data in the Ceph cluster.
|
||||
Monitors maintain maps of the cluster state, and those maps make it possible
|
||||
for Ceph daemons to work together. These maps include the monitor map, the OSD
|
||||
map, the MDS map, and the CRUSH map. At least three Monitors are recommended to maintain
|
||||
quorum. Quorum is a state that is necessary for a Ceph cluster to work
|
||||
properly. Quorum means that a majority of the monitors are in the "up" state.
|
||||
|
||||
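If you already have a running cluster, you can check whether the Monitors currently have quorum (a sketch; the output varies from cluster to cluster):

.. prompt:: bash $

ceph quorum_status --format json-pretty
ceph mon stat
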
Manager
|
||||
-------
|
||||
The manager balances the data in the Ceph cluster, distributing load evenly so
|
||||
that no part of the cluster gets overloaded. The manager is one of the daemons
|
||||
essential to the functioning of the Ceph cluster. Managers keep track of
|
||||
runtime metrics such as system utilization, CPU performance, and disk load, and they host
|
||||
the Ceph dashboard web GUI.
|
||||
|
||||
OSD
|
||||
---
|
||||
|
||||
Object Storage Daemons (OSDs) store objects.
|
||||
|
||||
An OSD is a process that runs on a storage server. The OSD is responsible for
|
||||
managing a single unit of storage, which is usually a single disk.
|
||||
|
||||
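On a running cluster, the OSDs and the hosts they run on can be listed (a sketch; the output varies from cluster to cluster):

.. prompt:: bash $

ceph osd tree
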
Pools
|
||||
-----
|
||||
|
||||
A pool is an abstraction that can be designated as either "replicated" or
|
||||
"erasure coded". In Ceph, the method of data protection is set at the pool
|
||||
level. Ceph supports two types of data protection: replication and
|
||||
erasure coding. Objects are stored in pools. "A storage pool is a collection of
|
||||
storage volumes. A storage volume is the basic unit of storage, such as
|
||||
allocated space on a disk or a single tape cartridge. The server uses the
|
||||
storage volumes to store backed-up, archived, or space-managed files." (IBM
|
||||
Tivoli Storage Manager, Version 7.1, "Storage Pools")
|
||||
|
||||
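As a sketch (the pool names and placement-group counts are arbitrary placeholders), a replicated pool and an erasure-coded pool are created like this:

.. prompt:: bash $

ceph osd pool create mypool-replicated 64 64 replicated
ceph osd pool create mypool-ec 64 64 erasure
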
Placement Groups
|
||||
----------------
|
||||
|
||||
Placement groups are an internal subdivision of pools: each object is mapped to a
placement group, and each placement group is mapped to a set of OSDs.
|
||||
|
||||
MDS
|
||||
---
|
||||
A metadata server (MDS) is necessary for the proper functioning of CephFS.
|
||||
See :ref:`orchestrator-cli-cephfs` and :ref:`arch-cephfs`.
|
||||
|
||||
Links
|
||||
-----
|
||||
|
||||
#. `Ceph Wiki (requires Ceph Redmine Tracker account) <https://tracker.ceph.com/projects/ceph/wiki>`_
|
||||
#. `Sage Weil's 27 June 2019 "Intro To Ceph" tech talk (1h27m) <https://www.youtube.com/watch?v=PmLPbrf-x9g>`_
|
||||
#. `Sage Weil's 2018 talk "Ceph, the Future of Storage" (27m) <https://www.youtube.com/watch?v=szE4Hg1eXoA>`_
|
@ -9,7 +9,7 @@ These are exciting times in the Ceph community! Get involved!
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
|Channel | Description | Contact Info |
|
||||
+======================+=================================================+===============================================+
|
||||
| **Blog** | Check the Ceph Blog_ periodically to keep track | http://ceph.com/community/blog/ |
|
||||
| **Blog** | Check the Ceph Blog_ periodically to keep track | https://ceph.com/community/blog/ |
|
||||
| | of Ceph progress and important announcements. | |
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
| **Planet Ceph** | Check the blog aggregation on Planet Ceph for | https://old.ceph.com/category/planet/ |
|
||||
@ -38,14 +38,14 @@ These are exciting times in the Ceph community! Get involved!
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
| **User List** | Ask and answer user-related questions by | |
|
||||
| | subscribing to the email list at | - `User Subscribe`_ |
|
||||
| | ceph-users@ceph.io. You can opt out of the email| - `User Unsubscribe`_ |
|
||||
| | list at any time by unsubscribing. A simple | - `User Archives`_ |
|
||||
| | ceph-users@ceph.io. You can opt out of the email| - `User Archives`_ |
|
||||
| | list at any time by unsubscribing. A simple | |
|
||||
| | email is all it takes! | |
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
| **Devel List** | Keep in touch with developer activity by | |
|
||||
| | subscribing to the email list at dev@ceph.io. | - `Devel Subscribe`_ |
|
||||
| | You can opt out of the email list at any time by| - `Devel Unsubscribe`_ |
|
||||
| | unsubscribing. A simple email is all it takes! | - `Devel Archives`_ |
|
||||
| | You can opt out of the email list at any time by| - `Devel Archives`_ |
|
||||
| | unsubscribing. A simple email is all it takes! | |
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
| **Kernel Client** | Linux kernel-related traffic, including kernel | - `Kernel Client Subscribe`_ |
|
||||
| | patches and discussion of implementation details| - `Kernel Client Unsubscribe`_ |
|
||||
@ -66,13 +66,13 @@ These are exciting times in the Ceph community! Get involved!
|
||||
| | opt out of the email list at any time by | - `Community Unsubscribe`_ |
|
||||
| | unsubscribing. A simple email is all it takes! | - `Mailing list archives`_ |
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
| **Bug Tracker** | You can help keep Ceph production worthy by | http://tracker.ceph.com/projects/ceph |
|
||||
| **Bug Tracker** | You can help keep Ceph production worthy by | https://tracker.ceph.com/projects/ceph |
|
||||
| | filing and tracking bugs, and providing feature | |
|
||||
| | requests using the Bug Tracker_. | |
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
| **Source Code** | If you would like to participate in | |
|
||||
| | development, bug fixing, or if you just want | - http://github.com/ceph/ceph |
|
||||
| | the very latest code for Ceph, you can get it | - http://download.ceph.com/tarballs/ |
|
||||
| | development, bug fixing, or if you just want | - https://github.com/ceph/ceph |
|
||||
| | the very latest code for Ceph, you can get it | - https://download.ceph.com/tarballs/ |
|
||||
| | at http://github.com. See `Ceph Source Code`_ | |
|
||||
| | for details on cloning from github. | |
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
@ -81,12 +81,10 @@ These are exciting times in the Ceph community! Get involved!
|
||||
|
||||
|
||||
|
||||
.. _Devel Subscribe: mailto:dev-request@ceph.io?body=subscribe
|
||||
.. _Devel Unsubscribe: mailto:dev-request@ceph.io?body=unsubscribe
|
||||
.. _Devel Subscribe: https://lists.ceph.io/postorius/lists/dev.ceph.io/
|
||||
.. _Kernel Client Subscribe: mailto:majordomo@vger.kernel.org?body=subscribe+ceph-devel
|
||||
.. _Kernel Client Unsubscribe: mailto:majordomo@vger.kernel.org?body=unsubscribe+ceph-devel
|
||||
.. _User Subscribe: mailto:ceph-users-request@ceph.io?body=subscribe
|
||||
.. _User Unsubscribe: mailto:ceph-users-request@ceph.io?body=unsubscribe
|
||||
.. _User Subscribe: https://lists.ceph.io/postorius/lists/ceph-users.ceph.io/
|
||||
.. _Community Subscribe: mailto:ceph-community-join@lists.ceph.com
|
||||
.. _Community Unsubscribe: mailto:ceph-community-leave@lists.ceph.com
|
||||
.. _Commit Subscribe: mailto:ceph-commit-join@lists.ceph.com
|
||||
|
@ -78,6 +78,7 @@ recover dynamically.
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
Beginner's Guide <beginners-guide>
|
||||
Hardware Recommendations <hardware-recommendations>
|
||||
OS Recommendations <os-recommendations>
|
||||
|
||||
@ -95,3 +96,4 @@ recover dynamically.
|
||||
|
||||
get-involved
|
||||
documenting-ceph
|
||||
|
@ -43,40 +43,46 @@ distribution that includes a supported kernel and supported system startup
|
||||
framework, for example ``sysvinit`` or ``systemd``. Ceph is sometimes ported to
|
||||
non-Linux systems but these are not supported by the core Ceph effort.
|
||||
|
||||
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| | Reef (18.2.z) | Quincy (17.2.z) | Pacific (16.2.z) | Octopus (15.2.z) |
|
||||
+===============+===============+=================+==================+==================+
|
||||
| Centos 7 | | | A | B |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| Centos 8 | A | A | A | A |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| Centos 9 | A | | | |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| Debian 10 | C | | C | C |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| Debian 11 | C | C | C | |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| OpenSUSE 15.2 | C | | C | C |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| OpenSUSE 15.3 | C | C | | |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| Ubuntu 18.04 | | | C | C |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| Ubuntu 20.04 | A | A | A | A |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
| Ubuntu 22.04 | A | | | |
|
||||
+---------------+---------------+-----------------+------------------+------------------+
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| | Reef (18.2.z) | Quincy (17.2.z) | Pacific (16.2.z) | Octopus (15.2.z) |
|
||||
+===============+===============+==================+==================+==================+
|
||||
| Centos 7 | | | | B |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| Centos 8 | | | | |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| Centos 9 | A H | A :sup:`1` H | | |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| Debian 10 | C | | C | C |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| Debian 11 | C | C | C | |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| OpenSUSE 15.2 | C | | C | C |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| OpenSUSE 15.3 | C | C | | |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| Ubuntu 18.04 | | | C | C |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| Ubuntu 20.04 | A | A | A | A |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
| Ubuntu 22.04 | A H | | | |
|
||||
+---------------+---------------+------------------+------------------+------------------+
|
||||
|
||||
- **A**: Ceph provides packages and has done comprehensive tests on the software in them.
|
||||
- **B**: Ceph provides packages and has done basic tests on the software in them.
|
||||
- **C**: Ceph provides packages only. No tests have been done on these releases.
|
||||
- **H**: Ceph tests this distribution as a container host.
|
||||
- **1**: Testing has been done on Centos 9 starting with version 17.2.8 for Quincy.
|
||||
|
||||
.. note::
|
||||
**For Centos 7 Users**
|
||||
|
||||
``Btrfs`` is no longer tested on Centos 7 in the Octopus release. We recommend using ``bluestore`` instead.
|
||||
|
||||
.. note:: See the list of QAed container hosts in the Ceph repository here:
|
||||
`List of Container Hosts
|
||||
<https://github.com/ceph/ceph/tree/main/qa/distros/supported-container-hosts>`_.
|
||||
|
||||
|
||||
.. _CRUSH Tunables: ../../rados/operations/crush-map#tunables
|
||||
|
||||
.. _Mounting CephFS using Kernel Driver: ../../cephfs/mount-using-kernel-driver#which-kernel-version
|
||||
|
@ -502,14 +502,14 @@ else
|
||||
install_cortx_motr_on_ubuntu
|
||||
fi
|
||||
;;
|
||||
rocky|centos|fedora|rhel|ol|virtuozzo)
|
||||
almalinux|rocky|centos|fedora|rhel|ol|virtuozzo)
|
||||
builddepcmd="dnf -y builddep --allowerasing"
|
||||
echo "Using dnf to install dependencies"
|
||||
case "$ID" in
|
||||
fedora)
|
||||
$SUDO dnf install -y dnf-utils
|
||||
;;
|
||||
rocky|centos|rhel|ol|virtuozzo)
|
||||
almalinux|rocky|centos|rhel|ol|virtuozzo)
|
||||
MAJOR_VERSION="$(echo $VERSION_ID | cut -d. -f1)"
|
||||
$SUDO dnf install -y dnf-utils selinux-policy-targeted
|
||||
rpm --quiet --query epel-release || \
|
||||
|
@ -3,7 +3,7 @@
|
||||
dashboardTags: ['ceph-mixin'],
|
||||
|
||||
clusterLabel: 'cluster',
|
||||
showMultiCluster: false,
|
||||
showMultiCluster: true,
|
||||
|
||||
CephNodeNetworkPacketDropsThreshold: 0.005,
|
||||
CephNodeNetworkPacketDropsPerSec: 10,
|
||||
|
File diff suppressed because it is too large
@ -35,9 +35,6 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('mds_servers',
|
||||
'$datasource',
|
||||
@ -57,7 +54,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'none',
|
||||
'Reads(-) / Writes (+)',
|
||||
0,
|
||||
'sum(rate(ceph_objecter_op_r{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(),
|
||||
'sum(rate(ceph_objecter_op_r{ceph_daemon=~"($mds_servers).*", %(matchers)s}[$__rate_interval]))' % $.matchers(),
|
||||
'Read Ops',
|
||||
0,
|
||||
1,
|
||||
@ -65,7 +62,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
9
|
||||
)
|
||||
.addTarget($.addTargetSchema(
|
||||
'sum(rate(ceph_objecter_op_w{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(),
|
||||
'sum(rate(ceph_objecter_op_w{ceph_daemon=~"($mds_servers).*", %(matchers)s}[$__rate_interval]))' % $.matchers(),
|
||||
'Write Ops'
|
||||
))
|
||||
.addSeriesOverride(
|
||||
@ -78,7 +75,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'none',
|
||||
'Client Requests',
|
||||
0,
|
||||
'ceph_mds_server_handle_client_request{%(matchers)s, ceph_daemon=~"($mds_servers).*"}' % $.matchers(),
|
||||
'ceph_mds_server_handle_client_request{ceph_daemon=~"($mds_servers).*", %(matchers)s}' % $.matchers(),
|
||||
'{{ceph_daemon}}',
|
||||
12,
|
||||
1,
|
||||
|
@ -41,13 +41,10 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('osd_hosts',
|
||||
'$datasource',
|
||||
'label_values(ceph_disk_occupation{%(matchers)s}, exported_instance)' % $.matchers(),
|
||||
'label_values(ceph_osd_metadata{%(matchers)s}, hostname)' % $.matchers(),
|
||||
1,
|
||||
true,
|
||||
1,
|
||||
@ -57,7 +54,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('mon_hosts',
|
||||
'$datasource',
|
||||
'label_values(ceph_mon_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
|
||||
'label_values(ceph_mon_metadata{%(matchers)s}, hostname)' % $.matchers(),
|
||||
1,
|
||||
true,
|
||||
1,
|
||||
@ -67,7 +64,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('mds_hosts',
|
||||
'$datasource',
|
||||
'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % $.matchers(),
|
||||
'label_values(ceph_mds_inodes{hostname, %(matchers)s})' % $.matchers(),
|
||||
1,
|
||||
true,
|
||||
1,
|
||||
@ -77,7 +74,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('rgw_hosts',
|
||||
'$datasource',
|
||||
'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
|
||||
'label_values(ceph_rgw_metadata{hostname, %(matchers)s})' % $.matchers(),
|
||||
1,
|
||||
true,
|
||||
1,
|
||||
@ -188,7 +185,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
"instance", "$1", "instance", "([^.:]*).*"
|
||||
) * on(instance, device) group_left(ceph_daemon) label_replace(
|
||||
label_replace(
|
||||
ceph_disk_occupation_human{%(matchers)s, instance=~"($osd_hosts).*"},
|
||||
ceph_disk_occupation_human{instance=~"($osd_hosts).*", %(matchers)s},
|
||||
"device", "$1", "device", "/dev/(.*)"
|
||||
), "instance", "$1", "instance", "([^.:]*).*"
|
||||
)
|
||||
@ -209,17 +206,17 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
|||
|
||||
sum (
|
||||
(
|
||||
rate(node_network_receive_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) or
|
||||
rate(node_network_receive_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval])
|
||||
rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
|
||||
rate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
|
||||
) unless on (device, instance)
|
||||
label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
|
||||
label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)")
|
||||
) +
|
||||
sum (
|
||||
(
|
||||
rate(node_network_transmit_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) or
|
||||
rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval])
|
||||
rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
|
||||
rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
|
||||
) unless on (device, instance)
|
||||
label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
|
||||
label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)")
|
||||
)
|
||||
|||,
|
||||
true,
|
||||
@ -271,7 +268,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
|
||||
rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
|
||||
) unless on (device, instance)
|
||||
label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)"))
|
||||
label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)"))
|
||||
))
|
||||
|||,
|
||||
'{{instance}}',
|
||||
@ -312,18 +309,15 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('ceph_hosts',
|
||||
'$datasource',
|
||||
if $._config.showMultiCluster then ('label_values({%(clusterMatcher)s}, instance)' % $.matchers()) else 'label_values(instance)',
|
||||
'label_values({__name__=~"ceph_.+_metadata", %(matchers)s}, hostname)' % $.matchers(),
|
||||
1,
|
||||
false,
|
||||
3,
|
||||
'Hostname',
|
||||
'([^.:]*).*')
|
||||
true,
|
||||
1,
|
||||
null,
|
||||
'([^.]*).*')
|
||||
)
|
||||
.addPanels([
|
||||
$.addRowSchema(false, true, '$ceph_hosts System Overview') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
|
||||
@ -332,7 +326,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'OSDs',
|
||||
'',
|
||||
'current',
|
||||
"count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s, hostname='$ceph_hosts'}))" % $.matchers(),
|
||||
'count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s}))' % $.matchers(),
|
||||
null,
|
||||
'time_series',
|
||||
0,
|
||||
@ -532,7 +526,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
|||
|
||||
sum(
|
||||
ceph_osd_stat_bytes{%(matchers)s} and
|
||||
on (ceph_daemon) ceph_disk_occupation{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"}
|
||||
on (ceph_daemon) ceph_disk_occupation{instance=~"($ceph_hosts)([\\\\.:].*)?", %(matchers)s}
|
||||
)
|
||||
||| % $.matchers(),
|
||||
null,
|
||||
@ -709,7 +703,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
rate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) * 100
|
||||
), "instance", "$1", "instance", "([^:.]*).*"
|
||||
) * on(instance, device) group_left(ceph_daemon) label_replace(
|
||||
label_replace(ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"},
|
||||
label_replace(ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?", %(matchers)s},
|
||||
"device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*"
|
||||
)
|
||||
||| % $.matchers(),
|
||||
@ -786,7 +780,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
$.addTargetSchema(
|
||||
|||
|
||||
topk(10,
|
||||
(sum by (instance)(ceph_daemon_health_metrics{type="SLOW_OPS", ceph_daemon=~"osd.*"}))
|
||||
(sum by (instance)(ceph_daemon_health_metrics{type="SLOW_OPS", ceph_daemon=~"osd.*", %(matchers)s}))
|
||||
)
|
||||
||| % $.matchers(),
|
||||
'',
|
||||
|
@ -42,9 +42,6 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addPanels([
|
||||
$.simpleGraphPanel(
|
||||
{ '@95%ile': '#e0752d' },
|
||||
@ -317,7 +314,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'count(ceph_bluefs_wal_total_bytes{%(matchers)s})' % $.matchers(), 'bluestore', 'time_series', 2
|
||||
))
|
||||
.addTarget($.addTargetSchema(
|
||||
'absent(ceph_bluefs_wal_total_bytes{job=~"$job"}) * count(ceph_osd_metadata{job=~"$job"})' % $.matchers(), 'filestore', 'time_series', 2
|
||||
'absent(ceph_bluefs_wal_total_bytes{%(matchers)s}) * count(ceph_osd_metadata{%(matchers)s})' % $.matchers(), 'filestore', 'time_series', 2
|
||||
)),
|
||||
$.pieChartPanel('OSD Size Summary', 'The pie chart shows the various OSD sizes used within the cluster', '$datasource', { x: 8, y: 8, w: 4, h: 8 }, 'table', 'bottom', true, ['percent'], { mode: 'single', sort: 'none' }, 'pie', ['percent', 'value'], 'palette-classic')
|
||||
.addTarget($.addTargetSchema(
|
||||
@ -554,9 +551,6 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('osd',
|
||||
'$datasource',
|
||||
@ -577,11 +571,11 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
's',
|
||||
'Read (-) / Write (+)',
|
||||
|||
|
||||
rate(ceph_osd_op_r_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) /
|
||||
rate(ceph_osd_op_r_latency_sum{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval]) /
|
||||
on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval])
|
||||
||| % $.matchers(),
|
||||
|||
|
||||
rate(ceph_osd_op_w_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) /
|
||||
rate(ceph_osd_op_w_latency_sum{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval]) /
|
||||
on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval])
|
||||
||| % $.matchers(),
|
||||
'read',
|
||||
@ -602,8 +596,8 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'',
|
||||
'short',
|
||||
'Read (-) / Write (+)',
|
||||
'rate(ceph_osd_op_r{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_osd_op_w{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_osd_op_r{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_osd_op_w{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(),
|
||||
'Reads',
|
||||
'Writes',
|
||||
6,
|
||||
@ -619,8 +613,8 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'',
|
||||
'bytes',
|
||||
'Read (-) / Write (+)',
|
||||
'rate(ceph_osd_op_r_out_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_osd_op_w_in_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_osd_op_r_out_bytes{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_osd_op_w_in_bytes{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(),
|
||||
'Read Bytes',
|
||||
'Write Bytes',
|
||||
12,
|
||||
@ -640,12 +634,12 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
|||
|
||||
(
|
||||
label_replace(
|
||||
rate(node_disk_read_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) /
|
||||
rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]),
|
||||
rate(node_disk_read_time_seconds_total[$__rate_interval]) /
|
||||
rate(node_disk_reads_completed_total[$__rate_interval]),
|
||||
"instance", "$1", "instance", "([^:.]*).*"
|
||||
) and on (instance, device) label_replace(
|
||||
label_replace(
|
||||
ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
|
||||
ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
|
||||
"device", "$1", "device", "/dev/(.*)"
|
||||
), "instance", "$1", "instance", "([^:.]*).*"
|
||||
)
|
||||
@ -654,12 +648,12 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
|||
|
||||
(
|
||||
label_replace(
|
||||
rate(node_disk_write_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) /
|
||||
rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]),
|
||||
rate(node_disk_write_time_seconds_total[$__rate_interval]) /
|
||||
rate(node_disk_writes_completed_total[$__rate_interval]),
|
||||
"instance", "$1", "instance", "([^:.]*).*") and on (instance, device)
|
||||
label_replace(
|
||||
label_replace(
|
||||
ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
|
||||
ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)"
|
||||
), "instance", "$1", "instance", "([^:.]*).*"
|
||||
)
|
||||
)
|
||||
@ -681,22 +675,22 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'Read (-) / Write (+)',
|
||||
|||
|
||||
label_replace(
|
||||
rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]),
|
||||
rate(node_disk_writes_completed_total[$__rate_interval]),
|
||||
"instance", "$1", "instance", "([^:.]*).*"
|
||||
) and on (instance, device) label_replace(
|
||||
label_replace(
|
||||
ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
|
||||
ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
|
||||
"device", "$1", "device", "/dev/(.*)"
|
||||
), "instance", "$1", "instance", "([^:.]*).*"
|
||||
)
|
||||
||| % $.matchers(),
|
||||
|||
|
||||
label_replace(
|
||||
rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]),
|
||||
rate(node_disk_reads_completed_total[$__rate_interval]),
|
||||
"instance", "$1", "instance", "([^:.]*).*"
|
||||
) and on (instance, device) label_replace(
|
||||
label_replace(
|
||||
ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
|
||||
ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
|
||||
"device", "$1", "device", "/dev/(.*)"
|
||||
), "instance", "$1", "instance", "([^:.]*).*"
|
||||
)
|
||||
@ -718,20 +712,20 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'Read (-) / Write (+)',
|
||||
|||
|
||||
label_replace(
|
||||
rate(node_disk_read_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
|
||||
rate(node_disk_read_bytes_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
|
||||
) and on (instance, device) label_replace(
|
||||
label_replace(
|
||||
ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
|
||||
ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
|
||||
"device", "$1", "device", "/dev/(.*)"
|
||||
), "instance", "$1", "instance", "([^:.]*).*"
|
||||
)
|
||||
||| % $.matchers(),
|
||||
|||
|
||||
label_replace(
|
||||
rate(node_disk_written_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
|
||||
rate(node_disk_written_bytes_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
|
||||
) and on (instance, device) label_replace(
|
||||
label_replace(
|
||||
ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
|
||||
ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
|
||||
"device", "$1", "device", "/dev/(.*)"
|
||||
), "instance", "$1", "instance", "([^:.]*).*"
|
||||
)
|
||||
@ -763,11 +757,11 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTarget($.addTargetSchema(
|
||||
|||
|
||||
label_replace(
|
||||
rate(node_disk_io_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]),
|
||||
rate(node_disk_io_time_seconds_total[$__rate_interval]),
|
||||
"instance", "$1", "instance", "([^:.]*).*"
|
||||
) and on (instance, device) label_replace(
|
||||
label_replace(
|
||||
ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
|
||||
ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)"
|
||||
), "instance", "$1", "instance", "([^:.]*).*"
|
||||
)
|
||||
||| % $.matchers(),
|
||||
|
@ -29,9 +29,6 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
g.template.custom(label='TopK',
|
||||
name='topk',
|
||||
@ -57,7 +54,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'Pools with Compression',
|
||||
'Count of the pools that have compression enabled',
|
||||
'current',
|
||||
'count(ceph_pool_metadata{%(matchers)s, compression_mode!="none"})' % $.matchers(),
|
||||
'count(ceph_pool_metadata{compression_mode!="none", %(matchers)s})' % $.matchers(),
|
||||
null,
|
||||
'',
|
||||
3,
|
||||
@ -510,7 +507,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
'ceph_pool_metadata{%(matchers)s, compression_mode!="none"}' % $.matchers(), 'K', 'table', 1, true
|
||||
'ceph_pool_metadata{compression_mode!="none", %(matchers)s}' % $.matchers(), 'K', 'table', 1, true
|
||||
),
|
||||
$.addTargetSchema('', 'L', '', '', null),
|
||||
]
|
||||
@ -622,9 +619,6 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('pool_name',
|
||||
'$datasource',
|
||||
@ -648,7 +642,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'.7,.8',
|
||||
|||
|
||||
(ceph_pool_stored{%(matchers)s} / (ceph_pool_stored{%(matchers)s} + ceph_pool_max_avail{%(matchers)s})) *
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'time_series',
|
||||
0,
|
||||
@ -668,7 +662,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
'current',
|
||||
|||
|
||||
(ceph_pool_max_avail{%(matchers)s} / deriv(ceph_pool_stored{%(matchers)s}[6h])) *
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} > 0
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} > 0
|
||||
||| % $.matchers(),
|
||||
'time_series',
|
||||
7,
|
||||
@ -689,7 +683,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
null,
|
||||
|||
|
||||
deriv(ceph_pool_objects{%(matchers)s}[1m]) *
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'Objects per second',
|
||||
12,
|
||||
@ -709,7 +703,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
null,
|
||||
|||
|
||||
rate(ceph_pool_rd{%(matchers)s}[$__rate_interval]) *
|
||||
on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
|
||||
on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'reads',
|
||||
0,
|
||||
@ -722,7 +716,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
$.addTargetSchema(
|
||||
|||
|
||||
rate(ceph_pool_wr{%(matchers)s}[$__rate_interval]) *
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'writes'
|
||||
)
|
||||
@ -739,7 +733,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
null,
|
||||
|||
|
||||
rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) +
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
|
||||
on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'reads',
|
||||
12,
|
||||
@ -752,7 +746,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
$.addTargetSchema(
|
||||
|||
|
||||
rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval]) +
|
||||
on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
|
||||
on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'writes'
|
||||
)
|
||||
@ -769,7 +763,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
null,
|
||||
|||
|
||||
ceph_pool_objects{%(matchers)s} *
|
||||
on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
|
||||
on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'Number of Objects',
|
||||
0,
|
||||
|
@ -1,5 +1,4 @@
|
||||
local g = import 'grafonnet/grafana.libsonnet';
|
||||
local u = import 'utils.libsonnet';
|
||||
|
||||
local info_rbd_stats = std.join(
|
||||
'',
|
||||
@ -67,23 +66,21 @@ local info_rbd_stats = std.join(
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('pool',
|
||||
'$datasource',
|
||||
'label_values(pool)',
|
||||
'label_values(ceph_rbd_read_ops{%(matchers)s}, pool)' % $.matchers(),
|
||||
1,
|
||||
false,
|
||||
0,
|
||||
'',
|
||||
'')
|
||||
)
|
||||
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('image',
|
||||
'$datasource',
|
||||
'label_values(image)',
|
||||
'label_values(ceph_rbd_read_ops{%(matchers)s, pool="$pool"}, image)' % $.matchers(),
|
||||
1,
|
||||
false,
|
||||
0,
|
||||
@ -95,9 +92,9 @@ local info_rbd_stats = std.join(
|
||||
'IOPS',
|
||||
info_rbd_stats,
|
||||
'iops',
|
||||
'rate(ceph_rbd_write_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers()
|
||||
'rate(ceph_rbd_write_ops{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers()
|
||||
,
|
||||
'rate(ceph_rbd_read_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_rbd_read_ops{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(),
|
||||
0,
|
||||
0,
|
||||
8,
|
||||
@ -107,8 +104,8 @@ local info_rbd_stats = std.join(
|
||||
'Throughput',
|
||||
info_rbd_stats,
|
||||
'Bps',
|
||||
'rate(ceph_rbd_write_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_rbd_read_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_rbd_write_bytes{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(),
|
||||
'rate(ceph_rbd_read_bytes{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(),
|
||||
8,
|
||||
0,
|
||||
8,
|
||||
@ -119,12 +116,12 @@ local info_rbd_stats = std.join(
|
||||
info_rbd_stats,
|
||||
'ns',
|
||||
|||
|
||||
rate(ceph_rbd_write_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) /
|
||||
rate(ceph_rbd_write_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])
|
||||
rate(ceph_rbd_write_latency_sum{pool="$pool", image="$image", %(matchers)s}[$__rate_interval]) /
|
||||
rate(ceph_rbd_write_latency_count{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])
|
||||
||| % $.matchers(),
|
||||
|||
|
||||
rate(ceph_rbd_read_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) /
|
||||
rate(ceph_rbd_read_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])
|
||||
rate(ceph_rbd_read_latency_sum{pool="$pool", image="$image", %(matchers)s}[$__rate_interval]) /
|
||||
rate(ceph_rbd_read_latency_count{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])
|
||||
||| % $.matchers(),
|
||||
16,
|
||||
0,
|
||||
@ -204,9 +201,6 @@ local info_rbd_stats = std.join(
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addPanels([
|
||||
RbdOverviewPanel(
|
||||
'IOPS',
|
||||
|
@ -28,10 +28,14 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
|
||||
)
|
||||
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('rgw_servers',
|
||||
'$datasource',
|
||||
'label_values(ceph_rgw_metadata{}, ceph_daemon)',
|
||||
'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
|
||||
2,
|
||||
true,
|
||||
0,
|
||||
@ -63,7 +67,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
])
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='__auto',
|
||||
range=true
|
||||
@ -87,7 +91,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
])
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='__auto',
|
||||
range=true
|
||||
@ -111,7 +115,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
])
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='__auto',
|
||||
range=true
|
||||
@ -135,7 +139,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
])
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='__auto',
|
||||
range=true
|
||||
@ -151,48 +155,48 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='List Objects',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='List Buckets',
|
||||
range=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Put Objects',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Get Objects',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Delete Objects',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Delete Buckets',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Copy Objects',
|
||||
range=true
|
||||
@ -210,28 +214,28 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Put Objects',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Get Objects',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Delete Objects',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Copy Objects',
|
||||
range=true
|
||||
@ -248,48 +252,48 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='List Object',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='List Bucket',
|
||||
range=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Put Object',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Get Object',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Delete Object',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Delete Bucket',
|
||||
range=false,
|
||||
instant=true
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='Copy Object',
|
||||
range=true
|
||||
@ -443,7 +447,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
},
|
||||
]).addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -454,7 +458,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -465,7 +469,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -476,7 +480,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -646,7 +650,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
},
|
||||
]).addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -657,7 +661,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -668,7 +672,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -679,7 +683,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -690,7 +694,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -850,7 +854,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
},
|
||||
]).addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -861,7 +865,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -872,7 +876,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -883,7 +887,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1053,7 +1057,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
},
|
||||
]).addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1064,7 +1068,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1075,7 +1079,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1086,7 +1090,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1097,7 +1101,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
|
||||
expr='ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1121,7 +1125,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
|
||||
expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='{{ceph_daemon}} - {{bucket}}',
|
||||
range=false,
|
||||
@ -1140,7 +1144,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
|
||||
expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='{{ceph_daemon}} - {{bucket}}',
|
||||
range=false,
|
||||
@ -1159,7 +1163,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
|
||||
expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='{{ceph_daemon}} - {{bucket}}',
|
||||
range=false,
|
||||
@ -1178,7 +1182,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
|
||||
expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='{{ceph_daemon}} - {{bucket}}',
|
||||
range=false,
|
||||
@ -1220,7 +1224,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1264,7 +1268,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1308,7 +1312,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1352,7 +1356,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1396,7 +1400,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1440,7 +1444,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1484,7 +1488,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1528,7 +1532,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1572,7 +1576,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1741,7 +1745,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
},
|
||||
]).addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1752,7 +1756,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1763,7 +1767,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1774,7 +1778,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1785,7 +1789,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -1810,7 +1814,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)\n',
|
||||
expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)\n' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='{{ceph_daemon}} - {{user}}',
|
||||
range=false,
|
||||
@ -1829,7 +1833,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)\n',
|
||||
expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)\n' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='{{ceph_daemon}} - {{user}}',
|
||||
range=false,
|
||||
@ -1848,7 +1852,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
|
||||
expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='{{ceph_daemon}} - {{user}}',
|
||||
range=false,
|
||||
@ -1867,7 +1871,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
)
|
||||
.addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
|
||||
expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
legendFormat='{{ceph_daemon}} - {{user}}',
|
||||
range=false,
|
||||
@ -1909,7 +1913,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1953,7 +1957,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -1997,7 +2001,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -2041,7 +2045,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -2085,7 +2089,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -2129,7 +2133,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -2173,7 +2177,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -2217,7 +2221,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -2261,7 +2265,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
.addTargets(
|
||||
[
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource='${datasource}',
|
||||
format='time_series',
|
||||
instant=false,
|
||||
@ -2386,7 +2390,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
},
|
||||
]).addTargets([
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -2397,7 +2401,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -2408,7 +2412,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -2419,7 +2423,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
@ -2430,7 +2434,7 @@ local g = import 'grafonnet/grafana.libsonnet';
|
||||
range=false,
|
||||
),
|
||||
$.addTargetSchema(
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
|
||||
expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
|
||||
datasource={ type: 'prometheus', uid: '${datasource}' },
|
||||
format='table',
|
||||
hide=false,
|
||||
|
@ -1,5 +1,4 @@
local g = import 'grafonnet/grafana.libsonnet';
local u = import 'utils.libsonnet';

(import 'utils.libsonnet') {
'radosgw-sync-overview.json':
@ -59,9 +58,7 @@ local u = import 'utils.libsonnet';
.addTemplate(
$.addClusterTemplate()
)
.addTemplate(
$.addJobTemplate()
)

.addTemplate(
$.addTemplateSchema(
'rgw_servers',
@ -70,8 +67,8 @@ local u = import 'utils.libsonnet';
1,
true,
1,
'',
'RGW Server'
null,
'rgw.(.*)'
)
)
.addPanels([
@ -115,6 +112,45 @@ local u = import 'utils.libsonnet';
8,
7
),
$.timeSeriesPanel(
lineInterpolation='linear',
lineWidth=1,
drawStyle='line',
axisPlacement='auto',
title='Replication(Time) Delta per shard',
datasource='$datasource',
gridPosition={ h: 7, w: 16, x: 8, y: 7 },
fillOpacity=0,
pointSize=5,
showPoints='auto',
unit='s',
displayMode='table',
showLegend=true,
placement='right',
tooltip={ mode: 'multi', sort: 'desc' },
stackingMode='none',
spanNulls=false,
decimals=2,
thresholdsMode='absolute',
sortBy='Last *',
sortDesc=true
)
.addCalcs(['lastNotNull'])
.addThresholds([
{ color: 'green', value: null },
{ color: 'red', value: 80 },
])
.addTargets(
[
$.addTargetSchema(
expr='rate(ceph_rgw_sync_delta_sync_delta[$__rate_interval])',
datasource='$datasource',
instant=false,
legendFormat='{{instance_id}} - {{shard_id}}',
range=true,
),
]
),
]),
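For context, the new 'Replication(Time) Delta per shard' panel boils down to the single query below; the legend and unit follow the legendFormat and unit='s' arguments above (a sketch, not part of the generated dashboard):

// Sketch of the query behind the new per-shard panel.
{
  sync_delta_expr: 'rate(ceph_rgw_sync_delta_sync_delta[$__rate_interval])',
  // One series per (instance_id, shard_id) pair, shown in the legend as
  // '{{instance_id}} - {{shard_id}}' and plotted in seconds.
}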
'radosgw-overview.json':
local RgwOverviewPanel(
|
||||
@ -196,9 +232,6 @@ local u = import 'utils.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addTemplateSchema(
|
||||
'rgw_servers',
|
||||
@ -208,7 +241,7 @@ local u = import 'utils.libsonnet';
|
||||
true,
|
||||
1,
|
||||
'',
|
||||
'RGW Server'
|
||||
'.*'
|
||||
)
|
||||
)
|
||||
.addTemplate(
|
||||
@ -713,9 +746,6 @@ local u = import 'utils.libsonnet';
|
||||
.addTemplate(
|
||||
$.addClusterTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addJobTemplate()
|
||||
)
|
||||
.addTemplate(
|
||||
$.addTemplateSchema('rgw_servers',
|
||||
'$datasource',
|
||||
@ -738,13 +768,13 @@ local u = import 'utils.libsonnet';
|
||||
sum by (instance_id) (
|
||||
rate(ceph_rgw_op_get_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
|
||||
rate(ceph_rgw_op_get_obj_lat_count{%(matchers)s}[$__rate_interval])
|
||||
) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
|||
|
||||
sum by (instance_id) (
|
||||
rate(ceph_rgw_op_put_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
|
||||
rate(ceph_rgw_op_put_obj_lat_count{%(matchers)s}[$__rate_interval])
|
||||
) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'GET {{ceph_daemon}}',
|
||||
'PUT {{ceph_daemon}}',
|
||||
@ -761,12 +791,12 @@ local u = import 'utils.libsonnet';
|
||||
'short',
|
||||
|||
|
||||
rate(ceph_rgw_op_get_obj_bytes{%(matchers)s}[$__rate_interval]) *
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
|||
|
||||
rate(ceph_rgw_op_put_obj_bytes{%(matchers)s}[$__rate_interval]) *
|
||||
on (instance_id) group_left (ceph_daemon)
|
||||
ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'GETs {{ceph_daemon}}',
|
||||
'PUTs {{ceph_daemon}}',
|
||||
@ -789,11 +819,11 @@ local u = import 'utils.libsonnet';
|
||||
'short',
|
||||
|||
|
||||
rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) *
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s,ceph_daemon=~"$rgw_servers"}
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
|||
|
||||
rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) *
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'Requests Failed {{ceph_daemon}}',
|
||||
'GETs {{ceph_daemon}}',
|
||||
@ -807,7 +837,7 @@ local u = import 'utils.libsonnet';
|
||||
$.addTargetSchema(
|
||||
|||
|
||||
rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) *
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'PUTs {{ceph_daemon}}'
|
||||
),
|
||||
@ -819,7 +849,7 @@ local u = import 'utils.libsonnet';
|
||||
rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) +
|
||||
rate(ceph_rgw_put{%(matchers)s}[$__rate_interval])
|
||||
)
|
||||
) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'Other {{ceph_daemon}}'
|
||||
),
|
||||
@ -874,21 +904,21 @@ local u = import 'utils.libsonnet';
|
||||
.addTarget($.addTargetSchema(
|
||||
|||
|
||||
rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) *
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'Failures {{ceph_daemon}}'
|
||||
))
|
||||
.addTarget($.addTargetSchema(
|
||||
|||
|
||||
rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) *
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'GETs {{ceph_daemon}}'
|
||||
))
|
||||
.addTarget($.addTargetSchema(
|
||||
|||
|
||||
rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) *
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'PUTs {{ceph_daemon}}'
|
||||
))
|
||||
@ -901,7 +931,7 @@ local u = import 'utils.libsonnet';
|
||||
rate(ceph_rgw_put{%(matchers)s}[$__rate_interval])
|
||||
)
|
||||
) * on (instance_id) group_left (ceph_daemon)
|
||||
ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
|
||||
ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
|
||||
||| % $.matchers(),
|
||||
'Other (DELETE,LIST) {{ceph_daemon}}'
|
||||
)),
|
||||
|
@ -218,44 +218,28 @@ local timeSeries = import 'timeseries_panel.libsonnet';
unit: unit,
valueMaps: valueMaps,
},

matchers()::
local jobMatcher = 'job=~"$job"';
local clusterMatcher = '%s=~"$cluster"' % $._config.clusterLabel;
{
// Common labels
jobMatcher: jobMatcher,
clusterMatcher: (if $._config.showMultiCluster then clusterMatcher else ''),
matchers: jobMatcher +
(if $._config.showMultiCluster then ', ' + clusterMatcher else ''),
matchers: (if $._config.showMultiCluster then clusterMatcher + ', ' else ''),
},
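A small before/after sketch of the string $.matchers().matchers evaluates to under this change, assuming showMultiCluster is true and clusterLabel is 'cluster':

// Illustrative only: the two versions of the combined matcher string.
{
  before: 'job=~"$job"' + ', ' + 'cluster=~"$cluster"',  // 'job=~"$job", cluster=~"$cluster"'
  after: 'cluster=~"$cluster"' + ', ',                   // 'cluster=~"$cluster", '
  // With the new form, 'metric{%(matchers)s}' % { matchers: self.after }
  // renders as metric{cluster=~"$cluster", }; the job label is no longer injected.
}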

addClusterTemplate()::
$.addTemplateSchema(
'cluster',
'$datasource',
'label_values(ceph_osd_metadata, %s)' % $._config.clusterLabel,
'label_values(ceph_health_status, %s)' % $._config.clusterLabel,
1,
true,
false,
1,
'cluster',
'(.*)',
if !$._config.showMultiCluster then 'variable' else '',
multi=true,
allValues='.+',
),

addJobTemplate()::
$.addTemplateSchema(
'job',
'$datasource',
'label_values(ceph_osd_metadata{%(clusterMatcher)s}, job)' % $.matchers(),
1,
true,
1,
'job',
'(.*)',
multi=true,
allValues='.+',
multi=false,
allValues=null,
),

overviewStyle(alias,
@ -493,7 +477,7 @@ local timeSeries = import 'timeseries_panel.libsonnet';
addGaugePanel(title='',
description='',
transparent=false,
datasource='${DS_PROMETHEUS}',
datasource='$datasource',
gridPosition={},
pluginVersion='9.1.3',
unit='percentunit',
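For reference, a template defined through the new addClusterTemplate() is expected to render into dashboard JSON roughly like the sketch below (field order approximate; compare with the generated MDS dashboard variable further down):

// Sketch of the rendered 'cluster' template variable (Jsonnet object, JSON-compatible).
{
  allValue: null,
  current: {},
  datasource: '$datasource',
  hide: 0,
  includeAll: false,
  label: 'cluster',
  multi: false,
  name: 'cluster',
  options: [],
  query: 'label_values(ceph_health_status, cluster)',
  refresh: 1,
  regex: '(.*)',
  sort: 1,
  type: 'query',
}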
@ -1,5 +0,0 @@
exclusions:
  template-instance-rule:
    reason: "Instance template not needed because of ceph-mgr leader election."
  target-instance-rule:
    reason: "Instance matcher not needed because of ceph-mgr leader election."
File diff suppressed because it is too large
@ -113,14 +113,14 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
"expr": "sum(rate(ceph_objecter_op_r{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read Ops",
"refId": "A"
},
{
"expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
"expr": "sum(rate(ceph_objecter_op_w{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write Ops",
@ -215,7 +215,7 @@
"steppedLine": false,
"targets": [
{
"expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}",
"expr": "ceph_mds_server_handle_client_request{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ceph_daemon}}",
@ -283,36 +283,16 @@
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
@ -332,7 +312,7 @@
"multi": false,
"name": "mds_servers",
"options": [ ],
"query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
"query": "label_values(ceph_mds_inodes{cluster=~\"$cluster\", }, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
@ -123,7 +123,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", hostname='$ceph_hosts'}))",
|
||||
"expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{cluster=~\"$cluster\", }))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -651,7 +651,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n",
|
||||
"expr": "sum(\n ceph_osd_stat_bytes{cluster=~\"$cluster\", } and\n on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", cluster=~\"$cluster\", }\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -854,14 +854,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}}({{ceph_daemon}}) writes",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{cluster=~\"$cluster\", },\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}}({{ceph_daemon}}) reads",
|
||||
@ -961,14 +961,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
|
||||
"expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}}({{ceph_daemon}}) write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
|
||||
"expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}}({{ceph_daemon}}) read",
|
||||
@ -1158,7 +1158,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}}({{ceph_daemon}})",
|
||||
@ -1304,7 +1304,7 @@
|
||||
"styles": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n (sum by (instance)(ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\"}))\n)\n",
|
||||
"expr": "topk(10,\n (sum by (instance)(ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\", cluster=~\"$cluster\", }))\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1357,36 +1357,16 @@
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
@ -1401,15 +1381,15 @@
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Hostname",
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "ceph_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(instance)",
|
||||
"query": "label_values({__name__=~\"ceph_.+_metadata\", cluster=~\"$cluster\", }, hostname)",
|
||||
"refresh": 1,
|
||||
"regex": "([^.:]*).*",
|
||||
"sort": 3,
|
||||
"regex": "([^.]*).*",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
|
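The hunks above belong to the host-level dashboard whose panels template on $ceph_hosts: each query drops the job=~"$job" matcher in favour of cluster=~"$cluster", and the templating section replaces the hidden multi-select cluster variable plus the $job variable with a single visible $cluster variable populated from label_values(ceph_health_status, cluster). As a rough illustration of what that variable query resolves to, here is a minimal Python sketch that asks a Prometheus server for the values of the cluster label; the server URL is an assumption for illustration, not something taken from this commit.

    # Minimal sketch: list the values Grafana's $cluster variable would offer,
    # by querying Prometheus' label-values API directly.
    # Assumes a Prometheus server at http://localhost:9090 scraping ceph-mgr.
    import json
    import urllib.request

    PROM = "http://localhost:9090"          # assumed endpoint, not from the diff
    url = f"{PROM}/api/v1/label/cluster/values"

    with urllib.request.urlopen(url) as resp:
        payload = json.load(resp)

    if payload.get("status") == "success":
        for value in payload["data"]:
            print(value)                    # each value becomes one $cluster choice
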
@ -104,7 +104,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\"}))",
|
||||
"expr": "count(sum by (hostname) (ceph_osd_metadata{cluster=~\"$cluster\", }))",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -436,7 +436,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
|
||||
"expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($osd_hosts).*\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -519,7 +519,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n",
|
||||
"expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -687,7 +687,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
|
||||
"expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}",
|
||||
@ -755,36 +755,16 @@
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
@ -804,7 +784,7 @@
|
||||
"multi": false,
|
||||
"name": "osd_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)",
|
||||
"query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\", }, hostname)",
|
||||
"refresh": 1,
|
||||
"regex": "([^.]*).*",
|
||||
"sort": 1,
|
||||
@ -824,7 +804,7 @@
|
||||
"multi": false,
|
||||
"name": "mon_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"query": "label_values(ceph_mon_metadata{cluster=~\"$cluster\", }, hostname)",
|
||||
"refresh": 1,
|
||||
"regex": "mon.(.*)",
|
||||
"sort": 1,
|
||||
@ -844,7 +824,7 @@
|
||||
"multi": false,
|
||||
"name": "mds_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
|
||||
"query": "label_values(ceph_mds_inodes{hostname, cluster=~\"$cluster\", })",
|
||||
"refresh": 1,
|
||||
"regex": "mds.(.*)",
|
||||
"sort": 1,
|
||||
@ -864,7 +844,7 @@
|
||||
"multi": false,
|
||||
"name": "rgw_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"query": "label_values(ceph_rgw_metadata{hostname, cluster=~\"$cluster\", })",
|
||||
"refresh": 1,
|
||||
"regex": "rgw.(.*)",
|
||||
"sort": 1,
|
||||
|
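In the hunks above (the dashboard that templates on $osd_hosts, $mon_hosts, $mds_hosts and $rgw_hosts), the network-throughput expressions gain the previously missing $ on $mon_hosts/$mds_hosts/$rgw_hosts and switch from bonding_slaves to node_bonding_slaves, while the host variables are now derived from hostname labels of the ceph_*_metadata series. The disk panels keep joining node_exporter series to ceph_disk_occupation_human after normalising instance with label_replace(..., "$1", "instance", "([^.:]*).*"). A small Python sketch of that normalisation step (the hostnames are made up for illustration):

    # Sketch of the label_replace() instance normalisation used in these panels:
    # keep everything before the first '.' or ':' so "host.domain:9100" and
    # "host:9283" collapse to the same value and the join on (instance, device) works.
    import re

    PATTERN = re.compile(r"([^.:]*).*")     # same regex as in the dashboard exprs

    def normalise(instance: str) -> str:
        m = PATTERN.fullmatch(instance)     # label_replace matches the whole value
        return m.group(1) if m else instance

    for raw in ("ceph-node-01.example.com:9100", "ceph-node-01:9283", "ceph-node-01"):
        print(raw, "->", normalise(raw))    # all print "ceph-node-01"
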
@ -113,14 +113,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n",
|
||||
"expr": "rate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "read",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n",
|
||||
"expr": "rate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "write",
|
||||
@ -220,14 +220,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
|
||||
"expr": "rate(ceph_osd_op_r{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
|
||||
"expr": "rate(ceph_osd_op_w{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Writes",
|
||||
@ -327,14 +327,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
|
||||
"expr": "rate(ceph_osd_op_r_out_bytes{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read Bytes",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
|
||||
"expr": "rate(ceph_osd_op_w_in_bytes{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write Bytes",
|
||||
@ -453,14 +453,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
|
||||
"expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total[$__rate_interval]) /\n rate(node_disk_reads_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}/{{device}} Reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
|
||||
"expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total[$__rate_interval]) /\n rate(node_disk_writes_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}/{{device}} Writes",
|
||||
@ -560,14 +560,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"expr": "label_replace(\n rate(node_disk_writes_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}} on {{instance}} Writes",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"expr": "label_replace(\n rate(node_disk_reads_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}} on {{instance}} Reads",
|
||||
@ -667,14 +667,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"expr": "label_replace(\n rate(node_disk_read_bytes_total[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} {{device}} Reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"expr": "label_replace(\n rate(node_disk_written_bytes_total[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} {{device}} Writes",
|
||||
@ -769,7 +769,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"expr": "label_replace(\n rate(node_disk_io_time_seconds_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}} on {{instance}}",
|
||||
@ -837,36 +837,16 @@
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
@ -886,7 +866,7 @@
|
||||
"multi": false,
|
||||
"name": "osd",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
|
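The per-OSD hunks above (the dashboard that templates on $osd) compute read and write latency as rate(ceph_osd_op_*_latency_sum) / rate(ceph_osd_op_*_latency_count), i.e. average time per operation over the rate window, now scoped by cluster=~"$cluster" instead of job=~"$job". A back-of-the-envelope Python sketch of that sum/count arithmetic on two hypothetical counter samples:

    # Average op latency from Ceph's <op>_latency_sum / <op>_latency_count counters,
    # mirroring rate(sum)/rate(count) over one scrape interval.
    # The sample numbers below are made up for illustration.

    def avg_latency(sum_prev, sum_now, count_prev, count_now):
        """Seconds per op over the interval; None if no ops completed."""
        ops = count_now - count_prev
        if ops <= 0:
            return None
        return (sum_now - sum_prev) / ops

    # e.g. 1.8 s of accumulated write latency spread over 900 writes -> 0.002 s/op
    print(avg_latency(sum_prev=120.0, sum_now=121.8,
                      count_prev=50_000, count_now=50_900))
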
@ -103,21 +103,21 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n",
|
||||
"expr": "avg (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) * 1000\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "AVG read",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n",
|
||||
"expr": "max(\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) * 1000\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "MAX read",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n )\n)\n",
|
||||
"expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n * 1000\n )\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "@95%ile",
|
||||
@ -263,7 +263,7 @@
|
||||
"styles": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n",
|
||||
"expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n )\n ))\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -350,21 +350,21 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n)\n",
|
||||
"expr": "avg(\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n * 1000\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "AVG write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n)\n",
|
||||
"expr": "max(\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "MAX write",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n))\n",
|
||||
"expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "@95%ile write",
|
||||
@ -545,7 +545,7 @@
|
||||
"styles": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000)\n ))\n)\n",
|
||||
"expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000)\n ))\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -627,7 +627,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})",
|
||||
"expr": "count by (device_class) (ceph_osd_metadata{cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device_class}}",
|
||||
@ -686,14 +686,14 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})",
|
||||
"expr": "count(ceph_bluefs_wal_total_bytes{cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "bluestore",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "absent(ceph_bluefs_wal_total_bytes{job=~\"$job\"}) * count(ceph_osd_metadata{job=~\"$job\"})",
|
||||
"expr": "absent(ceph_bluefs_wal_total_bytes{cluster=~\"$cluster\", }) * count(ceph_osd_metadata{cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "filestore",
|
||||
@ -752,63 +752,63 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } < 1099511627776)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<1TB",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 1099511627776 < 2199023255552)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<2TB",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 2199023255552 < 3298534883328)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<3TB",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 3298534883328 < 4398046511104)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<4TB",
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 4398046511104 < 6597069766656)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<6TB",
|
||||
"refId": "E"
|
||||
},
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 6597069766656 < 8796093022208)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<8TB",
|
||||
"refId": "F"
|
||||
},
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 10995116277760)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 8796093022208 < 10995116277760)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<10TB",
|
||||
"refId": "G"
|
||||
},
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 10995116277760 < 13194139533312)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<12TB",
|
||||
"refId": "H"
|
||||
},
|
||||
{
|
||||
"expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)",
|
||||
"expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 13194139533312)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "<12TB+",
|
||||
@ -869,7 +869,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ceph_osd_numpg{job=~\"$job\"}",
|
||||
"expr": "ceph_osd_numpg{cluster=~\"$cluster\", }",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -975,7 +975,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\"})\n)\n",
|
||||
"expr": "sum(ceph_bluestore_onode_hits{cluster=~\"$cluster\", }) / (\n sum(ceph_bluestore_onode_hits{cluster=~\"$cluster\", }) +\n sum(ceph_bluestore_onode_misses{cluster=~\"$cluster\", })\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -1066,14 +1066,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])))",
|
||||
"expr": "round(sum(rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])))",
|
||||
"expr": "round(sum(rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Writes",
|
||||
@ -1282,36 +1282,16 @@
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
|
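The OSD-overview hunks above keep the OSD-size histogram panel, which counts OSDs between power-of-two TiB thresholds (1099511627776 is 1 TiB, 2199023255552 is 2 TiB, and so on) using chained comparisons on ceph_osd_stat_bytes. A small Python sketch of the same bucketing, with made-up OSD sizes:

    # Bucket OSD capacities the same way the "<1TB" ... "<12TB+" panel does.
    # Thresholds are multiples of 1 TiB = 2**40 bytes, as in the PromQL expressions.
    TIB = 2 ** 40
    EDGES = [1, 2, 3, 4, 6, 8, 10, 12]          # TiB boundaries used by the panel

    def bucket(size_bytes: int) -> str:
        for edge in EDGES:
            if size_bytes < edge * TIB:
                return f"<{edge}TB"
        return "<12TB+"                          # panel's label for >= 12 TiB

    # Hypothetical OSD sizes: 0.9 TiB, 3.6 TiB, 14 TiB
    for osd_bytes in (int(0.9 * TIB), int(3.6 * TIB), 14 * TIB):
        print(osd_bytes, bucket(osd_bytes))      # <1TB, <4TB, <12TB+
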
@ -104,7 +104,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"expr": "(ceph_pool_stored{cluster=~\"$cluster\", } / (ceph_pool_stored{cluster=~\"$cluster\", } + ceph_pool_max_avail{cluster=~\"$cluster\", })) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -186,7 +186,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n",
|
||||
"expr": "(ceph_pool_max_avail{cluster=~\"$cluster\", } / deriv(ceph_pool_stored{cluster=~\"$cluster\", }[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", } > 0\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -261,7 +261,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "deriv(ceph_pool_objects{job=~\"$job\"}[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"expr": "deriv(ceph_pool_objects{cluster=~\"$cluster\", }[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Objects per second",
|
||||
@ -364,14 +364,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"expr": "rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"expr": "rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "writes",
|
||||
@ -474,14 +474,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"expr": "rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"expr": "rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "writes",
|
||||
@ -579,7 +579,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"expr": "ceph_pool_objects{cluster=~\"$cluster\", } *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Number of Objects",
|
||||
@ -647,36 +647,16 @@
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
@ -696,7 +676,7 @@
|
||||
"multi": false,
|
||||
"name": "pool_name",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)",
|
||||
"query": "label_values(ceph_pool_metadata{cluster=~\"$cluster\", }, name)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
|
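The pool hunks above (the dashboard that templates on $pool_name) include the fill-rate panel, ceph_pool_max_avail / deriv(ceph_pool_stored[6h]) > 0: remaining capacity divided by the observed growth rate, shown only while the pool is actually growing. A rough Python sketch of that estimate with invented numbers:

    # Estimate seconds (and days) until a pool fills up, mirroring
    # ceph_pool_max_avail / deriv(ceph_pool_stored[6h]); values are illustrative.

    def seconds_until_full(max_avail_bytes: float, growth_bytes_per_s: float):
        if growth_bytes_per_s <= 0:
            return None                # panel keeps only growing pools (`> 0`)
        return max_avail_bytes / growth_bytes_per_s

    avail = 40 * 2**40                 # 40 TiB still available (made up)
    growth = 50 * 2**20                # growing at 50 MiB/s (made up)
    secs = seconds_until_full(avail, growth)
    print(f"{secs:.0f} s = {secs / 86400:.1f} days")
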
@ -85,7 +85,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(ceph_pool_metadata{job=~\"$job\"})",
|
||||
"expr": "count(ceph_pool_metadata{cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -168,7 +168,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"})",
|
||||
"expr": "count(ceph_pool_metadata{compression_mode!=\"none\", cluster=~\"$cluster\", })",
|
||||
"format": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -250,7 +250,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"})",
|
||||
"expr": "sum(ceph_osd_stat_bytes{cluster=~\"$cluster\", })",
|
||||
"format": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -332,7 +332,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(ceph_pool_bytes_used{job=~\"$job\"})",
|
||||
"expr": "sum(ceph_pool_bytes_used{cluster=~\"$cluster\", })",
|
||||
"format": "",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -415,7 +415,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(ceph_pool_stored{job=~\"$job\"})",
|
||||
"expr": "sum(ceph_pool_stored{cluster=~\"$cluster\", })",
|
||||
"format": "",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -498,7 +498,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"}\n)\n",
|
||||
"expr": "sum(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } -\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", }\n)\n",
|
||||
"format": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -580,7 +580,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\"} and ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n) * 100\n",
|
||||
"expr": "(\n sum(ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n sum(ceph_pool_stored_raw{cluster=~\"$cluster\", } and ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0)\n) * 100\n",
|
||||
"format": "table",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -662,7 +662,7 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n)\n",
|
||||
"expr": "sum(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0)\n / sum(ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n)\n",
|
||||
"format": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
@ -1161,7 +1161,7 @@
|
||||
"styles": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(\n ceph_pool_compress_under_bytes{job=~\"$job\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n ) * 100 > 0.5\n)\n",
|
||||
"expr": "(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } /\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n ceph_pool_stored_raw{cluster=~\"$cluster\", }\n ) * 100 > 0.5\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1169,7 +1169,7 @@
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "ceph_pool_max_avail{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n",
|
||||
"expr": "ceph_pool_max_avail{cluster=~\"$cluster\", } *\n on(pool_id) group_left(name) ceph_pool_metadata{cluster=~\"$cluster\", }\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1177,7 +1177,7 @@
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n) * 100\n",
|
||||
"expr": "(\n (ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n ceph_pool_stored_raw{cluster=~\"$cluster\", }\n) * 100\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1185,7 +1185,7 @@
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "ceph_pool_percent_used{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n",
|
||||
"expr": "ceph_pool_percent_used{cluster=~\"$cluster\", } *\n on(pool_id) group_left(name) ceph_pool_metadata{cluster=~\"$cluster\", }\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1193,7 +1193,7 @@
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"expr": "ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n",
|
||||
"expr": "ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } -\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1201,7 +1201,7 @@
|
||||
"refId": "E"
|
||||
},
|
||||
{
|
||||
"expr": "delta(ceph_pool_stored{job=~\"$job\"}[5d])",
|
||||
"expr": "delta(ceph_pool_stored{cluster=~\"$cluster\", }[5d])",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1209,7 +1209,7 @@
|
||||
"refId": "F"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n",
|
||||
"expr": "rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval])\n + rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1217,7 +1217,7 @@
|
||||
"refId": "G"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n",
|
||||
"expr": "rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1225,7 +1225,7 @@
|
||||
"refId": "H"
|
||||
},
|
||||
{
|
||||
"expr": "ceph_pool_metadata{job=~\"$job\"}",
|
||||
"expr": "ceph_pool_metadata{cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1233,7 +1233,7 @@
|
||||
"refId": "I"
|
||||
},
|
||||
{
|
||||
"expr": "ceph_pool_stored{job=~\"$job\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\"}",
|
||||
"expr": "ceph_pool_stored{cluster=~\"$cluster\", } * on(pool_id) group_left ceph_pool_metadata{cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1241,7 +1241,7 @@
|
||||
"refId": "J"
|
||||
},
|
||||
{
|
||||
"expr": "ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"}",
|
||||
"expr": "ceph_pool_metadata{compression_mode!=\"none\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1348,14 +1348,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"})\n",
|
||||
"expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{cluster=~\"$cluster\", })\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{name}} ",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}\n)\n",
|
||||
"expr": "topk($topk,\n rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{cluster=~\"$cluster\", }\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{name}} - write",
|
||||
@ -1450,7 +1450,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\"}\n)\n",
|
||||
"expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{cluster=~\"$cluster\", }\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{name}}",
|
||||
@ -1545,7 +1545,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ceph_pool_bytes_used{job=~\"$job\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\"}",
|
||||
"expr": "ceph_pool_bytes_used{cluster=~\"$cluster\", } * on(pool_id) group_right ceph_pool_metadata{cluster=~\"$cluster\", }",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{name}}",
|
||||
@ -1613,36 +1613,16 @@
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
|
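The pools-overview hunks above carry the compression panels: estimated savings as ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used, and the compression factor as the ratio of the two, restricted to pools where compression actually took effect. A short Python sketch of that arithmetic on invented per-pool figures (the reading of the two metrics as "bytes passed through compression" versus "bytes they occupy afterwards" is an assumption, not stated in this commit):

    # Compression savings and ratio per pool, as in the pools-overview panels.
    # under_bytes = data that went through compression (assumed meaning)
    # used_bytes  = space those bytes occupy after compression (assumed meaning)
    # Figures below are made up for illustration.
    pools = {
        "rbd":         {"under_bytes": 800 * 2**30, "used_bytes": 300 * 2**30},
        "cephfs_data": {"under_bytes": 0,           "used_bytes": 0},
    }

    for name, p in pools.items():
        if p["used_bytes"] <= 0:
            print(f"{name}: compression not in effect")
            continue
        savings = p["under_bytes"] - p["used_bytes"]
        ratio = p["under_bytes"] / p["used_bytes"]
        print(f"{name}: saved {savings / 2**30:.0f} GiB, ratio {ratio:.2f}x")
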
@ -114,14 +114,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GET {{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (instance_id) (\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "sum by (instance_id) (\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUT {{ceph_daemon}}",
|
||||
@ -216,14 +216,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GETs {{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUTs {{ceph_daemon}}",
|
||||
@ -324,28 +324,28 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "rate(ceph_rgw_failed_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Requests Failed {{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GETs {{ceph_daemon}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUTs {{ceph_daemon}}",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) -\n (\n rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Other {{ceph_daemon}}",
|
||||
@ -517,28 +517,28 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "rate(ceph_rgw_failed_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Failures {{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GETs {{ceph_daemon}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUTs {{ceph_daemon}}",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"expr": "(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) -\n (\n rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}",
|
||||
@ -574,36 +574,16 @@
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"query": "label_values(ceph_health_status, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
@ -623,7 +603,7 @@
|
||||
"multi": false,
|
||||
"name": "rgw_servers",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
|
@ -108,14 +108,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GET {{rgw_host}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUT {{rgw_host}}",
|
||||
@ -210,7 +210,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
|
||||
"expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{rgw_host}}",
|
||||
@ -305,7 +305,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{rgw_host}}",
|
||||
@ -400,14 +400,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]))",
|
||||
"expr": "sum(rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GETs",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval]))",
|
||||
"expr": "sum(rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUTs",
|
||||
@ -502,7 +502,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{rgw_host}}",
|
||||
@ -597,7 +597,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{rgw_host}}",
|
||||
@ -1199,36 +1199,16 @@
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"query": "label_values(ceph_health_status, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
@ -1248,9 +1228,9 @@
|
||||
"multi": false,
|
||||
"name": "rgw_servers",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "RGW Server",
|
||||
"regex": ".*",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
|
@ -89,7 +89,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))",
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{cluster=~\"$cluster\", }[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
@ -184,7 +184,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))",
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{cluster=~\"$cluster\", }[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
@ -279,7 +279,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))",
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
@ -374,7 +374,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))",
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{cluster=~\"$cluster\", }[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
@ -416,6 +416,100 @@
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": "$datasource",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 2,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 16,
|
||||
"x": 8,
|
||||
"y": 7
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "$datasource",
|
||||
"expr": "rate(ceph_rgw_sync_delta_sync_delta[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance_id}} - {{shard_id}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Replication(Time) Delta per shard",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
@ -443,36 +537,16 @@
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"query": "label_values(ceph_health_status, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
@ -488,13 +562,13 @@
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "rgw_servers",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "RGW Server",
|
||||
"regex": "rgw.(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
|
@ -89,14 +89,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
|
||||
"expr": "rate(ceph_rbd_write_ops{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
|
||||
"expr": "rate(ceph_rbd_read_ops{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Read",
|
||||
@ -191,14 +191,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
|
||||
"expr": "rate(ceph_rbd_write_bytes{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
|
||||
"expr": "rate(ceph_rbd_read_bytes{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Read",
|
||||
@ -293,14 +293,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
|
||||
"expr": "rate(ceph_rbd_write_latency_sum{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
|
||||
"expr": "rate(ceph_rbd_read_latency_sum{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Read",
|
||||
@ -368,36 +368,16 @@
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"query": "label_values(ceph_health_status, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
@ -417,7 +397,7 @@
|
||||
"multi": false,
|
||||
"name": "pool",
|
||||
"options": [ ],
|
||||
"query": "label_values(pool)",
|
||||
"query": "label_values(ceph_rbd_read_ops{cluster=~\"$cluster\", }, pool)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
@ -437,7 +417,7 @@
|
||||
"multi": false,
|
||||
"name": "image",
|
||||
"options": [ ],
|
||||
"query": "label_values(image)",
|
||||
"query": "label_values(ceph_rbd_read_ops{cluster=~\"$cluster\", , pool=\"$pool\"}, image)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
|
@ -101,14 +101,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))",
|
||||
"expr": "round(sum(rate(ceph_rbd_write_ops{cluster=~\"$cluster\", }[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Writes",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))",
|
||||
"expr": "round(sum(rate(ceph_rbd_read_ops{cluster=~\"$cluster\", }[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Reads",
|
||||
@ -203,14 +203,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))",
|
||||
"expr": "round(sum(rate(ceph_rbd_write_bytes{cluster=~\"$cluster\", }[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))",
|
||||
"expr": "round(sum(rate(ceph_rbd_read_bytes{cluster=~\"$cluster\", }[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read",
|
||||
@ -305,14 +305,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
|
||||
"expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{cluster=~\"$cluster\", }[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{cluster=~\"$cluster\", }[$__rate_interval]))\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
|
||||
"expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{cluster=~\"$cluster\", }[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{cluster=~\"$cluster\", }[$__rate_interval]))\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read",
|
||||
@ -483,7 +483,7 @@
|
||||
"styles": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n",
|
||||
"expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{cluster=~\"$cluster\", }[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{cluster=~\"$cluster\", }[$__rate_interval])\n ))\n )\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -632,7 +632,7 @@
|
||||
"styles": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n",
|
||||
"expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -781,7 +781,7 @@
|
||||
"styles": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n",
|
||||
"expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{cluster=~\"$cluster\", }[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{cluster=~\"$cluster\", }[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -828,36 +828,16 @@
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"query": "label_values(ceph_health_status, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
|
@ -94,7 +94,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
@ -157,7 +157,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
@ -220,7 +220,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
@ -283,7 +283,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
@ -333,7 +333,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -343,7 +343,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "List Buckets",
|
||||
@ -352,7 +352,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -362,7 +362,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -372,7 +372,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -382,7 +382,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -392,7 +392,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Copy Objects",
|
||||
@ -455,7 +455,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -465,7 +465,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -475,7 +475,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -485,7 +485,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Copy Objects",
|
||||
@ -548,7 +548,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -558,7 +558,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "List Bucket",
|
||||
@ -567,7 +567,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -577,7 +577,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -587,7 +587,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -597,7 +597,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -607,7 +607,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Copy Object",
|
||||
@ -692,7 +692,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -708,7 +708,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -724,7 +724,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -740,7 +740,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -938,7 +938,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -954,7 +954,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -970,7 +970,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -986,7 +986,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1002,7 +1002,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1212,7 +1212,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1228,7 +1228,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1244,7 +1244,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1260,7 +1260,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1460,7 +1460,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1476,7 +1476,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1492,7 +1492,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1508,7 +1508,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1524,7 +1524,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
|
||||
"expr": "ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -1727,7 +1727,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
|
||||
"expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1795,7 +1795,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
|
||||
"expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1842,7 +1842,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
|
||||
"expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1889,7 +1889,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
|
||||
"expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -1996,7 +1996,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2104,7 +2104,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2212,7 +2212,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2320,7 +2320,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2428,7 +2428,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2536,7 +2536,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2644,7 +2644,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2752,7 +2752,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2860,7 +2860,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -2946,7 +2946,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -2962,7 +2962,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -2978,7 +2978,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -2994,7 +2994,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -3010,7 +3010,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -3222,7 +3222,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)\n",
|
||||
"expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -3290,7 +3290,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)\n",
|
||||
"expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -3337,7 +3337,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
|
||||
"expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -3384,7 +3384,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
|
||||
"expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
@ -3491,7 +3491,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -3599,7 +3599,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -3707,7 +3707,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -3815,7 +3815,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -3923,7 +3923,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -4031,7 +4031,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -4139,7 +4139,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -4247,7 +4247,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -4355,7 +4355,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
@ -4441,7 +4441,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -4457,7 +4457,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -4473,7 +4473,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -4489,7 +4489,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -4505,7 +4505,7 @@
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
|
||||
"expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
|
||||
"format": "table",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
@ -4630,6 +4630,26 @@
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
@ -4640,7 +4660,7 @@
"multi": false,
"name": "rgw_servers",
"options": [ ],
"query": "label_values(ceph_rgw_metadata{}, ceph_daemon)",
"query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
"refresh": 2,
"regex": "",
"sort": 0,
@ -49,17 +49,17 @@ class PromqlTest:
The workflow of testing would be something like:

# add prometheus query to test
self.set_expression('bonding_slaves > 0')
self.set_expression('node_bonding_slaves > 0')

# add some prometheus input series
self.add_series('bonding_slaves{master="bond0"}', '2')
self.add_series('bonding_slaves{master="bond1"}', '3')
self.add_series('node_bonding_slaves{master="bond0"}', '2')
self.add_series('node_bonding_slaves{master="bond1"}', '3')
self.add_series('node_network_receive_bytes{instance="127.0.0.1",
device="eth1"}', "10 100 230 22")

# expected output of the query
self.add_exp_samples('bonding_slaves{master="bond0"}', 2)
self.add_exp_samples('bonding_slaves{master="bond1"}', 3)
self.add_exp_samples('node_bonding_slaves{master="bond0"}', 2)
self.add_exp_samples('node_bonding_slaves{master="bond1"}', 3)

# at last, always call promtool with:
self.assertTrue(self.run_promtool())
@ -150,10 +150,10 @@ class PromqlTest:
'$osd_hosts', you should change this to a real value. Example:

> self.set_expression('bonding_slaves{master="$osd_hosts"} > 0')
> self.set_expression('node_bonding_slaves{master="$osd_hosts"} > 0')
> self.set_variable('osd_hosts', '127.0.0.1')
> print(self.query)
> bonding_slaves{master="127.0.0.1"} > 0
> node_bonding_slaves{master="127.0.0.1"} > 0

Args:
variable(str): Variable name
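Putting the docstring's steps together, a complete test would look roughly like the sketch below. The `unittest` wrapper and the direct instantiation of `PromqlTest` are assumptions made for illustration; only the method names (`set_expression`, `add_series`, `add_exp_samples`, `run_promtool`) come from the class this hunk edits.

    import unittest

    from tests_dashboards import PromqlTest  # module path assumed

    class TestBondingSlaves(unittest.TestCase):
        def test_bonding_slaves(self):
            t = PromqlTest()
            t.set_expression('node_bonding_slaves > 0')                  # query to evaluate
            t.add_series('node_bonding_slaves{master="bond0"}', '2')     # synthetic input
            t.add_series('node_bonding_slaves{master="bond1"}', '3')
            t.add_exp_samples('node_bonding_slaves{master="bond0"}', 2)  # expected samples
            t.add_exp_samples('node_bonding_slaves{master="bond1"}', 3)
            self.assertTrue(t.run_promtool())                            # delegate to promtool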
@ -3,16 +3,17 @@ Feature: Ceph Cluster Dashboard
Scenario: "Test cluster health"
Given the following series:
| metrics | values |
| ceph_health_status{} | 1.0 |
Then Grafana panel `Health Status` with legend `EMPTY` shows:
| ceph_health_status{job="ceph",cluster="mycluster"} | 1 |
Then Grafana panel `Ceph health status` with legend `EMPTY` shows:
| metrics | values |
| ceph_health_status{} | 1.0 |
| ceph_health_status{job="ceph",cluster="mycluster"} | 1 |
|
||||
|
||||
Scenario: "Test Firing Alerts Warning"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning"} | 1 |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical"} | 1 |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="mycluster"} | 1 |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.1", severity="critical", cluster="someothercluster"} | 1 |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical", cluster="mycluster"} | 1 |
|
||||
Then Grafana panel `Firing Alerts` with legend `Warning` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
@ -20,8 +21,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Firing Alerts Critical"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning"} | 1 |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical"} | 1 |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="mycluster"} | 1 |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="someothercluster"} | 1 |
|
||||
| ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical", cluster="mycluster"} | 1 |
|
||||
Then Grafana panel `Firing Alerts` with legend `Critical` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
@ -29,33 +31,33 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Available Capacity"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_bytes{}| 100 |
|
||||
| ceph_cluster_total_used_bytes{}| 70 |
|
||||
| ceph_cluster_total_bytes{job="ceph",cluster="mycluster"}| 100 |
|
||||
| ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"}| 70 |
|
||||
Then Grafana panel `Available Capacity` with legend `EMPTY` shows:
|
||||
| metrics | values |
|
||||
| {} | 0.3 |
|
||||
| {job="ceph",cluster="mycluster"} | 0.3 |
|
||||
|
||||
Scenario: "Test Cluster Capacity"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_bytes{}| 100 |
|
||||
| ceph_cluster_total_bytes{job="ceph",cluster="mycluster"}| 100 |
|
||||
Then Grafana panel `Cluster Capacity` with legend `EMPTY` shows:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_bytes{} | 100 |
|
||||
| ceph_cluster_total_bytes{job="ceph",cluster="mycluster"} | 100 |
|
||||
|
||||
Scenario: "Test Used Capacity"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_used_bytes{}| 100 |
|
||||
| ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"}| 100 |
|
||||
Then Grafana panel `Used Capacity` with legend `EMPTY` shows:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_used_bytes{} | 100 |
|
||||
| ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"} | 100 |
|
||||
|
||||
Scenario: "Test Write Throughput"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.0"} | 500 500 500 |
|
||||
| ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.1"} | 500 120 110 |
|
||||
| ceph_osd_op_w_in_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 |
|
||||
| ceph_osd_op_w_in_bytes{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 |
|
||||
Then Grafana panel `Write Throughput` with legend `EMPTY` shows:
|
||||
| metrics | values |
|
||||
| {} | 2 |
|
||||
@ -63,8 +65,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Write IOPS"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_op_w{cluster="mycluster", osd="osd.0"} | 500 500 500 |
|
||||
| ceph_osd_op_w{cluster="mycluster", osd="osd.1"} | 500 120 110 |
|
||||
| ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.0"} | 500 500 500 |
|
||||
| ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.1"} | 500 120 110 |
|
||||
Then Grafana panel `Write IOPS` with legend `EMPTY` shows:
|
||||
| metrics | values |
|
||||
| {} | 2 |
|
||||
@ -72,8 +74,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Read Throughput"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.0"} | 500 500 500 |
|
||||
| ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.1"} | 500 120 110 |
|
||||
| ceph_osd_op_r_out_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 |
|
||||
| ceph_osd_op_r_out_bytes{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 |
|
||||
Then Grafana panel `Read Throughput` with legend `EMPTY` shows:
|
||||
| metrics | values |
|
||||
| {} | 2 |
|
||||
@ -81,8 +83,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Read IOPS"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_op_r{cluster="mycluster", osd="osd.0"} | 500 500 500 |
|
||||
| ceph_osd_op_r{cluster="mycluster", osd="osd.1"} | 500 120 110 |
|
||||
| ceph_osd_op_r{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 |
|
||||
| ceph_osd_op_r{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 |
|
||||
Then Grafana panel `Read IOPS` with legend `EMPTY` shows:
|
||||
| metrics | values |
|
||||
| {} | 2 |
|
||||
@ -90,9 +92,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test OSDs All"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_metadata{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_metadata{cluster="mycluster", osd="osd.2"} | 1 |
|
||||
| ceph_osd_metadata{cluster="mycluster", osd="osd.3"} | 1 |
|
||||
| ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.2"} | 1 |
|
||||
| ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.3"} | 1 |
|
||||
Then Grafana panel `OSDs` with legend `All` shows:
|
||||
| metrics | values |
|
||||
| {} | 3 |
|
||||
@ -100,9 +102,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test OSDs In"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_in{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_in{cluster="mycluster", osd="osd.1"} | 1 |
|
||||
| ceph_osd_in{cluster="mycluster", osd="osd.2"} | 1 |
|
||||
| ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.1"} | 1 |
|
||||
| ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.2"} | 1 |
|
||||
Then Grafana panel `OSDs` with legend `In` shows:
|
||||
| metrics | values |
|
||||
| {} | 3 |
|
||||
@ -110,9 +112,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test OSDs Out"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_in{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_in{cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_osd_in{cluster="mycluster", osd="osd.2"} | 0 |
|
||||
| ceph_osd_in{cjob="ceph", cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.2"} | 0 |
|
||||
Then Grafana panel `OSDs` with legend `Out` shows:
|
||||
| metrics | values |
|
||||
| {} | 2 |
|
||||
@ -120,9 +122,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test OSDs Up"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_up{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_up{cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_osd_up{cluster="mycluster", osd="osd.2"} | 0 |
|
||||
| ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.2"} | 0 |
|
||||
Then Grafana panel `OSDs` with legend `Up` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
@ -130,9 +132,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test OSDs Down"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_up{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_up{cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_osd_up{cluster="mycluster", osd="osd.2"} | 0 |
|
||||
| ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.2"} | 0 |
|
||||
Then Grafana panel `OSDs` with legend `Down` shows:
|
||||
| metrics | values |
|
||||
| {} | 2 |
|
||||
@ -140,9 +142,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test MGRs Standby"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_mgr_status{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mgr_status{cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mgr_status{cluster="mycluster", osd="osd.2"} | 0 |
|
||||
| ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 |
|
||||
Then Grafana panel `MGRs` with legend `Standby` shows:
|
||||
| metrics | values |
|
||||
| {} | 2 |
|
||||
@ -150,8 +152,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test MGRs Active"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_mgr_status{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mgr_status{cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
|
||||
Then Grafana panel `MGRs` with legend `Active` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
@ -159,9 +161,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Monitors Total"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 |
|
||||
Then Grafana panel `Monitors` with legend `Total` shows:
|
||||
| metrics | values |
|
||||
| {} | 3 |
|
||||
@ -169,9 +171,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Monitors In Quorum"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 |
|
||||
Then Grafana panel `Monitors` with legend `In Quorum` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
@ -179,9 +181,9 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Monitors out of Quorum"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
|
||||
| ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 |
|
||||
Then Grafana panel `Monitors` with legend `MONs out of Quorum` shows:
|
||||
| metrics | values |
|
||||
| {} | 2 |
|
||||
@ -189,24 +191,24 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Total Capacity"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_bytes{cluster="mycluster", osd="osd.0"} | 100 |
|
||||
| ceph_cluster_total_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 |
|
||||
Then Grafana panel `Capacity` with legend `Total Capacity` shows:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_bytes{cluster="mycluster", osd="osd.0"} | 100 |
|
||||
| ceph_cluster_total_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 100 |
|
||||
|
||||
Scenario: "Test Used Capacity"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_used_bytes{cluster="mycluster", osd="osd.0"} | 100 |
|
||||
| ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 |
|
||||
Then Grafana panel `Capacity` with legend `Used` shows:
|
||||
| metrics | values |
|
||||
| ceph_cluster_total_used_bytes{cluster="mycluster", osd="osd.0"} | 100 |
|
||||
| ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 |
|
||||
|
||||
Scenario: "Test Cluster Throughput Write"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.0"} | 1000 1000|
|
||||
| ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.1"} | 2000 1500 |
|
||||
| ceph_osd_op_w_in_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000|
|
||||
| ceph_osd_op_w_in_bytes{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 |
|
||||
Then Grafana panel `Cluster Throughput` with legend `Write` shows:
|
||||
| metrics | values |
|
||||
| {} | 25 |
|
||||
@ -214,8 +216,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Cluster Throughput Read"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.0"} | 1000 1000|
|
||||
| ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.1"} | 2000 1500 |
|
||||
| ceph_osd_op_r_out_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000|
|
||||
| ceph_osd_op_r_out_bytes{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 |
|
||||
Then Grafana panel `Cluster Throughput` with legend `Read` shows:
|
||||
| metrics | values |
|
||||
| {} | 25 |
|
||||
@ -223,8 +225,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test IOPS Read"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_op_r{cluster="mycluster", osd="osd.0"} | 1000 1000|
|
||||
| ceph_osd_op_r{cluster="mycluster", osd="osd.1"} | 2000 1500 |
|
||||
| ceph_osd_op_r{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000|
|
||||
| ceph_osd_op_r{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 |
|
||||
Then Grafana panel `IOPS` with legend `Read` shows:
|
||||
| metrics | values |
|
||||
| {} | 25 |
|
||||
@ -232,8 +234,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test IOPS Write"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_op_w{cluster="mycluster", osd="osd.0"} | 1000 1000|
|
||||
| ceph_osd_op_w{cluster="mycluster", osd="osd.1"} | 2000 1500 |
|
||||
| ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000|
|
||||
| ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 |
|
||||
Then Grafana panel `IOPS` with legend `Write` shows:
|
||||
| metrics | values |
|
||||
| {} | 25 |
|
||||
@ -241,83 +243,83 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Pool Used Bytes"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pool_bytes_used{cluster="mycluster", pool_id="1"} | 10000 |
|
||||
| ceph_pool_bytes_used{cluster="mycluster", pool_id="2"} | 20000 |
|
||||
| ceph_pool_bytes_used{cluster="mycluster", pool_id="3"} | 30000 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 2000 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 4000 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 6000 |
|
||||
| ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="1"} | 10000 |
|
||||
| ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="2"} | 20000 |
|
||||
| ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="3"} | 30000 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 2000 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 4000 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 6000 |
|
||||
Then Grafana panel `Pool Used Bytes` with legend `{{name}}` shows:
|
||||
| metrics | values |
|
||||
| {cluster="mycluster", name="pool1", pool_id="1"} | 20000000 |
|
||||
| {cluster="mycluster", name="pool2", pool_id="2"} | 80000000 |
|
||||
| {cluster="mycluster", name="pool3", pool_id="3"} | 180000000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 20000000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 80000000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 180000000 |
Scenario: "Test Pool Used RAW Bytes"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pool_stored_raw{cluster="mycluster", pool_id="1"} | 10000 |
|
||||
| ceph_pool_stored_raw{cluster="mycluster", pool_id="2"} | 20000 |
|
||||
| ceph_pool_stored_raw{cluster="mycluster", pool_id="3"} | 30000 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 2000 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 4000 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 6000 |
|
||||
| ceph_pool_stored_raw{job="ceph", cluster="mycluster", pool_id="1"} | 10000 |
|
||||
| ceph_pool_stored_raw{job="ceph", cluster="mycluster", pool_id="2"} | 20000 |
|
||||
| ceph_pool_stored_raw{job="ceph", cluster="mycluster", pool_id="3"} | 30000 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 2000 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 4000 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 6000 |
|
||||
Then Grafana panel `Pool Used RAW Bytes` with legend `{{name}}` shows:
|
||||
| metrics | values |
|
||||
| {cluster="mycluster", name="pool1", pool_id="1"} | 20000000 |
|
||||
| {cluster="mycluster", name="pool2", pool_id="2"} | 80000000 |
|
||||
| {cluster="mycluster", name="pool3", pool_id="3"} | 180000000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 20000000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 80000000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 180000000 |
|
||||
|
||||
Scenario: "Test Pool Objects Quota"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pool_quota_objects{cluster="mycluster", pool_id="1"} | 10 |
|
||||
| ceph_pool_quota_objects{cluster="mycluster", pool_id="2"} | 20 |
|
||||
| ceph_pool_quota_objects{cluster="mycluster", pool_id="3"} | 30 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 10 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 15 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 15 |
|
||||
| ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="1"} | 10 |
|
||||
| ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="2"} | 20 |
|
||||
| ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="3"} | 30 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 10 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 15 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 15 |
|
||||
Then Grafana panel `Pool Objects Quota` with legend `{{name}}` shows:
|
||||
| metrics | values |
|
||||
| {cluster="mycluster", name="pool1", pool_id="1"} | 100 |
|
||||
| {cluster="mycluster", name="pool2", pool_id="2"} | 300 |
|
||||
| {cluster="mycluster", name="pool3", pool_id="3"} | 450|
|
||||
| {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 100 |
|
||||
| {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 300 |
|
||||
| {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 450|
|
||||
|
||||
Scenario: "Test Pool Quota Bytes"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pool_quota_bytes{cluster="mycluster", pool_id="1"} | 100 |
|
||||
| ceph_pool_quota_bytes{cluster="mycluster", pool_id="2"} | 200 |
|
||||
| ceph_pool_quota_bytes{cluster="mycluster", pool_id="3"} | 300 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 100 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 150 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 150 |
|
||||
| ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="1"} | 100 |
|
||||
| ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="2"} | 200 |
|
||||
| ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="3"} | 300 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 100 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 150 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 150 |
|
||||
Then Grafana panel `Pool Quota Bytes` with legend `{{name}}` shows:
|
||||
| metrics | values |
|
||||
| {cluster="mycluster", name="pool1", pool_id="1"} | 10000 |
|
||||
| {cluster="mycluster", name="pool2", pool_id="2"} | 30000 |
|
||||
| {cluster="mycluster", name="pool3", pool_id="3"} | 45000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 10000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 30000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 45000 |
|
||||
|
||||
Scenario: "Test Objects Per Pool"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pool_objects{cluster="mycluster", pool_id="1"} | 100 |
|
||||
| ceph_pool_objects{cluster="mycluster", pool_id="2"} | 200 |
|
||||
| ceph_pool_objects{cluster="mycluster", pool_id="3"} | 300 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 100 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 150 |
|
||||
| ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 150 |
|
||||
| ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="1"} | 100 |
|
||||
| ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="2"} | 200 |
|
||||
| ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="3"} | 300 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 100 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 150 |
|
||||
| ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 150 |
|
||||
Then Grafana panel `Objects Per Pool` with legend `{{name}}` shows:
|
||||
| metrics | values |
|
||||
| {cluster="mycluster", name="pool1", pool_id="1"} | 10000 |
|
||||
| {cluster="mycluster", name="pool2", pool_id="2"} | 30000 |
|
||||
| {cluster="mycluster", name="pool3", pool_id="3"} | 45000|
|
||||
| {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 10000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 30000 |
|
||||
| {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 45000|
|
||||
|
||||
Scenario: "Test OSD Type Count"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pool_objects{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pool_objects{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pool_objects{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pool_objects{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `OSD Type Count` with legend `Total` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -325,8 +327,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Backfill Toofull"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_backfill_toofull{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_backfill_toofull{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_backfill_toofull{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_backfill_toofull{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Backfill Toofull` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -334,8 +336,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Remapped"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_remapped{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_remapped{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_remapped{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_remapped{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Remapped` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -343,8 +345,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Backfill"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_backfill{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_backfill{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Backfill` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -352,8 +354,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Peered"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_peered{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_peered{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_peered{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_peered{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Peered` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -361,8 +363,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Down"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_down{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_down{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_down{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_down{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Down` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -370,8 +372,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Repair"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_repair{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_repair{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_repair{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_repair{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Repair` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -379,8 +381,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Recovering"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_recovering{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_recovering{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_recovering{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_recovering{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Recovering` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -388,8 +390,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Deep"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_deep{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_deep{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_deep{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_deep{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Deep` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -397,8 +399,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Wait Backfill"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_wait_backfill{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_wait_backfill{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_wait_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_wait_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Wait Backfill` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -406,8 +408,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Creating"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_creating{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_creating{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_creating{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_creating{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Creating` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -415,8 +417,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Forced Recovery"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_forced_recovery{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_forced_recovery{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_forced_recovery{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_forced_recovery{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Forced Recovery` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -424,8 +426,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Forced Backfill"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_forced_backfill{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_forced_backfill{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_forced_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_forced_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Forced Backfill` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -433,8 +435,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Incomplete"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_incomplete{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_incomplete{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_incomplete{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_incomplete{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Incomplete` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -442,8 +444,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test PGs State Undersized"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_undersized{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_undersized{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `PGs State` with legend `Undersized` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -451,8 +453,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Stuck PGs Undersized"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_undersized{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_undersized{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `Stuck PGs` with legend `Undersized` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -460,8 +462,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Stuck PGs Stale"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_stale{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_stale{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_stale{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_stale{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `Stuck PGs` with legend `Stale` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -469,8 +471,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Stuck PGs Degraded"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_pg_degraded{cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_degraded{cluster="mycluster", osd="osd.1"} | 20 |
|
||||
| ceph_pg_degraded{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
|
||||
| ceph_pg_degraded{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
|
||||
Then Grafana panel `Stuck PGs` with legend `Degraded` shows:
|
||||
| metrics | values |
|
||||
| {} | 30 |
|
||||
@ -478,8 +480,8 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Recovery Operations"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_recovery_ops{cluster="mycluster", osd="osd.0"}| 250 200 |
|
||||
| ceph_osd_recovery_ops{cluster="mycluster", osd="osd.1"} | 800 100 |
|
||||
| ceph_osd_recovery_ops{job="ceph", cluster="mycluster", osd="osd.0"}| 250 200 |
|
||||
| ceph_osd_recovery_ops{job="ceph", cluster="mycluster", osd="osd.1"} | 800 100 |
|
||||
When variable `interval` is `120s`
|
||||
Then Grafana panel `Recovery Operations` with legend `OPS` shows:
|
||||
| metrics | values |
|
||||
@ -488,39 +490,49 @@ Feature: Ceph Cluster Dashboard
|
||||
Scenario: "Test Ceph Versions OSD"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_metadata{cluster="mycluster", osd="osd.0"}| 17 |
|
||||
| ceph_osd_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="osd.0", device_class="ssd"} | 1 |
|
||||
| ceph_osd_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="osd.1", device_class="hdd"} | 1 |
|
||||
Then Grafana panel `Ceph Versions` with legend `OSD Services` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
| {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
|
||||
| {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
Scenario: "Test Ceph Versions Mon"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_mon_metadata{cluster="mycluster", osd="osd.0"}| 17 |
|
||||
| ceph_mon_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", hostname="somehostname"}| 1 |
|
||||
| ceph_mon_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", hostname="someotherhostname"}| 1 |
|
||||
Then Grafana panel `Ceph Versions` with legend `Mon Services` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
| {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
|
||||
| {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
|
||||
|
||||
Scenario: "Test Ceph Versions MDS"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_mds_metadata{cluster="mycluster", osd="osd.0"}| 17 |
|
||||
| ceph_mds_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", hostname="someotherhostname", ceph_daemon="mds.someotherhostname",fs_id="1"}| 1 |
|
||||
| ceph_mds_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", hostname="somehostname", ceph_daemon="mds.somehostname",fs_id="1"}| 1 |
|
||||
Then Grafana panel `Ceph Versions` with legend `MDS Services` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
| {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
|
||||
| {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
|
||||
|
||||
Scenario: "Test Ceph Versions RGW"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_rgw_metadata{cluster="mycluster", osd="osd.0"}| 17 |
|
||||
| ceph_rgw_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="rgw.somehostname", hostname="somehostname"}| 1 |
|
||||
| ceph_rgw_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="rgw.someotherhostname", hostname="someotherhostname"}| 1 |
|
||||
Then Grafana panel `Ceph Versions` with legend `RGW Services` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
| {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
|
||||
| {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
|
||||
|
||||
Scenario: "Test Ceph Versions MGR"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_mgr_metadata{cluster="mycluster", osd="osd.0"}| 17 |
|
||||
| ceph_mgr_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="mgr.somehostname", hostname="somehostname"}| 1 |
|
||||
| ceph_mgr_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="mgr.someotherhostname", hostname="someotherhostname"}| 1 |
|
||||
Then Grafana panel `Ceph Versions` with legend `MGR Services` shows:
|
||||
| metrics | values |
|
||||
| {} | 1 |
|
||||
| {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
|
||||
| {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
|
||||
|
@ -113,6 +113,7 @@ def step_impl(context, panel_name, legend):
legend = ''
query_id = panel_name + '-' + legend
if query_id not in global_context.query_map:
print(f"QueryMap: {global_context.query_map}")
raise KeyError((f'Query with legend {legend} in panel "{panel_name}"'
' couldn\'t be found'))
|
||||
|
||||
|
@ -3,9 +3,9 @@ Feature: Host Details Dashboard
|
||||
Scenario: "Test OSD"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.0",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
|
||||
| ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.1",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
|
||||
| ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.2",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
|
||||
| ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.0",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
|
||||
| ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.1",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
|
||||
| ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.2",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
|
||||
When variable `ceph_hosts` is `127.0.0.1`
|
||||
Then Grafana panel `OSDs` with legend `EMPTY` shows:
|
||||
| metrics | values |
|
||||
@ -16,54 +16,54 @@ Scenario: "Test OSD"
|
||||
Scenario: "Test Disk IOPS - Writes - Several OSDs per device"
|
||||
Given the following series:
|
||||
| metrics | values |
|
||||
| node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
|
||||
| node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
|
||||
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
|
||||
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
|
||||
| node_disk_writes_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
|
||||
| node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
|
||||
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
|
||||
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
|
||||
When variable `ceph_hosts` is `localhost`
|
||||
Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows:
|
||||
| metrics | values |
|
||||
| {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
|
||||
| {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
|
||||
| {job="node",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
|
||||
| {job="node",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
|
||||
|
||||
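
The value column uses Prometheus-style series expansion: `10+60x1` denotes a counter that starts at 10 and gains 60 over one additional sample. A minimal sketch of that arithmetic (not part of the Ceph test tooling; the 60-second sample interval and the helper names are assumptions for illustration) shows why every device/OSD pair above is expected to report a rate of 1 write per second:

```python
# Illustrative sketch only: expand the "start+stepxcount" notation used in the
# series tables and derive the per-second rate the IOPS panels should show.
# The 60 s sample interval is an assumption, not something the tables state.

def expand_series(expr: str, interval: int = 60):
    """Expand e.g. '10+60x1' into a list of (timestamp, value) samples."""
    start_step, count = expr.split("x")
    start, step = (float(part) for part in start_step.split("+"))
    return [(i * interval, start + i * step) for i in range(int(count) + 1)]

def per_second_rate(samples):
    """Rate between the last two samples, as an irate()-style query would report."""
    (t0, v0), (t1, v1) = samples[-2], samples[-1]
    return (v1 - v0) / (t1 - t0)

samples = expand_series("10+60x1")   # [(0, 10.0), (60, 70.0)]
print(per_second_rate(samples))      # 1.0 -> matches the expected IOPS value
```
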
Scenario: "Test Disk IOPS - Writes - Single OSD per device"
Given the following series:
| metrics | values |
| node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
| node_disk_writes_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows:
| metrics | values |
| {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
| {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
| {job="node", ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
| {job="node", ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |

Scenario: "Test Disk IOPS - Reads - Several OSDs per device"
Given the following series:
| metrics | values |
| node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
| node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows:
| metrics | values |
| {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
| {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
| {job="node",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
| {job="node",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |

Scenario: "Test Disk IOPS - Reads - Single OSD per device"
Given the following series:
| metrics | values |
| node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
| node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows:
| metrics | values |
| {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
| {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
| {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
| {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |

# IOPS Panel - end

@ -72,44 +72,44 @@ Scenario: "Test Disk IOPS - Reads - Single OSD per device"
Scenario: "Test disk throughput - read"
Given the following series:
| metrics | values |
| node_disk_read_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_read_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
| node_disk_read_bytes_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_read_bytes_total{job="node",device="sdb",instance="localhost:9100"} | 100+600x1 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) read` shows:
| metrics | values |
| {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
| {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
| {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
| {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
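
The throughput scenarios reuse the same arithmetic on byte counters: `100+600x1` adds 600 bytes over one sample, so under the same assumed 60-second interval `sdb` is expected to show 10 bytes/s while `sda` (with `10+60x1`) stays at 1 byte/s. A one-line illustration of that assumption:

```python
# Illustrative only: expected sdb throughput = counter delta / assumed interval.
delta_bytes, interval_seconds = 600, 60
print(delta_bytes / interval_seconds)  # 10.0 -> matches the sdb rows above
```
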
Scenario: "Test disk throughput - write"
Given the following series:
| metrics | values |
| node_disk_written_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_written_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
| node_disk_written_bytes_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_written_bytes_total{job="node",device="sdb",instance="localhost:9100"} | 100+600x1 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) write` shows:
| metrics | values |
| {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
| {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
| {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
| {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |

# Node disk bytes written/read panel - end

Scenario: "Test $ceph_hosts Disk Latency panel"
Given the following series:
| metrics | values |
| node_disk_write_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_write_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
| node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
| node_disk_read_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_read_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
| node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
| node_disk_write_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_write_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
| node_disk_writes_completed_total{job="ndoe",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
| node_disk_read_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_read_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
| node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk Latency` with legend `{{device}}({{ceph_daemon}})` shows:
| metrics | values |

@ -119,13 +119,13 @@ Scenario: "Test $ceph_hosts Disk Latency panel"

Scenario: "Test $ceph_hosts Disk utilization"
Given the following series:
| metrics | values |
| node_disk_io_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_io_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
| node_disk_io_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
| node_disk_io_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
| ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk utilization` with legend `{{device}}({{ceph_daemon}})` shows:
| metrics | values |
| {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 100 |
| {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 100 |
| {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 100 |
| {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 100 |
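
The utilization scenario follows the same pattern: `node_disk_io_time_seconds_total` accumulates 60 busy-seconds over one sample, so under the assumed 60-second interval the disk was busy the whole time, which is why every expected row reads 100 (percent). A minimal illustration of that arithmetic:

```python
# Illustrative only: busy time per wall-clock second, expressed as a percentage.
busy_seconds, interval_seconds = 60, 60
print(100 * busy_seconds / interval_seconds)  # 100.0 -> fully utilized disk
```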