import ceph 16.2.6
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in: parent a7eef15ae0, commit 522d829b51

ceph/.github/pull_request_template.md (vendored): 1 line changed
@ -48,6 +48,7 @@ https://raw.githubusercontent.com/ceph/ceph/master/SubmittingPatches.rst
|
||||
- `jenkins test make check arm64`
|
||||
- `jenkins test submodules`
|
||||
- `jenkins test dashboard`
|
||||
- `jenkins test dashboard cephadm`
|
||||
- `jenkins test api`
|
||||
- `jenkins test docs`
|
||||
- `jenkins render docs`
|
||||
|
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10.2)
|
||||
# remove cmake/modules/FindPython* once 3.12 is required
|
||||
|
||||
project(ceph
|
||||
VERSION 16.0.0
|
||||
VERSION 16.2.6
|
||||
LANGUAGES CXX C ASM)
|
||||
|
||||
foreach(policy
|
||||
|
@ -1,5 +1,13 @@
|
||||
>=17.0.0
|
||||
|
||||
* The `ceph-mgr-modules-core` debian package no longer recommends `ceph-mgr-rook`.
  The latter depends on `python3-numpy`, which cannot be imported multiple times
  in different Python sub-interpreters if the version of `python3-numpy` is older
  than 1.19. Because `apt-get` installs `Recommends` packages by default,
  `ceph-mgr-rook` was always installed along with the `ceph-mgr` debian package
  as an indirect dependency. If your workflow depends on this behavior, you might
  want to install `ceph-mgr-rook` separately.
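
  For example, to keep the Rook module available on a Debian-based host after
  upgrading (a hedged illustration; use your distribution's package manager)::

    apt-get install ceph-mgr-rook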
|
||||
|
||||
* A new library is available, libcephsqlite. It provides a SQLite Virtual File
|
||||
System (VFS) on top of RADOS. The database and journals are striped over
|
||||
RADOS across multiple objects for virtually unlimited scaling and throughput
|
||||
@ -9,6 +17,28 @@
|
||||
that were storing state in RADOS omap, especially without striping which
|
||||
limits scalability.
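
  As a rough sketch of the intended client usage (the loadable module name, the
  URI format, and the pool name `mypool` are assumptions drawn from the
  libcephsqlite documentation, not part of this release note)::

    $ sqlite3 -cmd '.load libcephsqlite.so'
    sqlite> .open file:///mypool:/test.db?vfs=ceph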
|
||||
|
||||
* MDS upgrades no longer require stopping all standby MDS daemons before
|
||||
upgrading the sole active MDS for a file system.
|
||||
|
||||
* RGW: It is now possible to specify SSL options and ciphers for the beast frontend.
  The default SSL options setting is "no_sslv2:no_sslv3:no_tlsv1:no_tlsv1_1".
  If you want to restore the old behavior, add 'ssl_options=' (empty) to the
  ``rgw frontends`` configuration.
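
  For example, a beast frontend that accepts the older TLS versions again
  (a sketch; the port and certificate path are placeholders)::

    rgw frontends = beast ssl_port=443 ssl_certificate=/etc/ceph/rgw.pem ssl_options=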
|
||||
|
||||
* fs: A file system can be created with a specific ID ("fscid"). This is useful
|
||||
in certain recovery scenarios, e.g., monitor database lost and rebuilt, and
|
||||
the restored file system is expected to have the same ID as before.
|
||||
|
||||
>=16.2.6
|
||||
--------
|
||||
|
||||
* MGR: The pg_autoscaler has a new default 'scale-down' profile which provides more
|
||||
performance from the start for new pools (for newly created clusters).
|
||||
Existing clusters will retain the old behavior, now called the 'scale-up' profile.
|
||||
For more details, see:
|
||||
|
||||
https://docs.ceph.com/en/latest/rados/operations/placement-groups/
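
  To see the effect on a cluster, the autoscaler status can be inspected per
  pool (the exact output columns depend on the release)::

    ceph osd pool autoscale-status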
|
||||
|
||||
>=16.0.0
|
||||
--------
|
||||
|
||||
@ -42,12 +72,6 @@
|
||||
deprecated and will be removed in a future release. Please use
|
||||
``nfs cluster rm`` and ``nfs export rm`` instead.
|
||||
|
||||
* mgr-pg_autoscaler: The autoscaler will now start out by scaling each
  pool to have a full complement of PGs from the start and will only
  decrease it when other pools need more PGs due to increased usage.
  This improves the out-of-the-box performance of Ceph by allowing more PGs
  to be created for a given pool.
|
||||
|
||||
* CephFS: Disabling allow_standby_replay on a file system will also stop all
|
||||
standby-replay daemons for that file system.
|
||||
|
||||
@ -159,6 +183,8 @@
|
||||
CentOS 7.6 and later. To enable older clients, set ``cephx_require_version``
|
||||
and ``cephx_service_require_version`` config options to 1.
|
||||
|
||||
* rgw: The Civetweb frontend is now deprecated and will be removed in Quincy.
|
||||
|
||||
>=15.0.0
|
||||
--------
|
||||
|
||||
|
@ -19,7 +19,6 @@ The following AIX packages are required for developing and compilation, they hav
|
||||
gettext
|
||||
less
|
||||
perl
|
||||
gdbm
|
||||
pcre
|
||||
rsync
|
||||
zlib
|
||||
|
@ -49,6 +49,8 @@
|
||||
%bcond_without lttng
|
||||
%bcond_without libradosstriper
|
||||
%bcond_without ocf
|
||||
%global luarocks_package_name luarocks
|
||||
%bcond_without lua_packages
|
||||
%global _remote_tarball_prefix https://download.ceph.com/tarballs/
|
||||
%endif
|
||||
%if 0%{?suse_version}
|
||||
@ -73,6 +75,21 @@
|
||||
%if ! %{defined _fillupdir}
|
||||
%global _fillupdir /var/adm/fillup-templates
|
||||
%endif
|
||||
#luarocks
|
||||
%if 0%{?is_opensuse}
|
||||
# openSUSE
|
||||
%bcond_without lua_packages
|
||||
%if 0%{?sle_version}
|
||||
# openSUSE Leap
|
||||
%global luarocks_package_name lua53-luarocks
|
||||
%else
|
||||
# openSUSE Tumbleweed
|
||||
%global luarocks_package_name lua54-luarocks
|
||||
%endif
|
||||
%else
|
||||
# SLE
|
||||
%bcond_with lua_packages
|
||||
%endif
|
||||
%endif
|
||||
%bcond_with seastar
|
||||
%bcond_with jaeger
|
||||
@ -96,19 +113,6 @@
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if 0%{?suse_version}
|
||||
%if !0%{?is_opensuse}
|
||||
# SLE does not support luarocks
|
||||
%bcond_with lua_packages
|
||||
%else
|
||||
%global luarocks_package_name lua53-luarocks
|
||||
%bcond_without lua_packages
|
||||
%endif
|
||||
%else
|
||||
%global luarocks_package_name luarocks
|
||||
%bcond_without lua_packages
|
||||
%endif
|
||||
|
||||
%{!?_udevrulesdir: %global _udevrulesdir /lib/udev/rules.d}
|
||||
%{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
|
||||
%{!?python3_pkgversion: %global python3_pkgversion 3}
|
||||
@ -122,7 +126,7 @@
|
||||
# main package definition
|
||||
#################################################################################
|
||||
Name: ceph
|
||||
Version: 16.2.5
|
||||
Version: 16.2.6
|
||||
Release: 0%{?dist}
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
Epoch: 2
|
||||
@ -138,7 +142,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
|
||||
Group: System/Filesystems
|
||||
%endif
|
||||
URL: http://ceph.com/
|
||||
Source0: %{?_remote_tarball_prefix}ceph-16.2.5.tar.bz2
|
||||
Source0: %{?_remote_tarball_prefix}ceph-16.2.6.tar.bz2
|
||||
%if 0%{?suse_version}
|
||||
# _insert_obs_source_lines_here
|
||||
ExclusiveArch: x86_64 aarch64 ppc64le s390x
|
||||
@ -168,7 +172,6 @@ BuildRequires: gcc-toolset-9-gcc-c++ >= 9.2.1-2.3
|
||||
%else
|
||||
BuildRequires: gcc-c++
|
||||
%endif
|
||||
BuildRequires: gdbm
|
||||
%if 0%{with tcmalloc}
|
||||
# libprofiler did not build on ppc64le until 2.7.90
|
||||
%if 0%{?fedora} || 0%{?rhel} >= 8
|
||||
@ -292,7 +295,6 @@ BuildRequires: libbz2-devel
|
||||
BuildRequires: mozilla-nss-devel
|
||||
BuildRequires: keyutils-devel
|
||||
BuildRequires: libopenssl-devel
|
||||
BuildRequires: lsb-release
|
||||
BuildRequires: openldap2-devel
|
||||
#BuildRequires: krb5
|
||||
#BuildRequires: krb5-devel
|
||||
@ -317,7 +319,6 @@ BuildRequires: openldap-devel
|
||||
#BuildRequires: krb5-devel
|
||||
BuildRequires: openssl-devel
|
||||
BuildRequires: CUnit-devel
|
||||
BuildRequires: redhat-lsb-core
|
||||
BuildRequires: python%{python3_pkgversion}-devel
|
||||
BuildRequires: python%{python3_pkgversion}-setuptools
|
||||
BuildRequires: python%{python3_pkgversion}-Cython
|
||||
@ -329,6 +330,7 @@ BuildRequires: lz4-devel >= 1.7
|
||||
%if 0%{with make_check}
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
BuildRequires: golang-github-prometheus
|
||||
BuildRequires: jsonnet
|
||||
BuildRequires: libtool-ltdl-devel
|
||||
BuildRequires: xmlsec1
|
||||
BuildRequires: xmlsec1-devel
|
||||
@ -346,6 +348,7 @@ BuildRequires: python%{python3_pkgversion}-pyOpenSSL
|
||||
%endif
|
||||
%if 0%{?suse_version}
|
||||
BuildRequires: golang-github-prometheus-prometheus
|
||||
BuildRequires: jsonnet
|
||||
BuildRequires: libxmlsec1-1
|
||||
BuildRequires: libxmlsec1-nss1
|
||||
BuildRequires: libxmlsec1-openssl1
|
||||
@ -1205,7 +1208,7 @@ This package provides Ceph default alerts for Prometheus.
|
||||
# common
|
||||
#################################################################################
|
||||
%prep
|
||||
%autosetup -p1 -n ceph-16.2.5
|
||||
%autosetup -p1 -n ceph-16.2.6
|
||||
|
||||
%build
|
||||
# LTO can be enabled as soon as the following GCC bug is fixed:
|
||||
@ -1335,6 +1338,9 @@ ${CMAKE} .. \
|
||||
-DWITH_SYSTEM_PMDK:BOOL=ON \
|
||||
%endif
|
||||
-DBOOST_J=$CEPH_SMP_NCPUS \
|
||||
%if 0%{?rhel}
|
||||
-DWITH_FMT_HEADER_ONLY:BOOL=ON \
|
||||
%endif
|
||||
-DWITH_GRAFANA=ON
|
||||
|
||||
%if %{with cmake_verbose_logging}
|
||||
@ -1990,9 +1996,8 @@ fi
|
||||
%endif
|
||||
|
||||
%postun immutable-object-cache
|
||||
test -n "$FIRST_ARG" || FIRST_ARG=$1
|
||||
%systemd_postun ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
|
||||
if [ $FIRST_ARG -ge 1 ] ; then
|
||||
if [ $1 -ge 1 ] ; then
|
||||
# Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
|
||||
# "yes". In any case: if units are not running, do not touch them.
|
||||
SYSCONF_CEPH=%{_sysconfdir}/sysconfig/ceph
|
||||
|
@ -49,6 +49,8 @@
|
||||
%bcond_without lttng
|
||||
%bcond_without libradosstriper
|
||||
%bcond_without ocf
|
||||
%global luarocks_package_name luarocks
|
||||
%bcond_without lua_packages
|
||||
%global _remote_tarball_prefix https://download.ceph.com/tarballs/
|
||||
%endif
|
||||
%if 0%{?suse_version}
|
||||
@ -73,6 +75,21 @@
|
||||
%if ! %{defined _fillupdir}
|
||||
%global _fillupdir /var/adm/fillup-templates
|
||||
%endif
|
||||
#luarocks
|
||||
%if 0%{?is_opensuse}
|
||||
# openSUSE
|
||||
%bcond_without lua_packages
|
||||
%if 0%{?sle_version}
|
||||
# openSUSE Leap
|
||||
%global luarocks_package_name lua53-luarocks
|
||||
%else
|
||||
# openSUSE Tumbleweed
|
||||
%global luarocks_package_name lua54-luarocks
|
||||
%endif
|
||||
%else
|
||||
# SLE
|
||||
%bcond_with lua_packages
|
||||
%endif
|
||||
%endif
|
||||
%bcond_with seastar
|
||||
%bcond_with jaeger
|
||||
@ -96,19 +113,6 @@
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if 0%{?suse_version}
|
||||
%if !0%{?is_opensuse}
|
||||
# SLE does not support luarocks
|
||||
%bcond_with lua_packages
|
||||
%else
|
||||
%global luarocks_package_name lua53-luarocks
|
||||
%bcond_without lua_packages
|
||||
%endif
|
||||
%else
|
||||
%global luarocks_package_name luarocks
|
||||
%bcond_without lua_packages
|
||||
%endif
|
||||
|
||||
%{!?_udevrulesdir: %global _udevrulesdir /lib/udev/rules.d}
|
||||
%{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
|
||||
%{!?python3_pkgversion: %global python3_pkgversion 3}
|
||||
@ -168,7 +172,6 @@ BuildRequires: gcc-toolset-9-gcc-c++ >= 9.2.1-2.3
|
||||
%else
|
||||
BuildRequires: gcc-c++
|
||||
%endif
|
||||
BuildRequires: gdbm
|
||||
%if 0%{with tcmalloc}
|
||||
# libprofiler did not build on ppc64le until 2.7.90
|
||||
%if 0%{?fedora} || 0%{?rhel} >= 8
|
||||
@ -292,7 +295,6 @@ BuildRequires: libbz2-devel
|
||||
BuildRequires: mozilla-nss-devel
|
||||
BuildRequires: keyutils-devel
|
||||
BuildRequires: libopenssl-devel
|
||||
BuildRequires: lsb-release
|
||||
BuildRequires: openldap2-devel
|
||||
#BuildRequires: krb5
|
||||
#BuildRequires: krb5-devel
|
||||
@ -317,7 +319,6 @@ BuildRequires: openldap-devel
|
||||
#BuildRequires: krb5-devel
|
||||
BuildRequires: openssl-devel
|
||||
BuildRequires: CUnit-devel
|
||||
BuildRequires: redhat-lsb-core
|
||||
BuildRequires: python%{python3_pkgversion}-devel
|
||||
BuildRequires: python%{python3_pkgversion}-setuptools
|
||||
BuildRequires: python%{python3_pkgversion}-Cython
|
||||
@ -329,6 +330,7 @@ BuildRequires: lz4-devel >= 1.7
|
||||
%if 0%{with make_check}
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
BuildRequires: golang-github-prometheus
|
||||
BuildRequires: jsonnet
|
||||
BuildRequires: libtool-ltdl-devel
|
||||
BuildRequires: xmlsec1
|
||||
BuildRequires: xmlsec1-devel
|
||||
@ -346,6 +348,7 @@ BuildRequires: python%{python3_pkgversion}-pyOpenSSL
|
||||
%endif
|
||||
%if 0%{?suse_version}
|
||||
BuildRequires: golang-github-prometheus-prometheus
|
||||
BuildRequires: jsonnet
|
||||
BuildRequires: libxmlsec1-1
|
||||
BuildRequires: libxmlsec1-nss1
|
||||
BuildRequires: libxmlsec1-openssl1
|
||||
@ -1335,6 +1338,9 @@ ${CMAKE} .. \
|
||||
-DWITH_SYSTEM_PMDK:BOOL=ON \
|
||||
%endif
|
||||
-DBOOST_J=$CEPH_SMP_NCPUS \
|
||||
%if 0%{?rhel}
|
||||
-DWITH_FMT_HEADER_ONLY:BOOL=ON \
|
||||
%endif
|
||||
-DWITH_GRAFANA=ON
|
||||
|
||||
%if %{with cmake_verbose_logging}
|
||||
@ -1990,9 +1996,8 @@ fi
|
||||
%endif
|
||||
|
||||
%postun immutable-object-cache
|
||||
test -n "$FIRST_ARG" || FIRST_ARG=$1
|
||||
%systemd_postun ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
|
||||
if [ $FIRST_ARG -ge 1 ] ; then
|
||||
if [ $1 -ge 1 ] ; then
|
||||
# Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
|
||||
# "yes". In any case: if units are not running, do not touch them.
|
||||
SYSCONF_CEPH=%{_sysconfdir}/sysconfig/ceph
|
||||
|
@ -1,7 +1,8 @@
|
||||
ceph (16.2.5-1focal) focal; urgency=medium
|
||||
ceph (16.2.6-1) stable; urgency=medium
|
||||
|
||||
* New upstream release
|
||||
|
||||
-- Jenkins Build Slave User <jenkins-build@braggi17.front.sepia.ceph.com> Thu, 08 Jul 2021 14:16:59 +0000
|
||||
-- Ceph Release Team <ceph-maintainers@ceph.com> Thu, 16 Sep 2021 14:27:16 +0000
|
||||
|
||||
ceph (16.2.5-1) stable; urgency=medium
|
||||
|
||||
|
@ -155,7 +155,7 @@ function(do_build_boost version)
|
||||
set(boost_sha256 4eb3b8d442b426dc35346235c8733b5ae35ba431690e38c6a8263dce9fcbb402)
|
||||
string(REPLACE "." "_" boost_version_underscore ${boost_version} )
|
||||
set(boost_url
|
||||
https://dl.bintray.com/boostorg/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2)
|
||||
https://boostorg.jfrog.io/artifactory/main/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2)
|
||||
if(CMAKE_VERSION VERSION_GREATER 3.7)
|
||||
set(boost_url
|
||||
"${boost_url} http://downloads.sourceforge.net/project/boost/boost/${boost_version}/boost_${boost_version_underscore}.tar.bz2")
|
||||
|
@ -56,7 +56,7 @@ endif()
|
||||
CHECK_INCLUDE_FILES("valgrind/helgrind.h" HAVE_VALGRIND_HELGRIND_H)
|
||||
|
||||
include(CheckTypeSize)
|
||||
set(CMAKE_EXTRA_INCLUDE_FILES "linux/types.h")
|
||||
set(CMAKE_EXTRA_INCLUDE_FILES "linux/types.h" "netinet/in.h")
|
||||
CHECK_TYPE_SIZE(__u8 __U8)
|
||||
CHECK_TYPE_SIZE(__u16 __U16)
|
||||
CHECK_TYPE_SIZE(__u32 __U32)
|
||||
@ -65,6 +65,7 @@ CHECK_TYPE_SIZE(__s8 __S8)
|
||||
CHECK_TYPE_SIZE(__s16 __S16)
|
||||
CHECK_TYPE_SIZE(__s32 __S32)
|
||||
CHECK_TYPE_SIZE(__s64 __S64)
|
||||
CHECK_TYPE_SIZE(in_addr_t IN_ADDR_T)
|
||||
unset(CMAKE_EXTRA_INCLUDE_FILES)
|
||||
|
||||
include(CheckSymbolExists)
|
||||
|
@ -35,9 +35,27 @@ mark_as_advanced(
|
||||
fmt_VERSION_STRING)
|
||||
|
||||
if(fmt_FOUND AND NOT (TARGET fmt::fmt))
|
||||
add_library(fmt::fmt UNKNOWN IMPORTED)
|
||||
set_target_properties(fmt::fmt PROPERTIES
|
||||
add_library(fmt-header-only INTERFACE)
|
||||
set_target_properties(fmt-header-only PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${fmt_INCLUDE_DIR}"
|
||||
INTERFACE_COMPILE_DEFINITIONS FMT_HEADER_ONLY=1
|
||||
INTERFACE_COMPILE_FEATURES cxx_std_11)
|
||||
|
||||
add_library(fmt UNKNOWN IMPORTED GLOBAL)
|
||||
set_target_properties(fmt PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${fmt_INCLUDE_DIR}"
|
||||
INTERFACE_COMPILE_FEATURES cxx_std_11
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
||||
IMPORTED_LOCATION "${fmt_LIBRARY}")
|
||||
|
||||
if(WITH_FMT_HEADER_ONLY)
|
||||
# please note, this is different from how upstream defines fmt::fmt.
|
||||
# in order to force 3rd party libraries to link against fmt-header-only if
|
||||
# WITH_FMT_HEADER_ONLY is ON, we have to point fmt::fmt to fmt-header-only
|
||||
# in this case.
|
||||
add_library(fmt::fmt ALIAS fmt-header-only)
|
||||
else()
|
||||
add_library(fmt::fmt ALIAS fmt)
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
@ -12,19 +12,18 @@ Build-Depends: automake,
|
||||
cmake (>= 3.10.2),
|
||||
cpio,
|
||||
cryptsetup-bin | cryptsetup,
|
||||
cython,
|
||||
cython3,
|
||||
debhelper (>= 9),
|
||||
debhelper (>= 10),
|
||||
default-jdk,
|
||||
dh-exec,
|
||||
dh-python,
|
||||
dh-systemd,
|
||||
# Jaeger flex,
|
||||
git,
|
||||
gperf,
|
||||
g++ (>= 7),
|
||||
javahelper,
|
||||
# Make-Check jq,
|
||||
jq <pkg.ceph.check>,
|
||||
jsonnet <pkg.ceph.check>,
|
||||
junit4,
|
||||
libaio-dev,
|
||||
libbabeltrace-ctf-dev,
|
||||
@ -74,7 +73,6 @@ Build-Depends: automake,
|
||||
# Make-Check libxmlsec1-openssl,
|
||||
# Make-Check libxmlsec1-dev,
|
||||
# Crimson libyaml-cpp-dev,
|
||||
lsb-release,
|
||||
# Jaeger nlohmann-json-dev | nlohmann-json3-dev,
|
||||
parted,
|
||||
patch,
|
||||
|
@ -254,6 +254,7 @@ class CephMgrCommands(Directive):
|
||||
'jsonpatch',
|
||||
'rook.rook_client',
|
||||
'rook.rook_client.ceph',
|
||||
'rook.rook_client._helper',
|
||||
'cherrypy=3.2.3']
|
||||
|
||||
# make restful happy
|
||||
|
@ -76,6 +76,9 @@ and ``ceph-disk`` is fully disabled. Encryption is fully supported.
|
||||
lvm/systemd
|
||||
lvm/list
|
||||
lvm/zap
|
||||
lvm/migrate
|
||||
lvm/newdb
|
||||
lvm/newwal
|
||||
simple/index
|
||||
simple/activate
|
||||
simple/scan
|
||||
|
@ -15,6 +15,12 @@ Implements the functionality needed to deploy OSDs from the ``lvm`` subcommand:
|
||||
|
||||
* :ref:`ceph-volume-lvm-list`
|
||||
|
||||
* :ref:`ceph-volume-lvm-migrate`
|
||||
|
||||
* :ref:`ceph-volume-lvm-newdb`
|
||||
|
||||
* :ref:`ceph-volume-lvm-newwal`
|
||||
|
||||
.. not yet implemented
|
||||
.. * :ref:`ceph-volume-lvm-scan`
|
||||
|
||||
|
ceph/doc/ceph-volume/lvm/migrate.rst (new file, 47 lines)
@ -0,0 +1,47 @@
|
||||
.. _ceph-volume-lvm-migrate:
|
||||
|
||||
``migrate``
|
||||
===========
|
||||
|
||||
Moves BlueFS data from source volume(s) to the target volume. Source volumes
(except the main one, i.e. the data or block volume) are removed on success.

Only LVM volumes are permitted as the target, either one that is already
attached or a new one.

In the latter case the new volume is attached to the OSD, replacing one of
the source devices.

The following replacement rules apply (in order of precedence, stopping at
the first match):

- if the source list has a DB volume - the target device replaces it.
- if the source list has a WAL volume - the target device replaces it.
- if the source list has only a slow volume - the operation is not permitted
  and requires explicit allocation via the new-db/new-wal command.
|
||||
|
||||
Moves BlueFS data from main device to LV already attached as DB::
|
||||
|
||||
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/db
|
||||
|
||||
Moves BlueFS data from shared main device to LV which will be attached as a
|
||||
new DB::
|
||||
|
||||
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/new_db
|
||||
|
||||
Moves BlueFS data from DB device to new LV, DB is replaced::
|
||||
|
||||
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db --target vgname/new_db
|
||||
|
||||
Moves BlueFS data from main and DB devices to new LV, DB is replaced::
|
||||
|
||||
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db --target vgname/new_db
|
||||
|
||||
Moves BlueFS data from main, DB and WAL devices to new LV, WAL is removed and
|
||||
DB is replaced::
|
||||
|
||||
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db wal --target vgname/new_db
|
||||
|
||||
Moves BlueFS data from DB and WAL devices to the main device, WAL and DB are
|
||||
removed::
|
||||
|
||||
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db wal --target vgname/data
|
ceph/doc/ceph-volume/lvm/newdb.rst (new file, 11 lines)
@ -0,0 +1,11 @@
|
||||
.. _ceph-volume-lvm-newdb:
|
||||
|
||||
``new-db``
|
||||
===========
|
||||
|
||||
Attaches the given logical volume to the given OSD as a DB volume.
The logical volume name format is vg/lv. Fails if the OSD already has an attached DB.
|
||||
|
||||
Attach vgname/lvname as a DB volume to OSD 1::
|
||||
|
||||
ceph-volume lvm new-db --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db
|
ceph/doc/ceph-volume/lvm/newwal.rst (new file, 11 lines)
@ -0,0 +1,11 @@
|
||||
.. _ceph-volume-lvm-newwal:
|
||||
|
||||
``new-wal``
|
||||
===========
|
||||
|
||||
Attaches the given logical volume to the given OSD as a WAL volume.
The logical volume name format is vg/lv. Fails if the OSD already has an attached WAL.
|
||||
|
||||
Attach vgname/lvname as a WAL volume to OSD 1::
|
||||
|
||||
ceph-volume lvm new-wal --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_wal
|
@ -1,40 +1,45 @@
|
||||
=======================
|
||||
Basic Ceph Client Setup
|
||||
=======================
|
||||
Client machines need some basic configuration in order to interact with
|
||||
a cluster. This document describes how to configure a client machine
|
||||
for cluster interaction.
|
||||
Client machines require some basic configuration to interact with
|
||||
Ceph clusters. This section describes how to configure a client machine
|
||||
so that it can interact with a Ceph cluster.
|
||||
|
||||
.. note:: Most client machines only need the `ceph-common` package and
|
||||
its dependencies installed. That will supply the basic `ceph`
|
||||
and `rados` commands, as well as other commands like
|
||||
`mount.ceph` and `rbd`.
|
||||
.. note::
|
||||
Most client machines need to install only the `ceph-common` package
|
||||
and its dependencies. Such a setup supplies the basic `ceph` and
|
||||
`rados` commands, as well as other commands including `mount.ceph`
|
||||
and `rbd`.
|
||||
|
||||
Config File Setup
|
||||
=================
|
||||
Client machines can generally get away with a smaller config file than
|
||||
a full-fledged cluster member. To generate a minimal config file, log
|
||||
into a host that is already configured as a client or running a cluster
|
||||
daemon, and then run
|
||||
Client machines usually require smaller configuration files (here
|
||||
sometimes called "config files") than do full-fledged cluster members.
|
||||
To generate a minimal config file, log into a host that has been
|
||||
configured as a client or that is running a cluster daemon, and then run the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config generate-minimal-conf
|
||||
ceph config generate-minimal-conf
|
||||
|
||||
This will generate a minimal config file that will tell the client how to
|
||||
reach the Ceph Monitors. The contents of this file should typically be
|
||||
installed in `/etc/ceph/ceph.conf`.
|
||||
This command generates a minimal config file that tells the client how
|
||||
to reach the Ceph monitors. The contents of this file should usually
|
||||
be installed in ``/etc/ceph/ceph.conf``.
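
For example, to capture the output directly into that location on the same
host (a minimal sketch; when configuring a separate client machine, copy the
generated file over instead):

.. prompt:: bash #

   ceph config generate-minimal-conf > /etc/ceph/ceph.conf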
|
||||
|
||||
Keyring Setup
|
||||
=============
|
||||
Most Ceph clusters are run with authentication enabled, and the client will
|
||||
need keys in order to communicate with cluster machines. To generate a
|
||||
keyring file with credentials for `client.fs`, log into an extant cluster
|
||||
member and run
|
||||
Most Ceph clusters run with authentication enabled. This means that
|
||||
the client needs keys in order to communicate with the machines in the
|
||||
cluster. To generate a keyring file with credentials for `client.fs`,
|
||||
log into a running cluster member and run the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph auth get-or-create client.fs
|
||||
ceph auth get-or-create client.fs
|
||||
|
||||
The resulting output should be put into a keyring file, typically
|
||||
`/etc/ceph/ceph.keyring`.
|
||||
The resulting output is directed into a keyring file, typically
|
||||
``/etc/ceph/ceph.keyring``.
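
For example, assuming root privileges on the client (a sketch; adjust the
ownership and mode to your site's policy):

.. prompt:: bash #

   ceph auth get-or-create client.fs > /etc/ceph/ceph.keyring
   chmod 600 /etc/ceph/ceph.keyring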
|
||||
|
||||
To gain a broader understanding of client keyring distribution and administration, you should read :ref:`client_keyrings_and_configs`.
|
||||
|
||||
To see an example that explains how to distribute ``ceph.conf`` configuration files to hosts that are tagged with the ``bare_config`` label, you should read the section called "Distributing ceph.conf to hosts tagged with bare_config" in the section called :ref:`etc_ceph_conf_distribution`.
|
||||
|
@ -64,48 +64,47 @@ To add each new host to the cluster, perform two steps:
|
||||
Removing Hosts
|
||||
==============
|
||||
|
||||
If the node that you want to remove is running OSDs, make sure you remove the OSDs from the node.
|
||||
A host can safely be removed from the cluster once all daemons are removed from it.
|
||||
|
||||
To remove a host from a cluster, do the following:
|
||||
|
||||
For all Ceph service types, except for ``node-exporter`` and ``crash``, remove
|
||||
the host from the placement specification file (for example, cluster.yml).
|
||||
For example, if you are removing the host named host2, remove all occurrences of
|
||||
``- host2`` from all ``placement:`` sections.
|
||||
|
||||
Update:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
service_type: rgw
|
||||
placement:
|
||||
hosts:
|
||||
- host1
|
||||
- host2
|
||||
|
||||
To:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
|
||||
service_type: rgw
|
||||
placement:
|
||||
hosts:
|
||||
- host1
|
||||
|
||||
Remove the host from cephadm's environment:
|
||||
To drain all daemons from a host do the following:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host rm host2
|
||||
ceph orch host drain *<host>*
|
||||
|
||||
The '_no_schedule' label will be applied to the host. See :ref:`cephadm-special-host-labels`
|
||||
|
||||
If the host is running ``node-exporter`` and crash services, remove them by running
|
||||
the following command on the host:
|
||||
All OSDs on the host will be scheduled for removal. You can check the progress of the OSD removal with the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm rm-daemon --fsid CLUSTER_ID --name SERVICE_NAME
|
||||
ceph orch osd rm status
|
||||
|
||||
see :ref:`cephadm-osd-removal` for more details about osd removal
|
||||
|
||||
You can check whether there are any daemons left on the host with the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch ps <host>
|
||||
|
||||
Once all daemons have been removed, you can remove the host with the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host rm <host>
|
||||
|
||||
Offline host removal
|
||||
--------------------
|
||||
|
||||
If a host is offline and cannot be recovered, it can still be removed from the cluster with the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host rm <host> --offline --force
|
||||
|
||||
This can potentially cause data loss, as OSDs will be forcefully purged from the cluster by calling ``osd purge-actual`` for each OSD.
|
||||
Service specs that still contain this host should be manually updated.
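
For example, if a hypothetical ``rgw.yml`` service spec still lists the removed
host under ``placement:``, delete that entry from the file and re-apply the
spec:

.. prompt:: bash #

   ceph orch apply -i rgw.yml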
|
||||
|
||||
.. _orchestrator-host-labels:
|
||||
|
||||
|
@ -173,6 +173,11 @@ immediately to know more about ``cephadm bootstrap``, read the list below.
|
||||
Also, you can run ``cephadm bootstrap -h`` to see all of ``cephadm``'s
|
||||
available options.
|
||||
|
||||
* By default, Ceph daemons send their log output to stdout/stderr, which is picked
up by the container runtime (docker or podman) and (on most systems) sent to
journald. If you want Ceph to write traditional log files to ``/var/log/ceph/$fsid``,
use the ``--log-to-file`` option during bootstrap, as in the sketch below.
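
  A hedged example of a bootstrap invocation that enables file logging
  (``10.1.2.10`` is a placeholder monitor IP):

  .. prompt:: bash #

     cephadm bootstrap --mon-ip 10.1.2.10 --log-to-file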
|
||||
|
||||
* Larger Ceph clusters perform better when (external to the Ceph cluster)
|
||||
public network traffic is separated from (internal to the Ceph cluster)
|
||||
cluster traffic. The internal cluster traffic handles replication, recovery,
|
||||
|
@ -28,6 +28,10 @@ manual administration of the ceph monitor daemons is not necessary.
|
||||
``cephadm`` will automatically add up to five monitors to the subnet, as
|
||||
needed, as new hosts are added to the cluster.
|
||||
|
||||
By default, cephadm will deploy 5 daemons on arbitrary hosts. See
|
||||
:ref:`orchestrator-cli-placement-spec` for details of specifying
|
||||
the placement of daemons.
|
||||
|
||||
Designating a Particular Subnet for Monitors
|
||||
--------------------------------------------
|
||||
|
||||
@ -48,67 +52,18 @@ format (e.g., ``10.1.2.0/24``):
|
||||
Cephadm deploys new monitor daemons only on hosts that have IP addresses in
|
||||
the designated subnet.
|
||||
|
||||
Changing the number of monitors from the default
|
||||
------------------------------------------------
|
||||
|
||||
If you want to adjust the default of 5 monitors, run this command:
|
||||
You can also specify two public networks by using a list of networks:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mon *<number-of-monitors>*
|
||||
|
||||
Deploying monitors only to specific hosts
|
||||
-----------------------------------------
|
||||
|
||||
To deploy monitors on a specific set of hosts, run this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mon *<host1,host2,host3,...>*
|
||||
|
||||
Be sure to include the first (bootstrap) host in this list.
|
||||
|
||||
Using Host Labels
|
||||
-----------------
|
||||
|
||||
You can control which hosts the monitors run on by making use of host labels.
|
||||
To set the ``mon`` label to the appropriate hosts, run this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host label add *<hostname>* mon
|
||||
|
||||
To view the current hosts and labels, run this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host ls
|
||||
ceph config set mon public_network *<mon-cidr-network1>,<mon-cidr-network2>*
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host label add host1 mon
|
||||
ceph orch host label add host2 mon
|
||||
ceph orch host label add host3 mon
|
||||
ceph orch host ls
|
||||
ceph config set mon public_network 10.1.2.0/24,192.168.0.1/24
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
HOST ADDR LABELS STATUS
|
||||
host1 mon
|
||||
host2 mon
|
||||
host3 mon
|
||||
host4
|
||||
host5
|
||||
|
||||
Tell cephadm to deploy monitors based on the label by running this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mon label:mon
|
||||
|
||||
See also :ref:`host labels <orchestrator-host-labels>`.
|
||||
|
||||
Deploying Monitors on a Particular Network
|
||||
------------------------------------------
|
||||
@ -125,7 +80,7 @@ run this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch daemon add mon *<host1:ip-or-network1> [<host1:ip-or-network-2>...]*
|
||||
ceph orch daemon add mon *<host1:ip-or-network1>*
|
||||
|
||||
For example, to deploy a second monitor on ``newhost1`` using an IP
|
||||
address ``10.1.2.123`` and a third monitor on ``newhost2`` in
|
||||
@ -137,52 +92,80 @@ run this command:
|
||||
ceph orch daemon add mon newhost1:10.1.2.123
|
||||
ceph orch daemon add mon newhost2:10.1.2.0/24
|
||||
|
||||
.. note::
|
||||
The **apply** command can be confusing. For this reason, we recommend using
|
||||
YAML specifications.
|
||||
Now, enable automatic placement of Daemons
|
||||
|
||||
Each ``ceph orch apply mon`` command supersedes the one before it.
|
||||
This means that you must use the proper comma-separated list-based
|
||||
syntax when you want to apply monitors to more than one host.
|
||||
If you do not use the proper syntax, you will clobber your work
|
||||
as you go.
|
||||
.. prompt:: bash #
|
||||
|
||||
For example:
|
||||
ceph orch apply mon --placement="newhost1,newhost2,newhost3" --dry-run
|
||||
|
||||
.. prompt:: bash #
|
||||
See :ref:`orchestrator-cli-placement-spec` for details of specifying
|
||||
the placement of daemons.
|
||||
|
||||
ceph orch apply mon host1
|
||||
ceph orch apply mon host2
|
||||
ceph orch apply mon host3
|
||||
Finally apply this new placement by dropping ``--dry-run``
|
||||
|
||||
This results in only one host having a monitor applied to it: host 3.
|
||||
.. prompt:: bash #
|
||||
|
||||
(The first command creates a monitor on host1. Then the second command
|
||||
clobbers the monitor on host1 and creates a monitor on host2. Then the
|
||||
third command clobbers the monitor on host2 and creates a monitor on
|
||||
host3. In this scenario, at this point, there is a monitor ONLY on
|
||||
host3.)
|
||||
ceph orch apply mon --placement="newhost1,newhost2,newhost3"
|
||||
|
||||
To make certain that a monitor is applied to each of these three hosts,
|
||||
run a command like this:
|
||||
|
||||
.. prompt:: bash #
|
||||
Moving Monitors to a Different Network
|
||||
--------------------------------------
|
||||
|
||||
ceph orch apply mon "host1,host2,host3"
|
||||
To move Monitors to a new network, deploy new monitors on the new network and
|
||||
subsequently remove monitors from the old network. It is not advised to
|
||||
modify and inject the ``monmap`` manually.
|
||||
|
||||
There is another way to apply monitors to multiple hosts: a ``yaml`` file
|
||||
can be used. Instead of using the "ceph orch apply mon" commands, run a
|
||||
command of this form:
|
||||
First, disable the automated placement of daemons:
|
||||
|
||||
.. prompt:: bash #
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply -i file.yaml
|
||||
ceph orch apply mon --unmanaged
|
||||
|
||||
Here is a sample **file.yaml** file::
|
||||
To deploy each additional monitor:
|
||||
|
||||
service_type: mon
|
||||
placement:
|
||||
hosts:
|
||||
- host1
|
||||
- host2
|
||||
- host3
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch daemon add mon *<newhost1:ip-or-network1>*
|
||||
|
||||
For example, to deploy a second monitor on ``newhost1`` using an IP
|
||||
address ``10.1.2.123`` and a third monitor on ``newhost2`` in
|
||||
network ``10.1.2.0/24``, run the following commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mon --unmanaged
|
||||
ceph orch daemon add mon newhost1:10.1.2.123
|
||||
ceph orch daemon add mon newhost2:10.1.2.0/24
|
||||
|
||||
Subsequently remove monitors from the old network:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch daemon rm *mon.<oldhost1>*
|
||||
|
||||
Update the ``public_network``:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mon public_network *<mon-cidr-network>*
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mon public_network 10.1.2.0/24
|
||||
|
||||
Now, enable automatic placement of Daemons
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mon --placement="newhost1,newhost2,newhost3" --dry-run
|
||||
|
||||
See :ref:`orchestrator-cli-placement-spec` for details of specifying
|
||||
the placement of daemons.
|
||||
|
||||
Finally apply this new placement by dropping ``--dry-run``
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mon --placement="newhost1,newhost2,newhost3"
|
||||
|
@ -52,12 +52,6 @@ cluster (which had no monitoring stack) to cephadm management.)
|
||||
To set up monitoring on a Ceph cluster that has no monitoring, follow the
|
||||
steps below:
|
||||
|
||||
#. Enable the Prometheus module in the ceph-mgr daemon. This exposes the internal Ceph metrics so that Prometheus can scrape them:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph mgr module enable prometheus
|
||||
|
||||
#. Deploy a node-exporter service on every node of the cluster. The node-exporter provides host-level metrics like CPU and memory utilization:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
@ -2,28 +2,40 @@
|
||||
Cephadm Operations
|
||||
==================
|
||||
|
||||
.. _watching_cephadm_logs:
|
||||
|
||||
Watching cephadm log messages
|
||||
=============================
|
||||
|
||||
Cephadm logs to the ``cephadm`` cluster log channel, meaning you can
|
||||
monitor progress in realtime with::
|
||||
Cephadm writes logs to the ``cephadm`` cluster log channel. You can
|
||||
monitor Ceph's activity in real time by reading the logs as they fill
|
||||
up. Run the following command to see the logs in real time:
|
||||
|
||||
# ceph -W cephadm
|
||||
.. prompt:: bash #
|
||||
|
||||
By default it will show info-level events and above. To see
|
||||
debug-level messages too::
|
||||
ceph -W cephadm
|
||||
|
||||
# ceph config set mgr mgr/cephadm/log_to_cluster_level debug
|
||||
# ceph -W cephadm --watch-debug
|
||||
By default, this command shows info-level events and above. To see
|
||||
debug-level messages as well as info-level events, run the following
|
||||
commands:
|
||||
|
||||
Be careful: the debug messages are very verbose!
|
||||
.. prompt:: bash #
|
||||
|
||||
You can see recent events with::
|
||||
ceph config set mgr mgr/cephadm/log_to_cluster_level debug
|
||||
ceph -W cephadm --watch-debug
|
||||
|
||||
# ceph log last cephadm
|
||||
.. warning::
|
||||
|
||||
The debug messages are very verbose!
|
||||
|
||||
You can see recent events by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph log last cephadm
|
||||
|
||||
These events are also logged to the ``ceph.cephadm.log`` file on
|
||||
monitor hosts and to the monitor daemons' stderr.
|
||||
monitor hosts as well as to the monitor daemons' stderr.
|
||||
|
||||
|
||||
.. _cephadm-logs:
|
||||
@ -31,45 +43,68 @@ monitor hosts and to the monitor daemons' stderr.
|
||||
Ceph daemon logs
|
||||
================
|
||||
|
||||
Logging to stdout
|
||||
-----------------
|
||||
Logging to journald
|
||||
-------------------
|
||||
|
||||
Traditionally, Ceph daemons have logged to ``/var/log/ceph``. By
|
||||
default, cephadm daemons log to stderr and the logs are
|
||||
captured by the container runtime environment. For most systems, by
|
||||
default, these logs are sent to journald and accessible via
|
||||
``journalctl``.
|
||||
Ceph daemons traditionally write logs to ``/var/log/ceph``. Ceph daemons log to
|
||||
journald by default and Ceph logs are captured by the container runtime
|
||||
environment. They are accessible via ``journalctl``.
|
||||
|
||||
.. note:: Prior to Quincy, ceph daemons logged to stderr.
|
||||
|
||||
Example of logging to journald
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
For example, to view the logs for the daemon ``mon.foo`` for a cluster
|
||||
with ID ``5c5a50ae-272a-455d-99e9-32c6a013e694``, the command would be
|
||||
something like::
|
||||
something like:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
journalctl -u ceph-5c5a50ae-272a-455d-99e9-32c6a013e694@mon.foo
|
||||
|
||||
This works well for normal operations when logging levels are low.
|
||||
|
||||
To disable logging to stderr::
|
||||
|
||||
ceph config set global log_to_stderr false
|
||||
ceph config set global mon_cluster_log_to_stderr false
|
||||
|
||||
Logging to files
|
||||
----------------
|
||||
|
||||
You can also configure Ceph daemons to log to files instead of stderr,
|
||||
just like they have in the past. When logging to files, Ceph logs appear
|
||||
in ``/var/log/ceph/<cluster-fsid>``.
|
||||
You can also configure Ceph daemons to log to files instead of to
|
||||
journald if you prefer logs to appear in files (as they did in earlier,
|
||||
pre-cephadm, pre-Octopus versions of Ceph). When Ceph logs to files,
|
||||
the logs appear in ``/var/log/ceph/<cluster-fsid>``. If you choose to
|
||||
configure Ceph to log to files instead of to journald, remember to
|
||||
configure Ceph so that it will not log to journald (the commands for
|
||||
this are covered below).
|
||||
|
||||
To enable logging to files::
|
||||
Enabling logging to files
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To enable logging to files, run the following commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set global log_to_file true
|
||||
ceph config set global mon_cluster_log_to_file true
|
||||
|
||||
We recommend disabling logging to stderr (see above) or else everything
|
||||
will be logged twice::
|
||||
Disabling logging to journald
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If you choose to log to files, we recommend disabling logging to journald or else
|
||||
everything will be logged twice. Run the following commands to disable logging
|
||||
to stderr:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set global log_to_stderr false
|
||||
ceph config set global mon_cluster_log_to_stderr false
|
||||
ceph config set global log_to_journald false
|
||||
ceph config set global mon_cluster_log_to_journald false
|
||||
|
||||
.. note:: You can change the default by passing --log-to-file during
|
||||
bootstrapping a new cluster.
|
||||
|
||||
Modifying the log retention schedule
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
By default, cephadm sets up log rotation on each host to rotate these
|
||||
files. You can configure the logging retention schedule by modifying
|
||||
@ -79,12 +114,13 @@ files. You can configure the logging retention schedule by modifying
|
||||
Data location
|
||||
=============
|
||||
|
||||
Cephadm daemon data and logs in slightly different locations than older
|
||||
versions of ceph:
|
||||
Cephadm stores daemon data and logs in different locations than did
|
||||
older, pre-cephadm (pre Octopus) versions of ceph:
|
||||
|
||||
* ``/var/log/ceph/<cluster-fsid>`` contains all cluster logs. Note
|
||||
that by default cephadm logs via stderr and the container runtime,
|
||||
so these logs are normally not present.
|
||||
* ``/var/log/ceph/<cluster-fsid>`` contains all cluster logs. By
|
||||
default, cephadm logs via stderr and the container runtime. These
|
||||
logs will not exist unless you have enabled logging to files as
|
||||
described in `cephadm-logs`_.
|
||||
* ``/var/lib/ceph/<cluster-fsid>`` contains all cluster daemon data
|
||||
(besides logs).
|
||||
* ``/var/lib/ceph/<cluster-fsid>/<daemon-name>`` contains all data for
|
||||
@ -98,58 +134,69 @@ versions of ceph:
|
||||
Disk usage
|
||||
----------
|
||||
|
||||
Because a few Ceph daemons may store a significant amount of data in
|
||||
``/var/lib/ceph`` (notably, the monitors and prometheus), we recommend
|
||||
moving this directory to its own disk, partition, or logical volume so
|
||||
that it does not fill up the root file system.
|
||||
Because a few Ceph daemons (notably, the monitors and prometheus) store a
|
||||
large amount of data in ``/var/lib/ceph``, we recommend moving this
|
||||
directory to its own disk, partition, or logical volume so that it does not
|
||||
fill up the root file system.
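
A minimal sketch of such a move, assuming an existing volume group ``vg0`` and
that the Ceph daemons on the host are stopped first (device names and the size
are placeholders):

.. prompt:: bash #

   lvcreate -n ceph -L 100G vg0
   mkfs.xfs /dev/vg0/ceph
   mount /dev/vg0/ceph /mnt
   rsync -a /var/lib/ceph/ /mnt/
   umount /mnt
   rm -rf /var/lib/ceph/*
   mount /dev/vg0/ceph /var/lib/ceph    # add a matching /etc/fstab entry as well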
|
||||
|
||||
|
||||
Health checks
|
||||
=============
|
||||
The cephadm module provides additional healthchecks to supplement the default healthchecks
|
||||
provided by the Cluster. These additional healthchecks fall into two categories;
|
||||
The cephadm module provides additional health checks to supplement the
|
||||
default health checks provided by the Cluster. These additional health
|
||||
checks fall into two categories:
|
||||
|
||||
- **cephadm operations**: Healthchecks in this category are always executed when the cephadm module is active.
|
||||
- **cluster configuration**: These healthchecks are *optional*, and focus on the configuration of the hosts in
|
||||
the cluster
|
||||
- **cephadm operations**: Health checks in this category are always
|
||||
executed when the cephadm module is active.
|
||||
- **cluster configuration**: These health checks are *optional*, and
|
||||
focus on the configuration of the hosts in the cluster.
|
||||
|
||||
CEPHADM Operations
|
||||
------------------
|
||||
|
||||
CEPHADM_PAUSED
|
||||
^^^^^^^^^^^^^^
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Cephadm background work has been paused with ``ceph orch pause``. Cephadm
|
||||
continues to perform passive monitoring activities (like checking
|
||||
host and daemon status), but it will not make any changes (like deploying
|
||||
or removing daemons).
|
||||
This indicates that cephadm background work has been paused with
|
||||
``ceph orch pause``. Cephadm continues to perform passive monitoring
|
||||
activities (like checking host and daemon status), but it will not
|
||||
make any changes (like deploying or removing daemons).
|
||||
|
||||
Resume cephadm work with::
|
||||
Resume cephadm work by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch resume
|
||||
|
||||
.. _cephadm-stray-host:
|
||||
|
||||
CEPHADM_STRAY_HOST
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
One or more hosts have running Ceph daemons but are not registered as
|
||||
hosts managed by *cephadm*. This means that those services cannot
|
||||
currently be managed by cephadm (e.g., restarted, upgraded, included
|
||||
in `ceph orch ps`).
|
||||
This indicates that one or more hosts have Ceph daemons that are
|
||||
running, but are not registered as hosts managed by *cephadm*. This
|
||||
means that those services cannot currently be managed by cephadm
|
||||
(e.g., restarted, upgraded, included in `ceph orch ps`).
|
||||
|
||||
You can manage the host(s) with::
|
||||
You can manage the host(s) by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host add *<hostname>*
|
||||
|
||||
Note that you may need to configure SSH access to the remote host
|
||||
before this will work.
|
||||
.. note::
|
||||
|
||||
You might need to configure SSH access to the remote host
|
||||
before this will work.
|
||||
|
||||
Alternatively, you can manually connect to the host and ensure that
|
||||
services on that host are removed or migrated to a host that is
|
||||
managed by *cephadm*.
|
||||
|
||||
You can also disable this warning entirely with::
|
||||
This warning can be disabled entirely by running the following
|
||||
command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/warn_on_stray_hosts false
|
||||
|
||||
@ -157,7 +204,7 @@ See :ref:`cephadm-fqdn` for more information about host names and
|
||||
domain names.
|
||||
|
||||
CEPHADM_STRAY_DAEMON
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
One or more Ceph daemons are running but not are not managed by
|
||||
*cephadm*. This may be because they were deployed using a different
|
||||
@ -170,12 +217,14 @@ by cephadm; see :ref:`cephadm-adoption`. For stateless daemons, it is
|
||||
usually easiest to provision a new daemon with the ``ceph orch apply``
|
||||
command and then stop the unmanaged daemon.
|
||||
|
||||
This warning can be disabled entirely with::
|
||||
This warning can be disabled entirely by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/warn_on_stray_daemons false
|
||||
|
||||
CEPHADM_HOST_CHECK_FAILED
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
One or more hosts have failed the basic cephadm host check, which verifies
|
||||
that (1) the host is reachable and cephadm can be executed there, and (2)
|
||||
@ -183,58 +232,80 @@ that the host satisfies basic prerequisites, like a working container
|
||||
runtime (podman or docker) and working time synchronization.
|
||||
If this test fails, cephadm will not be able to manage services on that host.
|
||||
|
||||
You can manually run this check with::
|
||||
You can manually run this check by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph cephadm check-host *<hostname>*
|
||||
|
||||
You can remove a broken host from management with::
|
||||
You can remove a broken host from management by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host rm *<hostname>*
|
||||
|
||||
You can disable this health warning with::
|
||||
You can disable this health warning by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/warn_on_failed_host_check false
|
||||
|
||||
Cluster Configuration Checks
|
||||
----------------------------
|
||||
Cephadm periodically scans each of the hosts in the cluster, to understand the state
|
||||
of the OS, disks, NICs etc. These facts can then be analysed for consistency across the hosts
|
||||
in the cluster to identify any configuration anomalies.
|
||||
Cephadm periodically scans each of the hosts in the cluster in order
|
||||
to understand the state of the OS, disks, NICs etc. These facts can
|
||||
then be analysed for consistency across the hosts in the cluster to
|
||||
identify any configuration anomalies.
|
||||
|
||||
The configuration checks are an **optional** feature, enabled by the following command
|
||||
::
|
||||
Enabling Cluster Configuration Checks
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The configuration checks are an **optional** feature, and are enabled
|
||||
by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/config_checks_enabled true
|
||||
|
||||
The configuration checks are triggered after each host scan (1m). The cephadm log entries will
|
||||
show the current state and outcome of the configuration checks as follows;
|
||||
States Returned by Cluster Configuration Checks
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Disabled state (config_checks_enabled false)
|
||||
::
|
||||
The configuration checks are triggered after each host scan (1m). The
|
||||
cephadm log entries will show the current state and outcome of the
|
||||
configuration checks as follows:
|
||||
|
||||
Disabled state (config_checks_enabled false):
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
ALL cephadm checks are disabled, use 'ceph config set mgr mgr/cephadm/config_checks_enabled true' to enable
|
||||
|
||||
Enabled state (config_checks_enabled true)
|
||||
::
|
||||
Enabled state (config_checks_enabled true):
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
CEPHADM 8/8 checks enabled and executed (0 bypassed, 0 disabled). No issues detected
|
||||
|
||||
The configuration checks themselves are managed through several cephadm sub-commands.
|
||||
Managing Configuration Checks (subcommands)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To determine whether the configuration checks are enabled, you can use the following command
|
||||
::
|
||||
The configuration checks themselves are managed through several cephadm subcommands.
|
||||
|
||||
To determine whether the configuration checks are enabled, run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph cephadm config-check status
|
||||
|
||||
This command will return the status of the configuration checker as either "Enabled" or "Disabled".
|
||||
This command returns the status of the configuration checker as either "Enabled" or "Disabled".
|
||||
|
||||
|
||||
Listing all the configuration checks and their current state
|
||||
::
|
||||
To list all the configuration checks and their current states, run the following command:
|
||||
|
||||
ceph cephadm config-check ls
|
||||
.. code-block:: console
|
||||
|
||||
# ceph cephadm config-check ls
|
||||
|
||||
e.g.
|
||||
NAME HEALTHCHECK STATUS DESCRIPTION
|
||||
kernel_security CEPHADM_CHECK_KERNEL_LSM enabled checks SELINUX/Apparmor profiles are consistent across cluster hosts
|
||||
os_subscription CEPHADM_CHECK_SUBSCRIPTION enabled checks subscription states are consistent for all cluster hosts
|
||||
@ -245,128 +316,191 @@ Listing all the configuration checks and their current state
|
||||
ceph_release CEPHADM_CHECK_CEPH_RELEASE enabled check for Ceph version consistency - ceph daemons should be on the same release (unless upgrade is active)
|
||||
kernel_version CEPHADM_CHECK_KERNEL_VERSION enabled checks that the MAJ.MIN of the kernel on Ceph hosts is consistent
|
||||
|
||||
The name of each configuration check, can then be used to enable or disable a specific check.
|
||||
::
|
||||
The name of each configuration check can be used to enable or disable a specific check by running a command of the following form:
|
||||
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph cephadm config-check disable <name>
|
||||
|
||||
eg.
|
||||
For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph cephadm config-check disable kernel_security
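
To turn a check back on, the corresponding ``enable`` subcommand can be used
(shown here on the assumption that it mirrors ``disable``):

.. prompt:: bash #

   ceph cephadm config-check enable kernel_security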
|
||||
|
||||
CEPHADM_CHECK_KERNEL_LSM
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Each host within the cluster is expected to operate within the same Linux Security Module (LSM) state. For example,
|
||||
if the majority of the hosts are running with SELINUX in enforcing mode, any host not running in this mode
|
||||
would be flagged as an anomaly and a healtcheck (WARNING) state raised.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Each host within the cluster is expected to operate within the same Linux
|
||||
Security Module (LSM) state. For example, if the majority of the hosts are
|
||||
running with SELINUX in enforcing mode, any host not running in this mode is
|
||||
flagged as an anomaly, and a health check (WARNING) state is raised.
|
||||
|
||||
CEPHADM_CHECK_SUBSCRIPTION
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
This check relates to the status of vendor subscription. This check is only performed for hosts using RHEL, but helps
|
||||
to confirm that all your hosts are covered by an active subscription so patches and updates
|
||||
are available.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
This check relates to the status of vendor subscription. This check is
|
||||
performed only for hosts using RHEL, but helps to confirm that all hosts are
|
||||
covered by an active subscription, which ensures that patches and updates are
|
||||
available.
|
||||
|
||||
CEPHADM_CHECK_PUBLIC_MEMBERSHIP
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
All members of the cluster should have NICs configured on at least one of the public network subnets. Hosts
|
||||
that are not on the public network will rely on routing which may affect performance
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
All members of the cluster should have NICs configured on at least one of the
|
||||
public network subnets. Hosts that are not on the public network will rely on
|
||||
routing, which may affect performance.
|
||||
|
||||
CEPHADM_CHECK_MTU
|
||||
^^^^^^^^^^^^^^^^^
|
||||
The MTU of the NICs on OSDs can be a key factor in consistent performance. This check examines hosts
|
||||
that are running OSD services to ensure that the MTU is configured consistently within the cluster. This is
|
||||
determined by establishing the MTU setting that the majority of hosts are using, with any anomalies being
|
||||
resulting in a Ceph healthcheck.
|
||||
~~~~~~~~~~~~~~~~~
|
||||
The MTU of the NICs on OSDs can be a key factor in consistent performance. This
|
||||
check examines hosts that are running OSD services to ensure that the MTU is
|
||||
configured consistently within the cluster. This is determined by establishing
|
||||
the MTU setting that the majority of hosts is using. Any anomalies result in a
|
||||
Ceph health check.
|
||||
|
||||
CEPHADM_CHECK_LINKSPEED
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Similar to the MTU check, linkspeed consistency is also a factor in consistent cluster performance.
|
||||
This check determines the linkspeed shared by the majority of "OSD hosts", resulting in a healthcheck for
|
||||
any hosts that are set at a lower linkspeed rate.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
This check is similar to the MTU check. Linkspeed consistency is a factor in
|
||||
consistent cluster performance, just as the MTU of the NICs on the OSDs is.
|
||||
This check determines the linkspeed shared by the majority of OSD hosts, and a
|
||||
health check is run for any hosts that are set at a lower linkspeed rate.
|
||||
|
||||
CEPHADM_CHECK_NETWORK_MISSING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
The public_network and cluster_network settings support subnet definitions for IPv4 and IPv6. If these
|
||||
settings are not found on any host in the cluster a healthcheck is raised.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
The `public_network` and `cluster_network` settings support subnet definitions
|
||||
for IPv4 and IPv6. If these settings are not found on any host in the cluster,
|
||||
a health check is raised.
|
||||
|
||||
CEPHADM_CHECK_CEPH_RELEASE
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Under normal operations, the ceph cluster should be running daemons under the same ceph release (i.e. all
|
||||
pacific). This check looks at the active release for each daemon, and reports any anomalies as a
|
||||
healthcheck. *This check is bypassed if an upgrade process is active within the cluster.*
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Under normal operations, the Ceph cluster runs daemons under the same Ceph
release (for example, all daemons run Octopus). This check determines the
active release for each daemon, and
|
||||
reports any anomalies as a healthcheck. *This check is bypassed if an upgrade
|
||||
process is active within the cluster.*
|
||||
|
||||
CEPHADM_CHECK_KERNEL_VERSION
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
The OS kernel version (maj.min) is checked for consistency across the hosts. Once again, the
|
||||
majority of the hosts is used as the basis of identifying anomalies.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
The OS kernel version (maj.min) is checked for consistency across the hosts.
|
||||
The kernel version of the majority of the hosts is used as the basis for
|
||||
identifying anomalies.
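
When one of these checks fails, the corresponding warning also shows up in the
cluster's health output. As a quick sketch, the full text of any raised check
(for example ``CEPHADM_CHECK_MTU``) can be inspected with:

.. prompt:: bash #

   ceph health detail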
|
||||
|
||||
.. _client_keyrings_and_configs:
|
||||
|
||||
Client keyrings and configs
|
||||
===========================
|
||||
|
||||
Cephadm can distribute copies of the ``ceph.conf`` and client keyring
|
||||
files to hosts. For example, it is usually a good idea to store a
|
||||
copy of the config and ``client.admin`` keyring on any hosts that will
|
||||
be used to administer the cluster via the CLI. By default, cephadm will do
|
||||
this for any nodes with the ``_admin`` label (which normally includes the bootstrap
|
||||
host).
|
||||
Cephadm can distribute copies of the ``ceph.conf`` file and client keyring
|
||||
files to hosts. It is usually a good idea to store a copy of the config and
|
||||
``client.admin`` keyring on any host used to administer the cluster via the
|
||||
CLI. By default, cephadm does this for any nodes that have the ``_admin``
|
||||
label (which normally includes the bootstrap host).
|
||||
|
||||
When a client keyring is placed under management, cephadm will:
|
||||
|
||||
- build a list of target hosts based on the specified placement spec (see :ref:`orchestrator-cli-placement-spec`)
|
||||
- build a list of target hosts based on the specified placement spec (see
|
||||
:ref:`orchestrator-cli-placement-spec`)
|
||||
- store a copy of the ``/etc/ceph/ceph.conf`` file on the specified host(s)
|
||||
- store a copy of the keyring file on the specified host(s)
|
||||
- update the ``ceph.conf`` file as needed (e.g., due to a change in the cluster monitors)
|
||||
- update the keyring file if the entity's key is changed (e.g., via ``ceph auth ...`` commands)
|
||||
- ensure the keyring file has the specified ownership and mode
|
||||
- update the keyring file if the entity's key is changed (e.g., via ``ceph
|
||||
auth ...`` commands)
|
||||
- ensure that the keyring file has the specified ownership and specified mode
|
||||
- remove the keyring file when client keyring management is disabled
|
||||
- remove the keyring file from old hosts if the keyring placement spec is updated (as needed)
|
||||
- remove the keyring file from old hosts if the keyring placement spec is
|
||||
updated (as needed)
|
||||
|
||||
To view which client keyrings are currently under management::
|
||||
Listing Client Keyrings
|
||||
-----------------------
|
||||
|
||||
To see the list of client keyrings currently under management, run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch client-keyring ls
|
||||
|
||||
To place a keyring under management::
|
||||
Putting a Keyring Under Management
|
||||
----------------------------------
|
||||
|
||||
To put a keyring under management, run a command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch client-keyring set <entity> <placement> [--mode=<mode>] [--owner=<uid>.<gid>] [--path=<path>]
|
||||
|
||||
- By default, the *path* will be ``/etc/ceph/client.{entity}.keyring``, which is where
|
||||
Ceph looks by default. Be careful specifying alternate locations as existing files
|
||||
may be overwritten.
|
||||
- By default, the *path* is ``/etc/ceph/client.{entity}.keyring``, which is
|
||||
where Ceph looks by default. Be careful when specifying alternate locations,
|
||||
as existing files may be overwritten.
|
||||
- A placement of ``*`` (all hosts) is common.
|
||||
- The mode defaults to ``0600`` and ownership to ``0:0`` (user root, group root).
|
||||
|
||||
For example, to create and deploy a ``client.rbd`` key to hosts with the ``rbd-client`` label and group readable by uid/gid 107 (qemu),::
|
||||
For example, to create a ``client.rbd`` key and deploy it to hosts with the
|
||||
``rbd-client`` label and make it group readable by uid/gid 107 (qemu), run the
|
||||
following commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get-or-create-key client.rbd mon 'profile rbd' mgr 'profile rbd' osd 'profile rbd pool=my_rbd_pool'
|
||||
ceph orch client-keyring set client.rbd label:rbd-client --owner 107:107 --mode 640
|
||||
|
||||
The resulting keyring file is::
|
||||
The resulting keyring file is:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
-rw-r-----. 1 qemu qemu 156 Apr 21 08:47 /etc/ceph/client.client.rbd.keyring
|
||||
|
||||
To disable management of a keyring file::
|
||||
Disabling Management of a Keyring File
|
||||
--------------------------------------
|
||||
|
||||
To disable management of a keyring file, run a command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch client-keyring rm <entity>
|
||||
|
||||
Note that this will delete any keyring files for this entity that were previously written
|
||||
to cluster nodes.
|
||||
.. note::
|
||||
|
||||
This deletes any keyring files for this entity that were previously written
|
||||
to cluster nodes.
|
||||
|
||||
.. _etc_ceph_conf_distribution:
|
||||
|
||||
/etc/ceph/ceph.conf
|
||||
===================
|
||||
|
||||
It may also be useful to distribute ``ceph.conf`` files to hosts without an associated
|
||||
client keyring file. By default, cephadm only deploys a ``ceph.conf`` file to hosts where a client keyring
|
||||
is also distributed (see above). To write config files to hosts without client keyrings::
|
||||
Distributing ceph.conf to hosts that have no keyrings
|
||||
-----------------------------------------------------
|
||||
|
||||
It might be useful to distribute ``ceph.conf`` files to hosts without an
|
||||
associated client keyring file. By default, cephadm deploys only a
|
||||
``ceph.conf`` file to hosts where a client keyring is also distributed (see
|
||||
above). To write config files to hosts without client keyrings, run the
|
||||
following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf true
|
||||
|
||||
By default, the configs are written to all hosts (i.e., those listed
|
||||
by ``ceph orch host ls``). To specify which hosts get a ``ceph.conf``::
|
||||
Using Placement Specs to specify which hosts get keyrings
|
||||
---------------------------------------------------------
|
||||
|
||||
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts <placement spec>
|
||||
By default, the configs are written to all hosts (i.e., those listed by ``ceph
|
||||
orch host ls``). To specify which hosts get a ``ceph.conf``, run a command of
|
||||
the following form:
|
||||
|
||||
For example, to distribute configs to hosts with the ``bare_config`` label,::
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts label:bare_config
|
||||
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts <placement spec>
|
||||
|
||||
For example, to distribute configs to hosts with the ``bare_config`` label, run
|
||||
the following command:
|
||||
|
||||
Distributing ceph.conf to hosts tagged with bare_config
|
||||
-------------------------------------------------------
|
||||
|
||||
For example, to distribute configs to hosts with the ``bare_config`` label, run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts label:bare_config
|
||||
|
||||
(See :ref:`orchestrator-cli-placement-spec` for more information about placement specs.)
|
||||
|
@ -7,7 +7,7 @@ OSD Service
|
||||
List Devices
|
||||
============
|
||||
|
||||
``ceph-volume`` scans each cluster in the host from time to time in order
|
||||
``ceph-volume`` scans each host in the cluster from time to time in order
|
||||
to determine which devices are present and whether they are eligible to be
|
||||
used as OSDs.
|
||||
|
||||
@ -211,6 +211,7 @@ If you want to avoid this behavior (disable automatic creation of OSD on availab
|
||||
|
||||
* For cephadm, see also :ref:`cephadm-spec-unmanaged`.
|
||||
|
||||
.. _cephadm-osd-removal:
|
||||
|
||||
Remove an OSD
|
||||
=============
|
||||
@ -347,7 +348,7 @@ zap`` on the remote host.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
orch device zap <hostname> <path>
|
||||
ceph orch device zap <hostname> <path>
|
||||
|
||||
Example command:
|
||||
|
||||
|
@ -82,6 +82,41 @@ something like:
|
||||
See :ref:`orchestrator-cli-placement-spec` for details of the placement
|
||||
specification. See :ref:`multisite` for more information of setting up multisite RGW.
|
||||
|
||||
Setting up HTTPS
|
||||
----------------
|
||||
|
||||
In order to enable HTTPS for RGW services, apply a spec file following this scheme:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
service_type: rgw
|
||||
service_id: myrgw
|
||||
spec:
|
||||
rgw_frontend_ssl_certificate: |
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
|
||||
ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
|
||||
IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
|
||||
YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
|
||||
ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
|
||||
-----END PRIVATE KEY-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
|
||||
ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
|
||||
IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
|
||||
YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
|
||||
ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
|
||||
-----END CERTIFICATE-----
|
||||
ssl: true
|
||||
|
||||
Then apply this yaml document:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply -i myrgw.yaml
|
||||
|
||||
Note the value of ``rgw_frontend_ssl_certificate`` is a literal string as
|
||||
indicated by a ``|`` character preserving newline characters.
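
After the spec has been applied, you can confirm that the RGW service was
configured with the certificate by inspecting the service. This is only a
sketch, with ``rgw.myrgw`` being the service name implied by the example spec
above:

.. prompt:: bash #

   ceph orch ls --service_name=rgw.myrgw --format yaml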
|
||||
|
||||
.. _orchestrator-haproxy-service-spec:
|
||||
|
||||
|
@ -158,6 +158,54 @@ or in a YAML files.
|
||||
|
||||
cephadm will not deploy daemons on hosts with the ``_no_schedule`` label; see :ref:`cephadm-special-host-labels`.
|
||||
|
||||
.. note::
|
||||
The **apply** command can be confusing. For this reason, we recommend using
|
||||
YAML specifications.
|
||||
|
||||
Each ``ceph orch apply <service-name>`` command supersedes the one before it.
|
||||
If you do not use the proper syntax, you will clobber your work
|
||||
as you go.
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mon host1
|
||||
ceph orch apply mon host2
|
||||
ceph orch apply mon host3
|
||||
|
||||
This results in only one host having a monitor applied to it: host3.
|
||||
|
||||
(The first command creates a monitor on host1. Then the second command
|
||||
clobbers the monitor on host1 and creates a monitor on host2. Then the
|
||||
third command clobbers the monitor on host2 and creates a monitor on
|
||||
host3. In this scenario, at this point, there is a monitor ONLY on
|
||||
host3.)
|
||||
|
||||
To make certain that a monitor is applied to each of these three hosts,
|
||||
run a command like this:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mon "host1,host2,host3"
|
||||
|
||||
There is another way to apply monitors to multiple hosts: a ``yaml`` file
|
||||
can be used. Instead of using the "ceph orch apply mon" commands, run a
|
||||
command of this form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply -i file.yaml
|
||||
|
||||
Here is a sample **file.yaml** file::
|
||||
|
||||
service_type: mon
|
||||
placement:
|
||||
hosts:
|
||||
- host1
|
||||
- host2
|
||||
- host3
|
||||
|
||||
Explicit placements
|
||||
-------------------
|
||||
|
||||
@ -192,7 +240,39 @@ and ``=name`` specifies the name of the new monitor.
|
||||
Placement by labels
|
||||
-------------------
|
||||
|
||||
Daemons can be explicitly placed on hosts that match a specific label:
|
||||
Daemon placement can be limited to hosts that match a specific label. To set
|
||||
a label ``mylabel`` to the appropriate hosts, run this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host label add *<hostname>* mylabel
|
||||
|
||||
To view the current hosts and labels, run this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host ls
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host label add host1 mylabel
|
||||
ceph orch host label add host2 mylabel
|
||||
ceph orch host label add host3 mylabel
|
||||
ceph orch host ls
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
HOST ADDR LABELS STATUS
|
||||
host1 mylabel
|
||||
host2 mylabel
|
||||
host3 mylabel
|
||||
host4
|
||||
host5
|
||||
|
||||
Now, tell cephadm to deploy daemons based on the label by running
|
||||
this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -240,8 +320,8 @@ Or in YAML:
|
||||
host_pattern: "*"
|
||||
|
||||
|
||||
Setting a limit
|
||||
---------------
|
||||
Changing the number of monitors
|
||||
-------------------------------
|
||||
|
||||
By specifying ``count``, only the number of daemons specified will be created:
|
||||
|
||||
@ -402,7 +482,17 @@ To disable the automatic management of dameons, set ``unmanaged=True`` in the
|
||||
Deploying a daemon on a host manually
|
||||
-------------------------------------
|
||||
|
||||
To manually deploy a daemon on a host, run a command of the following form:
|
||||
.. note::
|
||||
|
||||
This workflow has a very limited use case and should only be used
|
||||
in rare circumstances.
|
||||
|
||||
To manually deploy a daemon on a host, follow these steps:
|
||||
|
||||
Modify the service spec for a service by getting the
|
||||
existing spec, adding ``unmanaged: true``, and applying the modified spec.
|
||||
|
||||
Then manually deploy the daemon using the following:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -414,6 +504,13 @@ For example :
|
||||
|
||||
ceph orch daemon add mgr --placement=my_host
|
||||
|
||||
.. note::
|
||||
|
||||
Removing ``unmanaged: true`` from the service spec will
|
||||
enable the reconciliation loop for this service and will
|
||||
potentially lead to the removal of the daemon, depending
|
||||
on the placement spec.
|
||||
|
||||
Removing a daemon from a host manually
|
||||
--------------------------------------
|
||||
|
||||
|
@ -1,46 +1,70 @@
|
||||
Troubleshooting
|
||||
===============
|
||||
|
||||
Sometimes there is a need to investigate why a cephadm command failed or why
|
||||
a specific service no longer runs properly.
|
||||
You might need to investigate why a cephadm command failed
|
||||
or why a certain service no longer runs properly.
|
||||
|
||||
As cephadm deploys daemons as containers, troubleshooting daemons is slightly
|
||||
different. Here are a few tools and commands to help investigating issues.
|
||||
Cephadm deploys daemons as containers. This means that
|
||||
troubleshooting those containerized daemons might work
|
||||
differently than you expect (and that is certainly true if
|
||||
you expect this troubleshooting to work the way that
|
||||
troubleshooting does when the daemons involved aren't
|
||||
containerized).
|
||||
|
||||
Here are some tools and commands to help you troubleshoot
|
||||
your Ceph environment.
|
||||
|
||||
.. _cephadm-pause:
|
||||
|
||||
Pausing or disabling cephadm
|
||||
----------------------------
|
||||
|
||||
If something goes wrong and cephadm is doing behaving in a way you do
|
||||
not like, you can pause most background activity with::
|
||||
If something goes wrong and cephadm is behaving badly, you can
|
||||
pause most of the Ceph cluster's background activity by running
|
||||
the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch pause
|
||||
|
||||
This will stop any changes, but cephadm will still periodically check hosts to
|
||||
refresh its inventory of daemons and devices. You can disable cephadm
|
||||
completely with::
|
||||
This stops all changes in the Ceph cluster, but cephadm will
|
||||
still periodically check hosts to refresh its inventory of
|
||||
daemons and devices. You can disable cephadm completely by
|
||||
running the following commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch set backend ''
|
||||
ceph mgr module disable cephadm
|
||||
|
||||
This will disable all of the ``ceph orch ...`` CLI commands but the previously
|
||||
deployed daemon containers will still continue to exist and start as they
|
||||
did before.
|
||||
These commands disable all of the ``ceph orch ...`` CLI commands.
|
||||
All previously deployed daemon containers continue to exist and
|
||||
will start as they did before you ran these commands.
|
||||
|
||||
Please refer to :ref:`cephadm-spec-unmanaged` for disabling individual
|
||||
services.
|
||||
See :ref:`cephadm-spec-unmanaged` for information on disabling
|
||||
individual services.
|
||||
|
||||
|
||||
Per-service and per-daemon events
|
||||
---------------------------------
|
||||
|
||||
In order to aid debugging failed daemon deployments, cephadm stores
|
||||
events per service and per daemon. They often contain relevant information::
|
||||
In order to help with the process of debugging failed daemon
|
||||
deployments, cephadm stores events per service and per daemon.
|
||||
These events often contain information relevant to
|
||||
troubleshooting
|
||||
your Ceph cluster.
|
||||
|
||||
Listing service events
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To see the events associated with a certain service, run a
|
||||
command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch ls --service_name=<service-name> --format yaml
|
||||
|
||||
for example:
|
||||
This will return something in the following form:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
@ -58,10 +82,18 @@ for example:
|
||||
- '2021-02-01T12:09:25.264584 service:alertmanager [ERROR] "Failed to apply: Cannot
|
||||
place <AlertManagerSpec for service_name=alertmanager> on unknown_host: Unknown hosts"'
|
||||
|
||||
Or per daemon::
|
||||
Listing daemon events
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To see the events associated with a certain daemon, run a
|
||||
command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch ps --service-name <service-name> --daemon-id <daemon-id> --format yaml
|
||||
|
||||
This will return something in the following form:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
daemon_type: mds
|
||||
@ -77,16 +109,11 @@ Or per daemon::
|
||||
Checking cephadm logs
|
||||
---------------------
|
||||
|
||||
You can monitor the cephadm log in real time with::
|
||||
To learn how to monitor the cephadm logs as they are generated, read :ref:`watching_cephadm_logs`.
|
||||
|
||||
ceph -W cephadm
|
||||
|
||||
You can see the last few messages with::
|
||||
|
||||
ceph log last cephadm
|
||||
|
||||
If you have enabled logging to files, you can see a cephadm log file called
|
||||
``ceph.cephadm.log`` on monitor hosts (see :ref:`cephadm-logs`).
|
||||
If your Ceph cluster has been configured to log events to files, there will be a
|
||||
cephadm log file called ``ceph.cephadm.log`` on all monitor hosts (see
|
||||
:ref:`cephadm-logs` for a more complete explanation of this).
|
||||
|
||||
Gathering log files
|
||||
-------------------
|
||||
@ -190,7 +217,8 @@ Things users can do:
|
||||
[root@mon1 ~]# ssh -F config -i ~/cephadm_private_key root@mon1
|
||||
|
||||
Verifying that the Public Key is Listed in the authorized_keys file
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To verify that the public key is in the authorized_keys file, run the following commands::
|
||||
|
||||
[root@mon1 ~]# cephadm shell -- ceph cephadm get-pub-key > ~/ceph.pub
|
||||
|
@ -12,26 +12,32 @@ The automated upgrade process follows Ceph best practices. For example:
|
||||
* Each daemon is restarted only after Ceph indicates that the cluster
|
||||
will remain available.
|
||||
|
||||
Keep in mind that the Ceph cluster health status is likely to switch to
|
||||
``HEALTH_WARNING`` during the upgrade.
|
||||
.. note::
|
||||
|
||||
The Ceph cluster health status is likely to switch to
|
||||
``HEALTH_WARNING`` during the upgrade.
|
||||
|
||||
.. note::
|
||||
|
||||
In case a host of the cluster is offline, the upgrade is paused.
|
||||
|
||||
|
||||
Starting the upgrade
|
||||
====================
|
||||
|
||||
Before you begin using cephadm to upgrade Ceph, verify that all hosts are currently online and that your cluster is healthy:
|
||||
Before you use cephadm to upgrade Ceph, verify that all hosts are currently online and that your cluster is healthy by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph -s
|
||||
|
||||
To upgrade (or downgrade) to a specific release:
|
||||
To upgrade (or downgrade) to a specific release, run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch upgrade start --ceph-version <version>
|
||||
|
||||
For example, to upgrade to v15.2.1:
|
||||
For example, to upgrade to v15.2.1, run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -76,11 +82,11 @@ Watch the cephadm log by running the following command:
|
||||
Canceling an upgrade
|
||||
====================
|
||||
|
||||
You can stop the upgrade process at any time with:
|
||||
You can stop the upgrade process at any time by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
# ceph orch upgrade stop
|
||||
ceph orch upgrade stop
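
To check whether an upgrade is currently in progress, for example before or
after stopping it, you can query the upgrade status (a brief sketch):

.. prompt:: bash #

   ceph orch upgrade status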
|
||||
|
||||
|
||||
Potential problems
|
||||
@ -91,22 +97,27 @@ There are a few health alerts that can arise during the upgrade process.
|
||||
UPGRADE_NO_STANDBY_MGR
|
||||
----------------------
|
||||
|
||||
This alert means that Ceph requires an active and standby manager daemon in
|
||||
order to proceed, but there is currently no standby.
|
||||
This alert (``UPGRADE_NO_STANDBY_MGR``) means that Ceph does not detect an
|
||||
active standby manager daemon. In order to proceed with the upgrade, Ceph
|
||||
requires an active standby manager daemon (which you can think of in this
|
||||
context as "a second manager").
|
||||
|
||||
You can ensure that Cephadm is configured to run 2 (or more) managers by running the following command:
|
||||
You can ensure that Cephadm is configured to run 2 (or more) managers by
|
||||
running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mgr 2 # or more
|
||||
|
||||
You can check the status of existing mgr daemons by running the following command:
|
||||
You can check the status of existing mgr daemons by running the following
|
||||
command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch ps --daemon-type mgr
|
||||
|
||||
If an existing mgr daemon has stopped, you can try to restart it by running the following command:
|
||||
If an existing mgr daemon has stopped, you can try to restart it by running the
|
||||
following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -115,12 +126,13 @@ If an existing mgr daemon has stopped, you can try to restart it by running the
|
||||
UPGRADE_FAILED_PULL
|
||||
-------------------
|
||||
|
||||
This alert means that Ceph was unable to pull the container image for the
|
||||
target version. This can happen if you specify a version or container image
|
||||
that does not exist (e.g. "1.2.3"), or if the container registry can not
|
||||
be reached by one or more hosts in the cluster.
|
||||
This alert (``UPGRADE_FAILED_PULL``) means that Ceph was unable to pull the
|
||||
container image for the target version. This can happen if you specify a
|
||||
version or container image that does not exist (e.g. "1.2.3"), or if the
|
||||
container registry can not be reached by one or more hosts in the cluster.
|
||||
|
||||
To cancel the existing upgrade and to specify a different target version, run the following commands:
|
||||
To cancel the existing upgrade and to specify a different target version, run
|
||||
the following commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
|
@ -349,24 +349,6 @@ for use in exceptional circumstances. Incorrect use of these
|
||||
commands may cause serious problems, such as an inaccessible
|
||||
file system.
|
||||
|
||||
::
|
||||
|
||||
mds compat rm_compat
|
||||
|
||||
Removes an compatibility feature flag.
|
||||
|
||||
::
|
||||
|
||||
mds compat rm_incompat
|
||||
|
||||
Removes an incompatibility feature flag.
|
||||
|
||||
::
|
||||
|
||||
mds compat show
|
||||
|
||||
Show MDS compatibility flags.
|
||||
|
||||
::
|
||||
|
||||
mds rmfailed
|
||||
@ -379,3 +361,14 @@ This removes a rank from the failed set.
|
||||
|
||||
This command resets the file system state to defaults, except for the name and
|
||||
pools. Non-zero ranks are saved in the stopped set.
|
||||
|
||||
|
||||
::
|
||||
|
||||
fs new <file system name> <metadata pool name> <data pool name> --fscid <fscid> --force
|
||||
|
||||
This command creates a file system with a specific **fscid** (file system cluster ID).
|
||||
You may want to do this when an application expects the file system's ID to be
|
||||
stable after it has been recovered, e.g., after monitor databases are lost and
|
||||
rebuilt. Consequently, file system IDs don't always keep increasing with newer
|
||||
file systems.
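
For example (a sketch using hypothetical file system and pool names), a file
system that previously had fscid 1 could be recreated with::

    fs new cephfs cephfs_metadata cephfs_data --fscid 1 --force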
|
||||
|
@ -15,6 +15,53 @@ Requirements
|
||||
|
||||
.. note:: From Pacific, the nfs mgr module must be enabled prior to use.
|
||||
|
||||
Ganesha Configuration Hierarchy
|
||||
===============================
|
||||
|
||||
Cephadm and rook start the nfs-ganesha daemon with a `bootstrap configuration`
containing a minimal ganesha configuration, create an empty rados `common config`
object in the `nfs-ganesha` pool, and watch this config object. The `mgr/nfs`
module adds rados export object URLs to the common config object. If a cluster
config is set, it creates a `user config` object containing the custom ganesha
configuration and adds its URL to the common config object (see the example
after the diagram below).
|
||||
|
||||
.. ditaa::
|
||||
|
||||
|
||||
rados://$pool/$namespace/export-$i rados://$pool/$namespace/userconf-nfs.$cluster_id
|
||||
(export config) (user config)
|
||||
|
||||
+----------+ +----------+ +----------+ +---------------------------+
|
||||
| | | | | | | |
|
||||
| export-1 | | export-2 | | export-3 | | userconf-nfs.$cluster_id |
|
||||
| | | | | | | |
|
||||
+----+-----+ +----+-----+ +-----+----+ +-------------+-------------+
|
||||
^ ^ ^ ^
|
||||
| | | |
|
||||
+--------------------------------+-------------------------+
|
||||
%url |
|
||||
|
|
||||
+--------+--------+
|
||||
| | rados://$pool/$namespace/conf-nfs.$svc
|
||||
| conf+nfs.$svc | (common config)
|
||||
| |
|
||||
+--------+--------+
|
||||
^
|
||||
|
|
||||
watch_url |
|
||||
+----------------------------------------------+
|
||||
| | |
|
||||
| | | RADOS
|
||||
+----------------------------------------------------------------------------------+
|
||||
| | | CONTAINER
|
||||
watch_url | watch_url | watch_url |
|
||||
| | |
|
||||
+--------+-------+ +--------+-------+ +-------+--------+
|
||||
| | | | | | /etc/ganesha/ganesha.conf
|
||||
| nfs.$svc.a | | nfs.$svc.b | | nfs.$svc.c | (bootstrap config)
|
||||
| | | | | |
|
||||
+----------------+ +----------------+ +----------------+
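
As an illustrative sketch (assuming the ``nfs-ganesha`` pool described above and
a hypothetical cluster id of ``mynfs``), the export and config objects in this
hierarchy can be listed directly with the ``rados`` tool::

    rados --pool nfs-ganesha --namespace mynfs ls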
|
||||
|
||||
Create NFS Ganesha Cluster
|
||||
==========================
|
||||
|
||||
|
@ -6,13 +6,11 @@ flags to support seamless upgrades of the MDSs without potentially causing
|
||||
assertions or other faults due to incompatible messages or other functional
|
||||
differences. For this reason, it's necessary during any cluster upgrade to
|
||||
reduce the number of active MDS for a file system to one first so that two
|
||||
active MDS do not communicate with different versions. Further, it's also
|
||||
necessary to take standbys offline as any new CompatSet flags will propagate
|
||||
via the MDSMap to all MDS and cause older MDS to suicide.
|
||||
active MDS do not communicate with different versions.
|
||||
|
||||
The proper sequence for upgrading the MDS cluster is:
|
||||
|
||||
1. Disable and stop standby-replay daemons.
|
||||
1. For each file system, disable and stop standby-replay daemons.
|
||||
|
||||
::
|
||||
|
||||
@ -27,7 +25,7 @@ command. Older versions of Ceph require you to stop these daemons manually.
|
||||
ceph mds fail mds.<X>
|
||||
|
||||
|
||||
2. Reduce the number of ranks to 1:
|
||||
2. For each file system, reduce the number of ranks to 1:
|
||||
|
||||
::
|
||||
|
||||
@ -39,43 +37,20 @@ command. Older versions of Ceph require you to stop these daemons manually.
|
||||
|
||||
ceph status # wait for MDS to finish stopping
|
||||
|
||||
4. Take all standbys offline, e.g. using systemctl:
|
||||
|
||||
::
|
||||
|
||||
systemctl stop ceph-mds.target
|
||||
|
||||
5. Confirm only one MDS is online and is rank 0 for your FS:
|
||||
|
||||
::
|
||||
|
||||
ceph status
|
||||
|
||||
6. Upgrade the single active MDS, e.g. using systemctl:
|
||||
4. For each MDS, upgrade packages and restart. Note: to reduce failovers, it is
|
||||
recommended -- but not strictly necessary -- to first upgrade standby daemons.
|
||||
|
||||
::
|
||||
|
||||
# use package manager to update cluster
|
||||
systemctl restart ceph-mds.target
|
||||
|
||||
7. Upgrade/start the standby daemons.
|
||||
|
||||
::
|
||||
|
||||
# use package manager to update cluster
|
||||
systemctl restart ceph-mds.target
|
||||
|
||||
8. Restore the previous max_mds for your cluster:
|
||||
5. For each file system, restore the previous max_mds and allow_standby_replay settings for your cluster:
|
||||
|
||||
::
|
||||
|
||||
ceph fs set <fs_name> max_mds <old_max_mds>
|
||||
|
||||
9. Restore setting for ``allow_standby_replay`` (if applicable):
|
||||
|
||||
::
|
||||
|
||||
ceph fs set <fs_name> allow_standby_replay true
|
||||
ceph fs set <fs_name> allow_standby_replay <old_allow_standby_replay>
|
||||
|
||||
|
||||
Upgrading pre-Firefly file systems past Jewel
|
||||
|
@ -124,6 +124,20 @@ This means we should do very few synchronous calls to remote hosts.
|
||||
As a guideline, cephadm should do at most ``O(1)`` network calls in CLI handlers.
|
||||
Everything else should be done asynchronously in other threads, like ``serve()``.
|
||||
|
||||
Note regarding different variables used in the code
|
||||
===================================================
|
||||
|
||||
* a ``service_type`` is something like mon, mgr, alertmanager, etc., defined
|
||||
in ``ServiceSpec``
|
||||
* a ``service_id`` is the name of the service. Some services don't have
|
||||
names.
|
||||
* a ``service_name`` is ``<service_type>.<service_id>``
|
||||
* a ``daemon_type`` is the same as the service_type, except for ingress,
|
||||
which has the haproxy and keepalived daemon types.
|
||||
* a ``daemon_id`` is typically ``<service_id>.<hostname>.<random-string>``.
|
||||
(Not the case for e.g. OSDs. OSDs are always called OSD.N)
|
||||
* a ``daemon_name`` is ``<daemon_type>.<daemon_id>``
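
For example (an illustrative sketch; the host name and random suffix are
hypothetical), an RGW service with ``service_id`` ``myrealm`` breaks down as
follows::

    service_type:  rgw
    service_id:    myrealm
    service_name:  rgw.myrealm
    daemon_type:   rgw
    daemon_id:     myrealm.host1.lxkijs
    daemon_name:   rgw.myrealm.host1.lxkijs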
|
||||
|
||||
Kcli: a virtualization management tool to make easy orchestrators development
|
||||
=============================================================================
|
||||
`Kcli <https://github.com/karmab/kcli>`_ is meant to interact with existing
|
||||
|
@ -430,7 +430,14 @@ run-cephadm-e2e-tests.sh
|
||||
Orchestrator backend behave correctly.
|
||||
|
||||
Prerequisites: you need to install `KCLI
|
||||
<https://kcli.readthedocs.io/en/latest/>`_ in your local machine.
|
||||
<https://kcli.readthedocs.io/en/latest/>`_ and Node.js on your local machine.
|
||||
|
||||
Configure KCLI plan requirements::
|
||||
|
||||
$ sudo chown -R $(id -un) /var/lib/libvirt/images
|
||||
$ mkdir -p /var/lib/libvirt/images/ceph-dashboard dashboard
|
||||
$ kcli create pool -p /var/lib/libvirt/images/ceph-dashboard dashboard
|
||||
$ kcli create network -c 192.168.100.0/24 dashboard
|
||||
|
||||
Note:
|
||||
This script is aimed to be run as jenkins job so the cleanup is triggered only in a jenkins
|
||||
@ -439,9 +446,26 @@ Note:
|
||||
Start E2E tests by running::
|
||||
|
||||
$ cd <your/ceph/repo/dir>
|
||||
$ sudo chown -R $(id -un) src/pybind/mgr/dashboard/frontend/dist src/pybind/mgr/dashboard/frontend/node_modules
|
||||
$ sudo chown -R $(id -un) src/pybind/mgr/dashboard/frontend/{dist,node_modules,src/environments}
|
||||
$ ./src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh
|
||||
$ kcli delete plan -y ceph # After tests finish.
|
||||
|
||||
You can also start a cluster in development mode (so the frontend build starts in watch mode and you
|
||||
only have to reload the page for the changes to be reflected) by running::
|
||||
|
||||
$ ./src/pybind/mgr/dashboard/ci/cephadm/start-cluster.sh --dev-mode
|
||||
|
||||
Note:
|
||||
Add ``--expanded`` if you need a cluster ready to deploy services (one with enough monitor
|
||||
daemons spread across different hosts and enough OSDs).
|
||||
|
||||
Test your changes by running:
|
||||
|
||||
$ ./src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh
|
||||
|
||||
Shutdown the cluster by running:
|
||||
|
||||
$ kcli delete plan -y ceph
|
||||
$ # In development mode, also kill the npm build watch process (e.g., pkill -f "ng build")
|
||||
|
||||
Other running options
|
||||
.....................
|
||||
@ -1652,6 +1676,58 @@ load the controllers that we want to test. In the above example we are only
|
||||
loading the ``Ping`` controller. We can also disable authentication of a
|
||||
controller at this stage, as depicted in the example.
|
||||
|
||||
How to update or create new dashboards in grafana?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
We are using ``jsonnet`` and ``grafonnet-lib`` to write code for the grafana dashboards.
|
||||
All the dashboards are written in the ``grafana_dashboards.jsonnet`` file in the
``monitoring/grafana/dashboards/jsonnet`` directory.
|
||||
|
||||
We generate the dashboard json files directly from this jsonnet file by running this
|
||||
command in the grafana/dashboards directory:
|
||||
``jsonnet -m . jsonnet/grafana_dashboards.jsonnet``.
|
||||
(For the above command to succeed, the ``jsonnet`` package must be installed and
the ``grafonnet-lib`` directory must be cloned on your machine. Please refer to
``https://grafana.github.io/grafonnet-lib/getting-started/`` if you run into trouble.)
|
||||
|
||||
To update an existing grafana dashboard or to create a new one, we need to update
|
||||
the ``grafana_dashboards.jsonnet`` file and generate the new/updated json files using the
|
||||
above-mentioned command. People who are not familiar with grafonnet or jsonnet
can follow this doc: ``https://grafana.github.io/grafonnet-lib/``.
|
||||
|
||||
Example grafana dashboard in jsonnet format:
|
||||
|
||||
To specify grafana dashboard properties such as title, uid, etc., we can create a local function -
|
||||
|
||||
::
|
||||
|
||||
local dashboardSchema(title, uid, time_from, refresh, schemaVersion, tags,timezone, timepicker)
|
||||
|
||||
To add a graph panel, we can specify the graph schema in a local function such as -
|
||||
|
||||
::
|
||||
|
||||
local graphPanelSchema(title, nullPointMode, stack, formatY1, formatY2, labelY1, labelY2, min, fill, datasource)
|
||||
|
||||
and then use these functions inside the dashboard definition, like this -
|
||||
|
||||
::
|
||||
|
||||
{
|
||||
radosgw-sync-overview.json: //json file name to be generated
|
||||
|
||||
dashboardSchema(
|
||||
'RGW Sync Overview', 'rgw-sync-overview', 'now-1h', '15s', .., .., ..
|
||||
)
|
||||
|
||||
.addPanels([
|
||||
graphPanelSchema(
|
||||
'Replication (throughput) from Source Zone', 'Bps', null, .., .., ..)
|
||||
])
|
||||
}
|
||||
|
||||
The valid grafonnet-lib attributes can be found here - ``https://grafana.github.io/grafonnet-lib/api-docs/``.
|
||||
|
||||
|
||||
How to listen for manager notifications in a controller?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -19,11 +19,11 @@ Ceph Container Images
|
||||
Official Releases
|
||||
-----------------
|
||||
|
||||
Ceph Container images are available from Docker Hub at::
|
||||
Ceph Container images are available from both Quay and Docker Hub::
|
||||
|
||||
https://quay.io/repository/ceph/ceph
|
||||
https://hub.docker.com/r/ceph
|
||||
|
||||
|
||||
ceph/ceph
|
||||
^^^^^^^^^
|
||||
|
||||
@ -42,6 +42,13 @@ ceph/ceph
|
||||
| vRELNUM.Y.Z-YYYYMMDD | A specific build (e.g., *v14.2.4-20191203*) |
|
||||
+----------------------+--------------------------------------------------------------+
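
For example (a sketch; the tag shown is just one published release), a specific
Ceph release can be pulled from Quay with podman or docker::

    podman pull quay.io/ceph/ceph:v16.2.5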
|
||||
|
||||
Legacy container images
|
||||
-----------------------
|
||||
|
||||
Legacy container images are available from Docker Hub at::
|
||||
|
||||
https://hub.docker.com/r/ceph
|
||||
|
||||
ceph/daemon-base
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -15,7 +15,7 @@ Synopsis
|
||||
| **ceph-volume** **inventory**
|
||||
|
||||
| **ceph-volume** **lvm** [ *trigger* | *create* | *activate* | *prepare*
|
||||
| *zap* | *list* | *batch*]
|
||||
| *zap* | *list* | *batch* | *new-wal* | *new-db* | *migrate* ]
|
||||
|
||||
| **ceph-volume** **simple** [ *trigger* | *scan* | *activate* ]
|
||||
|
||||
@ -241,6 +241,96 @@ Positional arguments:
|
||||
``/path/to/sda1`` or ``/path/to/sda`` for regular devices.
|
||||
|
||||
|
||||
new-wal
|
||||
^^^^^^^
|
||||
|
||||
Attaches the given logical volume to an OSD as a WAL. The logical volume
name format is vg/lv. Fails if the OSD already has a WAL attached.
|
||||
|
||||
Usage::
|
||||
|
||||
ceph-volume lvm new-wal --osd-id OSD_ID --osd-fsid OSD_FSID --target <target lv>
|
||||
|
||||
Optional arguments:
|
||||
|
||||
.. option:: -h, --help
|
||||
|
||||
show the help message and exit
|
||||
|
||||
.. option:: --no-systemd
|
||||
|
||||
Skip checking OSD systemd unit
|
||||
|
||||
Required arguments:
|
||||
|
||||
.. option:: --target
|
||||
|
||||
logical volume name to attach as WAL
|
||||
|
||||
new-db
|
||||
^^^^^^
|
||||
|
||||
Attaches the given logical volume to an OSD as a DB. The logical volume
name format is vg/lv. Fails if the OSD already has a DB attached.
|
||||
|
||||
Usage::
|
||||
|
||||
ceph-volume lvm new-db --osd-id OSD_ID --osd-fsid OSD_FSID --target <target lv>
|
||||
|
||||
Optional arguments:
|
||||
|
||||
.. option:: -h, --help
|
||||
|
||||
show the help message and exit
|
||||
|
||||
.. option:: --no-systemd
|
||||
|
||||
Skip checking OSD systemd unit
|
||||
|
||||
Required arguments:
|
||||
|
||||
.. option:: --target
|
||||
|
||||
logical volume name to attach as DB
|
||||
|
||||
migrate
|
||||
^^^^^^^
|
||||
|
||||
Moves BlueFS data from the source volume(s) to the target volume; source
volumes (except the main one, i.e. the data or block volume) are removed on
success. Only LVM volumes are permitted as the target, either already attached
or new. In the latter case the target is attached to the OSD, replacing one of
the source devices. The following replacement rules apply (in order of
precedence, stopping at the first match):

- if the source list has a DB volume, the target device replaces it.
- if the source list has a WAL volume, the target device replaces it.
- if the source list has only the slow volume, the operation is not permitted
  and requires explicit allocation via the new-db/new-wal command.
|
||||
|
||||
Usage::
|
||||
|
||||
ceph-volume lvm migrate --osd-id OSD_ID --osd-fsid OSD_FSID --target <target lv> --from {data|db|wal} [{data|db|wal} ...]
|
||||
|
||||
Optional arguments:
|
||||
|
||||
.. option:: -h, --help
|
||||
|
||||
show the help message and exit
|
||||
|
||||
.. option:: --no-systemd
|
||||
|
||||
Skip checking OSD systemd unit
|
||||
|
||||
Required arguments:
|
||||
|
||||
.. option:: --from
|
||||
|
||||
list of source device type names
|
||||
|
||||
.. option:: --target
|
||||
|
||||
logical volume to move data to
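
For example (an illustrative sketch with hypothetical volume and OSD values),
moving the DB of OSD 1 to a new logical volume on a faster device might look
like this::

    ceph-volume lvm migrate --osd-id 1 --osd-fsid <osd-fsid> --from db --target vg_fast/new_db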
|
||||
|
||||
simple
|
||||
------
|
||||
|
||||
|
@ -53,6 +53,7 @@ Synopsis
|
||||
| **cephadm** **bootstrap** [-h] [--config CONFIG] [--mon-id MON_ID]
|
||||
| [--mon-addrv MON_ADDRV] [--mon-ip MON_IP]
|
||||
| [--mgr-id MGR_ID] [--fsid FSID]
|
||||
| [--log-to-file] [--single-host-defaults]
|
||||
| [--output-dir OUTPUT_DIR]
|
||||
| [--output-keyring OUTPUT_KEYRING]
|
||||
| [--output-config OUTPUT_CONFIG]
|
||||
@ -126,13 +127,14 @@ Options
|
||||
.. option:: --docker
|
||||
|
||||
use docker instead of podman (default: False)
|
||||
.. option::data-dir DATA_DIR
|
||||
|
||||
base directory for daemon data (default:/var/lib/ceph)
|
||||
.. option:: --data-dir DATA_DIR
|
||||
|
||||
base directory for daemon data (default: /var/lib/ceph)
|
||||
|
||||
.. option:: --log-dir LOG_DIR
|
||||
|
||||
base directory for daemon logs (default:.. option:: /var/log/ceph)
|
||||
base directory for daemon logs (default: /var/log/ceph)
|
||||
|
||||
.. option:: --logrotate-dir LOGROTATE_DIR
|
||||
|
||||
@ -208,6 +210,8 @@ Arguments:
|
||||
* [--mon-ip MON_IP] mon IP
|
||||
* [--mgr-id MGR_ID] mgr id (default: randomly generated)
|
||||
* [--fsid FSID] cluster FSID
|
||||
* [--log-to-file] configure cluster to log to traditional log files
|
||||
* [--single-host-defaults] configure cluster to run on a single host
|
||||
* [--output-dir OUTPUT_DIR] directory to write config, keyring, and pub key files
|
||||
* [--output-keyring OUTPUT_KEYRING] location to write keyring file with new cluster admin and mon keys
|
||||
* [--output-config OUTPUT_CONFIG] location to write conf file to connect to new cluster
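
For example (an illustrative sketch; the monitor IP is hypothetical), a
single-host test cluster that also logs to traditional files could be
bootstrapped with::

    cephadm bootstrap --mon-ip 192.168.100.10 --single-host-defaults --log-to-file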
|
||||
|
@ -376,50 +376,17 @@ password.
|
||||
Enabling the Object Gateway Management Frontend
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To use the Object Gateway management functionality of the dashboard, you will
|
||||
need to provide the login credentials of a user with the ``system`` flag
|
||||
enabled. If you do not have a ``system`` user already, you must create one::
|
||||
When RGW is deployed with cephadm, the RGW credentials used by the
|
||||
dashboard will be automatically configured. You can also manually force the
|
||||
credentials to be set up with::
|
||||
|
||||
$ radosgw-admin user create --uid=<user_id> --display-name=<display_name> \
|
||||
--system
|
||||
$ ceph dashboard set-rgw-credentials
|
||||
|
||||
Take note of the keys ``access_key`` and ``secret_key`` in the output.
|
||||
This will create an RGW user with uid ``dashboard`` for each realm in
|
||||
the system.
|
||||
|
||||
To obtain the credentials of an existing user via `radosgw-admin`::
|
||||
If you've configured a custom 'admin' resource in your RGW admin API, you should set it here also::
|
||||
|
||||
$ radosgw-admin user info --uid=<user_id>
|
||||
|
||||
In case of having several Object Gateways, you will need the required users' credentials
|
||||
to connect to each Object Gateway.
|
||||
Finally, provide these credentials to the dashboard::
|
||||
|
||||
$ echo -n "{'<daemon1.id>': '<user1-access-key>', '<daemon2.id>': '<user2-access-key>', ...}" > <file-containing-access-key>
|
||||
$ echo -n "{'<daemon1.id>': '<user1-secret-key>', '<daemon2.id>': '<user2-secret-key>', ...}" > <file-containing-secret-key>
|
||||
$ ceph dashboard set-rgw-api-access-key -i <file-containing-access-key>
|
||||
$ ceph dashboard set-rgw-api-secret-key -i <file-containing-secret-key>
|
||||
|
||||
.. note::
|
||||
|
||||
Legacy way of providing credentials (connect to single Object Gateway)::
|
||||
|
||||
$ echo -n "<access-key>" > <file-containing-access-key>
|
||||
$ echo -n "<secret-key>" > <file-containing-secret-key>
|
||||
|
||||
In a simple configuration with a single RGW endpoint, this is all you
|
||||
have to do to get the Object Gateway management functionality working. The
|
||||
dashboard will try to automatically determine the host and port
|
||||
from the Ceph Manager's service map.
|
||||
|
||||
In case of having several Object Gateways, you might want to set
|
||||
the default one by setting its host and port manually::
|
||||
|
||||
$ ceph dashboard set-rgw-api-host <host>
|
||||
$ ceph dashboard set-rgw-api-port <port>
|
||||
|
||||
In addition to the settings mentioned so far, the following settings do also
|
||||
exist and you may find yourself in the situation that you have to use them::
|
||||
|
||||
$ ceph dashboard set-rgw-api-scheme <scheme> # http or https
|
||||
$ ceph dashboard set-rgw-api-admin-resource <admin_resource>
|
||||
|
||||
If you are using a self-signed certificate in your Object Gateway setup,
|
||||
@ -1314,6 +1281,7 @@ and loosely coupled fashion.
|
||||
|
||||
.. include:: dashboard_plugins/feature_toggles.inc.rst
|
||||
.. include:: dashboard_plugins/debug.inc.rst
|
||||
.. include:: dashboard_plugins/motd.inc.rst
|
||||
|
||||
|
||||
Troubleshooting the Dashboard
|
||||
|
30
ceph/doc/mgr/dashboard_plugins/motd.inc.rst
Normal file
@ -0,0 +1,30 @@
|
||||
.. _dashboard-motd:
|
||||
|
||||
Message of the day (MOTD)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Displays a configured `message of the day` at the top of the Ceph Dashboard.
|
||||
|
||||
The importance of a MOTD can be configured by its severity, which is
`info`, `warning` or `danger`. The MOTD can expire after a given time;
after that it is no longer displayed in the UI. Use the following
syntax to specify the expiration time: `Ns|m|h|d|w` for seconds, minutes,
hours, days and weeks. For example, use `2h` if the MOTD should expire after
two hours, or `5w` for five weeks. Use `0` to configure a MOTD that does not
expire.
|
||||
|
||||
To configure a MOTD, run the following command::
|
||||
|
||||
$ ceph dashboard motd set <severity:info|warning|danger> <expires> <message>
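
For example (a sketch with an arbitrary message), a warning that expires after
two days could be configured like this::

    $ ceph dashboard motd set warning 2d "Planned maintenance this weekend"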
|
||||
|
||||
To show the configured MOTD::
|
||||
|
||||
$ ceph dashboard motd get
|
||||
|
||||
To clear the configured MOTD, run::
|
||||
|
||||
$ ceph dashboard motd clear
|
||||
|
||||
A MOTD with an `info` or `warning` severity can be closed by the user. The
`info` MOTD is not displayed anymore until the local storage cookies are
cleared or a new MOTD with a different severity is displayed. A MOTD with
a `warning` severity will be displayed again in a new session.
|
@ -40,9 +40,37 @@ healed itself).
|
||||
When the cluster is healthy, the balancer will throttle its changes
|
||||
such that the percentage of PGs that are misplaced (i.e., that need to
|
||||
be moved) is below a threshold of (by default) 5%. The
|
||||
``max_misplaced`` threshold can be adjusted with::
|
||||
``target_max_misplaced_ratio`` threshold can be adjusted with::
|
||||
|
||||
ceph config set mgr mgr/balancer/max_misplaced .07 # 7%
|
||||
ceph config set mgr target_max_misplaced_ratio .07 # 7%
|
||||
|
||||
Set the number of seconds to sleep in between runs of the automatic balancer::
|
||||
|
||||
ceph config set mgr mgr/balancer/sleep_interval 60
|
||||
|
||||
Set the time of day to begin automatic balancing in HHMM format::
|
||||
|
||||
ceph config set mgr mgr/balancer/begin_time 0000
|
||||
|
||||
Set the time of day to finish automatic balancing in HHMM format::
|
||||
|
||||
ceph config set mgr mgr/balancer/end_time 2400
|
||||
|
||||
Restrict automatic balancing to this day of the week or later.
|
||||
Uses the same conventions as crontab, 0 or 7 is Sunday, 1 is Monday, and so on::
|
||||
|
||||
ceph config set mgr mgr/balancer/begin_weekday 0
|
||||
|
||||
Restrict automatic balancing to this day of the week or earlier.
|
||||
Uses the same conventions as crontab, 0 or 7 is Sunday, 1 is Monday, and so on::
|
||||
|
||||
ceph config set mgr mgr/balancer/end_weekday 7
|
||||
|
||||
Pool IDs to which the automatic balancing will be limited.
|
||||
The default for this is an empty string, meaning all pools will be balanced.
|
||||
The numeric pool IDs can be obtained with the :command:`ceph osd pool ls detail` command::
|
||||
|
||||
ceph config set mgr mgr/balancer/pool_ids 1,2,3
|
||||
|
||||
|
||||
Modes
|
||||
@ -136,3 +164,4 @@ The quality of the distribution that would result after executing a plan can be
|
||||
Assuming the plan is expected to improve the distribution (i.e., it has a lower score than the current cluster state), the user can execute that plan with::
|
||||
|
||||
ceph balancer execute <plan-name>
|
||||
|
||||
|
@ -410,10 +410,9 @@ on the number of replicas, clones and snapshots.
|
||||
to this pool.
|
||||
- **QUOTA OBJECTS:** The number of quota objects.
|
||||
- **QUOTA BYTES:** The number of bytes in the quota objects.
|
||||
- **DIRTY:** "DIRTY" is meaningful only when cache tiering is in use. If cache
|
||||
tiering is in use, the "DIRTY" column lists the number of objects in the
|
||||
cache pool that have been written to the cache pool but have not flushed yet
|
||||
to the base pool.
|
||||
- **DIRTY:** The number of objects in the cache pool that have been written to
|
||||
the cache pool but have not been flushed yet to the base pool. This field is
|
||||
only available when cache tiering is in use.
|
||||
- **USED COMPR:** amount of space allocated for compressed data (i.e. this
|
||||
includes compressed data plus all the allocation, replication and erasure
|
||||
coding overhead).
|
||||
|
@ -41,10 +41,10 @@ the PG count with this command::
|
||||
|
||||
Output will be something like::
|
||||
|
||||
POOL SIZE TARGET SIZE RATE RAW CAPACITY RATIO TARGET RATIO EFFECTIVE RATIO PG_NUM NEW PG_NUM AUTOSCALE
|
||||
a 12900M 3.0 82431M 0.4695 8 128 warn
|
||||
c 0 3.0 82431M 0.0000 0.2000 0.9884 1 64 warn
|
||||
b 0 953.6M 3.0 82431M 0.0347 8 warn
|
||||
POOL SIZE TARGET SIZE RATE RAW CAPACITY RATIO TARGET RATIO EFFECTIVE RATIO BIAS PG_NUM NEW PG_NUM AUTOSCALE PROFILE
|
||||
a 12900M 3.0 82431M 0.4695 8 128 warn scale-up
|
||||
c 0 3.0 82431M 0.0000 0.2000 0.9884 1.0 1 64 warn scale-down
|
||||
b 0 953.6M 3.0 82431M 0.0347 8 warn scale-down
|
||||
|
||||
**SIZE** is the amount of data stored in the pool. **TARGET SIZE**, if
|
||||
present, is the amount of data the administrator has specified that
|
||||
@ -77,6 +77,10 @@ ratio takes precedence.
|
||||
The system uses the larger of the actual ratio and the effective ratio
|
||||
for its calculation.
|
||||
|
||||
**BIAS** is used as a multiplier to manually adjust a pool's PG count, based
on prior information about how many PGs a specific pool is expected
|
||||
to have.
|
||||
|
||||
**PG_NUM** is the current number of PGs for the pool (or the current
|
||||
number of PGs that the pool is working towards, if a ``pg_num``
|
||||
change is in progress). **NEW PG_NUM**, if present, is what the
|
||||
@ -84,9 +88,13 @@ system believes the pool's ``pg_num`` should be changed to. It is
|
||||
always a power of 2, and will only be present if the "ideal" value
|
||||
varies from the current value by more than a factor of 3.
|
||||
|
||||
The final column, **AUTOSCALE**, is the pool ``pg_autoscale_mode``,
|
||||
**AUTOSCALE** is the pool ``pg_autoscale_mode``
|
||||
and will be either ``on``, ``off``, or ``warn``.
|
||||
|
||||
The final column, **PROFILE**, shows the autoscale profile
|
||||
used by each pool. ``scale-up`` and ``scale-down`` are the
|
||||
currently available profiles.
|
||||
|
||||
|
||||
Automated scaling
|
||||
-----------------
|
||||
@ -113,6 +121,28 @@ example, a pool that maps to OSDs of class `ssd` and a pool that maps
|
||||
to OSDs of class `hdd` will each have optimal PG counts that depend on
|
||||
the number of those respective device types.
|
||||
|
||||
The autoscaler uses the `scale-down` profile by default,
|
||||
where each pool starts out with a full complement of PGs and only scales
|
||||
down when the usage ratio across the pools is not even. However, it also has
|
||||
a `scale-up` profile, where it starts out each pool with minimal PGs and scales
|
||||
up PGs when there is more usage in each pool.
|
||||
|
||||
With only the `scale-down` profile, the autoscaler identifies
|
||||
any overlapping roots and prevents the pools with such roots
|
||||
from scaling because overlapping roots can cause problems
|
||||
with the scaling process.
|
||||
|
||||
To use the `scale-up` profile::
|
||||
|
||||
ceph osd pool set autoscale-profile scale-up
|
||||
|
||||
To switch back to the default `scale-down` profile::
|
||||
|
||||
ceph osd pool set autoscale-profile scale-down
|
||||
|
||||
Existing clusters will continue to use the `scale-up` profile.
|
||||
To use the `scale-down` profile, users will need to set autoscale-profile `scale-down`,
|
||||
after upgrading to a version of Ceph that provides the `scale-down` feature.
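
To check which profile each pool is currently using (a brief sketch), look at
the PROFILE column of::

    ceph osd pool autoscale-status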
|
||||
|
||||
.. _specifying_pool_target_size:
|
||||
|
||||
|
@ -64,6 +64,38 @@ Options
|
||||
:Type: String
|
||||
:Default: None
|
||||
|
||||
``ssl_options``
|
||||
|
||||
:Description: Optional colon separated list of ssl context options:
|
||||
|
||||
``default_workarounds`` Implement various bug workarounds.
|
||||
|
||||
``no_compression`` Disable compression.
|
||||
|
||||
``no_sslv2`` Disable SSL v2.
|
||||
|
||||
``no_sslv3`` Disable SSL v3.
|
||||
|
||||
``no_tlsv1`` Disable TLS v1.
|
||||
|
||||
``no_tlsv1_1`` Disable TLS v1.1.
|
||||
|
||||
``no_tlsv1_2`` Disable TLS v1.2.
|
||||
|
||||
``single_dh_use`` Always create a new key when using tmp_dh parameters.
|
||||
|
||||
:Type: String
|
||||
:Default: ``no_sslv2:no_sslv3:no_tlsv1:no_tlsv1_1``
|
||||
|
||||
``ssl_ciphers``
|
||||
|
||||
:Description: Optional list of one or more cipher strings separated by colons.
|
||||
The format of the string is described in openssl's ciphers(1)
|
||||
manual.
|
||||
|
||||
:Type: String
|
||||
:Default: None
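
As an illustrative sketch (the port, certificate path, and cipher list are
hypothetical), these options can be combined on a single ``rgw frontends``
line in the Ceph configuration::

    rgw frontends = beast ssl_port=443 ssl_certificate=/etc/ceph/rgw.pem ssl_options=no_sslv2:no_sslv3:no_tlsv1:no_tlsv1_1 ssl_ciphers=ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384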
|
||||
|
||||
``tcp_nodelay``
|
||||
|
||||
:Description: If set the socket option will disable Nagle's algorithm on
|
||||
@ -100,6 +132,7 @@ Civetweb
|
||||
========
|
||||
|
||||
.. versionadded:: Firefly
|
||||
.. deprecated:: Pacific
|
||||
|
||||
The ``civetweb`` frontend uses the Civetweb HTTP library, which is a
|
||||
fork of Mongoose.
|
||||
|
@ -400,6 +400,19 @@ Or, when using the transit secret engine::
|
||||
In the example above, the Gateway would only fetch transit encryption keys under
|
||||
``https://vault-server:8200/v1/transit``.
|
||||
|
||||
You can use custom SSL certificates to authenticate with Vault with the help of
the following options::
|
||||
|
||||
rgw crypt vault verify ssl = true
|
||||
rgw crypt vault ssl cacert = /etc/ceph/vault.ca
|
||||
rgw crypt vault ssl clientcert = /etc/ceph/vault.crt
|
||||
rgw crypt vault ssl clientkey = /etc/ceph/vault.key
|
||||
|
||||
where vault.ca is the CA certificate and vault.key/vault.crt are the private key
and SSL certificate generated for RGW to access the Vault server. It is highly
recommended to set this option to true; setting it to false is very dangerous
and should be avoided, since key management runs in highly secured environments.
|
||||
|
||||
Transit engine compatibility support
|
||||
------------------------------------
|
||||
The transit engine has compatibility support for previous
|
||||
|
@ -16,9 +16,9 @@
|
||||
#
|
||||
set -xe
|
||||
|
||||
. /etc/os-release
|
||||
base=${1:-/tmp/release}
|
||||
codename=$(lsb_release -sc)
|
||||
releasedir=$base/$(lsb_release -si)/WORKDIR
|
||||
releasedir=$base/$NAME/WORKDIR
|
||||
rm -fr $(dirname $releasedir)
|
||||
mkdir -p $releasedir
|
||||
#
|
||||
@ -60,7 +60,7 @@ dvers="$vers-1"
|
||||
cd ceph-$vers
|
||||
chvers=$(head -1 debian/changelog | perl -ne 's/.*\(//; s/\).*//; print')
|
||||
if [ "$chvers" != "$dvers" ]; then
|
||||
DEBEMAIL="contact@ceph.com" dch -D $codename --force-distribution -b -v "$dvers" "new version"
|
||||
DEBEMAIL="contact@ceph.com" dch -D $VERSION_CODENAME --force-distribution -b -v "$dvers" "new version"
|
||||
fi
|
||||
#
|
||||
# create the packages
|
||||
@ -74,18 +74,18 @@ if test $NPROC -gt 1 ; then
|
||||
fi
|
||||
PATH=/usr/lib/ccache:$PATH dpkg-buildpackage $j -uc -us
|
||||
cd ../..
|
||||
mkdir -p $codename/conf
|
||||
cat > $codename/conf/distributions <<EOF
|
||||
Codename: $codename
|
||||
mkdir -p $VERSION_CODENAME/conf
|
||||
cat > $VERSION_CODENAME/conf/distributions <<EOF
|
||||
Codename: $VERSION_CODENAME
|
||||
Suite: stable
|
||||
Components: main
|
||||
Architectures: $(dpkg --print-architecture) source
|
||||
EOF
|
||||
if [ ! -e conf ]; then
|
||||
ln -s $codename/conf conf
|
||||
ln -s $VERSION_CODENAME/conf conf
|
||||
fi
|
||||
reprepro --basedir $(pwd) include $codename WORKDIR/*.changes
|
||||
reprepro --basedir $(pwd) include $VERSION_CODENAME WORKDIR/*.changes
|
||||
#
|
||||
# teuthology needs the version in the version file
|
||||
#
|
||||
echo $dvers > $codename/version
|
||||
echo $dvers > $VERSION_CODENAME/version
|
||||
|
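The hunks above switch the Debian release script from ``lsb_release`` to sourcing
``/etc/os-release``. For reference, a quick sketch of the variables it now relies
on, with example values from a Debian 11 host (values differ per distribution):

    $ . /etc/os-release
    $ echo "$NAME"              # used for the release directory
    Debian GNU/Linux
    $ echo "$VERSION_CODENAME"  # replaces $(lsb_release -sc) / $codename
    bullseye
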
@ -163,7 +163,7 @@ tar cvf $outfile.version.tar $outfile/src/.git_version $outfile/ceph.spec
|
||||
# at the three URLs referenced below (may involve uploading to download.ceph.com)
|
||||
boost_version=1.73.0
|
||||
download_boost $boost_version 4eb3b8d442b426dc35346235c8733b5ae35ba431690e38c6a8263dce9fcbb402 \
|
||||
https://dl.bintray.com/boostorg/release/$boost_version/source \
|
||||
https://boostorg.jfrog.io/artifactory/main/release/$boost_version/source \
|
||||
https://downloads.sourceforge.net/project/boost/boost/$boost_version \
|
||||
https://download.ceph.com/qa
|
||||
download_liburing 0.7 8e2842cfe947f3a443af301bdd6d034455536c38a455c7a700d0c1ad165a7543 \
|
||||
|
@ -1,33 +1,38 @@
|
||||
|
||||
GRAFANA_VERSION := 6.7.4-1
|
||||
PIECHART_VERSION := "1.4.0"
|
||||
STATUS_PANEL_VERSION := "1.0.9"
|
||||
DASHBOARD_DIR := "monitoring/grafana/dashboards"
|
||||
GRAFANA_VERSION ?= 6.7.4-1
|
||||
PIECHART_VERSION ?= "1.4.0"
|
||||
STATUS_PANEL_VERSION ?= "1.0.9"
|
||||
DASHBOARD_DIR := "../dashboards"
|
||||
DASHBOARD_PROVISIONING := "ceph-dashboard.yml"
|
||||
IMAGE := "centos:8"
|
||||
VERSION := "${IMAGE: -1}"
|
||||
IMAGE := "docker.io/centos:8"
|
||||
PKGMGR := "dnf"
|
||||
# CONTAINER := $(shell buildah from ${IMAGE})
|
||||
GF_CONFIG := "/etc/grafana/grafana.ini"
|
||||
ceph_version := "master"
|
||||
# clip off "-<whatever> from the end of GRAFANA_VERSION
|
||||
CONTAINER_VERSION := $(shell /bin/echo $(GRAFANA_VERSION) | /bin/sed 's/-.*//')
|
||||
|
||||
ARCH ?= x86_64
|
||||
ifeq "$(ARCH)" "arm64"
|
||||
override ARCH := aarch64
|
||||
endif
|
||||
|
||||
LOCALTAG=ceph-grafana:$(CONTAINER_VERSION)-$(ARCH)
|
||||
TAG=ceph/ceph-grafana:$(CONTAINER_VERSION)-$(ARCH)
|
||||
|
||||
# Build a grafana instance - preconfigured for use within Ceph's dashboard UI
|
||||
|
||||
build : fetch_dashboards
|
||||
build :
|
||||
echo "Creating base container"
|
||||
$(eval CONTAINER := $(shell buildah from ${IMAGE}))
|
||||
$(eval CONTAINER := $(shell sudo buildah from ${IMAGE}))
|
||||
# Using upstream grafana build
|
||||
wget https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.x86_64.rpm
|
||||
#wget localhost:8000/grafana-${GRAFANA_VERSION}.x86_64.rpm
|
||||
#cp grafana-${GRAFANA_VERSION}.x86_64.rpm ${mountpoint}/tmp/.
|
||||
buildah copy $(CONTAINER) grafana-${GRAFANA_VERSION}.x86_64.rpm /tmp/grafana-${GRAFANA_VERSION}.x86_64.rpm
|
||||
buildah run $(CONTAINER) ${PKGMGR} install -y --setopt install_weak_deps=false --setopt=tsflags=nodocs /tmp/grafana-${GRAFANA_VERSION}.x86_64.rpm
|
||||
buildah run $(CONTAINER) ${PKGMGR} clean all
|
||||
buildah run $(CONTAINER) rm -f /tmp/grafana*.rpm
|
||||
buildah run $(CONTAINER) grafana-cli plugins install grafana-piechart-panel ${PIECHART_VERSION}
|
||||
buildah run $(CONTAINER) grafana-cli plugins install vonage-status-panel ${STATUS_PANEL_VERSION}
|
||||
buildah run $(CONTAINER) mkdir -p /etc/grafana/dashboards/ceph-dashboard
|
||||
buildah copy $(CONTAINER) jsonfiles/*.json /etc/grafana/dashboards/ceph-dashboard
|
||||
curl -fLO https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.${ARCH}.rpm
|
||||
sudo buildah copy $(CONTAINER) grafana-${GRAFANA_VERSION}.${ARCH}.rpm /tmp/grafana-${GRAFANA_VERSION}.${ARCH}.rpm
|
||||
sudo buildah run $(CONTAINER) ${PKGMGR} install -y --setopt install_weak_deps=false --setopt=tsflags=nodocs /tmp/grafana-${GRAFANA_VERSION}.${ARCH}.rpm
|
||||
sudo buildah run $(CONTAINER) ${PKGMGR} clean all
|
||||
sudo buildah run $(CONTAINER) rm -f /tmp/grafana*.rpm
|
||||
sudo buildah run $(CONTAINER) grafana-cli plugins install grafana-piechart-panel ${PIECHART_VERSION}
|
||||
sudo buildah run $(CONTAINER) grafana-cli plugins install vonage-status-panel ${STATUS_PANEL_VERSION}
|
||||
sudo buildah run $(CONTAINER) mkdir -p /etc/grafana/dashboards/ceph-dashboard
|
||||
sudo buildah copy $(CONTAINER) ${DASHBOARD_DIR}/*.json /etc/grafana/dashboards/ceph-dashboard
|
||||
|
||||
@/bin/echo -e "\
|
||||
apiVersion: 1 \\n\
|
||||
@ -43,55 +48,49 @@ providers: \\n\
|
||||
path: '/etc/grafana/dashboards/ceph-dashboard'" >> ${DASHBOARD_PROVISIONING}
|
||||
|
||||
|
||||
buildah copy $(CONTAINER) ${DASHBOARD_PROVISIONING} /etc/grafana/provisioning/dashboards/${DASHBOARD_PROVISIONING}
|
||||
sudo buildah copy $(CONTAINER) ${DASHBOARD_PROVISIONING} /etc/grafana/provisioning/dashboards/${DASHBOARD_PROVISIONING}
|
||||
|
||||
# expose tcp/3000 for grafana
|
||||
buildah config --port 3000 $(CONTAINER)
|
||||
sudo buildah config --port 3000 $(CONTAINER)
|
||||
|
||||
# set working dir
|
||||
buildah config --workingdir /usr/share/grafana $(CONTAINER)
|
||||
sudo buildah config --workingdir /usr/share/grafana $(CONTAINER)
|
||||
|
||||
# set environment overrides from the default locations in /usr/share
|
||||
buildah config --env GF_PATHS_LOGS="/var/log/grafana" $(CONTAINER)
|
||||
buildah config --env GF_PATHS_PLUGINS="/var/lib/grafana/plugins" $(CONTAINER)
|
||||
buildah config --env GF_PATHS_PROVISIONING="/etc/grafana/provisioning" $(CONTAINER)
|
||||
buildah config --env GF_PATHS_DATA="/var/lib/grafana" $(CONTAINER)
|
||||
sudo buildah config --env GF_PATHS_LOGS="/var/log/grafana" $(CONTAINER)
|
||||
sudo buildah config --env GF_PATHS_PLUGINS="/var/lib/grafana/plugins" $(CONTAINER)
|
||||
sudo buildah config --env GF_PATHS_PROVISIONING="/etc/grafana/provisioning" $(CONTAINER)
|
||||
sudo buildah config --env GF_PATHS_DATA="/var/lib/grafana" $(CONTAINER)
|
||||
|
||||
# entrypoint
|
||||
buildah config --entrypoint "grafana-server --config=${GF_CONFIG}" $(CONTAINER)
|
||||
sudo buildah config --entrypoint "grafana-server --config=${GF_CONFIG}" $(CONTAINER)
|
||||
|
||||
# finalize
|
||||
buildah config --label maintainer="Paul Cuzner <pcuzner@redhat.com>" $(CONTAINER)
|
||||
buildah config --label description="Ceph Grafana Container" $(CONTAINER)
|
||||
buildah config --label summary="Grafana Container configured for Ceph mgr/dashboard integration" $(CONTAINER)
|
||||
buildah commit --format docker --squash $(CONTAINER) ceph-grafana:${ceph_version}
|
||||
buildah tag ceph-grafana:${ceph_version} ceph/ceph-grafana:${ceph_version}
|
||||
sudo buildah config --label maintainer="Paul Cuzner <pcuzner@redhat.com>" $(CONTAINER)
|
||||
sudo buildah config --label description="Ceph Grafana Container" $(CONTAINER)
|
||||
sudo buildah config --label summary="Grafana Container configured for Ceph mgr/dashboard integration" $(CONTAINER)
|
||||
sudo buildah commit --format docker --squash $(CONTAINER) $(LOCALTAG)
|
||||
|
||||
push:
|
||||
# this transition-through-oci image is a workaround for
|
||||
# https://github.com/containers/buildah/issues/3253 and
|
||||
# can be removed when that is fixed and released. The
|
||||
# --format v2s2 on push is to convert oci back to docker format.
|
||||
sudo podman push $(LOCALTAG) --format=oci dir://tmp/oci-image
|
||||
sudo podman pull dir://tmp/oci-image
|
||||
sudo rm -rf /tmp/oci-image
|
||||
sudo podman tag localhost/tmp/oci-image docker.io/${TAG}
|
||||
sudo podman tag localhost/tmp/oci-image quay.io/${TAG}
|
||||
# sudo podman has issues with auth.json; just override it
|
||||
sudo podman login --authfile=auth.json -u ${DOCKER_HUB_USERNAME} -p ${DOCKER_HUB_PASSWORD} docker.io
|
||||
sudo podman login --authfile=auth.json -u $(CONTAINER_REPO_USERNAME) -p $(CONTAINER_REPO_PASSWORD) quay.io
|
||||
sudo podman push --authfile=auth.json --format v2s2 docker.io/${TAG}
|
||||
sudo podman push --authfile=auth.json --format v2s2 quay.io/${TAG}
|
||||
|
||||
fetch_dashboards: clean
|
||||
wget -O - https://api.github.com/repos/ceph/ceph/contents/${DASHBOARD_DIR}?ref=${ceph_version} | jq '.[].download_url' > dashboards
|
||||
|
||||
# drop quotes from the list and pick out only json files
|
||||
sed -i 's/\"//g' dashboards
|
||||
sed -i '/\.json/!d' dashboards
|
||||
mkdir jsonfiles
|
||||
while read -r line; do \
|
||||
wget "$$line" -P jsonfiles; \
|
||||
done < dashboards
|
||||
|
||||
clean :
|
||||
rm -f dashboards
|
||||
rm -fr jsonfiles
|
||||
rm -f grafana-*.rpm*
|
||||
clean:
|
||||
sudo podman rmi ${LOCALTAG} || true
|
||||
sudo podman rmi docker.io/${TAG} || true
|
||||
sudo podman rmi quay.io/${TAG} || true
|
||||
sudo podman rmi localhost/tmp/oci-image || true
|
||||
rm -f grafana-*.rpm* auth.json
|
||||
rm -f ${DASHBOARD_PROVISIONING}
|
||||
|
||||
|
||||
nautilus :
|
||||
$(MAKE) ceph_version="nautilus" build
|
||||
octopus :
|
||||
$(MAKE) ceph_version="octopus" build
|
||||
master :
|
||||
$(MAKE) ceph_version="master" build
|
||||
|
||||
all : nautilus octopus master
|
||||
.PHONY : all
|
||||
|
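A usage sketch for the Makefile above; the targets and variables are the ones
defined in this file, and buildah/podman plus sudo are assumed to be available on
the build host:

    # build the ceph-grafana image with the defaults defined above
    make build

    # build for arm64 (the Makefile maps ARCH=arm64 to the aarch64 RPM)
    make ARCH=arm64 build

    # push the image to docker.io and quay.io; credentials are expected in the
    # environment (DOCKER_HUB_USERNAME/PASSWORD, CONTAINER_REPO_USERNAME/PASSWORD)
    make push
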
@ -1,8 +1,34 @@
|
||||
set(CEPH_GRAFANA_DASHBOARDS_DIR "${CMAKE_INSTALL_SYSCONFDIR}/grafana/dashboards/ceph-dashboard"
|
||||
CACHE PATH "Location for grafana dashboards")
|
||||
|
||||
FILE(GLOB CEPH_GRAFANA_DASHBOARDS "*.json")
|
||||
|
||||
file(GLOB CEPH_GRAFANA_DASHBOARDS "*.json")
|
||||
install(FILES
|
||||
${CEPH_GRAFANA_DASHBOARDS}
|
||||
DESTINATION ${CEPH_GRAFANA_DASHBOARDS_DIR})
|
||||
|
||||
set(CEPH_BUILD_VIRTUALENV $ENV{TMPDIR})
|
||||
if(NOT CEPH_BUILD_VIRTUALENV)
|
||||
set(CEPH_BUILD_VIRTUALENV ${CMAKE_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
if(WITH_GRAFANA)
|
||||
include(AddCephTest)
|
||||
add_tox_test(grafana TOX_ENVS grafonnet-check)
|
||||
set(ver 0.1.0)
|
||||
set(name grafonnet-lib)
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(${name}
|
||||
URL https://github.com/grafana/${name}/archive/v${ver}/${name}-${ver}.tar.gz
|
||||
URL_MD5 0798752ed40864fa8b3db40a3c970642
|
||||
BUILD_COMMAND ""
|
||||
CONFIGURE_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
EXCLUDE_FROM_ALL ON)
|
||||
add_dependencies(tests
|
||||
${name})
|
||||
ExternalProject_Get_Property(${name} SOURCE_DIR)
|
||||
set_property(
|
||||
TEST run-tox-grafana
|
||||
APPEND
|
||||
PROPERTY ENVIRONMENT
|
||||
GRAFONNET_PATH=${SOURCE_DIR}/grafonnet)
|
||||
endif()
|
||||
|
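Outside of CMake, the grafonnet check wired up above can also be run by hand; a
sketch, assuming a local checkout of grafonnet-lib (the path below is a
placeholder):

    # point the check at a local grafonnet-lib checkout (v0.1.0 is pinned above)
    export GRAFONNET_PATH=/path/to/grafonnet-lib/grafonnet

    # run the comparison through the tox environment added in this change
    cd monitoring/grafana/dashboards
    tox -e grafonnet-check
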
@ -107,8 +107,9 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ceph_health_status{instance=~'$instance'}",
|
||||
"expr": "ceph_health_status",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"interval": "$interval",
|
||||
"intervalFactor": 1,
|
||||
"refId": "A",
|
||||
@ -174,7 +175,7 @@
|
||||
"displayAliasType": "Always",
|
||||
"displayType": "Regular",
|
||||
"displayValueWithAlias": "When Alias Displayed",
|
||||
"expr": "count(ceph_osd_metadata{instance=~\"$instance\"})",
|
||||
"expr": "count(ceph_osd_metadata)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "All",
|
||||
@ -189,7 +190,7 @@
|
||||
"displayAliasType": "Always",
|
||||
"displayType": "Regular",
|
||||
"displayValueWithAlias": "When Alias Displayed",
|
||||
"expr": "sum(ceph_osds_in{instance=~\"$instance\"})",
|
||||
"expr": "sum(ceph_osds_in)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "In",
|
||||
@ -204,7 +205,7 @@
|
||||
"displayAliasType": "Warning / Critical",
|
||||
"displayType": "Regular",
|
||||
"displayValueWithAlias": "When Alias Displayed",
|
||||
"expr": "sum(ceph_osd_in{instance=~\"$instance\"} == bool 0)",
|
||||
"expr": "sum(ceph_osd_in == bool 0)",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
@ -221,7 +222,7 @@
|
||||
"displayAliasType": "Always",
|
||||
"displayType": "Regular",
|
||||
"displayValueWithAlias": "When Alias Displayed",
|
||||
"expr": "sum(ceph_osd_up{instance=~\"$instance\"})",
|
||||
"expr": "sum(ceph_osd_up)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Up",
|
||||
@ -237,7 +238,7 @@
|
||||
"displayAliasType": "Warning / Critical",
|
||||
"displayType": "Regular",
|
||||
"displayValueWithAlias": "When Alias Displayed",
|
||||
"expr": "sum(ceph_osd_up{instance=~\"$instance\"} == bool 0)",
|
||||
"expr": "sum(ceph_osd_up == bool 0)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Down",
|
||||
@ -263,7 +264,7 @@
|
||||
"decimals": 2,
|
||||
"format": "percentunit",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"maxValue": 1,
|
||||
"minValue": 0,
|
||||
"show": true,
|
||||
"thresholdLabels": false,
|
||||
@ -312,14 +313,14 @@
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(ceph_osd_stat_bytes_used{instance=~\"$instance\"})/sum(ceph_osd_stat_bytes{instance=~\"$instance\"})",
|
||||
"expr": "sum(ceph_osd_stat_bytes_used)/sum(ceph_osd_stat_bytes)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Used",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "70,80",
|
||||
"thresholds": "0.7,0.8",
|
||||
"title": "Capacity used",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
@ -530,28 +531,28 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "quantile(0.95, ceph_osd_apply_latency_ms{instance=~\"$instance\"})",
|
||||
"expr": "quantile(0.95, ceph_osd_apply_latency_ms)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Apply Latency P_95",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "quantile(0.95, ceph_osd_commit_latency_ms{instance=~\"$instance\"})",
|
||||
"expr": "quantile(0.95, ceph_osd_commit_latency_ms)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Commit Latency P_95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "avg(ceph_osd_apply_latency_ms{instance=~\"$instance\"})",
|
||||
"expr": "avg(ceph_osd_apply_latency_ms)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Avg Apply Latency",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "avg(ceph_osd_commit_latency_ms{instance=~\"$instance\"})",
|
||||
"expr": "avg(ceph_osd_commit_latency_ms)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Avg Commit Latency",
|
||||
@ -629,7 +630,7 @@
|
||||
"displayAliasType": "Always",
|
||||
"displayType": "Regular",
|
||||
"displayValueWithAlias": "When Alias Displayed",
|
||||
"expr": "sum(ceph_mon_quorum_status{instance=~\"$instance\"})",
|
||||
"expr": "sum(ceph_mon_quorum_status)",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
@ -646,7 +647,7 @@
|
||||
"displayAliasType": "Always",
|
||||
"displayType": "Regular",
|
||||
"displayValueWithAlias": "When Alias Displayed",
|
||||
"expr": "count(ceph_mon_quorum_status{instance=~\"$instance\"})",
|
||||
"expr": "count(ceph_mon_quorum_status)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Total",
|
||||
@ -663,7 +664,7 @@
|
||||
"displayAliasType": "Warning / Critical",
|
||||
"displayType": "Annotation",
|
||||
"displayValueWithAlias": "Never",
|
||||
"expr": "count(ceph_mon_quorum_status{instance=~\"$instance\"}) / sum(ceph_mon_quorum_status{instance=~\"$instance\"})",
|
||||
"expr": "count(ceph_mon_quorum_status) / sum(ceph_mon_quorum_status)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "MONs out of Quorum",
|
||||
@ -710,7 +711,7 @@
|
||||
"displayAliasType": "Always",
|
||||
"displayType": "Regular",
|
||||
"displayValueWithAlias": "When Alias Displayed",
|
||||
"expr": "ceph_mds_server_handle_client_session{instance=~\"$instance\"}",
|
||||
"expr": "ceph_mds_server_handle_client_session",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Clients",
|
||||
@ -764,14 +765,14 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(ceph_osd_op_w_in_bytes{instance=~\"$instance\"}[1m]))",
|
||||
"expr": "sum(irate(ceph_osd_op_w_in_bytes[1m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Writes",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(ceph_osd_op_r_out_bytes{instance=~\"$instance\"}[1m]))",
|
||||
"expr": "sum(irate(ceph_osd_op_r_out_bytes[1m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Reads",
|
||||
@ -851,7 +852,7 @@
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(deriv(ceph_pool_stored{instance=~\"$instance\"}[1m]))",
|
||||
"expr": "sum(deriv(ceph_pool_stored[1m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"refId": "A"
|
||||
@ -924,7 +925,7 @@
|
||||
"span": 12,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ceph_osd_stat_bytes_used{instance=~'$instance'} / ceph_osd_stat_bytes{instance=~'$instance'}",
|
||||
"expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 1,
|
||||
@ -946,7 +947,7 @@
|
||||
"xBucketNumber": null,
|
||||
"xBucketSize": "",
|
||||
"yAxis": {
|
||||
"decimals": null,
|
||||
"decimals": 2,
|
||||
"format": "percentunit",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
@ -986,7 +987,7 @@
|
||||
"links": [],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ceph_osd_numpg{instance=~\"$instance\"}",
|
||||
"expr": "ceph_osd_numpg",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "#PGs",
|
||||
@ -1190,29 +1191,6 @@
|
||||
"query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
|
||||
"refresh": 2,
|
||||
"type": "interval"
|
||||
},
|
||||
{
|
||||
"allFormat": "glob",
|
||||
"allValue": null,
|
||||
"current": {},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"hideLabel": false,
|
||||
"includeAll": true,
|
||||
"label": "Exporter Instance",
|
||||
"multi": false,
|
||||
"multiFormat": "glob",
|
||||
"name": "instance",
|
||||
"options": [],
|
||||
"query": "label_values(ceph_health_status, instance)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -1245,7 +1223,7 @@
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"timezone": "",
|
||||
"title": "Ceph - Cluster",
|
||||
"version": 13
|
||||
}
|
||||
|
@ -1208,7 +1208,7 @@
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"timezone": "",
|
||||
"title": "Host Details",
|
||||
"uid": "rtOg0AiWz",
|
||||
"version": 4
|
||||
|
@ -0,0 +1,54 @@
|
||||
local g = import 'grafana.libsonnet';
|
||||
|
||||
local dashboardSchema(title, uid, time_from, refresh, schemaVersion, tags,timezone, timepicker) =
|
||||
g.dashboard.new(title=title, uid=uid, time_from=time_from, refresh=refresh, schemaVersion=schemaVersion, tags=tags, timezone=timezone, timepicker=timepicker);
|
||||
|
||||
local graphPanelSchema(title, nullPointMode, stack, formatY1, formatY2, labelY1, labelY2, min, fill, datasource) =
|
||||
g.graphPanel.new(title=title, nullPointMode=nullPointMode, stack=stack, formatY1=formatY1, formatY2=formatY2, labelY1=labelY1, labelY2=labelY2, min=min, fill=fill, datasource=datasource);
|
||||
|
||||
local addTargetSchema(expr, intervalFactor, format, legendFormat) =
|
||||
g.prometheus.target(expr=expr, intervalFactor=intervalFactor, format=format, legendFormat=legendFormat);
|
||||
|
||||
local addTemplateSchema(name, datasource, query, refresh, hide, includeAll, sort) =
|
||||
g.template.new(name=name, datasource=datasource, query=query, refresh=refresh, hide=hide, includeAll=includeAll, sort=sort);
|
||||
|
||||
local addAnnotationSchema(builtIn, datasource, enable, hide, iconColor, name, type) =
|
||||
g.annotation.datasource(builtIn=builtIn, datasource=datasource, enable=enable, hide=hide, iconColor=iconColor, name=name, type=type);
|
||||
|
||||
{
|
||||
"radosgw-sync-overview.json":
|
||||
local RgwSyncOverviewPanel(title, formatY1, labelY1, rgwMetric, x, y, w, h) =
|
||||
graphPanelSchema(title, 'null as zero', true, formatY1, 'short', labelY1, null, 0, 1, '$datasource')
|
||||
.addTargets(
|
||||
[addTargetSchema('sum by (source_zone) (rate(%s[30s]))' % rgwMetric, 1, 'time_series', '{{source_zone}}')]) + {gridPos: {x: x, y: y, w: w, h: h}};
|
||||
|
||||
dashboardSchema(
|
||||
'RGW Sync Overview', 'rgw-sync-overview', 'now-1h', '15s', 16, ["overview"], '', {refresh_intervals:['5s','10s','15s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']}
|
||||
)
|
||||
.addAnnotation(
|
||||
addAnnotationSchema(
|
||||
1, '-- Grafana --', true, true, 'rgba(0, 211, 255, 1)', 'Annotations & Alerts', 'dashboard')
|
||||
)
|
||||
.addRequired(
|
||||
type='grafana', id='grafana', name='Grafana', version='5.0.0'
|
||||
)
|
||||
.addRequired(
|
||||
type='panel', id='graph', name='Graph', version='5.0.0'
|
||||
)
|
||||
.addTemplate(
|
||||
addTemplateSchema('rgw_servers', '$datasource', 'prometehus', 1, 2, true, 1)
|
||||
)
|
||||
.addTemplate(
|
||||
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
|
||||
)
|
||||
.addPanels([
|
||||
RgwSyncOverviewPanel(
|
||||
'Replication (throughput) from Source Zone', 'Bps', null, 'ceph_data_sync_from_zone_fetch_bytes_sum', 0, 0, 8, 7),
|
||||
RgwSyncOverviewPanel(
|
||||
'Replication (objects) from Source Zone', 'short', 'Objects/s', 'ceph_data_sync_from_zone_fetch_bytes_count', 8, 0, 8, 7),
|
||||
RgwSyncOverviewPanel(
|
||||
'Polling Request Latency from Source Zone', 'ms', null, 'ceph_data_sync_from_zone_poll_latency_sum', 16, 0, 8, 7),
|
||||
RgwSyncOverviewPanel(
|
||||
'Unsuccessful Object Replications from Source Zone', 'short', 'Count/s', 'ceph_data_sync_from_zone_fetch_errors', 0, 7, 8, 7)
|
||||
])
|
||||
}
|
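The jsonnet source above is what the ``grafonnet-fix`` tox environment added later
in this change renders back into the committed JSON. A minimal manual render
sketch, run from the dashboards directory with ``GRAFONNET_PATH`` pointing at
grafonnet-lib:

    # regenerate the dashboard JSON files defined in jsonnet/grafana_dashboards.jsonnet
    JSONNET_PATH="${GRAFONNET_PATH}" jsonnet -m . jsonnet/grafana_dashboards.jsonnet
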
@ -423,7 +423,7 @@
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"format": "s",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
|
@ -658,7 +658,7 @@
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"timezone": "",
|
||||
"title": "Ceph Pool Details",
|
||||
"uid": "-xyV8KCiz",
|
||||
"version": 1
|
||||
|
@ -1554,7 +1554,7 @@
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"timezone": "",
|
||||
"title": "Ceph Pools Overview",
|
||||
"uid": "z99hzWtmk",
|
||||
"variables": {
|
||||
|
@ -1,440 +1,455 @@
|
||||
{
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"iteration": 1534386107523,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum[30s]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Replication (throughput) from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"unit": "bytes",
|
||||
"format": "Bps",
|
||||
"decimals": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 7.4,
|
||||
"x": 8.3,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count[30s]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Replication (objects) from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"decimals": null,
|
||||
"label": "Objects/s",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum[30s]) * 1000)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Polling Request Latency from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"unit": "s",
|
||||
"format": "ms",
|
||||
"decimals": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors[30s]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Unsuccessful Object Replications from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"decimals": null,
|
||||
"label": "Count/s",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "15s",
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"overview"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
},
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "rgw_servers",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum[30s]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Replication (throughput) from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"tags": [],
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count[30s]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Replication (objects) from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": "Objects/s",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum[30s]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Polling Request Latency from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors[30s]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Unsuccessful Object Replications from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": "Count/s",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"15s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "RGW Sync Overview",
|
||||
"uid": "rgw-sync-overview",
|
||||
"version": 2
|
||||
],
|
||||
"refresh": "15s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"overview"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "rgw_servers",
|
||||
"options": [ ],
|
||||
"query": "prometehus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"15s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "RGW Sync Overview",
|
||||
"uid": "rgw-sync-overview",
|
||||
"version": 0
|
||||
}
|
||||
|
@ -0,0 +1 @@
|
||||
jsondiff
|
30
ceph/monitoring/grafana/dashboards/test-jsonnet.sh
Normal file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
TEMPDIR=`mktemp -d`
|
||||
BASEDIR=$(dirname "$0")
|
||||
|
||||
JSONNET_PATH="${GRAFONNET_PATH}" jsonnet -m ${TEMPDIR} $BASEDIR/jsonnet/grafana_dashboards.jsonnet
|
||||
|
||||
truncate -s 0 ${TEMPDIR}/json_difference.log
|
||||
for json_files in $BASEDIR/*.json
|
||||
do
|
||||
JSON_FILE_NAME=$(basename $json_files)
|
||||
for generated_files in ${TEMPDIR}/*.json
|
||||
do
|
||||
GENERATED_FILE_NAME=$(basename $generated_files)
|
||||
if [ $JSON_FILE_NAME == $GENERATED_FILE_NAME ]; then
|
||||
jsondiff --indent 2 $generated_files $json_files | tee -a ${TEMPDIR}/json_difference.log
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
if [[ $(wc -l < ${TEMPDIR}/json_difference.log) -eq 0 ]]
|
||||
then
|
||||
rm -rf ${TEMPDIR}
|
||||
echo "Congratulations! Grafonnet Check Passed"
|
||||
else
|
||||
rm -rf ${TEMPDIR}
|
||||
echo "Grafonnet Check Failed, failed comparing generated file with existing"
|
||||
exit 1
|
||||
fi
|
22
ceph/monitoring/grafana/dashboards/tox.ini
Normal file
@ -0,0 +1,22 @@
|
||||
[tox]
|
||||
envlist = grafonnet-{check,fix}
|
||||
skipsdist = true
|
||||
|
||||
[grafonnet]
|
||||
deps =
|
||||
-rrequirements-grafonnet.txt
|
||||
|
||||
[testenv:grafonnet-{check,fix}]
|
||||
basepython = python3
|
||||
whitelist_externals =
|
||||
jsonnet
|
||||
bash
|
||||
description =
|
||||
check: Ensure that auto-generated grafana dashboard files matches the current version
|
||||
fix: generate dashboard json files from jsonnet file with latest changes
|
||||
deps =
|
||||
{[grafonnet]deps}
|
||||
passenv = GRAFONNET_PATH
|
||||
commands =
|
||||
check: bash test-jsonnet.sh
|
||||
fix: jsonnet -m . jsonnet/grafana_dashboards.jsonnet
|
14
ceph/qa/distros/podman/centos_8.2_container_tools_3.0.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
os_type: centos
|
||||
os_version: "8.2"
|
||||
overrides:
|
||||
selinux:
|
||||
whitelist:
|
||||
- scontext=system_u:system_r:logrotate_t:s0
|
||||
|
||||
tasks:
|
||||
- pexec:
|
||||
all:
|
||||
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
|
||||
- sudo dnf -y module reset container-tools
|
||||
- sudo dnf -y module install container-tools:3.0
|
||||
- sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf
|
@ -1,18 +0,0 @@
|
||||
os_type: centos
|
||||
os_version: "8.2"
|
||||
overrides:
|
||||
selinux:
|
||||
whitelist:
|
||||
- scontext=system_u:system_r:logrotate_t:s0
|
||||
|
||||
tasks:
|
||||
- pexec:
|
||||
all:
|
||||
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
|
||||
- sudo dnf -y module disable container-tools
|
||||
- sudo dnf -y install 'dnf-command(copr)'
|
||||
- sudo dnf -y copr enable rhcontainerbot/container-selinux
|
||||
- sudo curl -L -o /etc/yum.repos.d/devel:kubic:libcontainers:stable.repo https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/CentOS_8/devel:kubic:libcontainers:stable.repo
|
||||
- sudo dnf remove -y podman
|
||||
- sudo dnf -y install podman
|
||||
- sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf
|
14
ceph/qa/distros/podman/centos_8.3_container_tools_3.0.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
os_type: centos
|
||||
os_version: "8.3"
|
||||
overrides:
|
||||
selinux:
|
||||
whitelist:
|
||||
- scontext=system_u:system_r:logrotate_t:s0
|
||||
|
||||
tasks:
|
||||
- pexec:
|
||||
all:
|
||||
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
|
||||
- sudo dnf -y module reset container-tools
|
||||
- sudo dnf -y module install container-tools:3.0
|
||||
- sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf
|
@ -8,16 +8,6 @@ function run() {
|
||||
local dir=$1
|
||||
shift
|
||||
|
||||
export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
|
||||
export CEPH_ARGS
|
||||
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
|
||||
CEPH_ARGS+="--mon-host=$CEPH_MON "
|
||||
CEPH_ARGS+="--bluestore_block_size=2147483648 "
|
||||
CEPH_ARGS+="--bluestore_block_db_create=true "
|
||||
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
|
||||
CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
|
||||
CEPH_ARGS+="--bluestore_block_wal_create=true "
|
||||
CEPH_ARGS+="--bluestore_fsck_on_mount=true "
|
||||
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
|
||||
for func in $funcs ; do
|
||||
setup $dir || return 1
|
||||
@ -33,6 +23,16 @@ function TEST_bluestore() {
|
||||
if [ $flimit -lt 1536 ]; then
|
||||
echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
|
||||
fi
|
||||
export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
|
||||
export CEPH_ARGS
|
||||
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
|
||||
CEPH_ARGS+="--mon-host=$CEPH_MON "
|
||||
CEPH_ARGS+="--bluestore_block_size=2147483648 "
|
||||
CEPH_ARGS+="--bluestore_block_db_create=true "
|
||||
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
|
||||
CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
|
||||
CEPH_ARGS+="--bluestore_block_wal_create=true "
|
||||
CEPH_ARGS+="--bluestore_fsck_on_mount=true "
|
||||
|
||||
run_mon $dir a || return 1
|
||||
run_mgr $dir x || return 1
|
||||
@ -337,6 +337,63 @@ function TEST_bluestore() {
|
||||
wait_for_clean || return 1
|
||||
}
|
||||
|
||||
function TEST_bluestore2() {
|
||||
local dir=$1
|
||||
|
||||
local flimit=$(ulimit -n)
|
||||
if [ $flimit -lt 1536 ]; then
|
||||
echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
|
||||
fi
|
||||
export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
|
||||
export CEPH_ARGS
|
||||
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
|
||||
CEPH_ARGS+="--mon-host=$CEPH_MON "
|
||||
CEPH_ARGS+="--bluestore_block_size=4294967296 "
|
||||
CEPH_ARGS+="--bluestore_block_db_create=true "
|
||||
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
|
||||
CEPH_ARGS+="--bluestore_block_wal_create=false "
|
||||
CEPH_ARGS+="--bluestore_fsck_on_mount=true "
|
||||
CEPH_ARGS+="--osd_pool_default_size=1 "
|
||||
CEPH_ARGS+="--osd_pool_default_min_size=1 "
|
||||
CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "
|
||||
|
||||
run_mon $dir a || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 || return 1
|
||||
osd_pid0=$(cat $dir/osd.0.pid)
|
||||
|
||||
sleep 5
|
||||
create_pool foo 16
|
||||
|
||||
# write some objects
|
||||
timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1
|
||||
|
||||
#give RocksDB some time to cooldown and put files to slow level(s)
|
||||
sleep 10
|
||||
|
||||
spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" )
|
||||
test $spilled_over -gt 0 || return 1
|
||||
|
||||
while kill $osd_pid0; do sleep 1 ; done
|
||||
ceph osd down 0
|
||||
|
||||
ceph-bluestore-tool --path $dir/0 \
|
||||
--devs-source $dir/0/block.db \
|
||||
--dev-target $dir/0/block \
|
||||
--command bluefs-bdev-migrate || return 1
|
||||
|
||||
ceph-bluestore-tool --path $dir/0 \
|
||||
--command bluefs-bdev-sizes || return 1
|
||||
|
||||
ceph-bluestore-tool --path $dir/0 \
|
||||
--command fsck || return 1
|
||||
|
||||
activate_osd $dir 0 || return 1
|
||||
osd_pid0=$(cat $dir/osd.0.pid)
|
||||
|
||||
wait_for_clean || return 1
|
||||
}
|
||||
|
||||
main osd-bluefs-volume-ops "$@"
|
||||
|
||||
# Local Variables:
|
||||
|
@ -12,14 +12,15 @@ function run() {
|
||||
|
||||
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
|
||||
for func in $funcs ; do
|
||||
setup $dir || return 1
|
||||
$func $dir || return 1
|
||||
teardown $dir || return 1
|
||||
done
|
||||
}
|
||||
|
||||
function TEST_reuse_id() {
|
||||
local dir=$1
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 || return 1
|
||||
|
@ -27,14 +27,15 @@ function run() {
|
||||
|
||||
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
|
||||
for func in $funcs ; do
|
||||
setup $dir || return 1
|
||||
$func $dir || return 1
|
||||
teardown $dir || return 1
|
||||
done
|
||||
}
|
||||
|
||||
function TEST_reuse_id() {
|
||||
local dir=$1
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 || return 1
|
||||
|
@ -12,14 +12,15 @@ function run() {
|
||||
|
||||
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
|
||||
for func in $funcs ; do
|
||||
setup $dir || return 1
|
||||
$func $dir || return 1
|
||||
teardown $dir || return 1
|
||||
done
|
||||
}
|
||||
|
||||
function TEST_a_merge_empty() {
|
||||
local dir=$1
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=3 || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 || return 1
|
||||
@ -87,7 +88,6 @@ function TEST_a_merge_empty() {
|
||||
function TEST_import_after_merge_and_gap() {
|
||||
local dir=$1
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 || return 1
|
||||
@ -162,7 +162,6 @@ function TEST_import_after_merge_and_gap() {
|
||||
function TEST_import_after_split() {
|
||||
local dir=$1
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 || return 1
|
||||
|
@ -60,7 +60,9 @@ function run() {
|
||||
export -n CEPH_CLI_TEST_DUP_COMMAND
|
||||
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
|
||||
for func in $funcs ; do
|
||||
setup $dir || return 1
|
||||
$func $dir || return 1
|
||||
teardown $dir || return 1
|
||||
done
|
||||
}
|
||||
|
||||
@ -91,7 +93,6 @@ function TEST_corrupt_and_repair_replicated() {
|
||||
local dir=$1
|
||||
local poolname=rbd
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=2 || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 || return 1
|
||||
@ -103,8 +104,6 @@ function TEST_corrupt_and_repair_replicated() {
|
||||
corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
|
||||
# Reproduces http://tracker.ceph.com/issues/8914
|
||||
corrupt_and_repair_one $dir $poolname $(get_primary $poolname SOMETHING) || return 1
|
||||
|
||||
teardown $dir || return 1
|
||||
}
|
||||
|
||||
#
|
||||
@ -114,7 +113,6 @@ function TEST_allow_repair_during_recovery() {
|
||||
local dir=$1
|
||||
local poolname=rbd
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=2 || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 --osd_scrub_during_recovery=false \
|
||||
@ -128,8 +126,6 @@ function TEST_allow_repair_during_recovery() {
|
||||
|
||||
add_something $dir $poolname || return 1
|
||||
corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
|
||||
|
||||
teardown $dir || return 1
|
||||
}
|
||||
|
||||
#
|
||||
@ -139,7 +135,6 @@ function TEST_skip_non_repair_during_recovery() {
|
||||
local dir=$1
|
||||
local poolname=rbd
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=2 || return 1
|
||||
run_mgr $dir x || return 1
|
||||
run_osd $dir 0 --osd_scrub_during_recovery=false \
|
||||
@ -153,8 +148,6 @@ function TEST_skip_non_repair_during_recovery() {
|
||||
|
||||
add_something $dir $poolname || return 1
|
||||
scrub_and_not_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
|
||||
|
||||
teardown $dir || return 1
|
||||
}
|
||||
|
||||
function scrub_and_not_schedule() {
|
||||
@ -276,7 +269,6 @@ function auto_repair_erasure_coded() {
|
||||
local poolname=ecpool
|
||||
|
||||
# Launch a cluster with 5 seconds scrub interval
|
||||
setup $dir || return 1
|
||||
run_mon $dir a || return 1
|
||||
run_mgr $dir x || return 1
|
||||
local ceph_osd_args="--osd-scrub-auto-repair=true \
|
||||
@ -285,11 +277,11 @@ function auto_repair_erasure_coded() {
|
||||
--osd-scrub-min-interval=5 \
|
||||
--osd-scrub-interval-randomize-ratio=0"
|
||||
for id in $(seq 0 2) ; do
|
||||
if [ "$allow_overwrites" = "true" ]; then
|
||||
if [ "$allow_overwrites" = "true" ]; then
|
||||
run_osd $dir $id $ceph_osd_args || return 1
|
||||
else
|
||||
else
|
||||
run_osd_filestore $dir $id $ceph_osd_args || return 1
|
||||
fi
|
||||
fi
|
||||
done
|
||||
create_rbd_pool || return 1
|
||||
wait_for_clean || return 1
|
||||
@ -314,9 +306,6 @@ function auto_repair_erasure_coded() {
|
||||
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
|
||||
rados --pool $poolname get SOMETHING $dir/COPY || return 1
|
||||
diff $dir/ORIGINAL $dir/COPY || return 1
|
||||
|
||||
# Tear down
|
||||
teardown $dir || return 1
|
||||
}
|
||||
|
||||
function TEST_auto_repair_erasure_coded_appends() {
|
||||
@ -329,16 +318,135 @@ function TEST_auto_repair_erasure_coded_overwrites() {
|
||||
fi
|
||||
}
|
||||
|
||||
# initiate a scrub, then check for the (expected) 'scrubbing' and the
# (not expected until an error was identified) 'repair'
# Arguments: osd#, pg, sleep time
function initiate_and_fetch_state() {
local the_osd="osd.$1"
local pgid=$2
local last_scrub=$(get_last_scrub_stamp $pgid)

set_config "osd" "$1" "osd_scrub_sleep" "$3"
set_config "osd" "$1" "osd_scrub_auto_repair" "true"

flush_pg_stats
date --rfc-3339=ns

# note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one
env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) deep_scrub "$pgid"
env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid"

# wait for 'scrubbing' to appear
for ((i=0; i < 80; i++)); do

st=`ceph pg $pgid query --format json | jq '.state' `
echo $i ") state now: " $st

case "$st" in
*scrubbing*repair* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this
*scrubbing* ) echo "found scrub"; return 0;;
*inconsistent* ) echo "Got here too late. Scrub has already finished"; return 1;;
*recovery* ) echo "Got here too late. Scrub has already finished."; return 1;;
* ) echo $st;;
esac

if [ $((i % 10)) == 4 ]; then
echo "loop --------> " $i
fi
sleep 0.3
done

echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start"
return 1
}

function wait_end_of_scrub() { # osd# pg
local the_osd="osd.$1"
local pgid=$2

for ((i=0; i < 40; i++)); do
st=`ceph pg $pgid query --format json | jq '.state' `
echo "wait-scrub-end state now: " $st
[[ $st =~ (.*scrubbing.*) ]] || break
if [ $((i % 5)) == 4 ] ; then
flush_pg_stats
fi
sleep 0.3
done

if [[ $st =~ (.*scrubbing.*) ]]
then
# a timeout
return 1
fi
return 0
}

function TEST_auto_repair_bluestore_tag() {
local dir=$1
local poolname=testpool

# Launch a cluster with 3 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
done

create_pool $poolname 1 1 || return 1
ceph osd pool set $poolname size 2
wait_for_clean || return 1

# Put an object
local payload=ABCDEF
echo $payload > $dir/ORIGINAL
rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1

# Remove the object from one shard physically
# Restarted osd get $ceph_osd_args passed
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1

local pgid=$(get_pg $poolname SOMETHING)
local primary=$(get_primary $poolname SOMETHING)
echo "Affected PG " $pgid " w/ primary " $primary
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
initiate_and_fetch_state $primary $pgid "3.0"
r=$?
echo "initiate_and_fetch_state ret: " $r
set_config "osd" "$1" "osd_scrub_sleep" "0"
if [ $r -ne 0 ]; then
return 1
fi

wait_end_of_scrub "$primary" "$pgid" || return 1
ceph pg dump pgs

# Verify - the file should be back
# Restarted osd get $ceph_osd_args passed
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
diff $dir/ORIGINAL $dir/COPY || return 1
grep scrub_finish $dir/osd.${primary}.log

# Tear down
teardown $dir || return 1
}

function TEST_auto_repair_bluestore_basic() {
local dir=$1
local poolname=testpool

# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
@ -373,9 +481,6 @@ function TEST_auto_repair_bluestore_basic() {
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
diff $dir/ORIGINAL $dir/COPY || return 1
grep scrub_finish $dir/osd.${primary}.log

# Tear down
teardown $dir || return 1
}

function TEST_auto_repair_bluestore_scrub() {
@ -383,12 +488,12 @@ function TEST_auto_repair_bluestore_scrub() {
local poolname=testpool

# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0 \
--osd-scrub-backoff-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
done
@ -428,9 +533,6 @@ function TEST_auto_repair_bluestore_scrub() {
# This should have caused 1 object to be repaired
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "1" || return 1

# Tear down
teardown $dir || return 1
}

function TEST_auto_repair_bluestore_failed() {
@ -438,11 +540,10 @@ function TEST_auto_repair_bluestore_failed() {
local poolname=testpool

# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
@ -498,9 +599,6 @@ function TEST_auto_repair_bluestore_failed() {
ceph pg dump pgs
ceph pg dump pgs | grep -q -e "^${pgid}.* active+clean " -e "^${pgid}.* active+clean+wait " || return 1
grep scrub_finish $dir/osd.${primary}.log

# Tear down
teardown $dir || return 1
}

function TEST_auto_repair_bluestore_failed_norecov() {
@ -508,11 +606,10 @@ function TEST_auto_repair_bluestore_failed_norecov() {
local poolname=testpool

# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
@ -552,9 +649,6 @@ function TEST_auto_repair_bluestore_failed_norecov() {
grep -q "scrub_finish.*present with no repair possible" $dir/osd.${primary}.log || return 1
ceph pg dump pgs
ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1

# Tear down
teardown $dir || return 1
}

function TEST_repair_stats() {
@ -566,7 +660,6 @@ function TEST_repair_stats() {
local REPAIRS=20

# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
@ -626,9 +719,6 @@ function TEST_repair_stats() {
ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
test "$COUNT" = "$REPAIRS" || return 1

# Tear down
teardown $dir || return 1
}

function TEST_repair_stats_ec() {
@ -641,7 +731,6 @@ function TEST_repair_stats_ec() {
local allow_overwrites=false

# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
@ -704,9 +793,6 @@ function TEST_repair_stats_ec() {
ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
test "$COUNT" = "$REPAIRS" || return 1

# Tear down
teardown $dir || return 1
}

function corrupt_and_repair_jerasure() {
@ -714,7 +800,6 @@ function corrupt_and_repair_jerasure() {
local allow_overwrites=$2
local poolname=ecpool

setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 3) ; do
@ -729,8 +814,6 @@ function corrupt_and_repair_jerasure() {

create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1
corrupt_and_repair_erasure_coded $dir $poolname || return 1

teardown $dir || return 1
}

function TEST_corrupt_and_repair_jerasure_appends() {
@ -748,7 +831,6 @@ function corrupt_and_repair_lrc() {
local allow_overwrites=$2
local poolname=ecpool

setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 9) ; do
@ -763,8 +845,6 @@ function corrupt_and_repair_lrc() {

create_ec_pool $poolname $allow_overwrites k=4 m=2 l=3 plugin=lrc || return 1
corrupt_and_repair_erasure_coded $dir $poolname || return 1

teardown $dir || return 1
}

function TEST_corrupt_and_repair_lrc_appends() {
@ -783,7 +863,6 @@ function unfound_erasure_coded() {
local poolname=ecpool
local payload=ABCDEF

setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 3) ; do
@ -831,8 +910,6 @@ function unfound_erasure_coded() {
ceph -s|grep "4 up" || return 1
ceph -s|grep "4 in" || return 1
ceph -s|grep "1/1 objects unfound" || return 1

teardown $dir || return 1
}

function TEST_unfound_erasure_coded_appends() {
@ -853,7 +930,6 @@ function list_missing_erasure_coded() {
local allow_overwrites=$2
local poolname=ecpool

setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 2) ; do
@ -913,8 +989,6 @@ function list_missing_erasure_coded() {
matches=$(ceph pg $pg list_unfound | egrep "MOBJ0|MOBJ1" | wc -l)
[ $matches -eq 2 ] && break
done

teardown $dir || return 1
}

function TEST_list_missing_erasure_coded_appends() {
@ -935,7 +1009,6 @@ function TEST_corrupt_scrub_replicated() {
local poolname=csr_pool
local total_objs=19

setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
@ -3530,7 +3603,6 @@ EOF
fi

ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
teardown $dir || return 1
}

@ -3543,7 +3615,6 @@ function corrupt_scrub_erasure() {
local poolname=ecpool
local total_objs=7

setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 2) ; do
@ -5690,7 +5761,6 @@ EOF
fi

ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
teardown $dir || return 1
}

function TEST_corrupt_scrub_erasure_appends() {
@ -5711,7 +5781,6 @@ function TEST_periodic_scrub_replicated() {
local poolname=psr_pool
local objname=POBJ

setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
@ -5803,7 +5872,6 @@ function TEST_scrub_warning() {
local conf_overdue_seconds=$(calc $i7_days + $i1_day + \( $i7_days \* $overdue \) )
local pool_overdue_seconds=$(calc $i14_days + $i1_day + \( $i14_days \* $overdue \) )

setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
run_mgr $dir x --mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} || return 1
run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1
@ -5870,7 +5938,6 @@ function TEST_scrub_warning() {
ceph health detail | grep "not deep-scrubbed since"
return 1
fi
return 0
}

#
@ -5881,7 +5948,6 @@ function TEST_corrupt_snapset_scrub_rep() {
local poolname=csr_pool
local total_objs=2

setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
@ -6141,7 +6207,6 @@ EOF
fi

ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
teardown $dir || return 1
}

function TEST_request_scrub_priority() {
@ -6151,7 +6216,6 @@ function TEST_request_scrub_priority() {
local OBJECTS=64
local PGS=8

setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
@ -6199,8 +6263,6 @@ function TEST_request_scrub_priority() {

# Verify that the requested scrub ran first
grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1

return 0
}

@ -12,6 +12,8 @@ overrides:
- is full \(reached quota
- POOL_FULL
- POOL_BACKFILLFULL
- PG_RECOVERY_FULL
- PG_DEGRADED
conf:
mon:
mon osd nearfull ratio: 0.6

0 ceph/qa/suites/fs/upgrade/nofs/% (Normal file)

1 ceph/qa/suites/fs/upgrade/nofs/.qa (Symbolic link)
@ -0,0 +1 @@
../.qa/

3 ceph/qa/suites/fs/upgrade/nofs/README (Normal file)
@ -0,0 +1,3 @@
This test just verifies that upgrades work with no file system present. In
particular, catch that MDSMonitor doesn't blow up somehow with version
mismatches.
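
(Editorial illustration only, not part of this commit: a rough sketch of the kind of check the nofs suite automates. The jq filter and the HEALTH_OK grep are assumptions about one reasonable way to verify this by hand.)

    #!/usr/bin/env bash
    # After upgrading a cluster that has no CephFS, the MDSMonitor should still
    # serve a sane FSMap: zero filesystems, all daemons on the new release, and
    # a healthy cluster overall.
    set -ex
    ceph fs dump --format json | jq -e '.filesystems | length == 0'
    ceph versions
    ceph health | grep -q HEALTH_OK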

1 ceph/qa/suites/fs/upgrade/nofs/bluestore-bitmap.yaml (Symbolic link)
@ -0,0 +1 @@
.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml

1 ceph/qa/suites/fs/upgrade/nofs/centos_latest.yaml (Symbolic link)
@ -0,0 +1 @@
.qa/distros/supported/centos_latest.yaml

1 ceph/qa/suites/fs/upgrade/nofs/conf (Symbolic link)
@ -0,0 +1 @@
.qa/cephfs/conf/

6 ceph/qa/suites/fs/upgrade/nofs/no-mds-cluster.yaml (Normal file)
@ -0,0 +1,6 @@
roles:
- [mon.a, mon.b, mon.c, mgr.x, mgr.y, osd.0, osd.1, osd.2, osd.3]
openstack:
- volumes: # attached to each instance
count: 4
size: 10 # GB

0 ceph/qa/suites/fs/upgrade/nofs/overrides/% (Normal file)

1 ceph/qa/suites/fs/upgrade/nofs/overrides/.qa (Symbolic link)
@ -0,0 +1 @@
../.qa/

5 ceph/qa/suites/fs/upgrade/nofs/overrides/pg-warn.yaml (Normal file)
@ -0,0 +1,5 @@
overrides:
ceph:
conf:
global:
mon pg warn min per osd: 0

1 ceph/qa/suites/fs/upgrade/nofs/overrides/whitelist_health.yaml (Symbolic link)
@ -0,0 +1 @@
.qa/cephfs/overrides/whitelist_health.yaml

@ -0,0 +1 @@
.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml

0 ceph/qa/suites/fs/upgrade/nofs/tasks/% (Normal file)

1 ceph/qa/suites/fs/upgrade/nofs/tasks/.qa (Symbolic link)
@ -0,0 +1 @@
../.qa/

38 ceph/qa/suites/fs/upgrade/nofs/tasks/0-octopus.yaml (Normal file)
@ -0,0 +1,38 @@
meta:
- desc: |
install ceph/octopus latest
tasks:
- install:
branch: octopus
exclude_packages:
- librados3
- ceph-mgr-dashboard
- ceph-mgr-diskprediction-local
- ceph-mgr-rook
- ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- print: "**** done installing octopus"
- ceph:
log-ignorelist:
- overall HEALTH_
- \(FS_
- \(MDS_
- \(OSD_
- \(MON_DOWN\)
- \(CACHE_POOL_
- \(POOL_
- \(MGR_DOWN\)
- \(PG_
- \(SMALLER_PGP_NUM\)
- Monitor daemon marked osd
- Behind on trimming
- Manager daemon
conf:
global:
mon warn on pool no app: false
ms bind msgr2: false
- exec:
osd.0:
- ceph osd set-require-min-compat-client octopus
- print: "**** done ceph"

45 ceph/qa/suites/fs/upgrade/nofs/tasks/1-upgrade.yaml (Normal file)
@ -0,0 +1,45 @@
overrides:
ceph:
log-ignorelist:
- scrub mismatch
- ScrubResult
- wrongly marked
- \(POOL_APP_NOT_ENABLED\)
- \(SLOW_OPS\)
- overall HEALTH_
- \(MON_MSGR2_NOT_ENABLED\)
- slow request
conf:
global:
bluestore warn on legacy statfs: false
bluestore warn on no per pool omap: false
mon:
mon warn on osd down out interval zero: false

tasks:
- print: "*** upgrading, no cephfs present"
- exec:
mon.a:
- ceph fs dump
- install.upgrade:
mon.a:
- print: "**** done install.upgrade"
- ceph.restart:
daemons: [mon.*, mgr.*]
mon-health-to-clog: false
wait-for-healthy: false
- ceph.healthy:
- ceph.restart:
daemons: [osd.*]
wait-for-healthy: false
wait-for-osds-up: true
- exec:
mon.a:
- ceph versions
- ceph osd dump -f json-pretty
- ceph fs dump
- ceph osd require-osd-release octopus
- for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done
#- ceph osd set-require-min-compat-client octopus
- ceph.healthy:
- print: "**** done ceph.restart"

@ -0,0 +1 @@
.qa/distros/podman/centos_8.2_container_tools_3.0.yaml

@ -1 +0,0 @@
.qa/distros/podman/centos_8.2_kubic_stable.yaml

@ -1 +0,0 @@
.qa/distros/podman/centos_8.2_kubic_stable.yaml

@ -0,0 +1 @@
.qa/distros/podman/centos_8.3_container_tools_3.0.yaml

@ -1 +0,0 @@
.qa/distros/podman/rhel_8.3_kubic_stable.yaml

@ -0,0 +1 @@
.qa/distros/podman/centos_8.2_container_tools_3.0.yaml

@ -1 +0,0 @@
.qa/distros/podman/centos_8.2_kubic_stable.yaml

@ -0,0 +1 @@
.qa/distros/podman/centos_8.2_container_tools_3.0.yaml

@ -57,3 +57,4 @@ tasks:
- tasks.mgr.dashboard.test_summary
- tasks.mgr.dashboard.test_telemetry
- tasks.mgr.dashboard.test_user
- tasks.mgr.dashboard.test_motd

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

@ -14,5 +14,6 @@ overrides:
rgw md log max shards: 4
rgw data log num shards: 4
rgw sync obj etag verify: true
rgw sync meta inject err probability: 0.1
rgw:
compression type: random

1 ceph/qa/suites/rgw/sts/.qa (Symbolic link)
@ -0,0 +1 @@
../.qa

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

@ -3,7 +3,7 @@ meta:
Run ceph on two nodes, using one of them as a client,
with a separate client-only node.
Use xfs beneath the osds.
install ceph/pacific v16.2.2 and the v16.2.x point versions
install ceph/pacific v16.2.4 and the v16.2.x point versions
run workload and upgrade-sequence in parallel
(every point release should be tested)
run workload and upgrade-sequence in parallel
@ -69,32 +69,32 @@ openstack:
count: 3
size: 30 # GB
tasks:
- print: "**** done pacific v16.2.0 about to install"
- print: "**** done pacific about to install v16.2.4 "
- install:
tag: v16.2.2
tag: v16.2.4
# line below can be removed its from jewel test
#exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev', 'librgw2']
- print: "**** done v16.2.2 install"
- print: "**** done v16.2.4 install"
- ceph:
fs: xfs
add_osds_to_crush: true
- print: "**** done ceph xfs"
- sequential:
- workload
- print: "**** done workload v16.2.2"
- print: "**** done workload v16.2.4"


####### upgrade to v16.2.3
####### upgrade to v16.2.5
- install.upgrade:
#exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev']
mon.a:
tag: v16.2.3
tag: v16.2.5
mon.b:
tag: v16.2.3
tag: v16.2.5
- parallel:
- workload_pacific
- upgrade-sequence_pacific
- print: "**** done parallel pacific v16.2.3"
- print: "**** done parallel pacific v16.2.5"

#### upgrade to latest pacific
- install.upgrade:

Some files were not shown because too many files have changed in this diff.