import ceph 16.2.6

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Thomas Lamprecht 2021-09-20 09:15:23 +02:00
parent a7eef15ae0
commit 522d829b51
467 changed files with 36554 additions and 23886 deletions

View File

@ -48,6 +48,7 @@ https://raw.githubusercontent.com/ceph/ceph/master/SubmittingPatches.rst
- `jenkins test make check arm64`
- `jenkins test submodules`
- `jenkins test dashboard`
- `jenkins test dashboard cephadm`
- `jenkins test api`
- `jenkins test docs`
- `jenkins render docs`

View File

@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10.2)
# remove cmake/modules/FindPython* once 3.12 is required
project(ceph
VERSION 16.0.0
VERSION 16.2.6
LANGUAGES CXX C ASM)
foreach(policy

View File

@ -1,5 +1,13 @@
>=17.0.0
* The `ceph-mgr-modules-core` debian package no longer recommends `ceph-mgr-rook`,
because the latter depends on `python3-numpy`, which cannot be imported multiple
times in different Python sub-interpreters if the installed version of
`python3-numpy` is older than 1.19. Since `apt-get` installs the `Recommends`
packages by default, `ceph-mgr-rook` was always installed along with the
`ceph-mgr` debian package as an indirect dependency. If your workflow depends
on this behavior, you might want to install `ceph-mgr-rook` separately.
* A new library is available, libcephsqlite. It provides a SQLite Virtual File
System (VFS) on top of RADOS. The database and journals are striped over
RADOS across multiple objects for virtually unlimited scaling and throughput
@ -9,6 +17,28 @@
that were storing state in RADOS omap, especially without striping which
limits scalability.
* MDS upgrades no longer require stopping all standby MDS daemons before
upgrading the sole active MDS for a file system.
* RGW: It is now possible to specify SSL options and ciphers for the beast frontend.
The default ssl options setting is "no_sslv2:no_sslv3:no_tlsv1:no_tlsv1_1".
If you want to restore the old behavior, add 'ssl_options=' (empty) to the
``rgw frontends`` configuration.
* fs: A file system can be created with a specific ID ("fscid"). This is useful
in certain recovery scenarios, e.g., monitor database lost and rebuilt, and
the restored file system is expected to have the same ID as before.
>=16.2.6
--------
* MGR: The pg_autoscaler has a new default 'scale-down' profile which provides more
performance from the start for new pools (for newly created clusters).
Existing clusters will retain the old behavior, now called the 'scale-up' profile.
For more details, see:
https://docs.ceph.com/en/latest/rados/operations/placement-groups/
>=16.0.0
--------
@ -42,12 +72,6 @@
deprecated and will be removed in a future release. Please use
``nfs cluster rm`` and ``nfs export rm`` instead.
* mgr-pg_autoscaler: Autoscaler will now start out by scaling each
pool to have a full complement of pgs from the start and will only
decrease it when other pools need more pgs due to increased usage.
This improves out of the box performance of Ceph by allowing more PGs
to be created for a given pool.
* CephFS: Disabling allow_standby_replay on a file system will also stop all
standby-replay daemons for that file system.
@ -159,6 +183,8 @@
CentOS 7.6 and later. To enable older clients, set ``cephx_require_version``
and ``cephx_service_require_version`` config options to 1.
* rgw: The Civetweb frontend is now deprecated and will be removed in Quincy.
>=15.0.0
--------

View File

@ -19,7 +19,6 @@ The following AIX packages are required for developing and compilation, they hav
gettext
less
perl
gdbm
pcre
rsync
zlib

View File

@ -49,6 +49,8 @@
%bcond_without lttng
%bcond_without libradosstriper
%bcond_without ocf
%global luarocks_package_name luarocks
%bcond_without lua_packages
%global _remote_tarball_prefix https://download.ceph.com/tarballs/
%endif
%if 0%{?suse_version}
@ -73,6 +75,21 @@
%if ! %{defined _fillupdir}
%global _fillupdir /var/adm/fillup-templates
%endif
#luarocks
%if 0%{?is_opensuse}
# openSUSE
%bcond_without lua_packages
%if 0%{?sle_version}
# openSUSE Leap
%global luarocks_package_name lua53-luarocks
%else
# openSUSE Tumbleweed
%global luarocks_package_name lua54-luarocks
%endif
%else
# SLE
%bcond_with lua_packages
%endif
%endif
%bcond_with seastar
%bcond_with jaeger
@ -96,19 +113,6 @@
%endif
%endif
%if 0%{?suse_version}
%if !0%{?is_opensuse}
# SLE does not support luarocks
%bcond_with lua_packages
%else
%global luarocks_package_name lua53-luarocks
%bcond_without lua_packages
%endif
%else
%global luarocks_package_name luarocks
%bcond_without lua_packages
%endif
%{!?_udevrulesdir: %global _udevrulesdir /lib/udev/rules.d}
%{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
%{!?python3_pkgversion: %global python3_pkgversion 3}
@ -122,7 +126,7 @@
# main package definition
#################################################################################
Name: ceph
Version: 16.2.5
Version: 16.2.6
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
@ -138,7 +142,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
Group: System/Filesystems
%endif
URL: http://ceph.com/
Source0: %{?_remote_tarball_prefix}ceph-16.2.5.tar.bz2
Source0: %{?_remote_tarball_prefix}ceph-16.2.6.tar.bz2
%if 0%{?suse_version}
# _insert_obs_source_lines_here
ExclusiveArch: x86_64 aarch64 ppc64le s390x
@ -168,7 +172,6 @@ BuildRequires: gcc-toolset-9-gcc-c++ >= 9.2.1-2.3
%else
BuildRequires: gcc-c++
%endif
BuildRequires: gdbm
%if 0%{with tcmalloc}
# libprofiler did not build on ppc64le until 2.7.90
%if 0%{?fedora} || 0%{?rhel} >= 8
@ -292,7 +295,6 @@ BuildRequires: libbz2-devel
BuildRequires: mozilla-nss-devel
BuildRequires: keyutils-devel
BuildRequires: libopenssl-devel
BuildRequires: lsb-release
BuildRequires: openldap2-devel
#BuildRequires: krb5
#BuildRequires: krb5-devel
@ -317,7 +319,6 @@ BuildRequires: openldap-devel
#BuildRequires: krb5-devel
BuildRequires: openssl-devel
BuildRequires: CUnit-devel
BuildRequires: redhat-lsb-core
BuildRequires: python%{python3_pkgversion}-devel
BuildRequires: python%{python3_pkgversion}-setuptools
BuildRequires: python%{python3_pkgversion}-Cython
@ -329,6 +330,7 @@ BuildRequires: lz4-devel >= 1.7
%if 0%{with make_check}
%if 0%{?fedora} || 0%{?rhel}
BuildRequires: golang-github-prometheus
BuildRequires: jsonnet
BuildRequires: libtool-ltdl-devel
BuildRequires: xmlsec1
BuildRequires: xmlsec1-devel
@ -346,6 +348,7 @@ BuildRequires: python%{python3_pkgversion}-pyOpenSSL
%endif
%if 0%{?suse_version}
BuildRequires: golang-github-prometheus-prometheus
BuildRequires: jsonnet
BuildRequires: libxmlsec1-1
BuildRequires: libxmlsec1-nss1
BuildRequires: libxmlsec1-openssl1
@ -1205,7 +1208,7 @@ This package provides Ceph default alerts for Prometheus.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-16.2.5
%autosetup -p1 -n ceph-16.2.6
%build
# LTO can be enabled as soon as the following GCC bug is fixed:
@ -1335,6 +1338,9 @@ ${CMAKE} .. \
-DWITH_SYSTEM_PMDK:BOOL=ON \
%endif
-DBOOST_J=$CEPH_SMP_NCPUS \
%if 0%{?rhel}
-DWITH_FMT_HEADER_ONLY:BOOL=ON \
%endif
-DWITH_GRAFANA=ON
%if %{with cmake_verbose_logging}
@ -1990,9 +1996,8 @@ fi
%endif
%postun immutable-object-cache
test -n "$FIRST_ARG" || FIRST_ARG=$1
%systemd_postun ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
if [ $FIRST_ARG -ge 1 ] ; then
if [ $1 -ge 1 ] ; then
# Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
# "yes". In any case: if units are not running, do not touch them.
SYSCONF_CEPH=%{_sysconfdir}/sysconfig/ceph

View File

@ -49,6 +49,8 @@
%bcond_without lttng
%bcond_without libradosstriper
%bcond_without ocf
%global luarocks_package_name luarocks
%bcond_without lua_packages
%global _remote_tarball_prefix https://download.ceph.com/tarballs/
%endif
%if 0%{?suse_version}
@ -73,6 +75,21 @@
%if ! %{defined _fillupdir}
%global _fillupdir /var/adm/fillup-templates
%endif
#luarocks
%if 0%{?is_opensuse}
# openSUSE
%bcond_without lua_packages
%if 0%{?sle_version}
# openSUSE Leap
%global luarocks_package_name lua53-luarocks
%else
# openSUSE Tumbleweed
%global luarocks_package_name lua54-luarocks
%endif
%else
# SLE
%bcond_with lua_packages
%endif
%endif
%bcond_with seastar
%bcond_with jaeger
@ -96,19 +113,6 @@
%endif
%endif
%if 0%{?suse_version}
%if !0%{?is_opensuse}
# SLE does not support luarocks
%bcond_with lua_packages
%else
%global luarocks_package_name lua53-luarocks
%bcond_without lua_packages
%endif
%else
%global luarocks_package_name luarocks
%bcond_without lua_packages
%endif
%{!?_udevrulesdir: %global _udevrulesdir /lib/udev/rules.d}
%{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create}
%{!?python3_pkgversion: %global python3_pkgversion 3}
@ -168,7 +172,6 @@ BuildRequires: gcc-toolset-9-gcc-c++ >= 9.2.1-2.3
%else
BuildRequires: gcc-c++
%endif
BuildRequires: gdbm
%if 0%{with tcmalloc}
# libprofiler did not build on ppc64le until 2.7.90
%if 0%{?fedora} || 0%{?rhel} >= 8
@ -292,7 +295,6 @@ BuildRequires: libbz2-devel
BuildRequires: mozilla-nss-devel
BuildRequires: keyutils-devel
BuildRequires: libopenssl-devel
BuildRequires: lsb-release
BuildRequires: openldap2-devel
#BuildRequires: krb5
#BuildRequires: krb5-devel
@ -317,7 +319,6 @@ BuildRequires: openldap-devel
#BuildRequires: krb5-devel
BuildRequires: openssl-devel
BuildRequires: CUnit-devel
BuildRequires: redhat-lsb-core
BuildRequires: python%{python3_pkgversion}-devel
BuildRequires: python%{python3_pkgversion}-setuptools
BuildRequires: python%{python3_pkgversion}-Cython
@ -329,6 +330,7 @@ BuildRequires: lz4-devel >= 1.7
%if 0%{with make_check}
%if 0%{?fedora} || 0%{?rhel}
BuildRequires: golang-github-prometheus
BuildRequires: jsonnet
BuildRequires: libtool-ltdl-devel
BuildRequires: xmlsec1
BuildRequires: xmlsec1-devel
@ -346,6 +348,7 @@ BuildRequires: python%{python3_pkgversion}-pyOpenSSL
%endif
%if 0%{?suse_version}
BuildRequires: golang-github-prometheus-prometheus
BuildRequires: jsonnet
BuildRequires: libxmlsec1-1
BuildRequires: libxmlsec1-nss1
BuildRequires: libxmlsec1-openssl1
@ -1335,6 +1338,9 @@ ${CMAKE} .. \
-DWITH_SYSTEM_PMDK:BOOL=ON \
%endif
-DBOOST_J=$CEPH_SMP_NCPUS \
%if 0%{?rhel}
-DWITH_FMT_HEADER_ONLY:BOOL=ON \
%endif
-DWITH_GRAFANA=ON
%if %{with cmake_verbose_logging}
@ -1990,9 +1996,8 @@ fi
%endif
%postun immutable-object-cache
test -n "$FIRST_ARG" || FIRST_ARG=$1
%systemd_postun ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
if [ $FIRST_ARG -ge 1 ] ; then
if [ $1 -ge 1 ] ; then
# Restart on upgrade, but only if "CEPH_AUTO_RESTART_ON_UPGRADE" is set to
# "yes". In any case: if units are not running, do not touch them.
SYSCONF_CEPH=%{_sysconfdir}/sysconfig/ceph

View File

@ -1,7 +1,8 @@
ceph (16.2.5-1focal) focal; urgency=medium
ceph (16.2.6-1) stable; urgency=medium
* New upstream release
-- Jenkins Build Slave User <jenkins-build@braggi17.front.sepia.ceph.com> Thu, 08 Jul 2021 14:16:59 +0000
-- Ceph Release Team <ceph-maintainers@ceph.com> Thu, 16 Sep 2021 14:27:16 +0000
ceph (16.2.5-1) stable; urgency=medium

View File

@ -155,7 +155,7 @@ function(do_build_boost version)
set(boost_sha256 4eb3b8d442b426dc35346235c8733b5ae35ba431690e38c6a8263dce9fcbb402)
string(REPLACE "." "_" boost_version_underscore ${boost_version} )
set(boost_url
https://dl.bintray.com/boostorg/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2)
https://boostorg.jfrog.io/artifactory/main/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2)
if(CMAKE_VERSION VERSION_GREATER 3.7)
set(boost_url
"${boost_url} http://downloads.sourceforge.net/project/boost/boost/${boost_version}/boost_${boost_version_underscore}.tar.bz2")

View File

@ -56,7 +56,7 @@ endif()
CHECK_INCLUDE_FILES("valgrind/helgrind.h" HAVE_VALGRIND_HELGRIND_H)
include(CheckTypeSize)
set(CMAKE_EXTRA_INCLUDE_FILES "linux/types.h")
set(CMAKE_EXTRA_INCLUDE_FILES "linux/types.h" "netinet/in.h")
CHECK_TYPE_SIZE(__u8 __U8)
CHECK_TYPE_SIZE(__u16 __U16)
CHECK_TYPE_SIZE(__u32 __U32)
@ -65,6 +65,7 @@ CHECK_TYPE_SIZE(__s8 __S8)
CHECK_TYPE_SIZE(__s16 __S16)
CHECK_TYPE_SIZE(__s32 __S32)
CHECK_TYPE_SIZE(__s64 __S64)
CHECK_TYPE_SIZE(in_addr_t IN_ADDR_T)
unset(CMAKE_EXTRA_INCLUDE_FILES)
include(CheckSymbolExists)

View File

@ -35,9 +35,27 @@ mark_as_advanced(
fmt_VERSION_STRING)
if(fmt_FOUND AND NOT (TARGET fmt::fmt))
add_library(fmt::fmt UNKNOWN IMPORTED)
set_target_properties(fmt::fmt PROPERTIES
add_library(fmt-header-only INTERFACE)
set_target_properties(fmt-header-only PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${fmt_INCLUDE_DIR}"
INTERFACE_COMPILE_DEFINITIONS FMT_HEADER_ONLY=1
INTERFACE_COMPILE_FEATURES cxx_std_11)
add_library(fmt UNKNOWN IMPORTED GLOBAL)
set_target_properties(fmt PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${fmt_INCLUDE_DIR}"
INTERFACE_COMPILE_FEATURES cxx_std_11
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
IMPORTED_LOCATION "${fmt_LIBRARY}")
if(WITH_FMT_HEADER_ONLY)
# please note, this is different from how upstream defines fmt::fmt.
# in order to force 3rd party libraries to link against fmt-header-only if
# WITH_FMT_HEADER_ONLY is ON, we have to point fmt::fmt to fmt-header-only
# in this case.
add_library(fmt::fmt ALIAS fmt-header-only)
else()
add_library(fmt::fmt ALIAS fmt)
endif()
endif()

View File

@ -12,19 +12,18 @@ Build-Depends: automake,
cmake (>= 3.10.2),
cpio,
cryptsetup-bin | cryptsetup,
cython,
cython3,
debhelper (>= 9),
debhelper (>= 10),
default-jdk,
dh-exec,
dh-python,
dh-systemd,
# Jaeger flex,
git,
gperf,
g++ (>= 7),
javahelper,
# Make-Check jq,
jq <pkg.ceph.check>,
jsonnet <pkg.ceph.check>,
junit4,
libaio-dev,
libbabeltrace-ctf-dev,
@ -74,7 +73,6 @@ Build-Depends: automake,
# Make-Check libxmlsec1-openssl,
# Make-Check libxmlsec1-dev,
# Crimson libyaml-cpp-dev,
lsb-release,
# Jaeger nlohmann-json-dev | nlohmann-json3-dev,
parted,
patch,

View File

@ -254,6 +254,7 @@ class CephMgrCommands(Directive):
'jsonpatch',
'rook.rook_client',
'rook.rook_client.ceph',
'rook.rook_client._helper',
'cherrypy=3.2.3']
# make restful happy

View File

@ -76,6 +76,9 @@ and ``ceph-disk`` is fully disabled. Encryption is fully supported.
lvm/systemd
lvm/list
lvm/zap
lvm/migrate
lvm/newdb
lvm/newwal
simple/index
simple/activate
simple/scan

View File

@ -15,6 +15,12 @@ Implements the functionality needed to deploy OSDs from the ``lvm`` subcommand:
* :ref:`ceph-volume-lvm-list`
* :ref:`ceph-volume-lvm-migrate`
* :ref:`ceph-volume-lvm-newdb`
* :ref:`ceph-volume-lvm-newwal`
.. not yet implemented
.. * :ref:`ceph-volume-lvm-scan`

View File

@ -0,0 +1,47 @@
.. _ceph-volume-lvm-migrate:
``migrate``
===========
Moves BlueFS data from the source volume(s) to the target volume. Source volumes
(except the main one, i.e. the data or block volume) are removed on success.
Only LVM volumes are permitted as the target, either one that is already
attached or a new one; in the latter case the new volume is attached to the OSD,
replacing one of the source devices.
The following replacement rules apply (in order of precedence; evaluation stops
at the first match):
- if the source list has a DB volume, the target device replaces it.
- if the source list has a WAL volume, the target device replaces it.
- if the source list has only the slow (main) volume, the operation is not
  permitted and requires explicit allocation via the new-db/new-wal commands.
Moves BlueFS data from main device to LV already attached as DB::
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/db
Moves BlueFS data from shared main device to LV which will be attached as a
new DB::
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/new_db
Moves BlueFS data from DB device to new LV, DB is replaced::
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db --target vgname/new_db
Moves BlueFS data from main and DB devices to new LV, DB is replaced::
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db --target vgname/new_db
Moves BlueFS data from main, DB and WAL devices to new LV, WAL is removed and
DB is replaced::
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db wal --target vgname/new_db
Moves BlueFS data from the DB and WAL devices back to the main device; WAL and DB are
removed::
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db wal --target vgname/data

View File

@ -0,0 +1,11 @@
.. _ceph-volume-lvm-newdb:
``new-db``
===========
Attaches the given logical volume to an OSD as a DB volume.
The logical volume name format is vg/lv. Fails if the OSD already has a DB attached.
Attach vgname/lvname as a DB volume to OSD 1::
ceph-volume lvm new-db --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db

View File

@ -0,0 +1,11 @@
.. _ceph-volume-lvm-newwal:
``new-wal``
===========
Attaches the given logical volume to the given OSD as a WAL volume.
The logical volume name format is vg/lv. Fails if the OSD already has a WAL attached.
Attach vgname/lvname as a WAL volume to OSD 1::
ceph-volume lvm new-wal --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_wal

View File

@ -1,40 +1,45 @@
=======================
Basic Ceph Client Setup
=======================
Client machines need some basic configuration in order to interact with
a cluster. This document describes how to configure a client machine
for cluster interaction.
Client machines require some basic configuration to interact with
Ceph clusters. This section describes how to configure a client machine
so that it can interact with a Ceph cluster.
.. note:: Most client machines only need the `ceph-common` package and
its dependencies installed. That will supply the basic `ceph`
and `rados` commands, as well as other commands like
`mount.ceph` and `rbd`.
.. note::
Most client machines need to install only the `ceph-common` package
and its dependencies. Such a setup supplies the basic `ceph` and
`rados` commands, as well as other commands including `mount.ceph`
and `rbd`.
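For example, on a Debian- or Ubuntu-based client, the package can usually be
installed as follows (a sketch; substitute your distribution's package manager
and make sure a Ceph package repository is configured first):
.. prompt:: bash #
apt install ceph-common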
Config File Setup
=================
Client machines can generally get away with a smaller config file than
a full-fledged cluster member. To generate a minimal config file, log
into a host that is already configured as a client or running a cluster
daemon, and then run
Client machines usually require smaller configuration files (here
sometimes called "config files") than do full-fledged cluster members.
To generate a minimal config file, log into a host that has been
configured as a client or that is running a cluster daemon, and then run the following command:
.. code-block:: bash
.. prompt:: bash #
ceph config generate-minimal-conf
ceph config generate-minimal-conf
This will generate a minimal config file that will tell the client how to
reach the Ceph Monitors. The contents of this file should typically be
installed in `/etc/ceph/ceph.conf`.
This command generates a minimal config file that tells the client how
to reach the Ceph monitors. The contents of this file should usually
be installed in ``/etc/ceph/ceph.conf``.
Keyring Setup
=============
Most Ceph clusters are run with authentication enabled, and the client will
need keys in order to communicate with cluster machines. To generate a
keyring file with credentials for `client.fs`, log into an extant cluster
member and run
Most Ceph clusters run with authentication enabled. This means that
the client needs keys in order to communicate with the machines in the
cluster. To generate a keyring file with credentials for `client.fs`,
log into a running cluster member and run the following command:
.. code-block:: bash
.. prompt:: bash $
ceph auth get-or-create client.fs
ceph auth get-or-create client.fs
The resulting output should be put into a keyring file, typically
`/etc/ceph/ceph.keyring`.
The resulting output should be placed in a keyring file, typically
``/etc/ceph/ceph.keyring``.
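One way to do this (a minimal sketch, assuming you want the default path and
have the necessary privileges) is to redirect the command's output directly
into the keyring file:
.. prompt:: bash $
ceph auth get-or-create client.fs | sudo tee /etc/ceph/ceph.keyring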
To gain a broader understanding of client keyring distribution and administration, see :ref:`client_keyrings_and_configs`.
For an example that explains how to distribute ``ceph.conf`` configuration files to hosts tagged with the ``bare_config`` label, see the section "Distributing ceph.conf to hosts tagged with bare_config" under :ref:`etc_ceph_conf_distribution`.

View File

@ -64,48 +64,47 @@ To add each new host to the cluster, perform two steps:
Removing Hosts
==============
If the node that you want to remove is running OSDs, make sure you remove the OSDs from the node.
A host can safely be removed from the cluster once all daemons are removed from it.
To remove a host from a cluster, do the following:
For all Ceph service types, except for ``node-exporter`` and ``crash``, remove
the host from the placement specification file (for example, cluster.yml).
For example, if you are removing the host named host2, remove all occurrences of
``- host2`` from all ``placement:`` sections.
Update:
.. code-block:: yaml
service_type: rgw
placement:
hosts:
- host1
- host2
To:
.. code-block:: yaml
service_type: rgw
placement:
hosts:
- host1
Remove the host from cephadm's environment:
To drain all daemons from a host, do the following:
.. prompt:: bash #
ceph orch host rm host2
ceph orch host drain *<host>*
The '_no_schedule' label will be applied to the host. See :ref:`cephadm-special-host-labels`.
If the host is running ``node-exporter`` and crash services, remove them by running
the following command on the host:
All OSDs on the host will be scheduled for removal. You can check the OSD removal progress with the following:
.. prompt:: bash #
cephadm rm-daemon --fsid CLUSTER_ID --name SERVICE_NAME
ceph orch osd rm status
See :ref:`cephadm-osd-removal` for more details about OSD removal.
You can check whether any daemons remain on the host with the following:
.. prompt:: bash #
ceph orch ps <host>
Once all daemons have been removed, you can remove the host with the following:
.. prompt:: bash #
ceph orch host rm <host>
Offline host removal
--------------------
If a host is offline and cannot be recovered, it can still be removed from the cluster with the following:
.. prompt:: bash #
ceph orch host rm <host> --offline --force
This can potentially cause data loss, as OSDs will be forcefully purged from the cluster by calling ``osd purge-actual`` for each OSD.
Service specs that still contain this host should be updated manually.
.. _orchestrator-host-labels:

View File

@ -173,6 +173,11 @@ immediately to know more about ``cephadm bootstrap``, read the list below.
Also, you can run ``cephadm bootstrap -h`` to see all of ``cephadm``'s
available options.
* By default, Ceph daemons send their log output to stdout/stderr, which is picked
up by the container runtime (docker or podman) and (on most systems) sent to
journald. If you want Ceph to write traditional log files to ``/var/log/ceph/$fsid``,
use the ``--log-to-file`` option during bootstrap.
* Larger Ceph clusters perform better when (external to the Ceph cluster)
public network traffic is separated from (internal to the Ceph cluster)
cluster traffic. The internal cluster traffic handles replication, recovery,

View File

@ -28,6 +28,10 @@ manual administration of the ceph monitor daemons is not necessary.
``cephadm`` will automatically add up to five monitors to the subnet, as
needed, as new hosts are added to the cluster.
By default, cephadm will deploy 5 daemons on arbitrary hosts. See
:ref:`orchestrator-cli-placement-spec` for details of specifying
the placement of daemons.
Designating a Particular Subnet for Monitors
--------------------------------------------
@ -48,67 +52,18 @@ format (e.g., ``10.1.2.0/24``):
Cephadm deploys new monitor daemons only on hosts that have IP addresses in
the designated subnet.
Changing the number of monitors from the default
------------------------------------------------
If you want to adjust the default of 5 monitors, run this command:
You can also specify two public networks by using a list of networks:
.. prompt:: bash #
ceph orch apply mon *<number-of-monitors>*
Deploying monitors only to specific hosts
-----------------------------------------
To deploy monitors on a specific set of hosts, run this command:
.. prompt:: bash #
ceph orch apply mon *<host1,host2,host3,...>*
Be sure to include the first (bootstrap) host in this list.
Using Host Labels
-----------------
You can control which hosts the monitors run on by making use of host labels.
To set the ``mon`` label to the appropriate hosts, run this command:
.. prompt:: bash #
ceph orch host label add *<hostname>* mon
To view the current hosts and labels, run this command:
.. prompt:: bash #
ceph orch host ls
ceph config set mon public_network *<mon-cidr-network1>,<mon-cidr-network2>*
For example:
.. prompt:: bash #
ceph orch host label add host1 mon
ceph orch host label add host2 mon
ceph orch host label add host3 mon
ceph orch host ls
ceph config set mon public_network 10.1.2.0/24,192.168.0.0/24
.. code-block:: bash
HOST ADDR LABELS STATUS
host1 mon
host2 mon
host3 mon
host4
host5
Tell cephadm to deploy monitors based on the label by running this command:
.. prompt:: bash #
ceph orch apply mon label:mon
See also :ref:`host labels <orchestrator-host-labels>`.
Deploying Monitors on a Particular Network
------------------------------------------
@ -125,7 +80,7 @@ run this command:
.. prompt:: bash #
ceph orch daemon add mon *<host1:ip-or-network1> [<host1:ip-or-network-2>...]*
ceph orch daemon add mon *<host1:ip-or-network1>*
For example, to deploy a second monitor on ``newhost1`` using an IP
address ``10.1.2.123`` and a third monitor on ``newhost2`` in
@ -137,52 +92,80 @@ run this command:
ceph orch daemon add mon newhost1:10.1.2.123
ceph orch daemon add mon newhost2:10.1.2.0/24
.. note::
The **apply** command can be confusing. For this reason, we recommend using
YAML specifications.
Now, enable automatic placement of Daemons
Each ``ceph orch apply mon`` command supersedes the one before it.
This means that you must use the proper comma-separated list-based
syntax when you want to apply monitors to more than one host.
If you do not use the proper syntax, you will clobber your work
as you go.
.. prompt:: bash #
For example:
ceph orch apply mon --placement="newhost1,newhost2,newhost3" --dry-run
.. prompt:: bash #
See :ref:`orchestrator-cli-placement-spec` for details of specifying
the placement of daemons.
ceph orch apply mon host1
ceph orch apply mon host2
ceph orch apply mon host3
Finally apply this new placement by dropping ``--dry-run``
This results in only one host having a monitor applied to it: host3.
.. prompt:: bash #
(The first command creates a monitor on host1. Then the second command
clobbers the monitor on host1 and creates a monitor on host2. Then the
third command clobbers the monitor on host2 and creates a monitor on
host3. In this scenario, at this point, there is a monitor ONLY on
host3.)
ceph orch apply mon --placement="newhost1,newhost2,newhost3"
To make certain that a monitor is applied to each of these three hosts,
run a command like this:
.. prompt:: bash #
Moving Monitors to a Different Network
--------------------------------------
ceph orch apply mon "host1,host2,host3"
To move Monitors to a new network, deploy new monitors on the new network and
subsequently remove monitors from the old network. It is not advised to
modify and inject the ``monmap`` manually.
There is another way to apply monitors to multiple hosts: a ``yaml`` file
can be used. Instead of using the "ceph orch apply mon" commands, run a
command of this form:
First, disable the automated placement of daemons:
.. prompt:: bash #
.. prompt:: bash #
ceph orch apply -i file.yaml
ceph orch apply mon --unmanaged
Here is a sample **file.yaml** file::
To deploy each additional monitor:
service_type: mon
placement:
hosts:
- host1
- host2
- host3
.. prompt:: bash #
ceph orch daemon add mon *<newhost1:ip-or-network1>*
For example, to deploy a second monitor on ``newhost1`` using an IP
address ``10.1.2.123`` and a third monitor on ``newhost2`` in
network ``10.1.2.0/24``, run the following commands:
.. prompt:: bash #
ceph orch apply mon --unmanaged
ceph orch daemon add mon newhost1:10.1.2.123
ceph orch daemon add mon newhost2:10.1.2.0/24
Subsequently remove monitors from the old network:
.. prompt:: bash #
ceph orch daemon rm *mon.<oldhost1>*
Update the ``public_network``:
.. prompt:: bash #
ceph config set mon public_network *<mon-cidr-network>*
For example:
.. prompt:: bash #
ceph config set mon public_network 10.1.2.0/24
Now, enable automatic placement of daemons:
.. prompt:: bash #
ceph orch apply mon --placement="newhost1,newhost2,newhost3" --dry-run
See :ref:`orchestrator-cli-placement-spec` for details of specifying
the placement of daemons.
Finally, apply this new placement by dropping ``--dry-run``:
.. prompt:: bash #
ceph orch apply mon --placement="newhost1,newhost2,newhost3"

View File

@ -52,12 +52,6 @@ cluster (which had no monitoring stack) to cephadm management.)
To set up monitoring on a Ceph cluster that has no monitoring, follow the
steps below:
#. Enable the Prometheus module in the ceph-mgr daemon. This exposes the internal Ceph metrics so that Prometheus can scrape them:
.. prompt:: bash #
ceph mgr module enable prometheus
#. Deploy a node-exporter service on every node of the cluster. The node-exporter provides host-level metrics like CPU and memory utilization:
.. prompt:: bash #

View File

@ -2,28 +2,40 @@
Cephadm Operations
==================
.. _watching_cephadm_logs:
Watching cephadm log messages
=============================
Cephadm logs to the ``cephadm`` cluster log channel, meaning you can
monitor progress in realtime with::
Cephadm writes logs to the ``cephadm`` cluster log channel. You can
monitor Ceph's activity in real time by reading the logs as they fill
up. Run the following command to see the logs in real time:
# ceph -W cephadm
.. prompt:: bash #
By default it will show info-level events and above. To see
debug-level messages too::
ceph -W cephadm
# ceph config set mgr mgr/cephadm/log_to_cluster_level debug
# ceph -W cephadm --watch-debug
By default, this command shows info-level events and above. To see
debug-level messages as well as info-level events, run the following
commands:
Be careful: the debug messages are very verbose!
.. prompt:: bash #
You can see recent events with::
ceph config set mgr mgr/cephadm/log_to_cluster_level debug
ceph -W cephadm --watch-debug
# ceph log last cephadm
.. warning::
The debug messages are very verbose!
You can see recent events by running the following command:
.. prompt:: bash #
ceph log last cephadm
These events are also logged to the ``ceph.cephadm.log`` file on
monitor hosts and to the monitor daemons' stderr.
monitor hosts as well as to the monitor daemons' stderr.
.. _cephadm-logs:
@ -31,45 +43,68 @@ monitor hosts and to the monitor daemons' stderr.
Ceph daemon logs
================
Logging to stdout
-----------------
Logging to journald
-------------------
Traditionally, Ceph daemons have logged to ``/var/log/ceph``. By
default, cephadm daemons log to stderr and the logs are
captured by the container runtime environment. For most systems, by
default, these logs are sent to journald and accessible via
``journalctl``.
Ceph daemons traditionally write logs to ``/var/log/ceph``. Ceph daemons log to
journald by default and Ceph logs are captured by the container runtime
environment. They are accessible via ``journalctl``.
.. note:: Prior to Quincy, ceph daemons logged to stderr.
Example of logging to journald
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For example, to view the logs for the daemon ``mon.foo`` for a cluster
with ID ``5c5a50ae-272a-455d-99e9-32c6a013e694``, the command would be
something like::
something like:
.. prompt:: bash #
journalctl -u ceph-5c5a50ae-272a-455d-99e9-32c6a013e694@mon.foo
This works well for normal operations when logging levels are low.
To disable logging to stderr::
ceph config set global log_to_stderr false
ceph config set global mon_cluster_log_to_stderr false
Logging to files
----------------
You can also configure Ceph daemons to log to files instead of stderr,
just like they have in the past. When logging to files, Ceph logs appear
in ``/var/log/ceph/<cluster-fsid>``.
You can also configure Ceph daemons to log to files instead of to
journald if you prefer logs to appear in files (as they did in earlier,
pre-cephadm, pre-Octopus versions of Ceph). When Ceph logs to files,
the logs appear in ``/var/log/ceph/<cluster-fsid>``. If you choose to
configure Ceph to log to files instead of to journald, remember to
configure Ceph so that it will not log to journald (the commands for
this are covered below).
To enable logging to files::
Enabling logging to files
~~~~~~~~~~~~~~~~~~~~~~~~~
To enable logging to files, run the following commands:
.. prompt:: bash #
ceph config set global log_to_file true
ceph config set global mon_cluster_log_to_file true
We recommend disabling logging to stderr (see above) or else everything
will be logged twice::
Disabling logging to journald
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you choose to log to files, we recommend disabling logging to journald or else
everything will be logged twice. Run the following commands to disable logging
to stderr and to journald:
.. prompt:: bash #
ceph config set global log_to_stderr false
ceph config set global mon_cluster_log_to_stderr false
ceph config set global log_to_journald false
ceph config set global mon_cluster_log_to_journald false
.. note:: You can change the default by passing ``--log-to-file`` when
bootstrapping a new cluster.
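For example, a bootstrap invocation that enables file logging from the start
might look like this (a sketch; ``10.1.2.3`` is a placeholder monitor IP):
.. prompt:: bash #
cephadm bootstrap --mon-ip 10.1.2.3 --log-to-file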
Modifying the log retention schedule
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By default, cephadm sets up log rotation on each host to rotate these
files. You can configure the logging retention schedule by modifying
@ -79,12 +114,13 @@ files. You can configure the logging retention schedule by modifying
Data location
=============
Cephadm daemon data and logs in slightly different locations than older
versions of ceph:
Cephadm stores daemon data and logs in different locations than did
older, pre-cephadm (pre Octopus) versions of ceph:
* ``/var/log/ceph/<cluster-fsid>`` contains all cluster logs. Note
that by default cephadm logs via stderr and the container runtime,
so these logs are normally not present.
* ``/var/log/ceph/<cluster-fsid>`` contains all cluster logs. By
default, cephadm logs via stderr and the container runtime. These
logs will not exist unless you have enabled logging to files as
described in `cephadm-logs`_.
* ``/var/lib/ceph/<cluster-fsid>`` contains all cluster daemon data
(besides logs).
* ``/var/lib/ceph/<cluster-fsid>/<daemon-name>`` contains all data for
@ -98,58 +134,69 @@ versions of ceph:
Disk usage
----------
Because a few Ceph daemons may store a significant amount of data in
``/var/lib/ceph`` (notably, the monitors and prometheus), we recommend
moving this directory to its own disk, partition, or logical volume so
that it does not fill up the root file system.
Because a few Ceph daemons (notably, the monitors and prometheus) store a
large amount of data in ``/var/lib/ceph`` , we recommend moving this
directory to its own disk, partition, or logical volume so that it does not
fill up the root file system.
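For example, a dedicated logical volume could be created and mounted before
bootstrapping (a sketch, assuming an existing volume group ``vg0`` and a size
appropriate for your cluster; also add a matching ``/etc/fstab`` entry so the
mount persists across reboots):
.. prompt:: bash #
lvcreate -n ceph -L 100G vg0
mkfs.xfs /dev/vg0/ceph
mount /dev/vg0/ceph /var/lib/ceph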
Health checks
=============
The cephadm module provides additional healthchecks to supplement the default healthchecks
provided by the Cluster. These additional healthchecks fall into two categories;
The cephadm module provides additional health checks to supplement the
default health checks provided by the Cluster. These additional health
checks fall into two categories:
- **cephadm operations**: Healthchecks in this category are always executed when the cephadm module is active.
- **cluster configuration**: These healthchecks are *optional*, and focus on the configuration of the hosts in
the cluster
- **cephadm operations**: Health checks in this category are always
executed when the cephadm module is active.
- **cluster configuration**: These health checks are *optional*, and
focus on the configuration of the hosts in the cluster.
CEPHADM Operations
------------------
CEPHADM_PAUSED
^^^^^^^^^^^^^^
~~~~~~~~~~~~~~
Cephadm background work has been paused with ``ceph orch pause``. Cephadm
continues to perform passive monitoring activities (like checking
host and daemon status), but it will not make any changes (like deploying
or removing daemons).
This indicates that cephadm background work has been paused with
``ceph orch pause``. Cephadm continues to perform passive monitoring
activities (like checking host and daemon status), but it will not
make any changes (like deploying or removing daemons).
Resume cephadm work with::
Resume cephadm work by running the following command:
.. prompt:: bash #
ceph orch resume
.. _cephadm-stray-host:
CEPHADM_STRAY_HOST
^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~
One or more hosts have running Ceph daemons but are not registered as
hosts managed by *cephadm*. This means that those services cannot
currently be managed by cephadm (e.g., restarted, upgraded, included
in `ceph orch ps`).
This indicates that one or more hosts have Ceph daemons that are
running, but are not registered as hosts managed by *cephadm*. This
means that those services cannot currently be managed by cephadm
(e.g., restarted, upgraded, included in `ceph orch ps`).
You can manage the host(s) with::
You can manage the host(s) by running the following command:
.. prompt:: bash #
ceph orch host add *<hostname>*
Note that you may need to configure SSH access to the remote host
before this will work.
.. note::
You might need to configure SSH access to the remote host
before this will work.
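For example, cephadm's public SSH key can usually be installed on the remote
host as follows (a sketch; replace ``<hostname>`` with the host you want to
manage):
.. prompt:: bash #
ceph cephadm get-pub-key > ~/ceph.pub
ssh-copy-id -f -i ~/ceph.pub root@<hostname>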
Alternatively, you can manually connect to the host and ensure that
services on that host are removed or migrated to a host that is
managed by *cephadm*.
You can also disable this warning entirely with::
This warning can be disabled entirely by running the following
command:
.. prompt:: bash #
ceph config set mgr mgr/cephadm/warn_on_stray_hosts false
@ -157,7 +204,7 @@ See :ref:`cephadm-fqdn` for more information about host names and
domain names.
CEPHADM_STRAY_DAEMON
^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~
One or more Ceph daemons are running but are not managed by
*cephadm*. This may be because they were deployed using a different
@ -170,12 +217,14 @@ by cephadm; see :ref:`cephadm-adoption`. For stateless daemons, it is
usually easiest to provision a new daemon with the ``ceph orch apply``
command and then stop the unmanaged daemon.
This warning can be disabled entirely with::
This warning can be disabled entirely by running the following command:
.. prompt:: bash #
ceph config set mgr mgr/cephadm/warn_on_stray_daemons false
CEPHADM_HOST_CHECK_FAILED
^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~
One or more hosts have failed the basic cephadm host check, which verifies
that (1) the host is reachable and cephadm can be executed there, and (2)
@ -183,58 +232,80 @@ that the host satisfies basic prerequisites, like a working container
runtime (podman or docker) and working time synchronization.
If this test fails, cephadm will not be able to manage services on that host.
You can manually run this check with::
You can manually run this check by running the following command:
.. prompt:: bash #
ceph cephadm check-host *<hostname>*
You can remove a broken host from management with::
You can remove a broken host from management by running the following command:
.. prompt:: bash #
ceph orch host rm *<hostname>*
You can disable this health warning with::
You can disable this health warning by running the following command:
.. prompt:: bash #
ceph config set mgr mgr/cephadm/warn_on_failed_host_check false
Cluster Configuration Checks
----------------------------
Cephadm periodically scans each of the hosts in the cluster, to understand the state
of the OS, disks, NICs etc. These facts can then be analysed for consistency across the hosts
in the cluster to identify any configuration anomalies.
Cephadm periodically scans each of the hosts in the cluster in order
to understand the state of the OS, disks, NICs etc. These facts can
then be analysed for consistency across the hosts in the cluster to
identify any configuration anomalies.
The configuration checks are an **optional** feature, enabled by the following command
::
Enabling Cluster Configuration Checks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The configuration checks are an **optional** feature, and are enabled
by running the following command:
.. prompt:: bash #
ceph config set mgr mgr/cephadm/config_checks_enabled true
The configuration checks are triggered after each host scan (1m). The cephadm log entries will
show the current state and outcome of the configuration checks as follows;
States Returned by Cluster Configuration Checks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Disabled state (config_checks_enabled false)
::
The configuration checks are triggered after each host scan (1m). The
cephadm log entries will show the current state and outcome of the
configuration checks as follows:
Disabled state (config_checks_enabled false):
.. code-block:: bash
ALL cephadm checks are disabled, use 'ceph config set mgr mgr/cephadm/config_checks_enabled true' to enable
Enabled state (config_checks_enabled true)
::
Enabled state (config_checks_enabled true):
.. code-block:: bash
CEPHADM 8/8 checks enabled and executed (0 bypassed, 0 disabled). No issues detected
The configuration checks themselves are managed through several cephadm sub-commands.
Managing Configuration Checks (subcommands)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To determine whether the configuration checks are enabled, you can use the following command
::
The configuration checks themselves are managed through several cephadm subcommands.
To determine whether the configuration checks are enabled, run the following command:
.. prompt:: bash #
ceph cephadm config-check status
This command will return the status of the configuration checker as either "Enabled" or "Disabled".
This command returns the status of the configuration checker as either "Enabled" or "Disabled".
Listing all the configuration checks and their current state
::
To list all the configuration checks and their current states, run the following command:
ceph cephadm config-check ls
.. code-block:: console
# ceph cephadm config-check ls
e.g.
NAME HEALTHCHECK STATUS DESCRIPTION
kernel_security CEPHADM_CHECK_KERNEL_LSM enabled checks SELINUX/Apparmor profiles are consistent across cluster hosts
os_subscription CEPHADM_CHECK_SUBSCRIPTION enabled checks subscription states are consistent for all cluster hosts
@ -245,128 +316,191 @@ Listing all the configuration checks and their current state
ceph_release CEPHADM_CHECK_CEPH_RELEASE enabled check for Ceph version consistency - ceph daemons should be on the same release (unless upgrade is active)
kernel_version CEPHADM_CHECK_KERNEL_VERSION enabled checks that the MAJ.MIN of the kernel on Ceph hosts is consistent
The name of each configuration check, can then be used to enable or disable a specific check.
::
The name of each configuration check can be used to enable or disable a specific check by running a command of the following form:
.. prompt:: bash #
ceph cephadm config-check disable <name>
eg.
For example:
.. prompt:: bash #
ceph cephadm config-check disable kernel_security
CEPHADM_CHECK_KERNEL_LSM
^^^^^^^^^^^^^^^^^^^^^^^^
Each host within the cluster is expected to operate within the same Linux Security Module (LSM) state. For example,
if the majority of the hosts are running with SELINUX in enforcing mode, any host not running in this mode
would be flagged as an anomaly and a healtcheck (WARNING) state raised.
~~~~~~~~~~~~~~~~~~~~~~~~
Each host within the cluster is expected to operate within the same Linux
Security Module (LSM) state. For example, if the majority of the hosts are
running with SELinux in enforcing mode, any host not running in this mode is
flagged as an anomaly, and a health check (WARNING) state is raised.
CEPHADM_CHECK_SUBSCRIPTION
^^^^^^^^^^^^^^^^^^^^^^^^^^
This check relates to the status of vendor subscription. This check is only performed for hosts using RHEL, but helps
to confirm that all your hosts are covered by an active subscription so patches and updates
are available.
~~~~~~~~~~~~~~~~~~~~~~~~~~
This check relates to the status of vendor subscription. This check is
performed only for hosts using RHEL, but helps to confirm that all hosts are
covered by an active subscription, which ensures that patches and updates are
available.
CEPHADM_CHECK_PUBLIC_MEMBERSHIP
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
All members of the cluster should have NICs configured on at least one of the public network subnets. Hosts
that are not on the public network will rely on routing which may affect performance
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
All members of the cluster should have NICs configured on at least one of the
public network subnets. Hosts that are not on the public network will rely on
routing, which may affect performance.
CEPHADM_CHECK_MTU
^^^^^^^^^^^^^^^^^
The MTU of the NICs on OSDs can be a key factor in consistent performance. This check examines hosts
that are running OSD services to ensure that the MTU is configured consistently within the cluster. This is
determined by establishing the MTU setting that the majority of hosts are using, with any anomalies being
resulting in a Ceph healthcheck.
~~~~~~~~~~~~~~~~~
The MTU of the NICs on OSDs can be a key factor in consistent performance. This
check examines hosts that are running OSD services to ensure that the MTU is
configured consistently within the cluster. This is determined by establishing
the MTU setting that the majority of hosts is using. Any anomalies result in a
Ceph health check.
CEPHADM_CHECK_LINKSPEED
^^^^^^^^^^^^^^^^^^^^^^^
Similar to the MTU check, linkspeed consistency is also a factor in consistent cluster performance.
This check determines the linkspeed shared by the majority of "OSD hosts", resulting in a healthcheck for
any hosts that are set at a lower linkspeed rate.
~~~~~~~~~~~~~~~~~~~~~~~
This check is similar to the MTU check. Linkspeed consistency is a factor in
consistent cluster performance, just as the MTU of the NICs on the OSDs is.
This check determines the linkspeed shared by the majority of OSD hosts, and a
health check is run for any hosts that are set at a lower linkspeed rate.
CEPHADM_CHECK_NETWORK_MISSING
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The public_network and cluster_network settings support subnet definitions for IPv4 and IPv6. If these
settings are not found on any host in the cluster a healthcheck is raised.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `public_network` and `cluster_network` settings support subnet definitions
for IPv4 and IPv6. If these settings are not found on any host in the cluster,
a health check is raised.
CEPHADM_CHECK_CEPH_RELEASE
^^^^^^^^^^^^^^^^^^^^^^^^^^
Under normal operations, the ceph cluster should be running daemons under the same ceph release (i.e. all
pacific). This check looks at the active release for each daemon, and reports any anomalies as a
healthcheck. *This check is bypassed if an upgrade process is active within the cluster.*
~~~~~~~~~~~~~~~~~~~~~~~~~~
Under normal operations, the Ceph cluster runs daemons under the same ceph
release (that is, the Ceph cluster runs all daemons under (for example)
Octopus). This check determines the active release for each daemon, and
reports any anomalies as a healthcheck. *This check is bypassed if an upgrade
process is active within the cluster.*
CEPHADM_CHECK_KERNEL_VERSION
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The OS kernel version (maj.min) is checked for consistency across the hosts. Once again, the
majority of the hosts is used as the basis of identifying anomalies.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The OS kernel version (maj.min) is checked for consistency across the hosts.
The kernel version of the majority of the hosts is used as the basis for
identifying anomalies.
.. _client_keyrings_and_configs:
Client keyrings and configs
===========================
Cephadm can distribute copies of the ``ceph.conf`` and client keyring
files to hosts. For example, it is usually a good idea to store a
copy of the config and ``client.admin`` keyring on any hosts that will
be used to administer the cluster via the CLI. By default, cephadm will do
this for any nodes with the ``_admin`` label (which normally includes the bootstrap
host).
Cephadm can distribute copies of the ``ceph.conf`` file and client keyring
files to hosts. It is usually a good idea to store a copy of the config and
``client.admin`` keyring on any host used to administer the cluster via the
CLI. By default, cephadm does this for any nodes that have the ``_admin``
label (which normally includes the bootstrap host).
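For example, to have cephadm maintain the admin config and keyring on an
additional host, it is usually enough to add the ``_admin`` label to that host
(a sketch; ``host5`` is a placeholder hostname):
.. prompt:: bash #
ceph orch host label add host5 _admin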
When a client keyring is placed under management, cephadm will:
- build a list of target hosts based on the specified placement spec (see :ref:`orchestrator-cli-placement-spec`)
- build a list of target hosts based on the specified placement spec (see
:ref:`orchestrator-cli-placement-spec`)
- store a copy of the ``/etc/ceph/ceph.conf`` file on the specified host(s)
- store a copy of the keyring file on the specified host(s)
- update the ``ceph.conf`` file as needed (e.g., due to a change in the cluster monitors)
- update the keyring file if the entity's key is changed (e.g., via ``ceph auth ...`` commands)
- ensure the keyring file has the specified ownership and mode
- update the keyring file if the entity's key is changed (e.g., via ``ceph
auth ...`` commands)
- ensure that the keyring file has the specified ownership and specified mode
- remove the keyring file when client keyring management is disabled
- remove the keyring file from old hosts if the keyring placement spec is updated (as needed)
- remove the keyring file from old hosts if the keyring placement spec is
updated (as needed)
To view which client keyrings are currently under management::
Listing Client Keyrings
-----------------------
To see the list of client keyrings currently under management, run the following command:
.. prompt:: bash #
ceph orch client-keyring ls
To place a keyring under management::
Putting a Keyring Under Management
----------------------------------
To put a keyring under management, run a command of the following form:
.. prompt:: bash #
ceph orch client-keyring set <entity> <placement> [--mode=<mode>] [--owner=<uid>.<gid>] [--path=<path>]
- By default, the *path* will be ``/etc/ceph/client.{entity}.keyring``, which is where
Ceph looks by default. Be careful specifying alternate locations as existing files
may be overwritten.
- By default, the *path* is ``/etc/ceph/client.{entity}.keyring``, which is
where Ceph looks by default. Be careful when specifying alternate locations,
as existing files may be overwritten.
- A placement of ``*`` (all hosts) is common.
- The mode defaults to ``0600`` and ownership to ``0:0`` (user root, group root).
For example, to create and deploy a ``client.rbd`` key to hosts with the ``rbd-client`` label and group readable by uid/gid 107 (qemu),::
For example, to create a ``client.rbd`` key and deploy it to hosts with the
``rbd-client`` label and make it group readable by uid/gid 107 (qemu), run the
following commands:
.. prompt:: bash #
ceph auth get-or-create-key client.rbd mon 'profile rbd' mgr 'profile rbd' osd 'profile rbd pool=my_rbd_pool'
ceph orch client-keyring set client.rbd label:rbd-client --owner 107:107 --mode 640
The resulting keyring file is::
The resulting keyring file is:
.. code-block:: console
-rw-r-----. 1 qemu qemu 156 Apr 21 08:47 /etc/ceph/client.client.rbd.keyring
To disable management of a keyring file::
Disabling Management of a Keyring File
--------------------------------------
To disable management of a keyring file, run a command of the following form:
.. prompt:: bash #
ceph orch client-keyring rm <entity>
Note that this will delete any keyring files for this entity that were previously written
to cluster nodes.
.. note::
This deletes any keyring files for this entity that were previously written
to cluster nodes.
.. _etc_ceph_conf_distribution:
/etc/ceph/ceph.conf
===================
It may also be useful to distribute ``ceph.conf`` files to hosts without an associated
client keyring file. By default, cephadm only deploys a ``ceph.conf`` file to hosts where a client keyring
is also distributed (see above). To write config files to hosts without client keyrings::
Distributing ceph.conf to hosts that have no keyrings
-----------------------------------------------------
It might be useful to distribute ``ceph.conf`` files to hosts without an
associated client keyring file. By default, cephadm deploys only a
``ceph.conf`` file to hosts where a client keyring is also distributed (see
above). To write config files to hosts without client keyrings, run the
following command:
.. prompt:: bash #
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf true
By default, the configs are written to all hosts (i.e., those listed
by ``ceph orch host ls``). To specify which hosts get a ``ceph.conf``::
Using Placement Specs to specify which hosts get config files
--------------------------------------------------------------
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts <placement spec>
By default, the configs are written to all hosts (i.e., those listed by ``ceph
orch host ls``). To specify which hosts get a ``ceph.conf``, run a command of
the following form:
For example, to distribute configs to hosts with the ``bare_config`` label,::
.. prompt:: bash #
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts label:bare_config
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts <placement spec>
Distributing ceph.conf to hosts tagged with bare_config
-------------------------------------------------------
For example, to distribute configs to hosts with the ``bare_config`` label, run the following command:
.. prompt:: bash #
ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts label:bare_config
(See :ref:`orchestrator-cli-placement-spec` for more information about placement specs.)

View File

@ -7,7 +7,7 @@ OSD Service
List Devices
============
``ceph-volume`` scans each cluster in the host from time to time in order
``ceph-volume`` scans each host in the cluster from time to time in order
to determine which devices are present and whether they are eligible to be
used as OSDs.
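To see what has been discovered, the device inventory can be listed through
the orchestrator, for example:
.. prompt:: bash #
ceph orch device ls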
@ -211,6 +211,7 @@ If you want to avoid this behavior (disable automatic creation of OSD on availab
* For cephadm, see also :ref:`cephadm-spec-unmanaged`.
.. _cephadm-osd-removal:
Remove an OSD
=============
@ -347,7 +348,7 @@ zap`` on the remote host.
.. prompt:: bash #
orch device zap <hostname> <path>
ceph orch device zap <hostname> <path>
Example command:

View File

@ -82,6 +82,41 @@ something like:
See :ref:`orchestrator-cli-placement-spec` for details of the placement
specification. See :ref:`multisite` for more information of setting up multisite RGW.
Setting up HTTPS
----------------
In order to enable HTTPS for RGW services, apply a spec file following this scheme:
.. code-block:: yaml
service_type: rgw
service_id: myrgw
spec:
rgw_frontend_ssl_certificate: |
-----BEGIN PRIVATE KEY-----
V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
-----END PRIVATE KEY-----
-----BEGIN CERTIFICATE-----
V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
-----END CERTIFICATE-----
ssl: true
Then apply this yaml document:
.. prompt:: bash #
ceph orch apply -i myrgw.yaml
Note that the value of ``rgw_frontend_ssl_certificate`` is a literal block
(indicated by the ``|`` character), which preserves the newline characters.
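If you do not yet have a certificate, a self-signed key and certificate can be
generated for testing (a sketch; ``rgw.example.com`` is a placeholder, and a
CA-signed certificate should be used in production):
.. prompt:: bash #
openssl req -x509 -nodes -newkey rsa:4096 -days 365 -keyout rgw.key -out rgw.crt -subj "/CN=rgw.example.com"
cat rgw.key rgw.crt
The combined output of the ``cat`` command is what goes under
``rgw_frontend_ssl_certificate`` in the spec above.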
.. _orchestrator-haproxy-service-spec:

View File

@ -158,6 +158,54 @@ or in a YAML files.
cephadm will not deploy daemons on hosts with the ``_no_schedule`` label; see :ref:`cephadm-special-host-labels`.
.. note::
The **apply** command can be confusing. For this reason, we recommend using
YAML specifications.
Each ``ceph orch apply <service-name>`` command supersedes the one before it.
If you do not use the proper comma-separated list-based syntax, you will
clobber your work as you go.
For example:
.. prompt:: bash #
ceph orch apply mon host1
ceph orch apply mon host2
ceph orch apply mon host3
This results in only one host having a monitor applied to it: host3.
(The first command creates a monitor on host1. Then the second command
clobbers the monitor on host1 and creates a monitor on host2. Then the
third command clobbers the monitor on host2 and creates a monitor on
host3. In this scenario, at this point, there is a monitor ONLY on
host3.)
To make certain that a monitor is applied to each of these three hosts,
run a command like this:
.. prompt:: bash #
ceph orch apply mon "host1,host2,host3"
There is another way to apply monitors to multiple hosts: a ``yaml`` file
can be used. Instead of using the "ceph orch apply mon" commands, run a
command of this form:
.. prompt:: bash #
ceph orch apply -i file.yaml
Here is a sample **file.yaml** file::
service_type: mon
placement:
hosts:
- host1
- host2
- host3
Explicit placements
-------------------
@ -192,7 +240,39 @@ and ``=name`` specifies the name of the new monitor.
Placement by labels
-------------------
Daemons can be explicitly placed on hosts that match a specific label:
Daemon placement can be limited to hosts that match a specific label. To set
a label ``mylabel`` to the appropriate hosts, run this command:
.. prompt:: bash #
ceph orch host label add *<hostname>* mylabel
To view the current hosts and labels, run this command:
.. prompt:: bash #
ceph orch host ls
For example:
.. prompt:: bash #
ceph orch host label add host1 mylabel
ceph orch host label add host2 mylabel
ceph orch host label add host3 mylabel
ceph orch host ls
.. code-block:: bash
HOST ADDR LABELS STATUS
host1 mylabel
host2 mylabel
host3 mylabel
host4
host5
Now, tell cephadm to deploy daemons based on the label by running
this command:
.. prompt:: bash #
@ -240,8 +320,8 @@ Or in YAML:
host_pattern: "*"
Setting a limit
---------------
Changing the number of monitors
-------------------------------
By specifying ``count``, only the number of daemons specified will be created:
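A minimal sketch (three is an arbitrary count here; any service type follows the same pattern):
.. prompt:: bash #
   ceph orch apply mon 3
This requests exactly three monitor daemons and leaves the choice of hosts to the orchestrator.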
@ -402,7 +482,17 @@ To disable the automatic management of dameons, set ``unmanaged=True`` in the
Deploying a daemon on a host manually
-------------------------------------
To manually deploy a daemon on a host, run a command of the following form:
.. note::
This workflow has a very limited use case and should only be used
in rare circumstances.
To manually deploy a daemon on a host, follow these steps:
Modify the service spec for a service by getting the
existing spec, adding ``unmanaged: true``, and applying the modified spec.
Then manually deploy the daemon using the following:
.. prompt:: bash #
@ -414,6 +504,13 @@ For example :
ceph orch daemon add mgr --placement=my_host
.. note::
Removing ``unmanaged: true`` from the service spec will
enable the reconciliation loop for this service and will
potentially lead to the removal of the daemon, depending
on the placement spec.
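A minimal sketch of this manual-deployment workflow, assuming the ``mgr`` service
from the example above (the file name is illustrative):
.. prompt:: bash #
   ceph orch ls --service_name=mgr --export > mgr.yaml
   # edit mgr.yaml and add "unmanaged: true" to the spec
   ceph orch apply -i mgr.yaml
   ceph orch daemon add mgr --placement=my_host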
Removing a daemon from a host manually
--------------------------------------

View File

@ -1,46 +1,70 @@
Troubleshooting
===============
Sometimes there is a need to investigate why a cephadm command failed or why
a specific service no longer runs properly.
You might need to investigate why a cephadm command failed
or why a certain service no longer runs properly.
As cephadm deploys daemons as containers, troubleshooting daemons is slightly
different. Here are a few tools and commands to help investigating issues.
Cephadm deploys daemons as containers. This means that
troubleshooting those containerized daemons might work
differently than you expect, particularly if you expect it to
work the way troubleshooting does when the daemons involved
aren't containerized.
Here are some tools and commands to help you troubleshoot
your Ceph environment.
.. _cephadm-pause:
Pausing or disabling cephadm
----------------------------
If something goes wrong and cephadm is doing behaving in a way you do
not like, you can pause most background activity with::
If something goes wrong and cephadm is behaving badly, you can
pause most of the Ceph cluster's background activity by running
the following command:
.. prompt:: bash #
ceph orch pause
This will stop any changes, but cephadm will still periodically check hosts to
refresh its inventory of daemons and devices. You can disable cephadm
completely with::
This stops all changes in the Ceph cluster, but cephadm will
still periodically check hosts to refresh its inventory of
daemons and devices. You can disable cephadm completely by
running the following commands:
.. prompt:: bash #
ceph orch set backend ''
ceph mgr module disable cephadm
This will disable all of the ``ceph orch ...`` CLI commands but the previously
deployed daemon containers will still continue to exist and start as they
did before.
These commands disable all of the ``ceph orch ...`` CLI commands.
All previously deployed daemon containers continue to exist and
will start as they did before you ran these commands.
Please refer to :ref:`cephadm-spec-unmanaged` for disabling individual
services.
See :ref:`cephadm-spec-unmanaged` for information on disabling
individual services.
Per-service and per-daemon events
---------------------------------
In order to aid debugging failed daemon deployments, cephadm stores
events per service and per daemon. They often contain relevant information::
In order to help with the process of debugging failed daemon
deployments, cephadm stores events per service and per daemon.
These events often contain information relevant to
troubleshooting
your Ceph cluster.
Listing service events
~~~~~~~~~~~~~~~~~~~~~~
To see the events associated with a certain service, run a
command of the following form:
.. prompt:: bash #
ceph orch ls --service_name=<service-name> --format yaml
for example:
This will return something in the following form:
.. code-block:: yaml
@ -58,10 +82,18 @@ for example:
- '2021-02-01T12:09:25.264584 service:alertmanager [ERROR] "Failed to apply: Cannot
place <AlertManagerSpec for service_name=alertmanager> on unknown_host: Unknown hosts"'
Or per daemon::
Listing daemon events
~~~~~~~~~~~~~~~~~~~~~
To see the events associated with a certain daemon, run a
command of the following form:
.. prompt:: bash #
ceph orch ps --service-name <service-name> --daemon-id <daemon-id> --format yaml
This will return something in the following form:
.. code-block:: yaml
daemon_type: mds
@ -77,16 +109,11 @@ Or per daemon::
Checking cephadm logs
---------------------
You can monitor the cephadm log in real time with::
To learn how to monitor the cephadm logs as they are generated, read :ref:`watching_cephadm_logs`.
ceph -W cephadm
You can see the last few messages with::
ceph log last cephadm
If you have enabled logging to files, you can see a cephadm log file called
``ceph.cephadm.log`` on monitor hosts (see :ref:`cephadm-logs`).
If your Ceph cluster has been configured to log events to files, there will exist a
cephadm log file called ``ceph.cephadm.log`` on all monitor hosts (see
:ref:`cephadm-logs` for a more complete explanation of this).
Gathering log files
-------------------
@ -190,7 +217,8 @@ Things users can do:
[root@mon1 ~]# ssh -F config -i ~/cephadm_private_key root@mon1
Verifying that the Public Key is Listed in the authorized_keys file
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To verify that the public key is in the authorized_keys file, run the following commands::
[root@mon1 ~]# cephadm shell -- ceph cephadm get-pub-key > ~/ceph.pub

View File

@ -12,26 +12,32 @@ The automated upgrade process follows Ceph best practices. For example:
* Each daemon is restarted only after Ceph indicates that the cluster
will remain available.
Keep in mind that the Ceph cluster health status is likely to switch to
``HEALTH_WARNING`` during the upgrade.
.. note::
The Ceph cluster health status is likely to switch to
``HEALTH_WARNING`` during the upgrade.
.. note::
In case a host of the cluster is offline, the upgrade is paused.
Starting the upgrade
====================
Before you begin using cephadm to upgrade Ceph, verify that all hosts are currently online and that your cluster is healthy:
Before you use cephadm to upgrade Ceph, verify that all hosts are currently online and that your cluster is healthy by running the following command:
.. prompt:: bash #
ceph -s
To upgrade (or downgrade) to a specific release:
To upgrade (or downgrade) to a specific release, run the following command:
.. prompt:: bash #
ceph orch upgrade start --ceph-version <version>
For example, to upgrade to v15.2.1:
For example, to upgrade to v15.2.1, run the following command:
.. prompt:: bash #
@ -76,11 +82,11 @@ Watch the cephadm log by running the following command:
Canceling an upgrade
====================
You can stop the upgrade process at any time with:
You can stop the upgrade process at any time by running the following command:
.. prompt:: bash #
# ceph orch upgrade stop
ceph orch upgrade stop
Potential problems
@ -91,22 +97,27 @@ There are a few health alerts that can arise during the upgrade process.
UPGRADE_NO_STANDBY_MGR
----------------------
This alert means that Ceph requires an active and standby manager daemon in
order to proceed, but there is currently no standby.
This alert (``UPGRADE_NO_STANDBY_MGR``) means that Ceph does not detect an
active standby manager daemon. In order to proceed with the upgrade, Ceph
requires an active standby manager daemon (which you can think of in this
context as "a second manager").
You can ensure that Cephadm is configured to run 2 (or more) managers by running the following command:
You can ensure that Cephadm is configured to run 2 (or more) managers by
running the following command:
.. prompt:: bash #
ceph orch apply mgr 2 # or more
You can check the status of existing mgr daemons by running the following command:
You can check the status of existing mgr daemons by running the following
command:
.. prompt:: bash #
ceph orch ps --daemon-type mgr
If an existing mgr daemon has stopped, you can try to restart it by running the following command:
If an existing mgr daemon has stopped, you can try to restart it by running the
following command:
.. prompt:: bash #
@ -115,12 +126,13 @@ If an existing mgr daemon has stopped, you can try to restart it by running the
UPGRADE_FAILED_PULL
-------------------
This alert means that Ceph was unable to pull the container image for the
target version. This can happen if you specify a version or container image
that does not exist (e.g. "1.2.3"), or if the container registry can not
be reached by one or more hosts in the cluster.
This alert (``UPGRADE_FAILED_PULL``) means that Ceph was unable to pull the
container image for the target version. This can happen if you specify a
version or container image that does not exist (e.g. "1.2.3"), or if the
container registry can not be reached by one or more hosts in the cluster.
To cancel the existing upgrade and to specify a different target version, run the following commands:
To cancel the existing upgrade and to specify a different target version, run
the following commands:
.. prompt:: bash #

View File

@ -349,24 +349,6 @@ for use in exceptional circumstances. Incorrect use of these
commands may cause serious problems, such as an inaccessible
file system.
::
mds compat rm_compat
Removes an compatibility feature flag.
::
mds compat rm_incompat
Removes an incompatibility feature flag.
::
mds compat show
Show MDS compatibility flags.
::
mds rmfailed
@ -379,3 +361,14 @@ This removes a rank from the failed set.
This command resets the file system state to defaults, except for the name and
pools. Non-zero ranks are saved in the stopped set.
::
fs new <file system name> <metadata pool name> <data pool name> --fscid <fscid> --force
This command creates a file system with a specific **fscid** (file system cluster ID).
You may want to do this when an application expects the file system's ID to be
stable after it has been recovered, e.g., after monitor databases are lost and
rebuilt. Consequently, file system IDs don't always keep increasing with newer
file systems.

View File

@ -15,6 +15,53 @@ Requirements
.. note:: From Pacific, the nfs mgr module must be enabled prior to use.
Ganesha Configuration Hierarchy
===============================
Cephadm and rook start the nfs-ganesha daemon with a `bootstrap configuration`
containing a minimal ganesha configuration, create an empty rados `common config`
object in the `nfs-ganesha` pool and watch this config object. The `mgr/nfs`
module adds rados export object urls to the common config object. If cluster
config is set, it creates a `user config` object containing the custom ganesha
configuration and adds its url to the common config object.
.. ditaa::
rados://$pool/$namespace/export-$i rados://$pool/$namespace/userconf-nfs.$cluster_id
(export config) (user config)
+----------+ +----------+ +----------+ +---------------------------+
| | | | | | | |
| export-1 | | export-2 | | export-3 | | userconf-nfs.$cluster_id |
| | | | | | | |
+----+-----+ +----+-----+ +-----+----+ +-------------+-------------+
^ ^ ^ ^
| | | |
+--------------------------------+-------------------------+
%url |
|
+--------+--------+
| | rados://$pool/$namespace/conf-nfs.$svc
| conf+nfs.$svc | (common config)
| |
+--------+--------+
^
|
watch_url |
+----------------------------------------------+
| | |
| | | RADOS
+----------------------------------------------------------------------------------+
| | | CONTAINER
watch_url | watch_url | watch_url |
| | |
+--------+-------+ +--------+-------+ +-------+--------+
| | | | | | /etc/ganesha/ganesha.conf
| nfs.$svc.a | | nfs.$svc.b | | nfs.$svc.c | (bootstrap config)
| | | | | |
+----------------+ +----------------+ +----------------+
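The objects described above can be inspected directly with the ``rados`` CLI. This
is only a hedged sketch; substitute the pool, namespace and service names from the
scheme in the diagram::
    rados --pool <pool> --namespace <namespace> ls
    rados --pool <pool> --namespace <namespace> get conf-nfs.<svc> /tmp/conf-nfs.conf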
Create NFS Ganesha Cluster
==========================

View File

@ -6,13 +6,11 @@ flags to support seamless upgrades of the MDSs without potentially causing
assertions or other faults due to incompatible messages or other functional
differences. For this reason, it's necessary during any cluster upgrade to
reduce the number of active MDS for a file system to one first so that two
active MDS do not communicate with different versions. Further, it's also
necessary to take standbys offline as any new CompatSet flags will propagate
via the MDSMap to all MDS and cause older MDS to suicide.
active MDS do not communicate with different versions.
The proper sequence for upgrading the MDS cluster is:
1. Disable and stop standby-replay daemons.
1. For each file system, disable and stop standby-replay daemons.
::
@ -27,7 +25,7 @@ command. Older versions of Ceph require you to stop these daemons manually.
ceph mds fail mds.<X>
2. Reduce the number of ranks to 1:
2. For each file system, reduce the number of ranks to 1:
::
@ -39,43 +37,20 @@ command. Older versions of Ceph require you to stop these daemons manually.
ceph status # wait for MDS to finish stopping
4. Take all standbys offline, e.g. using systemctl:
::
systemctl stop ceph-mds.target
5. Confirm only one MDS is online and is rank 0 for your FS:
::
ceph status
6. Upgrade the single active MDS, e.g. using systemctl:
4. For each MDS, upgrade packages and restart. Note: to reduce failovers, it is
recommended -- but not strictly necessary -- to first upgrade standby daemons.
::
# use package manager to update cluster
systemctl restart ceph-mds.target
7. Upgrade/start the standby daemons.
::
# use package manager to update cluster
systemctl restart ceph-mds.target
8. Restore the previous max_mds for your cluster:
5. For each file system, restore the previous max_mds and allow_standby_replay settings for your cluster:
::
ceph fs set <fs_name> max_mds <old_max_mds>
9. Restore setting for ``allow_standby_replay`` (if applicable):
::
ceph fs set <fs_name> allow_standby_replay true
ceph fs set <fs_name> allow_standby_replay <old_allow_standby_replay>
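Putting these steps together, the sequence for a single file system might look like
the following sketch (placeholders as in the steps above)::
    ceph fs set <fs_name> allow_standby_replay false
    ceph fs set <fs_name> max_mds 1
    ceph status    # wait until only rank 0 remains active
    # upgrade packages and restart each MDS, standbys first
    systemctl restart ceph-mds.target
    ceph fs set <fs_name> max_mds <old_max_mds>
    ceph fs set <fs_name> allow_standby_replay <old_allow_standby_replay>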
Upgrading pre-Firefly file systems past Jewel

View File

@ -124,6 +124,20 @@ This means we should do very few synchronous calls to remote hosts.
As a guideline, cephadm should do at most ``O(1)`` network calls in CLI handlers.
Everything else should be done asynchronously in other threads, like ``serve()``.
Note regarding different variables used in the code
===================================================
* a ``service_type`` is something like mon, mgr, alertmanager etc defined
in ``ServiceSpec``
* a ``service_id`` is the name of the service. Some services don't have
names.
* a ``service_name`` is ``<service_type>.<service_id>``
* a ``daemon_type`` is the same as the service_type, except for ingress,
which has the haproxy and keepalived daemon types.
* a ``daemon_id`` is typically ``<service_id>.<hostname>.<random-string>``.
(Not the case for e.g. OSDs. OSDs are always called OSD.N)
* a ``daemon_name`` is ``<daemon_type>.<daemon_id>``
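As an illustrative example of these conventions (service and host names are hypothetical),
an ingress service with ``service_id`` ``rgw.foo`` has ``service_name`` ``ingress.rgw.foo``,
while its daemons have the ``haproxy`` and ``keepalived`` daemon types::
    ceph orch ls ingress
    ceph orch ls --service_name=ingress.rgw.foo --format yaml
    ceph orch ps --daemon-type haproxy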
Kcli: a virtualization management tool to make easy orchestrators development
=============================================================================
`Kcli <https://github.com/karmab/kcli>`_ is meant to interact with existing

View File

@ -430,7 +430,14 @@ run-cephadm-e2e-tests.sh
Orchestrator backend behave correctly.
Prerequisites: you need to install `KCLI
<https://kcli.readthedocs.io/en/latest/>`_ in your local machine.
<https://kcli.readthedocs.io/en/latest/>`_ and Node.js in your local machine.
Configure KCLI plan requirements::
$ sudo chown -R $(id -un) /var/lib/libvirt/images
$ mkdir -p /var/lib/libvirt/images/ceph-dashboard dashboard
$ kcli create pool -p /var/lib/libvirt/images/ceph-dashboard dashboard
$ kcli create network -c 192.168.100.0/24 dashboard
Note:
This script is aimed to be run as jenkins job so the cleanup is triggered only in a jenkins
@ -439,9 +446,26 @@ Note:
Start E2E tests by running::
$ cd <your/ceph/repo/dir>
$ sudo chown -R $(id -un) src/pybind/mgr/dashboard/frontend/dist src/pybind/mgr/dashboard/frontend/node_modules
$ sudo chown -R $(id -un) src/pybind/mgr/dashboard/frontend/{dist,node_modules,src/environments}
$ ./src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh
$ kcli delete plan -y ceph # After tests finish.
You can also start a cluster in development mode (so the frontend build starts in watch mode and you
only have to reload the page for the changes to be reflected) by running::
$ ./src/pybind/mgr/dashboard/ci/cephadm/start-cluster.sh --dev-mode
Note:
Add ``--expanded`` if you need a cluster ready to deploy services (one with enough monitor
daemons spread across different hosts and enough OSDs).
Test your changes by running:
$ ./src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh
Shutdown the cluster by running:
$ kcli delete plan -y ceph
$ # In development mode, also kill the npm build watch process (e.g., pkill -f "ng build")
Other running options
.....................
@ -1652,6 +1676,58 @@ load the controllers that we want to test. In the above example we are only
loading the ``Ping`` controller. We can also disable authentication of a
controller at this stage, as depicted in the example.
How to update or create new dashboards in grafana?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We are using ``jsonnet`` and ``grafonnet-lib`` to write code for the grafana dashboards.
All the dashboards are written inside ``grafana_dashboards.jsonnet`` file in the
monitoring/grafana/dashboards/jsonnet directory.
We generate the dashboard json files directly from this jsonnet file by running this
command in the grafana/dashboards directory:
``jsonnet -m . jsonnet/grafana_dashboards.jsonnet``.
(For the above command to succeed we need the ``jsonnet`` package installed and the
``grafonnet-lib`` directory cloned on our machine. Please refer to
``https://grafana.github.io/grafonnet-lib/getting-started/`` in case you run into trouble.)
To update an existing grafana dashboard or to create a new one, we need to update
the ``grafana_dashboards.jsonnet`` file and generate the new/updated json files using the
above mentioned command. People who are not familiar with the grafonnet or jsonnet
implementation can follow this doc - ``https://grafana.github.io/grafonnet-lib/``.
Example grafana dashboard in jsonnet format:
To specify the grafana dashboard properties such as title, uid, etc., we can create a local function -
::
local dashboardSchema(title, uid, time_from, refresh, schemaVersion, tags,timezone, timepicker)
To add a graph panel we can specify the graph schema in a local function such as -
::
local graphPanelSchema(title, nullPointMode, stack, formatY1, formatY2, labelY1, labelY2, min, fill, datasource)
and then use these functions inside the dashboard definition like -
::
{
radosgw-sync-overview.json: //json file name to be generated
dashboardSchema(
'RGW Sync Overview', 'rgw-sync-overview', 'now-1h', '15s', .., .., ..
)
.addPanels([
graphPanelSchema(
'Replication (throughput) from Source Zone', 'Bps', null, .., .., ..)
])
}
The valid grafonnet-lib attributes can be found here - ``https://grafana.github.io/grafonnet-lib/api-docs/``.
How to listen for manager notifications in a controller?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -19,11 +19,11 @@ Ceph Container Images
Official Releases
-----------------
Ceph Container images are available from Docker Hub at::
Ceph Container images are available from both Quay and Docker Hub::
https://quay.io/repository/ceph/ceph
https://hub.docker.com/r/ceph
ceph/ceph
^^^^^^^^^
@ -42,6 +42,13 @@ ceph/ceph
| vRELNUM.Y.Z-YYYYMMDD | A specific build (e.g., *v14.2.4-20191203*) |
+----------------------+--------------------------------------------------------------+
Legacy container images
-----------------------
Legacy container images are available from Docker Hub at::
https://hub.docker.com/r/ceph
ceph/daemon-base
^^^^^^^^^^^^^^^^

View File

@ -15,7 +15,7 @@ Synopsis
| **ceph-volume** **inventory**
| **ceph-volume** **lvm** [ *trigger* | *create* | *activate* | *prepare*
| *zap* | *list* | *batch*]
| *zap* | *list* | *batch* | *new-wal* | *new-db* | *migrate* ]
| **ceph-volume** **simple** [ *trigger* | *scan* | *activate* ]
@ -241,6 +241,96 @@ Positional arguments:
``/path/to/sda1`` or ``/path/to/sda`` for regular devices.
new-wal
^^^^^^^
Attaches the given logical volume to the OSD as a WAL. The logical volume
name format is vg/lv. Fails if the OSD already has an attached WAL.
Usage::
ceph-volume lvm new-wal --osd-id OSD_ID --osd-fsid OSD_FSID --target <target lv>
Optional arguments:
.. option:: -h, --help
show the help message and exit
.. option:: --no-systemd
Skip checking OSD systemd unit
Required arguments:
.. option:: --target
logical volume name to attach as WAL
new-db
^^^^^^
Attaches the given logical volume to the OSD as a DB. The logical volume
name format is vg/lv. Fails if the OSD already has an attached DB.
Usage::
ceph-volume lvm new-db --osd-id OSD_ID --osd-fsid OSD_FSID --target <target lv>
Optional arguments:
.. option:: -h, --help
show the help message and exit
.. option:: --no-systemd
Skip checking OSD systemd unit
Required arguments:
.. option:: --target
logical volume name to attach as DB
migrate
^^^^^^^
Moves BlueFS data from source volume(s) to the target volume; source volumes
(except the main one, i.e. the data or block volume) are removed on success. Only
LVM volumes are permitted as the target, either one that is already attached or a
new one. In the latter case it is attached to the OSD, replacing one of the source
devices. The following replacement rules apply (in order of precedence, stopping at
the first match):
- if the source list has a DB volume, the target device replaces it.
- if the source list has a WAL volume, the target device replaces it.
- if the source list has only the slow volume, the operation is not permitted and
  requires explicit allocation via the new-db/new-wal commands.
Usage::
ceph-volume lvm migrate --osd-id OSD_ID --osd-fsid OSD_FSID --target <target lv> --from {data|db|wal} [{data|db|wal} ...]
Optional arguments:
.. option:: -h, --help
show the help message and exit
.. option:: --no-systemd
Skip checking OSD systemd unit
Required arguments:
.. option:: --from
list of source device type names
.. option:: --target
logical volume to move data to
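For example, a hedged sketch that moves an OSD's DB onto a new logical volume (the
OSD id and the volume group/LV names are illustrative)::
      ceph-volume lvm migrate --osd-id 1 --osd-fsid <osd-fsid> --target vg_nvme/db_new --from db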
simple
------

View File

@ -53,6 +53,7 @@ Synopsis
| **cephadm** **bootstrap** [-h] [--config CONFIG] [--mon-id MON_ID]
| [--mon-addrv MON_ADDRV] [--mon-ip MON_IP]
| [--mgr-id MGR_ID] [--fsid FSID]
| [--log-to-file] [--single-host-defaults]
| [--output-dir OUTPUT_DIR]
| [--output-keyring OUTPUT_KEYRING]
| [--output-config OUTPUT_CONFIG]
@ -126,13 +127,14 @@ Options
.. option:: --docker
use docker instead of podman (default: False)
.. option::data-dir DATA_DIR
base directory for daemon data (default:/var/lib/ceph)
.. option:: --data-dir DATA_DIR
base directory for daemon data (default: /var/lib/ceph)
.. option:: --log-dir LOG_DIR
base directory for daemon logs (default:.. option:: /var/log/ceph)
base directory for daemon logs (default: /var/log/ceph)
.. option:: --logrotate-dir LOGROTATE_DIR
@ -208,6 +210,8 @@ Arguments:
* [--mon-ip MON_IP] mon IP
* [--mgr-id MGR_ID] mgr id (default: randomly generated)
* [--fsid FSID] cluster FSID
* [--log-to-file] configure cluster to log to traditional log files
* [--single-host-defaults] configure cluster to run on a single host
* [--output-dir OUTPUT_DIR] directory to write config, keyring, and pub key files
* [--output-keyring OUTPUT_KEYRING] location to write keyring file with new cluster admin and mon keys
* [--output-config OUTPUT_CONFIG] location to write conf file to connect to new cluster

View File

@ -376,50 +376,17 @@ password.
Enabling the Object Gateway Management Frontend
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To use the Object Gateway management functionality of the dashboard, you will
need to provide the login credentials of a user with the ``system`` flag
enabled. If you do not have a ``system`` user already, you must create one::
When RGW is deployed with cephadm, the RGW credentials used by the
dashboard will be automatically configured. You can also manually force the
credentials to be set up with::
$ radosgw-admin user create --uid=<user_id> --display-name=<display_name> \
--system
$ ceph dashboard set-rgw-credentials
Take note of the keys ``access_key`` and ``secret_key`` in the output.
This will create an RGW user with uid ``dashboard`` for each realm in
the system.
To obtain the credentials of an existing user via `radosgw-admin`::
If you've configured a custom 'admin' resource in your RGW admin API, you should set it here also::
$ radosgw-admin user info --uid=<user_id>
In case of having several Object Gateways, you will need the required users' credentials
to connect to each Object Gateway.
Finally, provide these credentials to the dashboard::
$ echo -n "{'<daemon1.id>': '<user1-access-key>', '<daemon2.id>': '<user2-access-key>', ...}" > <file-containing-access-key>
$ echo -n "{'<daemon1.id>': '<user1-secret-key>', '<daemon2.id>': '<user2-secret-key>', ...}" > <file-containing-secret-key>
$ ceph dashboard set-rgw-api-access-key -i <file-containing-access-key>
$ ceph dashboard set-rgw-api-secret-key -i <file-containing-secret-key>
.. note::
Legacy way of providing credentials (connect to single Object Gateway)::
$ echo -n "<access-key>" > <file-containing-access-key>
$ echo -n "<secret-key>" > <file-containing-secret-key>
In a simple configuration with a single RGW endpoint, this is all you
have to do to get the Object Gateway management functionality working. The
dashboard will try to automatically determine the host and port
from the Ceph Manager's service map.
In case of having several Object Gateways, you might want to set
the default one by setting its host and port manually::
$ ceph dashboard set-rgw-api-host <host>
$ ceph dashboard set-rgw-api-port <port>
In addition to the settings mentioned so far, the following settings do also
exist and you may find yourself in the situation that you have to use them::
$ ceph dashboard set-rgw-api-scheme <scheme> # http or https
$ ceph dashboard set-rgw-api-admin-resource <admin_resource>
If you are using a self-signed certificate in your Object Gateway setup,
@ -1314,6 +1281,7 @@ and loosely coupled fashion.
.. include:: dashboard_plugins/feature_toggles.inc.rst
.. include:: dashboard_plugins/debug.inc.rst
.. include:: dashboard_plugins/motd.inc.rst
Troubleshooting the Dashboard

View File

@ -0,0 +1,30 @@
.. _dashboard-motd:
Message of the day (MOTD)
^^^^^^^^^^^^^^^^^^^^^^^^^
Displays a configured `message of the day` at the top of the Ceph Dashboard.
The importance of a MOTD can be configured by its severity, which is
`info`, `warning` or `danger`. A MOTD can expire after a given time; once
expired, it is no longer displayed in the UI. Use the following
syntax to specify the expiration time: `Ns|m|h|d|w` for seconds, minutes,
hours, days and weeks. For example, if the MOTD should expire after 2 hours,
use `2h`, or `5w` for 5 weeks. Use `0` to configure a MOTD that does not expire.
To configure a MOTD, run the following command::
$ ceph dashboard motd set <severity:info|warning|danger> <expires> <message>
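For example, to show a warning that disappears after two hours (the message text is
illustrative)::
  $ ceph dashboard motd set warning 2h "The dashboard will be unavailable during tonight's maintenance window."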
To show the configured MOTD::
$ ceph dashboard motd get
To clear the configured MOTD run::
$ ceph dashboard motd clear
A MOTD with an `info` or `warning` severity can be closed by the user. An
`info` MOTD is not displayed again until the local storage cookies are
cleared or a new MOTD with a different severity is displayed. A MOTD with
a `warning` severity will be displayed again in a new session.

View File

@ -40,9 +40,37 @@ healed itself).
When the cluster is healthy, the balancer will throttle its changes
such that the percentage of PGs that are misplaced (i.e., that need to
be moved) is below a threshold of (by default) 5%. The
``max_misplaced`` threshold can be adjusted with::
``target_max_misplaced_ratio`` threshold can be adjusted with::
ceph config set mgr mgr/balancer/max_misplaced .07 # 7%
ceph config set mgr target_max_misplaced_ratio .07 # 7%
Set the number of seconds to sleep in between runs of the automatic balancer::
ceph config set mgr mgr/balancer/sleep_interval 60
Set the time of day to begin automatic balancing in HHMM format::
ceph config set mgr mgr/balancer/begin_time 0000
Set the time of day to finish automatic balancing in HHMM format::
ceph config set mgr mgr/balancer/end_time 2400
Restrict automatic balancing to this day of the week or later.
Uses the same conventions as crontab, 0 or 7 is Sunday, 1 is Monday, and so on::
ceph config set mgr mgr/balancer/begin_weekday 0
Restrict automatic balancing to this day of the week or earlier.
Uses the same conventions as crontab, 0 or 7 is Sunday, 1 is Monday, and so on::
ceph config set mgr mgr/balancer/end_weekday 7
Pool IDs to which the automatic balancing will be limited.
The default for this is an empty string, meaning all pools will be balanced.
The numeric pool IDs can be obtained with the :command:`ceph osd pool ls detail` command::
ceph config set mgr mgr/balancer/pool_ids 1,2,3
Modes
@ -136,3 +164,4 @@ The quality of the distribution that would result after executing a plan can be
Assuming the plan is expected to improve the distribution (i.e., it has a lower score than the current cluster state), the user can execute that plan with::
ceph balancer execute <plan-name>

View File

@ -410,10 +410,9 @@ on the number of replicas, clones and snapshots.
to this pool.
- **QUOTA OBJECTS:** The number of quota objects.
- **QUOTA BYTES:** The number of bytes in the quota objects.
- **DIRTY:** "DIRTY" is meaningful only when cache tiering is in use. If cache
tiering is in use, the "DIRTY" column lists the number of objects in the
cache pool that have been written to the cache pool but have not flushed yet
to the base pool.
- **DIRTY:** The number of objects in the cache pool that have been written to
the cache pool but have not been flushed yet to the base pool. This field is
only available when cache tiering is in use.
- **USED COMPR:** amount of space allocated for compressed data (i.e. this
includes compressed data plus all the allocation, replication and erasure
coding overhead).

View File

@ -41,10 +41,10 @@ the PG count with this command::
Output will be something like::
POOL SIZE TARGET SIZE RATE RAW CAPACITY RATIO TARGET RATIO EFFECTIVE RATIO PG_NUM NEW PG_NUM AUTOSCALE
a 12900M 3.0 82431M 0.4695 8 128 warn
c 0 3.0 82431M 0.0000 0.2000 0.9884 1 64 warn
b 0 953.6M 3.0 82431M 0.0347 8 warn
POOL SIZE TARGET SIZE RATE RAW CAPACITY RATIO TARGET RATIO EFFECTIVE RATIO BIAS PG_NUM NEW PG_NUM AUTOSCALE PROFILE
a 12900M 3.0 82431M 0.4695 8 128 warn scale-up
c 0 3.0 82431M 0.0000 0.2000 0.9884 1.0 1 64 warn scale-down
b 0 953.6M 3.0 82431M 0.0347 8 warn scale-down
**SIZE** is the amount of data stored in the pool. **TARGET SIZE**, if
present, is the amount of data the administrator has specified that
@ -77,6 +77,10 @@ ratio takes precedence.
The system uses the larger of the actual ratio and the effective ratio
for its calculation.
**BIAS** is used as a multiplier to manually adjust a pool's PG count, based
on prior information about how many PGs a specific pool is expected
to have.
**PG_NUM** is the current number of PGs for the pool (or the current
number of PGs that the pool is working towards, if a ``pg_num``
change is in progress). **NEW PG_NUM**, if present, is what the
@ -84,9 +88,13 @@ system believes the pool's ``pg_num`` should be changed to. It is
always a power of 2, and will only be present if the "ideal" value
varies from the current value by more than a factor of 3.
The final column, **AUTOSCALE**, is the pool ``pg_autoscale_mode``,
**AUTOSCALE** is the pool ``pg_autoscale_mode``
and will be either ``on``, ``off``, or ``warn``.
The final column, **PROFILE**, shows the autoscale profile
used by each pool. ``scale-up`` and ``scale-down`` are the
currently available profiles.
Automated scaling
-----------------
@ -113,6 +121,28 @@ example, a pool that maps to OSDs of class `ssd` and a pool that maps
to OSDs of class `hdd` will each have optimal PG counts that depend on
the number of those respective device types.
The autoscaler uses the `scale-down` profile by default,
where each pool starts out with a full complement of PGs and only scales
down when the usage ratio across the pools is not even. However, it also has
a `scale-up` profile, where each pool starts out with minimal PGs and scales
up as usage in that pool increases.
With only the `scale-down` profile, the autoscaler identifies
any overlapping roots and prevents the pools with such roots
from scaling because overlapping roots can cause problems
with the scaling process.
To use the `scale-up` profile::
ceph osd pool set autoscale-profile scale-up
To switch back to the default `scale-down` profile::
ceph osd pool set autoscale-profile scale-down
Existing clusters will continue to use the `scale-up` profile.
To use the `scale-down` profile, users will need to set the autoscale profile to
`scale-down` after upgrading to a version of Ceph that provides the `scale-down` feature.
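To confirm which profile each pool ends up using, re-run the status command and check
the **PROFILE** column::
    ceph osd pool autoscale-status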
.. _specifying_pool_target_size:

View File

@ -64,6 +64,38 @@ Options
:Type: String
:Default: None
``ssl_options``
:Description: Optional colon separated list of ssl context options:
``default_workarounds`` Implement various bug workarounds.
``no_compression`` Disable compression.
``no_sslv2`` Disable SSL v2.
``no_sslv3`` Disable SSL v3.
``no_tlsv1`` Disable TLS v1.
``no_tlsv1_1`` Disable TLS v1.1.
``no_tlsv1_2`` Disable TLS v1.2.
``single_dh_use`` Always create a new key when using tmp_dh parameters.
:Type: String
:Default: ``no_sslv2:no_sslv3:no_tlsv1:no_tlsv1_1``
``ssl_ciphers``
:Description: Optional list of one or more cipher strings separated by colons.
The format of the string is described in openssl's ciphers(1)
manual.
:Type: String
:Default: None
``tcp_nodelay``
:Description: If set the socket option will disable Nagle's algorithm on
@ -100,6 +132,7 @@ Civetweb
========
.. versionadded:: Firefly
.. deprecated:: Pacific
The ``civetweb`` frontend uses the Civetweb HTTP library, which is a
fork of Mongoose.

View File

@ -400,6 +400,19 @@ Or, when using the transit secret engine::
In the example above, the Gateway would only fetch transit encryption keys under
``https://vault-server:8200/v1/transit``.
You can use custom ssl certificates to authenticate with vault with the help of
the following options::
rgw crypt vault verify ssl = true
rgw crypt vault ssl cacert = /etc/ceph/vault.ca
rgw crypt vault ssl clientcert = /etc/ceph/vault.crt
rgw crypt vault ssl clientkey = /etc/ceph/vault.key
where vault.ca is the CA certificate and vault.key/vault.crt are the private key and ssl
certificate generated for RGW to access the vault server. It is highly recommended to
set this option to true; setting it to false is very dangerous and should be avoided,
since this runs in highly secured environments.
Transit engine compatibility support
------------------------------------
The transit engine has compatibility support for previous

View File

@ -16,9 +16,9 @@
#
set -xe
. /etc/os-release
base=${1:-/tmp/release}
codename=$(lsb_release -sc)
releasedir=$base/$(lsb_release -si)/WORKDIR
releasedir=$base/$NAME/WORKDIR
rm -fr $(dirname $releasedir)
mkdir -p $releasedir
#
@ -60,7 +60,7 @@ dvers="$vers-1"
cd ceph-$vers
chvers=$(head -1 debian/changelog | perl -ne 's/.*\(//; s/\).*//; print')
if [ "$chvers" != "$dvers" ]; then
DEBEMAIL="contact@ceph.com" dch -D $codename --force-distribution -b -v "$dvers" "new version"
DEBEMAIL="contact@ceph.com" dch -D $VERSION_CODENAME --force-distribution -b -v "$dvers" "new version"
fi
#
# create the packages
@ -74,18 +74,18 @@ if test $NPROC -gt 1 ; then
fi
PATH=/usr/lib/ccache:$PATH dpkg-buildpackage $j -uc -us
cd ../..
mkdir -p $codename/conf
cat > $codename/conf/distributions <<EOF
Codename: $codename
mkdir -p $VERSION_CODENAME/conf
cat > $VERSION_CODENAME/conf/distributions <<EOF
Codename: $VERSION_CODENAME
Suite: stable
Components: main
Architectures: $(dpkg --print-architecture) source
EOF
if [ ! -e conf ]; then
ln -s $codename/conf conf
ln -s $VERSION_CODENAME/conf conf
fi
reprepro --basedir $(pwd) include $codename WORKDIR/*.changes
reprepro --basedir $(pwd) include $VERSION_CODENAME WORKDIR/*.changes
#
# teuthology needs the version in the version file
#
echo $dvers > $codename/version
echo $dvers > $VERSION_CODENAME/version

View File

@ -163,7 +163,7 @@ tar cvf $outfile.version.tar $outfile/src/.git_version $outfile/ceph.spec
# at the three URLs referenced below (may involve uploading to download.ceph.com)
boost_version=1.73.0
download_boost $boost_version 4eb3b8d442b426dc35346235c8733b5ae35ba431690e38c6a8263dce9fcbb402 \
https://dl.bintray.com/boostorg/release/$boost_version/source \
https://boostorg.jfrog.io/artifactory/main/release/$boost_version/source \
https://downloads.sourceforge.net/project/boost/boost/$boost_version \
https://download.ceph.com/qa
download_liburing 0.7 8e2842cfe947f3a443af301bdd6d034455536c38a455c7a700d0c1ad165a7543 \

View File

@ -1,33 +1,38 @@
GRAFANA_VERSION := 6.7.4-1
PIECHART_VERSION := "1.4.0"
STATUS_PANEL_VERSION := "1.0.9"
DASHBOARD_DIR := "monitoring/grafana/dashboards"
GRAFANA_VERSION ?= 6.7.4-1
PIECHART_VERSION ?= "1.4.0"
STATUS_PANEL_VERSION ?= "1.0.9"
DASHBOARD_DIR := "../dashboards"
DASHBOARD_PROVISIONING := "ceph-dashboard.yml"
IMAGE := "centos:8"
VERSION := "${IMAGE: -1}"
IMAGE := "docker.io/centos:8"
PKGMGR := "dnf"
# CONTAINER := $(shell buildah from ${IMAGE})
GF_CONFIG := "/etc/grafana/grafana.ini"
ceph_version := "master"
# clip off "-<whatever> from the end of GRAFANA_VERSION
CONTAINER_VERSION := $(shell /bin/echo $(GRAFANA_VERSION) | /bin/sed 's/-.*//')
ARCH ?= x86_64
ifeq "$(ARCH)" "arm64"
override ARCH := aarch64
endif
LOCALTAG=ceph-grafana:$(CONTAINER_VERSION)-$(ARCH)
TAG=ceph/ceph-grafana:$(CONTAINER_VERSION)-$(ARCH)
# Build a grafana instance - preconfigured for use within Ceph's dashboard UI
build : fetch_dashboards
build :
echo "Creating base container"
$(eval CONTAINER := $(shell buildah from ${IMAGE}))
$(eval CONTAINER := $(shell sudo buildah from ${IMAGE}))
# Using upstream grafana build
wget https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.x86_64.rpm
#wget localhost:8000/grafana-${GRAFANA_VERSION}.x86_64.rpm
#cp grafana-${GRAFANA_VERSION}.x86_64.rpm ${mountpoint}/tmp/.
buildah copy $(CONTAINER) grafana-${GRAFANA_VERSION}.x86_64.rpm /tmp/grafana-${GRAFANA_VERSION}.x86_64.rpm
buildah run $(CONTAINER) ${PKGMGR} install -y --setopt install_weak_deps=false --setopt=tsflags=nodocs /tmp/grafana-${GRAFANA_VERSION}.x86_64.rpm
buildah run $(CONTAINER) ${PKGMGR} clean all
buildah run $(CONTAINER) rm -f /tmp/grafana*.rpm
buildah run $(CONTAINER) grafana-cli plugins install grafana-piechart-panel ${PIECHART_VERSION}
buildah run $(CONTAINER) grafana-cli plugins install vonage-status-panel ${STATUS_PANEL_VERSION}
buildah run $(CONTAINER) mkdir -p /etc/grafana/dashboards/ceph-dashboard
buildah copy $(CONTAINER) jsonfiles/*.json /etc/grafana/dashboards/ceph-dashboard
curl -fLO https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.${ARCH}.rpm
sudo buildah copy $(CONTAINER) grafana-${GRAFANA_VERSION}.${ARCH}.rpm /tmp/grafana-${GRAFANA_VERSION}.${ARCH}.rpm
sudo buildah run $(CONTAINER) ${PKGMGR} install -y --setopt install_weak_deps=false --setopt=tsflags=nodocs /tmp/grafana-${GRAFANA_VERSION}.${ARCH}.rpm
sudo buildah run $(CONTAINER) ${PKGMGR} clean all
sudo buildah run $(CONTAINER) rm -f /tmp/grafana*.rpm
sudo buildah run $(CONTAINER) grafana-cli plugins install grafana-piechart-panel ${PIECHART_VERSION}
sudo buildah run $(CONTAINER) grafana-cli plugins install vonage-status-panel ${STATUS_PANEL_VERSION}
sudo buildah run $(CONTAINER) mkdir -p /etc/grafana/dashboards/ceph-dashboard
sudo buildah copy $(CONTAINER) ${DASHBOARD_DIR}/*.json /etc/grafana/dashboards/ceph-dashboard
@/bin/echo -e "\
apiVersion: 1 \\n\
@ -43,55 +48,49 @@ providers: \\n\
path: '/etc/grafana/dashboards/ceph-dashboard'" >> ${DASHBOARD_PROVISIONING}
buildah copy $(CONTAINER) ${DASHBOARD_PROVISIONING} /etc/grafana/provisioning/dashboards/${DASHBOARD_PROVISIONING}
sudo buildah copy $(CONTAINER) ${DASHBOARD_PROVISIONING} /etc/grafana/provisioning/dashboards/${DASHBOARD_PROVISIONING}
# expose tcp/3000 for grafana
buildah config --port 3000 $(CONTAINER)
sudo buildah config --port 3000 $(CONTAINER)
# set working dir
buildah config --workingdir /usr/share/grafana $(CONTAINER)
sudo buildah config --workingdir /usr/share/grafana $(CONTAINER)
# set environment overrides from the default locations in /usr/share
buildah config --env GF_PATHS_LOGS="/var/log/grafana" $(CONTAINER)
buildah config --env GF_PATHS_PLUGINS="/var/lib/grafana/plugins" $(CONTAINER)
buildah config --env GF_PATHS_PROVISIONING="/etc/grafana/provisioning" $(CONTAINER)
buildah config --env GF_PATHS_DATA="/var/lib/grafana" $(CONTAINER)
sudo buildah config --env GF_PATHS_LOGS="/var/log/grafana" $(CONTAINER)
sudo buildah config --env GF_PATHS_PLUGINS="/var/lib/grafana/plugins" $(CONTAINER)
sudo buildah config --env GF_PATHS_PROVISIONING="/etc/grafana/provisioning" $(CONTAINER)
sudo buildah config --env GF_PATHS_DATA="/var/lib/grafana" $(CONTAINER)
# entrypoint
buildah config --entrypoint "grafana-server --config=${GF_CONFIG}" $(CONTAINER)
sudo buildah config --entrypoint "grafana-server --config=${GF_CONFIG}" $(CONTAINER)
# finalize
buildah config --label maintainer="Paul Cuzner <pcuzner@redhat.com>" $(CONTAINER)
buildah config --label description="Ceph Grafana Container" $(CONTAINER)
buildah config --label summary="Grafana Container configured for Ceph mgr/dashboard integration" $(CONTAINER)
buildah commit --format docker --squash $(CONTAINER) ceph-grafana:${ceph_version}
buildah tag ceph-grafana:${ceph_version} ceph/ceph-grafana:${ceph_version}
sudo buildah config --label maintainer="Paul Cuzner <pcuzner@redhat.com>" $(CONTAINER)
sudo buildah config --label description="Ceph Grafana Container" $(CONTAINER)
sudo buildah config --label summary="Grafana Container configured for Ceph mgr/dashboard integration" $(CONTAINER)
sudo buildah commit --format docker --squash $(CONTAINER) $(LOCALTAG)
push:
# this transition-through-oci image is a workaround for
# https://github.com/containers/buildah/issues/3253 and
# can be removed when that is fixed and released. The
# --format v2s2 on push is to convert oci back to docker format.
sudo podman push $(LOCALTAG) --format=oci dir://tmp/oci-image
sudo podman pull dir://tmp/oci-image
sudo rm -rf /tmp/oci-image
sudo podman tag localhost/tmp/oci-image docker.io/${TAG}
sudo podman tag localhost/tmp/oci-image quay.io/${TAG}
# sudo podman has issues with auth.json; just override it
sudo podman login --authfile=auth.json -u ${DOCKER_HUB_USERNAME} -p ${DOCKER_HUB_PASSWORD} docker.io
sudo podman login --authfile=auth.json -u $(CONTAINER_REPO_USERNAME) -p $(CONTAINER_REPO_PASSWORD) quay.io
sudo podman push --authfile=auth.json --format v2s2 docker.io/${TAG}
sudo podman push --authfile=auth.json --format v2s2 quay.io/${TAG}
fetch_dashboards: clean
wget -O - https://api.github.com/repos/ceph/ceph/contents/${DASHBOARD_DIR}?ref=${ceph_version} | jq '.[].download_url' > dashboards
# drop quotes from the list and pick out only json files
sed -i 's/\"//g' dashboards
sed -i '/\.json/!d' dashboards
mkdir jsonfiles
while read -r line; do \
wget "$$line" -P jsonfiles; \
done < dashboards
clean :
rm -f dashboards
rm -fr jsonfiles
rm -f grafana-*.rpm*
clean:
sudo podman rmi ${LOCALTAG} || true
sudo podman rmi docker.io/${TAG} || true
sudo podman rmi quay.io/${TAG} || true
sudo podman rmi localhost/tmp/oci-image || true
rm -f grafana-*.rpm* auth.json
rm -f ${DASHBOARD_PROVISIONING}
nautilus :
$(MAKE) ceph_version="nautilus" build
octopus :
$(MAKE) ceph_version="octopus" build
master :
$(MAKE) ceph_version="master" build
all : nautilus octopus master
.PHONY : all

View File

@ -1,8 +1,34 @@
set(CEPH_GRAFANA_DASHBOARDS_DIR "${CMAKE_INSTALL_SYSCONFDIR}/grafana/dashboards/ceph-dashboard"
CACHE PATH "Location for grafana dashboards")
FILE(GLOB CEPH_GRAFANA_DASHBOARDS "*.json")
file(GLOB CEPH_GRAFANA_DASHBOARDS "*.json")
install(FILES
${CEPH_GRAFANA_DASHBOARDS}
DESTINATION ${CEPH_GRAFANA_DASHBOARDS_DIR})
set(CEPH_BUILD_VIRTUALENV $ENV{TMPDIR})
if(NOT CEPH_BUILD_VIRTUALENV)
set(CEPH_BUILD_VIRTUALENV ${CMAKE_BINARY_DIR})
endif()
if(WITH_GRAFANA)
include(AddCephTest)
add_tox_test(grafana TOX_ENVS grafonnet-check)
set(ver 0.1.0)
set(name grafonnet-lib)
include(ExternalProject)
ExternalProject_Add(${name}
URL https://github.com/grafana/${name}/archive/v${ver}/${name}-${ver}.tar.gz
URL_MD5 0798752ed40864fa8b3db40a3c970642
BUILD_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
EXCLUDE_FROM_ALL ON)
add_dependencies(tests
${name})
ExternalProject_Get_Property(${name} SOURCE_DIR)
set_property(
TEST run-tox-grafana
APPEND
PROPERTY ENVIRONMENT
GRAFONNET_PATH=${SOURCE_DIR}/grafonnet)
endif()

View File

@ -107,8 +107,9 @@
"tableColumn": "",
"targets": [
{
"expr": "ceph_health_status{instance=~'$instance'}",
"expr": "ceph_health_status",
"format": "time_series",
"instant": true,
"interval": "$interval",
"intervalFactor": 1,
"refId": "A",
@ -174,7 +175,7 @@
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
"expr": "count(ceph_osd_metadata{instance=~\"$instance\"})",
"expr": "count(ceph_osd_metadata)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "All",
@ -189,7 +190,7 @@
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
"expr": "sum(ceph_osds_in{instance=~\"$instance\"})",
"expr": "sum(ceph_osds_in)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "In",
@ -204,7 +205,7 @@
"displayAliasType": "Warning / Critical",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
"expr": "sum(ceph_osd_in{instance=~\"$instance\"} == bool 0)",
"expr": "sum(ceph_osd_in == bool 0)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -221,7 +222,7 @@
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
"expr": "sum(ceph_osd_up{instance=~\"$instance\"})",
"expr": "sum(ceph_osd_up)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Up",
@ -237,7 +238,7 @@
"displayAliasType": "Warning / Critical",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
"expr": "sum(ceph_osd_up{instance=~\"$instance\"} == bool 0)",
"expr": "sum(ceph_osd_up == bool 0)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Down",
@ -263,7 +264,7 @@
"decimals": 2,
"format": "percentunit",
"gauge": {
"maxValue": 100,
"maxValue": 1,
"minValue": 0,
"show": true,
"thresholdLabels": false,
@ -312,14 +313,14 @@
"tableColumn": "",
"targets": [
{
"expr": "sum(ceph_osd_stat_bytes_used{instance=~\"$instance\"})/sum(ceph_osd_stat_bytes{instance=~\"$instance\"})",
"expr": "sum(ceph_osd_stat_bytes_used)/sum(ceph_osd_stat_bytes)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Used",
"refId": "A"
}
],
"thresholds": "70,80",
"thresholds": "0.7,0.8",
"title": "Capacity used",
"type": "singlestat",
"valueFontSize": "80%",
@ -530,28 +531,28 @@
"steppedLine": false,
"targets": [
{
"expr": "quantile(0.95, ceph_osd_apply_latency_ms{instance=~\"$instance\"})",
"expr": "quantile(0.95, ceph_osd_apply_latency_ms)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Apply Latency P_95",
"refId": "A"
},
{
"expr": "quantile(0.95, ceph_osd_commit_latency_ms{instance=~\"$instance\"})",
"expr": "quantile(0.95, ceph_osd_commit_latency_ms)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Commit Latency P_95",
"refId": "B"
},
{
"expr": "avg(ceph_osd_apply_latency_ms{instance=~\"$instance\"})",
"expr": "avg(ceph_osd_apply_latency_ms)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Avg Apply Latency",
"refId": "C"
},
{
"expr": "avg(ceph_osd_commit_latency_ms{instance=~\"$instance\"})",
"expr": "avg(ceph_osd_commit_latency_ms)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Avg Commit Latency",
@ -629,7 +630,7 @@
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
"expr": "sum(ceph_mon_quorum_status{instance=~\"$instance\"})",
"expr": "sum(ceph_mon_quorum_status)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -646,7 +647,7 @@
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
"expr": "count(ceph_mon_quorum_status{instance=~\"$instance\"})",
"expr": "count(ceph_mon_quorum_status)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Total",
@ -663,7 +664,7 @@
"displayAliasType": "Warning / Critical",
"displayType": "Annotation",
"displayValueWithAlias": "Never",
"expr": "count(ceph_mon_quorum_status{instance=~\"$instance\"}) / sum(ceph_mon_quorum_status{instance=~\"$instance\"})",
"expr": "count(ceph_mon_quorum_status) / sum(ceph_mon_quorum_status)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "MONs out of Quorum",
@ -710,7 +711,7 @@
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
"expr": "ceph_mds_server_handle_client_session{instance=~\"$instance\"}",
"expr": "ceph_mds_server_handle_client_session",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Clients",
@ -764,14 +765,14 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(ceph_osd_op_w_in_bytes{instance=~\"$instance\"}[1m]))",
"expr": "sum(irate(ceph_osd_op_w_in_bytes[1m]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"refId": "A"
},
{
"expr": "sum(irate(ceph_osd_op_r_out_bytes{instance=~\"$instance\"}[1m]))",
"expr": "sum(irate(ceph_osd_op_r_out_bytes[1m]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
@ -851,7 +852,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(deriv(ceph_pool_stored{instance=~\"$instance\"}[1m]))",
"expr": "sum(deriv(ceph_pool_stored[1m]))",
"format": "time_series",
"intervalFactor": 1,
"refId": "A"
@ -924,7 +925,7 @@
"span": 12,
"targets": [
{
"expr": "ceph_osd_stat_bytes_used{instance=~'$instance'} / ceph_osd_stat_bytes{instance=~'$instance'}",
"expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes",
"format": "time_series",
"interval": "1m",
"intervalFactor": 1,
@ -946,7 +947,7 @@
"xBucketNumber": null,
"xBucketSize": "",
"yAxis": {
"decimals": null,
"decimals": 2,
"format": "percentunit",
"logBase": 1,
"max": null,
@ -986,7 +987,7 @@
"links": [],
"targets": [
{
"expr": "ceph_osd_numpg{instance=~\"$instance\"}",
"expr": "ceph_osd_numpg",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "#PGs",
@ -1190,29 +1191,6 @@
"query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
"refresh": 2,
"type": "interval"
},
{
"allFormat": "glob",
"allValue": null,
"current": {},
"datasource": "$datasource",
"hide": 0,
"hideLabel": false,
"includeAll": true,
"label": "Exporter Instance",
"multi": false,
"multiFormat": "glob",
"name": "instance",
"options": [],
"query": "label_values(ceph_health_status, instance)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
@ -1245,7 +1223,7 @@
"30d"
]
},
"timezone": "browser",
"timezone": "",
"title": "Ceph - Cluster",
"version": 13
}

View File

@ -1208,7 +1208,7 @@
"30d"
]
},
"timezone": "browser",
"timezone": "",
"title": "Host Details",
"uid": "rtOg0AiWz",
"version": 4

View File

@ -0,0 +1,54 @@
local g = import 'grafana.libsonnet';
local dashboardSchema(title, uid, time_from, refresh, schemaVersion, tags,timezone, timepicker) =
g.dashboard.new(title=title, uid=uid, time_from=time_from, refresh=refresh, schemaVersion=schemaVersion, tags=tags, timezone=timezone, timepicker=timepicker);
local graphPanelSchema(title, nullPointMode, stack, formatY1, formatY2, labelY1, labelY2, min, fill, datasource) =
g.graphPanel.new(title=title, nullPointMode=nullPointMode, stack=stack, formatY1=formatY1, formatY2=formatY2, labelY1=labelY1, labelY2=labelY2, min=min, fill=fill, datasource=datasource);
local addTargetSchema(expr, intervalFactor, format, legendFormat) =
g.prometheus.target(expr=expr, intervalFactor=intervalFactor, format=format, legendFormat=legendFormat);
local addTemplateSchema(name, datasource, query, refresh, hide, includeAll, sort) =
g.template.new(name=name, datasource=datasource, query=query, refresh=refresh, hide=hide, includeAll=includeAll, sort=sort);
local addAnnotationSchema(builtIn, datasource, enable, hide, iconColor, name, type) =
g.annotation.datasource(builtIn=builtIn, datasource=datasource, enable=enable, hide=hide, iconColor=iconColor, name=name, type=type);
{
"radosgw-sync-overview.json":
local RgwSyncOverviewPanel(title, formatY1, labelY1, rgwMetric, x, y, w, h) =
graphPanelSchema(title, 'null as zero', true, formatY1, 'short', labelY1, null, 0, 1, '$datasource')
.addTargets(
[addTargetSchema('sum by (source_zone) (rate(%s[30s]))' % rgwMetric, 1, 'time_series', '{{source_zone}}')]) + {gridPos: {x: x, y: y, w: w, h: h}};
dashboardSchema(
'RGW Sync Overview', 'rgw-sync-overview', 'now-1h', '15s', 16, ["overview"], '', {refresh_intervals:['5s','10s','15s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']}
)
.addAnnotation(
addAnnotationSchema(
1, '-- Grafana --', true, true, 'rgba(0, 211, 255, 1)', 'Annotations & Alerts', 'dashboard')
)
.addRequired(
type='grafana', id='grafana', name='Grafana', version='5.0.0'
)
.addRequired(
type='panel', id='graph', name='Graph', version='5.0.0'
)
.addTemplate(
addTemplateSchema('rgw_servers', '$datasource', 'prometheus', 1, 2, true, 1)
)
.addTemplate(
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
)
.addPanels([
RgwSyncOverviewPanel(
'Replication (throughput) from Source Zone', 'Bps', null, 'ceph_data_sync_from_zone_fetch_bytes_sum', 0, 0, 8, 7),
RgwSyncOverviewPanel(
'Replication (objects) from Source Zone', 'short', 'Objects/s', 'ceph_data_sync_from_zone_fetch_bytes_count', 8, 0, 8, 7),
RgwSyncOverviewPanel(
'Polling Request Latency from Source Zone', 'ms', null, 'ceph_data_sync_from_zone_poll_latency_sum', 16, 0, 8, 7),
RgwSyncOverviewPanel(
'Unsuccessful Object Replications from Source Zone', 'short', 'Count/s', 'ceph_data_sync_from_zone_fetch_errors', 0, 7, 8, 7)
])
}

View File

@ -423,7 +423,7 @@
},
"yaxes": [
{
"format": "ms",
"format": "s",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,

View File

@ -658,7 +658,7 @@
"30d"
]
},
"timezone": "browser",
"timezone": "",
"title": "Ceph Pool Details",
"uid": "-xyV8KCiz",
"version": 1

View File

@ -1554,7 +1554,7 @@
"30d"
]
},
"timezone": "browser",
"timezone": "",
"title": "Ceph Pools Overview",
"uid": "z99hzWtmk",
"variables": {

View File

@ -1,440 +1,455 @@
{
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "5.0.0"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": "5.0.0"
}
],
"annotations": {
"list": [
"__inputs": [ ],
"__requires": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.0.0"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
}
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"iteration": 1534386107523,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 0
},
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Replication (throughput) from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"unit": "bytes",
"format": "Bps",
"decimals": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 7.4,
"x": 8.3,
"y": 0
},
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Replication (objects) from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"decimals": null,
"label": "Objects/s",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 0
},
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum[30s]) * 1000)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Polling Request Latency from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"unit": "s",
"format": "ms",
"decimals": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 7
},
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Unsuccessful Object Replications from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"decimals": null,
"label": "Count/s",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"refresh": "15s",
"schemaVersion": 16,
"style": "dark",
"tags": [
"overview"
],
"templating": {
"list": [
},
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"allValue": null,
"current": {},
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": null,
"multi": false,
"name": "rgw_servers",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 0
},
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Replication (throughput) from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"current": {
"tags": [],
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 0
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Replication (objects) from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": "Objects/s",
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 0
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Polling Request Latency from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 7
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Unsuccessful Object Replications from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": "Count/s",
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"15s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RGW Sync Overview",
"uid": "rgw-sync-overview",
"version": 2
],
"refresh": "15s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"overview"
],
"templating": {
"list": [
{
"allValue": null,
"current": { },
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": null,
"multi": false,
"name": "rgw_servers",
"options": [ ],
"query": "prometehus",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"15s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RGW Sync Overview",
"uid": "rgw-sync-overview",
"version": 0
}

View File

@ -0,0 +1 @@
jsondiff

View File

@ -0,0 +1,30 @@
#!/usr/bin/env bash
set -e
TEMPDIR=`mktemp -d`
BASEDIR=$(dirname "$0")
JSONNET_PATH="${GRAFONNET_PATH}" jsonnet -m ${TEMPDIR} $BASEDIR/jsonnet/grafana_dashboards.jsonnet
truncate -s 0 ${TEMPDIR}/json_difference.log
for json_files in $BASEDIR/*.json
do
JSON_FILE_NAME=$(basename $json_files)
for generated_files in ${TEMPDIR}/*.json
do
GENERATED_FILE_NAME=$(basename $generated_files)
if [ $JSON_FILE_NAME == $GENERATED_FILE_NAME ]; then
jsondiff --indent 2 $generated_files $json_files | tee -a ${TEMPDIR}/json_difference.log
fi
done
done
if [[ $(wc -l < ${TEMPDIR}/json_difference.log) -eq 0 ]]
then
rm -rf ${TEMPDIR}
echo "Congratulations! Grafonnet Check Passed"
else
rm -rf ${TEMPDIR}
echo "Grafonnet Check Failed, failed comparing generated file with existing"
exit 1
fi
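A hedged usage sketch for running this check by hand (the checkout location below is hypothetical; the script only needs jsonnet, the jsondiff tool from requirements-grafonnet.txt, and GRAFONNET_PATH):

  # /opt/grafonnet-lib/grafonnet is a hypothetical path to the grafonnet library
  GRAFONNET_PATH=/opt/grafonnet-lib/grafonnet bash test-jsonnet.sh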

View File

@ -0,0 +1,22 @@
[tox]
envlist = grafonnet-{check,fix}
skipsdist = true
[grafonnet]
deps =
-rrequirements-grafonnet.txt
[testenv:grafonnet-{check,fix}]
basepython = python3
whitelist_externals =
jsonnet
bash
description =
check: ensure that the auto-generated Grafana dashboard files match the committed versions
fix: regenerate the dashboard JSON files from the jsonnet source with the latest changes
deps =
{[grafonnet]deps}
passenv = GRAFONNET_PATH
commands =
check: bash test-jsonnet.sh
fix: jsonnet -m . jsonnet/grafana_dashboards.jsonnet
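With the environments above, the same two workflows can be driven through tox (a sketch; assumes tox and jsonnet are installed and GRAFONNET_PATH is exported):

  export GRAFONNET_PATH=/opt/grafonnet-lib/grafonnet  # hypothetical checkout location
  tox -e grafonnet-check   # fails if the committed JSON drifts from the jsonnet source
  tox -e grafonnet-fix     # regenerates the dashboard JSON files in place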

View File

@ -0,0 +1,14 @@
os_type: centos
os_version: "8.2"
overrides:
selinux:
whitelist:
- scontext=system_u:system_r:logrotate_t:s0
tasks:
- pexec:
all:
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
- sudo dnf -y module reset container-tools
- sudo dnf -y module install container-tools:3.0
- sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf

View File

@ -1,18 +0,0 @@
os_type: centos
os_version: "8.2"
overrides:
selinux:
whitelist:
- scontext=system_u:system_r:logrotate_t:s0
tasks:
- pexec:
all:
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
- sudo dnf -y module disable container-tools
- sudo dnf -y install 'dnf-command(copr)'
- sudo dnf -y copr enable rhcontainerbot/container-selinux
- sudo curl -L -o /etc/yum.repos.d/devel:kubic:libcontainers:stable.repo https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/CentOS_8/devel:kubic:libcontainers:stable.repo
- sudo dnf remove -y podman
- sudo dnf -y install podman
- sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf

View File

@ -0,0 +1,14 @@
os_type: centos
os_version: "8.3"
overrides:
selinux:
whitelist:
- scontext=system_u:system_r:logrotate_t:s0
tasks:
- pexec:
all:
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
- sudo dnf -y module reset container-tools
- sudo dnf -y module install container-tools:3.0
- sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf

View File

@ -8,16 +8,6 @@ function run() {
local dir=$1
shift
export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
CEPH_ARGS+="--bluestore_block_size=2147483648 "
CEPH_ARGS+="--bluestore_block_db_create=true "
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
CEPH_ARGS+="--bluestore_block_wal_create=true "
CEPH_ARGS+="--bluestore_fsck_on_mount=true "
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
@ -33,6 +23,16 @@ function TEST_bluestore() {
if [ $flimit -lt 1536 ]; then
echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
fi
export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
CEPH_ARGS+="--bluestore_block_size=2147483648 "
CEPH_ARGS+="--bluestore_block_db_create=true "
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
CEPH_ARGS+="--bluestore_block_wal_create=true "
CEPH_ARGS+="--bluestore_fsck_on_mount=true "
run_mon $dir a || return 1
run_mgr $dir x || return 1
@ -337,6 +337,63 @@ function TEST_bluestore() {
wait_for_clean || return 1
}
function TEST_bluestore2() {
local dir=$1
local flimit=$(ulimit -n)
if [ $flimit -lt 1536 ]; then
echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
fi
export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
CEPH_ARGS+="--bluestore_block_size=4294967296 "
CEPH_ARGS+="--bluestore_block_db_create=true "
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
CEPH_ARGS+="--bluestore_block_wal_create=false "
CEPH_ARGS+="--bluestore_fsck_on_mount=true "
CEPH_ARGS+="--osd_pool_default_size=1 "
CEPH_ARGS+="--osd_pool_default_min_size=1 "
CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd "
run_mon $dir a || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
osd_pid0=$(cat $dir/osd.0.pid)
sleep 5
create_pool foo 16
# write some objects
timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1
# give RocksDB some time to cool down and spill files to the slow level(s)
sleep 10
spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" )
test $spilled_over -gt 0 || return 1
while kill $osd_pid0; do sleep 1 ; done
ceph osd down 0
ceph-bluestore-tool --path $dir/0 \
--devs-source $dir/0/block.db \
--dev-target $dir/0/block \
--command bluefs-bdev-migrate || return 1
ceph-bluestore-tool --path $dir/0 \
--command bluefs-bdev-sizes || return 1
ceph-bluestore-tool --path $dir/0 \
--command fsck || return 1
activate_osd $dir 0 || return 1
osd_pid0=$(cat $dir/osd.0.pid)
wait_for_clean || return 1
}
main osd-bluefs-volume-ops "$@"
# Local Variables:

View File

@ -12,14 +12,15 @@ function run() {
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
$func $dir || return 1
teardown $dir || return 1
done
}
function TEST_reuse_id() {
local dir=$1
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1

View File

@ -27,14 +27,15 @@ function run() {
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
$func $dir || return 1
teardown $dir || return 1
done
}
function TEST_reuse_id() {
local dir=$1
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1

View File

@ -12,14 +12,15 @@ function run() {
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
$func $dir || return 1
teardown $dir || return 1
done
}
function TEST_a_merge_empty() {
local dir=$1
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=3 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
@ -87,7 +88,6 @@ function TEST_a_merge_empty() {
function TEST_import_after_merge_and_gap() {
local dir=$1
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
@ -162,7 +162,6 @@ function TEST_import_after_merge_and_gap() {
function TEST_import_after_split() {
local dir=$1
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1

View File

@ -60,7 +60,9 @@ function run() {
export -n CEPH_CLI_TEST_DUP_COMMAND
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
$func $dir || return 1
teardown $dir || return 1
done
}
@ -91,7 +93,6 @@ function TEST_corrupt_and_repair_replicated() {
local dir=$1
local poolname=rbd
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
@ -103,8 +104,6 @@ function TEST_corrupt_and_repair_replicated() {
corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
# Reproduces http://tracker.ceph.com/issues/8914
corrupt_and_repair_one $dir $poolname $(get_primary $poolname SOMETHING) || return 1
teardown $dir || return 1
}
#
@ -114,7 +113,6 @@ function TEST_allow_repair_during_recovery() {
local dir=$1
local poolname=rbd
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 --osd_scrub_during_recovery=false \
@ -128,8 +126,6 @@ function TEST_allow_repair_during_recovery() {
add_something $dir $poolname || return 1
corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
teardown $dir || return 1
}
#
@ -139,7 +135,6 @@ function TEST_skip_non_repair_during_recovery() {
local dir=$1
local poolname=rbd
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 --osd_scrub_during_recovery=false \
@ -153,8 +148,6 @@ function TEST_skip_non_repair_during_recovery() {
add_something $dir $poolname || return 1
scrub_and_not_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1
teardown $dir || return 1
}
function scrub_and_not_schedule() {
@ -276,7 +269,6 @@ function auto_repair_erasure_coded() {
local poolname=ecpool
# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
@ -285,11 +277,11 @@ function auto_repair_erasure_coded() {
--osd-scrub-min-interval=5 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
if [ "$allow_overwrites" = "true" ]; then
if [ "$allow_overwrites" = "true" ]; then
run_osd $dir $id $ceph_osd_args || return 1
else
else
run_osd_filestore $dir $id $ceph_osd_args || return 1
fi
fi
done
create_rbd_pool || return 1
wait_for_clean || return 1
@ -314,9 +306,6 @@ function auto_repair_erasure_coded() {
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
rados --pool $poolname get SOMETHING $dir/COPY || return 1
diff $dir/ORIGINAL $dir/COPY || return 1
# Tear down
teardown $dir || return 1
}
function TEST_auto_repair_erasure_coded_appends() {
@ -329,16 +318,135 @@ function TEST_auto_repair_erasure_coded_overwrites() {
fi
}
# initiate a scrub, then check for the expected 'scrubbing' state and for the
# 'repair' state (which is not expected until an error has been identified)
# Arguments: osd#, pg, sleep time
function initiate_and_fetch_state() {
local the_osd="osd.$1"
local pgid=$2
local last_scrub=$(get_last_scrub_stamp $pgid)
set_config "osd" "$1" "osd_scrub_sleep" "$3"
set_config "osd" "$1" "osd_scrub_auto_repair" "true"
flush_pg_stats
date --rfc-3339=ns
# note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one
env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) deep_scrub "$pgid"
env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid"
# wait for 'scrubbing' to appear
for ((i=0; i < 80; i++)); do
st=`ceph pg $pgid query --format json | jq '.state' `
echo $i ") state now: " $st
case "$st" in
*scrubbing*repair* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this
*scrubbing* ) echo "found scrub"; return 0;;
*inconsistent* ) echo "Got here too late. Scrub has already finished"; return 1;;
*recovery* ) echo "Got here too late. Scrub has already finished."; return 1;;
* ) echo $st;;
esac
if [ $((i % 10)) == 4 ]; then
echo "loop --------> " $i
fi
sleep 0.3
done
echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start"
return 1
}
function wait_end_of_scrub() { # osd# pg
local the_osd="osd.$1"
local pgid=$2
for ((i=0; i < 40; i++)); do
st=`ceph pg $pgid query --format json | jq '.state' `
echo "wait-scrub-end state now: " $st
[[ $st =~ (.*scrubbing.*) ]] || break
if [ $((i % 5)) == 4 ] ; then
flush_pg_stats
fi
sleep 0.3
done
if [[ $st =~ (.*scrubbing.*) ]]
then
# a timeout
return 1
fi
return 0
}
function TEST_auto_repair_bluestore_tag() {
local dir=$1
local poolname=testpool
# Launch a cluster with 3 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
done
create_pool $poolname 1 1 || return 1
ceph osd pool set $poolname size 2
wait_for_clean || return 1
# Put an object
local payload=ABCDEF
echo $payload > $dir/ORIGINAL
rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
# Remove the object from one shard physically
# Restarted osd get $ceph_osd_args passed
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
local pgid=$(get_pg $poolname SOMETHING)
local primary=$(get_primary $poolname SOMETHING)
echo "Affected PG " $pgid " w/ primary " $primary
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
initiate_and_fetch_state $primary $pgid "3.0"
r=$?
echo "initiate_and_fetch_state ret: " $r
set_config "osd" "$1" "osd_scrub_sleep" "0"
if [ $r -ne 0 ]; then
return 1
fi
wait_end_of_scrub "$primary" "$pgid" || return 1
ceph pg dump pgs
# Verify - the file should be back
# Restarted osd get $ceph_osd_args passed
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
diff $dir/ORIGINAL $dir/COPY || return 1
grep scrub_finish $dir/osd.${primary}.log
# Tear down
teardown $dir || return 1
}
function TEST_auto_repair_bluestore_basic() {
local dir=$1
local poolname=testpool
# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
@ -373,9 +481,6 @@ function TEST_auto_repair_bluestore_basic() {
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
diff $dir/ORIGINAL $dir/COPY || return 1
grep scrub_finish $dir/osd.${primary}.log
# Tear down
teardown $dir || return 1
}
function TEST_auto_repair_bluestore_scrub() {
@ -383,12 +488,12 @@ function TEST_auto_repair_bluestore_scrub() {
local poolname=testpool
# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0 \
--osd-scrub-backoff-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
done
@ -428,9 +533,6 @@ function TEST_auto_repair_bluestore_scrub() {
# This should have caused 1 object to be repaired
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "1" || return 1
# Tear down
teardown $dir || return 1
}
function TEST_auto_repair_bluestore_failed() {
@ -438,11 +540,10 @@ function TEST_auto_repair_bluestore_failed() {
local poolname=testpool
# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
@ -498,9 +599,6 @@ function TEST_auto_repair_bluestore_failed() {
ceph pg dump pgs
ceph pg dump pgs | grep -q -e "^${pgid}.* active+clean " -e "^${pgid}.* active+clean+wait " || return 1
grep scrub_finish $dir/osd.${primary}.log
# Tear down
teardown $dir || return 1
}
function TEST_auto_repair_bluestore_failed_norecov() {
@ -508,11 +606,10 @@ function TEST_auto_repair_bluestore_failed_norecov() {
local poolname=testpool
# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-auto-repair=true \
--osd_deep_scrub_randomize_ratio=0 \
--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
for id in $(seq 0 2) ; do
run_osd $dir $id $ceph_osd_args || return 1
@ -552,9 +649,6 @@ function TEST_auto_repair_bluestore_failed_norecov() {
grep -q "scrub_finish.*present with no repair possible" $dir/osd.${primary}.log || return 1
ceph pg dump pgs
ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1
# Tear down
teardown $dir || return 1
}
function TEST_repair_stats() {
@ -566,7 +660,6 @@ function TEST_repair_stats() {
local REPAIRS=20
# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
@ -626,9 +719,6 @@ function TEST_repair_stats() {
ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
test "$COUNT" = "$REPAIRS" || return 1
# Tear down
teardown $dir || return 1
}
function TEST_repair_stats_ec() {
@ -641,7 +731,6 @@ function TEST_repair_stats_ec() {
local allow_overwrites=false
# Launch a cluster with 5 seconds scrub interval
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
@ -704,9 +793,6 @@ function TEST_repair_stats_ec() {
ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
test "$COUNT" = "$REPAIRS" || return 1
# Tear down
teardown $dir || return 1
}
function corrupt_and_repair_jerasure() {
@ -714,7 +800,6 @@ function corrupt_and_repair_jerasure() {
local allow_overwrites=$2
local poolname=ecpool
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 3) ; do
@ -729,8 +814,6 @@ function corrupt_and_repair_jerasure() {
create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1
corrupt_and_repair_erasure_coded $dir $poolname || return 1
teardown $dir || return 1
}
function TEST_corrupt_and_repair_jerasure_appends() {
@ -748,7 +831,6 @@ function corrupt_and_repair_lrc() {
local allow_overwrites=$2
local poolname=ecpool
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 9) ; do
@ -763,8 +845,6 @@ function corrupt_and_repair_lrc() {
create_ec_pool $poolname $allow_overwrites k=4 m=2 l=3 plugin=lrc || return 1
corrupt_and_repair_erasure_coded $dir $poolname || return 1
teardown $dir || return 1
}
function TEST_corrupt_and_repair_lrc_appends() {
@ -783,7 +863,6 @@ function unfound_erasure_coded() {
local poolname=ecpool
local payload=ABCDEF
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 3) ; do
@ -831,8 +910,6 @@ function unfound_erasure_coded() {
ceph -s|grep "4 up" || return 1
ceph -s|grep "4 in" || return 1
ceph -s|grep "1/1 objects unfound" || return 1
teardown $dir || return 1
}
function TEST_unfound_erasure_coded_appends() {
@ -853,7 +930,6 @@ function list_missing_erasure_coded() {
local allow_overwrites=$2
local poolname=ecpool
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 2) ; do
@ -913,8 +989,6 @@ function list_missing_erasure_coded() {
matches=$(ceph pg $pg list_unfound | egrep "MOBJ0|MOBJ1" | wc -l)
[ $matches -eq 2 ] && break
done
teardown $dir || return 1
}
function TEST_list_missing_erasure_coded_appends() {
@ -935,7 +1009,6 @@ function TEST_corrupt_scrub_replicated() {
local poolname=csr_pool
local total_objs=19
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
@ -3530,7 +3603,6 @@ EOF
fi
ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
teardown $dir || return 1
}
@ -3543,7 +3615,6 @@ function corrupt_scrub_erasure() {
local poolname=ecpool
local total_objs=7
setup $dir || return 1
run_mon $dir a || return 1
run_mgr $dir x || return 1
for id in $(seq 0 2) ; do
@ -5690,7 +5761,6 @@ EOF
fi
ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
teardown $dir || return 1
}
function TEST_corrupt_scrub_erasure_appends() {
@ -5711,7 +5781,6 @@ function TEST_periodic_scrub_replicated() {
local poolname=psr_pool
local objname=POBJ
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
@ -5803,7 +5872,6 @@ function TEST_scrub_warning() {
local conf_overdue_seconds=$(calc $i7_days + $i1_day + \( $i7_days \* $overdue \) )
local pool_overdue_seconds=$(calc $i14_days + $i1_day + \( $i14_days \* $overdue \) )
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
run_mgr $dir x --mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} || return 1
run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1
@ -5870,7 +5938,6 @@ function TEST_scrub_warning() {
ceph health detail | grep "not deep-scrubbed since"
return 1
fi
return 0
}
#
@ -5881,7 +5948,6 @@ function TEST_corrupt_snapset_scrub_rep() {
local poolname=csr_pool
local total_objs=2
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
@ -6141,7 +6207,6 @@ EOF
fi
ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
teardown $dir || return 1
}
function TEST_request_scrub_priority() {
@ -6151,7 +6216,6 @@ function TEST_request_scrub_priority() {
local OBJECTS=64
local PGS=8
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
run_mgr $dir x || return 1
local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
@ -6199,8 +6263,6 @@ function TEST_request_scrub_priority() {
# Verify that the requested scrub ran first
grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1
return 0
}

View File

@ -12,6 +12,8 @@ overrides:
- is full \(reached quota
- POOL_FULL
- POOL_BACKFILLFULL
- PG_RECOVERY_FULL
- PG_DEGRADED
conf:
mon:
mon osd nearfull ratio: 0.6

View File

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,3 @@
This test just verifies that upgrades work with no file system present. In
particular, it catches the case where the MDSMonitor blows up due to version
mismatches.
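A rough manual spot-check along the same lines (a sketch only, not part of this suite; assumes a cluster with no CephFS file system defined, inspected before and after the upgrade):

  ceph fs dump        # should keep reporting zero filesystems
  ceph versions       # daemons should report the upgraded release
  ceph health detail  # MDSMonitor should raise no errors despite the version skew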

View File

@ -0,0 +1 @@
.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml

View File

@ -0,0 +1 @@
.qa/distros/supported/centos_latest.yaml

View File

@ -0,0 +1 @@
.qa/cephfs/conf/

View File

@ -0,0 +1,6 @@
roles:
- [mon.a, mon.b, mon.c, mgr.x, mgr.y, osd.0, osd.1, osd.2, osd.3]
openstack:
- volumes: # attached to each instance
count: 4
size: 10 # GB

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
global:
mon pg warn min per osd: 0

View File

@ -0,0 +1 @@
.qa/cephfs/overrides/whitelist_health.yaml

View File

@ -0,0 +1 @@
.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml

View File

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,38 @@
meta:
- desc: |
install ceph/octopus latest
tasks:
- install:
branch: octopus
exclude_packages:
- librados3
- ceph-mgr-dashboard
- ceph-mgr-diskprediction-local
- ceph-mgr-rook
- ceph-mgr-cephadm
- cephadm
extra_packages: ['librados2']
- print: "**** done installing octopus"
- ceph:
log-ignorelist:
- overall HEALTH_
- \(FS_
- \(MDS_
- \(OSD_
- \(MON_DOWN\)
- \(CACHE_POOL_
- \(POOL_
- \(MGR_DOWN\)
- \(PG_
- \(SMALLER_PGP_NUM\)
- Monitor daemon marked osd
- Behind on trimming
- Manager daemon
conf:
global:
mon warn on pool no app: false
ms bind msgr2: false
- exec:
osd.0:
- ceph osd set-require-min-compat-client octopus
- print: "**** done ceph"

View File

@ -0,0 +1,45 @@
overrides:
ceph:
log-ignorelist:
- scrub mismatch
- ScrubResult
- wrongly marked
- \(POOL_APP_NOT_ENABLED\)
- \(SLOW_OPS\)
- overall HEALTH_
- \(MON_MSGR2_NOT_ENABLED\)
- slow request
conf:
global:
bluestore warn on legacy statfs: false
bluestore warn on no per pool omap: false
mon:
mon warn on osd down out interval zero: false
tasks:
- print: "*** upgrading, no cephfs present"
- exec:
mon.a:
- ceph fs dump
- install.upgrade:
mon.a:
- print: "**** done install.upgrade"
- ceph.restart:
daemons: [mon.*, mgr.*]
mon-health-to-clog: false
wait-for-healthy: false
- ceph.healthy:
- ceph.restart:
daemons: [osd.*]
wait-for-healthy: false
wait-for-osds-up: true
- exec:
mon.a:
- ceph versions
- ceph osd dump -f json-pretty
- ceph fs dump
- ceph osd require-osd-release octopus
- for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done
#- ceph osd set-require-min-compat-client octopus
- ceph.healthy:
- print: "**** done ceph.restart"

View File

@ -0,0 +1 @@
.qa/distros/podman/centos_8.2_container_tools_3.0.yaml

View File

@ -1 +0,0 @@
.qa/distros/podman/centos_8.2_kubic_stable.yaml

View File

@ -1 +0,0 @@
.qa/distros/podman/centos_8.2_kubic_stable.yaml

View File

@ -0,0 +1 @@
.qa/distros/podman/centos_8.3_container_tools_3.0.yaml

View File

@ -1 +0,0 @@
.qa/distros/podman/rhel_8.3_kubic_stable.yaml

View File

@ -0,0 +1 @@
.qa/distros/podman/centos_8.2_container_tools_3.0.yaml

View File

@ -1 +0,0 @@
.qa/distros/podman/centos_8.2_kubic_stable.yaml

View File

@ -0,0 +1 @@
.qa/distros/podman/centos_8.2_container_tools_3.0.yaml

View File

@ -57,3 +57,4 @@ tasks:
- tasks.mgr.dashboard.test_summary
- tasks.mgr.dashboard.test_telemetry
- tasks.mgr.dashboard.test_user
- tasks.mgr.dashboard.test_motd

View File

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

View File

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

View File

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

View File

@ -14,5 +14,6 @@ overrides:
rgw md log max shards: 4
rgw data log num shards: 4
rgw sync obj etag verify: true
rgw sync meta inject err probability: 0.1
rgw:
compression type: random

ceph/qa/suites/rgw/sts/.qa Symbolic link
View File

@ -0,0 +1 @@
../.qa

View File

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

View File

@ -1 +1 @@
../ignore-pg-availability.yaml
.qa/rgw/ignore-pg-availability.yaml

View File

@ -3,7 +3,7 @@ meta:
Run ceph on two nodes, using one of them as a client,
with a separate client-only node.
Use xfs beneath the osds.
install ceph/pacific v16.2.2 and the v16.2.x point versions
install ceph/pacific v16.2.4 and the v16.2.x point versions
run workload and upgrade-sequence in parallel
(every point release should be tested)
run workload and upgrade-sequence in parallel
@ -69,32 +69,32 @@ openstack:
count: 3
size: 30 # GB
tasks:
- print: "**** done pacific v16.2.0 about to install"
- print: "**** done pacific about to install v16.2.4 "
- install:
tag: v16.2.2
tag: v16.2.4
# line below can be removed; it's from the jewel test
#exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev', 'librgw2']
- print: "**** done v16.2.2 install"
- print: "**** done v16.2.4 install"
- ceph:
fs: xfs
add_osds_to_crush: true
- print: "**** done ceph xfs"
- sequential:
- workload
- print: "**** done workload v16.2.2"
- print: "**** done workload v16.2.4"
####### upgrade to v16.2.3
####### upgrade to v16.2.5
- install.upgrade:
#exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev']
mon.a:
tag: v16.2.3
tag: v16.2.5
mon.b:
tag: v16.2.3
tag: v16.2.5
- parallel:
- workload_pacific
- upgrade-sequence_pacific
- print: "**** done parallel pacific v16.2.3"
- print: "**** done parallel pacific v16.2.5"
#### upgrade to latest pacific
- install.upgrade:

Some files were not shown because too many files have changed in this diff.