mirror of https://git.proxmox.com/git/ceph.git (synced 2025-04-28 12:54:34 +00:00)

import ceph pacific 16.2.14 source

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>

parent b81a1d7f97
commit a2f5a7e755
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10.2)
 # remove cmake/modules/FindPython* once 3.12 is required

 project(ceph
-  VERSION 16.2.13
+  VERSION 16.2.14
   LANGUAGES CXX C ASM)

 foreach(policy
@@ -32,6 +32,17 @@
  in certain recovery scenarios, e.g., monitor database lost and rebuilt, and
  the restored file system is expected to have the same ID as before.

>= 16.2.14
----------

* CEPHFS: After recovering a Ceph File System following the disaster recovery
  procedure, the recovered files under the `lost+found` directory can now be
  deleted.

* `ceph mgr dump` command now displays the name of the mgr module that
  registered a RADOS client in the `name` field added to elements of the
  `active_clients` array. Previously, only the address of a module's RADOS
  client was shown in the `active_clients` array.
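
  For illustration only (field names other than `name` are a hypothetical
  sketch and may differ in the actual output), an `active_clients` entry
  might now look like::

      { "name": "devicehealth", "addrvec": [...] }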

>=16.2.12
---------

@@ -62,6 +73,65 @@
  namespaces was added to RBD in Nautilus 14.2.0 and it has been possible to
  map and unmap images in namespaces using the `image-spec` syntax since then
  but the corresponding option available in most other commands was missing.
* RGW: Compression is now supported for objects uploaded with Server-Side
  Encryption. When both are enabled, compression is applied before encryption.
* RGW: the "pubsub" functionality for storing bucket notifications inside Ceph
  has been removed, and the "pubsub" zone should no longer be used. The REST
  operations and the radosgw-admin commands for manipulating subscriptions, as
  well as those for fetching and acking the notifications, have been removed.
  If the endpoint to which the notifications are sent may be down or
  disconnected, it is recommended to use persistent notifications to guarantee
  their delivery. If the system that consumes the notifications needs to pull
  them (instead of having the notifications pushed to it), an external message
  bus (e.g. RabbitMQ, Kafka) should be used for that purpose.
* RGW: The serialized format of notification and topics has changed, so that
  new/updated topics will be unreadable by old RGWs. We recommend completing
  the RGW upgrades before creating or modifying any notification topics.
|
||||
* RBD: Trailing newline in passphrase files (`<passphrase-file>` argument in
|
||||
`rbd encryption format` command and `--encryption-passphrase-file` option
|
||||
in other commands) is no longer stripped.
|
||||
* RBD: Support for layered client-side encryption is added. Cloned images
|
||||
can now be encrypted each with its own encryption format and passphrase,
|
||||
potentially different from that of the parent image. The efficient
|
||||
copy-on-write semantics intrinsic to unformatted (regular) cloned images
|
||||
are retained.
|
||||
* CEPHFS: Rename the `mds_max_retries_on_remount_failure` option to
|
||||
`client_max_retries_on_remount_failure` and move it from mds.yaml.in to
|
||||
mds-client.yaml.in because this option was only used by MDS client from its
|
||||
birth.
|
||||
* The `perf dump` and `perf schema` commands are deprecated in favor of new
|
||||
`counter dump` and `counter schema` commands. These new commands add support
|
||||
for labeled perf counters and also emit existing unlabeled perf counters. Some
|
||||
unlabeled perf counters became labeled in this release, with more to follow in
|
||||
future releases; such converted perf counters are no longer emitted by the
|
||||
`perf dump` and `perf schema` commands.
|
||||
* `ceph mgr dump` command now outputs `last_failure_osd_epoch` and
|
||||
`active_clients` fields at the top level. Previously, these fields were
|
||||
output under `always_on_modules` field.
|
||||
* RBD: All rbd-mirror daemon perf counters became labeled and as such are now
|
||||
emitted only by the new `counter dump` and `counter schema` commands. As part
|
||||
of the conversion, many also got renamed to better disambiguate journal-based
|
||||
and snapshot-based mirroring.
|
||||
* RBD: list-watchers C++ API (`Image::list_watchers`) now clears the passed
|
||||
`std::list` before potentially appending to it, aligning with the semantics
|
||||
of the corresponding C API (`rbd_watchers_list`).
|
||||
* Telemetry: Users who are opted-in to telemetry can also opt-in to
|
||||
participating in a leaderboard in the telemetry public
|
||||
dashboards (https://telemetry-public.ceph.com/). Users can now also add a
|
||||
description of the cluster to publicly appear in the leaderboard.
|
||||
For more details, see:
|
||||
https://docs.ceph.com/en/latest/mgr/telemetry/#leaderboard
|
||||
See a sample report with `ceph telemetry preview`.
|
||||
Opt-in to telemetry with `ceph telemetry on`.
|
||||
Opt-in to the leaderboard with
|
||||
`ceph config set mgr mgr/telemetry/leaderboard true`.
|
||||
Add leaderboard description with:
|
||||
`ceph config set mgr mgr/telemetry/leaderboard_description 'Cluster description'`.
|
||||
* CEPHFS: After recovering a Ceph File System following the disaster recovery
  procedure, the recovered files under the `lost+found` directory can now be deleted.
|
||||
* core: cache-tiering is now deprecated.
|
||||
|
||||
>=16.2.8
|
||||
--------
|
||||
|
@@ -135,7 +135,7 @@
 # main package definition
 #################################################################################
 Name: ceph
-Version: 16.2.13
+Version: 16.2.14
 Release: 0%{?dist}
 %if 0%{?fedora} || 0%{?rhel}
 Epoch: 2
@@ -151,7 +151,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
 Group: System/Filesystems
 %endif
 URL: http://ceph.com/
-Source0: %{?_remote_tarball_prefix}ceph-16.2.13.tar.bz2
+Source0: %{?_remote_tarball_prefix}ceph-16.2.14.tar.bz2
 %if 0%{?suse_version}
 # _insert_obs_source_lines_here
 ExclusiveArch: x86_64 aarch64 ppc64le s390x
@@ -1208,7 +1208,7 @@ This package provides Ceph default alerts for Prometheus.
 # common
 #################################################################################
 %prep
-%autosetup -p1 -n ceph-16.2.13
+%autosetup -p1 -n ceph-16.2.14

 %build
 # Disable lto on systems that do not support symver attribute
@@ -1,7 +1,13 @@
-ceph (16.2.13-1focal) focal; urgency=medium
+ceph (16.2.14-1focal) focal; urgency=medium

- -- Jenkins Build Slave User <jenkins-build@braggi17.front.sepia.ceph.com> Mon, 08 May 2023 20:49:59 +0000
+ -- Jenkins Build Slave User <jenkins-build@braggi13.front.sepia.ceph.com> Tue, 29 Aug 2023 16:38:35 +0000
+
+ceph (16.2.14-1) stable; urgency=medium
+
+  * New upstream release
+
+ -- Ceph Release Team <ceph-maintainers@ceph.io> Tue, 29 Aug 2023 15:43:56 +0000

 ceph (16.2.13-1) stable; urgency=medium
@@ -1 +1,3 @@
+lib/systemd/system/cephfs-mirror*
 usr/bin/cephfs-mirror
+usr/share/man/man8/cephfs-mirror.8
@ -43,17 +43,17 @@ monitor hosts as well as to the monitor daemons' stderr.
|
||||
Ceph daemon logs
|
||||
================
|
||||
|
||||
Logging to journald
|
||||
-------------------
|
||||
Logging to stdout
|
||||
-----------------
|
||||
|
||||
Ceph daemons traditionally write logs to ``/var/log/ceph``. Ceph daemons log to
|
||||
journald by default and Ceph logs are captured by the container runtime
|
||||
environment. They are accessible via ``journalctl``.
|
||||
Ceph daemons traditionally write logs to ``/var/log/ceph``. Ceph
|
||||
daemons log to stderr by default and Ceph logs are captured by the
|
||||
container runtime environment. By default, most systems send these
|
||||
logs to journald, which means that they are accessible via
|
||||
``journalctl``.
|
||||
|
||||
.. note:: Prior to Quincy, ceph daemons logged to stderr.
|
||||
|
||||
Example of logging to journald
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Example of logging to stdout
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
For example, to view the logs for the daemon ``mon.foo`` for a cluster
|
||||
with ID ``5c5a50ae-272a-455d-99e9-32c6a013e694``, the command would be
|
||||
@ -69,11 +69,11 @@ Logging to files
|
||||
----------------
|
||||
|
||||
You can also configure Ceph daemons to log to files instead of to
|
||||
journald if you prefer logs to appear in files (as they did in earlier,
|
||||
stderr if you prefer logs to appear in files (as they did in earlier,
|
||||
pre-cephadm, pre-Octopus versions of Ceph). When Ceph logs to files,
|
||||
the logs appear in ``/var/log/ceph/<cluster-fsid>``. If you choose to
|
||||
configure Ceph to log to files instead of to journald, remember to
|
||||
configure Ceph so that it will not log to journald (the commands for
|
||||
configure Ceph to log to files instead of to stderr, remember to
|
||||
configure Ceph so that it will not log to stderr (the commands for
|
||||
this are covered below).
|
||||
|
||||
Enabling logging to files
|
||||
@ -86,10 +86,10 @@ To enable logging to files, run the following commands:
|
||||
ceph config set global log_to_file true
|
||||
ceph config set global mon_cluster_log_to_file true
|
||||
|
||||
Disabling logging to journald
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Disabling logging to stderr
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If you choose to log to files, we recommend disabling logging to journald or else
|
||||
If you choose to log to files, we recommend disabling logging to stderr or else
|
||||
everything will be logged twice. Run the following commands to disable logging
|
||||
to stderr:
|
||||
|
||||
@ -97,11 +97,6 @@ to stderr:
|
||||
|
||||
ceph config set global log_to_stderr false
|
||||
ceph config set global mon_cluster_log_to_stderr false
|
||||
ceph config set global log_to_journald false
|
||||
ceph config set global mon_cluster_log_to_journald false
|
||||
|
||||
.. note:: You can change the default by passing ``--log-to-file`` when
   bootstrapping a new cluster.
|
||||
|
||||
Modifying the log retention schedule
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -558,6 +558,7 @@ For example:
|
||||
Extra Entrypoint Arguments
|
||||
==========================
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
For arguments intended for the container runtime rather than the process inside
|
||||
@ -577,6 +578,57 @@ the node-exporter service , one could apply a service spec like
|
||||
extra_entrypoint_args:
|
||||
- "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector2"
|
||||
|
||||
Custom Config Files
|
||||
===================
|
||||
|
||||
Cephadm supports specifying miscellaneous config files for daemons.
|
||||
To do so, users must provide both the content of the config file and the
|
||||
location within the daemon's container at which it should be mounted. After
|
||||
applying a YAML spec with custom config files specified and having cephadm
|
||||
redeploy the daemons for which the config files are specified, these files will
|
||||
be mounted within the daemon's container at the specified location.
|
||||
|
||||
Example service spec:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
service_type: grafana
|
||||
service_name: grafana
|
||||
custom_configs:
|
||||
- mount_path: /etc/example.conf
|
||||
content: |
|
||||
setting1 = value1
|
||||
setting2 = value2
|
||||
- mount_path: /usr/share/grafana/example.cert
|
||||
content: |
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
|
||||
ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
|
||||
IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
|
||||
YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
|
||||
ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
|
||||
-----END PRIVATE KEY-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4gTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFt
|
||||
ZXQsIGNvbnNldGV0dXIgc2FkaXBzY2luZyBlbGl0ciwgc2VkIGRpYW0gbm9udW15
|
||||
IGVpcm1vZCB0ZW1wb3IgaW52aWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu
|
||||
YSBhbGlxdXlhbSBlcmF0LCBzZWQgZGlhbSB2b2x1cHR1YS4gQXQgdmVybyBlb3Mg
|
||||
ZXQgYWNjdXNhbSBldCBqdXN0byBkdW8=
|
||||
-----END CERTIFICATE-----
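
Assuming the spec above has been saved to a file (named, purely as an example,
``grafana-custom-config.yaml``), it can be applied with:

.. prompt:: bash

   ceph orch apply -i grafana-custom-config.yaml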
|
||||
|
||||
To make these new config files actually get mounted within the containers for
the daemons, redeploy the daemons by running a command of the following form:
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
ceph orch redeploy <service-name>
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
ceph orch redeploy grafana
|
||||
|
||||
.. _orch-rm:
|
||||
|
||||
Removing a Service
|
||||
|
@ -299,13 +299,16 @@ and the metrics will not be visible in Prometheus.
|
||||
Setting up Prometheus
|
||||
-----------------------
|
||||
|
||||
Setting Prometheus Retention Time
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Setting Prometheus Retention Size and Time
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Cephadm provides the option to set the Prometheus TDSB retention time using
|
||||
a ``retention_time`` field in the Prometheus service spec. The value defaults
|
||||
to 15 days (15d). If you would like a different value, such as 1 year (1y) you
|
||||
can apply a service spec similar to:
|
||||
Cephadm can configure Prometheus TSDB retention by specifying ``retention_time``
|
||||
and ``retention_size`` values in the Prometheus service spec.
|
||||
The retention time value defaults to 15 days (15d). Users can set a different value/unit where
|
||||
supported units are: 'y', 'w', 'd', 'h', 'm' and 's'. The retention size value defaults
|
||||
to 0 (disabled). Supported units in this case are: 'B', 'KB', 'MB', 'GB', 'TB', 'PB' and 'EB'.
|
||||
|
||||
In the following example spec we set the retention time to 1 year and the size to 1GB.
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
@ -314,6 +317,7 @@ can apply a service spec similar to:
|
||||
count: 1
|
||||
spec:
|
||||
retention_time: "1y"
|
||||
retention_size: "1GB"
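
As with any other service spec, the file holding this spec (named
``prometheus.yaml`` here only for illustration) is applied with ``ceph orch
apply``:

.. prompt:: bash #

   ceph orch apply -i prometheus.yaml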
|
||||
|
||||
.. note::
|
||||
|
||||
|
@ -308,7 +308,7 @@ Replacing an OSD
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
orch osd rm <osd_id(s)> --replace [--force]
|
||||
ceph orch osd rm <osd_id(s)> --replace [--force]
|
||||
|
||||
Example:
|
||||
|
||||
|
@ -14,6 +14,8 @@ Requirements
|
||||
|
||||
The primary (local) and secondary (remote) Ceph clusters must be version Pacific or later.
|
||||
|
||||
.. _cephfs_mirroring_creating_users:
|
||||
|
||||
Creating Users
|
||||
--------------
|
||||
|
||||
@ -42,80 +44,155 @@ Mirror daemon should be spawned using `systemctl(1)` unit files::
|
||||
|
||||
$ cephfs-mirror --id mirror --cluster site-a -f
|
||||
|
||||
.. note:: User used here is `mirror` created in the `Creating Users` section.
|
||||
.. note:: The user specified here is `mirror`, the creation of which is
|
||||
described in the :ref:`Creating Users<cephfs_mirroring_creating_users>`
|
||||
section.
|
||||
|
||||
Multiple ``cephfs-mirror`` daemons may be deployed for concurrent
|
||||
synchronization and high availability. Mirror daemons share the synchronization
|
||||
load using a simple ``M/N`` policy, where ``M`` is the number of directories
|
||||
and ``N`` is the number of ``cephfs-mirror`` daemons.
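
For example, with 12 mirrored directories and 3 ``cephfs-mirror`` daemons,
each daemon is assigned roughly 4 directories.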
|
||||
|
||||
When ``cephadm`` is used to manage a Ceph cluster, ``cephfs-mirror`` daemons can be
|
||||
deployed by running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch apply cephfs-mirror
|
||||
|
||||
To deploy multiple mirror daemons, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch apply cephfs-mirror --placement=<placement-spec>
|
||||
|
||||
For example, to deploy 3 `cephfs-mirror` daemons on different hosts, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch apply cephfs-mirror --placement="3 host1,host2,host3"
|
||||
|
||||
Interface
|
||||
---------
|
||||
|
||||
`Mirroring` module (manager plugin) provides interfaces for managing directory snapshot
|
||||
mirroring. Manager interfaces are (mostly) wrappers around monitor commands for managing
|
||||
file system mirroring and is the recommended control interface.
|
||||
The `Mirroring` module (manager plugin) provides interfaces for managing
directory snapshot mirroring. These are (mostly) wrappers around monitor
commands for managing file system mirroring, and they are the recommended
control interface.
|
||||
|
||||
Mirroring Module
|
||||
----------------
|
||||
|
||||
The mirroring module is responsible for assigning directories to mirror daemons for
|
||||
synchronization. Multiple mirror daemons can be spawned to achieve concurrency in
|
||||
directory snapshot synchronization. When mirror daemons are spawned (or terminated)
|
||||
, the mirroring module discovers the modified set of mirror daemons and rebalances
|
||||
the directory assignment amongst the new set thus providing high-availability.
|
||||
The mirroring module is responsible for assigning directories to mirror daemons
|
||||
for synchronization. Multiple mirror daemons can be spawned to achieve
|
||||
concurrency in directory snapshot synchronization. When mirror daemons are
|
||||
spawned (or terminated), the mirroring module discovers the modified set of
|
||||
mirror daemons and rebalances directory assignments across the new set, thus
|
||||
providing high-availability.
|
||||
|
||||
.. note:: Multiple mirror daemons is currently untested. Only a single mirror daemon
|
||||
is recommended.
|
||||
.. note:: Deploying a single mirror daemon is recommended. Running multiple
|
||||
daemons is untested.
|
||||
|
||||
Mirroring module is disabled by default. To enable mirroring use::
|
||||
The mirroring module is disabled by default. To enable the mirroring module,
|
||||
run the following command:
|
||||
|
||||
$ ceph mgr module enable mirroring
|
||||
.. prompt:: bash $
|
||||
|
||||
Mirroring module provides a family of commands to control mirroring of directory
|
||||
snapshots. To add or remove directories, mirroring needs to be enabled for a given
|
||||
file system. To enable mirroring use::
|
||||
ceph mgr module enable mirroring
|
||||
|
||||
$ ceph fs snapshot mirror enable <fs_name>
|
||||
The mirroring module provides a family of commands that can be used to control
|
||||
the mirroring of directory snapshots. To add or remove directories, mirroring
|
||||
must be enabled for a given file system. To enable mirroring for a given file
|
||||
system, run a command of the following form:
|
||||
|
||||
.. note:: Mirroring module commands use `fs snapshot mirror` prefix as compared to
|
||||
the monitor commands which `fs mirror` prefix. Make sure to use module
|
||||
commands.
|
||||
.. prompt:: bash $
|
||||
|
||||
To disable mirroring, use::
|
||||
ceph fs snapshot mirror enable <fs_name>
|
||||
|
||||
$ ceph fs snapshot mirror disable <fs_name>
|
||||
.. note:: "Mirroring module" commands are prefixed with ``fs snapshot mirror``.
|
||||
This distinguishes them from "monitor commands", which are prefixed with ``fs
|
||||
mirror``. Be sure (in this context) to use module commands.
|
||||
|
||||
Once mirroring is enabled, add a peer to which directory snapshots are to be mirrored.
|
||||
Peers follow `<client>@<cluster>` specification and get assigned a unique-id (UUID)
|
||||
when added. See `Creating Users` section on how to create Ceph users for mirroring.
|
||||
To disable mirroring for a given file system, run a command of the following form:
|
||||
|
||||
To add a peer use::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph fs snapshot mirror peer_add <fs_name> <remote_cluster_spec> [<remote_fs_name>] [<remote_mon_host>] [<cephx_key>]
|
||||
ceph fs snapshot mirror disable <fs_name>
|
||||
|
||||
`<remote_fs_name>` is optional, and defaults to `<fs_name>` (on the remote cluster).
|
||||
After mirroring is enabled, add a peer to which directory snapshots are to be
|
||||
mirrored. Peers are specified by the ``<client>@<cluster>`` format, which is
|
||||
referred to elsewhere in this document as the ``remote_cluster_spec``. Peers
|
||||
are assigned a unique-id (UUID) when added. See the :ref:`Creating
|
||||
Users<cephfs_mirroring_creating_users>` section for instructions that describe
|
||||
how to create Ceph users for mirroring.
|
||||
|
||||
This requires the remote cluster ceph configuration and user keyring to be available in
|
||||
the primary cluster. See `Bootstrap Peers` section to avoid this. `peer_add` additionally
|
||||
supports passing the remote cluster monitor address and the user key. However, bootstrapping
|
||||
a peer is the recommended way to add a peer.
|
||||
To add a peer, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs snapshot mirror peer_add <fs_name> <remote_cluster_spec> [<remote_fs_name>] [<remote_mon_host>] [<cephx_key>]
|
||||
|
||||
``<remote_cluster_spec>`` is of the format ``client.<id>@<cluster_name>``.
|
||||
|
||||
``<remote_fs_name>`` is optional, and defaults to `<fs_name>` (on the remote
|
||||
cluster).
|
||||
|
||||
For this command to succeed, the remote cluster's Ceph configuration and user
|
||||
keyring must be available in the primary cluster. For example, if a user named
|
||||
``client.mirror_remote`` is created on the remote cluster which has ``rwps``
|
||||
permissions for the remote file system named ``remote_fs`` (see `Creating
|
||||
Users`) and the remote cluster is named ``remote_ceph`` (that is, the remote
|
||||
cluster configuration file is named ``remote_ceph.conf`` on the primary
|
||||
cluster), run the following command to add the remote filesystem as a peer to
|
||||
the primary filesystem ``primary_fs``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs snapshot mirror peer_add primary_fs client.mirror_remote@remote_ceph remote_fs
|
||||
|
||||
To avoid having to maintain the remote cluster configuration file and remote
|
||||
ceph user keyring in the primary cluster, users can bootstrap a peer (which
|
||||
stores the relevant remote cluster details in the monitor config store on the
|
||||
primary cluster). See the :ref:`Bootstrap
|
||||
Peers<cephfs_mirroring_bootstrap_peers>` section.
|
||||
|
||||
The ``peer_add`` command supports passing the remote cluster monitor address
|
||||
and the user key. However, bootstrapping a peer is the recommended way to add a
|
||||
peer.
|
||||
|
||||
.. note:: Only a single peer is supported right now.
|
||||
|
||||
To remove a peer use::
|
||||
To remove a peer, run a command of the following form:
|
||||
|
||||
$ ceph fs snapshot mirror peer_remove <fs_name> <peer_uuid>
|
||||
.. prompt:: bash $
|
||||
|
||||
To list file system mirror peers use::
|
||||
ceph fs snapshot mirror peer_remove <fs_name> <peer_uuid>
|
||||
|
||||
$ ceph fs snapshot mirror peer_list <fs_name>
|
||||
To list file system mirror peers, run a command of the following form:
|
||||
|
||||
To configure a directory for mirroring, use::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph fs snapshot mirror add <fs_name> <path>
|
||||
ceph fs snapshot mirror peer_list <fs_name>
|
||||
|
||||
To stop a mirroring directory snapshots use::
|
||||
To configure a directory for mirroring, run a command of the following form:
|
||||
|
||||
$ ceph fs snapshot mirror remove <fs_name> <path>
|
||||
.. prompt:: bash $
|
||||
|
||||
Only absolute directory paths are allowed. Also, paths are normalized by the mirroring
|
||||
module, therfore, `/a/b/../b` is equivalent to `/a/b`.
|
||||
ceph fs snapshot mirror add <fs_name> <path>
|
||||
|
||||
To stop mirroring directory snapshots, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs snapshot mirror remove <fs_name> <path>
|
||||
|
||||
Only absolute directory paths are allowed.
|
||||
|
||||
Paths are normalized by the mirroring module. This means that ``/a/b/../b`` is
|
||||
equivalent to ``/a/b``. Paths always start from the CephFS file-system root and
|
||||
not from the host system mount point.
|
||||
|
||||
For example::
|
||||
|
||||
$ mkdir -p /d0/d1/d2
|
||||
$ ceph fs snapshot mirror add cephfs /d0/d1/d2
|
||||
@ -123,16 +200,19 @@ module, therfore, `/a/b/../b` is equivalent to `/a/b`.
|
||||
$ ceph fs snapshot mirror add cephfs /d0/d1/../d1/d2
|
||||
Error EEXIST: directory /d0/d1/d2 is already tracked
|
||||
|
||||
Once a directory is added for mirroring, its subdirectory or ancestor directories are
|
||||
disallowed to be added for mirorring::
|
||||
After a directory is added for mirroring, the additional mirroring of
|
||||
subdirectories or ancestor directories is disallowed::
|
||||
|
||||
$ ceph fs snapshot mirror add cephfs /d0/d1
|
||||
Error EINVAL: /d0/d1 is a ancestor of tracked path /d0/d1/d2
|
||||
$ ceph fs snapshot mirror add cephfs /d0/d1/d2/d3
|
||||
Error EINVAL: /d0/d1/d2/d3 is a subtree of tracked path /d0/d1/d2
|
||||
|
||||
Commands to check directory mapping (to mirror daemons) and directory distribution are
|
||||
detailed in `Mirroring Status` section.
|
||||
The :ref:`Mirroring Status<cephfs_mirroring_mirroring_status>` section contains
|
||||
information about the commands for checking the directory mapping (to mirror
|
||||
daemons) and for checking the directory distribution.
|
||||
|
||||
.. _cephfs_mirroring_bootstrap_peers:
|
||||
|
||||
Bootstrap Peers
|
||||
---------------
|
||||
@ -160,6 +240,9 @@ e.g.::
|
||||
|
||||
$ ceph fs snapshot mirror peer_bootstrap import cephfs eyJmc2lkIjogIjBkZjE3MjE3LWRmY2QtNDAzMC05MDc5LTM2Nzk4NTVkNDJlZiIsICJmaWxlc3lzdGVtIjogImJhY2t1cF9mcyIsICJ1c2VyIjogImNsaWVudC5taXJyb3JfcGVlcl9ib290c3RyYXAiLCAic2l0ZV9uYW1lIjogInNpdGUtcmVtb3RlIiwgImtleSI6ICJBUUFhcDBCZ0xtRmpOeEFBVnNyZXozai9YYUV0T2UrbUJEZlJDZz09IiwgIm1vbl9ob3N0IjogIlt2MjoxOTIuMTY4LjAuNTo0MDkxOCx2MToxOTIuMTY4LjAuNTo0MDkxOV0ifQ==
|
||||
|
||||
|
||||
.. _cephfs_mirroring_mirroring_status:
|
||||
|
||||
Mirroring Status
|
||||
----------------
|
||||
|
||||
|
@ -78,7 +78,15 @@ By default, `cephfs-top` connects to cluster name `ceph`. To use a non-default c
|
||||
|
||||
$ cephfs-top -d <seconds>
|
||||
|
||||
Interval should be greater or equal to 0.5 second. Fractional seconds are honoured.
|
||||
Refresh interval should be a positive integer.
|
||||
|
||||
To dump the metrics to stdout without creating a curses display use::
|
||||
|
||||
$ cephfs-top --dump
|
||||
|
||||
To dump the metrics of the given filesystem to stdout without creating a curses display use::
|
||||
|
||||
$ cephfs-top --dumpfs <fs_name>
|
||||
|
||||
Interactive Commands
|
||||
--------------------
|
||||
@ -104,3 +112,5 @@ The metrics display can be scrolled using the Arrow Keys, PgUp/PgDn, Home/End an
|
||||
Sample screenshot running `cephfs-top` with 2 filesystems:
|
||||
|
||||
.. image:: cephfs-top.png
|
||||
|
||||
.. note:: Minimum compatible python version for cephfs-top is 3.6.0. cephfs-top is supported on distros RHEL 8, Ubuntu 18.04, CentOS 8 and above.
|
||||
|
@ -149,8 +149,8 @@ errors.
|
||||
|
||||
::
|
||||
|
||||
cephfs-data-scan scan_extents <data pool>
|
||||
cephfs-data-scan scan_inodes <data pool>
|
||||
cephfs-data-scan scan_extents [<data pool> [<extra data pool> ...]]
|
||||
cephfs-data-scan scan_inodes [<data pool>]
|
||||
cephfs-data-scan scan_links
|
||||
|
||||
'scan_extents' and 'scan_inodes' commands may take a *very long* time
|
||||
@ -166,22 +166,22 @@ The example below shows how to run 4 workers simultaneously:
|
||||
::
|
||||
|
||||
# Worker 0
|
||||
cephfs-data-scan scan_extents --worker_n 0 --worker_m 4 <data pool>
|
||||
cephfs-data-scan scan_extents --worker_n 0 --worker_m 4
|
||||
# Worker 1
|
||||
cephfs-data-scan scan_extents --worker_n 1 --worker_m 4 <data pool>
|
||||
cephfs-data-scan scan_extents --worker_n 1 --worker_m 4
|
||||
# Worker 2
|
||||
cephfs-data-scan scan_extents --worker_n 2 --worker_m 4 <data pool>
|
||||
cephfs-data-scan scan_extents --worker_n 2 --worker_m 4
|
||||
# Worker 3
|
||||
cephfs-data-scan scan_extents --worker_n 3 --worker_m 4 <data pool>
|
||||
cephfs-data-scan scan_extents --worker_n 3 --worker_m 4
|
||||
|
||||
# Worker 0
|
||||
cephfs-data-scan scan_inodes --worker_n 0 --worker_m 4 <data pool>
|
||||
cephfs-data-scan scan_inodes --worker_n 0 --worker_m 4
|
||||
# Worker 1
|
||||
cephfs-data-scan scan_inodes --worker_n 1 --worker_m 4 <data pool>
|
||||
cephfs-data-scan scan_inodes --worker_n 1 --worker_m 4
|
||||
# Worker 2
|
||||
cephfs-data-scan scan_inodes --worker_n 2 --worker_m 4 <data pool>
|
||||
cephfs-data-scan scan_inodes --worker_n 2 --worker_m 4
|
||||
# Worker 3
|
||||
cephfs-data-scan scan_inodes --worker_n 3 --worker_m 4 <data pool>
|
||||
cephfs-data-scan scan_inodes --worker_n 3 --worker_m 4
|
||||
|
||||
It is **important** to ensure that all workers have completed the
|
||||
scan_extents phase before any workers enter the scan_inodes phase.
|
||||
@ -191,8 +191,13 @@ operation to delete ancillary data geneated during recovery.
|
||||
|
||||
::
|
||||
|
||||
cephfs-data-scan cleanup <data pool>
|
||||
cephfs-data-scan cleanup [<data pool>]
|
||||
|
||||
Note that the data pool parameters for the 'scan_extents', 'scan_inodes' and
'cleanup' commands are optional: the tool can usually detect the pools
automatically, but you may still override this. The 'scan_extents' command
needs all data pools to be specified, while the 'scan_inodes' and 'cleanup'
commands need only the main data pool.
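
For example, assuming a main data pool named ``cephfs_data`` and an additional
data pool named ``cephfs_data_extra`` (both names are hypothetical), an
invocation that overrides the automatic detection would look like::

    cephfs-data-scan scan_extents cephfs_data cephfs_data_extra
    cephfs-data-scan scan_inodes cephfs_data
    cephfs-data-scan cleanup cephfs_data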
|
||||
|
||||
|
||||
Using an alternate metadata pool for recovery
|
||||
@ -250,8 +255,8 @@ Now perform the recovery of the metadata pool from the data pool:
|
||||
::
|
||||
|
||||
cephfs-data-scan init --force-init --filesystem cephfs_recovery --alternate-pool cephfs_recovery_meta
|
||||
cephfs-data-scan scan_extents --alternate-pool cephfs_recovery_meta --filesystem <fs_name> <data_pool>
|
||||
cephfs-data-scan scan_inodes --alternate-pool cephfs_recovery_meta --filesystem <fs_name> --force-corrupt <data_pool>
|
||||
cephfs-data-scan scan_extents --alternate-pool cephfs_recovery_meta --filesystem <fs_name>
|
||||
cephfs-data-scan scan_inodes --alternate-pool cephfs_recovery_meta --filesystem <fs_name> --force-corrupt
|
||||
cephfs-data-scan scan_links --filesystem cephfs_recovery
|
||||
|
||||
.. note::
|
||||
|
@ -3,23 +3,22 @@
|
||||
FS volumes and subvolumes
|
||||
=========================
|
||||
|
||||
The volumes
|
||||
module of the :term:`Ceph Manager` daemon (ceph-mgr) provides a single
|
||||
source of truth for CephFS exports. The OpenStack shared
|
||||
file system service (manila_) and Ceph Container Storage Interface (CSI_)
|
||||
storage administrators among others can use the common CLI provided by the
|
||||
ceph-mgr volumes module to manage CephFS exports.
|
||||
The volumes module of the :term:`Ceph Manager` daemon (ceph-mgr) provides a
|
||||
single source of truth for CephFS exports. The OpenStack shared file system
|
||||
service (manila_) and the Ceph Container Storage Interface (CSI_) storage
|
||||
administrators use the common CLI provided by the ceph-mgr ``volumes`` module
|
||||
to manage CephFS exports.
|
||||
|
||||
The ceph-mgr volumes module implements the following file system export
|
||||
abstactions:
|
||||
The ceph-mgr ``volumes`` module implements the following file system export
|
||||
abstractions:
|
||||
|
||||
* FS volumes, an abstraction for CephFS file systems
|
||||
|
||||
* FS subvolumes, an abstraction for independent CephFS directory trees
|
||||
|
||||
* FS subvolume groups, an abstraction for a directory level higher than FS
|
||||
subvolumes to effect policies (e.g., :doc:`/cephfs/file-layouts`) across a
|
||||
set of subvolumes
|
||||
subvolumes. Used to effect policies (e.g., :doc:`/cephfs/file-layouts`)
|
||||
across a set of subvolumes
|
||||
|
||||
Some possible use-cases for the export abstractions:
|
||||
|
||||
@ -38,67 +37,76 @@ Requirements
|
||||
mon 'allow r'
|
||||
mgr 'allow rw'
|
||||
|
||||
|
||||
FS Volumes
|
||||
----------
|
||||
|
||||
Create a volume using::
|
||||
Create a volume by running the following command:
|
||||
|
||||
$ ceph fs volume create <vol_name> [<placement>]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs volume create <vol_name> [<placement>]
|
||||
|
||||
This creates a CephFS file system and its data and metadata pools. It can also
|
||||
deploy MDS daemons for the filesystem using a ceph-mgr orchestrator
|
||||
module (see :doc:`/mgr/orchestrator`), for example Rook.
|
||||
deploy MDS daemons for the filesystem using a ceph-mgr orchestrator module (for
|
||||
example Rook). See :doc:`/mgr/orchestrator`.
|
||||
|
||||
<vol_name> is the volume name (an arbitrary string), and
|
||||
<placement> is an optional string that designates the hosts that should have
|
||||
an MDS running on them and, optionally, the total number of MDS daemons the cluster
|
||||
should have. For example, the
|
||||
following placement string means "deploy MDS on nodes ``host1`` and ``host2`` (one
|
||||
MDS per host):
|
||||
``<vol_name>`` is the volume name (an arbitrary string). ``<placement>`` is an
|
||||
optional string that specifies the hosts that should have an MDS running on
|
||||
them and, optionally, the total number of MDS daemons that the cluster should
|
||||
have. For example, the following placement string means "deploy MDS on nodes
|
||||
``host1`` and ``host2`` (one MDS per host)"::
|
||||
|
||||
"host1,host2"
|
||||
|
||||
and this placement specification says to deploy two MDS daemons on each of
|
||||
nodes ``host1`` and ``host2`` (for a total of four MDS daemons in the cluster):
|
||||
The following placement specification means "deploy two MDS daemons on each of
|
||||
nodes ``host1`` and ``host2`` (for a total of four MDS daemons in the
|
||||
cluster)"::
|
||||
|
||||
"4 host1,host2"
|
||||
|
||||
For more details on placement specification refer to the :ref:`orchestrator-cli-service-spec`,
|
||||
but keep in mind that specifying placement via a YAML file is not supported.
|
||||
See :ref:`orchestrator-cli-service-spec` for more on placement specification.
|
||||
Specifying placement via a YAML file is not supported.
|
||||
|
||||
To remove a volume, run the following command::
|
||||
To remove a volume, run the following command:
|
||||
|
||||
$ ceph fs volume rm <vol_name> [--yes-i-really-mean-it]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs volume rm <vol_name> [--yes-i-really-mean-it]
|
||||
|
||||
This removes a file system and its data and metadata pools. It also tries to
|
||||
remove MDS daemons using the enabled ceph-mgr orchestrator module.
|
||||
|
||||
List volumes using::
|
||||
List volumes by running the following command:
|
||||
|
||||
$ ceph fs volume ls
|
||||
.. prompt:: bash $
|
||||
|
||||
Rename a volume using::
|
||||
ceph fs volume ls
|
||||
|
||||
$ ceph fs volume rename <vol_name> <new_vol_name> [--yes-i-really-mean-it]
|
||||
Rename a volume by running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs volume rename <vol_name> <new_vol_name> [--yes-i-really-mean-it]
|
||||
|
||||
Renaming a volume can be an expensive operation that requires the following:
|
||||
|
||||
- Rename the orchestrator-managed MDS service to match the <new_vol_name>.
|
||||
This involves launching a MDS service with <new_vol_name> and bringing down
|
||||
the MDS service with <vol_name>.
|
||||
- Rename the file system matching <vol_name> to <new_vol_name>
|
||||
- Change the application tags on the data and metadata pools of the file system
|
||||
to <new_vol_name>
|
||||
- Rename the metadata and data pools of the file system.
|
||||
- Renaming the orchestrator-managed MDS service to match the <new_vol_name>.
|
||||
This involves launching a MDS service with ``<new_vol_name>`` and bringing
|
||||
down the MDS service with ``<vol_name>``.
|
||||
- Renaming the file system matching ``<vol_name>`` to ``<new_vol_name>``.
|
||||
- Changing the application tags on the data and metadata pools of the file system
|
||||
to ``<new_vol_name>``.
|
||||
- Renaming the metadata and data pools of the file system.
|
||||
|
||||
The CephX IDs authorized for <vol_name> need to be reauthorized for <new_vol_name>. Any
|
||||
on-going operations of the clients using these IDs may be disrupted. Mirroring is
|
||||
expected to be disabled on the volume.
|
||||
The CephX IDs that are authorized for ``<vol_name>`` must be reauthorized for
|
||||
``<new_vol_name>``. Any ongoing operations of the clients using these IDs may
|
||||
be disrupted. Ensure that mirroring is disabled on the volume.
|
||||
|
||||
To fetch the information of a CephFS volume, run::
|
||||
To fetch the information of a CephFS volume, run the following command:
|
||||
|
||||
$ ceph fs volume info vol_name [--human_readable]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs volume info vol_name [--human_readable]
|
||||
|
||||
The ``--human_readable`` flag shows used and available pool capacities in KB/MB/GB.
|
||||
|
||||
@ -142,9 +150,11 @@ Sample output of the ``volume info`` command::
|
||||
FS Subvolume groups
|
||||
-------------------
|
||||
|
||||
Create a subvolume group using::
|
||||
Create a subvolume group by running the following command:
|
||||
|
||||
$ ceph fs subvolumegroup create <vol_name> <group_name> [--size <size_in_bytes>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolumegroup create <vol_name> <group_name> [--size <size_in_bytes>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>]
|
||||
|
||||
The command succeeds even if the subvolume group already exists.
|
||||
|
||||
@ -152,32 +162,41 @@ When creating a subvolume group you can specify its data pool layout (see
|
||||
:doc:`/cephfs/file-layouts`), uid, gid, file mode in octal numerals, and
|
||||
size in bytes. The size of the subvolume group is specified by setting
|
||||
a quota on it (see :doc:`/cephfs/quota`). By default, the subvolume group
|
||||
is created with octal file mode '755', uid '0', gid '0' and the data pool
|
||||
is created with octal file mode ``755``, uid ``0``, gid ``0`` and the data pool
|
||||
layout of its parent directory.
|
||||
|
||||
Remove a subvolume group by running a command of the following form:
|
||||
|
||||
Remove a subvolume group using::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph fs subvolumegroup rm <vol_name> <group_name> [--force]
|
||||
ceph fs subvolumegroup rm <vol_name> <group_name> [--force]
|
||||
|
||||
The removal of a subvolume group fails if it is not empty or non-existent.
|
||||
'--force' flag allows the non-existent subvolume group remove command to succeed.
|
||||
The removal of a subvolume group fails if the subvolume group is not empty or
does not exist. The ``--force`` flag allows the command to succeed even if the
subvolume group does not exist.
|
||||
|
||||
|
||||
Fetch the absolute path of a subvolume group using::
|
||||
Fetch the absolute path of a subvolume group by running a command of the
|
||||
following form:
|
||||
|
||||
$ ceph fs subvolumegroup getpath <vol_name> <group_name>
|
||||
.. prompt:: bash $
|
||||
|
||||
List subvolume groups using::
|
||||
ceph fs subvolumegroup getpath <vol_name> <group_name>
|
||||
|
||||
$ ceph fs subvolumegroup ls <vol_name>
|
||||
List subvolume groups by running a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolumegroup ls <vol_name>
|
||||
|
||||
.. note:: Subvolume group snapshot feature is no longer supported in mainline CephFS (existing group
|
||||
snapshots can still be listed and deleted)
|
||||
|
||||
Fetch the metadata of a subvolume group using::
|
||||
Fetch the metadata of a subvolume group by running a command of the following form:
|
||||
|
||||
$ ceph fs subvolumegroup info <vol_name> <group_name>
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolumegroup info <vol_name> <group_name>
|
||||
|
||||
The output format is JSON and contains fields as follows:
|
||||
|
||||
@ -194,62 +213,77 @@ The output format is JSON and contains fields as follows:
|
||||
* ``created_at``: creation time of the subvolume group in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ``data_pool``: data pool to which the subvolume group belongs
|
||||
|
||||
Check the presence of any subvolume group using::
|
||||
Check the presence of any subvolume group by running a command of the following form:
|
||||
|
||||
$ ceph fs subvolumegroup exist <vol_name>
|
||||
.. prompt:: bash $
|
||||
|
||||
The 'exist' command outputs:
|
||||
ceph fs subvolumegroup exist <vol_name>
|
||||
|
||||
The ``exist`` command outputs:
|
||||
|
||||
* "subvolumegroup exists": if any subvolumegroup is present
|
||||
* "no subvolumegroup exists": if no subvolumegroup is present
|
||||
|
||||
.. note:: This command checks for the presence of custom groups and not presence of the default one. To validate the emptiness of the volume, a subvolumegroup existence check alone is not sufficient. Subvolume existence also needs to be checked as there might be subvolumes in the default group.
|
||||
.. note:: This command checks for the presence of custom groups and not
|
||||
presence of the default one. To validate the emptiness of the volume, a
|
||||
subvolumegroup existence check alone is not sufficient. Subvolume existence
|
||||
also needs to be checked as there might be subvolumes in the default group.
|
||||
|
||||
Resize a subvolume group using::
|
||||
Resize a subvolume group by running a command of the following form:
|
||||
|
||||
$ ceph fs subvolumegroup resize <vol_name> <group_name> <new_size> [--no_shrink]
|
||||
.. prompt:: bash $
|
||||
|
||||
The command resizes the subvolume group quota using the size specified by ``new_size``.
|
||||
The ``--no_shrink`` flag prevents the subvolume group from shrinking below the current used
|
||||
size.
|
||||
ceph fs subvolumegroup resize <vol_name> <group_name> <new_size> [--no_shrink]
|
||||
|
||||
The subvolume group may be resized to an infinite size by passing ``inf`` or ``infinite``
|
||||
as the ``new_size``.
|
||||
The command resizes the subvolume group quota, using the size specified by
|
||||
``new_size``. The ``--no_shrink`` flag prevents the subvolume group from
|
||||
shrinking below the current used size.
|
||||
|
||||
Remove a snapshot of a subvolume group using::
|
||||
The subvolume group may be resized to an infinite size by passing ``inf`` or
|
||||
``infinite`` as the ``new_size``.
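
For example, to remove the size limit of a hypothetical subvolume group
``group_a`` in volume ``vol_a``:

.. prompt:: bash $

   ceph fs subvolumegroup resize vol_a group_a inf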
|
||||
|
||||
$ ceph fs subvolumegroup snapshot rm <vol_name> <group_name> <snap_name> [--force]
|
||||
Remove a snapshot of a subvolume group by running a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolumegroup snapshot rm <vol_name> <group_name> <snap_name> [--force]
|
||||
|
||||
Supplying the ``--force`` flag allows the command to succeed when it would otherwise
|
||||
fail due to the snapshot not existing.
|
||||
fail due to the nonexistence of the snapshot.
|
||||
|
||||
List snapshots of a subvolume group using::
|
||||
List snapshots of a subvolume group by running a command of the following form:
|
||||
|
||||
$ ceph fs subvolumegroup snapshot ls <vol_name> <group_name>
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolumegroup snapshot ls <vol_name> <group_name>
|
||||
|
||||
|
||||
FS Subvolumes
|
||||
-------------
|
||||
|
||||
Create a subvolume using::
|
||||
Create a subvolume using:
|
||||
|
||||
$ ceph fs subvolume create <vol_name> <subvol_name> [--size <size_in_bytes>] [--group_name <subvol_group_name>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>] [--namespace-isolated]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume create <vol_name> <subvol_name> [--size <size_in_bytes>] [--group_name <subvol_group_name>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>] [--namespace-isolated]
|
||||
|
||||
|
||||
The command succeeds even if the subvolume already exists.
|
||||
|
||||
When creating a subvolume you can specify its subvolume group, data pool layout,
|
||||
uid, gid, file mode in octal numerals, and size in bytes. The size of the subvolume is
|
||||
specified by setting a quota on it (see :doc:`/cephfs/quota`). The subvolume can be
|
||||
created in a separate RADOS namespace by specifying --namespace-isolated option. By
|
||||
default a subvolume is created within the default subvolume group, and with an octal file
|
||||
mode '755', uid of its subvolume group, gid of its subvolume group, data pool layout of
|
||||
its parent directory and no size limit.
|
||||
When creating a subvolume you can specify its subvolume group, data pool
|
||||
layout, uid, gid, file mode in octal numerals, and size in bytes. The size of
|
||||
the subvolume is specified by setting a quota on it (see :doc:`/cephfs/quota`).
|
||||
The subvolume can be created in a separate RADOS namespace by specifying the
``--namespace-isolated`` option. By default a subvolume is created within the
|
||||
default subvolume group, and with an octal file mode '755', uid of its
|
||||
subvolume group, gid of its subvolume group, data pool layout of its parent
|
||||
directory and no size limit.
|
||||
|
||||
Remove a subvolume using::
|
||||
Remove a subvolume using:
|
||||
|
||||
$ ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name>] [--force] [--retain-snapshots]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name>] [--force] [--retain-snapshots]
|
||||
|
||||
The command removes the subvolume and its contents. It does this in two steps.
|
||||
First, it moves the subvolume to a trash folder, and then asynchronously purges
|
||||
@ -262,44 +296,62 @@ A subvolume can be removed retaining existing snapshots of the subvolume using t
|
||||
'--retain-snapshots' option. If snapshots are retained, the subvolume is considered
|
||||
empty for all operations not involving the retained snapshots.
|
||||
|
||||
.. note:: Snapshot retained subvolumes can be recreated using 'ceph fs subvolume create'
|
||||
.. note:: Snapshot retained subvolumes can be recreated using 'ceph fs
|
||||
subvolume create'
|
||||
|
||||
.. note:: Retained snapshots can be used as a clone source to recreate the subvolume, or clone to a newer subvolume.
|
||||
.. note:: Retained snapshots can be used as a clone source to recreate the
|
||||
subvolume, or clone to a newer subvolume.
|
||||
|
||||
Resize a subvolume using::
|
||||
Resize a subvolume using:
|
||||
|
||||
$ ceph fs subvolume resize <vol_name> <subvol_name> <new_size> [--group_name <subvol_group_name>] [--no_shrink]
|
||||
.. prompt:: bash $
|
||||
|
||||
The command resizes the subvolume quota using the size specified by ``new_size``.
|
||||
The `--no_shrink`` flag prevents the subvolume from shrinking below the current used size of the subvolume.
|
||||
ceph fs subvolume resize <vol_name> <subvol_name> <new_size> [--group_name <subvol_group_name>] [--no_shrink]
|
||||
|
||||
The subvolume can be resized to an unlimited (but sparse) logical size by passing ``inf`` or ``infinite`` as `` new_size``.
|
||||
The command resizes the subvolume quota using the size specified by
|
||||
``new_size``. The ``--no_shrink`` flag prevents the subvolume from shrinking
|
||||
below the current used size of the subvolume.
|
||||
|
||||
Authorize cephx auth IDs, the read/read-write access to fs subvolumes::
|
||||
The subvolume can be resized to an unlimited (but sparse) logical size by
|
||||
passing ``inf`` or ``infinite`` as ``new_size``.
|
||||
|
||||
$ ceph fs subvolume authorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>] [--access_level=<access_level>]
|
||||
Authorize cephx auth IDs with read or read-write access to fs subvolumes:
|
||||
|
||||
The 'access_level' takes 'r' or 'rw' as value.
|
||||
.. prompt:: bash $
|
||||
|
||||
Deauthorize cephx auth IDs, the read/read-write access to fs subvolumes::
|
||||
ceph fs subvolume authorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>] [--access_level=<access_level>]
|
||||
|
||||
$ ceph fs subvolume deauthorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]
|
||||
The ``access_level`` takes ``r`` or ``rw`` as value.
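
For example, to grant a hypothetical auth ID ``guest`` read-only access to
subvolume ``sub_a`` of volume ``vol_a``:

.. prompt:: bash $

   ceph fs subvolume authorize vol_a sub_a guest --access_level=r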
|
||||
|
||||
List cephx auth IDs authorized to access fs subvolume::
|
||||
Deauthorize cephx auth IDs, removing their read or read-write access to fs subvolumes:
|
||||
|
||||
$ ceph fs subvolume authorized_list <vol_name> <sub_name> [--group_name=<group_name>]
|
||||
.. prompt:: bash $
|
||||
|
||||
Evict fs clients based on auth ID and subvolume mounted::
|
||||
ceph fs subvolume deauthorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]
|
||||
|
||||
$ ceph fs subvolume evict <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]
|
||||
List cephx auth IDs authorized to access fs subvolume:
|
||||
|
||||
Fetch the absolute path of a subvolume using::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph fs subvolume getpath <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
ceph fs subvolume authorized_list <vol_name> <sub_name> [--group_name=<group_name>]
|
||||
|
||||
Fetch the information of a subvolume using::
|
||||
Evict fs clients based on auth ID and subvolume mounted:
|
||||
|
||||
$ ceph fs subvolume info <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume evict <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]
|
||||
|
||||
Fetch the absolute path of a subvolume using:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume getpath <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Fetch the information of a subvolume using:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume info <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The output format is JSON and contains fields as follows.
|
||||
|
||||
@ -339,67 +391,93 @@ A subvolume's ``state`` is based on the current state of the subvolume and conta
|
||||
* ``complete``: subvolume is ready for all operations
|
||||
* ``snapshot-retained``: subvolume is removed but its snapshots are retained
|
||||
|
||||
List subvolumes using::
|
||||
List subvolumes using:
|
||||
|
||||
$ ceph fs subvolume ls <vol_name> [--group_name <subvol_group_name>]
|
||||
.. prompt:: bash $
|
||||
|
||||
.. note:: subvolumes that are removed but have snapshots retained, are also listed.
|
||||
ceph fs subvolume ls <vol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Check the presence of any subvolume using::
|
||||
.. note:: subvolumes that are removed but have snapshots retained, are also
|
||||
listed.
|
||||
|
||||
$ ceph fs subvolume exist <vol_name> [--group_name <subvol_group_name>]
|
||||
Check the presence of any subvolume using:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume exist <vol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
These are the possible results of the ``exist`` command:
|
||||
|
||||
* ``subvolume exists``: if any subvolume of given group_name is present
|
||||
* ``no subvolume exists``: if no subvolume of given group_name is present
|
||||
|
||||
Set custom metadata on the subvolume as a key-value pair using::
|
||||
Set custom metadata on the subvolume as a key-value pair using:
|
||||
|
||||
$ ceph fs subvolume metadata set <vol_name> <subvol_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
.. prompt:: bash $
|
||||
|
||||
.. note:: If the key_name already exists then the old value will get replaced by the new value.
|
||||
ceph fs subvolume metadata set <vol_name> <subvol_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: key_name and value should be a string of ASCII characters (as specified in python's string.printable). key_name is case-insensitive and always stored in lower case.
|
||||
.. note:: If the key_name already exists then the old value will get replaced
|
||||
by the new value.
|
||||
|
||||
.. note:: Custom metadata on a subvolume is not preserved when snapshotting the subvolume, and hence, is also not preserved when cloning the subvolume snapshot.
|
||||
.. note:: key_name and value should be a string of ASCII characters (as
|
||||
specified in python's string.printable). key_name is case-insensitive and
|
||||
always stored in lower case.
|
||||
|
||||
Get custom metadata set on the subvolume using the metadata key::
|
||||
.. note:: Custom metadata on a subvolume is not preserved when snapshotting the
|
||||
subvolume, and hence, is also not preserved when cloning the subvolume
|
||||
snapshot.
|
||||
|
||||
$ ceph fs subvolume metadata get <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>]
|
||||
Get custom metadata set on the subvolume using the metadata key:
|
||||
|
||||
List custom metadata (key-value pairs) set on the subvolume using::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph fs subvolume metadata ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
ceph fs subvolume metadata get <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove custom metadata set on the subvolume using the metadata key::
|
||||
List custom metadata (key-value pairs) set on the subvolume using:
|
||||
|
||||
$ ceph fs subvolume metadata rm <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume metadata ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove custom metadata set on the subvolume using the metadata key:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume metadata rm <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed even when the
metadata key does not exist.
|
||||
|
||||
Create a snapshot of a subvolume using::
|
||||
Create a snapshot of a subvolume using:
|
||||
|
||||
$ ceph fs subvolume snapshot create <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume snapshot create <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove a snapshot of a subvolume using::
|
||||
Remove a snapshot of a subvolume using:
|
||||
|
||||
$ ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>] [--force]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed even when the
snapshot does not exist.
|
||||
|
||||
.. note:: if the last snapshot within a snapshot retained subvolume is removed, the subvolume is also removed
|
||||
.. note:: if the last snapshot within a snapshot retained subvolume is removed,
|
||||
the subvolume is also removed
|
||||
|
||||
List snapshots of a subvolume using::
|
||||
List snapshots of a subvolume using:
|
||||
|
||||
$ ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
.. prompt:: bash $
|
||||
|
||||
Fetch the information of a snapshot using::
|
||||
ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
$ ceph fs subvolume snapshot info <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
Fetch the information of a snapshot using:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume snapshot info <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The output format is JSON and contains fields as follows.
|
||||
|
||||
@ -440,27 +518,40 @@ Sample output when no snapshot clone is in progress or pending::
|
||||
"has_pending_clones": "no"
|
||||
}
|
||||
|
||||
Set custom key-value metadata on the snapshot by running::
|
||||
Set custom key-value metadata on the snapshot by running:
|
||||
|
||||
$ ceph fs subvolume snapshot metadata set <vol_name> <subvol_name> <snap_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
.. prompt:: bash $
|
||||
|
||||
.. note:: If the key_name already exists then the old value will get replaced by the new value.
|
||||
ceph fs subvolume snapshot metadata set <vol_name> <subvol_name> <snap_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: The key_name and value should be a strings of ASCII characters (as specified in Python's ``string.printable``). The key_name is case-insensitive and always stored in lowercase.
|
||||
.. note:: If the key_name already exists then the old value will get replaced
|
||||
by the new value.
|
||||
|
||||
.. note:: Custom metadata on a snapshot is not preserved when snapshotting the subvolume, and hence is also not preserved when cloning the subvolume snapshot.
|
||||
.. note:: The key_name and value should be strings of ASCII characters (as
   specified in Python's ``string.printable``). The key_name is
   case-insensitive and always stored in lowercase.
|
||||
|
||||
Get custom metadata set on the snapshot using the metadata key::
|
||||
.. note:: Custom metadata on a snapshot is not preserved when snapshotting the
|
||||
subvolume, and hence is also not preserved when cloning the subvolume
|
||||
snapshot.
|
||||
|
||||
$ ceph fs subvolume snapshot metadata get <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>]
|
||||
Get custom metadata set on the snapshot using the metadata key:
|
||||
|
||||
List custom metadata (key-value pairs) set on the snapshot using::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph fs subvolume snapshot metadata ls <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
ceph fs subvolume snapshot metadata get <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove custom metadata set on the snapshot using the metadata key::
|
||||
List custom metadata (key-value pairs) set on the snapshot using:
|
||||
|
||||
$ ceph fs subvolume snapshot metadata rm <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume snapshot metadata ls <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove custom metadata set on the snapshot using the metadata key:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume snapshot metadata rm <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed even when it would
otherwise fail because the metadata key does not exist.
|
||||
@ -468,47 +559,73 @@ fail if the metadata key did not exist.
|
||||
Cloning Snapshots
|
||||
-----------------
|
||||
|
||||
Subvolumes can be created by cloning subvolume snapshots. Cloning is an asynchronous operation that copies
|
||||
data from a snapshot to a subvolume. Due to this bulk copying, cloning is inefficient for very large
|
||||
data sets.
|
||||
Subvolumes can be created by cloning subvolume snapshots. Cloning is an
|
||||
asynchronous operation that copies data from a snapshot to a subvolume. Due to
|
||||
this bulk copying, cloning is inefficient for very large data sets.
|
||||
|
||||
.. note:: Removing a snapshot (source subvolume) would fail if there are pending or in progress clone operations.
|
||||
.. note:: Removing a snapshot (source subvolume) would fail if there are
|
||||
pending or in progress clone operations.
|
||||
|
||||
Protecting snapshots prior to cloning was a prerequisite in the Nautilus release, and the commands to protect/unprotect
|
||||
snapshots were introduced for this purpose. This prerequisite, and hence the commands to protect/unprotect, is being
|
||||
deprecated and may be removed from a future release.
|
||||
Protecting snapshots prior to cloning was a prerequisite in the Nautilus
|
||||
release, and the commands to protect/unprotect snapshots were introduced for
|
||||
this purpose. This prerequisite, and hence the commands to protect/unprotect,
|
||||
is being deprecated and may be removed from a future release.
|
||||
|
||||
The commands being deprecated are::
|
||||
$ ceph fs subvolume snapshot protect <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
$ ceph fs subvolume snapshot unprotect <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
The commands being deprecated are:
|
||||
|
||||
.. note:: Using the above commands will not result in an error, but they have no useful purpose.
|
||||
.. prompt:: bash #
|
||||
|
||||
.. note:: Use the ``subvolume info`` command to fetch subvolume metadata regarding supported ``features`` to help decide if protect/unprotect of snapshots is required, based on the availability of the ``snapshot-autoprotect`` feature.
|
||||
ceph fs subvolume snapshot protect <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
ceph fs subvolume snapshot unprotect <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
To initiate a clone operation use::
|
||||
.. note:: Using the above commands will not result in an error, but they have
|
||||
no useful purpose.
|
||||
|
||||
$ ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name>
|
||||
.. note:: Use the ``subvolume info`` command to fetch subvolume metadata
|
||||
regarding supported ``features`` to help decide if protect/unprotect of
|
||||
snapshots is required, based on the availability of the
|
||||
``snapshot-autoprotect`` feature.
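For a quick check, the ``features`` list can be inspected directly. A minimal
sketch, assuming a volume ``cephfs`` and a subvolume ``subvol1`` (hypothetical
names); look for ``snapshot-autoprotect`` in the ``features`` array of the
output:

.. prompt:: bash $

   ceph fs subvolume info cephfs subvol1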
|
||||
|
||||
If a snapshot (source subvolume) is a part of non-default group, the group name needs to be specified::
|
||||
To initiate a clone operation use:
|
||||
|
||||
$ ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --group_name <subvol_group_name>
|
||||
.. prompt:: bash $
|
||||
|
||||
Cloned subvolumes can be a part of a different group than the source snapshot (by default, cloned subvolumes are created in default group). To clone to a particular group use::
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name>
|
||||
|
||||
If a snapshot (source subvolume) is a part of a non-default group, the group name
needs to be specified:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --group_name <subvol_group_name>
|
||||
|
||||
Cloned subvolumes can be a part of a different group than the source snapshot
|
||||
(by default, cloned subvolumes are created in the default group). To clone to a
|
||||
particular group use:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --target_group_name <subvol_group_name>
|
||||
|
||||
Similar to specifying a pool layout when creating a subvolume, pool layout can be specified when creating a cloned subvolume. To create a cloned subvolume with a specific pool layout use::
|
||||
Similar to specifying a pool layout when creating a subvolume, pool layout can
|
||||
be specified when creating a cloned subvolume. To create a cloned subvolume
|
||||
with a specific pool layout use:
|
||||
|
||||
$ ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --pool_layout <pool_layout>
|
||||
.. prompt:: bash $
|
||||
|
||||
Configure the maximum number of concurrent clones. The default is 4::
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --pool_layout <pool_layout>
|
||||
|
||||
$ ceph config set mgr mgr/volumes/max_concurrent_clones <value>
|
||||
Configure the maximum number of concurrent clones. The default is 4:
|
||||
|
||||
To check the status of a clone operation use::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph fs clone status <vol_name> <clone_name> [--group_name <group_name>]
|
||||
ceph config set mgr mgr/volumes/max_concurrent_clones <value>
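For example, to raise the limit to 8 and then confirm the change (the value 8
is only illustrative):

.. prompt:: bash $

   ceph config set mgr mgr/volumes/max_concurrent_clones 8
   ceph config get mgr mgr/volumes/max_concurrent_clones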
|
||||
|
||||
To check the status of a clone operation use:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs clone status <vol_name> <clone_name> [--group_name <group_name>]
|
||||
|
||||
A clone can be in one of the following states:
|
||||
|
||||
@ -538,7 +655,8 @@ Here is an example of an ``in-progress`` clone::
|
||||
}
|
||||
}
|
||||
|
||||
.. note:: The ``failure`` section will be shown only if the clone's state is ``failed`` or ``cancelled``
|
||||
.. note:: The ``failure`` section will be shown only if the clone's state is
|
||||
``failed`` or ``cancelled``
|
||||
|
||||
Here is an example of a ``failed`` clone::
|
||||
|
||||
@ -560,9 +678,11 @@ Here is an example of a ``failed`` clone::
|
||||
}
|
||||
}
|
||||
|
||||
(NOTE: since ``subvol1`` is in the default group, the ``source`` object's ``clone status`` does not include the group name)
|
||||
(NOTE: since ``subvol1`` is in the default group, the ``source`` object's
|
||||
``clone status`` does not include the group name)
|
||||
|
||||
.. note:: Cloned subvolumes are accessible only after the clone operation has successfully completed.
|
||||
.. note:: Cloned subvolumes are accessible only after the clone operation has
|
||||
successfully completed.
|
||||
|
||||
After a successful clone operation, ``clone status`` will look like the below::
|
||||
|
||||
@ -576,37 +696,47 @@ After a successful clone operation, ``clone status`` will look like the below::
|
||||
If a clone operation is unsuccessful, the ``state`` value will be ``failed``.
|
||||
|
||||
To retry a failed clone operation, the incomplete clone must be deleted and the
|
||||
clone operation must be issued again. To delete a partial clone use::
|
||||
clone operation must be issued again. To delete a partial clone use:
|
||||
|
||||
$ ceph fs subvolume rm <vol_name> <clone_name> [--group_name <group_name>] --force
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume rm <vol_name> <clone_name> [--group_name <group_name>] --force
|
||||
|
||||
.. note:: Cloning synchronizes only directories, regular files and symbolic
|
||||
links. Inode timestamps (access and modification times) are synchronized up
|
||||
to seconds granularity.
|
||||
|
||||
An ``in-progress`` or a ``pending`` clone operation may be canceled. To cancel
|
||||
a clone operation use the ``clone cancel`` command::
|
||||
a clone operation use the ``clone cancel`` command:
|
||||
|
||||
$ ceph fs clone cancel <vol_name> <clone_name> [--group_name <group_name>]
|
||||
.. prompt:: bash $
|
||||
|
||||
On successful cancellation, the cloned subvolume is moved to the ``canceled``
|
||||
state::
|
||||
ceph fs clone cancel <vol_name> <clone_name> [--group_name <group_name>]
|
||||
|
||||
$ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1
|
||||
$ ceph fs clone cancel cephfs clone1
|
||||
$ ceph fs clone status cephfs clone1
|
||||
{
|
||||
"status": {
|
||||
"state": "canceled",
|
||||
"source": {
|
||||
"volume": "cephfs",
|
||||
"subvolume": "subvol1",
|
||||
"snapshot": "snap1"
|
||||
}
|
||||
On successful cancellation, the cloned subvolume is moved to the ``canceled`` state:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1
|
||||
ceph fs clone cancel cephfs clone1
|
||||
ceph fs clone status cephfs clone1
|
||||
|
||||
::
|
||||
|
||||
{
|
||||
"status": {
|
||||
"state": "canceled",
|
||||
"source": {
|
||||
"volume": "cephfs",
|
||||
"subvolume": "subvol1",
|
||||
"snapshot": "snap1"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.. note:: The canceled cloned may be deleted by supplying the ``--force`` option to the `fs subvolume rm` command.
|
||||
.. note:: The canceled clone may be deleted by supplying the ``--force``
   option to the ``fs subvolume rm`` command.
|
||||
|
||||
|
||||
.. _subvol-pinning:
|
||||
@ -614,28 +744,33 @@ state::
|
||||
Pinning Subvolumes and Subvolume Groups
|
||||
---------------------------------------
|
||||
|
||||
|
||||
Subvolumes and subvolume groups may be automatically pinned to ranks according
|
||||
to policies. This can distribute load across MDS ranks in predictable and
|
||||
stable ways. Review :ref:`cephfs-pinning` and :ref:`cephfs-ephemeral-pinning`
|
||||
for details on how pinning works.
|
||||
|
||||
Pinning is configured by::
|
||||
Pinning is configured by:
|
||||
|
||||
$ ceph fs subvolumegroup pin <vol_name> <group_name> <pin_type> <pin_setting>
|
||||
.. prompt:: bash $
|
||||
|
||||
or for subvolumes::
|
||||
ceph fs subvolumegroup pin <vol_name> <group_name> <pin_type> <pin_setting>
|
||||
|
||||
$ ceph fs subvolume pin <vol_name> <group_name> <pin_type> <pin_setting>
|
||||
or for subvolumes:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolume pin <vol_name> <group_name> <pin_type> <pin_setting>
|
||||
|
||||
Typically you will want to set subvolume group pins. The ``pin_type`` may be
|
||||
one of ``export``, ``distributed``, or ``random``. The ``pin_setting``
|
||||
corresponds to the extended attributed "value" as in the pinning documentation
|
||||
referenced above.
|
||||
|
||||
So, for example, setting a distributed pinning strategy on a subvolume group::
|
||||
So, for example, setting a distributed pinning strategy on a subvolume group:
|
||||
|
||||
$ ceph fs subvolumegroup pin cephfilesystem-a csi distributed 1
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph fs subvolumegroup pin cephfilesystem-a csi distributed 1
|
||||
|
||||
This will enable the distributed subtree partitioning policy for the "csi" subvolume
group. This will cause every subvolume within the group to be automatically
|
||||
|
@ -123,7 +123,9 @@ other daemons, please see :ref:`health-checks`.
|
||||
from properly cleaning up resources used by client requests. This message
|
||||
appears if a client appears to have more than ``max_completed_requests``
|
||||
(default 100000) requests that are complete on the MDS side but haven't
|
||||
yet been accounted for in the client's *oldest tid* value.
|
||||
yet been accounted for in the client's *oldest tid* value. As a debug aid, the
last tid used by the MDS to trim completed client requests (or flush) is
included in the output of the `session ls` (or `client ls`) command.
|
||||
* ``MDS_DAMAGE``
|
||||
|
||||
Message
|
||||
@ -168,3 +170,15 @@ other daemons, please see :ref:`health-checks`.
|
||||
the actual cache size (in memory) is at least 50% greater than
|
||||
``mds_cache_memory_limit`` (default 1GB). Modify ``mds_health_cache_threshold``
|
||||
to set the warning ratio.
|
||||
|
||||
* ``MDS_CLIENTS_LAGGY``
|
||||
|
||||
Message
|
||||
"Client *ID* is laggy; not evicted because some OSD(s) is/are laggy"
|
||||
|
||||
Description
|
||||
If an OSD is laggy (due to conditions such as a network cut-off), clients may
become laggy as well (sessions may go idle or be unable to flush dirty data
for cap revokes). If ``defer_client_eviction_on_laggy_osds`` is set to true
(the default), such clients are not evicted and this health warning is
generated instead.
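If you would rather have such clients evicted even while OSDs are laggy, the
deferral behaviour can presumably be turned off by changing the option named
above; a sketch, assuming the option is settable at runtime like other MDS
options:

.. prompt:: bash $

   ceph config set mds defer_client_eviction_on_laggy_osds false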
|
||||
|
@ -501,6 +501,25 @@
|
||||
:Type: 32-bit Integer
|
||||
:Default: ``0``
|
||||
|
||||
``mds_inject_skip_replaying_inotable``
|
||||
|
||||
:Description: Ceph will skip replaying the inotable when replaying the journal,
              and the primary MDS will crash, while the replacing MDS won't.
              (For developers only.)
|
||||
|
||||
:Type: Boolean
|
||||
:Default: ``false``
|
||||
|
||||
|
||||
``mds_kill_skip_replaying_inotable``
|
||||
|
||||
:Description: Ceph will skip replaying the inotable when replaying the journal,
              and the primary MDS will crash, while the replacing MDS won't.
              (For developers only.)
|
||||
|
||||
:Type: Boolean
|
||||
:Default: ``false``
|
||||
|
||||
|
||||
``mds_wipe_sessions``
|
||||
|
||||
|
@ -53,7 +53,8 @@ If you have more than one FS on your Ceph cluster, use the option
|
||||
|
||||
ceph-fuse --id foo --client_fs mycephfs2 /mnt/mycephfs2
|
||||
|
||||
You may also add a ``client_fs`` setting to your ``ceph.conf``
|
||||
You may also add a ``client_fs`` setting to your ``ceph.conf``. Alternatively, the option
|
||||
``--client_mds_namespace`` is supported for backward compatibility.
|
||||
|
||||
Unmounting CephFS
|
||||
=================
|
||||
|
@ -96,6 +96,28 @@ non-default FS as follows::
|
||||
|
||||
mount -t ceph :/ /mnt/mycephfs2 -o name=fs,fs=mycephfs2
|
||||
|
||||
Backward Compatibility
|
||||
======================
|
||||
The old syntax is supported for backward compatibility.
|
||||
|
||||
To mount CephFS with the kernel driver::
|
||||
|
||||
mkdir /mnt/mycephfs
|
||||
mount -t ceph :/ /mnt/mycephfs -o name=admin
|
||||
|
||||
The key-value argument right after the option ``-o`` is the CephX credential;
``name`` is the username of the CephX user we are using to mount CephFS.
|
||||
|
||||
To mount a non-default FS ``cephfs2``, in case the cluster has multiple FSs::
|
||||
|
||||
mount -t ceph :/ /mnt/mycephfs -o name=admin,fs=cephfs2
|
||||
|
||||
or
|
||||
|
||||
mount -t ceph :/ /mnt/mycephfs -o name=admin,mds_namespace=cephfs2
|
||||
|
||||
.. note:: The option ``mds_namespace`` is deprecated. Use ``fs=`` instead when using the old syntax for mounting.
|
||||
|
||||
Unmounting CephFS
|
||||
=================
|
||||
To unmount the Ceph file system, use the ``umount`` command as usual::
|
||||
|
@ -60,6 +60,18 @@ added as comments in the sample conf. There are options to do the following:
|
||||
- enable read delegations (need at least v13.0.1 'libcephfs2' package
|
||||
and v2.6.0 stable 'nfs-ganesha' and 'nfs-ganesha-ceph' packages)
|
||||
|
||||
.. important::
|
||||
|
||||
Under certain conditions, NFS access using the CephFS FSAL fails. This
|
||||
causes an error to be thrown that reads "Input/output error". Under these
|
||||
circumstances, the application metadata must be set for the CephFS metadata
|
||||
and CephFS data pools. Do this by running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool application set <cephfs_metadata_pool> cephfs <cephfs_data_pool> cephfs
|
||||
|
||||
|
||||
Configuration for libcephfs clients
|
||||
-----------------------------------
|
||||
|
||||
|
@ -143,3 +143,14 @@ The types of damage that can be reported and repaired by File System Scrub are:
|
||||
|
||||
* BACKTRACE : Inode's backtrace in the data pool is corrupted.
|
||||
|
||||
Evaluate strays using recursive scrub
|
||||
=====================================
|
||||
|
||||
- In order to evaluate strays, i.e. to purge stray directories in ``~mdsdir``, use the following command::
|
||||
|
||||
ceph tell mds.<fsname>:0 scrub start ~mdsdir recursive
|
||||
|
||||
- ``~mdsdir`` is not enqueued by default when scrubbing at the CephFS root. In order to perform stray evaluation
|
||||
at root, run scrub with flags ``scrub_mdsdir`` and ``recursive``::
|
||||
|
||||
ceph tell mds.<fsname>:0 scrub start / recursive,scrub_mdsdir
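Either way, the progress of the scrub can then be followed with the scrub
status command (shown here for rank 0 of the same file system)::

    ceph tell mds.<fsname>:0 scrub status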
|
||||
|
@ -142,6 +142,19 @@ Examples::
|
||||
ceph fs snap-schedule retention add / 24h4w # add 24 hourly and 4 weekly to retention
|
||||
ceph fs snap-schedule retention remove / 7d4w # remove 7 daily and 4 weekly, leaves 24 hourly
|
||||
|
||||
.. note:: When adding a path to snap-schedule, remember to strip off the mount
   point path prefix. Paths given to snap-schedule should start at the
   appropriate CephFS file system root, not at the host file system root.
   For example, if the Ceph File System is mounted at ``/mnt`` and the path
   under which snapshots need to be taken is ``/mnt/some/path``, then the
   actual path required by snap-schedule is only ``/some/path``.
|
||||
|
||||
.. note:: The "created" field in the snap-schedule status command output is the
   timestamp at which the schedule was created. The "created" timestamp has
   nothing to do with the creation of actual snapshots. Actual snapshot
   creation is accounted for in the "created_count" field, which is a
   cumulative count of the total number of snapshots created so far.
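Both fields can be inspected at any time with the status command; a sketch,
assuming a schedule already exists on ``/some/path`` (a hypothetical path)::

    ceph fs snap-schedule status /some/path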
|
||||
|
||||
Active and inactive schedules
|
||||
-----------------------------
|
||||
Snapshot schedules can be added for a path that doesn't exist yet in the
|
||||
|
@ -188,6 +188,98 @@ You can enable dynamic debug against the CephFS module.
|
||||
|
||||
Please see: https://github.com/ceph/ceph/blob/master/src/script/kcon_all.sh
|
||||
|
||||
In-memory Log Dump
|
||||
==================
|
||||
|
||||
In-memory logs can be dumped by setting ``mds_extraordinary_events_dump_interval``
|
||||
during a lower level debugging (log level < 10). ``mds_extraordinary_events_dump_interval``
|
||||
is the interval in seconds for dumping the recent in-memory logs when there is an Extra-Ordinary event.
|
||||
|
||||
The Extra-Ordinary events are classified as:
|
||||
|
||||
* Client Eviction
|
||||
* Missed Beacon ACK from the monitors
|
||||
* Missed Internal Heartbeats
|
||||
|
||||
In-memory Log Dump is disabled by default to prevent log file bloat in a production environment.
The following commands, run in order, enable it::
|
||||
|
||||
$ ceph config set mds debug_mds <log_level>/<gather_level>
|
||||
$ ceph config set mds mds_extraordinary_events_dump_interval <seconds>
|
||||
|
||||
The ``log_level`` should be < 10 and ``gather_level`` should be >= 10 to enable in-memory log dump.
|
||||
When it is enabled, the MDS checks for the extra-ordinary events every
|
||||
``mds_extraordinary_events_dump_interval`` seconds and if any of them occurs, MDS dumps the
|
||||
in-memory logs containing the relevant event details in ceph-mds log.
|
||||
|
||||
.. note:: At higher log levels (log_level >= 10) there is no reason to dump the
   in-memory logs, and a lower gather level (gather_level < 10) is insufficient
   to gather them. Thus a log level >= 10 or a gather level < 10 in ``debug_mds``
   prevents enabling the In-memory Log Dump. In such cases, if enabling fails,
   reset ``mds_extraordinary_events_dump_interval`` to 0 before enabling it
   again with the above commands.
|
||||
|
||||
The In-memory Log Dump can be disabled using::
|
||||
|
||||
$ ceph config set mds mds_extraordinary_events_dump_interval 0
|
||||
|
||||
Filesystems Become Inaccessible After an Upgrade
|
||||
================================================
|
||||
|
||||
.. note::
|
||||
You can avoid ``operation not permitted`` errors by running this procedure
|
||||
before an upgrade. As of May 2023, it seems that ``operation not permitted``
|
||||
errors of the kind discussed here occur after upgrades after Nautilus
|
||||
(inclusive).
|
||||
|
||||
IF
|
||||
|
||||
you have CephFS file systems that have data and metadata pools that were
|
||||
created by a ``ceph fs new`` command (meaning that they were not created
|
||||
with the defaults)
|
||||
|
||||
OR
|
||||
|
||||
you have an existing CephFS file system and are upgrading to a new post-Nautilus
|
||||
major version of Ceph
|
||||
|
||||
THEN
|
||||
|
||||
in order for the documented ``ceph fs authorize...`` commands to function as
|
||||
documented (and to avoid 'operation not permitted' errors when doing file I/O
|
||||
or similar security-related problems for all users except the ``client.admin``
|
||||
user), you must first run:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool application set <your metadata pool name> cephfs metadata <your ceph fs filesystem name>
|
||||
|
||||
and
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool application set <your data pool name> cephfs data <your ceph fs filesystem name>
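For example, for a file system named ``cephfs_a`` whose pools were created
manually as ``cephfs_a_meta`` and ``cephfs_a_data`` (all three names are
hypothetical), the two commands would look like this:

.. prompt:: bash $

   ceph osd pool application set cephfs_a_meta cephfs metadata cephfs_a
   ceph osd pool application set cephfs_a_data cephfs data cephfs_a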
|
||||
|
||||
Otherwise, when the OSDs receive a request to read or write data (not the
|
||||
directory info, but file data) they will not know which Ceph file system name
|
||||
to look up. This is true also of pool names, because the 'defaults' themselves
|
||||
changed in the major releases, from::
|
||||
|
||||
data pool=fsname
|
||||
metadata pool=fsname_metadata
|
||||
|
||||
to::
|
||||
|
||||
data pool=fsname.data and
|
||||
metadata pool=fsname.meta
|
||||
|
||||
Any setup that used ``client.admin`` for all mounts did not run into this
|
||||
problem, because the admin key gave blanket permissions.
|
||||
|
||||
A temporary fix involves changing mount requests to the 'client.admin' user and
its associated key. A less drastic but only partial fix is to change the osd cap
for your user to just ``caps osd = "allow rw"`` and to delete ``tag cephfs
data=....``
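A sketch of what such a cap change might look like for a hypothetical user
``client.foo`` (note that ``ceph auth caps`` replaces all of the entity's caps
at once, so the mon and mds caps shown here are placeholders for whatever the
user already has):

.. prompt:: bash $

   ceph auth caps client.foo mon 'allow r' mds 'allow rw' osd 'allow rw'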
|
||||
|
||||
Reporting Issues
|
||||
================
|
||||
|
||||
|
@ -87,7 +87,8 @@ Optionals are represented as a presence byte, followed by the item if it exists.
|
||||
T element[present? 1 : 0]; // Only if present is non-zero.
|
||||
}
|
||||
|
||||
Optionals are used to encode ``boost::optional``.
|
||||
Optionals are used to encode ``boost::optional`` and, since introducing
|
||||
C++17 to Ceph, ``std::optional``.
|
||||
|
||||
Pair
|
||||
----
|
||||
|
@ -5,7 +5,7 @@ jerasure plugin
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The parameters interpreted by the jerasure plugin are:
|
||||
The parameters interpreted by the ``jerasure`` plugin are:
|
||||
|
||||
::
|
||||
|
||||
@ -31,3 +31,5 @@ upstream repositories `http://jerasure.org/jerasure/jerasure
|
||||
`http://jerasure.org/jerasure/gf-complete
|
||||
<http://jerasure.org/jerasure/gf-complete>`_ . The difference
|
||||
between the two, if any, should match pull requests against upstream.
|
||||
Note that as of 2023, the ``jerasure.org`` web site may no longer be
|
||||
legitimate and/or associated with the original project.
|
||||
|
93
ceph/doc/dev/osd_internals/past_intervals.rst
Normal file
@ -0,0 +1,93 @@
|
||||
=============
|
||||
PastIntervals
|
||||
=============
|
||||
|
||||
Purpose
|
||||
-------
|
||||
|
||||
There are two situations where we need to consider the set of all acting-set
|
||||
OSDs for a PG back to some epoch ``e``:
|
||||
|
||||
* During peering, we need to consider the acting set for every epoch back to
|
||||
``last_epoch_started``, the last epoch in which the PG completed peering and
|
||||
became active.
|
||||
(see :doc:`/dev/osd_internals/last_epoch_started` for a detailed explanation)
|
||||
* During recovery, we need to consider the acting set for every epoch back to
|
||||
``last_epoch_clean``, the last epoch at which all of the OSDs in the acting
|
||||
set were fully recovered, and the acting set was full.
|
||||
|
||||
For either of these purposes, we could build such a set by iterating backwards
|
||||
from the current OSDMap to the relevant epoch. Instead, we maintain a structure
|
||||
PastIntervals for each PG.
|
||||
|
||||
An ``interval`` is a contiguous sequence of OSDMap epochs where the PG mapping
|
||||
didn't change. This includes changes to the acting set, the up set, the
|
||||
primary, and several other parameters fully spelled out in
|
||||
PastIntervals::check_new_interval.
|
||||
|
||||
Maintenance and Trimming
|
||||
------------------------
|
||||
|
||||
The PastIntervals structure stores a record for each ``interval`` back to
|
||||
last_epoch_clean. On each new ``interval`` (See AdvMap reactions,
|
||||
PeeringState::should_restart_peering, and PeeringState::start_peering_interval)
|
||||
each OSD with the PG will add the new ``interval`` to its local PastIntervals.
|
||||
Activation messages to OSDs which do not already have the PG contain the
|
||||
sender's PastIntervals so that the recipient needn't rebuild it. (See
|
||||
PeeringState::activate needs_past_intervals).
|
||||
|
||||
PastIntervals are trimmed in two places. First, when the primary marks the
|
||||
PG clean, it clears its past_intervals instance
|
||||
(PeeringState::try_mark_clean()). The replicas will do the same thing when
|
||||
they receive the info (See PeeringState::update_history).
|
||||
|
||||
The second, more complex, case is in PeeringState::start_peering_interval. In
|
||||
the event of a "map gap", we assume that the PG actually has gone clean, but we
|
||||
haven't received a pg_info_t with the updated ``last_epoch_clean`` value yet.
|
||||
To explain this behavior, we need to discuss OSDMap trimming.
|
||||
|
||||
OSDMap Trimming
|
||||
---------------
|
||||
|
||||
OSDMaps are created by the Monitor quorum and gossiped out to the OSDs. The
|
||||
Monitor cluster also determines when OSDs (and the Monitors) are allowed to
|
||||
trim old OSDMap epochs. For the reasons explained above in this document, the
|
||||
primary constraint is that we must retain all OSDMaps back to some epoch such
|
||||
that all PGs have been clean at that or a later epoch (min_last_epoch_clean).
|
||||
(See OSDMonitor::get_trim_to).
|
||||
|
||||
The Monitor quorum determines min_last_epoch_clean through MOSDBeacon messages
|
||||
sent periodically by each OSD. Each message contains the set of PGs for which
the OSD is primary at that moment, as well as the min_last_epoch_clean across
|
||||
that set. The Monitors track these values in OSDMonitor::last_epoch_clean.
|
||||
|
||||
There is a subtlety in the min_last_epoch_clean value used by the OSD to
|
||||
populate the MOSDBeacon. OSD::collect_pg_stats invokes PG::with_pg_stats to
|
||||
obtain the lec value, which actually uses
|
||||
pg_stat_t::get_effective_last_epoch_clean() rather than
|
||||
info.history.last_epoch_clean. If the PG is currently clean,
|
||||
pg_stat_t::get_effective_last_epoch_clean() is the current epoch rather than
|
||||
last_epoch_clean -- this works because the PG is clean at that epoch and it
|
||||
allows OSDMaps to be trimmed during periods where OSDMaps are being created
|
||||
(due to snapshot activity, perhaps), but no PGs are undergoing ``interval``
|
||||
changes.
|
||||
|
||||
Back to PastIntervals
|
||||
---------------------
|
||||
|
||||
We can now understand our second trimming case above. If OSDMaps have been
|
||||
trimmed up to epoch ``e``, we know that the PG must have been clean at some epoch
|
||||
>= ``e`` (indeed, **all** PGs must have been), so we can drop our PastIntervals.
|
||||
|
||||
This dependency also pops up in PeeringState::check_past_interval_bounds().
|
||||
PeeringState::get_required_past_interval_bounds takes as a parameter
|
||||
oldest_epoch, which comes from OSDSuperblock::cluster_osdmap_trim_lower_bound.
|
||||
We use cluster_osdmap_trim_lower_bound rather than a specific OSD's oldest_map
because an OSD does not necessarily trim all the way up to
MOSDMap::cluster_osdmap_trim_lower_bound.
|
||||
In order to avoid doing too much work at once we limit the amount of osdmaps
|
||||
trimmed using ``osd_target_transaction_size`` in OSD::trim_maps().
|
||||
For this reason, a specific OSD's oldest_map can lag behind
|
||||
OSDSuperblock::cluster_osdmap_trim_lower_bound
|
||||
for a while.
|
||||
|
||||
See https://tracker.ceph.com/issues/49689 for an example.
|
@ -12,12 +12,13 @@
|
||||
:ref:`BlueStore<rados_config_storage_devices_bluestore>`
|
||||
OSD BlueStore is a storage back end used by OSD daemons, and
|
||||
was designed specifically for use with Ceph. BlueStore was
|
||||
introduced in the Ceph Kraken release. In the Ceph Luminous
|
||||
release, BlueStore became Ceph's default storage back end,
|
||||
supplanting FileStore. Unlike :term:`filestore`, BlueStore
|
||||
stores objects directly on Ceph block devices without any file
|
||||
system interface. Since Luminous (12.2), BlueStore has been
|
||||
Ceph's default and recommended storage back end.
|
||||
introduced in the Ceph Kraken release. The Luminous release of
|
||||
Ceph promoted BlueStore to the default OSD back end,
|
||||
supplanting FileStore. As of the Reef release, FileStore is no
|
||||
longer available as a storage backend.
|
||||
|
||||
BlueStore stores objects directly on Ceph block devices without
|
||||
a mounted file system.
|
||||
|
||||
Bucket
|
||||
In the context of :term:`RGW`, a bucket is a group of objects.
|
||||
|
@ -11,6 +11,12 @@ Ceph delivers **object, block, and file storage in one unified system**.
|
||||
Ceph project. (Click anywhere in this paragraph to read the "Basic
|
||||
Workflow" page of the Ceph Developer Guide.) <basic workflow dev guide>`.
|
||||
|
||||
.. note::
|
||||
|
||||
:ref:`If you want to make a commit to the documentation but you don't
|
||||
know how to get started, read the "Documenting Ceph" page. (Click anywhere
|
||||
in this paragraph to read the "Documenting Ceph" page.) <documenting_ceph>`.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<style type="text/css">div.body h3{margin:5px 0px 0px 0px;}</style>
|
||||
|
@ -36,6 +36,22 @@ Options
|
||||
|
||||
Perform a selftest. This mode performs a sanity check of the ``stats`` module.
|
||||
|
||||
.. option:: --conffile [CONFFILE]
|
||||
|
||||
Path to cluster configuration file
|
||||
|
||||
.. option:: -d [DELAY], --delay [DELAY]
|
||||
|
||||
Refresh interval in seconds (default: 1)
|
||||
|
||||
.. option:: --dump
|
||||
|
||||
Dump the metrics to stdout
|
||||
|
||||
.. option:: --dumpfs <fs_name>
|
||||
|
||||
Dump the metrics of the given filesystem to stdout
|
||||
|
||||
Descriptions of fields
|
||||
======================
|
||||
|
||||
|
@ -110,6 +110,12 @@ Basic
|
||||
them. If an inode contains any stale file locks, read/write on the inode
|
||||
is not allowed until applications release all stale file locks.
|
||||
|
||||
:command:`fs=<fs-name>`
    Specify the non-default file system to be mounted, when using the old syntax.

:command:`mds_namespace=<fs-name>`
    A synonym of "fs=" (Deprecated).
|
||||
|
||||
Advanced
|
||||
--------
|
||||
:command:`cap_release_safety`
|
||||
@ -236,6 +242,10 @@ history::
|
||||
mount.ceph :/ /mnt/mycephfs -o name=fs_username,secretfile=/etc/ceph/fs_username.secret
|
||||
|
||||
|
||||
To mount using the old syntax::
|
||||
|
||||
mount -t ceph 192.168.0.1:/ /mnt/mycephfs
|
||||
|
||||
Availability
|
||||
============
|
||||
|
||||
|
@ -18,9 +18,11 @@ for all reporting entities are returned in text exposition format.
|
||||
Enabling prometheus output
|
||||
==========================
|
||||
|
||||
The *prometheus* module is enabled with::
|
||||
The *prometheus* module is enabled with:
|
||||
|
||||
ceph mgr module enable prometheus
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mgr module enable prometheus
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
@ -36,10 +38,10 @@ configurable with ``ceph config set``, with keys
|
||||
is registered with Prometheus's `registry
|
||||
<https://github.com/prometheus/prometheus/wiki/Default-port-allocations>`_.
|
||||
|
||||
::
|
||||
|
||||
ceph config set mgr mgr/prometheus/server_addr 0.0.0.0
|
||||
ceph config set mgr mgr/prometheus/server_port 9283
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/server_addr 0.0.0.0
|
||||
ceph config set mgr mgr/prometheus/server_port 9283
|
||||
|
||||
.. warning::
|
||||
|
||||
@ -54,9 +56,11 @@ recommended to use 15 seconds as scrape interval, though, in some cases it
|
||||
might be useful to increase the scrape interval.
|
||||
|
||||
To set a different scrape interval in the Prometheus module, set
|
||||
``scrape_interval`` to the desired value::
|
||||
``scrape_interval`` to the desired value:
|
||||
|
||||
ceph config set mgr mgr/prometheus/scrape_interval 20
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/scrape_interval 20
|
||||
|
||||
On large clusters (>1000 OSDs), the time to fetch the metrics may become
|
||||
significant. Without the cache, the Prometheus manager module could, especially
|
||||
@ -75,35 +79,47 @@ This behavior can be configured. By default, it will return a 503 HTTP status
|
||||
code (service unavailable). You can set other options using the ``ceph config
|
||||
set`` commands.
|
||||
|
||||
To tell the module to respond with possibly stale data, set it to ``return``::
|
||||
To tell the module to respond with possibly stale data, set it to ``return``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/stale_cache_strategy return
|
||||
|
||||
To tell the module to respond with "service unavailable", set it to ``fail``::
|
||||
To tell the module to respond with "service unavailable", set it to ``fail``:
|
||||
|
||||
ceph config set mgr mgr/prometheus/stale_cache_strategy fail
|
||||
.. prompt:: bash $
|
||||
|
||||
If you are confident that you don't require the cache, you can disable it::
|
||||
ceph config set mgr mgr/prometheus/stale_cache_strategy fail
|
||||
|
||||
ceph config set mgr mgr/prometheus/cache false
|
||||
If you are confident that you don't require the cache, you can disable it:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/cache false
|
||||
|
||||
If you are using the prometheus module behind some kind of reverse proxy or
|
||||
loadbalancer, you can simplify discovering the active instance by switching
|
||||
to ``error``-mode::
|
||||
to ``error``-mode:
|
||||
|
||||
ceph config set mgr mgr/prometheus/standby_behaviour error
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/standby_behaviour error
|
||||
|
||||
If set, the prometheus module will respond with an HTTP error when requesting ``/``
|
||||
from the standby instance. The default error code is 500, but you can configure
|
||||
the HTTP response code with::
|
||||
the HTTP response code with:
|
||||
|
||||
ceph config set mgr mgr/prometheus/standby_error_status_code 503
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/standby_error_status_code 503
|
||||
|
||||
Valid error codes are between 400-599.
|
||||
|
||||
To switch back to the default behaviour, simply set the config key to ``default``::
|
||||
To switch back to the default behaviour, simply set the config key to ``default``:
|
||||
|
||||
ceph config set mgr mgr/prometheus/standby_behaviour default
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/standby_behaviour default
|
||||
|
||||
.. _prometheus-rbd-io-statistics:
|
||||
|
||||
@ -154,9 +170,17 @@ configuration parameter. The parameter is a comma or space separated list
|
||||
of ``pool[/namespace]`` entries. If the namespace is not specified the
|
||||
statistics are collected for all namespaces in the pool.
|
||||
|
||||
Example to activate the RBD-enabled pools ``pool1``, ``pool2`` and ``poolN``::
|
||||
Example to activate the RBD-enabled pools ``pool1``, ``pool2`` and ``poolN``:
|
||||
|
||||
ceph config set mgr mgr/prometheus/rbd_stats_pools "pool1,pool2,poolN"
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/rbd_stats_pools "pool1,pool2,poolN"
|
||||
|
||||
The wildcard can be used to indicate all pools or namespaces:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/rbd_stats_pools "*"
|
||||
|
||||
The module makes the list of all available images scanning the specified
|
||||
pools and namespaces and refreshes it periodically. The period is
|
||||
@ -165,9 +189,22 @@ parameter (in sec) and is 300 sec (5 minutes) by default. The module will
|
||||
force refresh earlier if it detects statistics from a previously unknown
|
||||
RBD image.
|
||||
|
||||
Example to turn up the sync interval to 10 minutes::
|
||||
Example to turn up the sync interval to 10 minutes:
|
||||
|
||||
ceph config set mgr mgr/prometheus/rbd_stats_pools_refresh_interval 600
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/rbd_stats_pools_refresh_interval 600
|
||||
|
||||
Ceph daemon performance counters metrics
|
||||
-----------------------------------------
|
||||
|
||||
With the introduction of the ``ceph-exporter`` daemon, the prometheus module no longer exports Ceph daemon
perf counters as prometheus metrics by default. However, one may re-enable exporting these metrics by setting
|
||||
the module option ``exclude_perf_counters`` to ``false``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/prometheus/exclude_perf_counters false
|
||||
|
||||
Statistic names and labels
|
||||
==========================
|
||||
|
@ -153,3 +153,24 @@ completely optional, and disabled by default.::
|
||||
ceph config set mgr mgr/telemetry/description 'My first Ceph cluster'
|
||||
ceph config set mgr mgr/telemetry/channel_ident true
|
||||
|
||||
Leaderboard
|
||||
-----------
|
||||
|
||||
To participate in a leaderboard in the `public dashboards
|
||||
<https://telemetry-public.ceph.com/>`_, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/telemetry/leaderboard true
|
||||
|
||||
The leaderboard displays basic information about the cluster. This includes the
|
||||
total storage capacity and the number of OSDs. To add a description of the
|
||||
cluster, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/telemetry/leaderboard_description 'Ceph cluster for Computational Biology at the University of XYZ'
|
||||
|
||||
If the ``ident`` channel is enabled, its details will not be displayed in the
|
||||
leaderboard.
|
||||
|
||||
|
@ -1,84 +1,95 @@
|
||||
==========================
|
||||
BlueStore Config Reference
|
||||
==========================
|
||||
==================================
|
||||
BlueStore Configuration Reference
|
||||
==================================
|
||||
|
||||
Devices
|
||||
=======
|
||||
|
||||
BlueStore manages either one, two, or (in certain cases) three storage
|
||||
devices.
|
||||
BlueStore manages either one, two, or in certain cases three storage devices.
|
||||
These *devices* are "devices" in the Linux/Unix sense. This means that they are
|
||||
assets listed under ``/dev`` or ``/devices``. Each of these devices may be an
|
||||
entire storage drive, or a partition of a storage drive, or a logical volume.
|
||||
BlueStore does not create or mount a conventional file system on devices that
|
||||
it uses; BlueStore reads and writes to the devices directly in a "raw" fashion.
|
||||
|
||||
In the simplest case, BlueStore consumes a single (primary) storage device.
|
||||
The storage device is normally used as a whole, occupying the full device that
|
||||
is managed directly by BlueStore. This *primary device* is normally identified
|
||||
by a ``block`` symlink in the data directory.
|
||||
In the simplest case, BlueStore consumes all of a single storage device. This
|
||||
device is known as the *primary device*. The primary device is identified by
|
||||
the ``block`` symlink in the data directory.
|
||||
|
||||
The data directory is a ``tmpfs`` mount which gets populated (at boot time, or
|
||||
when ``ceph-volume`` activates it) with all the common OSD files that hold
|
||||
information about the OSD, like: its identifier, which cluster it belongs to,
|
||||
and its private keyring.
|
||||
The data directory is a ``tmpfs`` mount. When this data directory is booted or
|
||||
activated by ``ceph-volume``, it is populated with metadata files and links
|
||||
that hold information about the OSD: for example, the OSD's identifier, the
|
||||
name of the cluster that the OSD belongs to, and the OSD's private keyring.
|
||||
|
||||
It is also possible to deploy BlueStore across one or two additional devices:
|
||||
In more complicated cases, BlueStore is deployed across one or two additional
|
||||
devices:
|
||||
|
||||
* A *write-ahead log (WAL) device* (identified as ``block.wal`` in the data directory) can be
|
||||
used for BlueStore's internal journal or write-ahead log. It is only useful
|
||||
to use a WAL device if the device is faster than the primary device (e.g.,
|
||||
when it is on an SSD and the primary device is an HDD).
|
||||
* A *write-ahead log (WAL) device* (identified as ``block.wal`` in the data
|
||||
directory) can be used to separate out BlueStore's internal journal or
|
||||
write-ahead log. Using a WAL device is advantageous only if the WAL device
|
||||
is faster than the primary device (for example, if the WAL device is an SSD
|
||||
and the primary device is an HDD).
|
||||
* A *DB device* (identified as ``block.db`` in the data directory) can be used
|
||||
for storing BlueStore's internal metadata. BlueStore (or rather, the
|
||||
embedded RocksDB) will put as much metadata as it can on the DB device to
|
||||
improve performance. If the DB device fills up, metadata will spill back
|
||||
onto the primary device (where it would have been otherwise). Again, it is
|
||||
only helpful to provision a DB device if it is faster than the primary
|
||||
device.
|
||||
to store BlueStore's internal metadata. BlueStore (or more precisely, the
|
||||
embedded RocksDB) will put as much metadata as it can on the DB device in
|
||||
order to improve performance. If the DB device becomes full, metadata will
|
||||
spill back onto the primary device (where it would have been located in the
|
||||
absence of the DB device). Again, it is advantageous to provision a DB device
|
||||
only if it is faster than the primary device.
|
||||
|
||||
If there is only a small amount of fast storage available (e.g., less
|
||||
than a gigabyte), we recommend using it as a WAL device. If there is
|
||||
more, provisioning a DB device makes more sense. The BlueStore
|
||||
journal will always be placed on the fastest device available, so
|
||||
using a DB device will provide the same benefit that the WAL device
|
||||
would while *also* allowing additional metadata to be stored there (if
|
||||
it will fit). This means that if a DB device is specified but an explicit
|
||||
WAL device is not, the WAL will be implicitly colocated with the DB on the faster
|
||||
device.
|
||||
If there is only a small amount of fast storage available (for example, less
|
||||
than a gigabyte), we recommend using the available space as a WAL device. But
|
||||
if more fast storage is available, it makes more sense to provision a DB
|
||||
device. Because the BlueStore journal is always placed on the fastest device
|
||||
available, using a DB device provides the same benefit that using a WAL device
|
||||
would, while *also* allowing additional metadata to be stored off the primary
|
||||
device (provided that it fits). DB devices make this possible because whenever
|
||||
a DB device is specified but an explicit WAL device is not, the WAL will be
|
||||
implicitly colocated with the DB on the faster device.
|
||||
|
||||
A single-device (colocated) BlueStore OSD can be provisioned with:
|
||||
To provision a single-device (colocated) BlueStore OSD, run the following
|
||||
command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm prepare --bluestore --data <device>
|
||||
|
||||
To specify a WAL device and/or DB device:
|
||||
To specify a WAL device or DB device, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm prepare --bluestore --data <device> --block.wal <wal-device> --block.db <db-device>
|
||||
|
||||
.. note:: ``--data`` can be a Logical Volume using *vg/lv* notation. Other
|
||||
devices can be existing logical volumes or GPT partitions.
|
||||
.. note:: The option ``--data`` can take as its argument any of the the
|
||||
following devices: logical volumes specified using *vg/lv* notation,
|
||||
existing logical volumes, and GPT partitions.
|
||||
|
||||
|
||||
|
||||
Provisioning strategies
|
||||
-----------------------
|
||||
Although there are multiple ways to deploy a BlueStore OSD (unlike Filestore
|
||||
which had just one), there are two common arrangements that should help clarify
|
||||
the deployment strategy:
|
||||
|
||||
BlueStore differs from Filestore in that there are several ways to deploy a
|
||||
BlueStore OSD. However, the overall deployment strategy for BlueStore can be
|
||||
clarified by examining just these two common arrangements:
|
||||
|
||||
.. _bluestore-single-type-device-config:
|
||||
|
||||
**block (data) only**
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
If all devices are the same type, for example all rotational drives, and
|
||||
there are no fast devices to use for metadata, it makes sense to specify the
|
||||
block device only and to not separate ``block.db`` or ``block.wal``. The
|
||||
:ref:`ceph-volume-lvm` command for a single ``/dev/sda`` device looks like:
|
||||
If all devices are of the same type (for example, they are all HDDs), and if
|
||||
there are no fast devices available for the storage of metadata, then it makes
|
||||
sense to specify the block device only and to leave ``block.db`` and
|
||||
``block.wal`` unseparated. The :ref:`ceph-volume-lvm` command for a single
|
||||
``/dev/sda`` device is as follows:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm create --bluestore --data /dev/sda
|
||||
|
||||
If logical volumes have already been created for each device, (a single LV
|
||||
using 100% of the device), then the :ref:`ceph-volume-lvm` call for an LV named
|
||||
``ceph-vg/block-lv`` would look like:
|
||||
If the devices to be used for a BlueStore OSD are pre-created logical volumes,
then the :ref:`ceph-volume-lvm` call for a logical volume named
``ceph-vg/block-lv`` is as follows:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -88,15 +99,18 @@ using 100% of the device), then the :ref:`ceph-volume-lvm` call for an LV named
|
||||
|
||||
**block and block.db**
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
If you have a mix of fast and slow devices (SSD / NVMe and rotational),
|
||||
it is recommended to place ``block.db`` on the faster device while ``block``
|
||||
(data) lives on the slower (spinning drive).
|
||||
|
||||
You must create these volume groups and logical volumes manually as
|
||||
the ``ceph-volume`` tool is currently not able to do so automatically.
|
||||
If you have a mix of fast and slow devices (for example, SSD or HDD), then we
|
||||
recommend placing ``block.db`` on the faster device while ``block`` (that is,
|
||||
the data) is stored on the slower device (that is, the rotational drive).
|
||||
|
||||
For the below example, let us assume four rotational (``sda``, ``sdb``, ``sdc``, and ``sdd``)
|
||||
and one (fast) solid state drive (``sdx``). First create the volume groups:
|
||||
You must create these volume groups and logical volumes manually, because the
``ceph-volume`` tool is currently unable to create them automatically.
|
||||
|
||||
The following procedure illustrates the manual creation of volume groups and
|
||||
logical volumes. For this example, we shall assume four rotational drives
|
||||
(``sda``, ``sdb``, ``sdc``, and ``sdd``) and one (fast) SSD (``sdx``). First,
|
||||
to create the volume groups, run the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -105,7 +119,7 @@ and one (fast) solid state drive (``sdx``). First create the volume groups:
|
||||
vgcreate ceph-block-2 /dev/sdc
|
||||
vgcreate ceph-block-3 /dev/sdd
|
||||
|
||||
Now create the logical volumes for ``block``:
|
||||
Next, to create the logical volumes for ``block``, run the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -114,8 +128,9 @@ Now create the logical volumes for ``block``:
|
||||
lvcreate -l 100%FREE -n block-2 ceph-block-2
|
||||
lvcreate -l 100%FREE -n block-3 ceph-block-3
|
||||
|
||||
We are creating 4 OSDs for the four slow spinning devices, so assuming a 200GB
|
||||
SSD in ``/dev/sdx`` we will create 4 logical volumes, each of 50GB:
|
||||
Because there are four HDDs, there will be four OSDs. Supposing that there is a
|
||||
200GB SSD in ``/dev/sdx``, we can create four 50GB logical volumes by running
|
||||
the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -125,7 +140,7 @@ SSD in ``/dev/sdx`` we will create 4 logical volumes, each of 50GB:
|
||||
lvcreate -L 50GB -n db-2 ceph-db-0
|
||||
lvcreate -L 50GB -n db-3 ceph-db-0
|
||||
|
||||
Finally, create the 4 OSDs with ``ceph-volume``:
|
||||
Finally, to create the four OSDs, run the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -134,149 +149,153 @@ Finally, create the 4 OSDs with ``ceph-volume``:
|
||||
ceph-volume lvm create --bluestore --data ceph-block-2/block-2 --block.db ceph-db-0/db-2
|
||||
ceph-volume lvm create --bluestore --data ceph-block-3/block-3 --block.db ceph-db-0/db-3
|
||||
|
||||
These operations should end up creating four OSDs, with ``block`` on the slower
|
||||
rotational drives with a 50 GB logical volume (DB) for each on the solid state
|
||||
drive.
|
||||
After this procedure is finished, there should be four OSDs, ``block`` should
|
||||
be on the four HDDs, and each HDD should have a 50GB logical volume
|
||||
(specifically, a DB device) on the shared SSD.
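One way to verify the resulting layout afterwards is to list what
``ceph-volume`` knows about (output omitted here):

.. prompt:: bash $

   ceph-volume lvm list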
|
||||
|
||||
Sizing
|
||||
======
|
||||
When using a :ref:`mixed spinning and solid drive setup
|
||||
<bluestore-mixed-device-config>` it is important to make a large enough
|
||||
``block.db`` logical volume for BlueStore. Generally, ``block.db`` should have
|
||||
*as large as possible* logical volumes.
|
||||
When using a :ref:`mixed spinning-and-solid-drive setup
|
||||
<bluestore-mixed-device-config>`, it is important to make a large enough
|
||||
``block.db`` logical volume for BlueStore. The logical volume associated with
``block.db`` should be *as large as possible*.
|
||||
|
||||
The general recommendation is to have ``block.db`` size in between 1% to 4%
|
||||
of ``block`` size. For RGW workloads, it is recommended that the ``block.db``
|
||||
size isn't smaller than 4% of ``block``, because RGW heavily uses it to store
|
||||
metadata (omap keys). For example, if the ``block`` size is 1TB, then ``block.db`` shouldn't
|
||||
be less than 40GB. For RBD workloads, 1% to 2% of ``block`` size is usually enough.
|
||||
It is generally recommended that the size of ``block.db`` be somewhere between
|
||||
1% and 4% of the size of ``block``. For RGW workloads, it is recommended that
|
||||
the ``block.db`` be at least 4% of the ``block`` size, because RGW makes heavy
|
||||
use of ``block.db`` to store metadata (in particular, omap keys). For example,
|
||||
if the ``block`` size is 1TB, then ``block.db`` should have a size of at least
|
||||
40GB. For RBD workloads, however, ``block.db`` usually needs no more than 1% to
|
||||
2% of the ``block`` size.
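Continuing the 1TB example above, a 40GB DB logical volume on a fast device
could be created in the same way as in the provisioning example earlier (the
volume group name ``ceph-db-0`` is hypothetical):

.. prompt:: bash $

   lvcreate -L 40GB -n db-0 ceph-db-0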
|
||||
|
||||
In older releases, internal level sizes mean that the DB can fully utilize only
|
||||
specific partition / LV sizes that correspond to sums of L0, L0+L1, L1+L2,
|
||||
etc. sizes, which with default settings means roughly 3 GB, 30 GB, 300 GB, and
|
||||
so forth. Most deployments will not substantially benefit from sizing to
|
||||
accommodate L3 and higher, though DB compaction can be facilitated by doubling
|
||||
these figures to 6GB, 60GB, and 600GB.
|
||||
In older releases, internal level sizes are such that the DB can fully utilize
|
||||
only those specific partition / logical volume sizes that correspond to sums of
|
||||
L0, L0+L1, L1+L2, and so on--that is, given default settings, sizes of roughly
|
||||
3GB, 30GB, 300GB, and so on. Most deployments do not substantially benefit from
|
||||
sizing that accommodates L3 and higher, though DB compaction can be facilitated
|
||||
by doubling these figures to 6GB, 60GB, and 600GB.
|
||||
|
||||
Improvements in releases beginning with Nautilus 14.2.12 and Octopus 15.2.6
|
||||
enable better utilization of arbitrary DB device sizes, and the Pacific
|
||||
release brings experimental dynamic level support. Users of older releases may
|
||||
thus wish to plan ahead by provisioning larger DB devices today so that their
|
||||
benefits may be realized with future upgrades.
|
||||
|
||||
When *not* using a mix of fast and slow devices, it isn't required to create
|
||||
separate logical volumes for ``block.db`` (or ``block.wal``). BlueStore will
|
||||
automatically colocate these within the space of ``block``.
|
||||
Improvements in Nautilus 14.2.12, Octopus 15.2.6, and subsequent releases allow
|
||||
for better utilization of arbitrarily-sized DB devices. Moreover, the Pacific
|
||||
release brings experimental dynamic-level support. Because of these advances,
|
||||
users of older releases might want to plan ahead by provisioning larger DB
|
||||
devices today so that the benefits of scale can be realized when upgrades are
|
||||
made in the future.
|
||||
|
||||
When *not* using a mix of fast and slow devices, there is no requirement to
|
||||
create separate logical volumes for ``block.db`` or ``block.wal``. BlueStore
|
||||
will automatically colocate these devices within the space of ``block``.
|
||||
|
||||
Automatic Cache Sizing
|
||||
======================
|
||||
|
||||
BlueStore can be configured to automatically resize its caches when TCMalloc
|
||||
is configured as the memory allocator and the ``bluestore_cache_autotune``
|
||||
setting is enabled. This option is currently enabled by default. BlueStore
|
||||
will attempt to keep OSD heap memory usage under a designated target size via
|
||||
the ``osd_memory_target`` configuration option. This is a best effort
|
||||
algorithm and caches will not shrink smaller than the amount specified by
|
||||
``osd_memory_cache_min``. Cache ratios will be chosen based on a hierarchy
|
||||
of priorities. If priority information is not available, the
|
||||
``bluestore_cache_meta_ratio`` and ``bluestore_cache_kv_ratio`` options are
|
||||
used as fallbacks.
|
||||
BlueStore can be configured to automatically resize its caches, provided that
|
||||
certain conditions are met: TCMalloc must be configured as the memory allocator
|
||||
and the ``bluestore_cache_autotune`` configuration option must be enabled (note
|
||||
that it is currently enabled by default). When automatic cache sizing is in
|
||||
effect, BlueStore attempts to keep OSD heap-memory usage under a certain target
|
||||
size (as determined by ``osd_memory_target``). This approach makes use of a
|
||||
best-effort algorithm and caches do not shrink smaller than the size defined by
|
||||
the value of ``osd_memory_cache_min``. Cache ratios are selected in accordance
|
||||
with a hierarchy of priorities. But if priority information is not available,
|
||||
the values specified in the ``bluestore_cache_meta_ratio`` and
|
||||
``bluestore_cache_kv_ratio`` options are used as fallback cache ratios.
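
For example, to raise the per-OSD memory target to 8 GiB for all OSDs and then
confirm the value seen by one OSD (the value is expressed in bytes and is an
illustrative figure, not a recommendation):

.. prompt:: bash $

   ceph config set osd osd_memory_target 8589934592
   ceph config get osd.0 osd_memory_target
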
|
||||
|
||||
|
||||
Manual Cache Sizing
|
||||
===================
|
||||
|
||||
The amount of memory consumed by each OSD for BlueStore caches is
|
||||
determined by the ``bluestore_cache_size`` configuration option. If
|
||||
that config option is not set (i.e., remains at 0), there is a
|
||||
different default value that is used depending on whether an HDD or
|
||||
SSD is used for the primary device (set by the
|
||||
``bluestore_cache_size_ssd`` and ``bluestore_cache_size_hdd`` config
|
||||
options).
|
||||
The amount of memory consumed by each OSD to be used for its BlueStore cache is
|
||||
determined by the ``bluestore_cache_size`` configuration option. If that option
|
||||
has not been specified (that is, if it remains at 0), then Ceph uses a
|
||||
different configuration option to determine the default memory budget:
|
||||
``bluestore_cache_size_hdd`` if the primary device is an HDD, or
|
||||
``bluestore_cache_size_ssd`` if the primary device is an SSD.
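
For example (the sizes are illustrative only), the defaults can be overridden
centrally for all OSDs of each media type:

.. prompt:: bash $

   ceph config set osd bluestore_cache_size_ssd 4294967296   # 4 GiB
   ceph config set osd bluestore_cache_size_hdd 1073741824   # 1 GiB
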
|
||||
|
||||
BlueStore and the rest of the Ceph OSD daemon do the best they can
|
||||
to work within this memory budget. Note that on top of the configured
|
||||
cache size, there is also memory consumed by the OSD itself, and
|
||||
some additional utilization due to memory fragmentation and other
|
||||
allocator overhead.
|
||||
BlueStore and the rest of the Ceph OSD daemon make every effort to work within
|
||||
this memory budget. Note that in addition to the configured cache size, there
|
||||
is also memory consumed by the OSD itself. There is additional utilization due
|
||||
to memory fragmentation and other allocator overhead.
|
||||
|
||||
The configured cache memory budget can be used in a few different ways:
|
||||
The configured cache-memory budget can be used to store the following types of
|
||||
things:
|
||||
|
||||
* Key/Value metadata (i.e., RocksDB's internal cache)
|
||||
* Key/Value metadata (that is, RocksDB's internal cache)
|
||||
* BlueStore metadata
|
||||
* BlueStore data (i.e., recently read or written object data)
|
||||
* BlueStore data (that is, recently read or recently written object data)
|
||||
|
||||
Cache memory usage is governed by the following options:
|
||||
``bluestore_cache_meta_ratio`` and ``bluestore_cache_kv_ratio``.
|
||||
The fraction of the cache devoted to data
|
||||
is governed by the effective bluestore cache size (depending on
|
||||
``bluestore_cache_size[_ssd|_hdd]`` settings and the device class of the primary
|
||||
device) as well as the meta and kv ratios.
|
||||
The data fraction can be calculated by
|
||||
``<effective_cache_size> * (1 - bluestore_cache_meta_ratio - bluestore_cache_kv_ratio)``
|
||||
Cache memory usage is governed by the configuration options
|
||||
``bluestore_cache_meta_ratio`` and ``bluestore_cache_kv_ratio``. The fraction
|
||||
of the cache that is reserved for data is governed by both the effective
|
||||
BlueStore cache size (which depends on the relevant
|
||||
``bluestore_cache_size[_ssd|_hdd]`` option and the device class of the primary
|
||||
device) and the "meta" and "kv" ratios. This data fraction can be calculated
|
||||
with the following formula: ``<effective_cache_size> * (1 -
|
||||
bluestore_cache_meta_ratio - bluestore_cache_kv_ratio)``.
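
As a purely illustrative calculation (the ratio values are assumptions, not
recommendations): with an effective cache size of 3 GiB, a meta ratio of 0.45,
and a kv ratio of 0.45, the portion of the cache left for data would be::

   3 GiB * (1 - 0.45 - 0.45) = 0.3 GiB
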
|
||||
|
||||
Checksums
|
||||
=========
|
||||
|
||||
BlueStore checksums all metadata and data written to disk. Metadata
|
||||
checksumming is handled by RocksDB and uses `crc32c`. Data
|
||||
checksumming is done by BlueStore and can make use of `crc32c`,
|
||||
`xxhash32`, or `xxhash64`. The default is `crc32c` and should be
|
||||
suitable for most purposes.
|
||||
BlueStore checksums all metadata and all data written to disk. Metadata
|
||||
checksumming is handled by RocksDB and uses the `crc32c` algorithm. By
|
||||
contrast, data checksumming is handled by BlueStore and can use either
|
||||
`crc32c`, `xxhash32`, or `xxhash64`. Nonetheless, `crc32c` is the default
|
||||
checksum algorithm and it is suitable for most purposes.
|
||||
|
||||
Full data checksumming does increase the amount of metadata that
|
||||
BlueStore must store and manage. When possible, e.g., when clients
|
||||
hint that data is written and read sequentially, BlueStore will
|
||||
checksum larger blocks, but in many cases it must store a checksum
|
||||
value (usually 4 bytes) for every 4 kilobyte block of data.
|
||||
Full data checksumming increases the amount of metadata that BlueStore must
|
||||
store and manage. Whenever possible (for example, when clients hint that data
|
||||
is written and read sequentially), BlueStore will checksum larger blocks. In
|
||||
many cases, however, it must store a checksum value (usually 4 bytes) for every
|
||||
4 KB block of data.
|
||||
|
||||
It is possible to use a smaller checksum value by truncating the
|
||||
checksum to two or one byte, reducing the metadata overhead. The
|
||||
trade-off is that the probability that a random error will not be
|
||||
detected is higher with a smaller checksum, going from about one in
|
||||
four billion with a 32-bit (4 byte) checksum to one in 65,536 for a
|
||||
16-bit (2 byte) checksum or one in 256 for an 8-bit (1 byte) checksum.
|
||||
The smaller checksum values can be used by selecting `crc32c_16` or
|
||||
`crc32c_8` as the checksum algorithm.
|
||||
It is possible to obtain a smaller checksum value by truncating the checksum to
|
||||
one or two bytes and reducing the metadata overhead. A drawback of this
|
||||
approach is that it increases the probability of a random error going
|
||||
undetected: about one in four billion given a 32-bit (4 byte) checksum, 1 in
|
||||
65,536 given a 16-bit (2 byte) checksum, and 1 in 256 given an 8-bit (1 byte)
|
||||
checksum. To use the smaller checksum values, select `crc32c_16` or `crc32c_8`
|
||||
as the checksum algorithm.
|
||||
|
||||
The *checksum algorithm* can be set either via a per-pool
|
||||
``csum_type`` property or the global config option. For example:
|
||||
The *checksum algorithm* can be specified either via a per-pool ``csum_type``
|
||||
configuration option or via the global configuration option. For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set <pool-name> csum_type <algorithm>
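
For example, to select the truncated 16-bit checksum on a hypothetical pool
named ``testpool``:

.. prompt:: bash $

   ceph osd pool set testpool csum_type crc32c_16
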
|
||||
|
||||
|
||||
Inline Compression
|
||||
==================
|
||||
|
||||
BlueStore supports inline compression using `snappy`, `zlib`, or
|
||||
`lz4`. Please note that the `lz4` compression plugin is not
|
||||
distributed in the official release.
|
||||
BlueStore supports inline compression using `snappy`, `zlib`, `lz4`, or `zstd`.
|
||||
|
||||
Whether data in BlueStore is compressed is determined by a combination
|
||||
of the *compression mode* and any hints associated with a write
|
||||
operation. The modes are:
|
||||
Whether data in BlueStore is compressed is determined by two factors: (1) the
|
||||
*compression mode* and (2) any client hints associated with a write operation.
|
||||
The compression modes are as follows:
|
||||
|
||||
* **none**: Never compress data.
|
||||
* **passive**: Do not compress data unless the write operation has a
|
||||
*compressible* hint set.
|
||||
* **aggressive**: Compress data unless the write operation has an
|
||||
* **aggressive**: Do compress data unless the write operation has an
|
||||
*incompressible* hint set.
|
||||
* **force**: Try to compress data no matter what.
|
||||
|
||||
For more information about the *compressible* and *incompressible* IO
|
||||
hints, see :c:func:`rados_set_alloc_hint`.
|
||||
For more information about the *compressible* and *incompressible* I/O hints,
|
||||
see :c:func:`rados_set_alloc_hint`.
|
||||
|
||||
Note that regardless of the mode, if the size of the data chunk is not
|
||||
reduced sufficiently it will not be used and the original
|
||||
(uncompressed) data will be stored. For example, if the ``bluestore
|
||||
compression required ratio`` is set to ``.7`` then the compressed data
|
||||
must be 70% of the size of the original (or smaller).
|
||||
Note that data in BlueStore will be compressed only if the data chunk will be
|
||||
sufficiently reduced in size (as determined by the ``bluestore compression
|
||||
required ratio`` setting). No matter which compression modes have been used, if
|
||||
the data chunk is too big, then it will be discarded and the original
|
||||
(uncompressed) data will be stored instead. For example, if ``bluestore
|
||||
compression required ratio`` is set to ``.7``, then data compression will take
|
||||
place only if the size of the compressed data is no more than 70% of the size
|
||||
of the original data.
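
For instance (a sketch only; the pool name is illustrative), the following
enables aggressive compression on a pool and requires at least a 30% size
reduction before compressed data is kept:

.. prompt:: bash $

   ceph osd pool set testpool compression_mode aggressive
   ceph osd pool set testpool compression_required_ratio .7
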
|
||||
|
||||
The *compression mode*, *compression algorithm*, *compression required
|
||||
ratio*, *min blob size*, and *max blob size* can be set either via a
|
||||
per-pool property or a global config option. Pool properties can be
|
||||
set with:
|
||||
The *compression mode*, *compression algorithm*, *compression required ratio*,
|
||||
*min blob size*, and *max blob size* settings can be specified either via a
|
||||
per-pool property or via a global config option. To specify pool properties,
|
||||
run the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -291,192 +310,202 @@ set with:
|
||||
RocksDB Sharding
|
||||
================
|
||||
|
||||
Internally BlueStore uses multiple types of key-value data,
|
||||
stored in RocksDB. Each data type in BlueStore is assigned a
|
||||
unique prefix. Until Pacific all key-value data was stored in
|
||||
single RocksDB column family: 'default'. Since Pacific,
|
||||
BlueStore can divide this data into multiple RocksDB column
|
||||
families. When keys have similar access frequency, modification
|
||||
frequency and lifetime, BlueStore benefits from better caching
|
||||
and more precise compaction. This improves performance, and also
|
||||
requires less disk space during compaction, since each column
|
||||
family is smaller and can compact independent of others.
|
||||
BlueStore maintains several types of internal key-value data, all of which are
|
||||
stored in RocksDB. Each data type in BlueStore is assigned a unique prefix.
|
||||
Prior to the Pacific release, all key-value data was stored in a single RocksDB
|
||||
column family: 'default'. In Pacific and later releases, however, BlueStore can
|
||||
divide key-value data into several RocksDB column families. BlueStore achieves
|
||||
better caching and more precise compaction when keys are similar: specifically,
|
||||
when keys have similar access frequency, similar modification frequency, and a
|
||||
similar lifetime. Under such conditions, performance is improved and less disk
|
||||
space is required during compaction (because each column family is smaller and
|
||||
is able to compact independently of the others).
|
||||
|
||||
OSDs deployed in Pacific or later use RocksDB sharding by default.
|
||||
If Ceph is upgraded to Pacific from a previous version, sharding is off.
|
||||
OSDs deployed in Pacific or later releases use RocksDB sharding by default.
|
||||
However, if Ceph has been upgraded to Pacific or a later version from a
|
||||
previous version, sharding is disabled on any OSDs that were created before
|
||||
Pacific.
|
||||
|
||||
To enable sharding and apply the Pacific defaults, stop an OSD and run
|
||||
To enable sharding and apply the Pacific defaults to a specific OSD, stop the
|
||||
OSD and run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph-bluestore-tool \
|
||||
--path <data path> \
|
||||
--sharding="m(3) p(3,0-12) O(3,0-13)=block_cache={type=binned_lru} L P" \
|
||||
--sharding="m(3) p(3,0-12) o(3,0-13)=block_cache={type=binned_lru} l p" \
|
||||
reshard
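
To read back the sharding definition that is currently in effect (assuming
your release's ``ceph-bluestore-tool`` provides the ``show-sharding`` command),
something like the following can be used:

.. prompt:: bash #

   ceph-bluestore-tool --path <data path> show-sharding
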
|
||||
|
||||
|
||||
Throttling
|
||||
SPDK Usage
|
||||
==========
|
||||
|
||||
SPDK Usage
|
||||
==================
|
||||
|
||||
If you want to use the SPDK driver for NVMe devices, you must prepare your system.
|
||||
Refer to `SPDK document`__ for more details.
|
||||
To use the SPDK driver for NVMe devices, you must first prepare your system.
|
||||
See `SPDK document`__.
|
||||
|
||||
.. __: http://www.spdk.io/doc/getting_started.html#getting_started_examples
|
||||
|
||||
SPDK offers a script to configure the device automatically. Users can run the
|
||||
script as root:
|
||||
SPDK offers a script that will configure the device automatically. Run this
|
||||
script with root permissions:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo src/spdk/scripts/setup.sh
|
||||
|
||||
You will need to specify the subject NVMe device's device selector with
|
||||
the "spdk:" prefix for ``bluestore_block_path``.
|
||||
You will need to specify the subject NVMe device's device selector with the
|
||||
"spdk:" prefix for ``bluestore_block_path``.
|
||||
|
||||
For example, you can find the device selector of an Intel PCIe SSD with:
|
||||
In the following example, you first find the device selector of an Intel NVMe
|
||||
SSD by running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
lspci -mm -n -D -d 8086:0953
|
||||
|
||||
The device selector always has the form of ``DDDD:BB:DD.FF`` or ``DDDD.BB.DD.FF``.
|
||||
The form of the device selector is either ``DDDD:BB:DD.FF`` or
|
||||
``DDDD.BB.DD.FF``.
|
||||
|
||||
and then set::
|
||||
Next, supposing that ``0000:01:00.0`` is the device selector found in the
|
||||
output of the ``lspci`` command, you can specify the device selector by running
|
||||
the following command::
|
||||
|
||||
bluestore_block_path = "spdk:trtype:PCIe traddr:0000:01:00.0"
|
||||
bluestore_block_path = "spdk:trtype:pcie traddr:0000:01:00.0"
|
||||
|
||||
Where ``0000:01:00.0`` is the device selector found in the output of ``lspci``
|
||||
command above.
|
||||
|
||||
You may also specify a remote NVMeoF target over the TCP transport as in the
|
||||
You may also specify a remote NVMeoF target over the TCP transport, as in the
|
||||
following example::
|
||||
|
||||
bluestore_block_path = "spdk:trtype:TCP traddr:10.67.110.197 trsvcid:4420 subnqn:nqn.2019-02.io.spdk:cnode1"
|
||||
bluestore_block_path = "spdk:trtype:tcp traddr:10.67.110.197 trsvcid:4420 subnqn:nqn.2019-02.io.spdk:cnode1"
|
||||
|
||||
To run multiple SPDK instances per node, you must specify the
|
||||
amount of dpdk memory in MB that each instance will use, to make sure each
|
||||
instance uses its own DPDK memory.
|
||||
To run multiple SPDK instances per node, you must make sure each instance uses
|
||||
its own DPDK memory by specifying for each instance the amount of DPDK memory
|
||||
(in MB) that the instance will use.
|
||||
|
||||
In most cases, a single device can be used for data, DB, and WAL. We describe
|
||||
In most cases, a single device can be used for data, DB, and WAL. We describe
|
||||
this strategy as *colocating* these components. Be sure to enter the below
|
||||
settings to ensure that all IOs are issued through SPDK.::
|
||||
settings to ensure that all I/Os are issued through SPDK::
|
||||
|
||||
bluestore_block_db_path = ""
|
||||
bluestore_block_db_size = 0
|
||||
bluestore_block_wal_path = ""
|
||||
bluestore_block_wal_size = 0
|
||||
|
||||
Otherwise, the current implementation will populate the SPDK map files with
|
||||
kernel file system symbols and will use the kernel driver to issue DB/WAL IO.
|
||||
If these settings are not entered, then the current implementation will
|
||||
populate the SPDK map files with kernel file system symbols and will use the
|
||||
kernel driver to issue DB/WAL I/Os.
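
Putting the above together, a minimal sketch of the relevant OSD options for a
colocated SPDK OSD might look like the following in ``ceph.conf`` (the PCIe
address is the illustrative one from the earlier example)::

    [osd]
    bluestore_block_path = "spdk:trtype:PCIe traddr:0000:01:00.0"
    bluestore_block_db_path = ""
    bluestore_block_db_size = 0
    bluestore_block_wal_path = ""
    bluestore_block_wal_size = 0
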
|
||||
|
||||
Minimum Allocation Size
|
||||
========================
|
||||
=======================
|
||||
|
||||
There is a configured minimum amount of storage that BlueStore will allocate on
|
||||
an OSD. In practice, this is the least amount of capacity that a RADOS object
|
||||
can consume. The value of `bluestore_min_alloc_size` is derived from the
|
||||
value of `bluestore_min_alloc_size_hdd` or `bluestore_min_alloc_size_ssd`
|
||||
depending on the OSD's ``rotational`` attribute. This means that when an OSD
|
||||
is created on an HDD, BlueStore will be initialized with the current value
|
||||
of `bluestore_min_alloc_size_hdd`, and SSD OSDs (including NVMe devices)
|
||||
with the value of `bluestore_min_alloc_size_ssd`.
|
||||
There is a configured minimum amount of storage that BlueStore allocates on an
|
||||
underlying storage device. In practice, this is the least amount of capacity
|
||||
that even a tiny RADOS object can consume on each OSD's primary device. The
|
||||
configuration option in question-- ``bluestore_min_alloc_size`` --derives
|
||||
its value from the value of either ``bluestore_min_alloc_size_hdd`` or
|
||||
``bluestore_min_alloc_size_ssd``, depending on the OSD's ``rotational``
|
||||
attribute. Thus if an OSD is created on an HDD, BlueStore is initialized with
|
||||
the current value of ``bluestore_min_alloc_size_hdd``; but with SSD OSDs
|
||||
(including NVMe devices), Bluestore is initialized with the current value of
|
||||
``bluestore_min_alloc_size_ssd``.
|
||||
|
||||
Through the Mimic release, the default values were 64KB and 16KB for rotational
|
||||
(HDD) and non-rotational (SSD) media respectively. Octopus changed the default
|
||||
for SSD (non-rotational) media to 4KB, and Pacific changed the default for HDD
|
||||
(rotational) media to 4KB as well.
|
||||
In Mimic and earlier releases, the default values were 64KB for rotational
|
||||
media (HDD) and 16KB for non-rotational media (SSD). The Octopus release
|
||||
changed the default value for non-rotational media (SSD) to 4KB, and the
|
||||
Pacific release changed the default value for rotational media (HDD) to 4KB.
|
||||
|
||||
These changes were driven by space amplification experienced by Ceph RADOS
|
||||
GateWay (RGW) deployments that host large numbers of small files
|
||||
These changes were driven by space amplification that was experienced by Ceph
|
||||
RADOS GateWay (RGW) deployments that hosted large numbers of small files
|
||||
(S3/Swift objects).
|
||||
|
||||
For example, when an RGW client stores a 1KB S3 object, it is written to a
|
||||
single RADOS object. With the default `min_alloc_size` value, 4KB of
|
||||
underlying drive space is allocated. This means that roughly
|
||||
(4KB - 1KB) == 3KB is allocated but never used, which corresponds to 300%
|
||||
overhead or 25% efficiency. Similarly, a 5KB user object will be stored
|
||||
as one 4KB and one 1KB RADOS object, again stranding 4KB of device capacity,
|
||||
though in this case the overhead is a much smaller percentage. Think of this
|
||||
in terms of the remainder from a modulus operation. The overhead *percentage*
|
||||
thus decreases rapidly as user object size increases.
|
||||
For example, when an RGW client stores a 1 KB S3 object, that object is written
|
||||
to a single RADOS object. In accordance with the default
|
||||
``min_alloc_size`` value, 4 KB of underlying drive space is allocated.
|
||||
This means that roughly 3 KB (that is, 4 KB minus 1 KB) is allocated but never
|
||||
used: this corresponds to 300% overhead or 25% efficiency. Similarly, a 5 KB
|
||||
user object will be stored as two RADOS objects, a 4 KB RADOS object and a 1 KB
|
||||
RADOS object, with the result that 4KB of device capacity is stranded. In this
|
||||
case, however, the overhead percentage is much smaller. Think of this in terms
|
||||
of the remainder from a modulus operation. The overhead *percentage* thus
|
||||
decreases rapidly as object size increases.
|
||||
|
||||
An easily missed additional subtlety is that this
|
||||
takes place for *each* replica. So when using the default three copies of
|
||||
data (3R), a 1KB S3 object actually consumes roughly 9KB of storage device
|
||||
capacity. If erasure coding (EC) is used instead of replication, the
|
||||
amplification may be even higher: for a ``k=4,m=2`` pool, our 1KB S3 object
|
||||
will allocate (6 * 4KB) = 24KB of device capacity.
|
||||
There is an additional subtlety that is easily missed: the amplification
|
||||
phenomenon just described takes place for *each* replica. For example, when
|
||||
using the default of three copies of data (3R), a 1 KB S3 object actually
|
||||
strands roughly 9 KB of storage device capacity. If erasure coding (EC) is used
|
||||
instead of replication, the amplification might be even higher: for a ``k=4,
|
||||
m=2`` pool, our 1 KB S3 object allocates 24 KB (that is, 4 KB multiplied by 6)
|
||||
of device capacity.
|
||||
|
||||
When an RGW bucket pool contains many relatively large user objects, the effect
|
||||
of this phenomenon is often negligible, but should be considered for deployments
|
||||
that expect a significant fraction of relatively small objects.
|
||||
of this phenomenon is often negligible. However, with deployments that can
|
||||
expect a significant fraction of relatively small user objects, the effect
|
||||
should be taken into consideration.
|
||||
|
||||
The 4KB default value aligns well with conventional HDD and SSD devices. Some
|
||||
new coarse-IU (Indirection Unit) QLC SSDs however perform and wear best
|
||||
when `bluestore_min_alloc_size_ssd`
|
||||
is set at OSD creation to match the device's IU: 8KB, 16KB, or even 64KB.
|
||||
These novel storage drives allow one to achieve read performance competitive
|
||||
with conventional TLC SSDs and write performance faster than HDDs, with
|
||||
high density and lower cost than TLC SSDs.
|
||||
The 4KB default value aligns well with conventional HDD and SSD devices.
|
||||
However, certain novel coarse-IU (Indirection Unit) QLC SSDs perform and wear
|
||||
best when ``bluestore_min_alloc_size_ssd`` is specified at OSD creation
|
||||
to match the device's IU: this might be 8KB, 16KB, or even 64KB. These novel
|
||||
storage drives can achieve read performance that is competitive with that of
|
||||
conventional TLC SSDs and write performance that is faster than that of HDDs,
|
||||
with higher density and lower cost than TLC SSDs.
|
||||
|
||||
Note that when creating OSDs on these devices, one must carefully apply the
|
||||
non-default value only to appropriate devices, and not to conventional SSD and
|
||||
HDD devices. This may be done through careful ordering of OSD creation, custom
|
||||
OSD device classes, and especially by the use of central configuration _masks_.
|
||||
Note that when creating OSDs on these novel devices, one must be careful to
|
||||
apply the non-default value only to appropriate devices, and not to
|
||||
conventional HDD and SSD devices. Error can be avoided through careful ordering
|
||||
of OSD creation, with custom OSD device classes, and especially by the use of
|
||||
central configuration *masks*.
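
For example (the device class name and the value are illustrative only), a
central configuration mask scoped to a custom CRUSH device class can confine
the non-default value to the coarse-IU drives. Because the setting is read only
at OSD creation, the mask must be in place before those OSDs are created:

.. prompt:: bash #

   ceph config set osd/class:qlc bluestore_min_alloc_size_ssd 16384
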
|
||||
|
||||
Quincy and later releases add
|
||||
the `bluestore_use_optimal_io_size_for_min_alloc_size`
|
||||
option that enables automatic discovery of the appropriate value as each OSD is
|
||||
created. Note that the use of ``bcache``, ``OpenCAS``, ``dmcrypt``,
|
||||
``ATA over Ethernet``, `iSCSI`, or other device layering / abstraction
|
||||
technologies may confound the determination of appropriate values. OSDs
|
||||
deployed on top of VMware storage have been reported to also
|
||||
sometimes report a ``rotational`` attribute that does not match the underlying
|
||||
hardware.
|
||||
In Quincy and later releases, you can use the
|
||||
``bluestore_use_optimal_io_size_for_min_alloc_size`` option to allow
|
||||
automatic discovery of the correct value as each OSD is created. Note that the
|
||||
use of ``bcache``, ``OpenCAS``, ``dmcrypt``, ``ATA over Ethernet``, `iSCSI`, or
|
||||
other device-layering and abstraction technologies might confound the
|
||||
determination of correct values. Moreover, OSDs deployed on top of VMware
|
||||
storage have sometimes been found to report a ``rotational`` attribute that
|
||||
does not match the underlying hardware.
|
||||
|
||||
We suggest inspecting such OSDs at startup via logs and admin sockets to ensure that
|
||||
behavior is appropriate. Note that this also may not work as desired with
|
||||
older kernels. You can check for this by examining the presence and value
|
||||
of ``/sys/block/<drive>/queue/optimal_io_size``.
|
||||
We suggest inspecting such OSDs at startup via logs and admin sockets in order
|
||||
to ensure that their behavior is correct. Be aware that this kind of inspection
|
||||
might not work as expected with older kernels. To check for this issue,
|
||||
examine the presence and value of ``/sys/block/<drive>/queue/optimal_io_size``.
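
For example (the drive name is illustrative):

.. prompt:: bash $

   cat /sys/block/sda/queue/optimal_io_size   # a value of 0 means no optimal I/O size is reported
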
|
||||
|
||||
You may also inspect a given OSD:
|
||||
.. note:: When running Reef or a later Ceph release, the ``min_alloc_size``
|
||||
baked into each OSD is conveniently reported by ``ceph osd metadata``.
|
||||
|
||||
To inspect a specific OSD, run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph osd metadata osd.1701 | grep rotational
|
||||
ceph osd metadata osd.1701 | egrep rotational\|alloc
|
||||
|
||||
This space amplification may manifest as an unusually high ratio of raw to
|
||||
stored data reported by ``ceph df``. ``ceph osd df`` may also report
|
||||
anomalously high ``%USE`` / ``VAR`` values when
|
||||
compared to other, ostensibly identical OSDs. A pool using OSDs with
|
||||
mismatched ``min_alloc_size`` values may experience unexpected balancer
|
||||
behavior as well.
|
||||
This space amplification might manifest as an unusually high ratio of raw to
|
||||
stored data as reported by ``ceph df``. There might also be ``%USE`` / ``VAR``
|
||||
values reported by ``ceph osd df`` that are unusually high in comparison to
|
||||
other, ostensibly identical, OSDs. Finally, there might be unexpected balancer
|
||||
behavior in pools that use OSDs that have mismatched ``min_alloc_size`` values.
|
||||
|
||||
Note that this BlueStore attribute takes effect *only* at OSD creation; if
|
||||
changed later, a given OSD's behavior will not change unless / until it is
|
||||
destroyed and redeployed with the appropriate option value(s). Upgrading
|
||||
to a later Ceph release will *not* change the value used by OSDs deployed
|
||||
under older releases or with other settings.
|
||||
This BlueStore attribute takes effect *only* at OSD creation; if the attribute
|
||||
is changed later, a specific OSD's behavior will not change unless and until
|
||||
the OSD is destroyed and redeployed with the appropriate option value(s).
|
||||
Upgrading to a later Ceph release will *not* change the value used by OSDs that
|
||||
were deployed under older releases or with other settings.
|
||||
|
||||
DSA (Data Streaming Accelerator Usage)
|
||||
DSA (Data Streaming Accelerator) Usage
|
||||
======================================
|
||||
|
||||
If you want to use the DML library to drive DSA device for offloading
|
||||
read/write operations on Persist memory in Bluestore. You need to install
|
||||
`DML`_ and `idxd-config`_ library in your machine with SPR (Sapphire Rapids) CPU.
|
||||
If you want to use the DML library to drive the DSA device for offloading
|
||||
read/write operations on persistent memory (PMEM) in BlueStore, you need to
|
||||
install `DML`_ and the `idxd-config`_ library. This will work only on machines
|
||||
that have a SPR (Sapphire Rapids) CPU.
|
||||
|
||||
.. _DML: https://github.com/intel/DML
|
||||
.. _idxd-config: https://github.com/intel/idxd-config
|
||||
|
||||
After installing the DML software, you need to configure the shared
|
||||
work queues (WQs) with the following WQ configuration example via accel-config tool:
|
||||
After installing the DML software, configure the shared work queues (WQs) with
|
||||
reference to the following WQ configuration example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
accel-config config-wq --group-id=1 --mode=shared --wq-size=16 --threshold=15 --type=user --name="MyApp1" --priority=10 --block-on-fault=1 dsa0/wq0.1
|
||||
accel-config config-engine dsa0/engine0.1 --group-id=1
|
||||
accel-config enable-device dsa0
|
||||
accel-config enable-wq dsa0/wq0.1
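
To verify the result, the configured devices and work queues can be listed (a
sketch; the output format varies by ``accel-config`` version):

.. prompt:: bash $

   accel-config list
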
|
||||
|
@ -218,4 +218,4 @@ If you need to allow multiple clusters to exist on the same host, use
|
||||
.. _Hardware Recommendations: ../../../start/hardware-recommendations
|
||||
.. _Network Configuration Reference: ../network-config-ref
|
||||
.. _OSD Config Reference: ../osd-config-ref
|
||||
.. _Configuring Monitor/OSD Interaction: ../mon-osd-interactio
|
||||
.. _Configuring Monitor/OSD Interaction: ../mon-osd-interaction
|
||||
|
@ -2,8 +2,14 @@
|
||||
Filestore Config Reference
|
||||
============================
|
||||
|
||||
The Filestore back end is no longer the default when creating new OSDs,
|
||||
though Filestore OSDs are still supported.
|
||||
.. note:: Since the Luminous release of Ceph, Filestore has not been Ceph's
|
||||
default storage back end. Since the Luminous release of Ceph, BlueStore has
|
||||
been Ceph's default storage back end. However, Filestore OSDs are still
|
||||
supported. See :ref:`OSD Back Ends
|
||||
<rados_config_storage_devices_osd_backends>`. See :ref:`BlueStore Migration
|
||||
<rados_operations_bluestore_migration>` for instructions explaining how to
|
||||
replace an existing Filestore back end with a BlueStore back end.
|
||||
|
||||
|
||||
``filestore debug omap check``
|
||||
|
||||
@ -18,26 +24,31 @@ though Filestore OSDs are still supported.
|
||||
Extended Attributes
|
||||
===================
|
||||
|
||||
Extended Attributes (XATTRs) are important for Filestore OSDs.
|
||||
Some file systems have limits on the number of bytes that can be stored in XATTRs.
|
||||
Additionally, in some cases, the file system may not be as fast as an alternative
|
||||
method of storing XATTRs. The following settings may help improve performance
|
||||
by using a method of storing XATTRs that is extrinsic to the underlying file system.
|
||||
Extended Attributes (XATTRs) are important for Filestore OSDs. However, certain
|
||||
disadvantages can occur when the underlying file system is used for the storage
|
||||
of XATTRs: some file systems have limits on the number of bytes that can be
|
||||
stored in XATTRs, and your file system might in some cases therefore run slower
|
||||
than would an alternative method of storing XATTRs. For this reason, a method
|
||||
of storing XATTRs extrinsic to the underlying file system might improve
|
||||
performance. To implement such an extrinsic method, refer to the following
|
||||
settings.
|
||||
|
||||
Ceph XATTRs are stored as ``inline xattr``, using the XATTRs provided
|
||||
by the underlying file system, if it does not impose a size limit. If
|
||||
there is a size limit (4KB total on ext4, for instance), some Ceph
|
||||
XATTRs will be stored in a key/value database when either the
|
||||
If the underlying file system has no size limit, then Ceph XATTRs are stored as
|
||||
``inline xattr``, using the XATTRs provided by the file system. But if there is
|
||||
a size limit (for example, ext4 imposes a limit of 4 KB total), then some Ceph
|
||||
XATTRs will be stored in a key/value database when the limit is reached. More
|
||||
precisely, this begins to occur when either the
|
||||
``filestore_max_inline_xattr_size`` or ``filestore_max_inline_xattrs``
|
||||
threshold is reached.
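
As a sketch (the values shown are illustrative, not recommendations), these
thresholds can be adjusted centrally with the usual configuration commands:

.. prompt:: bash $

   ceph config set osd filestore_max_inline_xattr_size 65536
   ceph config set osd filestore_max_inline_xattrs 10
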
|
||||
|
||||
|
||||
``filestore_max_inline_xattr_size``
|
||||
|
||||
:Description: The maximum size of an XATTR stored in the file system (i.e., XFS,
|
||||
Btrfs, EXT4, etc.) per object. Should not be larger than the
|
||||
file system can handle. Default value of 0 means to use the value
|
||||
specific to the underlying file system.
|
||||
:Description: Defines the maximum size per object of an XATTR that can be
|
||||
stored in the file system (for example, XFS, Btrfs, ext4). The
|
||||
specified size should not be larger than the file system can
|
||||
handle. Using the default value of 0 instructs Filestore to use
|
||||
the value specific to the file system.
|
||||
:Type: Unsigned 32-bit Integer
|
||||
:Required: No
|
||||
:Default: ``0``
|
||||
@ -45,8 +56,9 @@ threshold is reached.
|
||||
|
||||
``filestore_max_inline_xattr_size_xfs``
|
||||
|
||||
:Description: The maximum size of an XATTR stored in the XFS file system.
|
||||
Only used if ``filestore_max_inline_xattr_size`` == 0.
|
||||
:Description: Defines the maximum size of an XATTR that can be stored in the
|
||||
XFS file system. This setting is used only if
|
||||
``filestore_max_inline_xattr_size`` == 0.
|
||||
:Type: Unsigned 32-bit Integer
|
||||
:Required: No
|
||||
:Default: ``65536``
|
||||
@ -54,8 +66,9 @@ threshold is reached.
|
||||
|
||||
``filestore_max_inline_xattr_size_btrfs``
|
||||
|
||||
:Description: The maximum size of an XATTR stored in the Btrfs file system.
|
||||
Only used if ``filestore_max_inline_xattr_size`` == 0.
|
||||
:Description: Defines the maximum size of an XATTR that can be stored in the
|
||||
Btrfs file system. This setting is used only if
|
||||
``filestore_max_inline_xattr_size`` == 0.
|
||||
:Type: Unsigned 32-bit Integer
|
||||
:Required: No
|
||||
:Default: ``2048``
|
||||
@ -63,8 +76,8 @@ threshold is reached.
|
||||
|
||||
``filestore_max_inline_xattr_size_other``
|
||||
|
||||
:Description: The maximum size of an XATTR stored in other file systems.
|
||||
Only used if ``filestore_max_inline_xattr_size`` == 0.
|
||||
:Description: Defines the maximum size of an XATTR that can be stored in other file systems.
|
||||
This setting is used only if ``filestore_max_inline_xattr_size`` == 0.
|
||||
:Type: Unsigned 32-bit Integer
|
||||
:Required: No
|
||||
:Default: ``512``
|
||||
@ -72,9 +85,8 @@ threshold is reached.
|
||||
|
||||
``filestore_max_inline_xattrs``
|
||||
|
||||
:Description: The maximum number of XATTRs stored in the file system per object.
|
||||
Default value of 0 means to use the value specific to the
|
||||
underlying file system.
|
||||
:Description: Defines the maximum number of XATTRs per object that can be stored in the file system.
|
||||
Using the default value of 0 instructs Filestore to use the value specific to the file system.
|
||||
:Type: 32-bit Integer
|
||||
:Required: No
|
||||
:Default: ``0``
|
||||
@ -82,8 +94,8 @@ threshold is reached.
|
||||
|
||||
``filestore_max_inline_xattrs_xfs``
|
||||
|
||||
:Description: The maximum number of XATTRs stored in the XFS file system per object.
|
||||
Only used if ``filestore_max_inline_xattrs`` == 0.
|
||||
:Description: Defines the maximum number of XATTRs per object that can be stored in the XFS file system.
|
||||
This setting is used only if ``filestore_max_inline_xattrs`` == 0.
|
||||
:Type: 32-bit Integer
|
||||
:Required: No
|
||||
:Default: ``10``
|
||||
@ -91,8 +103,8 @@ threshold is reached.
|
||||
|
||||
``filestore_max_inline_xattrs_btrfs``
|
||||
|
||||
:Description: The maximum number of XATTRs stored in the Btrfs file system per object.
|
||||
Only used if ``filestore_max_inline_xattrs`` == 0.
|
||||
:Description: Defines the maximum number of XATTRs per object that can be stored in the Btrfs file system.
|
||||
This setting is used only if ``filestore_max_inline_xattrs`` == 0.
|
||||
:Type: 32-bit Integer
|
||||
:Required: No
|
||||
:Default: ``10``
|
||||
@ -100,8 +112,8 @@ threshold is reached.
|
||||
|
||||
``filestore_max_inline_xattrs_other``
|
||||
|
||||
:Description: The maximum number of XATTRs stored in other file systems per object.
|
||||
Only used if ``filestore_max_inline_xattrs`` == 0.
|
||||
:Description: Defines the maximum number of XATTRs per object that can be stored in other file systems.
|
||||
This setting is used only if ``filestore_max_inline_xattrs`` == 0.
|
||||
:Type: 32-bit Integer
|
||||
:Required: No
|
||||
:Default: ``2``
|
||||
@ -111,18 +123,19 @@ threshold is reached.
|
||||
Synchronization Intervals
|
||||
=========================
|
||||
|
||||
Filestore needs to periodically quiesce writes and synchronize the
|
||||
file system, which creates a consistent commit point. It can then free journal
|
||||
entries up to the commit point. Synchronizing more frequently tends to reduce
|
||||
the time required to perform synchronization, and reduces the amount of data
|
||||
that needs to remain in the journal. Less frequent synchronization allows the
|
||||
backing file system to coalesce small writes and metadata updates more
|
||||
optimally, potentially resulting in more efficient synchronization at the
|
||||
expense of potentially increasing tail latency.
|
||||
Filestore must periodically quiesce writes and synchronize the file system.
|
||||
Each synchronization creates a consistent commit point. When the commit point
|
||||
is created, Filestore is able to free all journal entries up to that point.
|
||||
More-frequent synchronization tends to reduce both synchronization time and
|
||||
the amount of data that needs to remain in the journal. Less-frequent
|
||||
synchronization allows the backing file system to coalesce small writes and
|
||||
metadata updates, potentially increasing synchronization
|
||||
efficiency but also potentially increasing tail latency.
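
For example (the values are illustrative), the synchronization window can be
widened on a cluster whose backing file system benefits from coalescing:

.. prompt:: bash $

   ceph config set osd filestore_min_sync_interval .05
   ceph config set osd filestore_max_sync_interval 10
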
|
||||
|
||||
|
||||
``filestore_max_sync_interval``
|
||||
|
||||
:Description: The maximum interval in seconds for synchronizing Filestore.
|
||||
:Description: Defines the maximum interval (in seconds) for synchronizing Filestore.
|
||||
:Type: Double
|
||||
:Required: No
|
||||
:Default: ``5``
|
||||
@ -130,7 +143,7 @@ expense of potentially increasing tail latency.
|
||||
|
||||
``filestore_min_sync_interval``
|
||||
|
||||
:Description: The minimum interval in seconds for synchronizing Filestore.
|
||||
:Description: Defines the minimum interval (in seconds) for synchronizing Filestore.
|
||||
:Type: Double
|
||||
:Required: No
|
||||
:Default: ``.01``
|
||||
@ -142,14 +155,14 @@ Flusher
|
||||
=======
|
||||
|
||||
The Filestore flusher forces data from large writes to be written out using
|
||||
``sync_file_range`` before the sync in order to (hopefully) reduce the cost of
|
||||
the eventual sync. In practice, disabling 'filestore_flusher' seems to improve
|
||||
performance in some cases.
|
||||
``sync_file_range`` prior to the synchronization.
|
||||
Ideally, this action reduces the cost of the eventual synchronization. In practice, however, disabling
|
||||
'filestore_flusher' seems in some cases to improve performance.
|
||||
|
||||
|
||||
``filestore_flusher``
|
||||
|
||||
:Description: Enables the filestore flusher.
|
||||
:Description: Enables the Filestore flusher.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -158,7 +171,7 @@ performance in some cases.
|
||||
|
||||
``filestore_flusher_max_fds``
|
||||
|
||||
:Description: Sets the maximum number of file descriptors for the flusher.
|
||||
:Description: Defines the maximum number of file descriptors for the flusher.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``512``
|
||||
@ -176,7 +189,7 @@ performance in some cases.
|
||||
|
||||
``filestore_fsync_flushes_journal_data``
|
||||
|
||||
:Description: Flush journal data during file system synchronization.
|
||||
:Description: Flushes journal data during file-system synchronization.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -187,11 +200,11 @@ performance in some cases.
|
||||
Queue
|
||||
=====
|
||||
|
||||
The following settings provide limits on the size of the Filestore queue.
|
||||
The following settings define limits on the size of the Filestore queue:
|
||||
|
||||
``filestore_queue_max_ops``
|
||||
|
||||
:Description: Defines the maximum number of in progress operations the file store accepts before blocking on queuing new operations.
|
||||
:Description: Defines the maximum number of in-progress operations that Filestore accepts before it blocks the queueing of any new operations.
|
||||
:Type: Integer
|
||||
:Required: No. Minimal impact on performance.
|
||||
:Default: ``50``
|
||||
@ -199,23 +212,20 @@ The following settings provide limits on the size of the Filestore queue.
|
||||
|
||||
``filestore_queue_max_bytes``
|
||||
|
||||
:Description: The maximum number of bytes for an operation.
|
||||
:Description: Defines the maximum number of bytes permitted per operation.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``100 << 20``
|
||||
|
||||
|
||||
|
||||
|
||||
.. index:: filestore; timeouts
|
||||
|
||||
Timeouts
|
||||
========
|
||||
|
||||
|
||||
``filestore_op_threads``
|
||||
|
||||
:Description: The number of file system operation threads that execute in parallel.
|
||||
:Description: Defines the number of file-system operation threads that execute in parallel.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``2``
|
||||
@ -223,7 +233,7 @@ Timeouts
|
||||
|
||||
``filestore_op_thread_timeout``
|
||||
|
||||
:Description: The timeout for a file system operation thread (in seconds).
|
||||
:Description: Defines the timeout (in seconds) for a file-system operation thread.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``60``
|
||||
@ -231,7 +241,7 @@ Timeouts
|
||||
|
||||
``filestore_op_thread_suicide_timeout``
|
||||
|
||||
:Description: The timeout for a commit operation before cancelling the commit (in seconds).
|
||||
:Description: Defines the timeout (in seconds) for a commit operation before the commit is cancelled.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``180``
|
||||
@ -245,17 +255,17 @@ B-Tree Filesystem
|
||||
|
||||
``filestore_btrfs_snap``
|
||||
|
||||
:Description: Enable snapshots for a ``btrfs`` filestore.
|
||||
:Description: Enables snapshots for a ``btrfs`` Filestore.
|
||||
:Type: Boolean
|
||||
:Required: No. Only used for ``btrfs``.
|
||||
:Required: No. Used only for ``btrfs``.
|
||||
:Default: ``true``
|
||||
|
||||
|
||||
``filestore_btrfs_clone_range``
|
||||
|
||||
:Description: Enable cloning ranges for a ``btrfs`` filestore.
|
||||
:Description: Enables cloning ranges for a ``btrfs`` Filestore.
|
||||
:Type: Boolean
|
||||
:Required: No. Only used for ``btrfs``.
|
||||
:Required: No. Used only for ``btrfs``.
|
||||
:Default: ``true``
|
||||
|
||||
|
||||
@ -267,7 +277,7 @@ Journal
|
||||
|
||||
``filestore_journal_parallel``
|
||||
|
||||
:Description: Enables parallel journaling, default for Btrfs.
|
||||
:Description: Enables parallel journaling, default for ``btrfs``.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -275,7 +285,7 @@ Journal
|
||||
|
||||
``filestore_journal_writeahead``
|
||||
|
||||
:Description: Enables writeahead journaling, default for XFS.
|
||||
:Description: Enables write-ahead journaling, default for XFS.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -283,7 +293,7 @@ Journal
|
||||
|
||||
``filestore_journal_trailing``
|
||||
|
||||
:Description: Deprecated, never use.
|
||||
:Description: Deprecated. **Never use.**
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -295,8 +305,8 @@ Misc
|
||||
|
||||
``filestore_merge_threshold``
|
||||
|
||||
:Description: Min number of files in a subdir before merging into parent
|
||||
NOTE: A negative value means to disable subdir merging
|
||||
:Description: Defines the minimum number of files permitted in a subdirectory before the subdirectory is merged into its parent directory.
|
||||
NOTE: A negative value means that subdirectory merging is disabled.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``-10``
|
||||
@ -305,8 +315,8 @@ Misc
|
||||
``filestore_split_multiple``
|
||||
|
||||
:Description: ``(filestore_split_multiple * abs(filestore_merge_threshold) + (rand() % filestore_split_rand_factor)) * 16``
|
||||
is the maximum number of files in a subdirectory before
|
||||
splitting into child directories.
|
||||
is the maximum number of files permitted in a subdirectory
|
||||
before the subdirectory is split into child directories.
|
||||
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
@ -316,10 +326,10 @@ Misc
|
||||
``filestore_split_rand_factor``
|
||||
|
||||
:Description: A random factor added to the split threshold to avoid
|
||||
too many (expensive) Filestore splits occurring at once. See
|
||||
``filestore_split_multiple`` for details.
|
||||
This can only be changed offline for an existing OSD,
|
||||
via the ``ceph-objectstore-tool apply-layout-settings`` command.
|
||||
too many (expensive) Filestore splits occurring at the same time.
|
||||
For details, see ``filestore_split_multiple``.
|
||||
To change this setting for an existing OSD, it is necessary to take the OSD
|
||||
offline before running the ``ceph-objectstore-tool apply-layout-settings`` command.
|
||||
|
||||
:Type: Unsigned 32-bit Integer
|
||||
:Required: No
|
||||
@ -328,7 +338,7 @@ Misc
|
||||
|
||||
``filestore_update_to``
|
||||
|
||||
:Description: Limits Filestore auto upgrade to specified version.
|
||||
:Description: Limits automatic upgrades to a specified version of Filestore. Useful in cases in which you want to avoid upgrading to a specific version.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``1000``
|
||||
@ -336,7 +346,7 @@ Misc
|
||||
|
||||
``filestore_blackhole``
|
||||
|
||||
:Description: Drop any new transactions on the floor.
|
||||
:Description: Drops any new transactions on the floor, similar to redirecting to NULL.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -344,7 +354,7 @@ Misc
|
||||
|
||||
``filestore_dump_file``
|
||||
|
||||
:Description: File onto which store transaction dumps.
|
||||
:Description: Defines the file that transaction dumps are stored on.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -352,7 +362,7 @@ Misc
|
||||
|
||||
``filestore_kill_at``
|
||||
|
||||
:Description: inject a failure at the n'th opportunity
|
||||
:Description: Injects a failure at the *n*\th opportunity.
|
||||
:Type: String
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -360,8 +370,7 @@ Misc
|
||||
|
||||
``filestore_fail_eio``
|
||||
|
||||
:Description: Fail/Crash on eio.
|
||||
:Description: Fail/Crash on EIO.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``true``
|
||||
|
||||
|
@ -16,24 +16,29 @@ consistent, but you can add, remove or replace a monitor in a cluster. See
|
||||
Background
|
||||
==========
|
||||
|
||||
Ceph Monitors maintain a "master copy" of the :term:`Cluster Map`, which means a
|
||||
:term:`Ceph Client` can determine the location of all Ceph Monitors, Ceph OSD
|
||||
Daemons, and Ceph Metadata Servers just by connecting to one Ceph Monitor and
|
||||
retrieving a current cluster map. Before Ceph Clients can read from or write to
|
||||
Ceph OSD Daemons or Ceph Metadata Servers, they must connect to a Ceph Monitor
|
||||
first. With a current copy of the cluster map and the CRUSH algorithm, a Ceph
|
||||
Client can compute the location for any object. The ability to compute object
|
||||
locations allows a Ceph Client to talk directly to Ceph OSD Daemons, which is a
|
||||
very important aspect of Ceph's high scalability and performance. See
|
||||
`Scalability and High Availability`_ for additional details.
|
||||
Ceph Monitors maintain a "master copy" of the :term:`Cluster Map`.
|
||||
|
||||
The primary role of the Ceph Monitor is to maintain a master copy of the cluster
|
||||
map. Ceph Monitors also provide authentication and logging services. Ceph
|
||||
Monitors write all changes in the monitor services to a single Paxos instance,
|
||||
and Paxos writes the changes to a key/value store for strong consistency. Ceph
|
||||
Monitors can query the most recent version of the cluster map during sync
|
||||
operations. Ceph Monitors leverage the key/value store's snapshots and iterators
|
||||
(using leveldb) to perform store-wide synchronization.
|
||||
The maintenance by Ceph Monitors of a :term:`Cluster Map` makes it possible for
|
||||
a :term:`Ceph Client` to determine the location of all Ceph Monitors, Ceph OSD
|
||||
Daemons, and Ceph Metadata Servers by connecting to one Ceph Monitor and
|
||||
retrieving a current cluster map. Before Ceph Clients can read from or write to
|
||||
Ceph OSD Daemons or Ceph Metadata Servers, they must connect to a Ceph Monitor.
|
||||
When a Ceph client has a current copy of the cluster map and the CRUSH
|
||||
algorithm, it can compute the location for any RADOS object within the
|
||||
cluster. This ability to compute the locations of objects makes it possible for
|
||||
Ceph Clients to talk directly to Ceph OSD Daemons. This direct communication
|
||||
with Ceph OSD Daemons represents an improvement upon traditional storage
|
||||
architectures in which clients were required to communicate with a central
|
||||
component, and that improvement contributes to Ceph's high scalability and
|
||||
performance. See `Scalability and High Availability`_ for additional details.
|
||||
|
||||
The Ceph Monitor's primary function is to maintain a master copy of the cluster
|
||||
map. Monitors also provide authentication and logging services. All changes in
|
||||
the monitor services are written by the Ceph Monitor to a single Paxos
|
||||
instance, and Paxos writes the changes to a key/value store for strong
|
||||
consistency. Ceph Monitors are able to query the most recent version of the
|
||||
cluster map during sync operations, and they use the key/value store's
|
||||
snapshots and iterators (using leveldb) to perform store-wide synchronization.
|
||||
|
||||
.. ditaa::
|
||||
/-------------\ /-------------\
|
||||
@ -56,12 +61,6 @@ operations. Ceph Monitors leverage the key/value store's snapshots and iterators
|
||||
| cCCC |*---------------------+
|
||||
\-------------/
|
||||
|
||||
|
||||
.. deprecated:: version 0.58
|
||||
|
||||
In Ceph versions 0.58 and earlier, Ceph Monitors use a Paxos instance for
|
||||
each service and store the map as a file.
|
||||
|
||||
.. index:: Ceph Monitor; cluster map
|
||||
|
||||
Cluster Maps
|
||||
|
@ -25,6 +25,7 @@ There are two Ceph daemons that store data on devices:
|
||||
additional monitoring and providing interfaces to external
|
||||
monitoring and management systems.
|
||||
|
||||
.. _rados_config_storage_devices_osd_backends:
|
||||
|
||||
OSD Back Ends
|
||||
=============
|
||||
|
@ -3,14 +3,15 @@
|
||||
Balancer
|
||||
========
|
||||
|
||||
The *balancer* can optimize the placement of PGs across OSDs in
|
||||
order to achieve a balanced distribution, either automatically or in a
|
||||
supervised fashion.
|
||||
The *balancer* can optimize the allocation of placement groups (PGs) across
|
||||
OSDs in order to achieve a balanced distribution. The balancer can operate
|
||||
either automatically or in a supervised fashion.
|
||||
|
||||
|
||||
Status
|
||||
------
|
||||
|
||||
The current status of the balancer can be checked at any time with:
|
||||
To check the current status of the balancer, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -20,70 +21,78 @@ The current status of the balancer can be checked at any time with:
|
||||
Automatic balancing
|
||||
-------------------
|
||||
|
||||
The automatic balancing feature is enabled by default in ``upmap``
|
||||
mode. Please refer to :ref:`upmap` for more details. The balancer can be
|
||||
turned off with:
|
||||
When the balancer is in ``upmap`` mode, the automatic balancing feature is
|
||||
enabled by default. For more details, see :ref:`upmap`. To disable the
|
||||
balancer, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer off
|
||||
|
||||
The balancer mode can be changed to ``crush-compat`` mode, which is
|
||||
backward compatible with older clients, and will make small changes to
|
||||
the data distribution over time to ensure that OSDs are equally utilized.
|
||||
The balancer mode can be changed from ``upmap`` mode to ``crush-compat`` mode.
|
||||
``crush-compat`` mode is backward compatible with older clients. In
|
||||
``crush-compat`` mode, the balancer automatically makes small changes to the
|
||||
data distribution in order to ensure that OSDs are utilized equally.
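
For example, to switch the mode to ``crush-compat`` and then (re)enable
automatic balancing:

.. prompt:: bash $

   ceph balancer mode crush-compat
   ceph balancer on
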
|
||||
|
||||
|
||||
Throttling
|
||||
----------
|
||||
|
||||
No adjustments will be made to the PG distribution if the cluster is
|
||||
degraded (e.g., because an OSD has failed and the system has not yet
|
||||
healed itself).
|
||||
If the cluster is degraded (that is, if an OSD has failed and the system hasn't
|
||||
healed itself yet), then the balancer will not make any adjustments to the PG
|
||||
distribution.
|
||||
|
||||
When the cluster is healthy, the balancer will throttle its changes
|
||||
such that the percentage of PGs that are misplaced (i.e., that need to
|
||||
be moved) is below a threshold of (by default) 5%. The
|
||||
``target_max_misplaced_ratio`` threshold can be adjusted with:
|
||||
When the cluster is healthy, the balancer will incrementally move a small
|
||||
fraction of unbalanced PGs in order to improve distribution. This fraction
|
||||
will not exceed a certain threshold that defaults to 5%. To adjust this
|
||||
``target_max_misplaced_ratio`` threshold setting, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr target_max_misplaced_ratio .07 # 7%
|
||||
|
||||
Set the number of seconds to sleep in between runs of the automatic balancer:
|
||||
The balancer sleeps between runs. To set the number of seconds for this
|
||||
interval of sleep, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/sleep_interval 60
|
||||
|
||||
Set the time of day to begin automatic balancing in HHMM format:
|
||||
To set the time of day (in HHMM format) at which automatic balancing begins,
|
||||
run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/begin_time 0000
|
||||
|
||||
Set the time of day to finish automatic balancing in HHMM format:
|
||||
To set the time of day (in HHMM format) at which automatic balancing ends, run
|
||||
the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/end_time 2359
|
||||
|
||||
Restrict automatic balancing to this day of the week or later.
|
||||
Uses the same conventions as crontab, 0 is Sunday, 1 is Monday, and so on:
|
||||
Automatic balancing can be restricted to certain days of the week. To restrict
|
||||
it to a specific day of the week or later (as with crontab, ``0`` is Sunday,
|
||||
``1`` is Monday, and so on), run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/begin_weekday 0
|
||||
|
||||
Restrict automatic balancing to this day of the week or earlier.
|
||||
Uses the same conventions as crontab, 0 is Sunday, 1 is Monday, and so on:
|
||||
To restrict automatic balancing to a specific day of the week or earlier
|
||||
(again, ``0`` is Sunday, ``1`` is Monday, and so on), run the following
|
||||
command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/end_weekday 6
|
||||
|
||||
Pool IDs to which the automatic balancing will be limited.
|
||||
The default for this is an empty string, meaning all pools will be balanced.
|
||||
The numeric pool IDs can be gotten with the :command:`ceph osd pool ls detail` command:
|
||||
Automatic balancing can be restricted to certain pools. By default, the value
|
||||
of this setting is an empty string, so that all pools are automatically
|
||||
balanced. To restrict automatic balancing to specific pools, retrieve their
|
||||
numeric pool IDs (by running the :command:`ceph osd pool ls detail` command),
|
||||
and then run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -93,43 +102,41 @@ The numeric pool IDs can be gotten with the :command:`ceph osd pool ls detail` c
|
||||
Modes
|
||||
-----
|
||||
|
||||
There are currently two supported balancer modes:
|
||||
There are two supported balancer modes:
|
||||
|
||||
#. **crush-compat**. The CRUSH compat mode uses the compat weight-set
|
||||
feature (introduced in Luminous) to manage an alternative set of
|
||||
weights for devices in the CRUSH hierarchy. The normal weights
|
||||
should remain set to the size of the device to reflect the target
|
||||
amount of data that we want to store on the device. The balancer
|
||||
then optimizes the weight-set values, adjusting them up or down in
|
||||
small increments, in order to achieve a distribution that matches
|
||||
the target distribution as closely as possible. (Because PG
|
||||
placement is a pseudorandom process, there is a natural amount of
|
||||
variation in the placement; by optimizing the weights we
|
||||
counter-act that natural variation.)
|
||||
#. **crush-compat**. This mode uses the compat weight-set feature (introduced
|
||||
in Luminous) to manage an alternative set of weights for devices in the
|
||||
CRUSH hierarchy. When the balancer is operating in this mode, the normal
|
||||
weights should remain set to the size of the device in order to reflect the
|
||||
target amount of data intended to be stored on the device. The balancer will
|
||||
then optimize the weight-set values, adjusting them up or down in small
|
||||
increments, in order to achieve a distribution that matches the target
|
||||
distribution as closely as possible. (Because PG placement is a pseudorandom
|
||||
process, it is subject to a natural amount of variation; optimizing the
|
||||
weights serves to counteract that natural variation.)
|
||||
|
||||
Notably, this mode is *fully backwards compatible* with older
|
||||
clients: when an OSDMap and CRUSH map is shared with older clients,
|
||||
we present the optimized weights as the "real" weights.
|
||||
Note that this mode is *fully backward compatible* with older clients: when
|
||||
an OSD Map and CRUSH map are shared with older clients, Ceph presents the
|
||||
optimized weights as the "real" weights.
|
||||
|
||||
The primary restriction of this mode is that the balancer cannot
|
||||
handle multiple CRUSH hierarchies with different placement rules if
|
||||
the subtrees of the hierarchy share any OSDs. (This is normally
|
||||
not the case, and is generally not a recommended configuration
|
||||
because it is hard to manage the space utilization on the shared
|
||||
OSDs.)
|
||||
The primary limitation of this mode is that the balancer cannot handle
|
||||
multiple CRUSH hierarchies with different placement rules if the subtrees of
|
||||
the hierarchy share any OSDs. (Such sharing of OSDs is not typical and,
|
||||
because of the difficulty of managing the space utilization on the shared
|
||||
OSDs, is generally not recommended.)
|
||||
|
||||
#. **upmap**. Starting with Luminous, the OSDMap can store explicit
|
||||
mappings for individual OSDs as exceptions to the normal CRUSH
|
||||
placement calculation. These `upmap` entries provide fine-grained
|
||||
control over the PG mapping. This CRUSH mode will optimize the
|
||||
placement of individual PGs in order to achieve a balanced
|
||||
distribution. In most cases, this distribution is "perfect," with
|
||||
an equal number of PGs on each OSD (+/-1 PG, since they might not
|
||||
divide evenly).
|
||||
#. **upmap**. In Luminous and later releases, the OSDMap can store explicit
|
||||
mappings for individual OSDs as exceptions to the normal CRUSH placement
|
||||
calculation. These ``upmap`` entries provide fine-grained control over the
|
||||
PG mapping. This balancer mode optimizes the placement of individual PGs in
|
||||
order to achieve a balanced distribution. In most cases, the resulting
|
||||
distribution is nearly perfect: that is, there is an equal number of PGs on
|
||||
each OSD (±1 PG, since the total number might not divide evenly).
|
||||
|
||||
Note that using upmap requires that all clients be Luminous or newer.
|
||||
To use ``upmap``, all clients must be Luminous or newer.
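One way to confirm that this requirement is met (a sketch, not a required step in this documentation) is to review the releases of connected clients and, if appropriate, require at least Luminous-level clients before relying on ``upmap``:

.. prompt:: bash $

   ceph features
   ceph osd set-require-min-compat-client luminous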
|
||||
|
||||
The default mode is ``upmap``. The mode can be adjusted with:
|
||||
The default mode is ``upmap``. The mode can be changed to ``crush-compat`` by
|
||||
running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -138,69 +145,77 @@ The default mode is ``upmap``. The mode can be adjusted with:
|
||||
Supervised optimization
|
||||
-----------------------
|
||||
|
||||
The balancer operation is broken into a few distinct phases:
|
||||
Supervised use of the balancer can be understood in terms of three distinct
|
||||
phases:
|
||||
|
||||
#. building a *plan*
|
||||
#. evaluating the quality of the data distribution, either for the current PG distribution, or the PG distribution that would result after executing a *plan*
|
||||
#. executing the *plan*
|
||||
#. building a plan
|
||||
#. evaluating the quality of the data distribution, either for the current PG
|
||||
distribution or for the PG distribution that would result after executing a
|
||||
plan
|
||||
#. executing the plan
|
||||
|
||||
To evaluate and score the current distribution:
|
||||
To evaluate the current distribution, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer eval
|
||||
|
||||
You can also evaluate the distribution for a single pool with:
|
||||
To evaluate the distribution for a single pool, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer eval <pool-name>
|
||||
|
||||
Greater detail for the evaluation can be seen with:
|
||||
To see the evaluation in greater detail, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer eval-verbose ...
|
||||
|
||||
The balancer can generate a plan, using the currently configured mode, with:
|
||||
|
||||
To instruct the balancer to generate a plan (using the currently configured
|
||||
mode), make up a name (any useful identifying string) for the plan, and run the
|
||||
following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer optimize <plan-name>
|
||||
|
||||
The name is provided by the user and can be any useful identifying string. The contents of a plan can be seen with:
|
||||
To see the contents of a plan, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer show <plan-name>
|
||||
|
||||
All plans can be shown with:
|
||||
To display all plans, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer ls
|
||||
|
||||
Old plans can be discarded with:
|
||||
To discard an old plan, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer rm <plan-name>
|
||||
|
||||
Currently recorded plans are shown as part of the status command:
|
||||
To see currently recorded plans, examine the output of the following status
|
||||
command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer status
|
||||
|
||||
The quality of the distribution that would result after executing a plan can be calculated with:
|
||||
To evaluate the distribution that would result from executing a specific plan,
|
||||
run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer eval <plan-name>
|
||||
|
||||
Assuming the plan is expected to improve the distribution (i.e., it has a lower score than the current cluster state), the user can execute that plan with:
|
||||
If a plan is expected to improve the distribution (that is, the plan's score is
|
||||
lower than the current cluster state's score), you can execute that plan by
|
||||
running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer execute <plan-name>
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _rados_operations_bluestore_migration:
|
||||
|
||||
=====================
|
||||
BlueStore Migration
|
||||
=====================
|
||||
|
@ -1,6 +1,10 @@
|
||||
===============
|
||||
Cache Tiering
|
||||
===============
|
||||
.. warning:: Cache tiering has been deprecated in the Reef release as it
|
||||
has lacked a maintainer for a very long time. This does not mean
|
||||
it will certainly be removed, but we may choose to remove it
|
||||
without much further notice.
|
||||
|
||||
A cache tier provides Ceph Clients with better I/O performance for a subset of
|
||||
the data stored in a backing storage tier. Cache tiering involves creating a
|
||||
|
@ -315,7 +315,7 @@ the hierarchy is visible as a separate column (labeled either
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tree
|
||||
ceph osd crush tree
|
||||
|
||||
When both *compat* and *per-pool* weight sets are in use, data
|
||||
placement for a particular pool will use its own per-pool weight set
|
||||
|
@ -2,40 +2,44 @@
|
||||
Data Placement Overview
|
||||
=========================
|
||||
|
||||
Ceph stores, replicates and rebalances data objects across a RADOS cluster
|
||||
dynamically. With many different users storing objects in different pools for
|
||||
different purposes on countless OSDs, Ceph operations require some data
|
||||
placement planning. The main data placement planning concepts in Ceph include:
|
||||
Ceph stores, replicates, and rebalances data objects across a RADOS cluster
|
||||
dynamically. Because different users store objects in different pools for
|
||||
different purposes on many OSDs, Ceph operations require a certain amount of
|
||||
data-placement planning. The main data-placement planning concepts in Ceph
|
||||
include:
|
||||
|
||||
- **Pools:** Ceph stores data within pools, which are logical groups for storing
|
||||
objects. Pools manage the number of placement groups, the number of replicas,
|
||||
and the CRUSH rule for the pool. To store data in a pool, you must have
|
||||
an authenticated user with permissions for the pool. Ceph can snapshot pools.
|
||||
See `Pools`_ for additional details.
|
||||
- **Pools:** Ceph stores data within pools, which are logical groups used for
|
||||
storing objects. Pools manage the number of placement groups, the number of
|
||||
replicas, and the CRUSH rule for the pool. To store data in a pool, it is
|
||||
necessary to be an authenticated user with permissions for the pool. Ceph is
|
||||
able to make snapshots of pools. For additional details, see `Pools`_.
|
||||
|
||||
- **Placement Groups:** Ceph maps objects to placement groups (PGs).
|
||||
Placement groups (PGs) are shards or fragments of a logical object pool
|
||||
that place objects as a group into OSDs. Placement groups reduce the amount
|
||||
of per-object metadata when Ceph stores the data in OSDs. A larger number of
|
||||
placement groups (e.g., 100 per OSD) leads to better balancing. See
|
||||
`Placement Groups`_ for additional details.
|
||||
- **Placement Groups:** Ceph maps objects to placement groups. Placement
|
||||
groups (PGs) are shards or fragments of a logical object pool that place
|
||||
objects as a group into OSDs. Placement groups reduce the amount of
|
||||
per-object metadata that is necessary for Ceph to store the data in OSDs. A
|
||||
greater number of placement groups (for example, 100 PGs per OSD as compared
|
||||
with 50 PGs per OSD) leads to better balancing.
|
||||
|
||||
- **CRUSH Maps:** CRUSH is a big part of what allows Ceph to scale without
|
||||
performance bottlenecks, without limitations to scalability, and without a
|
||||
single point of failure. CRUSH maps provide the physical topology of the
|
||||
cluster to the CRUSH algorithm to determine where the data for an object
|
||||
and its replicas should be stored, and how to do so across failure domains
|
||||
for added data safety among other things. See `CRUSH Maps`_ for additional
|
||||
details.
|
||||
- **CRUSH Maps:** CRUSH plays a major role in allowing Ceph to scale while
|
||||
avoiding certain pitfalls, such as performance bottlenecks, limitations to
|
||||
scalability, and single points of failure. CRUSH maps provide the physical
|
||||
topology of the cluster to the CRUSH algorithm, so that it can determine both
|
||||
(1) where the data for an object and its replicas should be stored and (2)
|
||||
how to store that data across failure domains so as to improve data safety.
|
||||
For additional details, see `CRUSH Maps`_.
|
||||
|
||||
- **Balancer:** The balancer is a feature that will automatically optimize the
|
||||
distribution of PGs across devices to achieve a balanced data distribution,
|
||||
maximizing the amount of data that can be stored in the cluster and evenly
|
||||
distributing the workload across OSDs.
|
||||
- **Balancer:** The balancer is a feature that automatically optimizes the
|
||||
distribution of placement groups across devices in order to achieve a
|
||||
balanced data distribution, in order to maximize the amount of data that can
|
||||
be stored in the cluster, and in order to evenly distribute the workload
|
||||
across OSDs.
|
||||
|
||||
When you initially set up a test cluster, you can use the default values. Once
|
||||
you begin planning for a large Ceph cluster, refer to pools, placement groups
|
||||
and CRUSH for data placement operations.
|
||||
It is possible to use the default values for each of the above components.
|
||||
Default values are recommended for a test cluster's initial setup. However,
|
||||
when planning a large Ceph cluster, values should be customized for
|
||||
data-placement operations with reference to the different roles played by
|
||||
pools, placement groups, and CRUSH.
|
||||
|
||||
.. _Pools: ../pools
|
||||
.. _Placement Groups: ../placement-groups
|
||||
|
@ -3,28 +3,32 @@
|
||||
Device Management
|
||||
=================
|
||||
|
||||
Ceph tracks which hardware storage devices (e.g., HDDs, SSDs) are consumed by
|
||||
which daemons, and collects health metrics about those devices in order to
|
||||
provide tools to predict and/or automatically respond to hardware failure.
|
||||
Device management allows Ceph to address hardware failure. Ceph tracks hardware
|
||||
storage devices (HDDs, SSDs) to see which devices are managed by which daemons.
|
||||
Ceph also collects health metrics about these devices. By doing so, Ceph can
|
||||
provide tools that predict hardware failure and can automatically respond to
|
||||
hardware failure.
|
||||
|
||||
Device tracking
|
||||
---------------
|
||||
|
||||
You can query which storage devices are in use with:
|
||||
To see a list of the storage devices that are in use, run the following
|
||||
command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device ls
|
||||
|
||||
You can also list devices by daemon or by host:
|
||||
Alternatively, to list devices by daemon or by host, run a command of one of
|
||||
the following forms:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device ls-by-daemon <daemon>
|
||||
ceph device ls-by-host <host>
|
||||
|
||||
For any individual device, you can query information about its
|
||||
location and how it is being consumed with:
|
||||
To see information about the location of a specific device and about how the
|
||||
device is being consumed, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -33,103 +37,107 @@ location and how it is being consumed with:
|
||||
Identifying physical devices
|
||||
----------------------------
|
||||
|
||||
You can blink the drive LEDs on hardware enclosures to make the replacement of
|
||||
failed disks easy and less error-prone. Use the following command::
|
||||
To make the replacement of failed disks easier and less error-prone, you can
|
||||
(in some cases) "blink" the drive's LEDs on hardware enclosures by running a
|
||||
command of the following form::
|
||||
|
||||
device light on|off <devid> [ident|fault] [--force]
|
||||
|
||||
The ``<devid>`` parameter is the device identification. You can obtain this
|
||||
information using the following command:
|
||||
.. note:: Using this command to blink the lights might not work. Whether it
|
||||
works will depend upon such factors as your kernel revision, your SES
|
||||
firmware, or the setup of your HBA.
|
||||
|
||||
The ``<devid>`` parameter is the device identification. To retrieve this
|
||||
information, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device ls
|
||||
|
||||
The ``[ident|fault]`` parameter is used to set the kind of light to blink.
|
||||
By default, the `identification` light is used.
|
||||
The ``[ident|fault]`` parameter determines which kind of light will blink. By
|
||||
default, the `identification` light is used.
|
||||
|
||||
.. note::
|
||||
This command needs the Cephadm or the Rook `orchestrator <https://docs.ceph.com/docs/master/mgr/orchestrator/#orchestrator-cli-module>`_ module enabled.
|
||||
The orchestrator module enabled is shown by executing the following command:
|
||||
.. note:: This command works only if the Cephadm or the Rook `orchestrator
|
||||
<https://docs.ceph.com/docs/master/mgr/orchestrator/#orchestrator-cli-module>`_
|
||||
module is enabled. To see which orchestrator module is enabled, run the
|
||||
following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch status
|
||||
|
||||
The command behind the scene to blink the drive LEDs is `lsmcli`. If you need
|
||||
to customize this command you can configure this via a Jinja2 template::
|
||||
The command that makes the drive's LEDs blink is `lsmcli`. To customize this
|
||||
command, configure it via a Jinja2 template by running commands of the
|
||||
following forms::
|
||||
|
||||
ceph config-key set mgr/cephadm/blink_device_light_cmd "<template>"
|
||||
ceph config-key set mgr/cephadm/<host>/blink_device_light_cmd "lsmcli local-disk-{{ ident_fault }}-led-{{'on' if on else 'off'}} --path '{{ path or dev }}'"
|
||||
|
||||
The Jinja2 template is rendered using the following arguments:
|
||||
The following arguments can be used to customize the Jinja2 template:
|
||||
|
||||
* ``on``
|
||||
A boolean value.
|
||||
* ``ident_fault``
|
||||
A string containing `ident` or `fault`.
|
||||
A string that contains `ident` or `fault`.
|
||||
* ``dev``
|
||||
A string containing the device ID, e.g. `SanDisk_X400_M.2_2280_512GB_162924424784`.
|
||||
A string that contains the device ID: for example, `SanDisk_X400_M.2_2280_512GB_162924424784`.
|
||||
* ``path``
|
||||
A string containing the device path, e.g. `/dev/sda`.
|
||||
A string that contains the device path: for example, `/dev/sda`.
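For illustration only, rendering the default template shown above with ``on=True``, ``ident_fault='ident'``, and ``path='/dev/sda'`` would produce a command along these lines::

   lsmcli local-disk-ident-led-on --path '/dev/sda'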
|
||||
|
||||
.. _enabling-monitoring:
|
||||
|
||||
Enabling monitoring
|
||||
-------------------
|
||||
|
||||
Ceph can also monitor health metrics associated with your device. For
|
||||
example, SATA hard disks implement a standard called SMART that
|
||||
provides a wide range of internal metrics about the device's usage and
|
||||
health, like the number of hours powered on, number of power cycles,
|
||||
or unrecoverable read errors. Other device types like SAS and NVMe
|
||||
implement a similar set of metrics (via slightly different standards).
|
||||
All of these can be collected by Ceph via the ``smartctl`` tool.
|
||||
Ceph can also monitor the health metrics associated with your device. For
|
||||
example, SATA drives implement a standard called SMART that provides a wide
|
||||
range of internal metrics about the device's usage and health (for example: the
|
||||
number of hours powered on, the number of power cycles, the number of
|
||||
unrecoverable read errors). Other device types such as SAS and NVMe present a
|
||||
similar set of metrics (via slightly different standards). All of these
|
||||
metrics can be collected by Ceph via the ``smartctl`` tool.
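For reference, the same SMART data can also be inspected directly on a host with the ``smartctl`` tool itself; the device path below is only an example:

.. prompt:: bash $

   sudo smartctl --all /dev/sda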
|
||||
|
||||
You can enable or disable health monitoring with:
|
||||
You can enable or disable health monitoring by running one of the following
|
||||
commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device monitoring on
|
||||
|
||||
or:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device monitoring off
|
||||
|
||||
|
||||
Scraping
|
||||
--------
|
||||
|
||||
If monitoring is enabled, metrics will automatically be scraped at regular intervals. That interval can be configured with:
|
||||
If monitoring is enabled, device metrics will be scraped automatically at
|
||||
regular intervals. To configure that interval, run a command of the following
|
||||
form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/devicehealth/scrape_frequency <seconds>
|
||||
|
||||
The default is to scrape once every 24 hours.
|
||||
By default, device metrics are scraped once every 24 hours.
|
||||
|
||||
You can manually trigger a scrape of all devices with:
|
||||
To manually scrape all devices, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device scrape-health-metrics
|
||||
|
||||
A single device can be scraped with:
|
||||
To scrape a single device, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device scrape-health-metrics <device-id>
|
||||
|
||||
Or a single daemon's devices can be scraped with:
|
||||
To scrape a single daemon's devices, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device scrape-daemon-health-metrics <who>
|
||||
|
||||
The stored health metrics for a device can be retrieved (optionally
|
||||
for a specific timestamp) with:
|
||||
To retrieve the stored health metrics for a device (optionally for a specific
|
||||
timestamp), run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -138,71 +146,82 @@ for a specific timestamp) with:
|
||||
Failure prediction
|
||||
------------------
|
||||
|
||||
Ceph can predict life expectancy and device failures based on the
|
||||
health metrics it collects. There are three modes:
|
||||
Ceph can predict drive life expectancy and device failures by analyzing the
|
||||
health metrics that it collects. The prediction modes are as follows:
|
||||
|
||||
* *none*: disable device failure prediction.
|
||||
* *local*: use a pre-trained prediction model from the ceph-mgr daemon
|
||||
* *local*: use a pre-trained prediction model from the ``ceph-mgr`` daemon.
|
||||
|
||||
The prediction mode can be configured with:
|
||||
To configure the prediction mode, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set global device_failure_prediction_mode <mode>
|
||||
|
||||
Prediction normally runs in the background on a periodic basis, so it
|
||||
may take some time before life expectancy values are populated. You
|
||||
can see the life expectancy of all devices in output from:
|
||||
Under normal conditions, failure prediction runs periodically in the
|
||||
background. For this reason, life expectancy values might be populated only
|
||||
after a significant amount of time has passed. The life expectancy of all
|
||||
devices is displayed in the output of the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device ls
|
||||
|
||||
You can also query the metadata for a specific device with:
|
||||
To see the metadata of a specific device, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device info <devid>
|
||||
|
||||
You can explicitly force prediction of a device's life expectancy with:
|
||||
To explicitly force prediction of a specific device's life expectancy, run a
|
||||
command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device predict-life-expectancy <devid>
|
||||
|
||||
If you are not using Ceph's internal device failure prediction but
|
||||
have some external source of information about device failures, you
|
||||
can inform Ceph of a device's life expectancy with:
|
||||
In addition to Ceph's internal device failure prediction, you might have an
|
||||
external source of information about device failures. To inform Ceph of a
|
||||
specific device's life expectancy, run a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device set-life-expectancy <devid> <from> [<to>]
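For example (a sketch only: the device ID is reused from earlier in this page, the dates are placeholders, and the exact timestamp format accepted may differ by release; consult the command's built-in help), an externally predicted failure window could be recorded as follows:

.. prompt:: bash $

   ceph device set-life-expectancy SanDisk_X400_M.2_2280_512GB_162924424784 2024-01-01 2024-06-30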
|
||||
|
||||
Life expectancies are expressed as a time interval so that
|
||||
uncertainty can be expressed in the form of a wide interval. The
|
||||
interval end can also be left unspecified.
|
||||
Life expectancies are expressed as a time interval. This means that the
|
||||
uncertainty of the life expectancy can be expressed in the form of a range of
|
||||
time, and perhaps a wide range of time. The interval's end can be left
|
||||
unspecified.
|
||||
|
||||
Health alerts
|
||||
-------------
|
||||
|
||||
The ``mgr/devicehealth/warn_threshold`` controls how soon an expected
|
||||
device failure must be before we generate a health warning.
|
||||
The ``mgr/devicehealth/warn_threshold`` configuration option controls the
|
||||
health check for an expected device failure. If the device is expected to fail
|
||||
within the specified time interval, an alert is raised.
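As a sketch (the value below is only an example and is assumed, like the other ``mgr/devicehealth`` options on this page, to be expressed in seconds), the threshold can be adjusted in the same way as the scrape interval:

.. prompt:: bash $

   ceph config set mgr mgr/devicehealth/warn_threshold 2419200   # four weeks, in seconds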
|
||||
|
||||
The stored life expectancy of all devices can be checked, and any
|
||||
appropriate health alerts generated, with:
|
||||
To check the stored life expectancy of all devices and generate any appropriate
|
||||
health alert, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device check-health
|
||||
|
||||
Automatic Mitigation
|
||||
--------------------
|
||||
Automatic Migration
|
||||
-------------------
|
||||
|
||||
If the ``mgr/devicehealth/self_heal`` option is enabled (it is by
|
||||
default), then for devices that are expected to fail soon the module
|
||||
will automatically migrate data away from them by marking the devices
|
||||
"out".
|
||||
The ``mgr/devicehealth/self_heal`` option (enabled by default) automatically
|
||||
migrates data away from devices that are expected to fail soon. If this option
|
||||
is enabled, the module marks such devices ``out`` so that automatic migration
|
||||
will occur.
|
||||
|
||||
The ``mgr/devicehealth/mark_out_threshold`` controls how soon an
|
||||
expected device failure must be before we automatically mark an osd
|
||||
"out".
|
||||
.. note:: The ``mon_osd_min_up_ratio`` configuration option can help prevent
|
||||
this process from cascading to total failure. If the "self heal" module
|
||||
marks ``out`` so many OSDs that the ratio value of ``mon_osd_min_up_ratio``
|
||||
is exceeded, then the cluster raises the ``DEVICE_HEALTH_TOOMANY`` health
|
||||
check. For instructions on what to do in this situation, see
|
||||
:ref:`DEVICE_HEALTH_TOOMANY<rados_health_checks_device_health_toomany>`.
|
||||
|
||||
The ``mgr/devicehealth/mark_out_threshold`` configuration option specifies the
|
||||
time interval for automatic migration. If a device is expected to fail within
|
||||
the specified time interval, it will be automatically marked ``out``.
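A sketch of adjusting this interval follows (the value is only an example and, as with the options above, is assumed to be expressed in seconds):

.. prompt:: bash $

   ceph config set mgr mgr/devicehealth/mark_out_threshold 2419200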
|
||||
|
@ -6,9 +6,11 @@ The *jerasure* plugin is the most generic and flexible plugin, it is
|
||||
also the default for Ceph erasure coded pools.
|
||||
|
||||
The *jerasure* plugin encapsulates the `Jerasure
|
||||
<http://jerasure.org>`_ library. It is
|
||||
recommended to read the *jerasure* documentation to get a better
|
||||
understanding of the parameters.
|
||||
<https://github.com/ceph/jerasure>`_ library. It is
|
||||
recommended to read the ``jerasure`` documentation to
|
||||
understand the parameters. Note that the ``jerasure.org``
|
||||
web site as of 2023 may no longer be connected to the original
|
||||
project or legitimate.
|
||||
|
||||
Create a jerasure profile
|
||||
=========================
|
||||
|
@ -843,6 +843,8 @@ This message can be silenced by disabling self-heal behavior (that is, setting
|
||||
``mgr/devicehealth/mark_out_threshold``, or by addressing whichever condition
|
||||
is preventing data from being migrated off of the ailing OSD(s).
|
||||
|
||||
.. _rados_health_checks_device_health_toomany:
|
||||
|
||||
DEVICE_HEALTH_TOOMANY
|
||||
_____________________
|
||||
|
||||
|
@ -117,11 +117,12 @@ pseudo-random placement that takes into account the failure domains that you
|
||||
have set in your `CRUSH map`_; for this reason, PGs are rarely assigned to
|
||||
immediately adjacent OSDs in a large cluster.
|
||||
|
||||
Ceph processes a client request using the **Acting Set**, which is the set of
|
||||
OSDs that will actually handle the requests since they have a full and working
|
||||
version of a placement group shard. The set of OSDs that should contain a shard
|
||||
of a particular placement group is known as the **Up Set**, i.e. where data is
|
||||
moved/copied to (or planned to be).
|
||||
Ceph processes client requests with the **Acting Set** of OSDs: this is the set
|
||||
of OSDs that currently have a full and working version of a PG shard and that
|
||||
are therefore responsible for handling requests. By contrast, the **Up Set** is
|
||||
the set of OSDs that contain a shard of a specific PG. Data is moved or copied
|
||||
(or planned to be moved or copied) to the **Up Set**. See
|
||||
:ref:`Placement Group Concepts <rados_operations_pg_concepts>`.
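To see the Up Set and the Acting Set of a particular PG, query its mapping (the PG ID below is only an example); the output reports both sets for that PG:

.. prompt:: bash $

   ceph pg map 1.7f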
|
||||
|
||||
Sometimes an OSD in the Acting Set is ``down`` or otherwise unable to
|
||||
service requests for objects in the PG. When this kind of situation
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _rados_operations_pg_concepts:
|
||||
|
||||
==========================
|
||||
Placement Group Concepts
|
||||
==========================
|
||||
|
@ -7,209 +7,256 @@ Stretch Clusters
|
||||
|
||||
Stretch Clusters
|
||||
================
|
||||
Ceph generally expects all parts of its network and overall cluster to be
|
||||
equally reliable, with failures randomly distributed across the CRUSH map.
|
||||
So you may lose a switch that knocks out a number of OSDs, but we expect
|
||||
the remaining OSDs and monitors to route around that.
|
||||
|
||||
This is usually a good choice, but may not work well in some
|
||||
stretched cluster configurations where a significant part of your cluster
|
||||
is stuck behind a single network component. For instance, a single
|
||||
cluster which is located in multiple data centers, and you want to
|
||||
sustain the loss of a full DC.
|
||||
A stretch cluster is a cluster that has servers in geographically separated
|
||||
data centers, distributed over a WAN. Stretch clusters have LAN-like high-speed
|
||||
and low-latency connections, but limited links. Stretch clusters have a higher
|
||||
likelihood of (possibly asymmetric) network splits, and a higher likelihood of
|
||||
temporary or complete loss of an entire data center (which can represent
|
||||
one-third to one-half of the total cluster).
|
||||
|
||||
There are two standard configurations we've seen deployed, with either
|
||||
two or three data centers (or, in clouds, availability zones). With two
|
||||
zones, we expect each site to hold a copy of the data, and for a third
|
||||
site to have a tiebreaker monitor (this can be a VM or high-latency compared
|
||||
to the main sites) to pick a winner if the network connection fails and both
|
||||
DCs remain alive. For three sites, we expect a copy of the data and an equal
|
||||
number of monitors in each site.
|
||||
Ceph is designed with the expectation that all parts of its network and cluster
|
||||
will be reliable and that failures will be distributed randomly across the
|
||||
CRUSH map. Even if a switch goes down and causes the loss of many OSDs, Ceph is
|
||||
designed so that the remaining OSDs and monitors will route around such a loss.
|
||||
|
||||
Note that the standard Ceph configuration will survive MANY failures of the
|
||||
network or data centers and it will never compromise data consistency. If you
|
||||
bring back enough Ceph servers following a failure, it will recover. If you
|
||||
lose a data center, but can still form a quorum of monitors and have all the data
|
||||
available (with enough copies to satisfy pools' ``min_size``, or CRUSH rules
|
||||
that will re-replicate to meet it), Ceph will maintain availability.
|
||||
Sometimes this cannot be relied upon. If you have a "stretched-cluster"
|
||||
deployment in which much of your cluster is behind a single network component,
|
||||
you might need to use **stretch mode** to ensure data integrity.
|
||||
|
||||
What can't it handle?
|
||||
We will here consider two standard configurations: a configuration with two
|
||||
data centers (or, in clouds, two availability zones), and a configuration with
|
||||
three data centers (or, in clouds, three availability zones).
|
||||
|
||||
In the two-site configuration, Ceph expects each of the sites to hold a copy of
|
||||
the data, and Ceph also expects there to be a third site that has a tiebreaker
|
||||
monitor. This tiebreaker monitor picks a winner if the network connection fails
|
||||
and both data centers remain alive.
|
||||
|
||||
The tiebreaker monitor can be a VM. It can also have high latency relative to
|
||||
the two main sites.
|
||||
|
||||
The standard Ceph configuration is able to survive MANY network failures or
|
||||
data-center failures without ever compromising data availability. If enough
|
||||
Ceph servers are brought back following a failure, the cluster *will* recover.
|
||||
If you lose a data center but are still able to form a quorum of monitors and
|
||||
still have all the data available, Ceph will maintain availability. (This
|
||||
assumes that the cluster has enough copies to satisfy the pools' ``min_size``
|
||||
configuration option, or (failing that) that the cluster has CRUSH rules in
|
||||
place that will cause the cluster to re-replicate the data until the
|
||||
``min_size`` configuration option has been met.)
|
||||
|
||||
Stretch Cluster Issues
|
||||
======================
|
||||
No matter what happens, Ceph will not compromise on data integrity
|
||||
and consistency. If there's a failure in your network or a loss of nodes and
|
||||
you can restore service, Ceph will return to normal functionality on its own.
|
||||
|
||||
But there are scenarios where you lose data availability despite having
|
||||
enough servers available to satisfy Ceph's consistency and sizing constraints, or
|
||||
where you may be surprised to not satisfy Ceph's constraints.
|
||||
The first important category of these failures revolves around inconsistent
|
||||
networks -- if there's a netsplit, Ceph may be unable to mark OSDs down and kick
|
||||
them out of the acting PG sets despite the primary being unable to replicate data.
|
||||
If this happens, IO will not be permitted, because Ceph can't satisfy its durability
|
||||
guarantees.
|
||||
Ceph does not permit the compromise of data integrity and data consistency
|
||||
under any circumstances. When service is restored after a network failure or a
|
||||
loss of Ceph nodes, Ceph will restore itself to a state of normal functioning
|
||||
without operator intervention.
|
||||
|
||||
Ceph does not permit the compromise of data integrity or data consistency, but
|
||||
there are situations in which *data availability* is compromised. These
|
||||
situations can occur even though there are enough servers available to satisfy
|
||||
Ceph's consistency and sizing constraints. In some situations, you might
|
||||
discover that your cluster does not satisfy those constraints.
|
||||
|
||||
The first category of these failures that we will discuss involves inconsistent
|
||||
networks -- if there is a netsplit (a disconnection between two servers that
|
||||
splits the network into two pieces), Ceph might be unable to mark OSDs ``down``
|
||||
and remove them from the acting PG sets. This failure to mark OSDs ``down``
|
||||
will occur, despite the fact that the primary PG is unable to replicate data (a
|
||||
situation that, under normal non-netsplit circumstances, would result in the
|
||||
marking of affected OSDs as ``down`` and their removal from the PG). If this
|
||||
happens, Ceph will be unable to satisfy its durability guarantees and
|
||||
consequently IO will not be permitted.
|
||||
|
||||
The second category of failures that we will discuss involves the situation in
|
||||
which the constraints are not sufficient to guarantee the replication of data
|
||||
across data centers, though it might seem that the data is correctly replicated
|
||||
across data centers. For example, in a scenario in which there are two data
|
||||
centers named Data Center A and Data Center B, and the CRUSH rule targets three
|
||||
replicas and places a replica in each data center with a ``min_size`` of ``2``,
|
||||
the PG might go active with two replicas in Data Center A and zero replicas in
|
||||
Data Center B. In a situation of this kind, the loss of Data Center A means
|
||||
that the data is lost and Ceph will not be able to operate on it. This
|
||||
situation is surprisingly difficult to avoid using only standard CRUSH rules.
|
||||
|
||||
The second important category of failures is when you think you have data replicated
|
||||
across data centers, but the constraints aren't sufficient to guarantee this.
|
||||
For instance, you might have data centers A and B, and your CRUSH rule targets 3 copies
|
||||
and places a copy in each data center with a ``min_size`` of 2. The PG may go active with
|
||||
2 copies in site A and no copies in site B, which means that if you then lose site A you
|
||||
have lost data and Ceph can't operate on it. This situation is surprisingly difficult
|
||||
to avoid with standard CRUSH rules.
|
||||
|
||||
Stretch Mode
|
||||
============
|
||||
The new stretch mode is designed to handle the 2-site case. Three sites are
|
||||
just as susceptible to netsplit issues, but are much more tolerant of
|
||||
component availability outages than 2-site clusters are.
|
||||
Stretch mode is designed to handle deployments in which you cannot guarantee the
|
||||
replication of data across two data centers. This kind of situation can arise
|
||||
when the cluster's CRUSH rule specifies that three copies are to be made, but
|
||||
then a copy is placed in each data center with a ``min_size`` of 2. Under such
|
||||
conditions, a placement group can become active with two copies in the first
|
||||
data center and no copies in the second data center.
|
||||
|
||||
To enter stretch mode, you must set the location of each monitor, matching
|
||||
your CRUSH map. For instance, to place ``mon.a`` in your first data center:
|
||||
|
||||
.. prompt:: bash $
|
||||
Entering Stretch Mode
|
||||
---------------------
|
||||
|
||||
ceph mon set_location a datacenter=site1
|
||||
To enable stretch mode, you must set the location of each monitor, matching
|
||||
your CRUSH map. This procedure shows how to do this.
|
||||
|
||||
Next, generate a CRUSH rule which will place 2 copies in each data center. This
|
||||
will require editing the CRUSH map directly:
|
||||
|
||||
.. prompt:: bash $
|
||||
#. Place ``mon.a`` in your first data center:
|
||||
|
||||
ceph osd getcrushmap > crush.map.bin
|
||||
crushtool -d crush.map.bin -o crush.map.txt
|
||||
.. prompt:: bash $
|
||||
|
||||
Now edit the ``crush.map.txt`` file to add a new rule. Here
|
||||
there is only one other rule, so this is ID 1, but you may need
|
||||
to use a different rule ID. We also have two datacenter buckets
|
||||
named ``site1`` and ``site2``::
|
||||
ceph mon set_location a datacenter=site1
|
||||
|
||||
rule stretch_rule {
|
||||
id 1
|
||||
type replicated
|
||||
min_size 1
|
||||
max_size 10
|
||||
step take site1
|
||||
step chooseleaf firstn 2 type host
|
||||
step emit
|
||||
step take site2
|
||||
step chooseleaf firstn 2 type host
|
||||
step emit
|
||||
}
|
||||
#. Generate a CRUSH rule that places two copies in each data center.
|
||||
This requires editing the CRUSH map directly:
|
||||
|
||||
Finally, inject the CRUSH map to make the rule available to the cluster:
|
||||
.. prompt:: bash $
|
||||
|
||||
.. prompt:: bash $
|
||||
ceph osd getcrushmap > crush.map.bin
|
||||
crushtool -d crush.map.bin -o crush.map.txt
|
||||
|
||||
crushtool -c crush.map.txt -o crush2.map.bin
|
||||
ceph osd setcrushmap -i crush2.map.bin
|
||||
#. Edit the ``crush.map.txt`` file to add a new rule. Here there is only one
|
||||
other rule (``id 1``), but you might need to use a different rule ID. We
|
||||
have two data-center buckets named ``site1`` and ``site2``:
|
||||
|
||||
If you aren't already running your monitors in connectivity mode, do so with
|
||||
the instructions in `Changing Monitor Elections`_.
|
||||
::
|
||||
|
||||
rule stretch_rule {
|
||||
id 1
|
||||
min_size 1
|
||||
max_size 10
|
||||
type replicated
|
||||
step take site1
|
||||
step chooseleaf firstn 2 type host
|
||||
step emit
|
||||
step take site2
|
||||
step chooseleaf firstn 2 type host
|
||||
step emit
|
||||
}
|
||||
|
||||
#. Inject the CRUSH map to make the rule available to the cluster:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
crushtool -c crush.map.txt -o crush2.map.bin
|
||||
ceph osd setcrushmap -i crush2.map.bin
|
||||
|
||||
#. Run the monitors in connectivity mode. See `Changing Monitor Elections`_.
|
||||
|
||||
#. Command the cluster to enter stretch mode. In this example, ``mon.e`` is the
|
||||
tiebreaker monitor and we are splitting across data centers. The tiebreaker
|
||||
monitor must be assigned a data center that is neither ``site1`` nor
|
||||
``site2``. For this purpose you can create another data-center bucket named
|
||||
``site3`` in your CRUSH and place ``mon.e`` there:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon set_location e datacenter=site3
|
||||
ceph mon enable_stretch_mode e stretch_rule datacenter
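After these steps, it can be useful to confirm that each monitor reports the location you assigned (a verification sketch; the exact fields shown depend on your release):

.. prompt:: bash $

   ceph mon dump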
|
||||
|
||||
When stretch mode is enabled, PGs will become active only when they peer
|
||||
across data centers (or across whichever CRUSH bucket type was specified),
|
||||
assuming both are alive. Pools will increase in size from the default ``3`` to
|
||||
``4``, and two copies will be expected in each site. OSDs will be allowed to
|
||||
connect to monitors only if they are in the same data center as the monitors.
|
||||
New monitors will not be allowed to join the cluster if they do not specify a
|
||||
location.
|
||||
|
||||
If all OSDs and monitors in one of the data centers become inaccessible at once,
|
||||
the surviving data center enters a "degraded stretch mode". A warning will be
|
||||
issued, the ``min_size`` will be reduced to ``1``, and the cluster will be
|
||||
allowed to go active with the data in the single remaining site. The pool size
|
||||
does not change, so warnings will be generated that report that the pools are
|
||||
too small -- but a special stretch mode flag will prevent the OSDs from
|
||||
creating extra copies in the remaining data center. This means that the data
|
||||
center will keep only two copies, just as before.
|
||||
|
||||
When the missing data center comes back, the cluster will enter a "recovery
|
||||
stretch mode". This changes the warning and allows peering, but requires OSDs
|
||||
only from the data center that was ``up`` throughout the duration of the
|
||||
downtime. When all PGs are in a known state, and are neither degraded nor
|
||||
incomplete, the cluster transitions back to regular stretch mode, ends the
|
||||
warning, restores ``min_size`` to its original value (``2``), requires both
|
||||
sites to peer, and no longer requires the site that was up throughout the
|
||||
duration of the downtime when peering (which makes failover to the other site
|
||||
possible, if needed).
|
||||
|
||||
.. _Changing Monitor elections: ../change-mon-elections
|
||||
|
||||
And lastly, tell the cluster to enter stretch mode. Here, ``mon.e`` is the
|
||||
tiebreaker and we are splitting across data centers. ``mon.e`` should be also
|
||||
set a datacenter, that will differ from ``site1`` and ``site2``. For this
|
||||
purpose you can create another datacenter bucket named ``site3`` in your
|
||||
CRUSH and place ``mon.e`` there:
|
||||
Limitations of Stretch Mode
|
||||
===========================
|
||||
When using stretch mode, OSDs must be located at exactly two sites.
|
||||
|
||||
.. prompt:: bash $
|
||||
Two monitors should be run in each data center, plus a tiebreaker in a third
|
||||
(or in the cloud) for a total of five monitors. While in stretch mode, OSDs
|
||||
will connect only to monitors within the data center in which they are located.
|
||||
OSDs *DO NOT* connect to the tiebreaker monitor.
|
||||
|
||||
ceph mon set_location e datacenter=site3
|
||||
ceph mon enable_stretch_mode e stretch_rule datacenter
|
||||
Erasure-coded pools cannot be used with stretch mode. Attempts to use erasure
|
||||
coded pools with stretch mode will fail. Erasure coded pools cannot be created
|
||||
while in stretch mode.
|
||||
|
||||
When stretch mode is enabled, the OSDs wlll only take PGs active when
|
||||
they peer across data centers (or whatever other CRUSH bucket type
|
||||
you specified), assuming both are alive. Pools will increase in size
|
||||
from the default 3 to 4, expecting 2 copies in each site. OSDs will only
|
||||
be allowed to connect to monitors in the same data center. New monitors
|
||||
will not be allowed to join the cluster if they do not specify a location.
|
||||
To use stretch mode, you will need to create a CRUSH rule that provides two
|
||||
replicas in each data center. Ensure that there are four total replicas: two in
|
||||
each data center. If pools exist in the cluster that do not have the default
|
||||
``size`` or ``min_size``, Ceph will not enter stretch mode. An example of such
|
||||
a CRUSH rule is given above.
|
||||
|
||||
If all the OSDs and monitors from a data center become inaccessible
|
||||
at once, the surviving data center will enter a degraded stretch mode. This
|
||||
will issue a warning, reduce the min_size to 1, and allow
|
||||
the cluster to go active with data in the single remaining site. Note that
|
||||
we do not change the pool size, so you will also get warnings that the
|
||||
pools are too small -- but a special stretch mode flag will prevent the OSDs
|
||||
from creating extra copies in the remaining data center (so it will only keep
|
||||
2 copies, as before).
|
||||
Because stretch mode runs with ``min_size`` set to ``1`` (or, more directly,
|
||||
``min_size 1``), we recommend enabling stretch mode only when using OSDs on
|
||||
SSDs (including NVMe OSDs). Hybrid HDD+SSD or HDD-only OSDs are not recommended
|
||||
due to the long time it takes for them to recover after connectivity between
|
||||
data centers has been restored. Shorter recovery times reduce the potential for data loss.
|
||||
|
||||
When the missing data center comes back, the cluster will enter
|
||||
recovery stretch mode. This changes the warning and allows peering, but
|
||||
still only requires OSDs from the data center which was up the whole time.
|
||||
When all PGs are in a known state, and are neither degraded nor incomplete,
|
||||
the cluster transitions back to regular stretch mode, ends the warning,
|
||||
restores min_size to its starting value (2) and requires both sites to peer,
|
||||
and stops requiring the always-alive site when peering (so that you can fail
|
||||
over to the other site, if necessary).
|
||||
|
||||
|
||||
Stretch Mode Limitations
|
||||
========================
|
||||
As implied by the setup, stretch mode only handles 2 sites with OSDs.
|
||||
|
||||
While it is not enforced, you should run 2 monitors in each site plus
|
||||
a tiebreaker, for a total of 5. This is because OSDs can only connect
|
||||
to monitors in their own site when in stretch mode.
|
||||
|
||||
You cannot use erasure coded pools with stretch mode. If you try, it will
|
||||
refuse, and it will not allow you to create EC pools once in stretch mode.
|
||||
|
||||
You must create your own CRUSH rule which provides 2 copies in each site, and
|
||||
you must use 4 total copies with 2 in each site. If you have existing pools
|
||||
with non-default size/min_size, Ceph will object when you attempt to
|
||||
enable stretch mode.
|
||||
|
||||
Because it runs with ``min_size 1`` when degraded, you should only use stretch
|
||||
mode with all-flash OSDs. This minimizes the time needed to recover once
|
||||
connectivity is restored, and thus minimizes the potential for data loss.
|
||||
|
||||
Hopefully, future development will extend this feature to support EC pools and
|
||||
running with more than 2 full sites.
|
||||
In the future, stretch mode might support erasure-coded pools and might support
|
||||
deployments that have more than two data centers.
|
||||
|
||||
Other commands
|
||||
==============
|
||||
If your tiebreaker monitor fails for some reason, you can replace it. Turn on
|
||||
a new monitor and run:
|
||||
|
||||
Replacing a failed tiebreaker monitor
|
||||
-------------------------------------
|
||||
|
||||
Turn on a new monitor and run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon set_new_tiebreaker mon.<new_mon_name>
|
||||
|
||||
This command will protest if the new monitor is in the same location as existing
|
||||
non-tiebreaker monitors. This command WILL NOT remove the previous tiebreaker
|
||||
monitor; you should do so yourself.
|
||||
This command protests if the new monitor is in the same location as the
|
||||
existing non-tiebreaker monitors. **This command WILL NOT remove the previous
|
||||
tiebreaker monitor.** Remove the previous tiebreaker monitor yourself.
|
||||
|
||||
Also in 16.2.7, if you are writing your own tooling for deploying Ceph, you can use a new
|
||||
``--set-crush-location`` option when booting monitors, instead of running
|
||||
``ceph mon set_location``. This option accepts only a single "bucket=loc" pair, eg
|
||||
``ceph-mon --set-crush-location 'datacenter=a'``, which must match the
|
||||
bucket type you specified when running ``enable_stretch_mode``.
|
||||
Using "--set-crush-location" and not "ceph mon set_location"
|
||||
------------------------------------------------------------
|
||||
|
||||
If you write your own tooling for deploying Ceph, use the
|
||||
``--set-crush-location`` option when booting monitors instead of running ``ceph
|
||||
mon set_location``. This option accepts only a single ``bucket=loc`` pair (for
|
||||
example, ``ceph-mon --set-crush-location 'datacenter=a'``), and that pair must
|
||||
match the bucket type that was specified when running ``enable_stretch_mode``.
|
||||
|
||||
When in stretch degraded mode, the cluster will go into "recovery" mode automatically
|
||||
when the disconnected data center comes back. If that doesn't work, or you want to
|
||||
enable recovery mode early, you can invoke:
|
||||
Forcing recovery stretch mode
|
||||
-----------------------------
|
||||
|
||||
When in stretch degraded mode, the cluster will go into "recovery" mode
|
||||
automatically when the disconnected data center comes back. If that does not
|
||||
happen or you want to enable recovery mode early, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd force_recovery_stretch_mode --yes-i-really-mean-it
|
||||
|
||||
But this command should not be necessary; it is included to deal with
|
||||
unanticipated situations.
|
||||
Forcing normal stretch mode
|
||||
---------------------------
|
||||
|
||||
When in recovery mode, the cluster should go back into normal stretch mode
|
||||
when the PGs are healthy. If this doesn't happen, or you want to force the
|
||||
When in recovery mode, the cluster should go back into normal stretch mode when
|
||||
the PGs are healthy. If this fails to happen or if you want to force the
|
||||
cross-data-center peering early and are willing to risk data downtime (or have
|
||||
verified separately that all the PGs can peer, even if they aren't fully
|
||||
recovered), you can invoke:
|
||||
recovered), run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd force_healthy_stretch_mode --yes-i-really-mean-it
|
||||
|
||||
This command should not be necessary; it is included to deal with
|
||||
unanticipated situations. But you might wish to invoke it to remove
|
||||
the ``HEALTH_WARN`` state which recovery mode generates.
|
||||
This command can be used to remove the ``HEALTH_WARN`` state, which recovery
|
||||
mode generates.
|
||||
|
@ -337,45 +337,53 @@ Pool
|
||||
|
||||
A pool is a logical partition where users store data.
|
||||
In Ceph deployments, it is common to create a pool as a logical partition for
|
||||
similar types of data. For example, when deploying Ceph as a backend for
|
||||
similar types of data. For example, when deploying Ceph as a back end for
|
||||
OpenStack, a typical deployment would have pools for volumes, images, backups
|
||||
and virtual machines, and users such as ``client.glance``, ``client.cinder``,
|
||||
etc.
|
||||
and virtual machines, and such users as ``client.glance`` and ``client.cinder``.
|
||||
|
||||
Application Tags
|
||||
----------------
|
||||
|
||||
Access may be restricted to specific pools as defined by their application
|
||||
metadata. The ``*`` wildcard may be used for the ``key`` argument, the
|
||||
``value`` argument, or both. ``all`` is a synonym for ``*``.
|
||||
``value`` argument, or both. The ``all`` tag is a synonym for ``*``.
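For instance (a sketch only: the user name, application, and tag value are illustrative), a capability that limits a client to pools tagged for a particular application can be granted like this:

.. prompt:: bash $

   ceph auth get-or-create client.fsclient mon 'allow r' osd 'allow rw tag cephfs data=cephfs_a'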
|
||||
|
||||
Namespace
|
||||
---------
|
||||
|
||||
Objects within a pool can be associated to a namespace--a logical group of
|
||||
Objects within a pool can be associated to a namespace: that is, to a logical group of
|
||||
objects within the pool. A user's access to a pool can be associated with a
|
||||
namespace such that reads and writes by the user take place only within the
|
||||
namespace. Objects written to a namespace within the pool can only be accessed
|
||||
namespace so that reads and writes by the user can take place only within the
|
||||
namespace. Objects written to a namespace within the pool can be accessed only
|
||||
by users who have access to the namespace.
|
||||
|
||||
.. note:: Namespaces are primarily useful for applications written on top of
|
||||
``librados`` where the logical grouping can alleviate the need to create
|
||||
different pools. Ceph Object Gateway (in releases beginning with
|
||||
Luminous) uses namespaces for various
|
||||
metadata objects.
|
||||
``librados``. In such situations, the logical grouping provided by
|
||||
namespaces can obviate the need to create different pools. In Luminous and
|
||||
later releases, Ceph Object Gateway uses namespaces for various metadata
|
||||
objects.
|
||||
|
||||
The rationale for namespaces is that pools can be a computationally expensive
|
||||
method of segregating data sets for the purposes of authorizing separate sets
|
||||
of users. For example, a pool should have ~100 placement groups per OSD. So an
|
||||
exemplary cluster with 1000 OSDs would have 100,000 placement groups for one
|
||||
pool. Each pool would create another 100,000 placement groups in the exemplary
|
||||
cluster. By contrast, writing an object to a namespace simply associates the
|
||||
namespace to the object name without the computational overhead of a separate
|
||||
pool. Rather than creating a separate pool for a user or set of users, you may
|
||||
use a namespace. **Note:** Only available using ``librados`` at this time.
|
||||
The rationale for namespaces is this: namespaces are relatively less
|
||||
computationally expensive than pools, which can be a computationally
|
||||
expensive method of segregating data sets between different authorized users.
|
||||
|
||||
Access may be restricted to specific RADOS namespaces using the ``namespace``
|
||||
capability. Limited globbing of namespaces is supported; if the last character
|
||||
For example, a pool ought to host approximately 100 placement-group replicas
|
||||
per OSD. This means that a cluster with 1000 OSDs and three 3R replicated pools
|
||||
would have (in a single pool) 100,000 placement-group replicas, and that means
|
||||
that it has 33,333 Placement Groups.
|
||||
|
||||
By contrast, writing an object to a namespace simply associates the namespace
|
||||
to the object name without incurring the computational overhead of a separate
|
||||
pool. Instead of creating a separate pool for a user or set of users, you can
|
||||
use a namespace.
|
||||
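As a concrete sketch using the ``rados`` command-line tool (the pool name
``liverpool``, the namespace ``abbey``, and the object name are hypothetical),
writing an object into a namespace and then listing that namespace look like this:

.. prompt:: bash $

   rados -p liverpool --namespace abbey put song1 ./song1.txt  # hypothetical pool, namespace, and object
   rados -p liverpool --namespace abbey ls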
|
||||
.. note::
|
||||
|
||||
Namespaces are available only when using ``librados``.
|
||||
|
||||
|
||||
Access may be restricted to specific RADOS namespaces by use of the ``namespace``
|
||||
capability. Limited globbing of namespaces (that is, use of wildcards (``*``)) is supported: if the last character
|
||||
of the specified namespace is ``*``, then access is granted to any namespace
|
||||
starting with the provided argument.
|
||||
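As a sketch of such a capability (the client name and the namespace prefix are
hypothetical), the following grants read and write access in pool ``liverpool``
to every namespace whose name begins with ``abbey-``:

.. prompt:: bash $

   ceph auth get-or-create client.martin mon 'allow r' osd 'allow rw pool=liverpool namespace=abbey-*'  # hypothetical client and namespace prefix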
|
||||
@ -383,64 +391,60 @@ Managing Users
|
||||
==============
|
||||
|
||||
User management functionality provides Ceph Storage Cluster administrators with
|
||||
the ability to create, update and delete users directly in the Ceph Storage
|
||||
the ability to create, update, and delete users directly in the Ceph Storage
|
||||
Cluster.
|
||||
|
||||
When you create or delete users in the Ceph Storage Cluster, you may need to
|
||||
distribute keys to clients so that they can be added to keyrings. See `Keyring
|
||||
Management`_ for details.
|
||||
When you create or delete users in the Ceph Storage Cluster, you might need to
|
||||
distribute keys to clients so that they can be added to keyrings. For details, see `Keyring
|
||||
Management`_.
|
||||
|
||||
List Users
|
||||
----------
|
||||
Listing Users
|
||||
-------------
|
||||
|
||||
To list the users in your cluster, execute the following:
|
||||
To list the users in your cluster, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph auth ls
|
||||
ceph auth ls
|
||||
|
||||
Ceph will list out all users in your cluster. For example, in a two-node
|
||||
exemplary cluster, ``ceph auth ls`` will output something that looks like
|
||||
this::
|
||||
Ceph will list all users in your cluster. For example, in a two-node
|
||||
cluster, ``ceph auth ls`` will provide an output that resembles the following::
|
||||
|
||||
installed auth entries:
|
||||
installed auth entries:
|
||||
|
||||
osd.0
|
||||
key: AQCvCbtToC6MDhAATtuT70Sl+DymPCfDSsyV4w==
|
||||
caps: [mon] allow profile osd
|
||||
caps: [osd] allow *
|
||||
osd.1
|
||||
key: AQC4CbtTCFJBChAAVq5spj0ff4eHZICxIOVZeA==
|
||||
caps: [mon] allow profile osd
|
||||
caps: [osd] allow *
|
||||
client.admin
|
||||
key: AQBHCbtT6APDHhAA5W00cBchwkQjh3dkKsyPjw==
|
||||
caps: [mds] allow
|
||||
caps: [mon] allow *
|
||||
caps: [osd] allow *
|
||||
client.bootstrap-mds
|
||||
key: AQBICbtTOK9uGBAAdbe5zcIGHZL3T/u2g6EBww==
|
||||
caps: [mon] allow profile bootstrap-mds
|
||||
client.bootstrap-osd
|
||||
key: AQBHCbtT4GxqORAADE5u7RkpCN/oo4e5W0uBtw==
|
||||
caps: [mon] allow profile bootstrap-osd
|
||||
osd.0
|
||||
key: AQCvCbtToC6MDhAATtuT70Sl+DymPCfDSsyV4w==
|
||||
caps: [mon] allow profile osd
|
||||
caps: [osd] allow *
|
||||
osd.1
|
||||
key: AQC4CbtTCFJBChAAVq5spj0ff4eHZICxIOVZeA==
|
||||
caps: [mon] allow profile osd
|
||||
caps: [osd] allow *
|
||||
client.admin
|
||||
key: AQBHCbtT6APDHhAA5W00cBchwkQjh3dkKsyPjw==
|
||||
caps: [mds] allow
|
||||
caps: [mon] allow *
|
||||
caps: [osd] allow *
|
||||
client.bootstrap-mds
|
||||
key: AQBICbtTOK9uGBAAdbe5zcIGHZL3T/u2g6EBww==
|
||||
caps: [mon] allow profile bootstrap-mds
|
||||
client.bootstrap-osd
|
||||
key: AQBHCbtT4GxqORAADE5u7RkpCN/oo4e5W0uBtw==
|
||||
caps: [mon] allow profile bootstrap-osd
|
||||
|
||||
|
||||
Note that the ``TYPE.ID`` notation for users applies such that ``osd.0`` is a
|
||||
user of type ``osd`` and its ID is ``0``, ``client.admin`` is a user of type
|
||||
``client`` and its ID is ``admin`` (i.e., the default ``client.admin`` user).
|
||||
Note also that each entry has a ``key: <value>`` entry, and one or more
|
||||
Note that, according to the ``TYPE.ID`` notation for users, ``osd.0`` is a
|
||||
user of type ``osd`` with an ID of ``0``, and ``client.admin`` is a user of type
|
||||
``client`` with an ID of ``admin`` (that is, the default ``client.admin`` user).
|
||||
Note too that each entry has a ``key: <value>`` entry, and also has one or more
|
||||
``caps:`` entries.
|
||||
|
||||
You may use the ``-o {filename}`` option with ``ceph auth ls`` to
|
||||
save the output to a file.
|
||||
To save the output of ``ceph auth ls`` to a file, use the ``-o {filename}`` option.
|
||||
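For example (the output file name here is arbitrary):

.. prompt:: bash $

   ceph auth ls -o /tmp/auth-list.txt  # output path is only an example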
|
||||
|
||||
Get a User
|
||||
----------
|
||||
Getting a User
|
||||
--------------
|
||||
|
||||
To retrieve a specific user, key and capabilities, execute the
|
||||
following:
|
||||
To retrieve a specific user, key, and capabilities, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -452,8 +456,7 @@ For example:
|
||||
|
||||
ceph auth get client.admin
|
||||
|
||||
You may also use the ``-o {filename}`` option with ``ceph auth get`` to
|
||||
save the output to a file. Developers may also execute the following:
|
||||
To save the output of ``ceph auth get`` to a file, use the ``-o {filename}`` option. Developers may also run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -461,42 +464,49 @@ save the output to a file. Developers may also execute the following:
|
||||
|
||||
The ``auth export`` command is identical to ``auth get``.
|
||||
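For example, either of the following commands (the output path shown is only an
illustration) writes the ``client.admin`` entry in keyring format to a local file:

.. prompt:: bash $

   ceph auth get client.admin -o /tmp/ceph.client.admin.keyring     # output path is only an example
   ceph auth export client.admin -o /tmp/ceph.client.admin.keyring  # output path is only an example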
|
||||
Add a User
|
||||
----------
|
||||
.. _rados_ops_adding_a_user:
|
||||
|
||||
Adding a user creates a username (i.e., ``TYPE.ID``), a secret key and
|
||||
any capabilities included in the command you use to create the user.
|
||||
Adding a User
|
||||
-------------
|
||||
|
||||
A user's key enables the user to authenticate with the Ceph Storage Cluster.
|
||||
Adding a user creates a user name (that is, ``TYPE.ID``), a secret key, and
|
||||
any capabilities specified in the command that creates the user.
|
||||
|
||||
A user's key allows the user to authenticate with the Ceph Storage Cluster.
|
||||
The user's capabilities authorize the user to read, write, or execute on Ceph
|
||||
monitors (``mon``), Ceph OSDs (``osd``) or Ceph Metadata Servers (``mds``).
|
||||
monitors (``mon``), Ceph OSDs (``osd``) or Ceph Metadata Servers (``mds``).
|
||||
|
||||
There are a few ways to add a user:
|
||||
|
||||
- ``ceph auth add``: This command is the canonical way to add a user. It
|
||||
will create the user, generate a key and add any specified capabilities.
|
||||
will create the user, generate a key, and add any specified capabilities.
|
||||
|
||||
- ``ceph auth get-or-create``: This command is often the most convenient way
|
||||
to create a user, because it returns a keyfile format with the user name
|
||||
(in brackets) and the key. If the user already exists, this command
|
||||
simply returns the user name and key in the keyfile format. You may use the
|
||||
``-o {filename}`` option to save the output to a file.
|
||||
simply returns the user name and key in the keyfile format. To save the output to
|
||||
a file, use the ``-o {filename}`` option.
|
||||
|
||||
- ``ceph auth get-or-create-key``: This command is a convenient way to create
|
||||
a user and return the user's key (only). This is useful for clients that
|
||||
need the key only (e.g., libvirt). If the user already exists, this command
|
||||
simply returns the key. You may use the ``-o {filename}`` option to save the
|
||||
output to a file.
|
||||
a user and return the user's key and nothing else. This is useful for clients that
|
||||
need only the key (for example, libvirt). If the user already exists, this command
|
||||
simply returns the key. To save the output to
|
||||
a file, use the ``-o {filename}`` option.
|
||||
|
||||
When creating client users, you may create a user with no capabilities. A user
|
||||
It is possible, when creating client users, to create a user with no capabilities. A user
|
||||
with no capabilities is useless beyond mere authentication, because the client
|
||||
cannot retrieve the cluster map from the monitor. However, you can create a
|
||||
user with no capabilities if you wish to defer adding capabilities later using
|
||||
the ``ceph auth caps`` command.
|
||||
cannot retrieve the cluster map from the monitor. However, you might want to create a user
|
||||
with no capabilities and wait until later to add capabilities to the user by using the ``ceph auth caps`` command.
|
||||
|
||||
A typical user has at least read capabilities on the Ceph monitor and
|
||||
read and write capability on Ceph OSDs. Additionally, a user's OSD permissions
|
||||
are often restricted to accessing a particular pool:
|
||||
read and write capabilities on Ceph OSDs. A user's OSD permissions
|
||||
are often restricted so that the user can access only one particular pool.
|
||||
In the following example, the commands (1) add a client named ``john`` that has read capabilities on the Ceph monitor
|
||||
and read and write capabilities on the pool named ``liverpool``, (2) authorize a client named ``paul`` to have read capabilities on the Ceph monitor and
|
||||
read and write capabilities on the pool named ``liverpool``, (3) authorize a client named ``george`` to have read capabilities on the Ceph monitor and
|
||||
read and write capabilities on the pool named ``liverpool`` and use the keyring named ``george.keyring`` to make this authorization, and (4) authorize
|
||||
a client named ``ringo`` to have read capabilities on the Ceph monitor and read and write capabilities on the pool named ``liverpool`` and use the key
|
||||
named ``ringo.key`` to make this authorization:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -505,21 +515,19 @@ are often restricted to accessing a particular pool:
|
||||
ceph auth get-or-create client.george mon 'allow r' osd 'allow rw pool=liverpool' -o george.keyring
|
||||
ceph auth get-or-create-key client.ringo mon 'allow r' osd 'allow rw pool=liverpool' -o ringo.key
|
||||
|
||||
|
||||
.. important:: If you provide a user with capabilities to OSDs, but you DO NOT
|
||||
restrict access to particular pools, the user will have access to ALL
|
||||
pools in the cluster!
|
||||
.. important:: Any user that has capabilities on OSDs will have access to ALL pools in the cluster
|
||||
unless that user's access has been restricted to a proper subset of the pools in the cluster.
|
||||
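For example (a sketch that reuses the ``client.paul`` user from the examples above),
the first command below leaves the client with access to every pool, while the second
confines it to the ``liverpool`` pool:

.. prompt:: bash $

   ceph auth caps client.paul mon 'allow r' osd 'allow rw'                 # access to ALL pools
   ceph auth caps client.paul mon 'allow r' osd 'allow rw pool=liverpool'  # access to one pool only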
|
||||
|
||||
.. _modify-user-capabilities:
|
||||
|
||||
Modify User Capabilities
|
||||
------------------------
|
||||
Modifying User Capabilities
|
||||
---------------------------
|
||||
|
||||
The ``ceph auth caps`` command allows you to specify a user and change the
|
||||
The ``ceph auth caps`` command allows you to specify a user and change that
|
||||
user's capabilities. Setting new capabilities will overwrite current capabilities.
|
||||
To view current capabilities run ``ceph auth get USERTYPE.USERID``. To add
|
||||
capabilities, you should also specify the existing capabilities when using the form:
|
||||
To view current capabilities, run ``ceph auth get USERTYPE.USERID``.
|
||||
To add capabilities, run a command of the following form (and be sure to specify the existing capabilities):
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -534,10 +542,10 @@ For example:
|
||||
ceph auth caps client.paul mon 'allow rw' osd 'allow rwx pool=liverpool'
|
||||
ceph auth caps client.brian-manager mon 'allow *' osd 'allow *'
|
||||
|
||||
See `Authorization (Capabilities)`_ for additional details on capabilities.
|
||||
For additional details on capabilities, see `Authorization (Capabilities)`_.
|
||||
|
||||
Delete a User
|
||||
-------------
|
||||
Deleting a User
|
||||
---------------
|
||||
|
||||
To delete a user, use ``ceph auth del``:
|
||||
|
||||
@ -545,34 +553,34 @@ To delete a user, use ``ceph auth del``:
|
||||
|
||||
ceph auth del {TYPE}.{ID}
|
||||
|
||||
Where ``{TYPE}`` is one of ``client``, ``osd``, ``mon``, or ``mds``,
|
||||
and ``{ID}`` is the user name or ID of the daemon.
|
||||
Here ``{TYPE}`` is either ``client``, ``osd``, ``mon``, or ``mds``,
|
||||
and ``{ID}`` is the user name or the ID of the daemon.
|
||||
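For example, to delete the ``client.paul`` user from the examples above:

.. prompt:: bash $

   ceph auth del client.paul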
|
||||
|
||||
Print a User's Key
|
||||
------------------
|
||||
Printing a User's Key
|
||||
---------------------
|
||||
|
||||
To print a user's authentication key to standard output, execute the following:
|
||||
To print a user's authentication key to standard output, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph auth print-key {TYPE}.{ID}
|
||||
|
||||
Where ``{TYPE}`` is one of ``client``, ``osd``, ``mon``, or ``mds``,
|
||||
and ``{ID}`` is the user name or ID of the daemon.
|
||||
Here ``{TYPE}`` is either ``client``, ``osd``, ``mon``, or ``mds``,
|
||||
and ``{ID}`` is the user name or the ID of the daemon.
|
||||
|
||||
Printing a user's key is useful when you need to populate client
|
||||
software with a user's key (e.g., libvirt):
|
||||
When it is necessary to populate client software with a user's key (as in the case of libvirt),
|
||||
you can print the user's key by running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
mount -t ceph serverhost:/ mountpoint -o name=client.user,secret=`ceph auth print-key client.user`
|
||||
|
||||
Import a User(s)
|
||||
Importing a User
|
||||
----------------
|
||||
|
||||
To import one or more users, use ``ceph auth import`` and
|
||||
specify a keyring:
|
||||
specify a keyring as follows:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -584,47 +592,49 @@ For example:
|
||||
|
||||
sudo ceph auth import -i /etc/ceph/ceph.keyring
|
||||
|
||||
|
||||
.. note:: The Ceph storage cluster will add new users, their keys and their
|
||||
capabilities and will update existing users, their keys and their
|
||||
.. note:: The Ceph storage cluster will add new users, their keys, and their
|
||||
capabilities and will update existing users, their keys, and their
|
||||
capabilities.
|
||||
|
||||
Keyring Management
|
||||
==================
|
||||
|
||||
When you access Ceph via a Ceph client, the Ceph client will look for a local
|
||||
keyring. Ceph presets the ``keyring`` setting with the following four keyring
|
||||
names by default so you don't have to set them in your Ceph configuration file
|
||||
unless you want to override the defaults (not recommended):
|
||||
keyring. Ceph presets the ``keyring`` setting with four keyring
|
||||
names by default. For this reason, you do not have to set the keyring names in your Ceph configuration file
|
||||
unless you want to override these defaults (which is not recommended). The four default keyring names are as follows:
|
||||
|
||||
- ``/etc/ceph/$cluster.$name.keyring``
|
||||
- ``/etc/ceph/$cluster.keyring``
|
||||
- ``/etc/ceph/keyring``
|
||||
- ``/etc/ceph/keyring.bin``
|
||||
|
||||
The ``$cluster`` metavariable is your Ceph cluster name as defined by the
|
||||
name of the Ceph configuration file (i.e., ``ceph.conf`` means the cluster name
|
||||
is ``ceph``; thus, ``ceph.keyring``). The ``$name`` metavariable is the user
|
||||
type and user ID (e.g., ``client.admin``; thus, ``ceph.client.admin.keyring``).
|
||||
The ``$cluster`` metavariable found in the first two default keyring names above
|
||||
is your Ceph cluster name as defined by the name of the Ceph configuration
|
||||
file: for example, if the Ceph configuration file is named ``ceph.conf``,
|
||||
then your Ceph cluster name is ``ceph`` and the second name above would be
|
||||
``ceph.keyring``. The ``$name`` metavariable is the user type and user ID:
|
||||
for example, given the user ``client.admin``, the first name above would be
|
||||
``ceph.client.admin.keyring``.
|
||||
|
||||
.. note:: When executing commands that read or write to ``/etc/ceph``, you may
|
||||
need to use ``sudo`` to execute the command as ``root``.
|
||||
.. note:: When running commands that read or write to ``/etc/ceph``, you might
|
||||
need to use ``sudo`` to run the command as ``root``.
|
||||
|
||||
After you create a user (e.g., ``client.ringo``), you must get the key and add
|
||||
After you create a user (for example, ``client.ringo``), you must get the key and add
|
||||
it to a keyring on a Ceph client so that the user can access the Ceph Storage
|
||||
Cluster.
|
||||
|
||||
The `User Management`_ section details how to list, get, add, modify and delete
|
||||
users directly in the Ceph Storage Cluster. However, Ceph also provides the
|
||||
The `User Management`_ section details how to list, get, add, modify, and delete
|
||||
users directly in the Ceph Storage Cluster. In addition, Ceph provides the
|
||||
``ceph-authtool`` utility to allow you to manage keyrings from a Ceph client.
|
||||
|
||||
Create a Keyring
|
||||
----------------
|
||||
Creating a Keyring
|
||||
------------------
|
||||
|
||||
When you use the procedures in the `Managing Users`_ section to create users,
|
||||
you need to provide user keys to the Ceph client(s) so that the Ceph client
|
||||
can retrieve the key for the specified user and authenticate with the Ceph
|
||||
Storage Cluster. Ceph Clients access keyrings to lookup a user name and
|
||||
you must provide user keys to the Ceph client(s). This is required so that the Ceph client(s)
|
||||
can retrieve the key for the specified user and authenticate that user against the Ceph
|
||||
Storage Cluster. Ceph clients access keyrings in order to look up a user name and
|
||||
retrieve the user's key.
|
||||
|
||||
The ``ceph-authtool`` utility allows you to create a keyring. To create an
|
||||
@ -635,45 +645,44 @@ empty keyring, use ``--create-keyring`` or ``-C``. For example:
|
||||
ceph-authtool --create-keyring /path/to/keyring
|
||||
|
||||
When creating a keyring with multiple users, we recommend using the cluster name
|
||||
(e.g., ``$cluster.keyring``) for the keyring filename and saving it in the
|
||||
``/etc/ceph`` directory so that the ``keyring`` configuration default setting
|
||||
will pick up the filename without requiring you to specify it in the local copy
|
||||
of your Ceph configuration file. For example, create ``ceph.keyring`` by
|
||||
executing the following:
|
||||
(of the form ``$cluster.keyring``) for the keyring filename and saving the keyring in the
|
||||
``/etc/ceph`` directory. By doing this, you ensure that the ``keyring`` configuration default setting
|
||||
will pick up the filename without requiring you to specify the filename in the local copy
|
||||
of your Ceph configuration file. For example, you can create ``ceph.keyring`` by
|
||||
running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo ceph-authtool -C /etc/ceph/ceph.keyring
|
||||
|
||||
When creating a keyring with a single user, we recommend using the cluster name,
|
||||
the user type and the user name and saving it in the ``/etc/ceph`` directory.
|
||||
For example, ``ceph.client.admin.keyring`` for the ``client.admin`` user.
|
||||
the user type, and the user name, and saving the keyring in the ``/etc/ceph`` directory.
|
||||
For example, we recommend that the ``client.admin`` user use ``ceph.client.admin.keyring``.
|
||||
|
||||
To create a keyring in ``/etc/ceph``, you must do so as ``root``. This means
|
||||
the file will have ``rw`` permissions for the ``root`` user only, which is
|
||||
that the file will have ``rw`` permissions for the ``root`` user only, which is
|
||||
appropriate when the keyring contains administrator keys. However, if you
|
||||
intend to use the keyring for a particular user or group of users, ensure
|
||||
that you execute ``chown`` or ``chmod`` to establish appropriate keyring
|
||||
intend to use the keyring for a particular user or group of users, be sure to use ``chown`` or ``chmod`` to establish appropriate keyring
|
||||
ownership and access.
|
||||
|
||||
Add a User to a Keyring
|
||||
-----------------------
|
||||
Adding a User to a Keyring
|
||||
--------------------------
|
||||
|
||||
When you `Add a User`_ to the Ceph Storage Cluster, you can use the `Get a
|
||||
User`_ procedure to retrieve a user, key and capabilities and save the user to a
|
||||
keyring.
|
||||
When you :ref:`Add a user<rados_ops_adding_a_user>` to the Ceph Storage
|
||||
Cluster, you can use the `Getting a User`_ procedure to retrieve a user, key,
|
||||
and capabilities and then save the user to a keyring.
|
||||
|
||||
When you only want to use one user per keyring, the `Get a User`_ procedure with
|
||||
If you want to use only one user per keyring, the `Getting a User`_ procedure with
|
||||
the ``-o`` option will save the output in the keyring file format. For example,
|
||||
to create a keyring for the ``client.admin`` user, execute the following:
|
||||
to create a keyring for the ``client.admin`` user, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo ceph auth get client.admin -o /etc/ceph/ceph.client.admin.keyring
|
||||
|
||||
Notice that we use the recommended file format for an individual user.
|
||||
Notice that the file format in this command is the file format conventionally used when manipulating the keyrings of individual users.
|
||||
|
||||
When you want to import users to a keyring, you can use ``ceph-authtool``
|
||||
If you want to import users to a keyring, you can use ``ceph-authtool``
|
||||
to specify the destination keyring and the source keyring.
|
||||
For example:
|
||||
|
||||
@ -681,19 +690,19 @@ For example:
|
||||
|
||||
sudo ceph-authtool /etc/ceph/ceph.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring
|
||||
|
||||
Create a User
|
||||
-------------
|
||||
Creating a User
|
||||
---------------
|
||||
|
||||
Ceph provides the `Add a User`_ function to create a user directly in the Ceph
|
||||
Storage Cluster. However, you can also create a user, keys and capabilities
|
||||
directly on a Ceph client keyring. Then, you can import the user to the Ceph
|
||||
Ceph provides the `Adding a User`_ function to create a user directly in the Ceph
|
||||
Storage Cluster. However, you can also create a user, keys, and capabilities
|
||||
directly on a Ceph client keyring, and then import the user to the Ceph
|
||||
Storage Cluster. For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo ceph-authtool -n client.ringo --cap osd 'allow rwx' --cap mon 'allow rwx' /etc/ceph/ceph.keyring
|
||||
|
||||
See `Authorization (Capabilities)`_ for additional details on capabilities.
|
||||
For additional details on capabilities, see `Authorization (Capabilities)`_.
|
||||
|
||||
You can also create a keyring and add a new user to the keyring simultaneously.
|
||||
For example:
|
||||
@ -702,36 +711,37 @@ For example:
|
||||
|
||||
sudo ceph-authtool -C /etc/ceph/ceph.keyring -n client.ringo --cap osd 'allow rwx' --cap mon 'allow rwx' --gen-key
|
||||
|
||||
In the foregoing scenarios, the new user ``client.ringo`` is only in the
|
||||
keyring. To add the new user to the Ceph Storage Cluster, you must still add
|
||||
the new user to the Ceph Storage Cluster:
|
||||
In the above examples, the new user ``client.ringo`` has been added only to the
|
||||
keyring. The new user has not been added to the Ceph Storage Cluster.
|
||||
|
||||
To add the new user ``client.ringo`` to the Ceph Storage Cluster, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo ceph auth add client.ringo -i /etc/ceph/ceph.keyring
|
||||
|
||||
Modify a User
|
||||
-------------
|
||||
Modifying a User
|
||||
----------------
|
||||
|
||||
To modify the capabilities of a user record in a keyring, specify the keyring,
|
||||
and the user followed by the capabilities. For example:
|
||||
To modify the capabilities of a user record in a keyring, specify the keyring
|
||||
and the user, followed by the capabilities. For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo ceph-authtool /etc/ceph/ceph.keyring -n client.ringo --cap osd 'allow rwx' --cap mon 'allow rwx'
|
||||
|
||||
To update the user to the Ceph Storage Cluster, you must update the user
|
||||
in the keyring to the user entry in the Ceph Storage Cluster:
|
||||
To update the user in the Ceph Storage Cluster, you must import the updated user entry
|
||||
from the keyring into the Ceph Storage Cluster. To do so, run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo ceph auth import -i /etc/ceph/ceph.keyring
|
||||
|
||||
See `Import a User(s)`_ for details on updating a Ceph Storage Cluster user
|
||||
from a keyring.
|
||||
For details on updating a Ceph Storage Cluster user from a
|
||||
keyring, see `Importing a User`_.
|
||||
|
||||
You may also `Modify User Capabilities`_ directly in the cluster, store the
|
||||
results to a keyring file; then, import the keyring into your main
|
||||
You may also :ref:`Modify user capabilities<modify-user-capabilities>` directly in the cluster, store the
|
||||
results to a keyring file, and then import the keyring into your main
|
||||
``ceph.keyring`` file.
|
||||
|
||||
Command Line Usage
|
||||
@ -741,12 +751,12 @@ Ceph supports the following usage for user name and secret:
|
||||
|
||||
``--id`` | ``--user``
|
||||
|
||||
:Description: Ceph identifies users with a type and an ID (e.g., ``TYPE.ID`` or
|
||||
``client.admin``, ``client.user1``). The ``id``, ``name`` and
|
||||
``-n`` options enable you to specify the ID portion of the user
|
||||
name (e.g., ``admin``, ``user1``, ``foo``, etc.). You can specify
|
||||
:Description: Ceph identifies users with a type and an ID: the form of this user identification is ``TYPE.ID``, and examples of the type and ID are
|
||||
``client.admin`` and ``client.user1``. The ``id``, ``name`` and
|
||||
``-n`` options allow you to specify the ID portion of the user
|
||||
name (for example, ``admin``, ``user1``, ``foo``). You can specify
|
||||
the user with the ``--id`` and omit the type. For example,
|
||||
to specify user ``client.foo`` enter the following:
|
||||
to specify user ``client.foo``, run the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -756,10 +766,10 @@ Ceph supports the following usage for user name and secret:
|
||||
|
||||
``--name`` | ``-n``
|
||||
|
||||
:Description: Ceph identifies users with a type and an ID (e.g., ``TYPE.ID`` or
|
||||
``client.admin``, ``client.user1``). The ``--name`` and ``-n``
|
||||
options enables you to specify the fully qualified user name.
|
||||
You must specify the user type (typically ``client``) with the
|
||||
:Description: Ceph identifies users with a type and an ID: the form of this user identification is ``TYPE.ID``, and examples of the type and ID are
|
||||
``client.admin`` and ``client.user1``. The ``--name`` and ``-n``
|
||||
options allow you to specify the fully qualified user name.
|
||||
You are required to specify the user type (typically ``client``) with the
|
||||
user ID. For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
@ -770,8 +780,8 @@ Ceph supports the following usage for user name and secret:
|
||||
|
||||
``--keyring``
|
||||
|
||||
:Description: The path to the keyring containing one or more user name and
|
||||
secret. The ``--secret`` option provides the same functionality,
|
||||
:Description: The path to the keyring that contains one or more user names and
|
||||
secrets. The ``--secret`` option provides the same functionality,
|
||||
but it does not work with Ceph RADOS Gateway, which uses
|
||||
``--secret`` for another purpose. You may retrieve a keyring with
|
||||
``ceph auth get-or-create`` and store it locally. This is a
|
||||
@ -788,43 +798,42 @@ Ceph supports the following usage for user name and secret:
|
||||
Limitations
|
||||
===========
|
||||
|
||||
The ``cephx`` protocol authenticates Ceph clients and servers to each other. It
|
||||
The ``cephx`` protocol authenticates Ceph clients and servers to each other. It
|
||||
is not intended to handle authentication of human users or application programs
|
||||
run on their behalf. If that effect is required to handle your access control
|
||||
needs, you must have another mechanism, which is likely to be specific to the
|
||||
front end used to access the Ceph object store. This other mechanism has the
|
||||
role of ensuring that only acceptable users and programs are able to run on the
|
||||
machine that Ceph will permit to access its object store.
|
||||
that are run on their behalf. If your access control
|
||||
needs require that kind of authentication, you will need to have some other mechanism, which is likely to be specific to the
|
||||
front end that is used to access the Ceph object store. This other mechanism would ensure that only acceptable users and programs are able to run on the
|
||||
machine that Ceph permits to access its object store.
|
||||
|
||||
The keys used to authenticate Ceph clients and servers are typically stored in
|
||||
a plain text file with appropriate permissions in a trusted host.
|
||||
a plain text file on a trusted host. Appropriate permissions must be set on the plain text file.
|
||||
|
||||
.. important:: Storing keys in plaintext files has security shortcomings, but
|
||||
they are difficult to avoid, given the basic authentication methods Ceph
|
||||
uses in the background. Those setting up Ceph systems should be aware of
|
||||
uses in the background. Anyone setting up Ceph systems should be aware of
|
||||
these shortcomings.
|
||||
|
||||
In particular, arbitrary user machines, especially portable machines, should not
|
||||
In particular, user machines, especially portable machines, should not
|
||||
be configured to interact directly with Ceph, since that mode of use would
|
||||
require the storage of a plaintext authentication key on an insecure machine.
|
||||
Anyone who stole that machine or obtained surreptitious access to it could
|
||||
obtain the key that will allow them to authenticate their own machines to Ceph.
|
||||
Anyone who stole that machine or obtained access to it could
|
||||
obtain a key that allows them to authenticate their own machines to Ceph.
|
||||
|
||||
Rather than permitting potentially insecure machines to access a Ceph object
|
||||
store directly, users should be required to sign in to a trusted machine in
|
||||
your environment using a method that provides sufficient security for your
|
||||
purposes. That trusted machine will store the plaintext Ceph keys for the
|
||||
human users. A future version of Ceph may address these particular
|
||||
Instead of permitting potentially insecure machines to access a Ceph object
|
||||
store directly, you should require users to sign in to a trusted machine in
|
||||
your environment, using a method that provides sufficient security for your
|
||||
purposes. That trusted machine will store the plaintext Ceph keys for the
|
||||
human users. A future version of Ceph might address these particular
|
||||
authentication issues more fully.
|
||||
|
||||
At the moment, none of the Ceph authentication protocols provide secrecy for
|
||||
messages in transit. Thus, an eavesdropper on the wire can hear and understand
|
||||
all data sent between clients and servers in Ceph, even if it cannot create or
|
||||
alter them. Further, Ceph does not include options to encrypt user data in the
|
||||
object store. Users can hand-encrypt and store their own data in the Ceph
|
||||
object store, of course, but Ceph provides no features to perform object
|
||||
encryption itself. Those storing sensitive data in Ceph should consider
|
||||
encrypting their data before providing it to the Ceph system.
|
||||
At present, none of the Ceph authentication protocols provide secrecy for
|
||||
messages in transit. As a result, an eavesdropper on the wire can hear and understand
|
||||
all data sent between clients and servers in Ceph, even if the eavesdropper cannot create or
|
||||
alter the data. Similarly, Ceph does not include options to encrypt user data in the
|
||||
object store. Users can, of course, hand-encrypt and store their own data in the Ceph
|
||||
object store, but Ceph itself provides no features to perform object
|
||||
encryption. Anyone storing sensitive data in Ceph should consider
|
||||
encrypting their data before providing it to the Ceph system.
|
||||
|
||||
|
||||
.. _Architecture - High Availability Authentication: ../../../architecture#high-availability-authentication
|
||||
|
@ -36,8 +36,9 @@ resharding tasks, one at a time.
|
||||
Multisite
|
||||
=========
|
||||
|
||||
Dynamic resharding is not supported in a multisite environment.
|
||||
|
||||
Prior to the Reef release, RGW does not support dynamic resharding in a
|
||||
multisite environment. For information on dynamic resharding, see
|
||||
:ref:`Resharding <feature_resharding>` in the RGW multisite documentation.
|
||||
|
||||
Configuration
|
||||
=============
|
||||
|
@ -1130,7 +1130,7 @@ To view the configuration of a zonegroup, run this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
dosgw-admin zonegroup get [--rgw-zonegroup=<zonegroup>]
|
||||
radosgw-admin zonegroup get [--rgw-zonegroup=<zonegroup>]
|
||||
|
||||
The zonegroup configuration looks like this:
|
||||
|
||||
@ -1582,14 +1582,23 @@ Supported Features
|
||||
|
||||
.. _feature_resharding:
|
||||
|
||||
resharding
|
||||
Resharding
|
||||
~~~~~~~~~~
|
||||
|
||||
Allows buckets to be resharded in a multisite configuration without interrupting the replication of their objects. When ``rgw_dynamic_resharding`` is enabled, it runs on each zone independently, and zones may choose different shard counts for the same bucket. When buckets are resharded manually with ``radosgw-admin bucket reshard``, only that zone's bucket is modified. A zone feature should only be marked as supported after all of its radosgws and osds have upgraded.
|
||||
This feature allows buckets to be resharded in a multisite configuration
|
||||
without interrupting the replication of their objects. When
|
||||
``rgw_dynamic_resharding`` is enabled, it runs on each zone independently, and
|
||||
zones may choose different shard counts for the same bucket. When buckets are
|
||||
resharded manually with ``radosgw-admin bucket reshard``, only that zone's
|
||||
bucket is modified. A zone feature should only be marked as supported after all
|
||||
of its RGWs and OSDs have upgraded.
|
||||
|
||||
.. note:: Dynamic resharding is not supported in multisite deployments prior to
|
||||
the Reef release.
|
||||
|
||||
|
||||
Commands
|
||||
-----------------
|
||||
--------
|
||||
|
||||
Add support for a zone feature
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -138,9 +138,6 @@ updating, use the name of an existing topic and different endpoint values).
|
||||
.. tip:: Any notification already associated with the topic must be re-created
|
||||
in order for the topic to update.
|
||||
|
||||
.. note:: For rabbitmq, ``push-endpoint`` (with a hyphen in the middle) must be
|
||||
changed to ``push_endpoint`` (with an underscore in the middle).
|
||||
|
||||
::
|
||||
|
||||
POST
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _documenting_ceph:
|
||||
|
||||
==================
|
||||
Documenting Ceph
|
||||
==================
|
||||
|
@ -12,9 +12,10 @@ These are exciting times in the Ceph community! Get involved!
|
||||
| **Blog** | Check the Ceph Blog_ periodically to keep track | http://ceph.com/community/blog/ |
|
||||
| | of Ceph progress and important announcements. | |
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
| **Planet Ceph** | Check the blog aggregation on Planet Ceph for | https://ceph.com/category/planet/ |
|
||||
| **Planet Ceph** | Check the blog aggregation on Planet Ceph for | https://old.ceph.com/category/planet/ |
|
||||
| | interesting stories, information and | |
|
||||
| | experiences from the community. | |
|
||||
| | experiences from the community. **NOTE: NO | |
|
||||
| | longer updated as of 2023.** | |
|
||||
+----------------------+-------------------------------------------------+-----------------------------------------------+
|
||||
| **Wiki** | Check the Ceph Wiki is a source for more | http://wiki.ceph.com/ |
|
||||
| | community and development related topics. You | |
|
||||
|
@ -2,14 +2,24 @@
|
||||
Intro to Ceph
|
||||
===============
|
||||
|
||||
Whether you want to provide :term:`Ceph Object Storage` and/or
|
||||
:term:`Ceph Block Device` services to :term:`Cloud Platforms`, deploy
|
||||
a :term:`Ceph File System` or use Ceph for another purpose, all
|
||||
:term:`Ceph Storage Cluster` deployments begin with setting up each
|
||||
:term:`Ceph Node`, your network, and the Ceph Storage Cluster. A Ceph
|
||||
Storage Cluster requires at least one Ceph Monitor, Ceph Manager, and
|
||||
Ceph OSD (Object Storage Daemon). The Ceph Metadata Server is also
|
||||
required when running Ceph File System clients.
|
||||
Ceph can be used to provide :term:`Ceph Object Storage` to :term:`Cloud
|
||||
Platforms` and Ceph can be used to provide :term:`Ceph Block Device` services
|
||||
to :term:`Cloud Platforms`. Ceph can be used to deploy a :term:`Ceph File
|
||||
System`. All :term:`Ceph Storage Cluster` deployments begin with setting up
|
||||
each :term:`Ceph Node` and then setting up the network.
|
||||
|
||||
A Ceph Storage Cluster requires the following: at least one Ceph Monitor and at
|
||||
least one Ceph Manager, and at least as many Ceph OSDs as there are copies of
|
||||
an object stored on the Ceph cluster (for example, if three copies of a given
|
||||
object are stored on the Ceph cluster, then at least three OSDs must exist in
|
||||
that Ceph cluster).
|
||||
|
||||
The Ceph Metadata Server is necessary to run Ceph File System clients.
|
||||
|
||||
.. note::
|
||||
|
||||
It is a best practice to have a Ceph Manager for each Monitor, but it is not
|
||||
necessary.
|
||||
|
||||
.. ditaa::
|
||||
|
||||
|
@ -18,19 +18,18 @@ Linux Kernel
|
||||
maintenance" kernel series provided by either http://kernel.org or
|
||||
your Linux distribution on any client hosts.
|
||||
|
||||
For RBD, if you choose to *track* long-term kernels, we currently recommend
|
||||
4.x-based "longterm maintenance" kernel series or later:
|
||||
|
||||
- 4.19.z
|
||||
- 4.14.z
|
||||
- 5.x
|
||||
For RBD, if you choose to *track* long-term kernels, we recommend
|
||||
*at least* 4.19-based "longterm maintenance" kernel series. If you can
|
||||
use a newer "stable" or "longterm maintenance" kernel series, do it.
|
||||
|
||||
For CephFS, see the section about `Mounting CephFS using Kernel Driver`_
|
||||
for kernel version guidance.
|
||||
|
||||
Older kernel client versions may not support your `CRUSH tunables`_ profile
|
||||
or other newer features of the Ceph cluster, requiring the storage cluster
|
||||
to be configured with those features disabled.
|
||||
or other newer features of the Ceph cluster, requiring the storage cluster to
|
||||
be configured with those features disabled. For RBD, a kernel of version 5.3
|
||||
or CentOS 8.2 is the minimum necessary for reasonable support for RBD image
|
||||
features.
|
||||
|
||||
|
||||
Platforms
|
||||
|
@ -178,45 +178,77 @@ function install_pkg_on_ubuntu {
|
||||
fi
|
||||
}
|
||||
|
||||
boost_ver=1.73
|
||||
|
||||
function clean_boost_on_ubuntu {
|
||||
in_jenkins && echo "CI_DEBUG: Start clean_boost_on_ubuntu() in install-deps.sh"
|
||||
# Find currently installed version. If there are multiple
|
||||
# versions, they end up newline separated
|
||||
local installed_ver=$(apt -qq list --installed ceph-libboost*-dev 2>/dev/null |
|
||||
cut -d' ' -f2 |
|
||||
cut -d'.' -f1,2 |
|
||||
sort -u)
|
||||
# If installed_ver contains whitespace, we can't really count on it,
|
||||
# but otherwise, bail out if the version installed is the version
|
||||
# we want.
|
||||
if test -n "$installed_ver" &&
|
||||
echo -n "$installed_ver" | tr '[:space:]' ' ' | grep -v -q ' '; then
|
||||
if echo "$installed_ver" | grep -q "^$boost_ver"; then
|
||||
return
|
||||
fi
|
||||
fi
|
||||
|
||||
# Historical packages
|
||||
$SUDO rm -f /etc/apt/sources.list.d/ceph-libboost*.list
|
||||
# Currently used
|
||||
$SUDO rm -f /etc/apt/sources.list.d/libboost.list
|
||||
# Refresh package list so things aren't in the available list.
|
||||
$SUDO env DEBIAN_FRONTEND=noninteractive apt-get update -y || true
|
||||
# Remove all ceph-libboost packages. We have an early return if
|
||||
# the desired version is already (and the only) version installed,
|
||||
# so no need to spare it.
|
||||
if test -n "$installed_ver"; then
|
||||
$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y --fix-missing remove "ceph-libboost*"
|
||||
fi
|
||||
}
|
||||
|
||||
function install_boost_on_ubuntu {
|
||||
local ver=1.73
|
||||
in_jenkins && echo "CI_DEBUG: Running install_boost_on_ubuntu() in install-deps.sh"
|
||||
# Once we get to this point, clean_boost_on_ubuntu() should ensure
|
||||
# that there is no more than one installed version.
|
||||
local installed_ver=$(apt -qq list --installed ceph-libboost*-dev 2>/dev/null |
|
||||
grep -e 'libboost[0-9].[0-9]\+-dev' |
|
||||
cut -d' ' -f2 |
|
||||
cut -d'.' -f1,2)
|
||||
if test -n "$installed_ver"; then
|
||||
if echo "$installed_ver" | grep -q "^$ver"; then
|
||||
if echo "$installed_ver" | grep -q "^$boost_ver"; then
|
||||
return
|
||||
else
|
||||
$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove "ceph-libboost.*${installed_ver}.*"
|
||||
$SUDO rm -f /etc/apt/sources.list.d/ceph-libboost${installed_ver}.list
|
||||
fi
|
||||
fi
|
||||
local codename=$1
|
||||
local project=libboost
|
||||
local sha1=7aba8a1882670522ee1d1ee1bba0ea170b292dec
|
||||
install_pkg_on_ubuntu \
|
||||
$project \
|
||||
$sha1 \
|
||||
$codename \
|
||||
check \
|
||||
ceph-libboost-atomic$ver-dev \
|
||||
ceph-libboost-chrono$ver-dev \
|
||||
ceph-libboost-container$ver-dev \
|
||||
ceph-libboost-context$ver-dev \
|
||||
ceph-libboost-coroutine$ver-dev \
|
||||
ceph-libboost-date-time$ver-dev \
|
||||
ceph-libboost-filesystem$ver-dev \
|
||||
ceph-libboost-iostreams$ver-dev \
|
||||
ceph-libboost-program-options$ver-dev \
|
||||
ceph-libboost-python$ver-dev \
|
||||
ceph-libboost-random$ver-dev \
|
||||
ceph-libboost-regex$ver-dev \
|
||||
ceph-libboost-system$ver-dev \
|
||||
ceph-libboost-test$ver-dev \
|
||||
ceph-libboost-thread$ver-dev \
|
||||
ceph-libboost-timer$ver-dev
|
||||
$project \
|
||||
$sha1 \
|
||||
$codename \
|
||||
check \
|
||||
ceph-libboost-atomic${boost_ver}-dev \
|
||||
ceph-libboost-chrono${boost_ver}-dev \
|
||||
ceph-libboost-container${boost_ver}-dev \
|
||||
ceph-libboost-context${boost_ver}-dev \
|
||||
ceph-libboost-coroutine${boost_ver}-dev \
|
||||
ceph-libboost-date-time${boost_ver}-dev \
|
||||
ceph-libboost-filesystem${boost_ver}-dev \
|
||||
ceph-libboost-iostreams${boost_ver}-dev \
|
||||
ceph-libboost-program-options${boost_ver}-dev \
|
||||
ceph-libboost-python${boost_ver}-dev \
|
||||
ceph-libboost-random${boost_ver}-dev \
|
||||
ceph-libboost-regex${boost_ver}-dev \
|
||||
ceph-libboost-system${boost_ver}-dev \
|
||||
ceph-libboost-test${boost_ver}-dev \
|
||||
ceph-libboost-thread${boost_ver}-dev \
|
||||
ceph-libboost-timer${boost_ver}-dev
|
||||
}
|
||||
|
||||
function install_libzbd_on_ubuntu {
|
||||
@ -310,6 +342,9 @@ else
|
||||
case "$ID" in
|
||||
debian|ubuntu|devuan|elementary)
|
||||
echo "Using apt-get to install dependencies"
|
||||
# Put this before any other invocation of apt so it can clean
|
||||
# up in a broken case.
|
||||
clean_boost_on_ubuntu
|
||||
$SUDO apt-get install -y devscripts equivs
|
||||
$SUDO apt-get install -y dpkg-dev
|
||||
ensure_python3_sphinx_on_ubuntu
|
||||
@ -319,6 +354,27 @@ else
|
||||
[ ! $NO_BOOST_PKGS ] && install_boost_on_ubuntu bionic
|
||||
$with_zbd && install_libzbd_on_ubuntu bionic
|
||||
;;
|
||||
*Jammy*)
|
||||
[ ! $NO_BOOST_PKGS ] && \
|
||||
$SUDO env DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
libboost-atomic-dev \
|
||||
libboost-chrono-dev \
|
||||
libboost-container-dev \
|
||||
libboost-context-dev \
|
||||
libboost-coroutine-dev \
|
||||
libboost-date-time-dev \
|
||||
libboost-filesystem-dev \
|
||||
libboost-iostreams-dev \
|
||||
libboost-program-options-dev \
|
||||
libboost-python-dev \
|
||||
libboost-random-dev \
|
||||
libboost-regex-dev \
|
||||
libboost-system-dev \
|
||||
libboost-test-dev \
|
||||
libboost-thread-dev \
|
||||
libboost-timer-dev \
|
||||
gcc
|
||||
;;
|
||||
*)
|
||||
$SUDO apt-get install -y gcc
|
||||
;;
|
||||
|
@ -1,5 +1,7 @@
|
||||
# https://tracker.ceph.com/issues/45802
|
||||
# https://tracker.ceph.com/issues/61168
|
||||
overrides:
|
||||
ceph:
|
||||
log-ignorelist:
|
||||
- \(PG_AVAILABILITY\)
|
||||
- \(POOL_APP_NOT_ENABLED\)
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
overrides:
|
||||
ceph:
|
||||
log-ignorelist:
|
||||
|
@ -8,6 +8,9 @@ overrides:
|
||||
- slow request
|
||||
- MDS_CLIENT_LATE_RELEASE
|
||||
- t responding to mclientcaps
|
||||
- Degraded data redundancy
|
||||
- MDS_CLIENTS_LAGGY
|
||||
- Reduced data availability
|
||||
tasks:
|
||||
- cephfs_test_runner:
|
||||
fail_on_skip: false
|
||||
|
0
ceph/qa/suites/fs/mirror-ha/cephfs-mirror/+
Normal file
@ -0,0 +1,14 @@
|
||||
meta:
|
||||
- desc: create/rm volumes and set configs
|
||||
|
||||
tasks:
|
||||
- exec:
|
||||
mon.a:
|
||||
- "ceph fs volume create dc"
|
||||
- "ceph fs volume create dc-backup"
|
||||
- full_sequential_finally:
|
||||
- exec:
|
||||
mon.a:
|
||||
- ceph config set mon mon_allow_pool_delete true
|
||||
- ceph fs volume rm dc --yes-i-really-mean-it
|
||||
- ceph fs volume rm dc-backup --yes-i-really-mean-it
|
@ -8,10 +8,6 @@ overrides:
|
||||
debug client: 10
|
||||
|
||||
tasks:
|
||||
- exec:
|
||||
client.1:
|
||||
- "ceph fs volume create dc"
|
||||
- "ceph fs volume create dc-backup"
|
||||
- ceph-fuse:
|
||||
client.1:
|
||||
cephfs_name: dc
|
||||
|
@ -11,3 +11,4 @@ overrides:
|
||||
- has not responded to cap revoke by MDS for over
|
||||
- MDS_CLIENT_LATE_RELEASE
|
||||
- responding to mclientcaps
|
||||
- RECENT_CRASH
|
||||
|
@ -1,10 +1,12 @@
|
||||
meta:
|
||||
- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 1 mds
|
||||
- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 2 mds, 2 clients
|
||||
roles:
|
||||
- - mon.a
|
||||
- mgr.x
|
||||
- mds.a
|
||||
- mds.b
|
||||
- osd.0
|
||||
- osd.1
|
||||
- osd.2
|
||||
- client.0
|
||||
- client.1
|
||||
|
@ -5,3 +5,4 @@ tasks:
|
||||
- tasks.cephfs.test_volumes.TestVolumes
|
||||
- tasks.cephfs.test_volumes.TestSubvolumeGroups
|
||||
- tasks.cephfs.test_volumes.TestSubvolumes
|
||||
- tasks.cephfs.test_subvolume.TestSubvolume
|
||||
|
0
ceph/qa/suites/fs/workload/subvolume/$
Normal file
@ -0,0 +1,11 @@
|
||||
overrides:
|
||||
ceph:
|
||||
subvols:
|
||||
create: 2
|
||||
subvol_options: "--namespace-isolated --size 25000000000"
|
||||
ceph-fuse:
|
||||
client.0:
|
||||
mount_subvol_num: 0
|
||||
kclient:
|
||||
client.0:
|
||||
mount_subvol_num: 1
|
@ -0,0 +1,11 @@
|
||||
overrides:
|
||||
ceph:
|
||||
subvols:
|
||||
create: 2
|
||||
subvol_options: "--namespace-isolated"
|
||||
ceph-fuse:
|
||||
client.0:
|
||||
mount_subvol_num: 0
|
||||
kclient:
|
||||
client.0:
|
||||
mount_subvol_num: 1
|
@ -0,0 +1,10 @@
|
||||
overrides:
|
||||
ceph:
|
||||
subvols:
|
||||
create: 2
|
||||
ceph-fuse:
|
||||
client.0:
|
||||
mount_subvol_num: 0
|
||||
kclient:
|
||||
client.0:
|
||||
mount_subvol_num: 1
|
11
ceph/qa/suites/fs/workload/subvolume/with-quota.yaml
Normal file
@ -0,0 +1,11 @@
|
||||
overrides:
|
||||
ceph:
|
||||
subvols:
|
||||
create: 2
|
||||
subvol_options: "--size 25000000000"
|
||||
ceph-fuse:
|
||||
client.0:
|
||||
mount_subvol_num: 0
|
||||
kclient:
|
||||
client.0:
|
||||
mount_subvol_num: 1
|
@ -0,0 +1,12 @@
|
||||
overrides:
|
||||
install:
|
||||
ceph:
|
||||
extra_system_packages:
|
||||
- pv
|
||||
tasks:
|
||||
- workunit:
|
||||
clients:
|
||||
all:
|
||||
- rbd/diff_continuous.sh
|
||||
env:
|
||||
RBD_DEVICE_TYPE: "krbd"
|
1
ceph/qa/suites/orch/cephadm/workunits/task/.qa
Symbolic link
@ -0,0 +1 @@
|
||||
../.qa/
|
@ -0,0 +1 @@
|
||||
../.qa/
|
@ -0,0 +1 @@
|
||||
.qa/distros/podman/centos_8.stream_container_tools.yaml
|
@ -18,3 +18,4 @@ tasks:
|
||||
clients:
|
||||
client.0:
|
||||
- cephadm/test_iscsi_pids_limit.sh
|
||||
- cephadm/test_iscsi_etc_hosts.sh
|
@ -1 +0,0 @@
|
||||
../orch/rook
|
@ -16,7 +16,7 @@ override:
|
||||
ceph:
|
||||
conf:
|
||||
mon:
|
||||
osd default pool size: 3
|
||||
osd pool default size: 3
|
||||
osd min pg log entries: 5
|
||||
osd max pg log entries: 10
|
||||
tasks:
|
||||
|
@ -12,11 +12,11 @@ openstack:
|
||||
- volumes: # attached to each instance
|
||||
count: 3
|
||||
size: 10 # GB
|
||||
override:
|
||||
overrides:
|
||||
ceph:
|
||||
conf:
|
||||
mon:
|
||||
osd default pool size: 3
|
||||
osd pool default size: 3
|
||||
tasks:
|
||||
- install:
|
||||
- ceph:
|
||||
|
@ -20,6 +20,10 @@ overrides:
|
||||
debug monc: 20
|
||||
mon:
|
||||
mon warn on pool no app: false
|
||||
osd:
|
||||
osd class load list: "*"
|
||||
osd class default list: "*"
|
||||
osd client watch timeout: 120
|
||||
tasks:
|
||||
- workunit:
|
||||
timeout: 6h
|
||||
|
@ -0,0 +1,14 @@
|
||||
overrides:
|
||||
install:
|
||||
ceph:
|
||||
extra_packages:
|
||||
- rbd-nbd
|
||||
extra_system_packages:
|
||||
- pv
|
||||
tasks:
|
||||
- workunit:
|
||||
clients:
|
||||
client.0:
|
||||
- rbd/diff_continuous.sh
|
||||
env:
|
||||
RBD_DEVICE_TYPE: "nbd"
|
@ -8,6 +8,7 @@ tasks:
|
||||
- qemu-kvm-block-rbd
|
||||
deb:
|
||||
- qemu-block-extra
|
||||
- qemu-utils
|
||||
- ceph:
|
||||
fs: xfs
|
||||
conf:
|
||||
|
@ -8,6 +8,7 @@ tasks:
|
||||
- qemu-kvm-block-rbd
|
||||
deb:
|
||||
- qemu-block-extra
|
||||
- qemu-utils
|
||||
- ceph:
|
||||
fs: xfs
|
||||
conf:
|
||||
|
@ -8,6 +8,7 @@ tasks:
|
||||
- qemu-kvm-block-rbd
|
||||
deb:
|
||||
- qemu-block-extra
|
||||
- qemu-utils
|
||||
- ceph:
|
||||
fs: xfs
|
||||
conf:
|
||||
|
@ -8,6 +8,7 @@ tasks:
|
||||
- qemu-kvm-block-rbd
|
||||
deb:
|
||||
- qemu-block-extra
|
||||
- qemu-utils
|
||||
- ceph:
|
||||
fs: xfs
|
||||
conf:
|
||||
|
@ -18,6 +18,5 @@ overrides:
|
||||
endpoints: [c2.client.0]
|
||||
- name: test-zone3
|
||||
endpoints: [c1.client.1]
|
||||
- name: test-zone4
|
||||
endpoints: [c2.client.1]
|
||||
is_pubsub: true
|
||||
rgw-multisite-tests:
|
||||
args: [tests.py]
|
5
ceph/qa/suites/rgw/verify/tasks/versioning.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
tasks:
|
||||
- workunit:
|
||||
clients:
|
||||
client.0:
|
||||
- rgw/run-versioning.sh
|
@ -9,4 +9,6 @@ workload:
|
||||
clients:
|
||||
client.0:
|
||||
- cls
|
||||
env:
|
||||
CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot'
|
||||
- print: "**** done end rados_api.yaml"
|
||||
|
@ -7,4 +7,6 @@ stress-tasks:
|
||||
clients:
|
||||
client.0:
|
||||
- cls/test_cls_rbd.sh
|
||||
env:
|
||||
CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot'
|
||||
- print: "**** done cls/test_cls_rbd.sh 5-workload"
|
||||
|
@ -7,4 +7,6 @@ first-half-tasks:
|
||||
clients:
|
||||
client.0:
|
||||
- cls/test_cls_rbd.sh
|
||||
env:
|
||||
CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot'
|
||||
- print: "**** done cls/test_cls_rbd.sh 5-workload"
|
||||
|
@ -7,4 +7,6 @@ stress-tasks:
|
||||
clients:
|
||||
client.0:
|
||||
- cls/test_cls_rbd.sh
|
||||
env:
|
||||
CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot'
|
||||
- print: "**** done cls/test_cls_rbd.sh 5-workload"
|
||||
|
@ -262,6 +262,7 @@ def ceph_log(ctx, config):
|
||||
run.wait(
|
||||
ctx.cluster.run(
|
||||
args=[
|
||||
'time',
|
||||
'sudo',
|
||||
'find',
|
||||
'/var/log/ceph',
|
||||
@ -271,10 +272,15 @@ def ceph_log(ctx, config):
|
||||
run.Raw('|'),
|
||||
'sudo',
|
||||
'xargs',
|
||||
'--max-args=1',
|
||||
'--max-procs=0',
|
||||
'--verbose',
|
||||
'-0',
|
||||
'--no-run-if-empty',
|
||||
'--',
|
||||
'gzip',
|
||||
'-5',
|
||||
'--verbose',
|
||||
'--',
|
||||
],
|
||||
wait=False,
|
||||
@ -445,6 +451,9 @@ def cephfs_setup(ctx, config):
|
||||
name = fs_config.pop('name')
|
||||
temp = deepcopy(cephfs_config)
|
||||
teuthology.deep_merge(temp, fs_config)
|
||||
subvols = config.get('subvols', None)
|
||||
if subvols:
|
||||
teuthology.deep_merge(temp, {'subvols': subvols})
|
||||
fs = Filesystem(ctx, fs_config=temp, name=name, create=True)
|
||||
if set_allow_multifs:
|
||||
fs.set_allow_multifs()
|
||||
|
@ -524,6 +524,7 @@ def build_ceph_cluster(ctx, config):
|
||||
run.wait(
|
||||
ctx.cluster.run(
|
||||
args=[
|
||||
'time',
|
||||
'sudo',
|
||||
'find',
|
||||
'/var/log/ceph',
|
||||
@ -533,10 +534,15 @@ def build_ceph_cluster(ctx, config):
|
||||
run.Raw('|'),
|
||||
'sudo',
|
||||
'xargs',
|
||||
'--max-args=1',
|
||||
'--max-procs=0',
|
||||
'--verbose',
|
||||
'-0',
|
||||
'--no-run-if-empty',
|
||||
'--',
|
||||
'gzip',
|
||||
'-5',
|
||||
'--verbose',
|
||||
'--',
|
||||
],
|
||||
wait=False,
|
||||
|
@ -72,6 +72,20 @@ def task(ctx, config):
|
||||
mount_timeout: 120 # default is 30, give up if /sys/ is not populated
|
||||
- interactive:
|
||||
|
||||
Example that creates and mounts a subvol:
|
||||
|
||||
overrides:
|
||||
ceph:
|
||||
subvols:
|
||||
create: 2
|
||||
subvol_options: "--namespace-isolated --size 25000000000"
|
||||
ceph-fuse:
|
||||
client.0:
|
||||
mount_subvol_num: 0
|
||||
kclient:
|
||||
client.1:
|
||||
mount_subvol_num: 1
|
||||
|
||||
:param ctx: Context
|
||||
:param config: Configuration
|
||||
"""
|
||||
|
@ -3148,11 +3148,14 @@ class CephManager:
|
||||
raise
|
||||
self.log("quorum is size %d" % size)
|
||||
|
||||
def get_mon_health(self, debug=False):
|
||||
def get_mon_health(self, debug=False, detail=False):
|
||||
"""
|
||||
Extract all the monitor health information.
|
||||
"""
|
||||
out = self.raw_cluster_cmd('health', '--format=json')
|
||||
if detail:
|
||||
out = self.raw_cluster_cmd('health', 'detail', '--format=json')
|
||||
else:
|
||||
out = self.raw_cluster_cmd('health', '--format=json')
|
||||
if debug:
|
||||
self.log('health:\n{h}'.format(h=out))
|
||||
return json.loads(out)
|
||||
|
@ -92,7 +92,7 @@ class CephTestCase(unittest.TestCase):
|
||||
|
||||
|
||||
def assert_cluster_log(self, expected_pattern, invert_match=False,
|
||||
timeout=10, watch_channel=None):
|
||||
timeout=10, watch_channel=None, present=True):
|
||||
"""
|
||||
Context manager. Assert that during execution, or up to 5 seconds later,
|
||||
the Ceph cluster log emits a message matching the expected pattern.
|
||||
@ -102,6 +102,8 @@ class CephTestCase(unittest.TestCase):
|
||||
:param watch_channel: Specifies the channel to be watched. This can be
|
||||
'cluster', 'audit', ...
|
||||
:type watch_channel: str
|
||||
:param present: Assert the log entry is present (default: True) or not (False).
|
||||
:type present: bool
|
||||
"""
|
||||
|
||||
ceph_manager = self.ceph_cluster.mon_manager
|
||||
@ -118,10 +120,13 @@ class CephTestCase(unittest.TestCase):
|
||||
self.watcher_process = ceph_manager.run_ceph_w(watch_channel)
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
fail = False
|
||||
if not self.watcher_process.finished:
|
||||
# Check if we got an early match, wait a bit if we didn't
|
||||
if self.match():
|
||||
if present and self.match():
|
||||
return
|
||||
elif not present and self.match():
|
||||
fail = True
|
||||
else:
|
||||
log.debug("No log hits yet, waiting...")
|
||||
# Default monc tick interval is 10s, so wait that long and
|
||||
@ -134,18 +139,23 @@ class CephTestCase(unittest.TestCase):
|
||||
except CommandFailedError:
|
||||
pass
|
||||
|
||||
if not self.match():
|
||||
log.error("Log output: \n{0}\n".format(self.watcher_process.stdout.getvalue()))
|
||||
raise AssertionError("Expected log message not found: '{0}'".format(expected_pattern))
|
||||
if present and not self.match():
|
||||
log.error(f"Log output: \n{self.watcher_process.stdout.getvalue()}\n")
|
||||
raise AssertionError(f"Expected log message found: '{expected_pattern}'")
|
||||
elif fail or (not present and self.match()):
|
||||
log.error(f"Log output: \n{self.watcher_process.stdout.getvalue()}\n")
|
||||
raise AssertionError(f"Unexpected log message found: '{expected_pattern}'")
|
||||
|
||||
return ContextManager()
|
||||
|
||||
def wait_for_health(self, pattern, timeout):
|
||||
def wait_for_health(self, pattern, timeout, check_in_detail=None):
|
||||
"""
|
||||
Wait until 'ceph health' contains messages matching the pattern
|
||||
Also check if @check_in_detail matches detailed health messages
|
||||
only when @pattern is a code string.
|
||||
"""
|
||||
def seen_health_warning():
|
||||
health = self.ceph_cluster.mon_manager.get_mon_health()
|
||||
health = self.ceph_cluster.mon_manager.get_mon_health(debug=False, detail=bool(check_in_detail))
|
||||
codes = [s for s in health['checks']]
|
||||
summary_strings = [s[1]['summary']['message'] for s in health['checks'].items()]
|
||||
if len(summary_strings) == 0:
|
||||
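
A hedged sketch of how a test might use the new `present` flag; the subclass, pattern, and workload below are examples only, not code from this change.

    # Example only; assumes the import path used inside the teuthology qa tree.
    from tasks.ceph_test_case import CephTestCase

    class ExampleTest(CephTestCase):
        def test_no_unexpected_warning(self):
            # With present=False the context manager raises AssertionError if
            # the pattern *does* appear in the cluster log within the timeout.
            with self.assert_cluster_log("Health check failed", present=False, timeout=30):
                self.ceph_cluster.mon_manager.raw_cluster_cmd('df')  # any benign workload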
@@ -156,7 +166,16 @@ class CephTestCase(unittest.TestCase):
                if pattern in ss:
                    return True
            if pattern in codes:
                return True
                if not check_in_detail:
                    return True
                # check if the string is in detail list if asked
                detail_strings = [ss['message'] for ss in \
                     [s for s in health['checks'][pattern]['detail']]]
                log.debug(f'detail_strings: {detail_strings}')
                for ds in detail_strings:
                    if check_in_detail in ds:
                        return True
                log.debug(f'detail string "{check_in_detail}" not found')

            log.debug("Not found expected summary strings yet ({0})".format(summary_strings))
            return False
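
A corresponding sketch for `wait_for_health`: `check_in_detail` is only consulted when the pattern matches a health-check code, in which case that check's detail messages are searched for the substring. The health code and substring below are example values.

    # Inside a CephTestCase-derived test; example values only.
    def test_waits_for_detailed_damage(self):
        # Blocks until the MDS_DAMAGE check is raised and one of its detail
        # messages contains the substring, or the timeout expires.
        self.wait_for_health("MDS_DAMAGE", timeout=60, check_in_detail="damage")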
@@ -257,6 +257,7 @@ def ceph_log(ctx, config):
        run.wait(
            ctx.cluster.run(
                args=[
                    'time',
                    'sudo',
                    'find',
                    '/var/log/ceph',  # all logs, not just for the cluster
@@ -267,10 +268,15 @@ def ceph_log(ctx, config):
                    run.Raw('|'),
                    'sudo',
                    'xargs',
                    '--max-args=1',
                    '--max-procs=0',
                    '--verbose',
                    '-0',
                    '--no-run-if-empty',
                    '--',
                    'gzip',
                    '-5',
                    '--verbose',
                    '--',
                ],
                wait=False,
@@ -818,7 +824,6 @@ def ceph_mdss(ctx, config):

    yield


@contextlib.contextmanager
def ceph_monitoring(daemon_type, ctx, config):
    """
@@ -163,7 +163,7 @@ class CephFSTestCase(CephTestCase):
        # In case some test messed with auth caps, reset them
        for client_id in client_mount_ids:
            cmd = ['auth', 'caps', f'client.{client_id}', 'mon', 'allow r',
                   'osd', f'allow rw pool={self.fs.get_data_pool_name()}',
                   'osd', f'allow rw tag cephfs data={self.fs.name}',
                   'mds', 'allow']

            if self.run_cluster_cmd_result(cmd) == 0:
@@ -369,6 +369,9 @@ class MDSCluster(CephCluster):
        """
        self.mds_daemons[mds_id].signal(sig, silent);

    def mds_is_running(self, mds_id):
        return self.mds_daemons[mds_id].running()

    def newfs(self, name='cephfs', create=True):
        return Filesystem(self._ctx, name=name, create=create)

@@ -748,6 +751,7 @@ class Filesystem(MDSCluster):
                raise

        if self.fs_config is not None:
            log.debug(f"fs_config: {self.fs_config}")
            max_mds = self.fs_config.get('max_mds', 1)
            if max_mds > 1:
                self.set_max_mds(max_mds)
@@ -760,6 +764,34 @@
            if session_timeout != 60:
                self.set_session_timeout(session_timeout)

            if self.fs_config.get('subvols', None) is not None:
                log.debug(f"Creating {self.fs_config.get('subvols')} subvols "
                          f"for filesystem '{self.name}'")
                if not hasattr(self._ctx, "created_subvols"):
                    self._ctx.created_subvols = dict()

                subvols = self.fs_config.get('subvols')
                assert(isinstance(subvols, dict))
                assert(isinstance(subvols['create'], int))
                assert(subvols['create'] > 0)

                for sv in range(0, subvols['create']):
                    sv_name = f'sv_{sv}'
                    self.mon_manager.raw_cluster_cmd(
                        'fs', 'subvolume', 'create', self.name, sv_name,
                        self.fs_config.get('subvol_options', ''))

                    if self.name not in self._ctx.created_subvols:
                        self._ctx.created_subvols[self.name] = []

                    subvol_path = self.mon_manager.raw_cluster_cmd(
                        'fs', 'subvolume', 'getpath', self.name, sv_name)
                    subvol_path = subvol_path.strip()
                    self._ctx.created_subvols[self.name].append(subvol_path)
            else:
                log.debug(f"Not Creating any subvols for filesystem '{self.name}'")


        self.getinfo(refresh = True)

        # wait pgs to be clean
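
Each loop iteration above issues the same mon commands an operator could run by hand. A rough stand-alone equivalent, with the filesystem name and options taken from the docstring example earlier in this change (treat them as placeholders):

    # Rough CLI equivalent of one iteration of the loop above; names and
    # options are placeholders taken from the docstring example.
    import subprocess

    fs_name, sv_name = 'cephfs', 'sv_0'
    subprocess.check_call(['ceph', 'fs', 'subvolume', 'create', fs_name, sv_name,
                           '--namespace-isolated', '--size', '25000000000'])
    path = subprocess.check_output(['ceph', 'fs', 'subvolume', 'getpath',
                                    fs_name, sv_name]).decode().strip()
    print(path)  # recorded in ctx.created_subvols[fs_name] by the code above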
@@ -1090,6 +1122,10 @@ class Filesystem(MDSCluster):
    def rank_fail(self, rank=0):
        self.mon_manager.raw_cluster_cmd("mds", "fail", "{}:{}".format(self.id, rank))

    def rank_is_running(self, rank=0, status=None):
        name = self.get_rank(rank=rank, status=status)['name']
        return self.mds_is_running(name)

    def get_ranks(self, status=None):
        if status is None:
            status = self.getinfo()
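
A hedged, simplified sketch of how the new `rank_is_running()` helper might be combined with `rank_fail()`, assuming the generic `wait_until_true()` polling helper already used by these tests; the timeout is an example value and failover races are ignored for brevity.

    # Inside a cephfs test: fail rank 0, then poll until the rank reports a
    # running MDS again.
    def test_rank_comes_back(self):
        self.fs.rank_fail(rank=0)
        self.wait_until_true(lambda: self.fs.rank_is_running(rank=0), timeout=120)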
@@ -1537,7 +1573,7 @@ class Filesystem(MDSCluster):
        if quiet:
            base_args = [os.path.join(self._prefix, tool), '--debug-mds=1', '--debug-objecter=1']
        else:
            base_args = [os.path.join(self._prefix, tool), '--debug-mds=4', '--debug-objecter=1']
            base_args = [os.path.join(self._prefix, tool), '--debug-mds=20', '--debug-ms=1', '--debug-objecter=1']

        if rank is not None:
            base_args.extend(["--rank", "%s" % str(rank)])
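
For illustration, the non-quiet branch now enables mds, messenger, and objecter debugging for the offline tool. A possible resulting invocation is sketched below; the tool name, prefix, rank value, and subcommand are placeholders, not values fixed by this change.

    # Placeholder values throughout; this only mirrors how the argument list
    # is assembled by the code above.
    import os

    prefix, tool, rank = '/usr/bin', 'cephfs-journal-tool', 'cephfs:0'
    base_args = [os.path.join(prefix, tool), '--debug-mds=20', '--debug-ms=1',
                 '--debug-objecter=1']
    if rank is not None:
        base_args.extend(["--rank", "%s" % str(rank)])
    print(' '.join(base_args + ['journal', 'inspect']))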
Some files were not shown because too many files have changed in this diff.