import 15.2.9

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Thomas Lamprecht 2021-02-25 16:56:55 +01:00
parent 8b89984e92
commit adb31ebba1
386 changed files with 10834 additions and 2135 deletions

View File

@ -13,7 +13,8 @@ foreach(policy
CMP0054
CMP0056
CMP0065
CMP0075)
CMP0075
CMP0093)
if(POLICY ${policy})
cmake_policy(SET ${policy} NEW)
endif()
@ -667,4 +668,4 @@ add_custom_target(tags DEPENDS ctags)
find_package(CppCheck)
find_package(IWYU)
set(VERSION 15.2.8)
set(VERSION 15.2.9)

View File

@ -1,5 +1,13 @@
15.2.9
------
* MGR: progress module can now be turned on/off, using the commands:
``ceph progress on`` and ``ceph progress off``.
15.2.8
------
* $pid expansion in config paths like `admin_socket` will now properly expand
to the daemon pid for commands like `ceph-mds` or `ceph-osd`. Previously only
`ceph-fuse`/`rbd-nbd` expanded `$pid` with the actual daemon pid.
* ceph-volume: The ``lvm batch`` subcommand received a major rewrite. This closes
a number of bugs and improves usability in terms of size specification and

View File

@ -59,7 +59,7 @@ cd build-doc
if [ ! -e $vdir ]; then
virtualenv --python=python3 $vdir
fi
$vdir/bin/pip install --quiet -r $TOPDIR/admin/doc-requirements.txt
$vdir/bin/pip install --use-feature=2020-resolver --quiet -r $TOPDIR/admin/doc-requirements.txt -r $TOPDIR/admin/doc-python-common-requirements.txt
install -d -m0755 \
$TOPDIR/build-doc/output/html \

View File

@ -0,0 +1,3 @@
pcpp
Jinja2
-e../src/python-common

View File

@ -1,7 +1,7 @@
# Contributor: John Coyle <dx9err@gmail.com>
# Maintainer: John Coyle <dx9err@gmail.com>
pkgname=ceph
pkgver=15.2.8
pkgver=15.2.9
pkgrel=0
pkgdesc="Ceph is a distributed object store and file system"
pkgusers="ceph"
@ -63,7 +63,7 @@ makedepends="
xmlstarlet
yasm
"
source="ceph-15.2.8.tar.bz2"
source="ceph-15.2.9.tar.bz2"
subpackages="
$pkgname-base
$pkgname-common
@ -116,7 +116,7 @@ _sysconfdir=/etc
_udevrulesdir=/etc/udev/rules.d
_python_sitelib=/usr/lib/python2.7/site-packages
builddir=$srcdir/ceph-15.2.8
builddir=$srcdir/ceph-15.2.9
build() {
export CEPH_BUILD_VIRTUALENV=$builddir

View File

@ -98,7 +98,7 @@
# main package definition
#################################################################################
Name: ceph
Version: 15.2.8
Version: 15.2.9
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
@ -114,7 +114,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
Group: System/Filesystems
%endif
URL: http://ceph.com/
Source0: %{?_remote_tarball_prefix}ceph-15.2.8.tar.bz2
Source0: %{?_remote_tarball_prefix}ceph-15.2.9.tar.bz2
%if 0%{?suse_version}
# _insert_obs_source_lines_here
ExclusiveArch: x86_64 aarch64 ppc64le s390x
@ -414,10 +414,8 @@ Base is the package that includes all the files shared amongst ceph servers
%package -n cephadm
Summary: Utility to bootstrap Ceph clusters
BuildArch: noarch
Requires: lvm2
%if 0%{?suse_version}
Requires: apparmor-abstractions
%endif
Requires: python%{python3_pkgversion}
%if 0%{?weak_deps}
Recommends: podman
@ -477,8 +475,12 @@ Provides: ceph-test:/usr/bin/ceph-monstore-tool
Requires: ceph-base = %{_epoch_prefix}%{version}-%{release}
%if 0%{?weak_deps}
Recommends: nvme-cli
%if 0%{?suse_version}
Requires: smartmontools
%else
Recommends: smartmontools
%endif
%endif
%description mon
ceph-mon is the cluster monitor daemon for the Ceph distributed file
system. One or more instances of ceph-mon form a Paxos part-time
@ -757,8 +759,12 @@ Requires: libstoragemgmt
Requires: python%{python3_pkgversion}-ceph-common = %{_epoch_prefix}%{version}-%{release}
%if 0%{?weak_deps}
Recommends: nvme-cli
%if 0%{?suse_version}
Requires: smartmontools
%else
Recommends: smartmontools
%endif
%endif
%description osd
ceph-osd is the object storage daemon for the Ceph distributed file
system. It is responsible for storing objects on a local file system
@ -1134,7 +1140,7 @@ This package provides Cephs default alerts for Prometheus.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-15.2.8
%autosetup -p1 -n ceph-15.2.9
%build
# LTO can be enabled as soon as the following GCC bug is fixed:
@ -1309,7 +1315,7 @@ ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
# sudoers.d
install -m 0600 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl
install -m 0440 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl
%if 0%{?rhel} >= 8
pathfix.py -pni "%{__python3} %{py3_shbang_opts}" %{buildroot}%{_bindir}/*

View File

@ -414,10 +414,8 @@ Base is the package that includes all the files shared amongst ceph servers
%package -n cephadm
Summary: Utility to bootstrap Ceph clusters
BuildArch: noarch
Requires: lvm2
%if 0%{?suse_version}
Requires: apparmor-abstractions
%endif
Requires: python%{python3_pkgversion}
%if 0%{?weak_deps}
Recommends: podman
@ -477,8 +475,12 @@ Provides: ceph-test:/usr/bin/ceph-monstore-tool
Requires: ceph-base = %{_epoch_prefix}%{version}-%{release}
%if 0%{?weak_deps}
Recommends: nvme-cli
%if 0%{?suse_version}
Requires: smartmontools
%else
Recommends: smartmontools
%endif
%endif
%description mon
ceph-mon is the cluster monitor daemon for the Ceph distributed file
system. One or more instances of ceph-mon form a Paxos part-time
@ -757,8 +759,12 @@ Requires: libstoragemgmt
Requires: python%{python3_pkgversion}-ceph-common = %{_epoch_prefix}%{version}-%{release}
%if 0%{?weak_deps}
Recommends: nvme-cli
%if 0%{?suse_version}
Requires: smartmontools
%else
Recommends: smartmontools
%endif
%endif
%description osd
ceph-osd is the object storage daemon for the Ceph distributed file
system. It is responsible for storing objects on a local file system
@ -1309,7 +1315,7 @@ ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
# sudoers.d
install -m 0600 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl
install -m 0440 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl
%if 0%{?rhel} >= 8
pathfix.py -pni "%{__python3} %{py3_shbang_opts}" %{buildroot}%{_bindir}/*

View File

@ -1,7 +1,13 @@
ceph (15.2.8-1bionic) bionic; urgency=medium
ceph (15.2.9-1bionic) bionic; urgency=medium
-- Jenkins Build Slave User <jenkins-build@braggi12.front.sepia.ceph.com> Wed, 16 Dec 2020 18:29:12 +0000
-- Jenkins Build Slave User <jenkins-build@braggi11.front.sepia.ceph.com> Tue, 23 Feb 2021 14:23:03 +0000
ceph (15.2.9-1) stable; urgency=medium
* New upstream release
-- Ceph Release Team <ceph-maintainers@ceph.com> Tue, 23 Feb 2021 14:10:13 +0000
ceph (15.2.8-1) stable; urgency=medium

View File

@ -24,6 +24,7 @@ check_function_exists(strerror_r HAVE_Strerror_R)
check_function_exists(name_to_handle_at HAVE_NAME_TO_HANDLE_AT)
check_function_exists(pipe2 HAVE_PIPE2)
check_function_exists(accept4 HAVE_ACCEPT4)
check_function_exists(sigdescr_np HAVE_SIGDESCR_NP)
include(CMakePushCheckState)
cmake_push_check_state(RESET)

View File

@ -61,7 +61,7 @@ override_dh_auto_install:
install -D -m 644 udev/50-rbd.rules $(DESTDIR)/lib/udev/rules.d/50-rbd.rules
install -D -m 644 src/etc-rbdmap $(DESTDIR)/etc/ceph/rbdmap
install -D -m 644 etc/sysctl/90-ceph-osd.conf $(DESTDIR)/etc/sysctl.d/30-ceph-osd.conf
install -D -m 600 sudoers.d/ceph-osd-smartctl $(DESTDIR)/etc/sudoers.d/ceph-osd-smartctl
install -D -m 440 sudoers.d/ceph-osd-smartctl $(DESTDIR)/etc/sudoers.d/ceph-osd-smartctl
install -m 755 src/cephadm/cephadm $(DESTDIR)/usr/sbin/cephadm

View File

@ -17,8 +17,10 @@ if [ -r /etc/os-release ]; then
case "$ID" in
fedora)
PYBUILD="3.7"
if [ "$VERSION_ID" -ge "32" ] ; then
if [ "$VERSION_ID" -eq "32" ] ; then
PYBUILD="3.8"
elif [ "$VERSION_ID" -ge "33" ] ; then
PYBUILD="3.9"
fi
;;
rhel|centos)

View File

@ -319,7 +319,7 @@ This can be described with two layouts.
db_devices:
model: MC-55-44-XZ
limit: 2 (db_slots is actually to be favoured here, but it's not implemented yet)
---
service_type: osd
service_id: osd_spec_ssd
placement:
@ -376,8 +376,7 @@ You can use the 'host_pattern' key in the layout to target certain nodes. Salt t
rotational: 1
db_devices:
rotational: 0
---
service_type: osd
service_id: osd_spec_six_to_ten
placement:

View File

@ -37,4 +37,5 @@ versions of Ceph.
Client Setup <client-setup>
DriveGroups <drivegroups>
troubleshooting
concepts
concepts
Cephadm Feature Planning <../dev/cephadm/index>

View File

@ -72,8 +72,20 @@ monitoring by following the steps below.
ceph orch apply grafana 1
Cephadm handles the prometheus, grafana, and alertmanager
configurations automatically.
Cephadm takes care of the configuration of Prometheus, Grafana, and Alertmanager
automatically.
However, there is one exception to this rule. In some setups, the Dashboard
user's browser might not be able to access the Grafana URL configured in Ceph
Dashboard. One such scenario is when the cluster and the accessing user are each
in a different DNS zone.
For this case, there is an extra configuration option for Ceph Dashboard, which
can be used to configure the URL for accessing Grafana by the user's browser.
This value will never be altered by cephadm. To set this configuration option,
issue the following command::
$ ceph dashboard set-grafana-frontend-api-url <grafana-server-api>
It may take a minute or two for services to be deployed. Once
completed, you should see something like this from ``ceph orch ls``
@ -88,6 +100,37 @@ completed, you should see something like this from ``ceph orch ls``
node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present
prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present
Configuring SSL/TLS for Grafana
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``cephadm`` will deploy Grafana using the certificate defined in the ceph
key/value store. If a certificate is not specified, ``cephadm`` will generate a
self-signed certificate during deployment of the Grafana service.
A custom certificate can be configured using the following commands.
.. code-block:: bash
ceph config-key set mgr/cephadm/grafana_key -i $PWD/key.pem
ceph config-key set mgr/cephadm/grafana_crt -i $PWD/certificate.pem
The ``cephadm`` manager module needs to be restarted to be able to read updates
to these keys.
.. code-block:: bash
ceph orch restart mgr
If you already deployed Grafana, you need to redeploy the service for the
configuration to be updated.
.. code-block:: bash
ceph orch redeploy grafana
The ``redeploy`` command also takes care of setting the right URL for Ceph
Dashboard.
Using custom images
~~~~~~~~~~~~~~~~~~~
@ -120,7 +163,7 @@ For example
you have set the custom image for automatically. You will need to
manually update the configuration (image name and tag) to be able to
install updates.
If you choose to go with the recommendations instead, you can reset the
custom image you have set before. After that, the default value will be
used again. Use ``ceph config rm`` to reset the configuration option
@ -135,6 +178,86 @@ For example
ceph config rm mgr mgr/cephadm/container_image_prometheus
Using custom configuration files
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By overriding cephadm templates, it is possible to completely customize the
configuration files for monitoring services.
Internally, cephadm already uses `Jinja2
<https://jinja.palletsprojects.com/en/2.11.x/>`_ templates to generate the
configuration files for all monitoring components. To customize the
configuration of Prometheus, Grafana, or Alertmanager, you can store a Jinja2
template for each service, which will then be used for configuration
generation instead. This template will be evaluated every time a service of that
kind is deployed or reconfigured. That way, the custom configuration is
preserved and automatically applied on future deployments of these services.
.. note::
The configuration of the custom template is also preserved when the default
configuration of cephadm changes. If the updated configuration is to be used,
the custom template needs to be migrated *manually*.
Option names
""""""""""""
The following templates for files that will be generated by cephadm can be
overridden. These are the names to be used when storing with ``ceph config-key
set``:
- ``alertmanager_alertmanager.yml``
- ``grafana_ceph-dashboard.yml``
- ``grafana_grafana.ini``
- ``prometheus_prometheus.yml``
You can look up the file templates that are currently used by cephadm in
``src/pybind/mgr/cephadm/templates``:
- ``services/alertmanager/alertmanager.yml.j2``
- ``services/grafana/ceph-dashboard.yml.j2``
- ``services/grafana/grafana.ini.j2``
- ``services/prometheus/prometheus.yml.j2``
Usage
"""""
The following command applies a single line value:
.. code-block:: bash
ceph config-key set mgr/cephadm/<option_name> <value>
To set contents of files as template use the ``-i`` argument:
.. code-block:: bash
ceph config-key set mgr/cephadm/<option_name> -i $PWD/<filename>
.. note::
When using files as input to ``config-key`` an absolute path to the file must
be used.
It is required to restart the cephadm mgr module after a configuration option
has been set. Then the configuration file for the service needs to be recreated.
This is done using `redeploy`. For more details see the following example.
Example
"""""""
.. code-block:: bash
# set the contents of ./prometheus.yml.j2 as template
ceph config-key set mgr/cephadm/services_prometheus_prometheus.yml \
-i $PWD/prometheus.yml.j2
# restart cephadm mgr module
ceph orch restart mgr
# redeploy the prometheus service
ceph orch redeploy prometheus
Disabling monitoring
--------------------

View File

@ -305,3 +305,53 @@ the cluster, create an initial ``ceph.conf`` file. For example::
Then, run bootstrap referencing this file::
cephadm bootstrap -c /root/ceph.conf ...
.. _cephadm-removing-hosts:
Removing Hosts
==============
If the node that you want to remove is running OSDs, make sure you remove the OSDs from the node.
To remove a host from a cluster, do the following:
For all Ceph service types, except for ``node-exporter`` and ``crash``, remove
the host from the placement specification file (for example, cluster.yml).
For example, if you are removing the host named host2, remove all occurrences of
``- host2`` from all ``placement:`` sections.
Update:
.. code-block:: yaml
service_type: rgw
placement:
hosts:
- host1
- host2
To:
.. code-block:: yaml
service_type: rgw
placement:
hosts:
- host1
Remove the host from cephadm's environment:
.. code-block:: bash
ceph orch host rm host2
See also :ref:`orchestrator-cli-host-management`.
If the host is running ``node-exporter`` and ``crash`` services, remove them by running
the following command on the host:
.. code-block:: bash
cephadm rm-daemon --fsid CLUSTER_ID --name SERVICE_NAME
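For illustration, a hedged example with placeholder values (the fsid below is made up, and the daemon name assumes the usual ``<type>.<hostname>`` naming convention):
.. code-block:: bash
# remove the node-exporter daemon from host2 (fsid is a placeholder)
cephadm rm-daemon --fsid d5a9cc0e-1111-2222-3333-444444444444 --name node-exporter.host2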

View File

@ -28,7 +28,7 @@ The other dimension to MDS performance is the available RAM for caching. The
MDS necessarily manages a distributed and cooperative metadata cache among all
clients and other active MDSs. Therefore it is essential to provide the MDS
with sufficient RAM to enable faster metadata access and mutation. The default
MDS cache size (see also :doc:`/cephfs/cache-size-limits`) is 4GB. It is
MDS cache size (see also :doc:`/cephfs/cache-configuration`) is 4GB. It is
recommended to provision at least 8GB of RAM for the MDS to support this cache
size.

View File

@ -0,0 +1,181 @@
=======================
MDS Cache Configuration
=======================
The Metadata Server coordinates a distributed cache among all MDS and CephFS
clients. The cache serves to improve metadata access latency and allow clients
to safely (coherently) mutate metadata state (e.g. via `chmod`). The MDS issues
**capabilities** and **directory entry leases** to indicate what state clients
may cache and what manipulations clients may perform (e.g. writing to a file).
The MDS and clients both try to enforce a cache size. The mechanism for
specifying the MDS cache size is described below. Note that the MDS cache size
is not a hard limit. The MDS always allows clients to look up new metadata,
which is loaded into the cache. This is an essential policy, as it avoids
deadlock in client requests (some requests may rely on held capabilities before
capabilities are released).
When the MDS cache is too large, the MDS will **recall** client state so cache
items become unpinned and eligible to be dropped. The MDS can only drop cache
state when no clients refer to the metadata to be dropped. Also described below
is how to configure the MDS recall settings for your workload's needs. This is
necessary if the internal throttles on the MDS recall can not keep up with the
client workload.
MDS Cache Size
--------------
You can limit the size of the Metadata Server (MDS) cache by a byte count. This
is done through the `mds_cache_memory_limit` configuration. For example::
ceph config set mds mds_cache_memory_limit 8GB
In addition, you can specify a cache reservation by using the
`mds_cache_reservation` parameter for MDS operations. The cache reservation is
expressed as a percentage of the memory limit and is set to 5% by default. The intent
of this parameter is to have the MDS maintain an extra reserve of memory for
its cache for new metadata operations to use. As a consequence, the MDS should
in general operate below its memory limit because it will recall old state from
clients in order to drop unused metadata in its cache.
If the MDS cannot keep its cache under the target size, the MDS will send a
health alert to the Monitors indicating the cache is too large. This is
controlled by the `mds_health_cache_threshold` configuration which is by
default 150% of the maximum cache size.
Because the cache limit is not a hard limit, potential bugs in the CephFS
client, MDS, or misbehaving applications might cause the MDS to exceed its
cache size. The health warnings are intended to help the operator detect this
situation and make necessary adjustments or investigate buggy clients.
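As a rough back-of-the-envelope sketch of the numbers above (assuming the reservation is applied as a fraction of the memory limit and the health threshold as a multiple of it), an 8GB limit works out as follows:
.. code-block:: bash
# hypothetical arithmetic only: trim target and health-warning size for an 8GB limit
awk 'BEGIN {
    limit       = 8 * 2^30;   # mds_cache_memory_limit = 8GB
    reservation = 0.05;       # mds_cache_reservation (5%)
    health      = 1.5;        # mds_health_cache_threshold (150%)
    printf "trim target : %.2f GB\n", limit * (1 - reservation) / 2^30;
    printf "health warn : %.2f GB\n", limit * health / 2^30;
}'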
MDS Cache Trimming
------------------
There are two configurations for throttling the rate of cache trimming in the MDS:
::
mds_cache_trim_threshold (default 64k)
and
::
mds_cache_trim_decay_rate (default 1)
The intent of the throttle is to prevent the MDS from spending too much time
trimming its cache. This may limit its ability to handle client requests or
perform other upkeep.
The trim configurations control an internal **decay counter**. Anytime metadata
is trimmed from the cache, the counter is incremented. The threshold sets the
maximum size of the counter while the decay rate indicates the exponential half
life for the counter. If the MDS is continually removing items from its cache,
it will reach a steady state of ``-ln(0.5)/rate*threshold`` items removed per
second.
The defaults are conservative and may need to be changed for a production MDS with
a large cache size.
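As a minimal sketch of that steady-state formula, assuming the decay rate is expressed in seconds, the defaults work out to roughly 45k items removed per second:
.. code-block:: bash
# -ln(0.5)/rate*threshold with the defaults quoted above (ln(2) = -ln(0.5))
awk 'BEGIN {
    threshold = 64 * 1024;   # mds_cache_trim_threshold (64k)
    rate      = 1;           # mds_cache_trim_decay_rate
    printf "%.0f items/sec\n", log(2) / rate * threshold;
}'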
MDS Recall
----------
MDS limits its recall of client state (capabilities/leases) to prevent creating
too much work for itself handling release messages from clients. This is controlled
via the following configurations:
The maximum number of capabilities to recall from a single client in a given recall
event::
mds_recall_max_caps (default: 5000)
The threshold and decay rate for the decay counter on a session::
mds_recall_max_decay_threshold (default: 16k)
and::
mds_recall_max_decay_rate (default: 2.5 seconds)
The session decay counter controls the rate of recall for an individual
session. The behavior of the counter works the same as for cache trimming
above. Each capability that is recalled increments the counter.
There is also a global decay counter that throttles for all session recall::
mds_recall_global_max_decay_threshold (default: 64k)
its decay rate is the same as ``mds_recall_max_decay_rate``. Any recalled
capability for any session also increments this counter.
If clients are slow to release state, the warning "failing to respond to cache
pressure" or ``MDS_HEALTH_CLIENT_RECALL`` will be reported. Each session's rate
of release is monitored by another decay counter configured by::
mds_recall_warning_threshold (default: 32k)
and::
mds_recall_warning_decay_rate (default: 60.0 seconds)
Each time a capability is released, the counter is incremented. If clients do
not release capabilities quickly enough and there is cache pressure, the
counter will indicate if the client is slow to release state.
Some workloads and client behaviors may require faster recall of client state
to keep up with capability acquisition. It is recommended to increase the above
counters as needed to resolve any slow recall warnings in the cluster health
state.
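If these warnings persist, the throttles named above can be raised using the same ``ceph config set mds ...`` pattern shown earlier; the values below are purely illustrative, not recommendations:
.. code-block:: bash
# illustrative values only; tune to your workload
ceph config set mds mds_recall_max_caps 10000
ceph config set mds mds_recall_max_decay_threshold 32768
ceph config set mds mds_recall_global_max_decay_threshold 131072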
Session Liveness
----------------
The MDS also keeps track of whether sessions are quiescent. If a client session
is not utilizing its capabilities or is otherwise quiet, the MDS will begin
recalling state from the session even if it is not under cache pressure. This
helps the MDS avoid future work when the cluster workload is hot and cache
pressure is forcing the MDS to recall state. The expectation is that a client
not utilizing its capabilities is unlikely to use those capabilities anytime
in the near future.
Determining whether a given session is quiescent is controlled by the following
configuration variables::
mds_session_cache_liveness_magnitude (default: 10)
and::
mds_session_cache_liveness_decay_rate (default: 5min)
The configuration ``mds_session_cache_liveness_decay_rate`` indicates the
half-life for the decay counter tracking the use of capabilities by the client.
Each time a client manipulates or acquires a capability, the MDS will increment
the counter. This is a rough but effective way to monitor utilization of the
client cache.
The ``mds_session_cache_liveness_magnitude`` is a base-2 magnitude difference
of the liveness decay counter and the number of capabilities outstanding for
the session. So if the client has ``1*2^20`` (1M) capabilities outstanding and
only uses **less** than ``1*2^(20-mds_session_cache_liveness_magnitude)`` (1K
using defaults), the MDS will consider the client to be quiescent and begin
recall.
Capability Limit
----------------
The MDS also tries to prevent a single client from acquiring too many
capabilities. This helps prevent recovery from taking a long time in some
situations. It is not generally necessary for a client to have such a large
cache. The limit is configured via::
mds_max_caps_per_client (default: 1M)
It is not recommended to set this value above 5M but it may be helpful with
some workloads.
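Raising the limit follows the same ``ceph config set`` pattern; the value below is only an example within the guidance above:
.. code-block:: bash
# raise the per-client cap limit to 2M (illustrative; stays below the suggested 5M ceiling)
ceph config set mds mds_max_caps_per_client 2097152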

View File

@ -1,14 +0,0 @@
Understanding MDS Cache Size Limits
===================================
This section describes ways to limit MDS cache size.
You can limit the size of the Metadata Server (MDS) cache by:
* *A memory limit*: A new behavior introduced in the Luminous release. Use the `mds_cache_memory_limit` parameters.
In addition, you can specify a cache reservation by using the `mds_cache_reservation` parameter for MDS operations. The cache reservation is limited as a percentage of the memory and is set to 5% by default. The intent of this parameter is to have the MDS maintain an extra reserve of memory for its cache for new metadata operations to use. As a consequence, the MDS should in general operate below its memory limit because it will recall old state from clients in order to drop unused metadata in its cache.
The `mds_cache_reservation` parameter replaces the `mds_health_cache_threshold` in all situations except when MDS nodes sends a health alert to the Monitors indicating the cache is too large. By default, `mds_health_cache_threshold` is 150% of the maximum cache size.
Be aware that the cache limit is not a hard limit. Potential bugs in the CephFS client or MDS or misbehaving applications might cause the MDS to exceed its cache size. The `mds_health_cache_threshold` configures the cluster health warning message so that operators can investigate why the MDS cannot shrink its cache.

View File

@ -85,7 +85,7 @@ FS Subvolume groups
Create a subvolume group using::
$ ceph fs subvolumegroup create <vol_name> <group_name> [--pool_layout <data_pool_name> --uid <uid> --gid <gid> --mode <octal_mode>]
$ ceph fs subvolumegroup create <vol_name> <group_name> [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>]
The command succeeds even if the subvolume group already exists.
@ -111,12 +111,8 @@ List subvolume groups using::
$ ceph fs subvolumegroup ls <vol_name>
Create a snapshot (see :doc:`/cephfs/experimental-features`) of a
subvolume group using::
$ ceph fs subvolumegroup snapshot create <vol_name> <group_name> <snap_name>
This implicitly snapshots all the subvolumes under the subvolume group.
.. note:: Subvolume group snapshot feature is no longer supported in mainline CephFS (existing group
snapshots can still be listed and deleted)
Remove a snapshot of a subvolume group using::
@ -135,7 +131,7 @@ FS Subvolumes
Create a subvolume using::
$ ceph fs subvolume create <vol_name> <subvol_name> [--size <size_in_bytes> --group_name <subvol_group_name> --pool_layout <data_pool_name> --uid <uid> --gid <gid> --mode <octal_mode> --namespace-isolated]
$ ceph fs subvolume create <vol_name> <subvol_name> [--size <size_in_bytes>] [--group_name <subvol_group_name>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>] [--namespace-isolated]
The command succeeds even if the subvolume already exists.
@ -150,16 +146,24 @@ its parent directory and no size limit.
Remove a subvolume using::
$ ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name> --force]
$ ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name>] [--force] [--retain-snapshots]
The command removes the subvolume and its contents. It does this in two steps.
First, it move the subvolume to a trash folder, and then asynchronously purges
First, it moves the subvolume to a trash folder, and then asynchronously purges
its contents.
The removal of a subvolume fails if it has snapshots, or is non-existent.
The '--force' flag allows the remove command to succeed even if the subvolume does not exist.
A subvolume can be removed retaining existing snapshots of the subvolume using the
'--retain-snapshots' option. If snapshots are retained, the subvolume is considered
empty for all operations not involving the retained snapshots.
.. note:: Snapshot retained subvolumes can be recreated using 'ceph fs subvolume create'
.. note:: Retained snapshots can be used as a clone source to recreate the subvolume, or clone to a newer subvolume.
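As an illustration of the notes above, a hedged sequence (volume, subvolume and snapshot names are placeholders) that removes a subvolume while retaining a snapshot and then recreates it:
.. code-block:: bash
# all names are placeholders for the example
ceph fs subvolume snapshot create cephfs sub0 snap0
ceph fs subvolume rm cephfs sub0 --retain-snapshots
# the retained snapshot is still listable, and the subvolume can be recreated
ceph fs subvolume snapshot ls cephfs sub0
ceph fs subvolume create cephfs sub0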
Resize a subvolume using::
$ ceph fs subvolume resize <vol_name> <subvol_name> <new_size> [--group_name <subvol_group_name>] [--no_shrink]
@ -195,17 +199,32 @@ The output format is json and contains fields as follows.
* type: subvolume type indicating whether it's clone or subvolume
* pool_namespace: RADOS namespace of the subvolume
* features: features supported by the subvolume
* state: current state of the subvolume
If a subvolume has been removed retaining its snapshots, the output only contains fields as follows.
* type: subvolume type indicating whether it's clone or subvolume
* features: features supported by the subvolume
* state: current state of the subvolume
The subvolume "features" are based on the internal version of the subvolume and is a list containing
a subset of the following features,
* "snapshot-clone": supports cloning using a subvolumes snapshot as the source
* "snapshot-autoprotect": supports automatically protecting snapshots, that are active clone sources, from deletion
* "snapshot-retention": supports removing subvolume contents, retaining any existing snapshots
The subvolume "state" is based on the current state of the subvolume and contains one of the following values.
* "complete": subvolume is ready for all operations
* "snapshot-retained": subvolume is removed but its snapshots are retained
List subvolumes using::
$ ceph fs subvolume ls <vol_name> [--group_name <subvol_group_name>]
.. note:: subvolumes that are removed but have snapshots retained, are also listed.
Create a snapshot of a subvolume using::
$ ceph fs subvolume snapshot create <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
@ -213,11 +232,13 @@ Create a snapshot of a subvolume using::
Remove a snapshot of a subvolume using::
$ ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name> --force]
$ ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>] [--force]
Using the '--force' flag allows the command to succeed even if the snapshot
does not exist.
.. note:: if the last snapshot within a snapshot retained subvolume is removed, the subvolume is also removed
List snapshots of a subvolume using::
$ ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]

View File

@ -79,7 +79,7 @@ Administration
Administrative commands <administration>
Provision/Add/Remove MDS(s) <add-remove-mds>
MDS failover and standby configuration <standby>
MDS Cache Size Limits <cache-size-limits>
MDS Cache Configuration <cache-configuration>
MDS Configuration Settings <mds-config-ref>
Manual: ceph-mds <../../man/8/ceph-mds>
Export over NFS <nfs>

View File

@ -0,0 +1,121 @@
================
Compliance Check
================
The stability and reliability of a Ceph cluster depend not just upon the Ceph daemons, but
also on the OS and hardware that Ceph is installed on. This document is intended to promote a design
discussion for providing a "compliance" feature within mgr/cephadm, which would be responsible for
identifying common platform-related issues that could impact Ceph stability and operation.
The ultimate goal of these checks is to identify issues early and raise a healthcheck WARN
event, to alert the Administrator to the issue.
Prerequisites
=============
In order to effectively analyse the hosts that Ceph is deployed to, this feature requires a cache
of host-related metadata. The metadata is already available from cephadm's HostFacts class and the
``gather-facts`` cephadm command. For the purposes of this document, we will assume that this
data is available within the mgr/cephadm "cache" structure.
Some checks will require that the host status is also populated e.g. ONLINE, OFFLINE, MAINTENANCE
Administrator Interaction
=========================
Not all users will require this feature, so they must be able to 'opt out'. For this reason,
mgr/cephadm must provide controls such as the following:
.. code-block::
ceph cephadm compliance enable | disable | status [--format json]
ceph cephadm compliance ls [--format json]
ceph cephadm compliance enable-check <name>
ceph cephadm compliance disable-check <name>
ceph cephadm compliance set-check-interval <int>
ceph cephadm compliance get-check-interval
The status option would show the enabled/disabled state of the feature, along with the
check-interval.
The ``ls`` subcommand would show all checks in the following format;
``check-name status description``
Proposed Integration
====================
The compliance checks are not required to run all the time, but instead should run at discrete
intervals. The interval would be configurable via the :code:`set-check-interval`
subcommand (the default would be every 12 hours).
mgr/cephadm currently executes an event driven (time based) serve loop to act on deploy/remove and
reconcile activity. In order to execute the compliance checks, the compliance check code would be
called from this main serve loop - when the :code:`set-check-interval` is met.
Proposed Checks
===============
All checks would push any errors to a list, so multiple issues can be escalated to the Admin at
the same time. The list below provides a description of each check, with the text following the
name indicating a shortname version *(the shortname is the reference for command Interaction
when enabling or disabling a check)*
OS Consistency (OS)
___________________
* all hosts must use same vendor
* all hosts must be on the same major release (this check would only be applicable to distributions that
offer a long-term-support strategy, such as RHEL, CentOS, SLES or Ubuntu)
*src: gather-facts output*
Linux Kernel Security Mode (LSM)
________________________________
* All hosts should have a consistent SELINUX/AppArmor configuration
*src: gather-facts output*
Services Check (SERVICES)
_________________________
Hosts that are in an ONLINE state should adhere to the following;
* all daemons (systemd units) should be enabled
* all daemons should be running (not dead)
*src: list_daemons output*
Support Status (SUPPORT)
________________________
If support status has been detected, it should be consistent across all hosts. At this point
support status is available only for Red Hat machines.
*src: gather-facts output*
Network : MTU (MTU)
________________________________
All network interfaces on the same Ceph network (public/cluster) should have the same MTU
*src: gather-facts output*
Network : LinkSpeed (LINKSPEED)
____________________________________________
All network interfaces on the same Ceph network (public/cluster) should have the same Linkspeed
*src: gather-facts output*
Network : Consistency (INTERFACE)
______________________________________________
All hosts with OSDs should have consistent network configuration - eg. if some hosts do
not separate cluster/public traffic but others do, that is an anomaly that would generate a
compliance check warning.
*src: gather-facts output*
Notification Strategy
=====================
If any of the checks fail, mgr/cephadm would raise a WARN level alert
Futures
=======
The checks highlighted here serve only as a starting point, and we should expect to expand
on the checks over time.

View File

@ -0,0 +1,104 @@
================
Host Maintenance
================
All hosts that support Ceph daemons need to support maintenance activity, whether the host
is physical or virtual. This means that management workflows should provide
a simple and consistent way to support this operational requirement. This document defines
the maintenance strategy that could be implemented in cephadm and mgr/cephadm.
High Level Design
=================
Placing a host into maintenance adopts the following workflow:
#. confirm that the removal of the host does not impact data availability (the following
steps will assume it is safe to proceed)
* orch host ok-to-stop <host> would be used here
#. if the host has osd daemons, apply noout to the host subtree to prevent data migration
from triggering during the planned maintenance slot.
#. Stop the ceph target (all daemons stop)
#. Disable the ceph target on that host, to prevent a reboot from automatically starting
ceph services again
Exiting maintenance is basically the reverse of the above sequence; a rough shell sketch of the enter workflow is shown below.
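The block below is only a hedged sketch of the enter-maintenance steps, not the actual mgr/cephadm implementation; the host name is a placeholder and the ``ceph osd set-group`` call assumes a CRUSH bucket named after the host:
.. code-block:: bash
HOST=host2                                      # placeholder host name
ceph orch host ok-to-stop "$HOST"               # 1. confirm data availability
ceph osd set-group noout "$HOST"                # 2. apply noout to the host subtree (assumption)
ssh "$HOST" sudo systemctl stop ceph.target     # 3. stop all ceph daemons on the host
ssh "$HOST" sudo systemctl disable ceph.target  # 4. keep them from starting on reboot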
Admin Interaction
=================
The ceph orch command will be extended to support maintenance.
.. code-block::
ceph orch host enter-maintenance <host> [ --check ]
ceph orch host exit-maintenance <host>
.. note:: In addition, the host's status should be updated to reflect whether it
is in maintenance or not.
The 'check' Option
__________________
The orch host ok-to-stop command focuses on ceph daemons (mon, osd, mds), which
provides the first check. However, a ceph cluster also uses other types of daemons
for monitoring, management and non-native protocol support which means the
logic will need to consider service impact too. The 'check' option provides
this additional layer to alert the user of service impact to *secondary*
daemons.
The list below shows some of these additional daemons.
* mgr (not included in ok-to-stop checks)
* prometheus, grafana, alertmanager
* rgw
* haproxy
* iscsi gateways
* ganesha gateways
By using the --check option first, the Admin can choose whether to proceed. This
workflow is obviously optional for the CLI user, but could be integrated into the
UI workflow to help less experienced Administrators manage the cluster.
By adopting this two-phase approach, a UI based workflow would look something
like this.
#. User selects a host to place into maintenance
* orchestrator checks for data **and** service impact
#. If potential impact is shown, the next steps depend on the impact type
* **data availability** : maintenance is denied, informing the user of the issue
* **service availability** : user is provided a list of affected services and
asked to confirm
Components Impacted
===================
Implementing this capability will require changes to the following;
* cephadm
* Add maintenance subcommand with the following 'verbs'; enter, exit, check
* mgr/cephadm
* add methods to CephadmOrchestrator for enter/exit and check
* data gathering would be skipped for hosts in a maintenance state
* mgr/orchestrator
* add CLI commands to OrchestratorCli which expose the enter/exit and check interaction
Ideas for Future Work
=====================
#. When a host is placed into maintenance, the time of the event could be persisted. This
would allow the orchestrator layer to establish a maintenance window for the task and
alert if the maintenance window has been exceeded.
#. The maintenance process could support plugins to allow other integration tasks to be
initiated as part of the transition to and from maintenance. This plugin capability could
support actions like;
* alert suppression to 3rd party monitoring framework(s)
* service level reporting, to record outage windows

View File

@ -0,0 +1,13 @@
===================================
CEPHADM Developer Documentation
===================================
.. rubric:: Contents
.. toctree::
:maxdepth: 1
host-maintenance
compliance-check

View File

@ -732,24 +732,32 @@ Per client instance `rbd device map` options:
* noshare - Disable sharing of client instances with other mappings.
* crc - Enable CRC32C checksumming for data writes (default).
* crc - Enable CRC32C checksumming for msgr1 on-the-wire protocol (default).
For msgr2.1 protocol this option is ignored: full checksumming is always on
in 'crc' mode and always off in 'secure' mode.
* nocrc - Disable CRC32C checksumming for data writes.
* nocrc - Disable CRC32C checksumming for msgr1 on-the-wire protocol. Note
that only payload checksumming is disabled, header checksumming is always on.
For msgr2.1 protocol this option is ignored.
* cephx_require_signatures - Require cephx message signing (since 3.19,
default).
* cephx_require_signatures - Require msgr1 message signing feature (since 3.19,
default). This option is deprecated and will be removed in the future as the
feature has been supported since the Bobtail release.
* nocephx_require_signatures - Don't require cephx message signing (since
3.19).
* nocephx_require_signatures - Don't require msgr1 message signing feature
(since 3.19). This option is deprecated and will be removed in the future.
* tcp_nodelay - Disable Nagle's algorithm on client sockets (since 4.0,
default).
* notcp_nodelay - Enable Nagle's algorithm on client sockets (since 4.0).
* cephx_sign_messages - Enable message signing (since 4.4, default).
* cephx_sign_messages - Enable message signing for msgr1 on-the-wire protocol
(since 4.4, default). For msgr2.1 protocol this option is ignored: message
signing is built into 'secure' mode and not offered in 'crc' mode.
* nocephx_sign_messages - Disable message signing (since 4.4).
* nocephx_sign_messages - Disable message signing for msgr1 on-the-wire protocol
(since 4.4). For msgr2.1 protocol this option is ignored.
* mount_timeout=x - A timeout on various steps in `rbd device map` and
`rbd device unmap` sequences (default is 60 seconds). In particular,
@ -844,6 +852,25 @@ Per mapping (block device) `rbd device map` options:
backend that the data is incompressible, disabling compression in aggressive
mode (since 5.8).
* ms_mode=legacy - Use msgr1 on-the-wire protocol (since 5.11, default).
* ms_mode=crc - Use msgr2.1 on-the-wire protocol, select 'crc' mode, also
referred to as plain mode (since 5.11). If the daemon denies 'crc' mode,
fail the connection.
* ms_mode=secure - Use msgr2.1 on-the-wire protocol, select 'secure' mode
(since 5.11). 'secure' mode provides full in-transit encryption ensuring
both confidentiality and authenticity. If the daemon denies 'secure' mode,
fail the connection.
* ms_mode=prefer-crc - Use msgr2.1 on-the-wire protocol, select 'crc'
mode (since 5.11). If the daemon denies 'crc' mode in favor of 'secure'
mode, agree to 'secure' mode.
* ms_mode=prefer-secure - Use msgr2.1 on-the-wire protocol, select 'secure'
mode (since 5.11). If the daemon denies 'secure' mode in favor of 'crc'
mode, agree to 'crc' mode.
* udev - Wait for udev device manager to finish executing all matching
"add" rules and release the device before exiting (default). This option
is not passed to the kernel.
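For example, the msgr2.1 modes above can be selected per mapping, or made the default the way the QA overrides later in this commit do (pool and image names are placeholders):
.. code-block:: bash
# per-mapping: prefer msgr2.1 'crc' mode, falling back as the daemon dictates
rbd device map -o ms_mode=prefer-crc rbd/myimage
# or set a default for all mappings in the [client] section of ceph.conf:
#   rbd default map options = ms_mode=prefer-crc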

View File

@ -270,6 +270,34 @@ commands::
$ ceph dashboard ac-user-create <username> <password> administrator
Account Lock-out
^^^^^^^^^^^^^^^^
Ceph Dashboard disables a user account if the user repeatedly enters wrong
credentials. This feature is enabled by default to prevent brute-force and
dictionary attacks. The administrator can get or set the number of lock-out
attempts using these commands respectively::
$ ceph dashboard get-account-lockout-attempts
$ ceph dashboard set-account-lockout-attempts <value:int>
.. warning::
This feature can be disabled by setting the number of lock-out attempts to 0.
However, doing so leaves accounts more vulnerable to brute-force or
dictionary based attacks. To disable the feature::
$ ceph dashboard set-account-lockout-attempts 0
Enable a Locked User
^^^^^^^^^^^^^^^^^^^^
If a user account is disabled as a result of multiple invalid login attempts, then
it needs to be manually enabled by the administrator. This can be done by the following
command::
$ ceph dashboard ac-user-enable <username>
Accessing the Dashboard
^^^^^^^^^^^^^^^^^^^^^^^
@ -479,7 +507,8 @@ will not be visible in Prometheus.
After you have set up Grafana and Prometheus, you will need to configure the
connection information that the Ceph Dashboard will use to access Grafana.
You need to tell the dashboard on which url Grafana instance is running/deployed::
You need to tell the dashboard on which URL the Grafana instance is
running/deployed::
$ ceph dashboard set-grafana-api-url <grafana-server-url> # default: ''
@ -503,6 +532,38 @@ e.g. caused by certificates signed by unknown CA or not matching the host name::
You can directly access Grafana Instance as well to monitor your cluster.
Alternative URL for Browsers
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The Ceph Dashboard backend requires the Grafana URL to be able to verify the
existence of Grafana Dashboards before the frontend even loads them. Due to the
nature of how Grafana is implemented in Ceph Dashboard, this means that two
working connections are required in order to be able to see Grafana graphs in
Ceph Dashboard:
- The backend (Ceph Mgr module) needs to verify the existence of the requested
graph. If this request succeeds, it lets the frontend know that it can safely
access Grafana.
- The frontend then requests the Grafana graphs directly from the user's
browser using an iframe. The Grafana instance is accessed directly without any
detour through Ceph Dashboard.
Now, it might be the case that your environment makes it difficult for the
user's browser to directly access the URL configured in Ceph Dashboard. To solve
this issue, a separate URL can be configured which will solely be used to tell
the frontend (the user's browser) which URL it should use to access Grafana.
This setting won't ever be changed automatically, unlike the GRAFANA_API_URL
which is set by :ref:`cephadm` (only if cephadm is used to deploy monitoring
services).
To change the URL that is returned to the frontend issue the following command::
$ ceph dashboard set-grafana-frontend-api-url <grafana-server-url>
If no value is set for that option, it will simply fall back to the value of the
GRAFANA_API_URL option. If set, it will instruct the browser to use this URL to
access Grafana.
.. _dashboard-sso-support:
Enabling Single Sign-On (SSO)

View File

@ -58,6 +58,8 @@ Status
Show current orchestrator mode and high-level status (whether the orchestrator
plugin is available and operational)
.. _orchestrator-cli-host-management:
Host Management
===============
@ -70,7 +72,7 @@ Add and remove hosts::
ceph orch host add <hostname> [<addr>] [<labels>...]
ceph orch host rm <hostname>
For cephadm, see also :ref:`cephadm-fqdn`.
For cephadm, see also :ref:`cephadm-fqdn` and :ref:`cephadm-removing-hosts`.
Host Specification
------------------

View File

@ -1127,7 +1127,7 @@ Miscellaneous
when osd data is on HDD and osd journal is on SSD.
:Type: Float
:Default: ``2``
:Default: ``1``
``osd command max records``

View File

@ -88,7 +88,7 @@ Where:
:Description: Number of OSDs requested to send data during recovery of
a single chunk. *d* needs to be chosen such that
k+1 <= d <= k+m-1. Larger the *d*, the better the savings.
k+1 <= d <= k+m-1. The larger the *d*, the better the savings.
:Type: Integer
:Required: No.

View File

@ -1175,3 +1175,16 @@ This warning can silenced by setting the
``mon_warn_on_osd_down_out_interval_zero`` to false::
ceph config set global mon_warn_on_osd_down_out_interval_zero false
DASHBOARD_DEBUG
_______________
The Dashboard debug mode is enabled. This means that if there is an error
while processing a REST API request, the HTTP error response contains
a Python traceback. This behaviour should be disabled in production
environments because such a traceback might expose sensitive
information.
The debug mode can be disabled with::
ceph dashboard debug disable

View File

@ -365,7 +365,8 @@ else
--enable rhel-7-server-devtools-rpms
dts_ver=8
elif test $ID = centos -a $MAJOR_VERSION = 8 ; then
$SUDO dnf config-manager --set-enabled PowerTools
# Enable 'powertools' or 'PowerTools' repo
$SUDO dnf config-manager --set-enabled $(dnf repolist --all 2>/dev/null|gawk 'tolower($0) ~ /^powertools\s/{print $1}')
# before EPEL8 and PowerTools provide all dependencies, we use sepia for the dependencies
$SUDO dnf config-manager --add-repo http://apt-mirror.front.sepia.ceph.com/lab-extras/8/
$SUDO dnf config-manager --setopt=apt-mirror.front.sepia.ceph.com_lab-extras_8_.gpgcheck=0 --save

View File

@ -126,7 +126,7 @@
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": "0",
"min": null,
"show": true
},
{

View File

@ -230,8 +230,8 @@ groups:
- alert: pool filling up
expr: |
(
predict_linear(ceph_pool_stored[2d], 3600 * 24 * 5) >=
ceph_pool_max_avail
predict_linear(ceph_pool_stored[2d], 3600 * 24 * 5)
>= ceph_pool_stored + ceph_pool_max_avail
) * on(pool_id) group_left(name) ceph_pool_metadata
labels:
severity: warning
@ -241,3 +241,15 @@ groups:
description: >
Pool {{ $labels.name }} will be full in less than 5 days
assuming the average fill-up rate of the past 48 hours.
- name: healthchecks
rules:
- alert: Slow OSD Ops
expr: ceph_healthcheck_slow_ops > 0
for: 30s
labels:
severity: warning
type: ceph_default
annotations:
description: >
{{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)

View File

@ -9,4 +9,4 @@ tasks:
- echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list
- sudo apt update
- sudo apt -y install podman
- echo -e "[registries.search]\nregistries = ['docker.io']" | sudo tee /etc/containers/registries.conf
- echo -e "[[registry]]\nlocation = 'docker.io'\n\n[[registry.mirror]]\nlocation='docker-mirror.front.sepia.ceph.com:5000'\n" | sudo tee /etc/containers/registries.conf

View File

@ -222,6 +222,17 @@ function TEST_profile_k_sanity() {
m=1 || return 1
}
function TEST_invalid_crush_failure_domain() {
local dir=$1
run_mon $dir a || return 1
local profile=ec_profile
local crush_failure_domain=invalid_failure_domain
! ceph osd erasure-code-profile set $profile k=4 m=2 crush-failure-domain=$crush_failure_domain 2>&1 || return 1
}
main osd-erasure-code-profile "$@"
# Local Variables:

View File

@ -15,5 +15,6 @@ overrides:
tasks:
- cephfs_test_runner:
fail_on_skip: false
modules:
- tasks.cephfs.test_volumes

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -3,5 +3,3 @@ overrides:
conf:
global:
ms die on skipped message: false
client:
rbd default map options: read_from_replica=balance

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc,read_from_replica=balance

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy,read_from_replica=balance

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=prefer-crc,read_from_replica=balance

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure,read_from_replica=balance

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=prefer-crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=prefer-crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -1,5 +0,0 @@
tasks:
- workunit:
clients:
all:
- rbd/krbd_stable_pages_required.sh

View File

@ -0,0 +1,5 @@
tasks:
- workunit:
clients:
all:
- rbd/krbd_stable_writes.sh

View File

@ -11,3 +11,4 @@ tasks:
- ceph orch ls
- ceph orch host ls
- ceph orch device ls
- ceph orch ls --format yaml

View File

@ -22,6 +22,7 @@ tasks:
- \(OSD_HOST_DOWN\)
- \(POOL_APP_NOT_ENABLED\)
- \(OSDMAP_FLAGS\)
- \(OSD_FLAGS\)
- pauserd,pausewr flag\(s\) set
- Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running
- evicting unresponsive client .+

View File

@ -0,0 +1,44 @@
roles:
- - mon.a
- mgr.x
- osd.0
- osd.1
- osd.2
- osd.3
- client.0
- - mon.b
- mon.c
- osd.4
- osd.5
- osd.6
- osd.7
openstack:
- volumes: # attached to each instance
count: 4
size: 10 # GB
tasks:
- install:
- ceph:
create_rbd_pool: false
pre-mgr-commands:
- sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force
log-ignorelist:
- overall HEALTH_
- \(OSDMAP_FLAGS\)
- \(OSD_
- \(PG_
- \(POOL_
- \(CACHE_POOL_
- \(OBJECT_
- \(SLOW_OPS\)
- \(REQUEST_SLOW\)
- \(TOO_FEW_PGS\)
- slow request
- exec:
client.0:
- ceph progress off
- workunit:
clients:
all:
- mon/pg_autoscaler.sh

View File

@ -13,5 +13,6 @@ overrides:
rgw curl low speed time: 300
rgw md log max shards: 4
rgw data log num shards: 4
rgw sync obj etag verify: true
rgw:
compression type: random

View File

@ -4,6 +4,7 @@ overrides:
- \(MON_DOWN\)
- \(MGR_DOWN\)
- slow request
- evicting unresponsive client
meta:
- desc: install ceph/nautilus latest
tasks:

View File

@ -26,6 +26,7 @@ tasks:
- Monitor daemon marked osd
- Behind on trimming
- Manager daemon
- evicting unresponsive client
conf:
global:
mon warn on pool no app: false

View File

@ -14,6 +14,8 @@ tasks:
bluestore_warn_on_legacy_statfs: false
bluestore warn on no per pool omap: false
mon pg warn min per osd: 0
log-whitelist:
- evicting unresponsive client
- exec:
osd.0:
- ceph osd require-osd-release nautilus

View File

@ -155,13 +155,12 @@ def ceph_log(ctx, config):
while not self.stop_event.is_set():
self.stop_event.wait(timeout=30)
try:
run.wait(
ctx.cluster.run(
args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'
],
wait=False,
)
procs = ctx.cluster.run(
args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'],
wait=False,
stderr=StringIO()
)
run.wait(procs)
except exceptions.ConnectionLostError as e:
# Some tests may power off nodes during test, in which
# case we will see connection errors that we should ignore.
@ -175,6 +174,14 @@ def ceph_log(ctx, config):
log.debug("Missed logrotate, EOFError")
except SSHException:
log.debug("Missed logrotate, SSHException")
except run.CommandFailedError as e:
for p in procs:
if p.finished and p.exitstatus != 0:
err = p.stderr.getvalue()
if 'error: error renaming temp state file' in err:
log.info('ignoring transient state error: %s', e)
else:
raise
except socket.error as e:
if e.errno in (errno.EHOSTUNREACH, errno.ECONNRESET):
log.debug("Missed logrotate, host unreachable")

View File

@ -1317,13 +1317,17 @@ def registries_add_mirror_to_docker_io(conf, mirror):
}
else:
v2 = config # type: ignore
dockers = [r for r in v2['registry'] if r['prefix'] == 'docker.io']
dockers = [
r for r in v2['registry'] if
r.get('prefix') == 'docker.io' or r.get('location') == 'docker.io'
]
if dockers:
docker = dockers[0]
docker['mirror'] = [{
"location": mirror,
"insecure": True,
}]
if 'mirror' not in docker:
docker['mirror'] = [{
"location": mirror,
"insecure": True,
}]
return v2

View File

@ -227,6 +227,9 @@ class CephFSTestCase(CephTestCase):
def _session_by_id(self, session_ls):
return dict([(s['id'], s) for s in session_ls])
def perf_dump(self, rank=None, status=None):
return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status)
def wait_until_evicted(self, client_id, timeout=30):
def is_client_evicted():
ls = self._session_list()

View File

@ -201,7 +201,7 @@ class CephFSMount(object):
return self.run_shell(["bash", "-c", Raw(f"'{payload}'")], **kwargs)
def run_shell(self, args, wait=True, stdin=None, check_status=True,
omit_sudo=True):
omit_sudo=True, timeout=10800):
if isinstance(args, str):
args = args.split()
@ -209,7 +209,8 @@ class CephFSMount(object):
return self.client_remote.run(args=args, stdout=StringIO(),
stderr=StringIO(), wait=wait,
stdin=stdin, check_status=check_status,
omit_sudo=omit_sudo)
omit_sudo=omit_sudo,
timeout=timeout)
def open_no_data(self, basename):
"""

View File

@ -156,6 +156,36 @@ class TestClientLimits(CephFSTestCase):
else:
raise RuntimeError("expected no client recall warning")
def test_cap_acquisition_throttle_readdir(self):
"""
Mostly, readdir acquires caps faster than the MDS recalls them, so cap
acquisition via readdir is throttled by retrying the readdir after
a fraction of a second (0.5 by default) when the throttling condition is met.
"""
max_caps_per_client = 500
cap_acquisition_throttle = 250
self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client)
self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle)
# Create 1500 files split across 6 directories, 250 each.
for i in range(1, 7):
self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True)
mount_a_client_id = self.mount_a.get_global_id()
# recursive readdir
self.mount_a.run_shell_payload("find | wc")
# validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250
cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle)
# validate that the throttle condition was hit at least once
cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)
def test_client_release_bug(self):
"""
When a client has a bug (which we will simulate) preventing it from releasing caps,

View File

@ -425,12 +425,14 @@ class TestNFS(MgrTestCase):
'''
self._test_create_cluster()
info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id))
info_ip = info_output[self.cluster_id][0].pop("ip")
host_details = {self.cluster_id: [{
"hostname": self._sys_cmd(['hostname']).decode("utf-8").strip(),
"ip": list(set(self._sys_cmd(['hostname', '-I']).decode("utf-8").split())),
"port": 2049
}]}
host_ip = self._sys_cmd(['hostname', '-I']).decode("utf-8").split()
self.assertDictEqual(info_output, host_details)
self.assertTrue(any([ip in info_ip for ip in host_ip]))
self._test_delete_cluster()
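
For reference, the dict shape the relaxed assertion now works against (hostname and addresses are placeholders):

    # Sketch of the `nfs cluster info` structure this test dissects.
    info_output = {
        'mycluster': [{
            'hostname': 'host1',              # placeholder
            'ip': ['10.0.0.5', '192.0.2.5'],  # placeholder addresses
            'port': 2049,
        }]
    }
    # "ip" is popped and compared separately: the reported address only has to
    # match one of the addresses returned by `hostname -I` on the host.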
def test_cluster_set_reset_user_config(self):

View File

@ -856,7 +856,7 @@ vc.disconnect()
volume_id = "volumeid"
# Create auth_id
out = self.fs.mon_manager.raw_cluster_cmd(
self.fs.mon_manager.raw_cluster_cmd(
"auth", "get-or-create", "client.guest1",
"mds", "allow *",
"osd", "allow rw",
@ -918,7 +918,7 @@ vc.disconnect()
volume_id = "volumeid"
# Create auth_id
out = self.fs.mon_manager.raw_cluster_cmd(
self.fs.mon_manager.raw_cluster_cmd(
"auth", "get-or-create", "client.guest1",
"mds", "allow *",
"osd", "allow rw",

File diff suppressed because it is too large

View File

@ -96,18 +96,19 @@ class DashboardTestCase(MgrTestCase):
cls._ceph_cmd(set_roles_args)
@classmethod
def login(cls, username, password):
def login(cls, username, password, set_cookies=False):
if cls._loggedin:
cls.logout()
cls._post('/api/auth', {'username': username, 'password': password})
cls._post('/api/auth', {'username': username,
'password': password}, set_cookies=set_cookies)
cls._assertEq(cls._resp.status_code, 201)
cls._token = cls.jsonBody()['token']
cls._loggedin = True
@classmethod
def logout(cls):
def logout(cls, set_cookies=False):
if cls._loggedin:
cls._post('/api/auth/logout')
cls._post('/api/auth/logout', set_cookies=set_cookies)
cls._assertEq(cls._resp.status_code, 200)
cls._token = None
cls._loggedin = False
@ -195,29 +196,49 @@ class DashboardTestCase(MgrTestCase):
def tearDownClass(cls):
super(DashboardTestCase, cls).tearDownClass()
# pylint: disable=inconsistent-return-statements
# pylint: disable=inconsistent-return-statements, too-many-branches
@classmethod
def _request(cls, url, method, data=None, params=None):
def _request(cls, url, method, data=None, params=None, set_cookies=False):
url = "{}{}".format(cls._base_uri, url)
log.info("Request %s to %s", method, url)
headers = {}
cookies = {}
if cls._token:
headers['Authorization'] = "Bearer {}".format(cls._token)
if set_cookies:
cookies['token'] = cls._token
else:
headers['Authorization'] = "Bearer {}".format(cls._token)
if method == 'GET':
cls._resp = cls._session.get(url, params=params, verify=False,
headers=headers)
elif method == 'POST':
cls._resp = cls._session.post(url, json=data, params=params,
verify=False, headers=headers)
elif method == 'DELETE':
cls._resp = cls._session.delete(url, json=data, params=params,
verify=False, headers=headers)
elif method == 'PUT':
cls._resp = cls._session.put(url, json=data, params=params,
verify=False, headers=headers)
if set_cookies:
if method == 'GET':
cls._resp = cls._session.get(url, params=params, verify=False,
headers=headers, cookies=cookies)
elif method == 'POST':
cls._resp = cls._session.post(url, json=data, params=params,
verify=False, headers=headers, cookies=cookies)
elif method == 'DELETE':
cls._resp = cls._session.delete(url, json=data, params=params,
verify=False, headers=headers, cookies=cookies)
elif method == 'PUT':
cls._resp = cls._session.put(url, json=data, params=params,
verify=False, headers=headers, cookies=cookies)
else:
assert False
else:
assert False
if method == 'GET':
cls._resp = cls._session.get(url, params=params, verify=False,
headers=headers)
elif method == 'POST':
cls._resp = cls._session.post(url, json=data, params=params,
verify=False, headers=headers)
elif method == 'DELETE':
cls._resp = cls._session.delete(url, json=data, params=params,
verify=False, headers=headers)
elif method == 'PUT':
cls._resp = cls._session.put(url, json=data, params=params,
verify=False, headers=headers)
else:
assert False
try:
if not cls._resp.ok:
# Output response for easier debugging.
@ -231,8 +252,8 @@ class DashboardTestCase(MgrTestCase):
raise ex
@classmethod
def _get(cls, url, params=None):
return cls._request(url, 'GET', params=params)
def _get(cls, url, params=None, set_cookies=False):
return cls._request(url, 'GET', params=params, set_cookies=set_cookies)
@classmethod
def _view_cache_get(cls, url, retries=5):
@ -253,16 +274,16 @@ class DashboardTestCase(MgrTestCase):
return res
@classmethod
def _post(cls, url, data=None, params=None):
cls._request(url, 'POST', data, params)
def _post(cls, url, data=None, params=None, set_cookies=False):
cls._request(url, 'POST', data, params, set_cookies=set_cookies)
@classmethod
def _delete(cls, url, data=None, params=None):
cls._request(url, 'DELETE', data, params)
def _delete(cls, url, data=None, params=None, set_cookies=False):
cls._request(url, 'DELETE', data, params, set_cookies=set_cookies)
@classmethod
def _put(cls, url, data=None, params=None):
cls._request(url, 'PUT', data, params)
def _put(cls, url, data=None, params=None, set_cookies=False):
cls._request(url, 'PUT', data, params, set_cookies=set_cookies)
@classmethod
def _assertEq(cls, v1, v2):
@ -281,8 +302,8 @@ class DashboardTestCase(MgrTestCase):
# pylint: disable=too-many-arguments
@classmethod
def _task_request(cls, method, url, data, timeout):
res = cls._request(url, method, data)
def _task_request(cls, method, url, data, timeout, set_cookies=False):
res = cls._request(url, method, data, set_cookies=set_cookies)
cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 400, 403, 404])
if cls._resp.status_code == 403:
@ -334,16 +355,16 @@ class DashboardTestCase(MgrTestCase):
return res_task['exception']
@classmethod
def _task_post(cls, url, data=None, timeout=60):
return cls._task_request('POST', url, data, timeout)
def _task_post(cls, url, data=None, timeout=60, set_cookies=False):
return cls._task_request('POST', url, data, timeout, set_cookies=set_cookies)
@classmethod
def _task_delete(cls, url, timeout=60):
return cls._task_request('DELETE', url, None, timeout)
def _task_delete(cls, url, timeout=60, set_cookies=False):
return cls._task_request('DELETE', url, None, timeout, set_cookies=set_cookies)
@classmethod
def _task_put(cls, url, data=None, timeout=60):
return cls._task_request('PUT', url, data, timeout)
def _task_put(cls, url, data=None, timeout=60, set_cookies=False):
return cls._task_request('PUT', url, data, timeout, set_cookies=set_cookies)
@classmethod
def cookies(cls):

View File

@ -30,6 +30,7 @@ class AuthTest(DashboardTestCase):
self.assertIn('delete', perms)
def test_a_set_login_credentials(self):
# test with Authorization header
self.create_user('admin2', 'admin2', ['administrator'])
self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'})
self.assertStatus(201)
@ -37,7 +38,16 @@ class AuthTest(DashboardTestCase):
self._validate_jwt_token(data['token'], "admin2", data['permissions'])
self.delete_user('admin2')
# test with Cookies set
self.create_user('admin2', 'admin2', ['administrator'])
self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}, set_cookies=True)
self.assertStatus(201)
data = self.jsonBody()
self._validate_jwt_token(data['token'], "admin2", data['permissions'])
self.delete_user('admin2')
def test_login_valid(self):
# test with Authorization header
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
data = self.jsonBody()
@ -51,7 +61,22 @@ class AuthTest(DashboardTestCase):
}, allow_unknown=False))
self._validate_jwt_token(data['token'], "admin", data['permissions'])
# test with Cookies set
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
'token': JLeaf(str),
'username': JLeaf(str),
'permissions': JObj(sub_elems={}, allow_unknown=True),
'sso': JLeaf(bool),
'pwdExpirationDate': JLeaf(int, none=True),
'pwdUpdateRequired': JLeaf(bool)
}, allow_unknown=False))
self._validate_jwt_token(data['token'], "admin", data['permissions'])
def test_login_invalid(self):
# test with Authorization header
self._post("/api/auth", {'username': 'admin', 'password': 'inval'})
self.assertStatus(400)
self.assertJsonBody({
@ -60,7 +85,17 @@ class AuthTest(DashboardTestCase):
"detail": "Invalid credentials"
})
# test with Cookies set
self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True)
self.assertStatus(400)
self.assertJsonBody({
"component": "auth",
"code": "invalid_credentials",
"detail": "Invalid credentials"
})
def test_login_without_password(self):
# test with Authorization header
self.create_user('admin2', '', ['administrator'])
self._post("/api/auth", {'username': 'admin2', 'password': ''})
self.assertStatus(400)
@ -71,7 +106,70 @@ class AuthTest(DashboardTestCase):
})
self.delete_user('admin2')
# test with Cookies set
self.create_user('admin2', '', ['administrator'])
self._post("/api/auth", {'username': 'admin2', 'password': ''}, set_cookies=True)
self.assertStatus(400)
self.assertJsonBody({
"component": "auth",
"code": "invalid_credentials",
"detail": "Invalid credentials"
})
self.delete_user('admin2')
def test_lockout_user(self):
# test with Authorization header
self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3'])
for _ in range(3):
self._post("/api/auth", {'username': 'admin', 'password': 'inval'})
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(400)
self.assertJsonBody({
"component": "auth",
"code": "invalid_credentials",
"detail": "Invalid credentials"
})
self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
'token': JLeaf(str),
'username': JLeaf(str),
'permissions': JObj(sub_elems={}, allow_unknown=True),
'sso': JLeaf(bool),
'pwdExpirationDate': JLeaf(int, none=True),
'pwdUpdateRequired': JLeaf(bool)
}, allow_unknown=False))
self._validate_jwt_token(data['token'], "admin", data['permissions'])
# test with Cookies set
self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3'])
for _ in range(3):
self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True)
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(400)
self.assertJsonBody({
"component": "auth",
"code": "invalid_credentials",
"detail": "Invalid credentials"
})
self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
'token': JLeaf(str),
'username': JLeaf(str),
'permissions': JObj(sub_elems={}, allow_unknown=True),
'sso': JLeaf(bool),
'pwdExpirationDate': JLeaf(int, none=True),
'pwdUpdateRequired': JLeaf(bool)
}, allow_unknown=False))
self._validate_jwt_token(data['token'], "admin", data['permissions'])
def test_logout(self):
# test with Authorization header
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
data = self.jsonBody()
@ -86,7 +184,23 @@ class AuthTest(DashboardTestCase):
self.assertStatus(401)
self.set_jwt_token(None)
# test with Cookies set
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
data = self.jsonBody()
self._validate_jwt_token(data['token'], "admin", data['permissions'])
self.set_jwt_token(data['token'])
self._post("/api/auth/logout", set_cookies=True)
self.assertStatus(200)
self.assertJsonBody({
"redirect_url": "#/login"
})
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
self.set_jwt_token(None)
def test_token_ttl(self):
# test with Authorization header
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
@ -99,7 +213,21 @@ class AuthTest(DashboardTestCase):
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
self.set_jwt_token(None)
# test with Cookies set
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
self._get("/api/host", set_cookies=True)
self.assertStatus(200)
time.sleep(6)
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
self.set_jwt_token(None)
def test_remove_from_blacklist(self):
# test with Authorization header
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
@ -119,11 +247,37 @@ class AuthTest(DashboardTestCase):
self._post("/api/auth/logout")
self.assertStatus(200)
# test with Cookies set
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
# the following call adds the token to the blocklist
self._post("/api/auth/logout", set_cookies=True)
self.assertStatus(200)
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
time.sleep(6)
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
self.set_jwt_token(None)
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
# the following call removes expired tokens from the blocklist
self._post("/api/auth/logout", set_cookies=True)
self.assertStatus(200)
def test_unauthorized(self):
# test with Authorization header
self._get("/api/host")
self.assertStatus(401)
# test with Cookies set
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
def test_invalidate_token_by_admin(self):
# test with Authorization header
self._get("/api/host")
self.assertStatus(401)
self.create_user('user', 'user', ['read-only'])
@ -147,7 +301,32 @@ class AuthTest(DashboardTestCase):
self.assertStatus(200)
self.delete_user("user")
# test with Cookies set
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
self.create_user('user', 'user', ['read-only'])
time.sleep(1)
self._post("/api/auth", {'username': 'user', 'password': 'user'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
self._get("/api/host", set_cookies=True)
self.assertStatus(200)
time.sleep(1)
self._ceph_cmd(['dashboard', 'ac-user-set-password', '--force-password',
'user', 'user2'])
time.sleep(1)
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
self.set_jwt_token(None)
self._post("/api/auth", {'username': 'user', 'password': 'user2'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
self._get("/api/host", set_cookies=True)
self.assertStatus(200)
self.delete_user("user")
def test_check_token(self):
# test with Authorization header
self.login("admin", "admin")
self._post("/api/auth/check", {"token": self.jsonBody()["token"]})
self.assertStatus(200)
@ -160,7 +339,21 @@ class AuthTest(DashboardTestCase):
}, allow_unknown=False))
self.logout()
# test with Cookies set
self.login("admin", "admin", set_cookies=True)
self._post("/api/auth/check", {"token": self.jsonBody()["token"]}, set_cookies=True)
self.assertStatus(200)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
"username": JLeaf(str),
"permissions": JObj(sub_elems={}, allow_unknown=True),
"sso": JLeaf(bool),
"pwdUpdateRequired": JLeaf(bool)
}, allow_unknown=False))
self.logout(set_cookies=True)
def test_check_wo_token(self):
# test with Authorization header
self.login("admin", "admin")
self._post("/api/auth/check", {"token": ""})
self.assertStatus(200)
@ -169,3 +362,13 @@ class AuthTest(DashboardTestCase):
"login_url": JLeaf(str)
}, allow_unknown=False))
self.logout()
# test with Cookies set
self.login("admin", "admin", set_cookies=True)
self._post("/api/auth/check", {"token": ""}, set_cookies=True)
self.assertStatus(200)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
"login_url": JLeaf(str)
}, allow_unknown=False))
self.logout(set_cookies=True)
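
A minimal sketch of the two transports the set_cookies switch toggles, using plain requests outside the helper (base URL and credentials are placeholders):

    # Sketch only: dashboard token sent as a cookie instead of a bearer header.
    import requests

    base = 'https://localhost:8443'           # placeholder dashboard URL
    resp = requests.post(f'{base}/api/auth',
                         json={'username': 'admin', 'password': 'admin'},
                         verify=False)
    token = resp.json()['token']

    # Header-based request (previous behaviour)...
    requests.get(f'{base}/api/host', verify=False,
                 headers={'Authorization': f'Bearer {token}'})
    # ...versus the cookie-based request these tests now also exercise.
    requests.get(f'{base}/api/host', verify=False, cookies={'token': token})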

View File

@ -237,36 +237,139 @@ class OsdTest(DashboardTestCase):
class OsdFlagsTest(DashboardTestCase):
def __init__(self, *args, **kwargs):
super(OsdFlagsTest, self).__init__(*args, **kwargs)
self._initial_flags = sorted( # These flags cannot be unset
['sortbitwise', 'recovery_deletes', 'purged_snapdirs',
'pglog_hardlimit'])
self._initial_flags = ['sortbitwise', 'recovery_deletes', 'purged_snapdirs',
'pglog_hardlimit'] # These flags cannot be unset
@classmethod
def _get_cluster_osd_flags(cls):
return sorted(
json.loads(cls._ceph_cmd(['osd', 'dump',
'--format=json']))['flags_set'])
def _put_flags(cls, flags, ids=None):
url = '/api/osd/flags'
data = {'flags': flags}
@classmethod
def _put_flags(cls, flags):
cls._put('/api/osd/flags', data={'flags': flags})
return sorted(cls._resp.json())
if ids:
url = url + '/individual'
data['ids'] = ids
cls._put(url, data=data)
return cls._resp.json()
def test_list_osd_flags(self):
flags = self._get('/api/osd/flags')
self.assertStatus(200)
self.assertEqual(len(flags), 4)
self.assertEqual(sorted(flags), self._initial_flags)
self.assertCountEqual(flags, self._initial_flags)
def test_add_osd_flag(self):
flags = self._put_flags([
'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout',
'pause', 'pglog_hardlimit'
])
self.assertEqual(flags, sorted([
self.assertCountEqual(flags, [
'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout',
'pause', 'pglog_hardlimit'
]))
])
# Restore flags
self._put_flags(self._initial_flags)
def test_get_indiv_flag(self):
initial = self._get('/api/osd/flags/individual')
self.assertStatus(200)
self.assertSchema(initial, JList(JObj({
'osd': int,
'flags': JList(str)
})))
self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2'])
flags_added = self._get('/api/osd/flags/individual')
self.assertStatus(200)
for osd in flags_added:
if osd['osd'] in [0, 1, 2]:
self.assertIn('noout', osd['flags'])
self.assertIn('noin', osd['flags'])
for osd_initial in initial:
if osd['osd'] == osd_initial['osd']:
self.assertGreater(len(osd['flags']), len(osd_initial['flags']))
self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2'])
flags_removed = self._get('/api/osd/flags/individual')
self.assertStatus(200)
for osd in flags_removed:
if osd['osd'] in [0, 1, 2]:
self.assertNotIn('noout', osd['flags'])
self.assertNotIn('noin', osd['flags'])
def test_add_indiv_flag(self):
flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': True}
svc_id = 0
resp = self._put_flags(flags_update, [svc_id])
self._check_indiv_flags_resp(resp, [svc_id], ['noout'], [], ['noup', 'nodown', 'noin'])
self._check_indiv_flags_osd([svc_id], ['noout'], ['noup', 'nodown', 'noin'])
self._ceph_cmd(['osd', 'unset-group', 'noout', 'osd.{}'.format(svc_id)])
def test_add_multiple_indiv_flags(self):
flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True}
svc_id = 0
resp = self._put_flags(flags_update, [svc_id])
self._check_indiv_flags_resp(resp, [svc_id], ['noout', 'noin'], [], ['noup', 'nodown'])
self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown'])
self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.{}'.format(svc_id)])
def test_add_multiple_indiv_flags_multiple_osds(self):
flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True}
svc_id = [0, 1, 2]
resp = self._put_flags(flags_update, svc_id)
self._check_indiv_flags_resp(resp, svc_id, ['noout', 'noin'], [], ['noup', 'nodown'])
self._check_indiv_flags_osd(svc_id, ['noout', 'noin'], ['noup', 'nodown'])
self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2'])
def test_remove_indiv_flag(self):
flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': False}
svc_id = 0
self._ceph_cmd(['osd', 'set-group', 'noout', 'osd.{}'.format(svc_id)])
resp = self._put_flags(flags_update, [svc_id])
self._check_indiv_flags_resp(resp, [svc_id], [], ['noout'], ['noup', 'nodown', 'noin'])
self._check_indiv_flags_osd([svc_id], [], ['noup', 'nodown', 'noin', 'noout'])
def test_remove_multiple_indiv_flags(self):
flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False}
svc_id = 0
self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.{}'.format(svc_id)])
resp = self._put_flags(flags_update, [svc_id])
self._check_indiv_flags_resp(resp, [svc_id], [], ['noout', 'noin'], ['noup', 'nodown'])
self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown'])
def test_remove_multiple_indiv_flags_multiple_osds(self):
flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False}
svc_id = [0, 1, 2]
self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2'])
resp = self._put_flags(flags_update, svc_id)
self._check_indiv_flags_resp(resp, svc_id, [], ['noout', 'noin'], ['noup', 'nodown'])
self._check_indiv_flags_osd(svc_id, [], ['noout', 'noin', 'noup', 'nodown'])
def _check_indiv_flags_resp(self, resp, ids, added, removed, ignored):
self.assertStatus(200)
self.assertCountEqual(resp['ids'], ids)
self.assertCountEqual(resp['added'], added)
self.assertCountEqual(resp['removed'], removed)
for flag in ignored:
self.assertNotIn(flag, resp['added'])
self.assertNotIn(flag, resp['removed'])
def _check_indiv_flags_osd(self, ids, activated_flags, deactivated_flags):
osds = json.loads(self._ceph_cmd(['osd', 'dump', '--format=json']))['osds']
for osd in osds:
if osd['osd'] in ids:
for flag in activated_flags:
self.assertIn(flag, osd['state'])
for flag in deactivated_flags:
self.assertNotIn(flag, osd['state'])
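
The individual-flag endpoint exercised by _put_flags takes the flag map plus the target OSD ids; a minimal request sketch (URL and token are placeholders):

    # Sketch of the payload _put_flags() sends for per-OSD flags.
    import requests

    payload = {
        'flags': {'noout': True, 'noin': True, 'noup': None, 'nodown': None},
        'ids': [0, 1, 2],
    }
    requests.put('https://localhost:8443/api/osd/flags/individual',
                 json=payload, verify=False,
                 headers={'Authorization': 'Bearer <token>'})  # placeholder token
    # The response reports 'ids', 'added' and 'removed', which is what
    # _check_indiv_flags_resp() asserts on above.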

View File

@ -1,6 +1,8 @@
import json
import logging
from unittest import SkipTest
from teuthology import misc
from tasks.ceph_test_case import CephTestCase
@ -99,7 +101,7 @@ class MgrTestCase(CephTestCase):
assert cls.mgr_cluster is not None
if len(cls.mgr_cluster.mgr_ids) < cls.MGRS_REQUIRED:
cls.skipTest(
raise SkipTest(
"Only have {0} manager daemons, {1} are required".format(
len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED))

View File

@ -28,6 +28,16 @@ class TestDashboard(MgrTestCase):
"mgr/dashboard/standby_error_status_code",
"500")
def wait_until_webserver_available(self, url):
def _check_connection():
try:
requests.get(url, allow_redirects=False, verify=False)
return True
except requests.ConnectionError:
pass
return False
self.wait_until_true(_check_connection, timeout=30)
def test_standby(self):
original_active_id = self.mgr_cluster.get_active_id()
original_uri = self._get_uri("dashboard")
@ -48,6 +58,9 @@ class TestDashboard(MgrTestCase):
self.assertNotEqual(original_uri, failed_over_uri)
# Wait until web server of the standby node is settled.
self.wait_until_webserver_available(original_uri)
# The original active daemon should have come back up as a standby
# and be doing redirects to the new active daemon.
r = requests.get(original_uri, allow_redirects=False, verify=False)
@ -55,7 +68,7 @@ class TestDashboard(MgrTestCase):
self.assertEqual(r.headers['Location'], failed_over_uri)
# Ensure that every URL redirects to the active daemon.
r = requests.get("{}/runtime.js".format(original_uri),
r = requests.get("{}/runtime.js".format(original_uri.strip('/')),
allow_redirects=False,
verify=False)
self.assertEqual(r.status_code, 303)
@ -85,6 +98,9 @@ class TestDashboard(MgrTestCase):
self.assertNotEqual(original_uri, failed_over_uri)
# Wait until web server of the standby node is settled.
self.wait_until_webserver_available(original_uri)
# Redirection should be disabled now, instead a 500 must be returned.
r = requests.get(original_uri, allow_redirects=False, verify=False)
self.assertEqual(r.status_code, 500)

View File

@ -44,6 +44,88 @@ class TestProgress(MgrTestCase):
log.info(json.dumps(p, indent=2))
return p['events']
def _completed_events(self):
"""
This function returns all events that are completed
"""
p = self._get_progress()
log.info(json.dumps(p, indent=2))
return p['completed']
def is_osd_marked_out(self, ev):
return ev['message'].endswith('marked out')
def is_osd_marked_in(self, ev):
return ev['message'].endswith('marked in')
def _get_osd_in_out_events(self, marked='both'):
"""
Return the events that deal with OSDs being
marked in, out or both
"""
marked_in_events = []
marked_out_events = []
events_in_progress = self._events_in_progress()
for ev in events_in_progress:
if self.is_osd_marked_out(ev):
marked_out_events.append(ev)
elif self.is_osd_marked_in(ev):
marked_in_events.append(ev)
if marked == 'both':
return [marked_in_events] + [marked_out_events]
elif marked == 'in':
return marked_in_events
else:
return marked_out_events
def _osd_in_out_events_count(self, marked='both'):
"""
Count the number of ongoing recovery events that deal with
OSDs being marked in, out or both.
"""
events_in_progress = self._events_in_progress()
marked_in_count = 0
marked_out_count = 0
for ev in events_in_progress:
if self.is_osd_marked_out(ev):
marked_out_count += 1
elif self.is_osd_marked_in(ev):
marked_in_count += 1
if marked == 'both':
return marked_in_count + marked_out_count
elif marked == 'in':
return marked_in_count
else:
return marked_out_count
def _setup_pool(self, size=None):
self.mgr_cluster.mon_manager.create_pool(self.POOL)
if size is not None:
@ -105,9 +187,10 @@ class TestProgress(MgrTestCase):
'osd', 'out', str(osd_id))
# Wait for a progress event to pop up
self.wait_until_equal(lambda: len(self._all_events()), 1,
timeout=self.EVENT_CREATION_PERIOD)
ev = self._all_events()[0]
self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
timeout=self.EVENT_CREATION_PERIOD*2,
period=1)
ev = self._get_osd_in_out_events('out')[0]
log.info(json.dumps(ev, indent=1))
self.assertIn("Rebalancing after osd.0 marked out", ev['message'])
@ -125,8 +208,9 @@ class TestProgress(MgrTestCase):
try:
# Wait for progress event marked in to pop up
self.wait_until_equal(lambda: len(self._events_in_progress()), 1,
timeout=self.EVENT_CREATION_PERIOD)
self.wait_until_equal(lambda: self._osd_in_out_events_count('in'), 1,
timeout=self.EVENT_CREATION_PERIOD*2,
period=1)
except RuntimeError as ex:
if not "Timed out after" in str(ex):
raise ex
@ -134,12 +218,17 @@ class TestProgress(MgrTestCase):
log.info("There was no PGs affected by osd being marked in")
return None
new_event = self._events_in_progress()[0]
log.info(json.dumps(new_event, indent=1))
self.assertIn("Rebalancing after osd.0 marked in", new_event['message'])
new_event = self._get_osd_in_out_events('in')[0]
return new_event
def _no_events_anywhere(self):
"""
Whether there are no live or completed events anywhere
"""
p = self._get_progress()
total_events = len(p['events']) + len(p['completed'])
return total_events == 0
def _is_quiet(self):
"""
Whether any progress events are live.
@ -261,4 +350,53 @@ class TestProgress(MgrTestCase):
# Check that no event is created
time.sleep(self.EVENT_CREATION_PERIOD)
self.assertEqual(len(self._all_events()), osd_count - pool_size)
self.assertEqual(
self._osd_in_out_completed_events_count('out'),
osd_count - pool_size)
def test_turn_off_module(self):
"""
When the module is turned off, there should not be any ongoing or
completed events. The module should also not accept any kind of remote
event coming in from other modules; however, once it is turned back on,
creating an event should work as before.
"""
pool_size = 3
self._setup_pool(size=pool_size)
self._write_some_data(self.WRITE_PERIOD)
self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off")
self.mgr_cluster.mon_manager.raw_cluster_cmd(
'osd', 'out', '0')
time.sleep(self.EVENT_CREATION_PERIOD)
self.mgr_cluster.mon_manager.raw_cluster_cmd(
'osd', 'in', '0')
time.sleep(self.EVENT_CREATION_PERIOD)
self.assertTrue(self._no_events_anywhere())
self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on")
self._write_some_data(self.WRITE_PERIOD)
self.mgr_cluster.mon_manager.raw_cluster_cmd(
'osd', 'out', '0')
# Wait for a progress event to pop up
self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
timeout=self.EVENT_CREATION_PERIOD*2,
period=1)
ev1 = self._get_osd_in_out_events('out')[0]
log.info(json.dumps(ev1, indent=1))
self.wait_until_true(lambda: self._is_complete(ev1['id']),
timeout=self.RECOVERY_PERIOD)
self.assertTrue(self._is_quiet())
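
For orientation, the progress report the helpers above parse keeps live and completed events in separate buckets; a trimmed sketch (ids and messages are placeholders):

    # Sketch of the structure _get_progress() returns, trimmed to the fields
    # the assertions above read.
    progress = {
        'events': [
            {'id': 'deadbeef',
             'message': 'Rebalancing after osd.0 marked out'},
        ],
        'completed': [
            {'id': 'cafef00d',
             'message': 'Rebalancing after osd.1 marked in'},
        ],
    }
    no_events_anywhere = not (progress['events'] or progress['completed'])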

View File

@ -464,6 +464,11 @@ def task(ctx, config):
assert out['usage']['rgw.main']['num_objects'] == 1
assert out['usage']['rgw.main']['size_kb'] > 0
# TESTCASE 'bucket-stats6', 'bucket', 'stats', 'non-existent bucket', 'fails, 'bucket not found error'
(ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : 'doesnotexist'})
assert ret == 404
assert out['Code'] == 'NoSuchBucket'
# reclaim it
key.delete()

View File

@ -1268,7 +1268,10 @@ class LocalContext(object):
self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id)
def __del__(self):
shutil.rmtree(self.teuthology_config['test_path'])
test_path = self.teuthology_config['test_path']
# opt_create_cluster_only does not create the test path
if test_path:
shutil.rmtree(test_path)
def teardown_cluster():
log.info('\ntearing down the cluster...')

View File

@ -0,0 +1,63 @@
#!/bin/sh -x
expect_failure() {
if "$@"; then return 1; else return 0; fi
}
set -e
mkdir group
mkdir group/subvol1
setfattr -n ceph.dir.subvolume -v 1 group/subvol1
# rename subvolume
mv group/subvol1 group/subvol2
# move file out of the subvolume
touch group/subvol2/file1
expect_failure python3 -c "import os; os.rename('group/subvol2/file1', 'group/file1')"
# move file into the subvolume
touch group/file2
expect_failure python3 -c "import os; os.rename('group/file2', 'group/subvol2/file2')"
# create hardlink within subvolume
ln group/subvol2/file1 group/subvol2/file1_
# create hardlink out of subvolume
expect_failure ln group/subvol2/file1 group/file1_
expect_failure ln group/file2 group/subvol1/file2_
# create snapshot at subvolume root
mkdir group/subvol2/.snap/s1
# create snapshot at descendent dir of subvolume
mkdir group/subvol2/dir
expect_failure mkdir group/subvol2/dir/.snap/s2
mkdir group/subvol3
setfattr -n ceph.dir.subvolume -v 1 group/subvol3
# move file across subvolumes
expect_failure python3 -c "import os; os.rename('group/subvol2/file1', 'group/subvol3/file1')"
# create hardlink across subvolumes
expect_failure ln group/subvol2/file1 group/subvol3/file1
# create subvolume inside existing subvolume
expect_failure setfattr -n ceph.dir.subvolume -v 1 group/subvol2/dir
# clear subvolume flag
setfattr -n ceph.dir.subvolume -v 0 group/subvol2
mkdir group/subvol2/dir/.snap/s2
# parent subvolume override child subvolume
setfattr -n ceph.dir.subvolume -v 1 group/subvol2/dir
setfattr -n ceph.dir.subvolume -v 1 group/subvol2
expect_failure mkdir group/subvol2/dir/.snap/s3
rmdir group/subvol2/.snap/s1
rmdir group/subvol2/dir/.snap/s2
rm -rf group
echo OK

View File

@ -8,7 +8,7 @@ function assert_dm() {
local devno
devno=$(sudo dmsetup info -c --noheadings -o Major,Minor $name)
grep -q $val /sys/dev/block/$devno/bdi/stable_pages_required
grep -q $val /sys/dev/block/$devno/queue/stable_writes
}
function dmsetup_reload() {
@ -22,7 +22,7 @@ function dmsetup_reload() {
sudo dmsetup resume $name
}
IMAGE_NAME="stable-pages-required-test"
IMAGE_NAME="stable-writes-test"
rbd create --size 1 $IMAGE_NAME
DEV=$(sudo rbd map $IMAGE_NAME)
@ -31,11 +31,11 @@ fallocate -l 1M loopfile
LOOP_DEV=$(sudo losetup -f --show loopfile)
[[ $(blockdev --getsize64 $DEV) -eq 1048576 ]]
grep -q 1 /sys/block/${DEV#/dev/}/bdi/stable_pages_required
grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes
rbd resize --size 2 $IMAGE_NAME
[[ $(blockdev --getsize64 $DEV) -eq 2097152 ]]
grep -q 1 /sys/block/${DEV#/dev/}/bdi/stable_pages_required
grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes
cat <<EOF | sudo dmsetup create tbl
0 1024 linear $LOOP_DEV 0

View File

@ -261,7 +261,7 @@ peer_add()
peer_uuid=$(rbd mirror pool info --cluster ${cluster} --pool ${pool} --format xml | \
xmlstarlet sel -t -v "//peers/peer[site_name='${remote_cluster}']/uuid")
rbd --cluster ${cluster} --pool ${pool} mirror pool peer remove ${peer_uuid}
CEPH_ARGS='' rbd --cluster ${cluster} --pool ${pool} mirror pool peer remove ${peer_uuid}
else
test $error_code -eq 0
if [ -n "$uuid_var_name" ]; then

View File

@ -1,2 +1,2 @@
bdf3eebcd22d7d0b3dd4d5501bee5bac354d5b55
15.2.8
357616cbf726abb779ca75a551e8d02568e15b17
15.2.9

View File

@ -42,7 +42,7 @@ class Create(object):
Create an OSD by assigning an ID and FSID, registering them with the
cluster with an ID and FSID, formatting and mounting the volume, adding
all the metadata to the logical volumes using LVM tags, and starting
the OSD daemon. This is a convinience command that combines the prepare
the OSD daemon. This is a convenience command that combines the prepare
and activate steps.
Encryption is supported via dmcrypt and the --dmcrypt flag.

View File

@ -167,9 +167,8 @@ int main(int argc, const char **argv, const char *envp[]) {
}
{
g_ceph_context->_conf.finalize_reexpand_meta();
common_init_finish(g_ceph_context);
init_async_signal_handler();
register_async_signal_handler(SIGHUP, sighup_handler);

View File

@ -48,7 +48,6 @@ import os
import platform
import pwd
import random
import re
import select
import shutil
import socket
@ -59,6 +58,7 @@ import tempfile
import time
import errno
import struct
from enum import Enum
try:
from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO
except ImportError:
@ -93,7 +93,7 @@ if sys.version_info > (3, 0):
container_path = ''
cached_stdin = None
DATEFMT = '%Y-%m-%dT%H:%M:%S.%f'
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
# Log and console output config
logging_config = {
@ -924,12 +924,22 @@ class FileLock(object):
##################################
# Popen wrappers, lifted from ceph-volume
def call(command, # type: List[str]
desc=None, # type: Optional[str]
verbose=False, # type: bool
verbose_on_failure=True, # type: bool
timeout=DEFAULT_TIMEOUT, # type: Optional[int]
**kwargs):
class CallVerbosity(Enum):
SILENT = 0
# log stdout/stderr to logger.debug
DEBUG = 1
# On a non-zero exit status, it will forcefully set
# logging ON for the terminal
VERBOSE_ON_FAILURE = 2
# log at info (instead of debug) level.
VERBOSE = 3
def call(command: List[str],
desc: Optional[str] = None,
verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
timeout: Optional[int] = DEFAULT_TIMEOUT,
**kwargs) -> Tuple[str, str, int]:
"""
Wrap subprocess.Popen to
@ -937,14 +947,12 @@ def call(command, # type: List[str]
- decode utf-8
- cleanly return out, err, returncode
If verbose=True, log at info (instead of debug) level.
:param verbose_on_failure: On a non-zero exit status, it will forcefully set
logging ON for the terminal
:param timeout: timeout in seconds
"""
if not desc:
if desc is None:
desc = command[0]
if desc:
desc += ': '
timeout = timeout or args.timeout
logger.debug("Running command: %s" % ' '.join(command))
@ -977,7 +985,7 @@ def call(command, # type: List[str]
if end_time and (time.time() >= end_time):
stop = True
if process.poll() is None:
logger.info(desc + ':timeout after %s seconds' % timeout)
logger.info(desc + 'timeout after %s seconds' % timeout)
process.kill()
if reads and process.poll() is not None:
# we want to stop, but first read off anything remaining
@ -1007,55 +1015,58 @@ def call(command, # type: List[str]
lines = message.split('\n')
out_buffer = lines.pop()
for line in lines:
if verbose:
logger.info(desc + ':stdout ' + line)
else:
logger.debug(desc + ':stdout ' + line)
if verbosity == CallVerbosity.VERBOSE:
logger.info(desc + 'stdout ' + line)
elif verbosity != CallVerbosity.SILENT:
logger.debug(desc + 'stdout ' + line)
elif fd == process.stderr.fileno():
err += message
message = err_buffer + message
lines = message.split('\n')
err_buffer = lines.pop()
for line in lines:
if verbose:
logger.info(desc + ':stderr ' + line)
else:
logger.debug(desc + ':stderr ' + line)
if verbosity == CallVerbosity.VERBOSE:
logger.info(desc + 'stderr ' + line)
elif verbosity != CallVerbosity.SILENT:
logger.debug(desc + 'stderr ' + line)
else:
assert False
except (IOError, OSError):
pass
if verbose:
logger.debug(desc + ':profile rt=%s, stop=%s, exit=%s, reads=%s'
if verbosity == CallVerbosity.VERBOSE:
logger.debug(desc + 'profile rt=%s, stop=%s, exit=%s, reads=%s'
% (time.time()-start_time, stop, process.poll(), reads))
returncode = process.wait()
if out_buffer != '':
if verbose:
logger.info(desc + ':stdout ' + out_buffer)
else:
logger.debug(desc + ':stdout ' + out_buffer)
if verbosity == CallVerbosity.VERBOSE:
logger.info(desc + 'stdout ' + out_buffer)
elif verbosity != CallVerbosity.SILENT:
logger.debug(desc + 'stdout ' + out_buffer)
if err_buffer != '':
if verbose:
logger.info(desc + ':stderr ' + err_buffer)
else:
logger.debug(desc + ':stderr ' + err_buffer)
if verbosity == CallVerbosity.VERBOSE:
logger.info(desc + 'stderr ' + err_buffer)
elif verbosity != CallVerbosity.SILENT:
logger.debug(desc + 'stderr ' + err_buffer)
if returncode != 0 and verbose_on_failure and not verbose:
if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
# dump stdout + stderr
logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command)))
for line in out.splitlines():
logger.info(desc + ':stdout ' + line)
logger.info(desc + 'stdout ' + line)
for line in err.splitlines():
logger.info(desc + ':stderr ' + line)
logger.info(desc + 'stderr ' + line)
return out, err, returncode
def call_throws(command, **kwargs):
# type: (List[str], Any) -> Tuple[str, str, int]
out, err, ret = call(command, **kwargs)
def call_throws(command: List[str],
desc: Optional[str] = None,
verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
timeout: Optional[int] = DEFAULT_TIMEOUT,
**kwargs) -> Tuple[str, str, int]:
out, err, ret = call(command, desc, verbosity, timeout, **kwargs)
if ret:
raise RuntimeError('Failed command: %s' % ' '.join(command))
return out, err, ret
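
A minimal sketch of how call sites inside cephadm pick a verbosity after this change (the commands shown are illustrative):

    # Sketch only: the verbosity knob replaces the old verbose/verbose_on_failure flags.
    out, err, code = call(['systemctl', 'is-active', 'ceph.target'],
                          verbosity=CallVerbosity.DEBUG)          # never escalate to info
    out, err, code = call_throws(['ls', '/var/lib/ceph'],
                                 verbosity=CallVerbosity.VERBOSE)  # always log at info
    # The default, CallVerbosity.VERBOSE_ON_FAILURE, only dumps stdout/stderr at
    # info level when the command exits non-zero; SILENT logs neither stream.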
@ -1166,7 +1177,7 @@ def get_file_timestamp(fn):
return datetime.datetime.fromtimestamp(
mt, tz=datetime.timezone.utc
).strftime(DATEFMT)
except Exception as e:
except Exception:
return None
@ -1188,11 +1199,11 @@ def try_convert_datetime(s):
p = re.compile(r'(\.[\d]{6})[\d]*')
s = p.sub(r'\1', s)
# replace trailling Z with -0000, since (on python 3.6.8) it won't parse
# replace trailing Z with -0000, since (on python 3.6.8) it won't parse
if s and s[-1] == 'Z':
s = s[:-1] + '-0000'
# cut off the redundnat 'CST' part that strptime can't parse, if
# cut off the redundant 'CST' part that strptime can't parse, if
# present.
v = s.split(' ')
s = ' '.join(v[0:3])
@ -1409,13 +1420,16 @@ def get_last_local_ceph_image():
[container_path, 'images',
'--filter', 'label=ceph=True',
'--filter', 'dangling=false',
'--format', '{{.Repository}} {{.Tag}}'])
for line in out.splitlines():
if len(line.split()) == 2:
repository, tag = line.split()
r = '{}:{}'.format(repository, tag)
logger.info('Using recent ceph image %s' % r)
return r
'--format', '{{.Repository}}@{{.Digest}}'])
return _filter_last_local_ceph_image(out)
def _filter_last_local_ceph_image(out):
# str -> Optional[str]
for image in out.splitlines():
if image and not image.endswith('@'):
logger.info('Using recent ceph image %s' % image)
return image
return None
@ -1627,7 +1641,7 @@ def check_unit(unit_name):
installed = False
try:
out, err, code = call(['systemctl', 'is-enabled', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
if code == 0:
enabled = True
installed = True
@ -1641,7 +1655,7 @@ def check_unit(unit_name):
state = 'unknown'
try:
out, err, code = call(['systemctl', 'is-active', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
out = out.strip()
if out in ['active']:
state = 'running'
@ -2177,10 +2191,10 @@ def _write_container_cmd_to_bash(file_obj, container, comment=None, background=F
# unit file, makes it easier to read and grok.
file_obj.write('# ' + comment + '\n')
# Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
file_obj.write('! '+ ' '.join(container.rm_cmd()) + '\n')
file_obj.write('! '+ ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
# Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
if 'podman' in container_path:
file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + '\n')
file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + ' 2> /dev/null\n')
# container run command
file_obj.write(' '.join(container.run_cmd()) + (' &' if background else '') + '\n')
@ -2292,9 +2306,9 @@ def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
unit_name = get_unit_name(fsid, daemon_type, daemon_id)
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
if enable:
call_throws(['systemctl', 'enable', unit_name])
if start:
@ -2339,7 +2353,7 @@ class Firewalld(object):
else:
return
out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbose_on_failure=False)
out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
if ret:
logger.info('Enabling firewalld service %s in current zone...' % svc)
out, err, ret = call([self.cmd, '--permanent', '--add-service', svc])
@ -2357,7 +2371,7 @@ class Firewalld(object):
for port in fw_ports:
tcp_port = str(port) + '/tcp'
out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False)
out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
if ret:
logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port])
@ -2367,6 +2381,7 @@ class Firewalld(object):
else:
logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False)
def apply_rules(self):
# type: () -> None
if not self.available:
@ -2485,7 +2500,6 @@ Before=ceph-{fsid}.target
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStartPre=-{container_path} rm ceph-{fsid}-%i
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
@ -2792,7 +2806,14 @@ def command_bootstrap():
'--allow-overwrite to overwrite' % f)
dirname = os.path.dirname(f)
if dirname and not os.path.exists(dirname):
raise Error('%s directory %s does not exist' % (f, dirname))
fname = os.path.basename(f)
logger.info(f"Creating directory {dirname} for {fname}")
try:
# use makedirs to create intermediate missing dirs
os.makedirs(dirname, 0o755)
except PermissionError:
raise Error(f"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.")
if not args.skip_prepare_host:
command_prepare_host()
@ -3608,7 +3629,7 @@ def command_ceph_volume():
privileged=True,
volume_mounts=mounts,
)
out, err, code = call_throws(c.run_cmd(), verbose=True)
out, err, code = call_throws(c.run_cmd(), verbosity=CallVerbosity.VERBOSE)
if not code:
print(out)
@ -3626,7 +3647,10 @@ def command_unit():
call_throws([
'systemctl',
args.command,
unit_name])
unit_name],
verbosity=CallVerbosity.VERBOSE,
desc=''
)
##################################
@ -3813,7 +3837,7 @@ def list_daemons(detail=True, legacy_dir=None):
'--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
'ceph-%s-%s' % (fsid, j)
],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
if not code:
(container_id, image_name, image_id, start,
version) = out.strip().split(',')
@ -3975,7 +3999,7 @@ class AdoptOsd(object):
args=['lvm', 'list', '--format=json'],
privileged=True
)
out, err, code = call_throws(c.run_cmd(), verbose=False)
out, err, code = call_throws(c.run_cmd())
if not code:
try:
js = json.loads(out)
@ -4305,11 +4329,11 @@ def command_rm_daemon():
'this command may destroy precious data!')
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'disable', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
if daemon_type in ['mon', 'osd', 'prometheus'] and \
not args.force_delete_data:
@ -4344,25 +4368,25 @@ def command_rm_cluster():
continue
unit_name = get_unit_name(args.fsid, d['name'])
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'disable', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
# cluster units
for unit_name in ['ceph-%s.target' % args.fsid]:
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'disable', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
'\\x2d'))
call(['systemctl', 'stop', slice_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
# rm units
call_throws(['rm', '-f', args.unit_dir +
@ -4655,13 +4679,13 @@ class Apt(Packager):
def install(self, ls):
logger.info('Installing packages %s...' % ls)
call_throws(['apt', 'install', '-y'] + ls)
call_throws(['apt-get', 'install', '-y'] + ls)
def install_podman(self):
if self.distro == 'ubuntu':
logger.info('Setting up repo for podman...')
self.add_kubic_repo()
call_throws(['apt', 'update'])
call_throws(['apt-get', 'update'])
logger.info('Attempting podman install...')
try:
@ -5436,7 +5460,6 @@ class HostFacts():
up_secs, _ = raw_time.split()
return float(up_secs)
@property
def kernel_security(self):
# type: () -> Dict[str, str]
"""Determine the security features enabled in the kernel - SELinux, AppArmor"""
@ -5501,6 +5524,23 @@ class HostFacts():
"description": "Linux Security Module framework is not available"
}
@property
def kernel_parameters(self):
# type: () -> Dict[str, str]
"""Get kernel parameters required/used in Ceph clusters"""
k_param = {}
out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
if out:
param_list = out.split('\n')
param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list}
# return only desired parameters
if 'net.ipv4.ip_nonlocal_bind' in param_dict:
k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']
return k_param
def dump(self):
# type: () -> str
"""Return the attributes of this HostFacts object as json"""

View File

@ -278,6 +278,15 @@ default via fe80::2480:28ec:5097:3fe2 dev wlp2s0 proto ra metric 20600 pref medi
result = cd.dict_get_join({'a': 1}, 'a')
assert result == 1
def test_last_local_images(self):
out = '''
docker.io/ceph/daemon-base@
docker.io/ceph/ceph:v15.2.5
docker.io/ceph/daemon-base:octopus
'''
image = cd._filter_last_local_ceph_image(out)
assert image == 'docker.io/ceph/ceph:v15.2.5'
class TestCustomContainer(unittest.TestCase):
cc: cd.CustomContainer

View File

@ -124,6 +124,8 @@
#define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)
using namespace TOPNSPC::common;
void client_flush_set_callback(void *p, ObjectCacher::ObjectSet *oset)
{
Client *client = static_cast<Client*>(p);
@ -150,9 +152,11 @@ int Client::CommandHook::call(
std::lock_guard l{m_client->client_lock};
if (command == "mds_requests")
m_client->dump_mds_requests(f);
else if (command == "mds_sessions")
m_client->dump_mds_sessions(f);
else if (command == "dump_cache")
else if (command == "mds_sessions") {
bool cap_dump = false;
cmd_getval(cmdmap, "cap_dump", cap_dump);
m_client->dump_mds_sessions(f, cap_dump);
} else if (command == "dump_cache")
m_client->dump_cache(f);
else if (command == "kick_stale_sessions")
m_client->_kick_stale_sessions();
@ -468,6 +472,7 @@ void Client::dump_status(Formatter *f)
f->dump_int("osd_epoch", osd_epoch);
f->dump_int("osd_epoch_barrier", cap_epoch_barrier);
f->dump_bool("blacklisted", blacklisted);
f->dump_string("fs_name", mdsmap->get_fs_name());
}
}
@ -519,7 +524,8 @@ void Client::_finish_init()
lderr(cct) << "error registering admin socket command: "
<< cpp_strerror(-ret) << dendl;
}
ret = admin_socket->register_command("mds_sessions",
ret = admin_socket->register_command("mds_sessions "
"name=cap_dump,type=CephBool,req=false",
&m_command_hook,
"show mds session state");
if (ret < 0) {
@ -1534,7 +1540,7 @@ void Client::connect_mds_targets(mds_rank_t mds)
}
}
void Client::dump_mds_sessions(Formatter *f)
void Client::dump_mds_sessions(Formatter *f, bool cap_dump)
{
f->dump_int("id", get_nodeid().v);
entity_inst_t inst(messenger->get_myname(), messenger->get_myaddr_legacy());
@ -1544,7 +1550,7 @@ void Client::dump_mds_sessions(Formatter *f)
f->open_array_section("sessions");
for (const auto &p : mds_sessions) {
f->open_object_section("session");
p.second.dump(f);
p.second.dump(f, cap_dump);
f->close_section();
}
f->close_section();
@ -1912,6 +1918,7 @@ void Client::encode_dentry_release(Dentry *dn, MetaRequest *req,
rel.item.dname_len = dn->name.length();
rel.item.dname_seq = dn->lease_seq;
rel.dname = dn->name;
dn->lease_mds = -1;
}
ldout(cct, 25) << __func__ << " exit(dn:"
<< dn << ")" << dendl;
@ -4211,7 +4218,7 @@ void Client::remove_session_caps(MetaSession *s, int err)
int Client::_do_remount(bool retry_on_error)
{
uint64_t max_retries = g_conf().get_val<uint64_t>("mds_max_retries_on_remount_failure");
uint64_t max_retries = cct->_conf.get_val<uint64_t>("mds_max_retries_on_remount_failure");
errno = 0;
int r = remount_cb(callback_handle);
@ -7345,7 +7352,7 @@ unsigned Client::statx_to_mask(unsigned int flags, unsigned int want)
mask |= CEPH_CAP_AUTH_SHARED;
if (want & (CEPH_STATX_NLINK|CEPH_STATX_CTIME|CEPH_STATX_VERSION))
mask |= CEPH_CAP_LINK_SHARED;
if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION))
if (want & (CEPH_STATX_NLINK|CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION))
mask |= CEPH_CAP_FILE_SHARED;
if (want & (CEPH_STATX_VERSION|CEPH_STATX_CTIME))
mask |= CEPH_CAP_XATTR_SHARED;
@ -8130,6 +8137,7 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
string dn_name;
while (true) {
int mask = caps;
if (!dirp->inode->is_complete_and_ordered())
return -EAGAIN;
if (pd == dir->readdir_cache.end())
@ -8147,7 +8155,10 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
}
int idx = pd - dir->readdir_cache.begin();
int r = _getattr(dn->inode, caps, dirp->perms);
if (dn->inode->is_dir()) {
mask |= CEPH_STAT_RSTAT;
}
int r = _getattr(dn->inode, mask, dirp->perms);
if (r < 0)
return r;
@ -8231,7 +8242,7 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
uint64_t next_off = 1;
int r;
r = _getattr(diri, caps, dirp->perms);
r = _getattr(diri, caps | CEPH_STAT_RSTAT, dirp->perms);
if (r < 0)
return r;
@ -8264,7 +8275,7 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
in = diri->get_first_parent()->dir->parent_inode;
int r;
r = _getattr(in, caps, dirp->perms);
r = _getattr(in, caps | CEPH_STAT_RSTAT, dirp->perms);
if (r < 0)
return r;
@ -8330,7 +8341,11 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
int r;
if (check_caps) {
r = _getattr(entry.inode, caps, dirp->perms);
int mask = caps;
if(entry.inode->is_dir()){
mask |= CEPH_STAT_RSTAT;
}
r = _getattr(entry.inode, mask, dirp->perms);
if (r < 0)
return r;
}
@ -9224,7 +9239,7 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
int want, have = 0;
bool movepos = false;
std::unique_ptr<C_SaferCond> onuninline;
int64_t r = 0;
int64_t rc = 0;
const auto& conf = cct->_conf;
Inode *in = f->inode.get();
utime_t lat;
@ -9242,8 +9257,9 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
loff_t start_pos = offset;
if (in->inline_version == 0) {
r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true);
auto r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true);
if (r < 0) {
rc = r;
goto done;
}
ceph_assert(in->inline_version > 0);
@ -9254,9 +9270,12 @@ retry:
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
r = get_caps(f, CEPH_CAP_FILE_RD, want, &have, -1);
if (r < 0) {
goto done;
{
auto r = get_caps(f, CEPH_CAP_FILE_RD, want, &have, -1);
if (r < 0) {
rc = r;
goto done;
}
}
if (f->flags & O_DIRECT)
have &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO);
@ -9278,12 +9297,12 @@ retry:
bl->substr_of(in->inline_data, offset, len - offset);
bl->append_zero(endoff - len);
}
r = endoff - offset;
rc = endoff - offset;
} else if ((uint64_t)offset < endoff) {
bl->append_zero(endoff - offset);
r = endoff - offset;
rc = endoff - offset;
} else {
r = 0;
rc = 0;
}
goto success;
}
@ -9296,27 +9315,31 @@ retry:
if (f->flags & O_RSYNC) {
_flush_range(in, offset, size);
}
r = _read_async(f, offset, size, bl);
if (r < 0)
rc = _read_async(f, offset, size, bl);
if (rc < 0)
goto done;
} else {
if (f->flags & O_DIRECT)
_flush_range(in, offset, size);
bool checkeof = false;
r = _read_sync(f, offset, size, bl, &checkeof);
if (r < 0)
rc = _read_sync(f, offset, size, bl, &checkeof);
if (rc < 0)
goto done;
if (checkeof) {
offset += r;
size -= r;
offset += rc;
size -= rc;
put_cap_ref(in, CEPH_CAP_FILE_RD);
have = 0;
// reverify size
r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
if (r < 0)
goto done;
{
auto r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
if (r < 0) {
rc = r;
goto done;
}
}
// eof? short read.
if ((uint64_t)offset < in->size)
@ -9325,10 +9348,10 @@ retry:
}
success:
ceph_assert(r >= 0);
ceph_assert(rc >= 0);
if (movepos) {
// adjust fd pos
f->pos = start_pos + r;
f->pos = start_pos + rc;
}
lat = ceph_clock_now();
@ -9348,7 +9371,7 @@ done:
in->mark_caps_dirty(CEPH_CAP_FILE_WR);
check_caps(in, 0);
} else
r = ret;
rc = ret;
}
if (have) {
put_cap_ref(in, CEPH_CAP_FILE_RD);
@ -9356,7 +9379,7 @@ done:
if (movepos) {
unlock_fh_pos(f);
}
return r;
return rc;
}
Client::C_Readahead::C_Readahead(Client *c, Fh *f) :
@ -9874,6 +9897,8 @@ int Client::ftruncate(int fd, loff_t length, const UserPerm& perms)
if (f->flags & O_PATH)
return -EBADF;
#endif
if ((f->mode & CEPH_FILE_MODE_WR) == 0)
return -EBADF;
struct stat attr;
attr.st_size = length;
return _setattr(f->inode, &attr, CEPH_SETATTR_SIZE, perms);
@ -11463,6 +11488,9 @@ int Client::_getxattr(Inode *in, const char *name, void *value, size_t size,
if (vxattr->flags & VXATTR_RSTAT) {
flags |= CEPH_STAT_RSTAT;
}
if (vxattr->flags & VXATTR_DIRSTAT) {
flags |= CEPH_CAP_FILE_SHARED;
}
r = _getattr(in, flags, perms, true);
if (r != 0) {
// Error from getattr!
@ -11998,18 +12026,21 @@ size_t Client::_vxattrcb_snap_btime(Inode *in, char *val, size_t size)
(long unsigned)in->snap_btime.nsec());
}
size_t Client::_vxattrcb_cluster_fsid(Inode *in, char *val, size_t size)
{
return snprintf(val, size, "%s", monclient->get_fsid().to_string().c_str());
}
size_t Client::_vxattrcb_client_id(Inode *in, char *val, size_t size)
{
auto name = messenger->get_myname();
return snprintf(val, size, "%s%ld", name.type_str(), name.num());
}
#define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2
#define XATTR_NAME_CEPH(_type, _name) \
{ \
name: CEPH_XATTR_NAME(_type, _name), \
getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
readonly: true, \
exists_cb: NULL, \
flags: 0, \
}
#define XATTR_NAME_CEPH2(_type, _name, _flags) \
#define XATTR_NAME_CEPH(_type, _name, _flags) \
{ \
name: CEPH_XATTR_NAME(_type, _name), \
getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
@ -12047,14 +12078,14 @@ const Client::VXattr Client::_dir_vxattrs[] = {
XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
XATTR_NAME_CEPH(dir, entries),
XATTR_NAME_CEPH(dir, files),
XATTR_NAME_CEPH(dir, subdirs),
XATTR_NAME_CEPH2(dir, rentries, VXATTR_RSTAT),
XATTR_NAME_CEPH2(dir, rfiles, VXATTR_RSTAT),
XATTR_NAME_CEPH2(dir, rsubdirs, VXATTR_RSTAT),
XATTR_NAME_CEPH2(dir, rbytes, VXATTR_RSTAT),
XATTR_NAME_CEPH2(dir, rctime, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, entries, VXATTR_DIRSTAT),
XATTR_NAME_CEPH(dir, files, VXATTR_DIRSTAT),
XATTR_NAME_CEPH(dir, subdirs, VXATTR_DIRSTAT),
XATTR_NAME_CEPH(dir, rentries, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rfiles, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rsubdirs, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rbytes, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rctime, VXATTR_RSTAT),
{
name: "ceph.quota",
getxattr_cb: &Client::_vxattrcb_quota,
@ -12104,6 +12135,24 @@ const Client::VXattr Client::_file_vxattrs[] = {
{ name: "" } /* Required table terminator */
};
const Client::VXattr Client::_common_vxattrs[] = {
{
name: "ceph.cluster_fsid",
getxattr_cb: &Client::_vxattrcb_cluster_fsid,
readonly: true,
exists_cb: nullptr,
flags: 0,
},
{
name: "ceph.client_id",
getxattr_cb: &Client::_vxattrcb_client_id,
readonly: true,
exists_cb: nullptr,
flags: 0,
},
{ name: "" } /* Required table terminator */
};
const Client::VXattr *Client::_get_vxattrs(Inode *in)
{
if (in->is_dir())
@ -12124,7 +12173,16 @@ const Client::VXattr *Client::_match_vxattr(Inode *in, const char *name)
vxattr++;
}
}
// for common vxattrs
vxattr = _common_vxattrs;
while (!vxattr->name.empty()) {
if (vxattr->name == name)
return vxattr;
vxattr++;
}
}
return NULL;
}
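
The new _common_vxattrs table above exposes ceph.cluster_fsid and ceph.client_id on every inode served by this userspace client (ceph-fuse/libcephfs). A hedged usage sketch from a client host, assuming a CephFS mount at /mnt/cephfs (the path is illustrative) and the Linux getxattr(2) call:

#include <sys/xattr.h>
#include <cstdio>

int main() {
  char buf[128];
  // any file or directory inside the mount works; the path is an assumption
  ssize_t n = getxattr("/mnt/cephfs", "ceph.cluster_fsid", buf, sizeof(buf));
  if (n >= 0)
    std::printf("cluster fsid: %.*s\n", static_cast<int>(n), buf);

  n = getxattr("/mnt/cephfs", "ceph.client_id", buf, sizeof(buf));
  if (n >= 0)
    std::printf("client id:    %.*s\n", static_cast<int>(n), buf);
  return 0;
}
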

View File

@ -778,7 +778,7 @@ protected:
void _sync_write_commit(Inode *in);
void dump_mds_requests(Formatter *f);
void dump_mds_sessions(Formatter *f);
void dump_mds_sessions(Formatter *f, bool cap_dump=false);
int make_request(MetaRequest *req, const UserPerm& perms,
InodeRef *ptarget = 0, bool *pcreated = 0,
@ -1007,9 +1007,11 @@ private:
/* Flags for VXattr */
static const unsigned VXATTR_RSTAT = 0x1;
static const unsigned VXATTR_DIRSTAT = 0x2;
static const VXattr _dir_vxattrs[];
static const VXattr _file_vxattrs[];
static const VXattr _common_vxattrs[];
@ -1159,6 +1161,9 @@ private:
bool _vxattrcb_snap_btime_exists(Inode *in);
size_t _vxattrcb_snap_btime(Inode *in, char *val, size_t size);
size_t _vxattrcb_cluster_fsid(Inode *in, char *val, size_t size);
size_t _vxattrcb_client_id(Inode *in, char *val, size_t size);
static const VXattr *_get_vxattrs(Inode *in);
static const VXattr *_match_vxattr(Inode *in, const char *name);

View File

@ -5,6 +5,7 @@
#include "messages/MClientCapRelease.h"
#include "MetaSession.h"
#include "Inode.h"
#include "common/Formatter.h"
@ -21,7 +22,7 @@ const char *MetaSession::get_state_name() const
}
}
void MetaSession::dump(Formatter *f) const
void MetaSession::dump(Formatter *f, bool cap_dump) const
{
f->dump_int("mds", mds_num);
f->dump_object("addrs", addrs);
@ -31,6 +32,13 @@ void MetaSession::dump(Formatter *f) const
f->dump_stream("last_cap_renew_request") << last_cap_renew_request;
f->dump_unsigned("cap_renew_seq", cap_renew_seq);
f->dump_int("num_caps", caps.size());
if (cap_dump) {
f->open_array_section("caps");
for (const auto& cap : caps) {
f->dump_object("cap", *cap);
}
f->close_section();
}
f->dump_string("state", get_state_name());
}
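
The dump() change above makes per-cap output opt-in through the new cap_dump flag, which defaults to false so existing dumps keep their size. A hypothetical caller-side sketch, not part of this diff, assuming ceph's JSONFormatter from common/Formatter.h:

#include "common/Formatter.h"
#include "MetaSession.h"
#include <iostream>

// dump one session, including the per-cap array added above
void dump_session_with_caps(const MetaSession& s) {
  ceph::JSONFormatter f(true);      // pretty-printed JSON
  f.open_object_section("session");
  s.dump(&f, /*cap_dump=*/true);    // new optional argument
  f.close_section();
  f.flush(std::cout);
}
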

View File

@ -66,7 +66,7 @@ struct MetaSession {
const char *get_state_name() const;
void dump(Formatter *f) const;
void dump(Formatter *f, bool cap_dump=false) const;
void enqueue_cap_release(inodeno_t ino, uint64_t cap_id, ceph_seq_t iseq,
ceph_seq_t mseq, epoch_t osd_barrier);

Some files were not shown because too many files have changed in this diff.