mirror of https://git.proxmox.com/git/ceph.git (synced 2025-04-28 17:56:28 +00:00)

import 14.2.11 upstream release

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>

This commit is contained in:
parent aae9ebf445
commit 141ee55dc7
@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.5.1)
|
||||
|
||||
project(ceph CXX C ASM)
|
||||
set(VERSION 14.2.10)
|
||||
set(VERSION 14.2.11)
|
||||
|
||||
if(POLICY CMP0028)
|
||||
cmake_policy(SET CMP0028 NEW)
|
||||
|
@ -1,38 +1,14 @@
|
||||
14.2.9
|
||||
------
|
||||
>= 14.2.11
|
||||
----------
|
||||
|
||||
* Bucket notifications now support Kafka endpoints. This requires librdkafka of
|
||||
version 0.9.2 and up. Note that Ubuntu 16.04.6 LTS (Xenial Xerus) has an older
|
||||
version of librdkafka, and would require an update to the library.
|
||||
* RGW: The ``radosgw-admin`` sub-commands dealing with orphans --
|
||||
``radosgw-admin orphans find``, ``radosgw-admin orphans finish``,
|
||||
``radosgw-admin orphans list-jobs`` -- have been deprecated. They
|
||||
have not been actively maintained and they store intermediate
|
||||
results on the cluster, which could fill a nearly-full cluster.
|
||||
They have been replaced by a tool, currently considered
|
||||
experimental, ``rgw-orphan-list``.
|
||||
|
||||
* The pool parameter ``target_size_ratio``, used by the pg autoscaler,
|
||||
has changed meaning. It is now normalized across pools, rather than
|
||||
specifying an absolute ratio (see the short example after these notes). For details, see :ref:`pg-autoscaler`.
|
||||
If you have set target size ratios on any pools, you may want to set
|
||||
these pools to autoscale ``warn`` mode to avoid data movement during
|
||||
the upgrade::
|
||||
|
||||
ceph osd pool set <pool-name> pg_autoscale_mode warn
|
||||
|
||||
* The behaviour of the ``-o`` argument to the rados tool has been reverted to
|
||||
its original behaviour of indicating an output file. This reverts it to a more
|
||||
consistent behaviour when compared to other tools. Specifying object size is now
|
||||
accomplished by using an upper case O ``-O``.
|
||||
|
||||
* The format of MDSs in `ceph fs dump` has changed.
|
||||
|
||||
* Ceph will issue a health warning if a RADOS pool's ``size`` is set to 1
|
||||
or in other words the pool is configured with no redundancy. This can
|
||||
be fixed by setting the pool size to the minimum recommended value
|
||||
with::
|
||||
|
||||
ceph osd pool set <pool-name> size <num-replicas>
|
||||
|
||||
The warning can be silenced with::
|
||||
|
||||
ceph config set global mon_warn_on_pool_no_redundancy false
|
||||
|
||||
* RGW: bucket listing performance on sharded bucket indexes has been
|
||||
notably improved by heuristically -- and significantly, in many
|
||||
cases -- reducing the number of entries requested from each bucket
|
||||
index shard.
|
||||
* Now when noscrub and/or nodeep-scrub flags are set globally or per pool,
|
||||
scheduled scrubs of the type disabled will be aborted. All user initiated
|
||||
scrubs are NOT interrupted.
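As a rough illustration of the normalized ``target_size_ratio`` semantics mentioned
above, here is a minimal Python sketch (the pool names and ratio values are made up
for the example; this only mirrors the described normalization, not the autoscaler
internals)::

    # Hypothetical pools with administrator-set target_size_ratio values.
    ratios = {"rbd": 1.0, "cephfs_data": 3.0}

    total = sum(ratios.values())
    for pool, ratio in ratios.items():
        # Ratios are now normalized across pools, so each pool is expected
        # to consume ratio/total of the cluster's capacity.
        print(f"{pool}: expected share of capacity = {ratio / total:.0%}")
    # rbd: expected share of capacity = 25%
    # cephfs_data: expected share of capacity = 75%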
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Contributor: John Coyle <dx9err@gmail.com>
|
||||
# Maintainer: John Coyle <dx9err@gmail.com>
|
||||
pkgname=ceph
|
||||
pkgver=14.2.10
|
||||
pkgver=14.2.11
|
||||
pkgrel=0
|
||||
pkgdesc="Ceph is a distributed object store and file system"
|
||||
pkgusers="ceph"
|
||||
@ -64,7 +64,7 @@ makedepends="
|
||||
xmlstarlet
|
||||
yasm
|
||||
"
|
||||
source="ceph-14.2.10.tar.bz2"
|
||||
source="ceph-14.2.11.tar.bz2"
|
||||
subpackages="
|
||||
$pkgname-base
|
||||
$pkgname-common
|
||||
@ -117,7 +117,7 @@ _sysconfdir=/etc
|
||||
_udevrulesdir=/etc/udev/rules.d
|
||||
_python_sitelib=/usr/lib/python2.7/site-packages
|
||||
|
||||
builddir=$srcdir/ceph-14.2.10
|
||||
builddir=$srcdir/ceph-14.2.11
|
||||
|
||||
build() {
|
||||
export CEPH_BUILD_VIRTUALENV=$builddir
|
||||
|
@ -109,7 +109,7 @@
|
||||
# main package definition
|
||||
#################################################################################
|
||||
Name: ceph
|
||||
Version: 14.2.10
|
||||
Version: 14.2.11
|
||||
Release: 0%{?dist}
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
Epoch: 2
|
||||
@ -125,7 +125,7 @@ License: LGPL-2.1 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and
|
||||
Group: System/Filesystems
|
||||
%endif
|
||||
URL: http://ceph.com/
|
||||
Source0: %{?_remote_tarball_prefix}ceph-14.2.10.tar.bz2
|
||||
Source0: %{?_remote_tarball_prefix}ceph-14.2.11.tar.bz2
|
||||
%if 0%{?suse_version}
|
||||
# _insert_obs_source_lines_here
|
||||
ExclusiveArch: x86_64 aarch64 ppc64le s390x
|
||||
@ -1126,7 +1126,7 @@ This package provides Ceph’s default alerts for Prometheus.
|
||||
# common
|
||||
#################################################################################
|
||||
%prep
|
||||
%autosetup -p1 -n ceph-14.2.10
|
||||
%autosetup -p1 -n ceph-14.2.11
|
||||
|
||||
%build
|
||||
# LTO can be enabled as soon as the following GCC bug is fixed:
|
||||
@ -1470,6 +1470,7 @@ fi
|
||||
%{_mandir}/man8/ceph-authtool.8*
|
||||
%{_mandir}/man8/ceph-conf.8*
|
||||
%{_mandir}/man8/ceph-dencoder.8*
|
||||
%{_mandir}/man8/ceph-diff-sorted.8*
|
||||
%{_mandir}/man8/ceph-rbdnamer.8*
|
||||
%{_mandir}/man8/ceph-syn.8*
|
||||
%{_mandir}/man8/ceph-post-file.8*
|
||||
@ -1482,6 +1483,7 @@ fi
|
||||
%{_mandir}/man8/rbd-replay.8*
|
||||
%{_mandir}/man8/rbd-replay-many.8*
|
||||
%{_mandir}/man8/rbd-replay-prep.8*
|
||||
%{_mandir}/man8/rgw-orphan-list.8*
|
||||
%dir %{_datadir}/ceph/
|
||||
%{_datadir}/ceph/known_hosts_drop.ceph.com
|
||||
%{_datadir}/ceph/id_rsa_drop.ceph.com
|
||||
@ -1847,10 +1849,12 @@ fi
|
||||
%{_mandir}/man8/rbd-nbd.8*
|
||||
|
||||
%files radosgw
|
||||
%{_bindir}/ceph-diff-sorted
|
||||
%{_bindir}/radosgw
|
||||
%{_bindir}/radosgw-token
|
||||
%{_bindir}/radosgw-es
|
||||
%{_bindir}/radosgw-object-expirer
|
||||
%{_bindir}/rgw-orphan-list
|
||||
%{_mandir}/man8/radosgw.8*
|
||||
%dir %{_localstatedir}/lib/ceph/radosgw
|
||||
%{_unitdir}/ceph-radosgw@.service
|
||||
|
@ -1470,6 +1470,7 @@ fi
|
||||
%{_mandir}/man8/ceph-authtool.8*
|
||||
%{_mandir}/man8/ceph-conf.8*
|
||||
%{_mandir}/man8/ceph-dencoder.8*
|
||||
%{_mandir}/man8/ceph-diff-sorted.8*
|
||||
%{_mandir}/man8/ceph-rbdnamer.8*
|
||||
%{_mandir}/man8/ceph-syn.8*
|
||||
%{_mandir}/man8/ceph-post-file.8*
|
||||
@ -1482,6 +1483,7 @@ fi
|
||||
%{_mandir}/man8/rbd-replay.8*
|
||||
%{_mandir}/man8/rbd-replay-many.8*
|
||||
%{_mandir}/man8/rbd-replay-prep.8*
|
||||
%{_mandir}/man8/rgw-orphan-list.8*
|
||||
%dir %{_datadir}/ceph/
|
||||
%{_datadir}/ceph/known_hosts_drop.ceph.com
|
||||
%{_datadir}/ceph/id_rsa_drop.ceph.com
|
||||
@ -1847,10 +1849,12 @@ fi
|
||||
%{_mandir}/man8/rbd-nbd.8*
|
||||
|
||||
%files radosgw
|
||||
%{_bindir}/ceph-diff-sorted
|
||||
%{_bindir}/radosgw
|
||||
%{_bindir}/radosgw-token
|
||||
%{_bindir}/radosgw-es
|
||||
%{_bindir}/radosgw-object-expirer
|
||||
%{_bindir}/rgw-orphan-list
|
||||
%{_mandir}/man8/radosgw.8*
|
||||
%dir %{_localstatedir}/lib/ceph/radosgw
|
||||
%{_unitdir}/ceph-radosgw@.service
|
||||
|
@ -1,7 +1,13 @@
|
||||
ceph (14.2.10-1xenial) xenial; urgency=medium
|
||||
ceph (14.2.11-1xenial) xenial; urgency=medium
|
||||
|
||||
|
||||
-- Jenkins Build Slave User <jenkins-build@braggi11.front.sepia.ceph.com> Thu, 25 Jun 2020 18:20:02 +0000
|
||||
-- Jenkins Build Slave User <jenkins-build@braggi11.front.sepia.ceph.com> Mon, 10 Aug 2020 20:49:33 +0000
|
||||
|
||||
ceph (14.2.11-1) stable; urgency=medium
|
||||
|
||||
* New upstream release
|
||||
|
||||
-- Ceph Release Team <ceph-maintainers@ceph.com> Mon, 10 Aug 2020 20:15:20 +0000
|
||||
|
||||
ceph (14.2.10-1) stable; urgency=medium
|
||||
|
||||
|
@ -1,6 +1,10 @@
|
||||
lib/systemd/system/ceph-radosgw*
|
||||
usr/bin/ceph-diff-sorted
|
||||
usr/bin/radosgw
|
||||
usr/bin/radosgw-es
|
||||
usr/bin/radosgw-object-expirer
|
||||
usr/bin/radosgw-token
|
||||
usr/bin/rgw-orphan-list
|
||||
usr/share/man/man8/ceph-diff-sorted.8
|
||||
usr/share/man/man8/radosgw.8
|
||||
usr/share/man/man8/rgw-orphan-list.8
|
||||
|
@ -174,6 +174,13 @@ The output format is json and contains fields as follows.
|
||||
* path: absolute path of a subvolume
|
||||
* type: subvolume type indicating whether it's clone or subvolume
|
||||
* pool_namespace: RADOS namespace of the subvolume
|
||||
* features: features supported by the subvolume
|
||||
|
||||
The subvolume "features" are based on the internal version of the subvolume and are a list containing
|
||||
a subset of the following features,
|
||||
|
||||
* "snapshot-clone": supports cloning using a subvolumes snapshot as the source
|
||||
* "snapshot-autoprotect": supports automatically protecting snapshots, that are active clone sources, from deletion
|
||||
|
||||
List subvolumes using::
|
||||
|
||||
@ -195,6 +202,17 @@ List snapshots of a subvolume using::
|
||||
|
||||
$ ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Fetch the metadata of a snapshot using::
|
||||
|
||||
$ ceph fs subvolume snapshot info <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The output format is json and contains fields as follows.
|
||||
|
||||
* created_at: time of creation of snapshot in the format "YYYY-MM-DD HH:MM:SS:ffffff"
|
||||
* data_pool: data pool the snapshot belongs to
|
||||
* has_pending_clones: "yes" if snapshot clone is in progress otherwise "no"
|
||||
* size: snapshot size in bytes
|
||||
|
||||
Cloning Snapshots
|
||||
-----------------
|
||||
|
||||
@ -202,10 +220,20 @@ Subvolumes can be created by cloning subvolume snapshots. Cloning is an asynchro
|
||||
data from a snapshot to a subvolume. Due to this bulk copy nature, cloning is currently inefficient for very huge
|
||||
data sets.
|
||||
|
||||
Before starting a clone operation, the snapshot should be protected. Protecting a snapshot ensures that the snapshot
|
||||
cannot be deleted when a clone operation is in progress. Snapshots can be protected using::
|
||||
.. note:: Removing a snapshot (source subvolume) would fail if there are pending or in progress clone operations.
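As a rough illustration of the note above, a removal script could first check the
``has_pending_clones`` field reported by ``snapshot info``. A minimal Python sketch
(the volume, subvolume and snapshot names are placeholders)::

    import json, subprocess

    def snapshot_busy(vol, subvol, snap):
        # "has_pending_clones" is "yes" while clones of this snapshot are
        # pending or in progress; removing the snapshot would fail then.
        out = subprocess.check_output(
            ["ceph", "fs", "subvolume", "snapshot", "info", vol, subvol, snap])
        return json.loads(out).get("has_pending_clones") == "yes"

    if not snapshot_busy("cephfs", "subvol1", "snap1"):
        subprocess.check_call(
            ["ceph", "fs", "subvolume", "snapshot", "rm", "cephfs", "subvol1", "snap1"])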
|
||||
|
||||
Protecting snapshots prior to cloning was a pre-requisite in the Nautilus release, and the commands to protect/unprotect
|
||||
snapshots were introduced for this purpose. This pre-requisite, and hence the commands to protect/unprotect, is being
|
||||
deprecated in mainline CephFS, and may be removed from a future release.
|
||||
|
||||
The commands being deprecated are::
|
||||
|
||||
$ ceph fs subvolume snapshot protect <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
$ ceph fs subvolume snapshot unprotect <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: Using the above commands would not result in an error, but they serve no useful function.
|
||||
|
||||
.. note:: Use subvolume info command to fetch subvolume metadata regarding supported "features" to help decide if protect/unprotect of snapshots is required, based on the "snapshot-autoprotect" feature availability.
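The check suggested in the note above could be scripted roughly as follows (a Python
sketch; the volume and subvolume names are placeholders)::

    import json, subprocess

    info = json.loads(subprocess.check_output(
        ["ceph", "fs", "subvolume", "info", "cephfs", "subvol1"]))

    # If the subvolume reports "snapshot-autoprotect", explicit
    # protect/unprotect calls around cloning are unnecessary.
    needs_protect = "snapshot-autoprotect" not in info.get("features", [])
    print("protect/unprotect required:", needs_protect)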
|
||||
|
||||
To initiate a clone operation use::
|
||||
|
||||
@ -231,12 +259,11 @@ A clone can be in one of the following states:
|
||||
|
||||
#. `pending` : Clone operation has not started
|
||||
#. `in-progress` : Clone operation is in progress
|
||||
#. `complete` : Clone operation has sucessfully finished
|
||||
#. `complete` : Clone operation has successfully finished
|
||||
#. `failed` : Clone operation has failed
|
||||
|
||||
Sample output from an `in-progress` clone operation::
|
||||
|
||||
$ ceph fs subvolume snapshot protect cephfs subvol1 snap1
|
||||
$ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1
|
||||
$ ceph fs clone status cephfs clone1
|
||||
{
|
||||
@ -254,7 +281,7 @@ Sample output from an `in-progress` clone operation::
|
||||
|
||||
.. note:: Cloned subvolumes are accessible only after the clone operation has successfully completed.
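Since the cloned subvolume only becomes accessible once the clone reaches the
``complete`` state, a caller might simply poll ``clone status``. A minimal Python
sketch (placeholder names, arbitrary poll interval, assuming the JSON layout of the
sample ``clone status`` output shown here)::

    import json, subprocess, time

    def wait_for_clone(vol, clone, interval=5):
        while True:
            out = subprocess.check_output(["ceph", "fs", "clone", "status", vol, clone])
            state = json.loads(out)["status"]["state"]
            if state in ("complete", "failed", "canceled"):
                return state
            time.sleep(interval)   # still "pending" or "in-progress"

    print(wait_for_clone("cephfs", "clone1"))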
|
||||
|
||||
For a successsful clone operation, `clone status` would look like so::
|
||||
For a successful clone operation, `clone status` would look like so::
|
||||
|
||||
$ ceph fs clone status cephfs clone1
|
||||
{
|
||||
@ -270,14 +297,6 @@ To delete a partial clone use::
|
||||
|
||||
$ ceph fs subvolume rm <vol_name> <clone_name> [--group_name <group_name>] --force
|
||||
|
||||
When no clone operations are in progress or scheduled, the snaphot can be unprotected. To unprotect a snapshot use::
|
||||
|
||||
$ ceph fs subvolume snapshot unprotect <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Note that unprotecting a snapshot would fail if there are pending or in progress clone operations. Also note that,
|
||||
only unprotected snapshots can be removed. This guarantees that a snapshot cannot be deleted when clones are pending
|
||||
(or in progress).
|
||||
|
||||
.. note:: Cloning only synchronizes directories, regular files and symbolic links. Also, inode timestamps (access and
|
||||
modification times) are synchronized upto seconds granularity.
|
||||
|
||||
@ -287,7 +306,6 @@ An `in-progress` or a `pending` clone operation can be canceled. To cancel a clo
|
||||
|
||||
On successful cancelation, the cloned subvolume is moved to `canceled` state::
|
||||
|
||||
$ ceph fs subvolume snapshot protect cephfs subvol1 snap1
|
||||
$ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1
|
||||
$ ceph fs clone cancel cephfs clone1
|
||||
$ ceph fs clone status cephfs clone1
|
||||
|
@ -59,8 +59,8 @@ by the setting ``mds_log_max_segments``, and when the number of segments
|
||||
exceeds that setting the MDS starts writing back metadata so that it
|
||||
can remove (trim) the oldest segments. If this writeback is happening
|
||||
too slowly, or a software bug is preventing trimming, then this health
|
||||
message may appear. The threshold for this message to appear is for the
|
||||
number of segments to be double ``mds_log_max_segments``.
|
||||
message may appear. The threshold for this message to appear is controlled by
|
||||
the config option ``mds_log_warn_factor``, the default is 2.0.
|
||||
|
||||
Message: "Client *name* failing to respond to capability release"
|
||||
Code: MDS_HEALTH_CLIENT_LATE_RELEASE, MDS_HEALTH_CLIENT_LATE_RELEASE_MANY
|
||||
|
@ -2,14 +2,6 @@
|
||||
MDS Config Reference
|
||||
======================
|
||||
|
||||
``mon force standby active``
|
||||
|
||||
:Description: If ``true`` monitors force standby-replay to be active. Set
|
||||
under ``[mon]`` or ``[global]``.
|
||||
|
||||
:Type: Boolean
|
||||
:Default: ``true``
|
||||
|
||||
``mds cache memory limit``
|
||||
|
||||
:Description: The memory limit the MDS should enforce for its cache.
|
||||
@ -540,31 +532,6 @@
|
||||
:Default: ``0``
|
||||
|
||||
|
||||
``mds standby for name``
|
||||
|
||||
:Description: An MDS daemon will standby for another MDS daemon of the name
|
||||
specified in this setting.
|
||||
|
||||
:Type: String
|
||||
:Default: N/A
|
||||
|
||||
|
||||
``mds standby for rank``
|
||||
|
||||
:Description: An MDS daemon will standby for an MDS daemon of this rank.
|
||||
:Type: 32-bit Integer
|
||||
:Default: ``-1``
|
||||
|
||||
|
||||
``mds standby replay``
|
||||
|
||||
:Description: Determines whether a ``ceph-mds`` daemon should poll and replay
|
||||
the log of an active MDS (hot standby).
|
||||
|
||||
:Type: Boolean
|
||||
:Default: ``false``
|
||||
|
||||
|
||||
``mds min caps per client``
|
||||
|
||||
:Description: Set the minimum number of capabilities a client may hold.
|
||||
|
@ -1,7 +1,7 @@
|
||||
.. _msgr2-protocol:
|
||||
|
||||
msgr2 protocol
|
||||
==============
|
||||
msgr2 protocol (msgr2.0 and msgr2.1)
|
||||
====================================
|
||||
|
||||
This is a revision of the legacy Ceph on-wire protocol that was
|
||||
implemented by the SimpleMessenger. It addresses performance and
|
||||
@ -20,7 +20,7 @@ This protocol revision has several goals relative to the original protocol:
|
||||
(e.g., padding) that keep computation and memory copies out of the
|
||||
fast path where possible.
|
||||
* *Signing*. We will allow for traffic to be signed (but not
|
||||
necessarily encrypted). This may not be implemented in the initial version.
|
||||
necessarily encrypted). This is not implemented.
|
||||
|
||||
Definitions
|
||||
-----------
|
||||
@ -56,10 +56,19 @@ Banner
|
||||
|
||||
Both the client and server, upon connecting, send a banner::
|
||||
|
||||
"ceph %x %x\n", protocol_features_suppored, protocol_features_required
|
||||
"ceph v2\n"
|
||||
__le16 banner payload length
|
||||
banner payload
|
||||
|
||||
The protocol features are a new, distinct namespace. Initially no
|
||||
features are defined or required, so this will be "ceph 0 0\n".
|
||||
A banner payload has the form::
|
||||
|
||||
__le64 peer_supported_features
|
||||
__le64 peer_required_features
|
||||
|
||||
This is a new, distinct feature bit namespace (CEPH_MSGR2_*).
|
||||
Currently, only CEPH_MSGR2_FEATURE_REVISION_1 is defined. It is
|
||||
supported but not required, so that msgr2.0 and msgr2.1 peers
|
||||
can talk to each other.
|
||||
|
||||
If the remote party advertises required features we don't support, we
|
||||
can disconnect.
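A minimal Python sketch of building and parsing the banner described above (the bit
position assumed for CEPH_MSGR2_FEATURE_REVISION_1 is for illustration only)::

    import struct

    FEATURE_REVISION_1 = 1 << 0    # assumed bit position, illustration only

    def build_banner(supported, required):
        payload = struct.pack("<QQ", supported, required)   # two __le64 fields
        return b"ceph v2\n" + struct.pack("<H", len(payload)) + payload

    def parse_banner(buf):
        assert buf.startswith(b"ceph v2\n")
        (length,) = struct.unpack_from("<H", buf, 8)
        supported, required = struct.unpack("<QQ", buf[10:10 + length])
        return supported, required

    banner = build_banner(FEATURE_REVISION_1, 0)   # supported but not required
    print(parse_banner(banner))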
|
||||
@ -79,27 +88,150 @@ can disconnect.
|
||||
Frame format
|
||||
------------
|
||||
|
||||
All further data sent or received is contained by a frame. Each frame has
|
||||
the form::
|
||||
After the banners are exchanged, all further communication happens
|
||||
in frames. The exact format of the frame depends on the connection
|
||||
mode (msgr2.0-crc, msgr2.0-secure, msgr2.1-crc or msgr2.1-secure).
|
||||
All connections start in crc mode (either msgr2.0-crc or msgr2.1-crc,
|
||||
depending on peer_supported_features from the banner).
|
||||
|
||||
frame_len (le32)
|
||||
tag (TAG_* le32)
|
||||
frame_header_checksum (le32)
|
||||
payload
|
||||
[payload padding -- only present after stream auth phase]
|
||||
[signature -- only present after stream auth phase]
|
||||
Each frame has a 32-byte preamble::
|
||||
|
||||
__u8 tag
|
||||
__u8 number of segments
|
||||
{
|
||||
__le32 segment length
|
||||
__le16 segment alignment
|
||||
} * 4
|
||||
reserved (2 bytes)
|
||||
__le32 preamble crc
|
||||
|
||||
* The frame_header_checksum is over just the frame_len and tag values (8 bytes).
|
||||
An empty frame has one empty segment. A non-empty frame can have
|
||||
between one and four segments, all segments except the last may be
|
||||
empty.
|
||||
|
||||
* frame_len includes everything after the frame_len le32 up to the end of the
|
||||
frame (all payloads, signatures, and padding).
|
||||
If there are less than four segments, unused (trailing) segment
|
||||
length and segment alignment fields are zeroed.
|
||||
|
||||
* The payload format and length is determined by the tag.
|
||||
The reserved bytes are zeroed.
|
||||
|
||||
* The signature portion is only present if the authentication phase
|
||||
has completed (TAG_AUTH_DONE has been sent) and signatures are
|
||||
enabled.
|
||||
The preamble checksum is CRC32-C. It covers everything up to
|
||||
itself (28 bytes) and is calculated and verified irrespective of
|
||||
the connection mode (i.e. even if the frame is encrypted).
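For illustration, a Python sketch that packs the 32-byte preamble laid out above.
It assumes the third-party ``crc32c`` package for the CRC32-C calculation, which is
not part of the standard library::

    import struct
    import crc32c   # assumption: the PyPI "crc32c" package

    def build_preamble(tag, segments):
        # segments: list of up to four (length, alignment) tuples
        assert 1 <= len(segments) <= 4
        padded = segments + [(0, 0)] * (4 - len(segments))   # unused entries zeroed
        body = struct.pack("<BB", tag, len(segments))
        for length, align in padded:
            body += struct.pack("<IH", length, align)
        body += b"\x00\x00"                                   # 2 reserved bytes
        assert len(body) == 28
        return body + struct.pack("<I", crc32c.crc32c(body))  # crc over 28 bytes

    preamble = build_preamble(tag=1, segments=[(20, 8)])
    assert len(preamble) == 32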
|
||||
|
||||
### msgr2.0-crc mode
|
||||
|
||||
A msgr2.0-crc frame has the form::
|
||||
|
||||
preamble (32 bytes)
|
||||
{
|
||||
segment payload
|
||||
} * number of segments
|
||||
epilogue (17 bytes)
|
||||
|
||||
where epilogue is::
|
||||
|
||||
__u8 late_flags
|
||||
{
|
||||
__le32 segment crc
|
||||
} * 4
|
||||
|
||||
late_flags is used for frame abortion. After transmitting the
|
||||
preamble and the first segment, the sender can fill the remaining
|
||||
segments with zeros and set a flag to indicate that the receiver must
|
||||
drop the frame. This allows the sender to avoid extra buffering
|
||||
when a frame that is being put on the wire is revoked (i.e. yanked
|
||||
out of the messenger): payload buffers can be unpinned and handed
|
||||
back to the user immediately, without making a copy or blocking
|
||||
until the whole frame is transmitted. Currently this is used only
|
||||
by the kernel client, see ceph_msg_revoke().
|
||||
|
||||
The segment checksum is CRC32-C. For "used" empty segments, it is
|
||||
set to (__le32)-1. For unused (trailing) segments, it is zeroed.
|
||||
|
||||
The crcs are calculated just to protect against bit errors.
|
||||
No authenticity guarantees are provided, unlike in msgr1 which
|
||||
attempted to provide some authenticity guarantee by optionally
|
||||
signing segment lengths and crcs with the session key.
|
||||
|
||||
Issues:
|
||||
|
||||
1. As part of introducing a structure for a generic frame with
|
||||
variable number of segments suitable for both control and
|
||||
message frames, msgr2.0 moved the crc of the first segment of
|
||||
the message frame (ceph_msg_header2) into the epilogue.
|
||||
|
||||
As a result, ceph_msg_header2 can no longer be safely
|
||||
interpreted before the whole frame is read off the wire.
|
||||
This is a regression from msgr1, because in order to scatter
|
||||
the payload directly into user-provided buffers and thus avoid
|
||||
extra buffering and copying when receiving message frames,
|
||||
ceph_msg_header2 must be available in advance -- it stores
|
||||
the transaction id which the user buffers are keyed on.
|
||||
The implementation has to choose between forgoing this
|
||||
optimization or acting on an unverified segment.
|
||||
|
||||
2. late_flags is not covered by any crc. Since it stores the
|
||||
abort flag, a single bit flip can result in a completed frame
|
||||
being dropped (causing the sender to hang waiting for a reply)
|
||||
or, worse, in an aborted frame with garbage segment payloads
|
||||
being dispatched.
|
||||
|
||||
This was the case with msgr1 and got carried over to msgr2.0.
|
||||
|
||||
### msgr2.1-crc mode
|
||||
|
||||
Differences from msgr2.0-crc:
|
||||
|
||||
1. The crc of the first segment is stored at the end of the
|
||||
first segment, not in the epilogue. The epilogue stores up to
|
||||
three crcs, not up to four.
|
||||
|
||||
If the first segment is empty, (__le32)-1 crc is not generated.
|
||||
|
||||
2. The epilogue is generated only if the frame has more than one
|
||||
segment (i.e. at least one of second to fourth segments is not
|
||||
empty). Rationale: If the frame has only one segment, it cannot
|
||||
be aborted and there are no crcs to store in the epilogue.
|
||||
|
||||
3. Unchecksummed late_flags is replaced with late_status which
|
||||
builds in bit error detection by using a 4-bit nibble per flag
|
||||
and two code words that are Hamming Distance = 4 apart (and not
|
||||
all zeros or ones). This comes at the expense of having only
|
||||
one reserved flag, of course.
|
||||
|
||||
Some example frames:
|
||||
|
||||
* A 0+0+0+0 frame (empty, no epilogue)::
|
||||
|
||||
preamble (32 bytes)
|
||||
|
||||
* A 20+0+0+0 frame (no epilogue)::
|
||||
|
||||
preamble (32 bytes)
|
||||
segment1 payload (20 bytes)
|
||||
__le32 segment1 crc
|
||||
|
||||
* A 0+70+0+0 frame::
|
||||
|
||||
preamble (32 bytes)
|
||||
segment2 payload (70 bytes)
|
||||
epilogue (13 bytes)
|
||||
|
||||
* A 20+70+0+350 frame::
|
||||
|
||||
preamble (32 bytes)
|
||||
segment1 payload (20 bytes)
|
||||
__le32 segment1 crc
|
||||
segment2 payload (70 bytes)
|
||||
segment4 payload (350 bytes)
|
||||
epilogue (13 bytes)
|
||||
|
||||
where epilogue is::
|
||||
|
||||
__u8 late_status
|
||||
{
|
||||
__le32 segment crc
|
||||
} * 3
|
||||
|
||||
Hello
|
||||
-----
|
||||
@ -198,47 +330,197 @@ authentication method as the first attempt:
|
||||
Post-auth frame format
|
||||
----------------------
|
||||
|
||||
The frame format is fixed (see above), but can take three different
|
||||
forms, depending on the AUTH_DONE flags:
|
||||
Depending on the negotiated connection mode from TAG_AUTH_DONE, the
|
||||
connection either stays in crc mode or switches to the corresponding
|
||||
secure mode (msgr2.0-secure or msgr2.1-secure).
|
||||
|
||||
* If neither FLAG_SIGNED or FLAG_ENCRYPTED is specified, things are simple::
|
||||
### msgr2.0-secure mode
|
||||
|
||||
frame_len
|
||||
tag
|
||||
payload
|
||||
payload_padding (out to auth block_size)
|
||||
A msgr2.0-secure frame has the form::
|
||||
|
||||
- The padding is some number of bytes < the auth block_size that
|
||||
brings the total length of the payload + payload_padding to a
|
||||
multiple of block_size. It does not include the frame_len or tag. Padding
|
||||
content can be zeros or (better) random bytes.
|
||||
|
||||
* If FLAG_SIGNED has been specified::
|
||||
|
||||
frame_len
|
||||
tag
|
||||
payload
|
||||
payload_padding (out to auth block_size)
|
||||
signature (sig_size bytes)
|
||||
|
||||
Here the padding just makes life easier for the signature. It can be
|
||||
random data to add additional confounder. Note also that the
|
||||
signature input must include some state from the session key and the
|
||||
previous message.
|
||||
|
||||
* If FLAG_ENCRYPTED has been specified::
|
||||
|
||||
frame_len
|
||||
tag
|
||||
{
|
||||
payload
|
||||
payload_padding (out to auth block_size)
|
||||
} ^ stream cipher
|
||||
preamble (32 bytes)
|
||||
{
|
||||
segment payload
|
||||
zero padding (out to 16 bytes)
|
||||
} * number of segments
|
||||
epilogue (16 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
|
||||
Note that the padding ensures that the total frame is a multiple of
|
||||
the auth method's block_size so that the message can be sent out over
|
||||
the wire without waiting for the next frame in the stream.
|
||||
where epilogue is::
|
||||
|
||||
__u8 late_flags
|
||||
zero padding (15 bytes)
|
||||
|
||||
late_flags has the same meaning as in msgr2.0-crc mode.
|
||||
|
||||
Each segment and the epilogue are zero padded out to 16 bytes.
|
||||
Technically, GCM doesn't require any padding because Counter mode
|
||||
(the C in GCM) essentially turns a block cipher into a stream cipher.
|
||||
But, if the overall input length is not a multiple of 16 bytes, some
|
||||
implicit zero padding would occur internally because GHASH function
|
||||
used by GCM for generating auth tags only works on 16-byte blocks.
|
||||
|
||||
Issues:
|
||||
|
||||
1. The sender encrypts the whole frame using a single nonce
|
||||
and generating a single auth tag. Because segment lengths are
|
||||
stored in the preamble, the receiver has no choice but to decrypt
|
||||
and interpret the preamble without verifying the auth tag -- it
|
||||
can't even tell how much to read off the wire to get the auth tag
|
||||
otherwise! This creates a decryption oracle, which, in conjunction
|
||||
with Counter mode malleability, could lead to recovery of sensitive
|
||||
information.
|
||||
|
||||
This issue extends to the first segment of the message frame as
|
||||
well. As in msgr2.0-crc mode, ceph_msg_header2 cannot be safely
|
||||
interpreted before the whole frame is read off the wire.
|
||||
|
||||
2. Deterministic nonce construction with a 4-byte counter field
|
||||
followed by an 8-byte fixed field is used. The initial values are
|
||||
taken from the connection secret -- a random byte string generated
|
||||
during the authentication phase. Because the counter field is
|
||||
only four bytes long, it can wrap and then repeat in under a day,
|
||||
leading to GCM nonce reuse and therefore a potential complete
|
||||
loss of both authenticity and confidentiality for the connection.
|
||||
This was addressed by disconnecting before the counter repeats
|
||||
(CVE-2020-1759).
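A quick sanity check of the "under a day" claim; the per-connection message rate is
an assumed figure, not a measurement::

    messages_per_second = 75_000   # assumed sustained rate on one connection
    hours_to_wrap = 2**32 / messages_per_second / 3600
    print(f"4-byte counter wraps after ~{hours_to_wrap:.1f} hours")
    # ~15.9 hours at this rate, i.e. nonce reuse in under a day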
|
||||
|
||||
### msgr2.1-secure mode
|
||||
|
||||
Differences from msgr2.0-secure:
|
||||
|
||||
1. The preamble, the first segment and the rest of the frame are
|
||||
encrypted separately, using separate nonces and generating
|
||||
separate auth tags. This gets rid of unverified plaintext use
|
||||
and keeps msgr2.1-secure mode close to msgr2.1-crc mode, allowing
|
||||
the implementation to receive message frames in a similar fashion
|
||||
(little to no buffering, same scatter/gather logic, etc).
|
||||
|
||||
In order to reduce the number of en/decryption operations per
|
||||
frame, the preamble is grown by a fixed size inline buffer (48
|
||||
bytes) that the first segment is inlined into, either fully or
|
||||
partially. The preamble auth tag covers both the preamble and the
|
||||
inline buffer, so if the first segment is small enough to be fully
|
||||
inlined, it becomes available after a single decryption operation.
|
||||
|
||||
2. As in msgr2.1-crc mode, the epilogue is generated only if the
|
||||
frame has more than one segment. The rationale is even stronger,
|
||||
as it would require an extra en/decryption operation.
|
||||
|
||||
3. For consistency with msgr2.1-crc mode, late_flags is replaced
|
||||
with late_status (the built-in bit error detection isn't really
|
||||
needed in secure mode).
|
||||
|
||||
4. In accordance with `NIST Recommendation for GCM`_, deterministic
|
||||
nonce construction with a 4-byte fixed field followed by an 8-byte
|
||||
counter field is used. An 8-byte counter field should never repeat
|
||||
but the nonce reuse protection put in place for msgr2.0-secure mode
|
||||
is still there.
|
||||
|
||||
The initial values are the same as in msgr2.0-secure mode.
|
||||
|
||||
.. _`NIST Recommendation for GCM`: https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
|
||||
|
||||
As in msgr2.0-secure mode, each segment is zero padded out to
|
||||
16 bytes. If the first segment is fully inlined, its padding goes
|
||||
to the inline buffer. Otherwise, the padding is on the remainder.
|
||||
The corollary to this is that the inline buffer is consumed in
|
||||
16-byte chunks.
|
||||
|
||||
The unused portion of the inline buffer is zeroed.
|
||||
|
||||
Some example frames:
|
||||
|
||||
* A 0+0+0+0 frame (empty, nothing to inline, no epilogue)::
|
||||
|
||||
{
|
||||
preamble (32 bytes)
|
||||
zero padding (48 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
|
||||
* A 20+0+0+0 frame (first segment fully inlined, no epilogue)::
|
||||
|
||||
{
|
||||
preamble (32 bytes)
|
||||
segment1 payload (20 bytes)
|
||||
zero padding (28 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
|
||||
* A 0+70+0+0 frame (nothing to inline)::
|
||||
|
||||
{
|
||||
preamble (32 bytes)
|
||||
zero padding (48 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
{
|
||||
segment2 payload (70 bytes)
|
||||
zero padding (10 bytes)
|
||||
epilogue (16 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
|
||||
* A 20+70+0+350 frame (first segment fully inlined)::
|
||||
|
||||
{
|
||||
preamble (32 bytes)
|
||||
segment1 payload (20 bytes)
|
||||
zero padding (28 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
{
|
||||
segment2 payload (70 bytes)
|
||||
zero padding (10 bytes)
|
||||
segment4 payload (350 bytes)
|
||||
zero padding (2 bytes)
|
||||
epilogue (16 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
|
||||
* A 105+0+0+0 frame (first segment partially inlined, no epilogue)::
|
||||
|
||||
{
|
||||
preamble (32 bytes)
|
||||
segment1 payload (48 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
{
|
||||
segment1 payload remainder (57 bytes)
|
||||
zero padding (7 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
|
||||
* A 105+70+0+350 frame (first segment partially inlined)::
|
||||
|
||||
{
|
||||
preamble (32 bytes)
|
||||
segment1 payload (48 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
{
|
||||
segment1 payload remainder (57 bytes)
|
||||
zero padding (7 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
{
|
||||
segment2 payload (70 bytes)
|
||||
zero padding (10 bytes)
|
||||
segment4 payload (350 bytes)
|
||||
zero padding (2 bytes)
|
||||
epilogue (16 bytes)
|
||||
} ^ AES-128-GCM cipher
|
||||
auth tag (16 bytes)
|
||||
|
||||
where epilogue is::
|
||||
|
||||
__u8 late_status
|
||||
zero padding (15 bytes)
|
||||
|
||||
late_status has the same meaning as in msgr2.1-crc mode.
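The msgr2.1-secure examples above can be reproduced with a short Python sketch that
computes the ciphertext chunk sizes (auth tags included) from the four segment
lengths (illustrative only)::

    INLINE = 48      # inline buffer appended to the preamble
    TAG = 16         # AES-128-GCM auth tag
    EPILOGUE = 16    # __u8 late_status + 15 bytes of zero padding

    def pad16(n):
        return (n + 15) // 16 * 16

    def chunks_secure(seg1, seg2, seg3, seg4):
        multi = bool(seg2 or seg3 or seg4)
        chunks = [32 + INLINE]                   # preamble + inline buffer
        if seg1 > INLINE:
            chunks.append(pad16(seg1 - INLINE))  # remainder of the first segment
        if multi:
            chunks.append(pad16(seg2) + pad16(seg3) + pad16(seg4) + EPILOGUE)
        return [c + TAG for c in chunks]         # each chunk carries an auth tag

    assert chunks_secure(0, 0, 0, 0) == [96]
    assert chunks_secure(20, 0, 0, 0) == [96]
    assert chunks_secure(0, 70, 0, 0) == [96, 112]
    assert chunks_secure(105, 0, 0, 0) == [96, 80]
    assert chunks_secure(105, 70, 0, 350) == [96, 80, 464]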
|
||||
|
||||
Message flow handshake
|
||||
----------------------
|
||||
|
@ -48,7 +48,9 @@ endif()
|
||||
if(WITH_RADOSGW)
|
||||
list(APPEND man_srcs
|
||||
radosgw.rst
|
||||
radosgw-admin.rst)
|
||||
radosgw-admin.rst
|
||||
rgw-orphan-list.rst
|
||||
ceph-diff-sorted.rst)
|
||||
endif()
|
||||
|
||||
if(WITH_RBD)
|
||||
|
ceph/doc/man/8/ceph-diff-sorted.rst (new file, 71 lines)
@ -0,0 +1,71 @@
|
||||
:orphan:
|
||||
|
||||
==========================================================
|
||||
ceph-diff-sorted -- compare two sorted files line by line
|
||||
==========================================================
|
||||
|
||||
.. program:: ceph-diff-sorted
|
||||
|
||||
Synopsis
|
||||
========
|
||||
|
||||
| **ceph-diff-sorted** *file1* *file2*
|
||||
|
||||
Description
|
||||
===========
|
||||
|
||||
:program:`ceph-diff-sorted` is a simplified *diff* utility optimized
|
||||
for comparing two files with lines that are lexically sorted.
|
||||
|
||||
The output is simplified in comparison to that of the standard `diff`
|
||||
tool available in POSIX systems. Angle brackets ('<' and '>') are used
|
||||
to show lines that appear in one file but not the other. The output is
|
||||
not compatible with the `patch` tool.
|
||||
|
||||
This tool was created in order to perform diffs of large files (e.g.,
|
||||
containing billions of lines) that the standard `diff` tool cannot
|
||||
handle efficiently. Knowing that the lines are sorted allows this to
|
||||
be done efficiently with minimal memory overhead.
|
||||
|
||||
The sorting of each file needs to be done lexically. Most POSIX
|
||||
systems use the *LANG* environment variable to determine the `sort`
|
||||
tool's sorting order. To sort lexically we would need something such
|
||||
as::
|
||||
|
||||
$ LANG=C sort some-file.txt >some-file-sorted.txt
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
Compare two files::
|
||||
|
||||
$ ceph-diff-sorted fileA.txt fileB.txt
|
||||
|
||||
Exit Status
|
||||
===========
|
||||
|
||||
When complete, the exit status will be set to one of the following:
|
||||
|
||||
0
|
||||
files same
|
||||
1
|
||||
files different
|
||||
2
|
||||
usage problem (e.g., wrong number of command-line arguments)
|
||||
3
|
||||
problem opening input file
|
||||
4
|
||||
bad file content (e.g., unsorted order or empty lines)
|
||||
|
||||
|
||||
Availability
|
||||
============
|
||||
|
||||
:program:`ceph-diff-sorted` is part of Ceph, a massively scalable,
|
||||
open-source, distributed storage system. Please refer to the Ceph
|
||||
documentation at http://ceph.com/docs for more information.
|
||||
|
||||
See also
|
||||
========
|
||||
|
||||
:doc:`rgw-orphan-list <rgw-orphan-list>`\(8)
|
ceph/doc/man/8/rgw-orphan-list.rst (new file, 69 lines)
@ -0,0 +1,69 @@
|
||||
:orphan:
|
||||
|
||||
==================================================================
|
||||
rgw-orphan-list -- list rados objects that are not indexed by rgw
|
||||
==================================================================
|
||||
|
||||
.. program:: rgw-orphan-list
|
||||
|
||||
Synopsis
|
||||
========
|
||||
|
||||
| **rgw-orphan-list**
|
||||
|
||||
Description
|
||||
===========
|
||||
|
||||
:program:`rgw-orphan-list` is an *EXPERIMENTAL* RADOS gateway user
|
||||
administration utility. It produces a listing of rados objects that
|
||||
are not directly or indirectly referenced through the bucket indexes
|
||||
on a pool. It places the results and intermediate files on the local
|
||||
filesystem rather than on the ceph cluster itself, and therefore will
|
||||
not itself consume additional cluster storage.
|
||||
|
||||
In theory orphans should not exist. However because ceph evolves
|
||||
rapidly, bugs do crop up, and they may result in orphans that are left
|
||||
behind.
|
||||
|
||||
In its current form this utility does not take any command-line
|
||||
arguments or options. It will list the available pools and prompt the
|
||||
user to enter the pool they would like to list orphans for.
|
||||
|
||||
Behind the scenes it runs `rados ls` and `radosgw-admin bucket
|
||||
radoslist ...` and produces a list of those entries that appear in the
|
||||
former but not the latter. Those entries are presumed to be the
|
||||
orphans.
|
||||
|
||||
Warnings
|
||||
========
|
||||
|
||||
This utility is currently considered *EXPERIMENTAL*.
|
||||
|
||||
This utility will produce false orphan entries for unindexed buckets
|
||||
since such buckets have no bucket indices that can provide the
|
||||
starting point for tracing.
|
||||
|
||||
Options
|
||||
=======
|
||||
|
||||
At present there are no options.
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
Launch the tool::
|
||||
|
||||
$ rgw-orphan-list
|
||||
|
||||
Availability
|
||||
============
|
||||
|
||||
:program:`radosgw-admin` is part of Ceph, a massively scalable, open-source,
|
||||
distributed storage system. Please refer to the Ceph documentation at
|
||||
http://ceph.com/docs for more information.
|
||||
|
||||
See also
|
||||
========
|
||||
|
||||
:doc:`radosgw-admin <radosgw-admin>`\(8)
|
||||
:doc:`ceph-diff-sorted <ceph-diff-sorted>`\(8)
|
@ -40,3 +40,5 @@
|
||||
man/8/rbd-replay
|
||||
man/8/rbd
|
||||
man/8/rbdmap
|
||||
man/8/rgw-orphan-list
|
||||
man/8/ceph-diff-sorted
|
||||
|
@ -25,11 +25,65 @@ The *prometheus* module is enabled with::
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
By default the module will accept HTTP requests on port ``9283`` on all
|
||||
IPv4 and IPv6 addresses on the host. The port and listen address are both
|
||||
.. note::
|
||||
|
||||
The Prometheus manager module needs to be restarted for configuration changes to be applied.
|
||||
|
||||
By default the module will accept HTTP requests on port ``9283`` on all IPv4
|
||||
and IPv6 addresses on the host. The port and listen address are both
|
||||
configurable with ``ceph config-key set``, with keys
|
||||
``mgr/prometheus/server_addr`` and ``mgr/prometheus/server_port``.
|
||||
This port is registered with Prometheus's `registry <https://github.com/prometheus/prometheus/wiki/Default-port-allocations>`_.
|
||||
``mgr/prometheus/server_addr`` and ``mgr/prometheus/server_port``. This port
|
||||
is registered with Prometheus's `registry
|
||||
<https://github.com/prometheus/prometheus/wiki/Default-port-allocations>`_.
|
||||
|
||||
::
|
||||
|
||||
ceph config set mgr mgr/prometheus/server_addr 0.0.0.0
|
||||
ceph config set mgr mgr/prometheus/server_port 9283
|
||||
|
||||
.. warning::
|
||||
|
||||
The ``scrape_interval`` of this module should always be set to match
|
||||
Prometheus' scrape interval to work properly and not cause any issues.
|
||||
|
||||
The Prometheus manager module is, by default, configured with a scrape interval
|
||||
of 15 seconds. The scrape interval in the module is used for caching purposes
|
||||
and to determine when a cache is stale.
|
||||
|
||||
It is not recommended to use a scrape interval below 10 seconds. It is
|
||||
recommended to use 15 seconds as scrape interval, though, in some cases it
|
||||
might be useful to increase the scrape interval.
|
||||
|
||||
To set a different scrape interval in the Prometheus module, set
|
||||
``scrape_interval`` to the desired value::
|
||||
|
||||
ceph config set mgr mgr/prometheus/scrape_interval 20
|
||||
|
||||
On large clusters (>1000 OSDs), the time to fetch the metrics may become
|
||||
significant. Without the cache, the Prometheus manager module could,
|
||||
especially in conjunction with multiple Prometheus instances, overload the
|
||||
manager and lead to unresponsive or crashing Ceph manager instances. Hence,
|
||||
the cache is enabled by default and cannot be disabled. This means that there
|
||||
is a possibility that the cache becomes stale. The cache is considered stale
|
||||
when the time to fetch the metrics from Ceph exceeds the configured
|
||||
``scrape_interval``.
|
||||
|
||||
If that is the case, **a warning will be logged** and the module will either
|
||||
|
||||
* respond with a 503 HTTP status code (service unavailable) or,
|
||||
* it will return the content of the cache, even though it might be stale.
|
||||
|
||||
This behavior can be configured. By default, it will return a 503 HTTP status
|
||||
code (service unavailable). You can set other options using the ``ceph config
|
||||
set`` commands.
|
||||
|
||||
To tell the module to respond with possibly stale data, set it to ``return``::
|
||||
|
||||
ceph config set mgr mgr/prometheus/stale_cache_strategy return
|
||||
|
||||
To tell the module to respond with "service unavailable", set it to ``fail``::
|
||||
|
||||
ceph config set mgr mgr/prometheus/stale_cache_strategy fail
|
||||
|
||||
.. _prometheus-rbd-io-statistics:
|
||||
|
||||
|
@ -123,6 +123,16 @@ The see the current configuration::
|
||||
|
||||
ceph telemetry status
|
||||
|
||||
Manually sending telemetry
|
||||
--------------------------
|
||||
|
||||
To send telemetry data ad hoc::
|
||||
|
||||
ceph telemetry send
|
||||
|
||||
In case telemetry is not enabled (with 'ceph telemetry on'), you need to add
|
||||
'--license sharing-1-0' to the 'ceph telemetry send' command.
|
||||
|
||||
Sending telemetry through a proxy
|
||||
---------------------------------
|
||||
|
||||
|
@ -218,6 +218,14 @@ instances or all radosgw-admin commands can be put into the ``[global]`` or the
|
||||
:Default: ``3600``
|
||||
|
||||
|
||||
``rgw gc max concurrent io``
|
||||
|
||||
:Description: The maximum number of concurrent IO operations that the RGW garbage
|
||||
collection thread will use when purging old data.
|
||||
:Type: Integer
|
||||
:Default: ``10``
|
||||
|
||||
|
||||
``rgw s3 success create obj status``
|
||||
|
||||
:Description: The alternate success status response for ``create-obj``.
|
||||
|
@ -65,6 +65,7 @@ you may write data with one API and retrieve it with the other.
|
||||
Data Layout in RADOS <layout>
|
||||
STS Lite <STSLite>
|
||||
Role <role>
|
||||
Orphan List and Associated Tooling <orphans>
|
||||
troubleshooting
|
||||
Manpage radosgw <../../man/8/radosgw>
|
||||
Manpage radosgw-admin <../../man/8/radosgw-admin>
|
||||
|
@ -46,6 +46,7 @@ Example request::
|
||||
{
|
||||
"input": {
|
||||
"method": "GET",
|
||||
"subuser": "subuser",
|
||||
"user_info": {
|
||||
"used_id": "john",
|
||||
"display_name": "John"
|
||||
|
ceph/doc/radosgw/orphans.rst (new file, 115 lines)
@ -0,0 +1,115 @@
|
||||
==================================
|
||||
Orphan List and Associated Tooling
|
||||
==================================
|
||||
|
||||
.. versionadded:: Luminous
|
||||
|
||||
.. contents::
|
||||
|
||||
Orphans are RADOS objects that are left behind after their associated
|
||||
RGW objects are removed. Normally these RADOS objects are removed
|
||||
automatically, either immediately or through a process known as
|
||||
"garbage collection". Over the history of RGW, however, there may have
|
||||
been bugs that prevented these RADOS objects from being deleted, and
|
||||
these RADOS objects may be consuming space on the Ceph cluster without
|
||||
being of any use. From the perspective of RGW, we call such RADOS
|
||||
objects "orphans".
|
||||
|
||||
Orphans Find -- DEPRECATED
|
||||
--------------------------
|
||||
|
||||
The `radosgw-admin` tool has/had three subcommands to help manage
|
||||
orphans, however these subcommands are (or will soon be)
|
||||
deprecated. These subcommands are:
|
||||
|
||||
::
|
||||
# radosgw-admin orphans find ...
|
||||
# radosgw-admin orphans finish ...
|
||||
# radosgw-admin orphans list-jobs ...
|
||||
|
||||
There are two key problems with these subcommands, however. First,
|
||||
these subcommands have not been actively maintained and therefore have
|
||||
not tracked RGW as it has evolved in terms of features and updates. As
|
||||
a result the confidence that these subcommands can accurately identify
|
||||
true orphans is presently low.
|
||||
|
||||
Second, these subcommands store intermediate results on the cluster
|
||||
itself. This can be problematic when cluster administrators are
|
||||
confronting insufficient storage space and want to remove orphans as a
|
||||
means of addressing the issue. The intermediate results could strain
|
||||
the existing cluster storage capacity even further.
|
||||
|
||||
For these reasons "orphans find" has been deprecated.
|
||||
|
||||
Orphan List
|
||||
-----------
|
||||
|
||||
Because "orphans find" has been deprecated, RGW now includes an
|
||||
additional tool -- 'rgw-orphan-list'. When run it will list the
|
||||
available pools and prompt the user to enter the name of the data
|
||||
pool. At that point the tool will, perhaps after an extended period of
|
||||
time, produce a local file containing the RADOS objects from the
|
||||
designated pool that appear to be orphans. The administrator is free
|
||||
to examine this file and the decide on a course of action, perhaps
|
||||
removing those RADOS objects from the designated pool.
|
||||
|
||||
All intermediate results are stored on the local file system rather
|
||||
than the Ceph cluster. So running the 'rgw-orphan-list' tool should
|
||||
have no appreciable impact on the amount of cluster storage consumed.
|
||||
|
||||
WARNING: Experimental Status
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The 'rgw-orphan-list' tool is new and therefore currently considered
|
||||
experimental. The list of orphans produced should be "sanity checked"
|
||||
before being used for a large delete operation.
|
||||
|
||||
WARNING: Specifying a Data Pool
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If a pool other than an RGW data pool is specified, the results of the
|
||||
tool will be erroneous. All RADOS objects found on such a pool will
|
||||
falsely be designated as orphans.
|
||||
|
||||
WARNING: Unindexed Buckets
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
RGW allows for unindexed buckets, that is buckets that do not maintain
|
||||
an index of their contents. This is not a typical configuration, but
|
||||
it is supported. Because the 'rgw-orphan-list' tool uses the bucket
|
||||
indices to determine what RADOS objects should exist, objects in the
|
||||
unindexed buckets will falsely be listed as orphans.
|
||||
|
||||
|
||||
RADOS List
|
||||
----------
|
||||
|
||||
One of the sub-steps in computing a list of orphans is to map each RGW
|
||||
object into its corresponding set of RADOS objects. This is done using
|
||||
a subcommand of 'radosgw-admin'.
|
||||
|
||||
::
|
||||
# radosgw-admin bucket radoslist [--bucket={bucket-name}]
|
||||
|
||||
The subcommand will produce a list of RADOS objects that support all
|
||||
of the RGW objects. If a bucket is specified then the subcommand will
|
||||
only produce a list of RADOS objects that correspond back to the RGW
|
||||
objects in the specified bucket.
|
||||
|
||||
Note: Shared Bucket Markers
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Some administrators will be aware of the coding schemes used to name
|
||||
the RADOS objects that correspond to RGW objects, which include a
|
||||
"marker" unique to a given bucket.
|
||||
|
||||
RADOS objects that correspond with the contents of one RGW bucket,
|
||||
however, may contain a marker that specifies a different bucket. This
|
||||
behavior is a consequence of the "shallow copy" optimization used by
|
||||
RGW. When larger objects are copied from bucket to bucket, only the
|
||||
"head" objects are actually copied, and the tail objects are
|
||||
shared. Those shared objects will contain the marker of the original
|
||||
bucket.
|
||||
|
||||
.. _Data Layout in RADOS : ../layout
|
||||
.. _Pool Placement and Storage Classes : ../placement
|
@ -431,7 +431,7 @@
|
||||
"strokeWidth": 1,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count by(device_class) (ceph_osd_metadata)",
|
||||
"expr": "count by (device_class) (ceph_osd_metadata)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device_class}}",
|
||||
|
@ -27,7 +27,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Detailed Performance of RBD Images (IOPS/Latency)",
|
||||
"description": "Detailed Performance of RBD Images (IOPS/Throughput/Latency)",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
@ -77,21 +77,21 @@
|
||||
"expr": "irate(ceph_rbd_write_ops{pool=\"$Pool\", image=\"$Image\"}[30s])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write {{instance}}",
|
||||
"legendFormat": "Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "irate(ceph_rbd_read_ops{pool=\"$Pool\", image=\"$Image\"}[30s])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read {{instance}}",
|
||||
"legendFormat": "Read",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "IOPS Count",
|
||||
"title": "IOPS",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
@ -168,21 +168,21 @@
|
||||
"expr": "irate(ceph_rbd_write_bytes{pool=\"$Pool\", image=\"$Image\"}[30s])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read {{instance}}",
|
||||
"legendFormat": "Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "irate(ceph_rbd_read_bytes{pool=\"$Pool\", image=\"$Image\"}[30s])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write {{instance}}",
|
||||
"legendFormat": "Read",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "IO Bytes per Second",
|
||||
"title": "Throughput",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
@ -259,21 +259,21 @@
|
||||
"expr": "irate(ceph_rbd_write_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) / irate(ceph_rbd_write_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write Latency Sum",
|
||||
"legendFormat": "Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "irate(ceph_rbd_read_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) / irate(ceph_rbd_read_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read Latency Sum",
|
||||
"legendFormat": "Read",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Averange Latency",
|
||||
"title": "Average Latency",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
|
@ -416,7 +416,7 @@
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on(image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))",
|
||||
"expr": "topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
|
ceph/qa/cephfs/clusters/1-mds-1-client-micro.yaml (new file, 7 lines)
@ -0,0 +1,7 @@
|
||||
roles:
|
||||
- [mon.a, mon.b, mon.c, mgr.x, mds.a, osd.0, osd.1, osd.2, osd.3]
|
||||
- [client.0]
|
||||
openstack:
|
||||
- volumes: # attached to each instance
|
||||
count: 4
|
||||
size: 10 # GB
|
ceph/qa/objectstore/bluestore-hybrid.yaml (new file, 40 lines)
@ -0,0 +1,40 @@
|
||||
overrides:
|
||||
thrashosds:
|
||||
bdev_inject_crash: 2
|
||||
bdev_inject_crash_probability: .5
|
||||
ceph:
|
||||
fs: xfs
|
||||
conf:
|
||||
osd:
|
||||
osd objectstore: bluestore
|
||||
bluestore block size: 96636764160
|
||||
debug bluestore: 20
|
||||
debug bluefs: 20
|
||||
debug rocksdb: 10
|
||||
bluestore fsck on mount: true
|
||||
bluestore allocator: hybrid
|
||||
bluefs allocator: hybrid
|
||||
# lower the full ratios since we can fill up a 100gb osd so quickly
|
||||
mon osd full ratio: .9
|
||||
mon osd backfillfull_ratio: .85
|
||||
mon osd nearfull ratio: .8
|
||||
osd failsafe full ratio: .95
|
||||
# this doesn't work with failures bc the log writes are not atomic across the two backends
|
||||
# bluestore bluefs env mirror: true
|
||||
ceph-deploy:
|
||||
fs: xfs
|
||||
bluestore: yes
|
||||
conf:
|
||||
osd:
|
||||
osd objectstore: bluestore
|
||||
bluestore block size: 96636764160
|
||||
debug bluestore: 20
|
||||
debug bluefs: 20
|
||||
debug rocksdb: 10
|
||||
bluestore fsck on mount: true
|
||||
# lower the full ratios since we can fill up a 100gb osd so quickly
|
||||
mon osd full ratio: .9
|
||||
mon osd backfillfull_ratio: .85
|
||||
mon osd nearfull ratio: .8
|
||||
osd failsafe full ratio: .95
|
||||
|
@ -181,8 +181,8 @@ function TEST_mon_last_clean_epoch() {
|
||||
|
||||
sleep 5
|
||||
|
||||
ceph tell osd.* injectargs '--osd-beacon-report-interval 10' || exit 1
|
||||
ceph tell mon.* injectargs \
|
||||
ceph tell 'osd.*' injectargs '--osd-beacon-report-interval 10' || exit 1
|
||||
ceph tell 'mon.*' injectargs \
|
||||
'--mon-min-osdmap-epochs 2 --paxos-service-trim-min 1' || exit 1
|
||||
|
||||
create_pool foo 32
|
||||
|
ceph/qa/standalone/osd/bad-inc-map.sh (new executable file, 62 lines)
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
|
||||
|
||||
mon_port=$(get_unused_port)
|
||||
|
||||
function run() {
|
||||
local dir=$1
|
||||
shift
|
||||
|
||||
export CEPH_MON="127.0.0.1:$mon_port"
|
||||
export CEPH_ARGS
|
||||
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
|
||||
CEPH_ARGS+="--mon-host=$CEPH_MON "
|
||||
set -e
|
||||
|
||||
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
|
||||
for func in $funcs ; do
|
||||
setup $dir || return 1
|
||||
$func $dir || return 1
|
||||
teardown $dir || return 1
|
||||
done
|
||||
}
|
||||
|
||||
function TEST_bad_inc_map() {
|
||||
local dir=$1
|
||||
|
||||
run_mon $dir a
|
||||
run_mgr $dir x
|
||||
run_osd $dir 0
|
||||
run_osd $dir 1
|
||||
run_osd $dir 2
|
||||
|
||||
ceph config set osd.2 osd_inject_bad_map_crc_probability 1
|
||||
|
||||
# osd map churn
|
||||
create_pool foo 8
|
||||
ceph osd pool set foo min_size 1
|
||||
ceph osd pool set foo min_size 2
|
||||
|
||||
sleep 5
|
||||
|
||||
# make sure all the OSDs are still up
|
||||
TIMEOUT=10 wait_for_osd up 0
|
||||
TIMEOUT=10 wait_for_osd up 1
|
||||
TIMEOUT=10 wait_for_osd up 2
|
||||
|
||||
# check for the signature in the log
|
||||
grep "injecting map crc failure" $dir/osd.2.log || return 1
|
||||
grep "bailing because last" $dir/osd.2.log || return 1
|
||||
|
||||
echo success
|
||||
|
||||
delete_pool foo
|
||||
kill_daemons $dir || return 1
|
||||
}
|
||||
|
||||
main bad-inc-map "$@"
|
||||
|
||||
# Local Variables:
|
||||
# compile-command: "make -j4 && ../qa/run-standalone.sh bad-inc-map.sh"
|
||||
# End:
|
@ -45,7 +45,7 @@ walk(if type == "object" then del(.mtime) else . end)
|
||||
| walk(if type == "object" then del(.version) else . end)
|
||||
| walk(if type == "object" then del(.prior_version) else . end)'
|
||||
|
||||
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print json.dumps(ud, sort_keys=True, indent=2)'
|
||||
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print(json.dumps(ud, sort_keys=True, indent=2))'
|
||||
|
||||
function run() {
|
||||
local dir=$1
|
||||
|
@ -187,6 +187,120 @@ function TEST_interval_changes() {
|
||||
teardown $dir || return 1
|
||||
}
|
||||
|
||||
function _scrub_abort() {
|
||||
local dir=$1
|
||||
local poolname=test
|
||||
local OSDS=3
|
||||
local objects=1000
|
||||
local type=$2
|
||||
|
||||
TESTDATA="testdata.$$"
|
||||
if test $type = "scrub";
|
||||
then
|
||||
stopscrub="noscrub"
|
||||
check="noscrub"
|
||||
else
|
||||
stopscrub="nodeep-scrub"
|
||||
check="nodeep_scrub"
|
||||
fi
|
||||
|
||||
|
||||
setup $dir || return 1
|
||||
run_mon $dir a --osd_pool_default_size=3 || return 1
|
||||
run_mgr $dir x || return 1
|
||||
for osd in $(seq 0 $(expr $OSDS - 1))
|
||||
do
|
||||
run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \
|
||||
--osd_deep_scrub_randomize_ratio=0.0 \
|
||||
--osd_scrub_sleep=5.0 \
|
||||
--osd_scrub_interval_randomize_ratio=0 || return 1
|
||||
done
|
||||
|
||||
# Create a pool with a single pg
|
||||
create_pool $poolname 1 1
|
||||
wait_for_clean || return 1
|
||||
poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')
|
||||
|
||||
dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
|
||||
for i in `seq 1 $objects`
|
||||
do
|
||||
rados -p $poolname put obj${i} $TESTDATA
|
||||
done
|
||||
rm -f $TESTDATA
|
||||
|
||||
local primary=$(get_primary $poolname obj1)
|
||||
local pgid="${poolid}.0"
|
||||
|
||||
CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_$type $pgid
|
||||
# deep-scrub won't start without scrub noticing
|
||||
if [ "$type" = "deep_scrub" ];
|
||||
then
|
||||
CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid
|
||||
fi
|
||||
|
||||
# Wait for scrubbing to start
|
||||
set -o pipefail
|
||||
found="no"
|
||||
for i in $(seq 0 200)
|
||||
do
|
||||
flush_pg_stats
|
||||
if ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing"
|
||||
then
|
||||
found="yes"
|
||||
#ceph pg dump pgs
|
||||
break
|
||||
fi
|
||||
done
|
||||
set +o pipefail
|
||||
|
||||
if test $found = "no";
|
||||
then
|
||||
echo "Scrubbing never started"
|
||||
return 1
|
||||
fi
|
||||
|
||||
ceph osd set $stopscrub
|
||||
|
||||
# Wait for scrubbing to end
|
||||
set -o pipefail
|
||||
for i in $(seq 0 200)
|
||||
do
|
||||
flush_pg_stats
|
||||
if ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing"
|
||||
then
|
||||
continue
|
||||
fi
|
||||
#ceph pg dump pgs
|
||||
break
|
||||
done
|
||||
set +o pipefail
|
||||
|
||||
sleep 5
|
||||
|
||||
if ! grep "$check set, aborting" $dir/osd.${primary}.log
|
||||
then
|
||||
echo "Abort not seen in log"
|
||||
return 1
|
||||
fi
|
||||
|
||||
local last_scrub=$(get_last_scrub_stamp $pgid)
|
||||
ceph osd unset noscrub
|
||||
TIMEOUT=$(($objects / 2))
|
||||
wait_for_scrub $pgid "$last_scrub" || return 1
|
||||
|
||||
teardown $dir || return 1
|
||||
}
|
||||
|
||||
function TEST_scrub_abort() {
|
||||
local dir=$1
|
||||
_scrub_abort $dir scrub
|
||||
}
|
||||
|
||||
function TEST_deep_scrub_abort() {
|
||||
local dir=$1
|
||||
_scrub_abort $dir deep_scrub
|
||||
}
|
||||
|
||||
main osd-scrub-test "$@"
|
||||
|
||||
# Local Variables:
|
||||
|
@ -1034,7 +1034,7 @@ def main(argv):
|
||||
|
||||
# Specify a bad --op command
|
||||
cmd = (CFSD_PREFIX + "--op oops").format(osd=ONEOSD)
|
||||
ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log)")
|
||||
ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)")
|
||||
|
||||
# Provide just the object param not a command
|
||||
cmd = (CFSD_PREFIX + "object").format(osd=ONEOSD)
|
||||
|
@ -1,5 +1,8 @@
|
||||
overrides:
|
||||
ceph:
|
||||
conf:
|
||||
mgr:
|
||||
debug client: 10
|
||||
log-whitelist:
|
||||
- OSD full dropping all updates
|
||||
- OSD near full
|
||||
|
1
ceph/qa/suites/fs/upgrade/volumes/.qa
Symbolic link
@ -0,0 +1 @@
|
||||
../.qa/
|
0
ceph/qa/suites/fs/upgrade/volumes/import-legacy/%
Normal file
1
ceph/qa/suites/fs/upgrade/volumes/import-legacy/.qa
Symbolic link
@ -0,0 +1 @@
|
||||
../.qa/
|
@ -0,0 +1 @@
|
||||
../../../../../cephfs/objectstore-ec/bluestore-bitmap.yaml
|
1
ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/.qa
Symbolic link
@ -0,0 +1 @@
|
||||
../.qa/
|
@ -0,0 +1,7 @@
|
||||
roles:
|
||||
- [mon.a, mon.b, mon.c, mgr.x, mgr.y, mds.a, mds.b, mds.c, osd.0, osd.1, osd.2, osd.3]
|
||||
- [client.0, client.1]
|
||||
openstack:
|
||||
- volumes: # attached to each instance
|
||||
count: 4
|
||||
size: 10 # GB
|
1
ceph/qa/suites/fs/upgrade/volumes/import-legacy/conf
Symbolic link
@ -0,0 +1 @@
|
||||
.qa/cephfs/conf/
|
1
ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/.qa
Symbolic link
@ -0,0 +1 @@
|
||||
../.qa/
|
@ -0,0 +1 @@
|
||||
.qa/cephfs/overrides/frag_enable.yaml
|
@ -0,0 +1,5 @@
|
||||
overrides:
|
||||
ceph:
|
||||
conf:
|
||||
global:
|
||||
mon pg warn min per osd: 0
|
@ -0,0 +1 @@
|
||||
.qa/cephfs/overrides/whitelist_health.yaml
|
@ -0,0 +1 @@
|
||||
.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml
|
1
ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/.qa
Symbolic link
@ -0,0 +1 @@
|
||||
../.qa/
|
@ -0,0 +1,42 @@
|
||||
meta:
|
||||
- desc: |
|
||||
install ceph/mimic latest
|
||||
tasks:
|
||||
- install:
|
||||
branch: mimic #tag: v13.2.8
|
||||
exclude_packages:
|
||||
- librados3
|
||||
- ceph-mgr-dashboard
|
||||
- ceph-mgr-diskprediction-local
|
||||
- ceph-mgr-diskprediction-cloud
|
||||
- ceph-mgr-rook
|
||||
- ceph-mgr-cephadm
|
||||
- cephadm
|
||||
extra_packages: ['librados2']
|
||||
- print: "**** done installing mimic"
|
||||
- ceph:
|
||||
mon_bind_addrvec: false
|
||||
mon_bind_msgr2: false
|
||||
log-whitelist:
|
||||
- overall HEALTH_
|
||||
- \(FS_
|
||||
- \(MDS_
|
||||
- \(OSD_
|
||||
- \(MON_DOWN\)
|
||||
- \(CACHE_POOL_
|
||||
- \(POOL_
|
||||
- \(MGR_DOWN\)
|
||||
- \(PG_
|
||||
- \(SMALLER_PGP_NUM\)
|
||||
- Monitor daemon marked osd
|
||||
- Behind on trimming
|
||||
- Manager daemon
|
||||
conf:
|
||||
global:
|
||||
mon warn on pool no app: false
|
||||
ms bind msgr2: false
|
||||
- exec:
|
||||
osd.0:
|
||||
- ceph osd require-osd-release mimic
|
||||
- ceph osd set-require-min-compat-client mimic
|
||||
- print: "**** done ceph"
|
@ -0,0 +1,33 @@
|
||||
tasks:
|
||||
- workunit:
|
||||
clients:
|
||||
client.0:
|
||||
- fs/upgrade/volume_client
|
||||
env:
|
||||
ACTION: create
|
||||
- print: "**** fs/volume_client create"
|
||||
- ceph-fuse:
|
||||
client.0:
|
||||
mount_path: /volumes/_nogroup/vol_isolated
|
||||
mountpoint: mnt.0
|
||||
auth_id: vol_data_isolated
|
||||
client.1:
|
||||
mount_path: /volumes/_nogroup/vol_default
|
||||
mountpoint: mnt.1
|
||||
auth_id: vol_default
|
||||
- print: "**** ceph-fuse vol_isolated"
|
||||
- workunit:
|
||||
clients:
|
||||
client.0:
|
||||
- fs/upgrade/volume_client
|
||||
env:
|
||||
ACTION: populate
|
||||
cleanup: false
|
||||
- workunit:
|
||||
clients:
|
||||
client.1:
|
||||
- fs/upgrade/volume_client
|
||||
env:
|
||||
ACTION: populate
|
||||
cleanup: false
|
||||
- print: "**** fs/volume_client populate"
|
@ -0,0 +1,54 @@
|
||||
overrides:
|
||||
ceph:
|
||||
mon_bind_msgr2: false
|
||||
mon_bind_addrvec: false
|
||||
log-whitelist:
|
||||
- scrub mismatch
|
||||
- ScrubResult
|
||||
- wrongly marked
|
||||
- \(POOL_APP_NOT_ENABLED\)
|
||||
- \(SLOW_OPS\)
|
||||
- overall HEALTH_
|
||||
- \(MON_MSGR2_NOT_ENABLED\)
|
||||
- slow request
|
||||
conf:
|
||||
global:
|
||||
bluestore warn on legacy statfs: false
|
||||
bluestore warn on no per pool omap: false
|
||||
mon:
|
||||
mon warn on osd down out interval zero: false
|
||||
|
||||
tasks:
|
||||
- mds_pre_upgrade:
|
||||
- print: "**** done mds pre-upgrade sequence"
|
||||
- install.upgrade:
|
||||
mon.a:
|
||||
- print: "**** done install.upgrade both hosts"
|
||||
- ceph.restart:
|
||||
daemons: [mon.*, mgr.*]
|
||||
mon-health-to-clog: false
|
||||
wait-for-healthy: false
|
||||
- exec:
|
||||
mon.a:
|
||||
- ceph config set global mon_warn_on_msgr2_not_enabled false
|
||||
- ceph.healthy:
|
||||
- ceph.restart:
|
||||
daemons: [osd.*]
|
||||
wait-for-healthy: false
|
||||
wait-for-osds-up: true
|
||||
- ceph.stop: [mds.*]
|
||||
- ceph.restart:
|
||||
daemons: [mds.*]
|
||||
wait-for-healthy: false
|
||||
wait-for-osds-up: true
|
||||
- exec:
|
||||
mon.a:
|
||||
- ceph mon enable-msgr2
|
||||
- ceph versions
|
||||
- ceph osd dump -f json-pretty
|
||||
- ceph config rm global mon_warn_on_msgr2_not_enabled
|
||||
- ceph osd require-osd-release nautilus
|
||||
- for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done
|
||||
#- ceph osd set-require-min-compat-client nautilus
|
||||
- ceph.healthy:
|
||||
- print: "**** done ceph.restart"
|
@ -0,0 +1,25 @@
|
||||
overrides:
|
||||
ceph:
|
||||
log-whitelist:
|
||||
- missing required features
|
||||
tasks:
|
||||
- exec:
|
||||
mon.a:
|
||||
- ceph fs dump --format=json-pretty
|
||||
- ceph fs volume ls
|
||||
- ceph fs subvolume ls cephfs
|
||||
- workunit:
|
||||
clients:
|
||||
client.0:
|
||||
- fs/upgrade/volume_client
|
||||
env:
|
||||
ACTION: verify
|
||||
cleanup: false
|
||||
- workunit:
|
||||
clients:
|
||||
client.1:
|
||||
- fs/upgrade/volume_client
|
||||
env:
|
||||
ACTION: verify
|
||||
cleanup: false
|
||||
- print: "**** fs/volume_client verify"
|
@ -0,0 +1 @@
|
||||
.qa/distros/all/ubuntu_18.04.yaml
|
@ -19,6 +19,7 @@ tasks:
|
||||
- \(MGR_ZABBIX_
|
||||
- foo bar
|
||||
- Failed to open Telegraf
|
||||
- evicting unresponsive client
|
||||
- cephfs_test_runner:
|
||||
modules:
|
||||
- tasks.mgr.test_module_selftest
|
||||
|
1
ceph/qa/suites/rados/perf/distros/ubuntu_16.04.yaml
Symbolic link
@ -0,0 +1 @@
|
||||
../../../../distros/supported-all-distro/ubuntu_16.04.yaml
|
1
ceph/qa/suites/rados/perf/distros/ubuntu_latest.yaml
Symbolic link
@ -0,0 +1 @@
|
||||
../../../../distros/supported-all-distro/ubuntu_latest.yaml
|
@ -1 +0,0 @@
|
||||
../basic/supported-random-distro$
|
@ -1,5 +1,6 @@
|
||||
# see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126
|
||||
os_type: centos
|
||||
os_version: '7.8'
|
||||
|
||||
openstack:
|
||||
- volumes: # attached to each instance
|
||||
|
@ -0,0 +1,7 @@
|
||||
overrides:
|
||||
ceph:
|
||||
conf:
|
||||
osd:
|
||||
osd inject bad map crc probability: 0.1
|
||||
log-whitelist:
|
||||
- failed to encode map
|
@ -1,5 +1,6 @@
|
||||
# see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126
|
||||
os_type: centos
|
||||
os_version: '7.8'
|
||||
|
||||
overrides:
|
||||
install:
|
||||
|
1
ceph/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml
Normal file
@ -0,0 +1 @@
|
||||
|
@ -1,3 +0,0 @@
|
||||
overrides:
|
||||
s3a-hadoop:
|
||||
hadoop-version: '2.7.3'
|
@ -1,3 +0,0 @@
|
||||
overrides:
|
||||
s3a-hadoop:
|
||||
hadoop-version: '2.8.0'
|
3
ceph/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml
Normal file
@ -0,0 +1,3 @@
|
||||
overrides:
|
||||
s3a-hadoop:
|
||||
hadoop-version: '3.2.0'
|
0
ceph/qa/suites/rgw/tools/+
Normal file
1
ceph/qa/suites/rgw/tools/.qa
Symbolic link
@ -0,0 +1 @@
|
||||
../.qa
|
1
ceph/qa/suites/rgw/tools/centos_latest.yaml
Symbolic link
@ -0,0 +1 @@
|
||||
.qa/distros/supported/centos_latest.yaml
|
9
ceph/qa/suites/rgw/tools/cluster.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
roles:
|
||||
- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0]
|
||||
openstack:
|
||||
- volumes: # attached to each instance
|
||||
count: 1
|
||||
size: 10 # GB
|
||||
overrides:
|
||||
rgw:
|
||||
frontend: beast
|
19
ceph/qa/suites/rgw/tools/tasks.yaml
Normal file
@ -0,0 +1,19 @@
|
||||
tasks:
|
||||
- install:
|
||||
- ceph:
|
||||
- rgw:
|
||||
client.0:
|
||||
# force rgw_dns_name to be set to the fully qualified host name;
|
||||
# the host name is appended to this empty string
|
||||
dns-name: ''
|
||||
- workunit:
|
||||
clients:
|
||||
client.0:
|
||||
- rgw/test_rgw_orphan_list.sh
|
||||
overrides:
|
||||
ceph:
|
||||
conf:
|
||||
client:
|
||||
debug rgw: 20
|
||||
debug ms: 1
|
||||
rgw enable static website: false
|
@ -0,0 +1 @@
|
||||
../.qa/
|
@ -3,7 +3,7 @@ meta:
|
||||
librbd python api tests
|
||||
tasks:
|
||||
- workunit:
|
||||
tag: v14.2.2
|
||||
tag: v14.2.10
|
||||
clients:
|
||||
client.0:
|
||||
- rbd/test_librbd_python.sh
|
||||
|
@ -1 +0,0 @@
|
||||
../../../../objectstore
|
@ -0,0 +1 @@
|
||||
../.qa/
|
@ -0,0 +1 @@
|
||||
.qa/objectstore/bluestore-bitmap.yaml
|
@ -0,0 +1 @@
|
||||
.qa/objectstore/filestore-xfs.yaml
|
@ -112,30 +112,35 @@ def task(ctx, config):
|
||||
if client_config is None:
|
||||
client_config = {}
|
||||
|
||||
auth_id = client_config.get("auth_id", id_)
|
||||
|
||||
skip = client_config.get("skip", False)
|
||||
if skip:
|
||||
skipped[id_] = skip
|
||||
continue
|
||||
|
||||
if id_ not in all_mounts:
|
||||
fuse_mount = FuseMount(ctx, client_config, testdir, id_, remote)
|
||||
fuse_mount = FuseMount(ctx, client_config, testdir, auth_id, remote)
|
||||
all_mounts[id_] = fuse_mount
|
||||
else:
|
||||
# Catch bad configs where someone has e.g. tried to use ceph-fuse and kcephfs for the same client
|
||||
assert isinstance(all_mounts[id_], FuseMount)
|
||||
|
||||
if not config.get("disabled", False) and client_config.get('mounted', True):
|
||||
mounted_by_me[id_] = all_mounts[id_]
|
||||
mounted_by_me[id_] = {"config": client_config, "mount": all_mounts[id_]}
|
||||
|
||||
ctx.mounts = all_mounts
|
||||
|
||||
# Mount any clients we have been asked to (default to mount all)
|
||||
log.info('Mounting ceph-fuse clients...')
|
||||
for mount in mounted_by_me.values():
|
||||
mount.mount()
|
||||
for info in mounted_by_me.values():
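# honour per-client overrides from the task config (custom mountpoint and mount_path)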
|
||||
config = info["config"]
|
||||
mount_path = config.get("mount_path")
|
||||
mountpoint = config.get("mountpoint")
|
||||
info["mount"].mount(mountpoint=mountpoint, mount_path=mount_path)
|
||||
|
||||
for mount in mounted_by_me.values():
|
||||
mount.wait_until_mounted()
|
||||
for info in mounted_by_me.values():
|
||||
info["mount"].wait_until_mounted()
|
||||
|
||||
# Umount any pre-existing clients that we have not been asked to mount
|
||||
for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()):
|
||||
@ -148,7 +153,8 @@ def task(ctx, config):
|
||||
finally:
|
||||
log.info('Unmounting ceph-fuse clients...')
|
||||
|
||||
for mount in mounted_by_me.values():
|
||||
for info in mounted_by_me.values():
|
||||
# Conditional because an inner context might have umounted it
|
||||
mount = info["mount"]
|
||||
if mount.is_mounted():
|
||||
mount.umount_wait()
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
import unittest
|
||||
from unittest import case
|
||||
import time
|
||||
@ -8,6 +7,8 @@ from teuthology.orchestra.run import CommandFailedError
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
class TestTimeoutError(RuntimeError):
|
||||
pass
|
||||
|
||||
class CephTestCase(unittest.TestCase):
|
||||
"""
|
||||
@ -32,6 +33,8 @@ class CephTestCase(unittest.TestCase):
|
||||
REQUIRE_MEMSTORE = False
|
||||
|
||||
def setUp(self):
|
||||
self._mon_configs_set = set()
|
||||
|
||||
self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
|
||||
"Starting test {0}".format(self.id()))
|
||||
|
||||
@ -43,12 +46,42 @@ class CephTestCase(unittest.TestCase):
|
||||
raise case.SkipTest("Require `memstore` OSD backend (test " \
|
||||
"would take too long on full sized OSDs")
|
||||
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
self.config_clear()
|
||||
|
||||
self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
|
||||
"Ended test {0}".format(self.id()))
|
||||
|
||||
def config_clear(self):
|
||||
for section, key in self._mon_configs_set:
|
||||
self.config_rm(section, key)
|
||||
self._mon_configs_set.clear()
|
||||
|
||||
def _fix_key(self, key):
|
||||
return str(key).replace(' ', '_')
|
||||
|
||||
def config_get(self, section, key):
|
||||
key = self._fix_key(key)
|
||||
return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "get", section, key).strip()
|
||||
|
||||
def config_show(self, entity, key):
|
||||
key = self._fix_key(key)
|
||||
return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "show", entity, key).strip()
|
||||
|
||||
def config_minimal(self):
|
||||
return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "generate-minimal-conf").strip()
|
||||
|
||||
def config_rm(self, section, key):
|
||||
key = self._fix_key(key)
|
||||
self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "rm", section, key)
|
||||
# simplification: skip removing from _mon_configs_set;
|
||||
# let tearDown clear everything again
|
||||
|
||||
def config_set(self, section, key, value):
|
||||
key = self._fix_key(key)
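# remember what was set so config_clear() (called from tearDown) can undo it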
|
||||
self._mon_configs_set.add((section, key))
|
||||
self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "set", section, key, str(value))
|
||||
|
||||
def assert_cluster_log(self, expected_pattern, invert_match=False,
|
||||
timeout=10, watch_channel=None):
|
||||
"""
|
||||
@ -142,7 +175,7 @@ class CephTestCase(unittest.TestCase):
|
||||
raise RuntimeError("wait_until_equal: forbidden value {0} seen".format(val))
|
||||
else:
|
||||
if elapsed >= timeout:
|
||||
raise RuntimeError("Timed out after {0} seconds waiting for {1} (currently {2})".format(
|
||||
raise TestTimeoutError("Timed out after {0} seconds waiting for {1} (currently {2})".format(
|
||||
elapsed, expect_val, val
|
||||
))
|
||||
else:
|
||||
@ -161,7 +194,7 @@ class CephTestCase(unittest.TestCase):
|
||||
return
|
||||
else:
|
||||
if elapsed >= timeout:
|
||||
raise RuntimeError("Timed out after {0}s".format(elapsed))
|
||||
raise TestTimeoutError("Timed out after {0}s".format(elapsed))
|
||||
else:
|
||||
log.debug("wait_until_true: waiting...")
|
||||
time.sleep(period)
|
||||
|
@ -10,6 +10,7 @@ from tasks.cephfs.fuse_mount import FuseMount
|
||||
|
||||
from teuthology.orchestra import run
|
||||
from teuthology.orchestra.run import CommandFailedError
|
||||
from teuthology.contextutil import safe_while
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -169,8 +170,6 @@ class CephFSTestCase(CephTestCase):
|
||||
self.configs_set = set()
|
||||
|
||||
def tearDown(self):
|
||||
super(CephFSTestCase, self).tearDown()
|
||||
|
||||
self.mds_cluster.clear_firewall()
|
||||
for m in self.mounts:
|
||||
m.teardown()
|
||||
@ -181,6 +180,8 @@ class CephFSTestCase(CephTestCase):
|
||||
for subsys, key in self.configs_set:
|
||||
self.mds_cluster.clear_ceph_conf(subsys, key)
|
||||
|
||||
return super(CephFSTestCase, self).tearDown()
|
||||
|
||||
def set_conf(self, subsys, key, value):
|
||||
self.configs_set.add((subsys, key))
|
||||
self.mds_cluster.set_ceph_conf(subsys, key, value)
|
||||
@ -266,6 +267,10 @@ class CephFSTestCase(CephTestCase):
|
||||
if core_dir: # Non-default core_pattern with a directory in it
|
||||
# We have seen a core_pattern that looks like it's from teuthology's coredump
|
||||
# task, so proceed to clear out the core file
|
||||
if core_dir[0] == '|':
|
||||
log.info("Piped core dumps to program {0}, skip cleaning".format(core_dir[1:]))
|
||||
return;
|
||||
|
||||
log.info("Clearing core from directory: {0}".format(core_dir))
|
||||
|
||||
# Verify that we see the expected single coredump
|
||||
@ -304,3 +309,11 @@ class CephFSTestCase(CephTestCase):
|
||||
return subtrees
|
||||
time.sleep(pause)
|
||||
raise RuntimeError("rank {0} failed to reach desired subtree state", rank)
|
||||
|
||||
def _wait_until_scrub_complete(self, path="/", recursive=True):
|
||||
out_json = self.fs.rank_tell(["scrub", "start", path] + (["recursive"] if recursive else []))
|
||||
with safe_while(sleep=10, tries=10) as proceed:
|
||||
while proceed():
|
||||
out_json = self.fs.rank_tell(["scrub", "status"])
|
||||
if out_json['status'] == "no active scrubs running":
|
||||
break;
|
||||
|
@ -27,7 +27,9 @@ class FuseMount(CephFSMount):
|
||||
self.inst = None
|
||||
self.addr = None
|
||||
|
||||
def mount(self, mount_path=None, mount_fs_name=None):
|
||||
def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
|
||||
if mountpoint is not None:
|
||||
self.mountpoint = mountpoint
|
||||
self.setupfs(name=mount_fs_name)
|
||||
|
||||
try:
|
||||
@ -51,14 +53,8 @@ class FuseMount(CephFSMount):
|
||||
log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format(
|
||||
id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
|
||||
|
||||
self.client_remote.run(
|
||||
args=[
|
||||
'mkdir',
|
||||
'--',
|
||||
self.mountpoint,
|
||||
],
|
||||
timeout=(15*60)
|
||||
)
|
||||
self.client_remote.run(args=['mkdir', '-p', self.mountpoint],
|
||||
timeout=(15*60), cwd=self.test_dir)
|
||||
|
||||
run_cmd = [
|
||||
'sudo',
|
||||
@ -83,6 +79,7 @@ class FuseMount(CephFSMount):
|
||||
self.mountpoint,
|
||||
]
|
||||
|
||||
cwd = self.test_dir
|
||||
if self.client_config.get('valgrind') is not None:
|
||||
run_cmd = misc.get_valgrind_args(
|
||||
self.test_dir,
|
||||
@ -90,17 +87,23 @@ class FuseMount(CephFSMount):
|
||||
run_cmd,
|
||||
self.client_config.get('valgrind'),
|
||||
)
|
||||
cwd = None # misc.get_valgrind_args chdir for us
|
||||
|
||||
run_cmd.extend(fuse_cmd)
|
||||
|
||||
def list_connections():
|
||||
from teuthology.misc import get_system_type
|
||||
|
||||
conn_dir = "/sys/fs/fuse/connections"
|
||||
|
||||
self.client_remote.run(args=['sudo', 'modprobe', 'fuse'],
|
||||
check_status=False)
|
||||
self.client_remote.run(
|
||||
args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"],
|
||||
check_status=False,
|
||||
timeout=(15*60)
|
||||
)
|
||||
args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir],
|
||||
check_status=False, timeout=(30))
|
||||
|
||||
try:
|
||||
ls_str = self.client_remote.sh("ls /sys/fs/fuse/connections",
|
||||
ls_str = self.client_remote.sh("ls " + conn_dir,
|
||||
stdout=StringIO(),
|
||||
timeout=(15*60)).strip()
|
||||
except CommandFailedError:
|
||||
@ -118,6 +121,7 @@ class FuseMount(CephFSMount):
|
||||
|
||||
proc = self.client_remote.run(
|
||||
args=run_cmd,
|
||||
cwd=cwd,
|
||||
logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)),
|
||||
stdin=run.PIPE,
|
||||
wait=False,
|
||||
@ -186,6 +190,7 @@ class FuseMount(CephFSMount):
|
||||
'--',
|
||||
self.mountpoint,
|
||||
],
|
||||
cwd=self.test_dir,
|
||||
stdout=StringIO(),
|
||||
stderr=StringIO(),
|
||||
wait=False,
|
||||
@ -231,7 +236,7 @@ class FuseMount(CephFSMount):
|
||||
# unrestricted access to the filesystem mount.
|
||||
try:
|
||||
stderr = StringIO()
|
||||
self.client_remote.run(args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60), stderr=stderr)
|
||||
self.client_remote.run(args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60), cwd=self.test_dir, stderr=stderr)
|
||||
except run.CommandFailedError:
|
||||
stderr = stderr.getvalue()
|
||||
if "Read-only file system".lower() in stderr.lower():
|
||||
@ -240,7 +245,7 @@ class FuseMount(CephFSMount):
|
||||
raise
|
||||
|
||||
def _mountpoint_exists(self):
|
||||
return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, timeout=(15*60)).exitstatus == 0
|
||||
return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, cwd=self.test_dir, timeout=(15*60)).exitstatus == 0
|
||||
|
||||
def umount(self):
|
||||
try:
|
||||
@ -252,6 +257,7 @@ class FuseMount(CephFSMount):
|
||||
'-u',
|
||||
self.mountpoint,
|
||||
],
|
||||
cwd=self.test_dir,
|
||||
timeout=(30*60),
|
||||
)
|
||||
except run.CommandFailedError:
|
||||
@ -346,8 +352,10 @@ class FuseMount(CephFSMount):
|
||||
'--',
|
||||
self.mountpoint,
|
||||
],
|
||||
cwd=self.test_dir,
|
||||
stderr=stderr,
|
||||
timeout=(60*5)
|
||||
timeout=(60*5),
|
||||
check_status=False,
|
||||
)
|
||||
except CommandFailedError:
|
||||
if "No such file or directory" in stderr.getvalue():
|
||||
@ -396,6 +404,7 @@ class FuseMount(CephFSMount):
|
||||
'-rf',
|
||||
self.mountpoint,
|
||||
],
|
||||
cwd=self.test_dir,
|
||||
timeout=(60*5)
|
||||
)
|
||||
|
||||
|
@ -26,20 +26,16 @@ class KernelMount(CephFSMount):
|
||||
self.ipmi_password = ipmi_password
|
||||
self.ipmi_domain = ipmi_domain
|
||||
|
||||
def mount(self, mount_path=None, mount_fs_name=None):
|
||||
def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
|
||||
if mountpoint is not None:
|
||||
self.mountpoint = mountpoint
|
||||
self.setupfs(name=mount_fs_name)
|
||||
|
||||
log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
|
||||
id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
|
||||
|
||||
self.client_remote.run(
|
||||
args=[
|
||||
'mkdir',
|
||||
'--',
|
||||
self.mountpoint,
|
||||
],
|
||||
timeout=(5*60),
|
||||
)
|
||||
self.client_remote.run(args=['mkdir', '-p', self.mountpoint],
|
||||
timeout=(5*60))
|
||||
|
||||
if mount_path is None:
|
||||
mount_path = "/"
|
||||
@ -187,6 +183,7 @@ class KernelMount(CephFSMount):
|
||||
self.mountpoint,
|
||||
],
|
||||
timeout=(5*60),
|
||||
check_status=False,
|
||||
)
|
||||
|
||||
def _find_debug_dir(self):
|
||||
|
@ -28,6 +28,7 @@ class CephFSMount(object):
|
||||
self.client_id = client_id
|
||||
self.client_remote = client_remote
|
||||
self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id)
|
||||
self._mountpoint = None
|
||||
self.fs = None
|
||||
|
||||
self.test_files = ['a', 'b', 'c']
|
||||
@ -36,8 +37,16 @@ class CephFSMount(object):
|
||||
|
||||
@property
|
||||
def mountpoint(self):
|
||||
return os.path.join(
|
||||
if self._mountpoint is None:
|
||||
self._mountpoint = os.path.join(
|
||||
self.test_dir, '{dir_name}'.format(dir_name=self.mountpoint_dir_name))
|
||||
return self._mountpoint
|
||||
|
||||
@mountpoint.setter
|
||||
def mountpoint(self, path):
|
||||
if not isinstance(path, str):
|
||||
raise RuntimeError('path should be of str type.')
|
||||
self._mountpoint = path
|
||||
|
||||
def is_mounted(self):
|
||||
raise NotImplementedError()
|
||||
@ -51,7 +60,7 @@ class CephFSMount(object):
|
||||
self.fs.wait_for_daemons()
|
||||
log.info('Ready to start {}...'.format(type(self).__name__))
|
||||
|
||||
def mount(self, mount_path=None, mount_fs_name=None):
|
||||
def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
|
||||
raise NotImplementedError()
|
||||
|
||||
def umount(self):
|
||||
@ -440,13 +449,14 @@ class CephFSMount(object):
|
||||
n = {count}
|
||||
abs_path = "{abs_path}"
|
||||
|
||||
if not os.path.exists(os.path.dirname(abs_path)):
|
||||
os.makedirs(os.path.dirname(abs_path))
|
||||
if not os.path.exists(abs_path):
|
||||
os.makedirs(abs_path)
|
||||
|
||||
handles = []
|
||||
for i in range(0, n):
|
||||
fname = "{{0}}_{{1}}".format(abs_path, i)
|
||||
handles.append(open(fname, 'w'))
|
||||
fname = "file_"+str(i)
|
||||
path = os.path.join(abs_path, fname)
|
||||
handles.append(open(path, 'w'))
|
||||
|
||||
while True:
|
||||
time.sleep(1)
|
||||
|
@ -8,6 +8,7 @@ import logging
|
||||
from textwrap import dedent
|
||||
from unittest import SkipTest
|
||||
from teuthology.orchestra.run import CommandFailedError
|
||||
from tasks.ceph_test_case import TestTimeoutError
|
||||
from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
|
||||
from tasks.cephfs.fuse_mount import FuseMount
|
||||
import os
|
||||
@ -39,20 +40,17 @@ class TestClientLimits(CephFSTestCase):
|
||||
:param use_subdir: whether to put test files in a subdir or use root
|
||||
"""
|
||||
|
||||
cache_size = open_files // 2
|
||||
self.config_set('mds', 'mds_cache_memory_limit', "1K")
|
||||
self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
|
||||
self.config_set('mds', 'mds_recall_warning_threshold', open_files)
|
||||
|
||||
self.set_conf('mds', 'mds cache size', cache_size)
|
||||
self.set_conf('mds', 'mds_recall_max_caps', open_files // 2)
|
||||
self.set_conf('mds', 'mds_recall_warning_threshold', open_files)
|
||||
self.fs.mds_fail_restart()
|
||||
self.fs.wait_for_daemons()
|
||||
|
||||
mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
|
||||
mds_recall_warning_decay_rate = float(self.fs.get_config("mds_recall_warning_decay_rate"))
|
||||
self.assertTrue(open_files >= mds_min_caps_per_client)
|
||||
mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client"))
|
||||
self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client)
|
||||
mds_recall_warning_decay_rate = float(self.config_get('mds.a', "mds_recall_warning_decay_rate"))
|
||||
self.assertGreaterEqual(open_files, mds_min_caps_per_client)
|
||||
|
||||
mount_a_client_id = self.mount_a.get_global_id()
|
||||
path = "subdir/mount_a" if use_subdir else "mount_a"
|
||||
path = "subdir" if use_subdir else "."
|
||||
open_proc = self.mount_a.open_n_background(path, open_files)
|
||||
|
||||
# Client should now hold:
|
||||
@ -88,8 +86,6 @@ class TestClientLimits(CephFSTestCase):
|
||||
num_caps = self.get_session(mount_a_client_id)['num_caps']
|
||||
if num_caps <= mds_min_caps_per_client:
|
||||
return True
|
||||
elif num_caps < cache_size:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
@ -107,6 +103,53 @@ class TestClientLimits(CephFSTestCase):
|
||||
def test_client_pin_mincaps(self):
|
||||
self._test_client_pin(True, 200)
|
||||
|
||||
def test_client_min_caps_working_set(self):
|
||||
"""
|
||||
That the MDS will not warn about a client failing to respond to cache
|
||||
pressure when the client has inodes pinned in its cache (open files) and
|
||||
its number of caps is below mds_min_caps_working_set.
|
||||
"""
|
||||
|
||||
# Set the MDS cache memory limit to a low value that will make the MDS
|
||||
# ask the client to trim its caps.
|
||||
cache_memory_limit = "1K"
|
||||
open_files = 400
|
||||
|
||||
self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit)
|
||||
self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
|
||||
self.config_set('mds', 'mds_recall_warning_threshold', open_files)
|
||||
self.config_set('mds', 'mds_min_caps_working_set', open_files*2)
|
||||
|
||||
mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client"))
|
||||
mds_recall_warning_decay_rate = float(self.config_get('mds.a', "mds_recall_warning_decay_rate"))
|
||||
self.assertGreaterEqual(open_files, mds_min_caps_per_client)
|
||||
|
||||
mount_a_client_id = self.mount_a.get_global_id()
|
||||
self.mount_a.open_n_background("subdir", open_files)
|
||||
|
||||
# Client should now hold:
|
||||
# `open_files` caps for the open files
|
||||
# 1 cap for root
|
||||
# 1 cap for subdir
|
||||
self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
|
||||
open_files + 2,
|
||||
timeout=600,
|
||||
reject_fn=lambda x: x > open_files + 2)
|
||||
|
||||
# We can also test that the MDS health warning for oversized
|
||||
# cache is functioning as intended.
|
||||
self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)
|
||||
|
||||
try:
|
||||
# MDS should not be happy about that but it's not sending
|
||||
# MDS_CLIENT_RECALL warnings because the client's caps are below
|
||||
# mds_min_caps_working_set.
|
||||
self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)
|
||||
except TestTimeoutError:
|
||||
pass
|
||||
else:
|
||||
raise RuntimeError("expected no client recall warning")
|
||||
|
||||
def test_client_release_bug(self):
|
||||
"""
|
||||
When a client has a bug (which we will simulate) preventing it from releasing caps,
|
||||
@ -240,11 +283,9 @@ class TestClientLimits(CephFSTestCase):
|
||||
That the MDS will not let a client sit above mds_max_caps_per_client caps.
|
||||
"""
|
||||
|
||||
mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
|
||||
mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client"))
|
||||
mds_max_caps_per_client = 2*mds_min_caps_per_client
|
||||
self.set_conf('mds', 'mds_max_caps_per_client', mds_max_caps_per_client)
|
||||
self.fs.mds_fail_restart()
|
||||
self.fs.wait_for_daemons()
|
||||
self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client)
|
||||
|
||||
self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True)
|
||||
|
||||
|
@ -75,6 +75,9 @@ class BacktraceWorkload(Workload):
|
||||
self._filesystem.mds_asok(["flush", "journal"])
|
||||
self._filesystem._write_data_xattr(st['st_ino'], "parent", "")
|
||||
|
||||
def create_files(self, nfiles=1000):
|
||||
self._mount.create_n_files("scrub-new-files/file", nfiles)
|
||||
|
||||
|
||||
class DupInodeWorkload(Workload):
|
||||
"""
|
||||
@ -89,7 +92,7 @@ class DupInodeWorkload(Workload):
|
||||
|
||||
def damage(self):
|
||||
temp_bin_path = "/tmp/10000000000.00000000_omap.bin"
|
||||
self._mount.umount()
|
||||
self._mount.umount_wait()
|
||||
self._filesystem.mds_asok(["flush", "journal"])
|
||||
self._filesystem.mds_stop()
|
||||
self._filesystem.rados(["getomapval", "10000000000.00000000",
|
||||
@ -144,6 +147,27 @@ class TestScrub(CephFSTestCase):
|
||||
errors[0].exception, errors[0].backtrace
|
||||
))
|
||||
|
||||
def _get_damage_count(self, damage_type='backtrace'):
|
||||
out_json = self.fs.rank_tell(["damage", "ls"])
|
||||
self.assertNotEqual(out_json, None)
|
||||
|
||||
damage_count = 0
|
||||
for it in out_json:
|
||||
if it['damage_type'] == damage_type:
|
||||
damage_count += 1
|
||||
return damage_count
|
||||
|
||||
def _scrub_new_files(self, workload):
|
||||
"""
|
||||
That scrubbing new files does not lead to errors
|
||||
"""
|
||||
workload.create_files(1000)
|
||||
self._wait_until_scrub_complete()
|
||||
self.assertEqual(self._get_damage_count(), 0)
|
||||
|
||||
def test_scrub_backtrace_for_new_files(self):
|
||||
self._scrub_new_files(BacktraceWorkload(self.fs, self.mount_a))
|
||||
|
||||
def test_scrub_backtrace(self):
|
||||
self._scrub(BacktraceWorkload(self.fs, self.mount_a))
|
||||
|
||||
|
@ -29,6 +29,9 @@ class TestVolumes(CephFSTestCase):
|
||||
def _fs_cmd(self, *args):
|
||||
return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args)
|
||||
|
||||
def _raw_cmd(self, *args):
|
||||
return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args)
|
||||
|
||||
def __check_clone_state(self, state, clone, clone_group=None, timo=120):
|
||||
check = 0
|
||||
args = ["clone", "status", self.volname, clone]
|
||||
@ -105,28 +108,33 @@ class TestVolumes(CephFSTestCase):
|
||||
self._verify_clone_attrs(subvolume, clone, source_group=source_group, clone_group=clone_group)
|
||||
|
||||
def _generate_random_volume_name(self, count=1):
|
||||
r = random.sample(range(10000), count)
|
||||
volumes = ["{0}_{1}".format(TestVolumes.TEST_VOLUME_PREFIX, c) for c in r]
|
||||
n = self.volume_start
|
||||
volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)]
|
||||
self.volume_start += count
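# names are handed out sequentially from a per-run random starting offset
# (see setUp), so repeated calls within a test run cannot collide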
|
||||
return volumes[0] if count == 1 else volumes
|
||||
|
||||
def _generate_random_subvolume_name(self, count=1):
|
||||
r = random.sample(range(10000), count)
|
||||
subvolumes = ["{0}_{1}".format(TestVolumes.TEST_SUBVOLUME_PREFIX, c) for c in r]
|
||||
n = self.subvolume_start
|
||||
subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)]
|
||||
self.subvolume_start += count
|
||||
return subvolumes[0] if count == 1 else subvolumes
|
||||
|
||||
def _generate_random_group_name(self, count=1):
|
||||
r = random.sample(range(100), count)
|
||||
groups = ["{0}_{1}".format(TestVolumes.TEST_GROUP_PREFIX, c) for c in r]
|
||||
n = self.group_start
|
||||
groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)]
|
||||
self.group_start += count
|
||||
return groups[0] if count == 1 else groups
|
||||
|
||||
def _generate_random_snapshot_name(self, count=1):
|
||||
r = random.sample(range(100), count)
|
||||
snaps = ["{0}_{1}".format(TestVolumes.TEST_SNAPSHOT_PREFIX, c) for c in r]
|
||||
n = self.snapshot_start
|
||||
snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)]
|
||||
self.snapshot_start += count
|
||||
return snaps[0] if count == 1 else snaps
|
||||
|
||||
def _generate_random_clone_name(self, count=1):
|
||||
r = random.sample(range(1000), count)
|
||||
clones = ["{0}_{1}".format(TestVolumes.TEST_CLONE_PREFIX, c) for c in r]
|
||||
n = self.clone_start
|
||||
clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)]
|
||||
self.clone_start += count
|
||||
return clones[0] if count == 1 else clones
|
||||
|
||||
def _enable_multi_fs(self):
|
||||
@ -164,6 +172,14 @@ class TestVolumes(CephFSTestCase):
|
||||
subvol_md = self._fs_cmd(*args)
|
||||
return subvol_md
|
||||
|
||||
def _get_subvolume_snapshot_info(self, vol_name, subvol_name, snapname, group_name=None):
|
||||
args = ["subvolume", "snapshot", "info", vol_name, subvol_name, snapname]
|
||||
if group_name:
|
||||
args.append(group_name)
|
||||
args = tuple(args)
|
||||
snap_md = self._fs_cmd(*args)
|
||||
return snap_md
|
||||
|
||||
def _delete_test_volume(self):
|
||||
self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
|
||||
|
||||
@ -217,6 +233,12 @@ class TestVolumes(CephFSTestCase):
|
||||
self.vol_created = False
|
||||
self._enable_multi_fs()
|
||||
self._create_or_reuse_test_volume()
|
||||
self.config_set('mon', 'mon_allow_pool_delete', True)
|
||||
self.volume_start = random.randint(1, (1<<20))
|
||||
self.subvolume_start = random.randint(1, (1<<20))
|
||||
self.group_start = random.randint(1, (1<<20))
|
||||
self.snapshot_start = random.randint(1, (1<<20))
|
||||
self.clone_start = random.randint(1, (1<<20))
|
||||
|
||||
def tearDown(self):
|
||||
if self.vol_created:
|
||||
@ -302,6 +324,52 @@ class TestVolumes(CephFSTestCase):
|
||||
else:
|
||||
raise RuntimeError("expected the 'fs volume rm' command to fail.")
|
||||
|
||||
def test_volume_rm_arbitrary_pool_removal(self):
|
||||
"""
|
||||
That the arbitrary pool added to the volume out of band is removed
|
||||
successfully on volume removal.
|
||||
"""
|
||||
new_pool = "new_pool"
|
||||
# add arbitrary data pool
|
||||
self.fs.add_data_pool(new_pool)
|
||||
self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
|
||||
|
||||
#check if fs is gone
|
||||
volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty"))
|
||||
volnames = [volume['name'] for volume in volumes]
|
||||
self.assertNotIn(self.volname, volnames)
|
||||
|
||||
#check if osd pools are gone
|
||||
pools = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json-pretty"))
|
||||
for pool in pools:
|
||||
self.assertNotIn(self.volname, pool["application_metadata"].keys())
|
||||
|
||||
def test_volume_rm_when_mon_delete_pool_false(self):
|
||||
"""
|
||||
That the volume can only be removed when mon_allow_pool_delete is set
|
||||
to true, and that its pools are removed after volume deletion.
|
||||
"""
|
||||
self.config_set('mon', 'mon_allow_pool_delete', False)
|
||||
try:
|
||||
self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
|
||||
except CommandFailedError as ce:
|
||||
self.assertEqual(ce.exitstatus, errno.EPERM,
|
||||
"expected the 'fs volume rm' command to fail with EPERM, "
|
||||
"but it failed with {0}".format(ce.exitstatus))
|
||||
self.config_set('mon', 'mon_allow_pool_delete', True)
|
||||
self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
|
||||
|
||||
#check if fs is gone
|
||||
volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty"))
|
||||
volnames = [volume['name'] for volume in volumes]
|
||||
self.assertNotIn(self.volname, volnames,
|
||||
"volume {0} exists after removal".format(self.volname))
|
||||
#check if pools are gone
|
||||
pools = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json-pretty"))
|
||||
for pool in pools:
|
||||
self.assertNotIn(self.volname, pool["application_metadata"].keys(),
|
||||
"pool {0} exists after volume removal".format(pool["pool_name"]))
|
||||
|
||||
### basic subvolume operations
|
||||
|
||||
def test_subvolume_create_and_rm(self):
|
||||
@ -784,7 +852,7 @@ class TestVolumes(CephFSTestCase):
|
||||
|
||||
subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime",
|
||||
"data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace",
|
||||
"type", "uid"]
|
||||
"type", "uid", "features"]
|
||||
|
||||
# create subvolume
|
||||
subvolume = self._generate_random_subvolume_name()
|
||||
@ -792,37 +860,34 @@ class TestVolumes(CephFSTestCase):
|
||||
|
||||
# get subvolume metadata
|
||||
subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
|
||||
if len(subvol_info) == 0:
|
||||
raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of subvolume")
|
||||
self.assertNotEqual(len(subvol_info), 0, "expected the 'fs subvolume info' command to list metadata of subvolume")
|
||||
for md in subvol_md:
|
||||
if md not in subvol_info.keys():
|
||||
raise RuntimeError("%s not present in the metadata of subvolume" % md)
|
||||
self.assertIn(md, subvol_info.keys(), "'{0}' key not present in metadata of subvolume".format(md))
|
||||
|
||||
if subvol_info["bytes_pcent"] != "undefined":
|
||||
raise RuntimeError("bytes_pcent should be set to undefined if quota is not set")
|
||||
self.assertEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set")
|
||||
self.assertEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set")
|
||||
self.assertEqual(subvol_info["pool_namespace"], "", "expected pool namespace to be empty")
|
||||
|
||||
if subvol_info["bytes_quota"] != "infinite":
|
||||
raise RuntimeError("bytes_quota should be set to infinite if quota is not set")
|
||||
self.assertEqual(subvol_info["pool_namespace"], "")
|
||||
self.assertEqual(len(subvol_info["features"]), 2,
|
||||
msg="expected 2 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"]))
|
||||
for feature in ['snapshot-clone', 'snapshot-autoprotect']:
|
||||
self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature))
|
||||
|
||||
nsize = self.DEFAULT_FILE_SIZE*1024*1024
|
||||
try:
|
||||
self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize))
|
||||
except CommandFailedError:
|
||||
raise RuntimeError("expected the 'fs subvolume resize' command to succeed")
|
||||
|
||||
# get subvolume metadata after quota set
|
||||
subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
|
||||
if len(subvol_info) == 0:
|
||||
raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of subvolume")
|
||||
if subvol_info["bytes_pcent"] == "undefined":
|
||||
raise RuntimeError("bytes_pcent should not be set to undefined if quota is set")
|
||||
self.assertNotEqual(len(subvol_info), 0, "expected the 'fs subvolume info' command to list metadata of subvolume")
|
||||
|
||||
if subvol_info["bytes_quota"] == "infinite":
|
||||
raise RuntimeError("bytes_quota should not be set to infinite if quota is set")
|
||||
self.assertNotEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is set")
|
||||
self.assertNotEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should not be set to infinite if quota is set")
|
||||
self.assertEqual(subvol_info["type"], "subvolume", "type should be set to subvolume")
|
||||
|
||||
if subvol_info["type"] != "subvolume":
|
||||
raise RuntimeError("type should be set to subvolume")
|
||||
self.assertEqual(len(subvol_info["features"]), 2,
|
||||
msg="expected 2 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"]))
|
||||
for feature in ['snapshot-clone', 'snapshot-autoprotect']:
|
||||
self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature))
|
||||
|
||||
# remove subvolumes
|
||||
self._fs_cmd("subvolume", "rm", self.volname, subvolume)
|
||||
@ -850,18 +915,12 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -904,8 +963,7 @@ class TestVolumes(CephFSTestCase):
|
||||
self._fs_cmd("subvolumegroup", "rm", self.volname, group)
|
||||
|
||||
def test_subvolume_group_create_with_desired_data_pool_layout(self):
|
||||
group1 = self._generate_random_group_name()
|
||||
group2 = self._generate_random_group_name()
|
||||
group1, group2 = self._generate_random_group_name(2)
|
||||
|
||||
# create group
|
||||
self._fs_cmd("subvolumegroup", "create", self.volname, group1)
|
||||
@ -966,8 +1024,7 @@ class TestVolumes(CephFSTestCase):
|
||||
raise RuntimeError("expected the 'fs subvolumegroup getpath' command to fail")
|
||||
|
||||
def test_subvolume_create_with_desired_data_pool_layout_in_group(self):
|
||||
subvol1 = self._generate_random_subvolume_name()
|
||||
subvol2 = self._generate_random_subvolume_name()
|
||||
subvol1, subvol2 = self._generate_random_subvolume_name(2)
|
||||
group = self._generate_random_group_name()
|
||||
|
||||
# create group. this also helps set default pool layout for subvolumes
|
||||
@ -998,8 +1055,7 @@ class TestVolumes(CephFSTestCase):
|
||||
self._fs_cmd("subvolumegroup", "rm", self.volname, group)
|
||||
|
||||
def test_subvolume_group_create_with_desired_mode(self):
|
||||
group1 = self._generate_random_group_name()
|
||||
group2 = self._generate_random_group_name()
|
||||
group1, group2 = self._generate_random_group_name(2)
|
||||
# default mode
|
||||
expected_mode1 = "755"
|
||||
# desired mode
|
||||
@ -1047,9 +1103,8 @@ class TestVolumes(CephFSTestCase):
|
||||
self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname)
|
||||
|
||||
def test_subvolume_create_with_desired_mode_in_group(self):
|
||||
subvol1 = self._generate_random_subvolume_name()
|
||||
subvol2 = self._generate_random_subvolume_name()
|
||||
subvol3 = self._generate_random_subvolume_name()
|
||||
subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3)
|
||||
|
||||
group = self._generate_random_group_name()
|
||||
# default mode
|
||||
expected_mode1 = "755"
|
||||
@ -1184,6 +1239,42 @@ class TestVolumes(CephFSTestCase):
|
||||
# verify trash dir is clean
|
||||
self._wait_for_trash_empty()
|
||||
|
||||
def test_subvolume_snapshot_info(self):
|
||||
|
||||
"""
|
||||
tests the 'fs subvolume snapshot info' command
|
||||
"""
|
||||
|
||||
snap_metadata = ["created_at", "data_pool", "has_pending_clones", "size"]
|
||||
|
||||
subvolume = self._generate_random_subvolume_name()
|
||||
snapshot = self._generate_random_snapshot_name()
|
||||
|
||||
# create subvolume
|
||||
self._fs_cmd("subvolume", "create", self.volname, subvolume)
|
||||
|
||||
# do some IO
|
||||
self._do_subvolume_io(subvolume, number_of_files=1)
|
||||
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot))
|
||||
self.assertNotEqual(len(snap_info), 0)
|
||||
for md in snap_metadata:
|
||||
if md not in snap_info:
|
||||
raise RuntimeError("%s not present in the metadata of subvolume snapshot" % md)
|
||||
self.assertEqual(snap_info["has_pending_clones"], "no")
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove subvolume
|
||||
self._fs_cmd("subvolume", "rm", self.volname, subvolume)
|
||||
|
||||
# verify trash dir is clean
|
||||
self._wait_for_trash_empty()
|
||||
|
||||
def test_subvolume_snapshot_create_idempotence(self):
|
||||
subvolume = self._generate_random_subvolume_name()
|
||||
snapshot = self._generate_random_snapshot_name()
|
||||
@ -1503,88 +1594,11 @@ class TestVolumes(CephFSTestCase):
|
||||
# verify trash dir is clean
|
||||
self._wait_for_trash_empty()
|
||||
|
||||
def test_subvolume_snapshot_protect_unprotect(self):
|
||||
subvolume = self._generate_random_subvolume_name()
|
||||
snapshot = self._generate_random_snapshot_name()
|
||||
|
||||
# create subvolume
|
||||
self._fs_cmd("subvolume", "create", self.volname, subvolume)
|
||||
|
||||
# protect a nonexistent snapshot
|
||||
try:
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
except CommandFailedError as ce:
|
||||
if ce.exitstatus != errno.ENOENT:
|
||||
raise RuntimeError("invalid error code when protecting a non-existing snapshot")
|
||||
else:
|
||||
raise RuntimeError("expected protection of non existent snapshot to fail")
|
||||
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# protecting snapshot again, should return EEXIST
|
||||
try:
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
except CommandFailedError as ce:
|
||||
if ce.exitstatus != errno.EEXIST:
|
||||
raise RuntimeError("invalid error code when protecting a protected snapshot")
|
||||
else:
|
||||
raise RuntimeError("expected protection of already protected snapshot to fail")
|
||||
|
||||
# remove snapshot should fail since the snapshot is protected
|
||||
try:
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
except CommandFailedError as ce:
|
||||
if ce.exitstatus != errno.EINVAL:
|
||||
raise RuntimeError("invalid error code when removing a protected snapshot")
|
||||
else:
|
||||
raise RuntimeError("expected removal of protected snapshot to fail")
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove subvolume
|
||||
self._fs_cmd("subvolume", "rm", self.volname, subvolume)
|
||||
|
||||
# verify trash dir is clean
|
||||
self._wait_for_trash_empty()
|
||||
|
||||
def test_subvolume_snapshot_clone_unprotected_snapshot(self):
|
||||
subvolume = self._generate_random_subvolume_name()
|
||||
snapshot = self._generate_random_snapshot_name()
|
||||
clone = self._generate_random_clone_name()
|
||||
|
||||
# create subvolume
|
||||
self._fs_cmd("subvolume", "create", self.volname, subvolume)
|
||||
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# clone a non protected snapshot
|
||||
try:
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
except CommandFailedError as ce:
|
||||
if ce.exitstatus != errno.EINVAL:
|
||||
raise RuntimeError("invalid error code when cloning a non protected snapshot")
|
||||
else:
|
||||
raise RuntimeError("expected cloning of unprotected snapshot to fail")
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove subvolumes
|
||||
self._fs_cmd("subvolume", "rm", self.volname, subvolume)
|
||||
|
||||
# verify trash dir is clean
|
||||
self._wait_for_trash_empty()
|
||||
|
||||
def test_subvolume_snapshot_clone(self):
|
||||
def test_subvolume_snapshot_protect_unprotect_sanity(self):
|
||||
"""
|
||||
Snapshot protect/unprotect commands are deprecated. This test exists to ensure that
|
||||
invoking the commands does not cause errors, until they are removed in a subsequent release.
|
||||
"""
|
||||
subvolume = self._generate_random_subvolume_name()
|
||||
snapshot = self._generate_random_snapshot_name()
|
||||
clone = self._generate_random_clone_name()
|
||||
@ -1604,15 +1618,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
# unprotecting when a clone is in progress should fail
|
||||
try:
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
except CommandFailedError as ce:
|
||||
if ce.exitstatus != errno.EEXIST:
|
||||
raise RuntimeError("invalid error code when unprotecting snapshot during clone")
|
||||
else:
|
||||
raise RuntimeError("expected unprotecting a snapshot to fail since it has pending clones")
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
@ -1632,6 +1637,39 @@ class TestVolumes(CephFSTestCase):
|
||||
# verify trash dir is clean
|
||||
self._wait_for_trash_empty()
|
||||
|
||||
def test_subvolume_snapshot_clone(self):
|
||||
subvolume = self._generate_random_subvolume_name()
|
||||
snapshot = self._generate_random_snapshot_name()
|
||||
clone = self._generate_random_clone_name()
|
||||
|
||||
# create subvolume
|
||||
self._fs_cmd("subvolume", "create", self.volname, subvolume)
|
||||
|
||||
# do some IO
|
||||
self._do_subvolume_io(subvolume, number_of_files=64)
|
||||
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
# verify clone
|
||||
self._verify_clone(subvolume, clone)
|
||||
|
||||
# remove subvolumes
|
||||
self._fs_cmd("subvolume", "rm", self.volname, subvolume)
|
||||
self._fs_cmd("subvolume", "rm", self.volname, clone)
|
||||
|
||||
# verify trash dir is clean
|
||||
self._wait_for_trash_empty()
|
||||
|
||||
def test_subvolume_snapshot_clone_pool_layout(self):
|
||||
subvolume = self._generate_random_subvolume_name()
|
||||
snapshot = self._generate_random_snapshot_name()
|
||||
@ -1650,18 +1688,12 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, "--pool_layout", new_pool)
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -1697,18 +1729,12 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -1736,18 +1762,12 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone1)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -1761,18 +1781,12 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot clone -- use same snap name
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone1, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, clone1, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, clone1, snapshot, clone2)
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone2)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, clone1, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone1, snapshot)
|
||||
|
||||
@ -1802,9 +1816,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# create group
|
||||
self._fs_cmd("subvolumegroup", "create", self.volname, group)
|
||||
|
||||
@ -1814,9 +1825,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone, clone_group=group)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -1851,18 +1859,12 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot, group)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--group_name', group)
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot, group)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group)
|
||||
|
||||
@ -1898,9 +1900,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, s_group)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot, s_group)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone,
|
||||
'--group_name', s_group, '--target_group_name', c_group)
|
||||
@ -1908,9 +1907,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone, clone_group=c_group)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot, s_group)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, s_group)
|
||||
|
||||
@ -1943,23 +1939,25 @@ class TestVolumes(CephFSTestCase):
|
||||
self.mount_a.run_shell(['mkdir', '-p', createpath])
|
||||
|
||||
# do some IO
|
||||
self._do_subvolume_io(subvolume, number_of_files=32)
|
||||
self._do_subvolume_io(subvolume, number_of_files=64)
|
||||
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
# snapshot should not be deletable now
|
||||
try:
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
except CommandFailedError as ce:
|
||||
self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone")
|
||||
else:
|
||||
self.fail("expected removing source snapshot of a clone to fail")
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -1987,9 +1985,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
@ -1998,7 +1993,7 @@ class TestVolumes(CephFSTestCase):
self._get_subvolume_path(self.volname, clone)
except CommandFailedError as ce:
if ce.exitstatus != errno.EAGAIN:
raise RuntimeError("invalid error code when cloning a non protected snapshot")
raise RuntimeError("invalid error code when fetching path of a pending clone")
else:
raise RuntimeError("expected fetching path of a pending clone to fail")

@ -2009,8 +2004,50 @@ class TestVolumes(CephFSTestCase):
|
||||
subvolpath = self._get_subvolume_path(self.volname, clone)
|
||||
self.assertNotEqual(subvolpath, None)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
# verify clone
|
||||
self._verify_clone(subvolume, clone)
|
||||
|
||||
# remove subvolumes
|
||||
self._fs_cmd("subvolume", "rm", self.volname, subvolume)
|
||||
self._fs_cmd("subvolume", "rm", self.volname, clone)
|
||||
|
||||
# verify trash dir is clean
|
||||
self._wait_for_trash_empty()
|
||||
|
||||
def test_subvolume_clone_in_progress_snapshot_rm(self):
subvolume = self._generate_random_subvolume_name()
snapshot = self._generate_random_snapshot_name()
clone = self._generate_random_clone_name()

# create subvolume
self._fs_cmd("subvolume", "create", self.volname, subvolume)

# do some IO
self._do_subvolume_io(subvolume, number_of_files=64)

# snapshot subvolume
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)

# schedule a clone
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)

# snapshot should not be deletable now
try:
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
except CommandFailedError as ce:
self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone")
else:
self.fail("expected removing source snapshot of a clone to fail")

# check clone status
self._wait_for_clone_to_complete(clone)

# clone should be accessible now
subvolpath = self._get_subvolume_path(self.volname, clone)
self.assertNotEqual(subvolpath, None)

# remove snapshot
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
@ -2039,9 +2076,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
@ -2060,9 +2094,6 @@ class TestVolumes(CephFSTestCase):
|
||||
subvolpath = self._get_subvolume_path(self.volname, clone)
|
||||
self.assertNotEqual(subvolpath, None)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -2111,9 +2142,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume1, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume1, snapshot)
|
||||
|
||||
# schedule a clone with target as subvolume2
|
||||
try:
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, subvolume2)
|
||||
@ -2137,9 +2165,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume1, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, snapshot)
|
||||
|
||||
@ -2172,9 +2197,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# add data pool
|
||||
new_pool = "new_pool"
|
||||
self.fs.add_data_pool(new_pool)
|
||||
@ -2200,9 +2222,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# check clone status
|
||||
self._wait_for_clone_to_fail(clone2)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -2237,18 +2256,12 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
# check clone status
|
||||
self._wait_for_clone_to_complete(clone)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -2276,9 +2289,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule a clone
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
|
||||
@ -2288,9 +2298,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# verify canceled state
|
||||
self._check_clone_canceled(clone)
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
@ -2330,9 +2337,6 @@ class TestVolumes(CephFSTestCase):
|
||||
# snapshot subvolume
|
||||
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
|
||||
|
||||
# now, protect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
|
||||
|
||||
# schedule clones
|
||||
for clone in clones:
|
||||
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
|
||||
@ -2358,9 +2362,6 @@ class TestVolumes(CephFSTestCase):
|
||||
if ce.exitstatus != errno.EINVAL:
|
||||
raise RuntimeError("invalid error code when cancelling on-going clone")
|
||||
|
||||
# now, unprotect snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
|
||||
|
||||
# remove snapshot
|
||||
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
|
||||
|
||||
|
@ -149,6 +149,7 @@ class DashboardTestCase(MgrTestCase):
|
||||
cls.login('admin', 'admin')
|
||||
|
||||
def setUp(self):
|
||||
super(DashboardTestCase, self).setUp()
|
||||
if not self._loggedin and self.AUTO_AUTHENTICATE:
|
||||
self.login('admin', 'admin')
|
||||
self.wait_for_health_clear(20)
|
||||
|
@ -14,6 +14,7 @@ class AuthTest(DashboardTestCase):
|
||||
AUTO_AUTHENTICATE = False
|
||||
|
||||
def setUp(self):
|
||||
super(AuthTest, self).setUp()
|
||||
self.reset_session()
|
||||
|
||||
def _validate_jwt_token(self, token, username, permissions):
|
||||
|
@ -19,6 +19,41 @@ class HealthTest(DashboardTestCase):
|
||||
'statuses': JObj({}, allow_unknown=True, unknown_schema=int)
|
||||
})
|
||||
|
||||
__mdsmap_schema = JObj({
|
||||
'session_autoclose': int,
|
||||
'balancer': str,
|
||||
'up': JObj({}, allow_unknown=True),
|
||||
'last_failure_osd_epoch': int,
|
||||
'in': JList(int),
|
||||
'last_failure': int,
|
||||
'max_file_size': int,
|
||||
'explicitly_allowed_features': int,
|
||||
'damaged': JList(int),
|
||||
'tableserver': int,
|
||||
'failed': JList(int),
|
||||
'metadata_pool': int,
|
||||
'epoch': int,
|
||||
'stopped': JList(int),
|
||||
'max_mds': int,
|
||||
'compat': JObj({
|
||||
'compat': JObj({}, allow_unknown=True),
|
||||
'ro_compat': JObj({}, allow_unknown=True),
|
||||
'incompat': JObj({}, allow_unknown=True)
|
||||
}),
|
||||
'min_compat_client': str,
|
||||
'data_pools': JList(int),
|
||||
'info': JObj({}, allow_unknown=True),
|
||||
'fs_name': str,
|
||||
'created': str,
|
||||
'standby_count_wanted': int,
|
||||
'enabled': bool,
|
||||
'modified': str,
|
||||
'session_timeout': int,
|
||||
'flags': int,
|
||||
'ever_allowed_features': int,
|
||||
'root': int
|
||||
})
|
||||
|
||||
def test_minimal_health(self):
|
||||
data = self._get('/api/health/minimal')
|
||||
self.assertStatus(200)
|
||||
@ -40,18 +75,10 @@ class HealthTest(DashboardTestCase):
|
||||
'fs_map': JObj({
|
||||
'filesystems': JList(
|
||||
JObj({
|
||||
'mdsmap': JObj({
|
||||
'info': JObj(
|
||||
{},
|
||||
allow_unknown=True,
|
||||
unknown_schema=JObj({
|
||||
'state': str
|
||||
})
|
||||
)
|
||||
})
|
||||
'mdsmap': self.__mdsmap_schema
|
||||
}),
|
||||
),
|
||||
'standbys': JList(JObj({})),
|
||||
'standbys': JList(JObj({}, allow_unknown=True)),
|
||||
}),
|
||||
'health': JObj({
|
||||
'checks': JList(str),
|
||||
@ -164,16 +191,7 @@ class HealthTest(DashboardTestCase):
|
||||
'filesystems': JList(
|
||||
JObj({
|
||||
'id': int,
|
||||
'mdsmap': JObj({
|
||||
# TODO: Expand mdsmap schema
|
||||
'info': JObj(
|
||||
{},
|
||||
allow_unknown=True,
|
||||
unknown_schema=JObj({
|
||||
'state': str
|
||||
}, allow_unknown=True)
|
||||
)
|
||||
}, allow_unknown=True)
|
||||
'mdsmap': self.__mdsmap_schema
|
||||
}),
|
||||
),
|
||||
'standbys': JList(JObj({}, allow_unknown=True)),
|
||||
|
@ -2,9 +2,9 @@
from __future__ import absolute_import

import logging

import six
import time
from contextlib import contextmanager

from .helper import DashboardTestCase, JAny, JList, JObj

@ -37,30 +37,68 @@ class PoolTest(DashboardTestCase):
|
||||
'wr': pool_list_stat_schema,
|
||||
}, allow_unknown=True)
|
||||
|
||||
def _pool_create(self, data):
|
||||
try:
|
||||
self._task_post('/api/pool/', data)
|
||||
pool_rbd_conf_schema = JList(JObj(sub_elems={
|
||||
'name': str,
|
||||
'value': str,
|
||||
'source': int
|
||||
}))
|
||||
|
||||
@contextmanager
|
||||
def __create_pool(self, name, data=None):
|
||||
pool_data = data or {
|
||||
'pool': name,
|
||||
'pg_num': '4',
|
||||
'pool_type': 'replicated',
|
||||
'compression_algorithm': 'snappy',
|
||||
'compression_mode': 'passive',
|
||||
'compression_max_blob_size': '131072',
|
||||
'compression_required_ratio': '0.875',
|
||||
'application_metadata': ['rbd'],
|
||||
'configuration': {
|
||||
'rbd_qos_bps_limit': 1024000,
|
||||
'rbd_qos_iops_limit': 5000,
|
||||
}
|
||||
}
|
||||
self._task_post('/api/pool/', pool_data)
|
||||
self.assertStatus(201)
|
||||
|
||||
self._check_pool_properties(data)
|
||||
|
||||
self._task_delete("/api/pool/" + data['pool'])
|
||||
time.sleep(5)
|
||||
self._validate_pool_properties(pool_data, self._get_pool(name))
|
||||
yield pool_data
|
||||
self._task_delete('/api/pool/' + name)
|
||||
self.assertStatus(204)
|
||||
except Exception:
|
||||
log.exception("test_pool_create: data=%s", data)
|
||||
raise
|
||||
|
||||
def _check_pool_properties(self, data, pool_name=None):
|
||||
if not pool_name:
|
||||
pool_name = data['pool']
|
||||
pool = self._get_pool(pool_name)
|
||||
try:
|
||||
for k, v in data.items():
|
||||
self._check_pool_property(k, v, pool)
|
||||
|
||||
except Exception:
|
||||
log.exception("test_pool_create: pool=%s", pool)
|
||||
raise
|
||||
def _validate_pool_properties(self, data, pool):
|
||||
for prop, value in data.items():
|
||||
if prop == 'pool_type':
|
||||
self.assertEqual(pool['type'], value)
|
||||
elif prop == 'size':
|
||||
self.assertEqual(pool[prop], int(value),
|
||||
'{}: {} != {}'.format(prop, pool[prop], value))
|
||||
elif prop == 'pg_num':
|
||||
self._check_pg_num(value, pool)
|
||||
elif prop == 'application_metadata':
|
||||
self.assertIsInstance(pool[prop], list)
|
||||
self.assertEqual(value, pool[prop])
|
||||
elif prop == 'pool':
|
||||
self.assertEqual(pool['pool_name'], value)
|
||||
elif prop.startswith('compression'):
|
||||
if value is not None:
|
||||
if prop.endswith('size'):
|
||||
value = int(value)
|
||||
elif prop.endswith('ratio'):
|
||||
value = float(value)
|
||||
self.assertEqual(pool['options'][prop], value)
|
||||
else:
|
||||
self.assertEqual(pool['options'], {})
|
||||
elif prop == 'configuration':
# configuration cannot really be checked here for two reasons:
# 1. The default value cannot be given to this method, which becomes relevant
# when resetting a value, because it's not always zero.
# 2. The expected `source` cannot be given to this method, and it cannot
# reliably be determined (see 1)
pass
|
||||
else:
|
||||
self.assertEqual(pool[prop], value, '{}: {} != {}'.format(prop, pool[prop], value))
|
||||
|
||||
health = self._get('/api/health/minimal')['health']
|
||||
self.assertEqual(health['status'], 'HEALTH_OK', msg='health={}'.format(health))
|
||||
@ -71,49 +109,27 @@ class PoolTest(DashboardTestCase):
|
||||
self.assertSchemaBody(self.pool_schema)
|
||||
return pool
|
||||
|
||||
def _check_pool_property(self, prop, value, pool):
|
||||
if prop == 'pool_type':
|
||||
self.assertEqual(pool['type'], value)
|
||||
elif prop == 'size':
|
||||
self.assertEqual(pool[prop], int(value), '{}: {} != {}'.format(prop, pool[prop], value))
|
||||
elif prop == 'pg_num':
|
||||
self._check_pg_num(value, pool)
|
||||
elif prop == 'application_metadata':
|
||||
self.assertIsInstance(pool[prop], list)
|
||||
self.assertEqual(pool[prop], value)
|
||||
elif prop == 'pool':
|
||||
self.assertEqual(pool['pool_name'], value)
|
||||
elif prop.startswith('compression'):
|
||||
if value is not None:
|
||||
if prop.endswith('size'):
|
||||
value = int(value)
|
||||
elif prop.endswith('ratio'):
|
||||
value = float(value)
|
||||
self.assertEqual(pool['options'].get(prop), value)
|
||||
else:
|
||||
self.assertEqual(pool[prop], value, '{}: {} != {}'.format(prop, pool[prop], value))
|
||||
|
||||
def _check_pg_num(self, value, pool):
|
||||
# If both properties have not the same value, the cluster goes into a warning state,
|
||||
# which will only happen during a pg update on a existing pool.
|
||||
# The test that does that is currently commented out because
|
||||
# our QA systems can't deal with the change.
|
||||
# Feel free to test it locally.
|
||||
prop = 'pg_num'
|
||||
"""
|
||||
If both properties do not have the same value, the cluster goes into a warning state, which
|
||||
will only happen during a pg update on an existing pool. The test that does that is
|
||||
currently commented out because our QA systems can't deal with the change. Feel free to test
|
||||
it locally.
|
||||
"""
|
||||
pgp_prop = 'pg_placement_num'
|
||||
health = lambda: self._get('/api/health/minimal')['health']['status'] == 'HEALTH_OK'
|
||||
t = 0;
|
||||
while (int(value) != pool[pgp_prop] or not health()) and t < 180:
|
||||
t = 0
|
||||
while (int(value) != pool[pgp_prop] or self._get('/api/health/minimal')['health']['status']
|
||||
!= 'HEALTH_OK') and t < 180:
|
||||
time.sleep(2)
|
||||
t += 2
|
||||
pool = self._get_pool(pool['pool_name'])
|
||||
for p in [prop, pgp_prop]: # Should have the same values
|
||||
for p in ['pg_num', pgp_prop]: # Should have the same values
|
||||
self.assertEqual(pool[p], int(value), '{}: {} != {}'.format(p, pool[p], value))
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
super(PoolTest, cls).tearDownClass()
|
||||
for name in ['dashboard_pool1', 'dashboard_pool2', 'dashboard_pool3', 'dashboard_pool_update1']:
|
||||
for name in ['dashboard_pool1', 'dashboard_pool2', 'dashboard_pool3']:
|
||||
cls._ceph_cmd(['osd', 'pool', 'delete', name, name, '--yes-i-really-really-mean-it'])
|
||||
cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecprofile'])
|
||||
|
||||
@ -188,23 +204,40 @@ class PoolTest(DashboardTestCase):
|
||||
self.assertNotIn('pg_status', pool)
|
||||
self.assertSchema(pool['stats'], self.pool_list_stats_schema)
|
||||
self.assertNotIn('flags_names', pool)
|
||||
self.assertSchema(pool['configuration'], self.pool_rbd_conf_schema)
|
||||
|
||||
def test_pool_create(self):
|
||||
self._ceph_cmd(['osd', 'crush', 'rule', 'create-erasure', 'ecrule'])
|
||||
self._ceph_cmd(
|
||||
['osd', 'erasure-code-profile', 'set', 'ecprofile', 'crush-failure-domain=osd'])
|
||||
pools = [{
|
||||
|
||||
pool = {
|
||||
'pool': 'dashboard_pool1',
|
||||
'pg_num': '32',
|
||||
'pool_type': 'replicated',
|
||||
'application_metadata': ['rbd', 'sth'],
|
||||
}, {
|
||||
}
|
||||
self._task_post('/api/pool/', pool)
|
||||
self.assertStatus(201)
|
||||
self._validate_pool_properties(pool, self._get_pool(pool['pool']))
|
||||
self._task_delete("/api/pool/" + pool['pool'])
|
||||
self.assertStatus(204)
|
||||
|
||||
pool = {
|
||||
'pool': 'dashboard_pool2',
|
||||
'pg_num': '32',
|
||||
'pool_type': 'erasure',
|
||||
'application_metadata': ['rbd'],
|
||||
'erasure_code_profile': 'ecprofile',
|
||||
'crush_rule': 'ecrule',
|
||||
}, {
|
||||
}
|
||||
self._task_post('/api/pool/', pool)
|
||||
self.assertStatus(201)
|
||||
self._validate_pool_properties(pool, self._get_pool(pool['pool']))
|
||||
self._task_delete("/api/pool/" + pool['pool'])
|
||||
self.assertStatus(204)
|
||||
|
||||
pool = {
|
||||
'pool': 'dashboard_pool3',
|
||||
'pg_num': '32',
|
||||
'pool_type': 'replicated',
|
||||
@ -212,64 +245,99 @@ class PoolTest(DashboardTestCase):
|
||||
'compression_mode': 'aggressive',
|
||||
'compression_max_blob_size': '10000000',
|
||||
'compression_required_ratio': '0.8',
|
||||
}]
|
||||
for data in pools:
|
||||
self._pool_create(data)
|
||||
|
||||
def test_update(self):
|
||||
pool = {
|
||||
'pool': 'dashboard_pool_update1',
|
||||
'pg_num': '32',
|
||||
'pool_type': 'replicated',
|
||||
'compression_mode': 'passive',
|
||||
'compression_algorithm': 'snappy',
|
||||
'compression_max_blob_size': '131072',
|
||||
'compression_required_ratio': '0.875',
|
||||
'configuration': {
|
||||
'rbd_qos_bps_limit': 2048,
|
||||
'rbd_qos_iops_limit': None,
|
||||
},
|
||||
}
|
||||
updates = [
|
||||
{
|
||||
'application_metadata': ['rbd', 'sth'],
|
||||
},
|
||||
# The following test case is currently commented out because
|
||||
# our QA systems can't deal with the change and will fail because
|
||||
# they can't recover from the resulting warning state.
|
||||
# Feel free to test it locally.
|
||||
# {
|
||||
# 'pg_num': '2', # Decrease PGs
|
||||
# },
|
||||
# {
|
||||
# 'pg_num': '8', # Increase PGs
|
||||
# },
|
||||
{
|
||||
'application_metadata': ['rgw'],
|
||||
},
|
||||
{
|
||||
expected_configuration = [{
|
||||
'name': 'rbd_qos_bps_limit',
|
||||
'source': 1,
|
||||
'value': '2048',
|
||||
}, {
|
||||
'name': 'rbd_qos_iops_limit',
|
||||
'source': 0,
|
||||
'value': '0',
|
||||
}]
|
||||
self._task_post('/api/pool/', pool)
|
||||
self.assertStatus(201)
|
||||
new_pool = self._get_pool(pool['pool'])
|
||||
self._validate_pool_properties(pool, new_pool)
|
||||
for conf in expected_configuration:
|
||||
self.assertIn(conf, new_pool['configuration'])
|
||||
|
||||
self._task_delete("/api/pool/" + pool['pool'])
|
||||
self.assertStatus(204)
|
||||
|
||||
def test_pool_update_metadata(self):
|
||||
pool_name = 'pool_update_metadata'
|
||||
with self.__create_pool(pool_name):
|
||||
props = {'application_metadata': ['rbd', 'sth']}
|
||||
self._task_put('/api/pool/{}'.format(pool_name), props)
|
||||
time.sleep(5)
|
||||
self._validate_pool_properties(props, self._get_pool(pool_name))
|
||||
|
||||
properties = {'application_metadata': ['rgw']}
|
||||
self._task_put('/api/pool/' + pool_name, properties)
|
||||
time.sleep(5)
|
||||
self._validate_pool_properties(properties, self._get_pool(pool_name))
|
||||
|
||||
properties = {'application_metadata': ['rbd', 'sth']}
|
||||
self._task_put('/api/pool/' + pool_name, properties)
|
||||
time.sleep(5)
|
||||
self._validate_pool_properties(properties, self._get_pool(pool_name))
|
||||
|
||||
properties = {'application_metadata': ['rgw']}
|
||||
self._task_put('/api/pool/' + pool_name, properties)
|
||||
time.sleep(5)
|
||||
self._validate_pool_properties(properties, self._get_pool(pool_name))
|
||||
|
||||
def test_pool_update_configuration(self):
|
||||
pool_name = 'pool_update_configuration'
|
||||
with self.__create_pool(pool_name):
|
||||
configuration = {
|
||||
'rbd_qos_bps_limit': 1024,
|
||||
'rbd_qos_iops_limit': None,
|
||||
}
|
||||
expected_configuration = [{
|
||||
'name': 'rbd_qos_bps_limit',
|
||||
'source': 1,
|
||||
'value': '1024',
|
||||
}, {
|
||||
'name': 'rbd_qos_iops_limit',
|
||||
'source': 0,
|
||||
'value': '0',
|
||||
}]
|
||||
self._task_put('/api/pool/' + pool_name, {'configuration': configuration})
|
||||
time.sleep(5)
|
||||
pool_config = self._get_pool(pool_name)['configuration']
|
||||
for conf in expected_configuration:
|
||||
self.assertIn(conf, pool_config)
|
||||
|
||||
def test_pool_update_compression(self):
|
||||
pool_name = 'pool_update_compression'
|
||||
with self.__create_pool(pool_name):
|
||||
properties = {
|
||||
'compression_algorithm': 'zstd',
|
||||
'compression_mode': 'aggressive',
|
||||
'compression_max_blob_size': '10000000',
|
||||
'compression_required_ratio': '0.8',
|
||||
},
|
||||
{
|
||||
'compression_mode': 'unset'
|
||||
}
|
||||
]
|
||||
self._task_post('/api/pool/', pool)
|
||||
self.assertStatus(201)
|
||||
self._check_pool_properties(pool)
|
||||
self._task_put('/api/pool/' + pool_name, properties)
|
||||
time.sleep(5)
|
||||
self._validate_pool_properties(properties, self._get_pool(pool_name))
|
||||
|
||||
for update in updates:
|
||||
self._task_put('/api/pool/' + pool['pool'], update)
|
||||
if update.get('compression_mode') == 'unset':
|
||||
update = {
|
||||
'compression_mode': None,
|
||||
def test_pool_update_unset_compression(self):
|
||||
pool_name = 'pool_update_unset_compression'
|
||||
with self.__create_pool(pool_name):
|
||||
self._task_put('/api/pool/' + pool_name, {'compression_mode': 'unset'})
|
||||
time.sleep(5)
|
||||
self._validate_pool_properties({
|
||||
'compression_algorithm': None,
|
||||
'compression_mode': None,
|
||||
'compression_max_blob_size': None,
|
||||
'compression_required_ratio': None,
|
||||
}
|
||||
self._check_pool_properties(update, pool_name=pool['pool'])
|
||||
self._task_delete("/api/pool/" + pool['pool'])
|
||||
self.assertStatus(204)
|
||||
}, self._get_pool(pool_name))
|
||||
|
||||
def test_pool_create_fail(self):
|
||||
data = {'pool_type': u'replicated', 'rule_name': u'dnf', 'pg_num': u'8', 'pool': u'sadfs'}
|
||||
|
@ -188,6 +188,8 @@ class RbdTest(DashboardTestCase):
|
||||
'block_name_prefix': JLeaf(str),
|
||||
'name': JLeaf(str),
|
||||
'id': JLeaf(str),
|
||||
'unique_id': JLeaf(str),
|
||||
'image_format': JLeaf(int),
|
||||
'pool_name': JLeaf(str),
|
||||
'features': JLeaf(int),
|
||||
'features_name': JList(JLeaf(str)),
|
||||
|
@ -67,6 +67,7 @@ class RgwApiCredentialsTest(RgwTestCase):
|
||||
AUTH_ROLES = ['rgw-manager']
|
||||
|
||||
def setUp(self):
|
||||
super(RgwApiCredentialsTest, self).setUp()
|
||||
# Restart the Dashboard module to ensure that the connection to the
|
||||
# RGW Admin Ops API is re-established with the new credentials.
|
||||
self.logout()
|
||||
|
@ -7,6 +7,7 @@ from .helper import DashboardTestCase, JList, JObj, JAny
|
||||
|
||||
class SettingsTest(DashboardTestCase):
|
||||
def setUp(self):
|
||||
super(SettingsTest, self).setUp()
|
||||
self.settings = self._get('/api/settings')
|
||||
|
||||
def tearDown(self):
|
||||
|
@ -13,6 +13,7 @@ DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
|
||||
class TestCrash(MgrTestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(TestCrash, self).setUp()
|
||||
self.setup_mgrs()
|
||||
self._load_module('crash')
|
||||
|
||||
|
@ -12,6 +12,7 @@ class TestFailover(MgrTestCase):
|
||||
MGRS_REQUIRED = 2
|
||||
|
||||
def setUp(self):
|
||||
super(TestFailover, self).setUp()
|
||||
self.setup_mgrs()
|
||||
|
||||
def test_timeout(self):
|
||||
|
@ -12,6 +12,7 @@ DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
|
||||
|
||||
class TestInsights(MgrTestCase):
|
||||
def setUp(self):
|
||||
super(TestInsights, self).setUp()
|
||||
self.setup_mgrs()
|
||||
self._load_module("insights")
|
||||
self._load_module("selftest")
|
||||
|
@ -24,6 +24,7 @@ class TestModuleSelftest(MgrTestCase):
|
||||
MGRS_REQUIRED = 1
|
||||
|
||||
def setUp(self):
|
||||
super(TestModuleSelftest, self).setUp()
|
||||
self.setup_mgrs()
|
||||
|
||||
def _selftest_plugin(self, module_name):
|
||||
|
@ -61,6 +61,7 @@ class TestProgress(MgrTestCase):
|
||||
return len(osd_map['osds'])
|
||||
|
||||
def setUp(self):
|
||||
super(TestProgress, self).setUp()
|
||||
# Ensure we have at least four OSDs
|
||||
if self._osd_count() < 4:
|
||||
raise SkipTest("Not enough OSDS!")
|
||||
|
@ -11,6 +11,7 @@ class TestPrometheus(MgrTestCase):
|
||||
MGRS_REQUIRED = 3
|
||||
|
||||
def setUp(self):
|
||||
super(TestPrometheus, self).setUp()
|
||||
self.setup_mgrs()
|
||||
|
||||
def test_file_sd_command(self):
|
||||
|
@ -103,9 +103,10 @@ def start_rgw(ctx, config, clients):
|
||||
kport=keystone_port),
|
||||
])
|
||||
|
||||
if client_config.get('dns-name'):
|
||||
|
||||
if client_config.get('dns-name') is not None:
|
||||
rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name])
|
||||
if client_config.get('dns-s3website-name'):
|
||||
if client_config.get('dns-s3website-name') is not None:
|
||||
rgw_cmd.extend(['--rgw-dns-s3website-name', endpoint.website_dns_name])
|
||||
|
||||
rgw_cmd.extend([
|
||||
@ -188,8 +189,7 @@ def assign_endpoints(ctx, config, default_cert):
|
||||
dns_name += remote.hostname
|
||||
|
||||
website_dns_name = client_config.get('dns-s3website-name')
|
||||
if website_dns_name:
|
||||
if len(website_dns_name) == 0 or website_dns_name.endswith('.'):
|
||||
if website_dns_name is not None and (len(website_dns_name) == 0 or website_dns_name.endswith('.')):
|
||||
website_dns_name += remote.hostname
|
||||
|
||||
role_endpoints[role] = RGWEndpoint(remote.hostname, port, ssl_certificate, dns_name, website_dns_name)
|
||||
|
@ -308,10 +308,22 @@ class LocalRemote(object):
|
||||
|
||||
return proc
|
||||
|
||||
def sh(self, command, log_limit=1024, cwd=None, env=None):
# XXX: for compatibility, keep this method the same as teuthology.orchestra.remote.sh
def sh(self, script, **kwargs):
"""
Shortcut for run method.

Usage:
my_name = remote.sh('whoami')
remote_date = remote.sh('date')
"""
if 'stdout' not in kwargs:
kwargs['stdout'] = StringIO()
if 'args' not in kwargs:
kwargs['args'] = script
proc = self.run(**kwargs)
return proc.stdout.getvalue()

return misc.sh(command=command, log_limit=log_limit, cwd=cwd,
env=env)

class LocalDaemon(object):
|
||||
def __init__(self, daemon_type, daemon_id):
|
||||
@ -474,16 +486,12 @@ class LocalFuseMount(FuseMount):
|
||||
if self.is_mounted():
|
||||
super(LocalFuseMount, self).umount()
|
||||
|
||||
def mount(self, mount_path=None, mount_fs_name=None):
|
||||
def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
|
||||
if mountpoint is not None:
|
||||
self.mountpoint = mountpoint
|
||||
self.setupfs(name=mount_fs_name)
|
||||
|
||||
self.client_remote.run(
|
||||
args=[
|
||||
'mkdir',
|
||||
'--',
|
||||
self.mountpoint,
|
||||
],
|
||||
)
|
||||
self.client_remote.run(args=['mkdir', '-p', self.mountpoint])
|
||||
|
||||
def list_connections():
|
||||
self.client_remote.run(
|
||||
@ -559,6 +567,8 @@ class LocalFuseMount(FuseMount):
|
||||
|
||||
self.gather_mount_info()
|
||||
|
||||
self.mounted = True
|
||||
|
||||
def _run_python(self, pyscript, py_version='python'):
"""
Override this to remove the daemon-helper prefix that is used otherwise
Some files were not shown because too many files have changed in this diff.