From 141ee55dc7b9809d1a44bf696fc19c06fb2568cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= Date: Wed, 12 Aug 2020 09:35:12 +0200 Subject: [PATCH] import 14.2.11 upstream release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fabian Grünbichler --- ceph/CMakeLists.txt | 2 +- ceph/PendingReleaseNotes | 48 +- ceph/alpine/APKBUILD | 6 +- ceph/ceph.spec | 10 +- ceph/ceph.spec.in | 4 + ceph/changelog.upstream | 10 +- ceph/debian/radosgw.install | 4 + ceph/doc/cephfs/fs-volumes.rst | 46 +- ceph/doc/cephfs/health-messages.rst | 4 +- ceph/doc/cephfs/mds-config-ref.rst | 33 - ceph/doc/dev/msgr2.rst | 394 ++++++++++-- ceph/doc/man/8/CMakeLists.txt | 4 +- ceph/doc/man/8/ceph-diff-sorted.rst | 71 +++ ceph/doc/man/8/rgw-orphan-list.rst | 69 ++ ceph/doc/man_index.rst | 2 + ceph/doc/mgr/prometheus.rst | 68 +- ceph/doc/mgr/telemetry.rst | 10 + ceph/doc/radosgw/config-ref.rst | 8 + ceph/doc/radosgw/index.rst | 1 + ceph/doc/radosgw/opa.rst | 1 + ceph/doc/radosgw/orphans.rst | 115 ++++ .../grafana/dashboards/osds-overview.json | 2 +- .../grafana/dashboards/rbd-details.json | 20 +- .../grafana/dashboards/rbd-overview.json | 2 +- .../cephfs/clusters/1-mds-1-client-micro.yaml | 7 + ceph/qa/objectstore/bluestore-hybrid.yaml | 40 ++ .../qa/standalone/mon/mon-last-epoch-clean.sh | 4 +- ceph/qa/standalone/osd/bad-inc-map.sh | 62 ++ ceph/qa/standalone/scrub/osd-scrub-repair.sh | 2 +- ceph/qa/standalone/scrub/osd-scrub-test.sh | 114 ++++ .../special/ceph_objectstore_tool.py | 2 +- .../fs/basic_functional/tasks/volumes.yaml | 3 + ceph/qa/suites/fs/upgrade/volumes/.qa | 1 + .../suites/fs/upgrade/volumes/import-legacy/% | 0 .../fs/upgrade/volumes/import-legacy/.qa | 1 + .../import-legacy/bluestore-bitmap.yaml | 1 + .../volumes/import-legacy/clusters/.qa | 1 + .../clusters/1-mds-2-client-micro.yaml | 7 + .../fs/upgrade/volumes/import-legacy/conf | 1 + .../upgrade/volumes/import-legacy/overrides/+ | 0 .../volumes/import-legacy/overrides/.qa | 1 + .../import-legacy/overrides/frag_enable.yaml | 1 + .../import-legacy/overrides/pg-warn.yaml | 5 + .../overrides/whitelist_health.yaml | 1 + .../whitelist_wrongly_marked_down.yaml | 1 + .../fs/upgrade/volumes/import-legacy/tasks/% | 0 .../upgrade/volumes/import-legacy/tasks/.qa | 1 + .../volumes/import-legacy/tasks/0-mimic.yaml | 42 ++ .../volumes/import-legacy/tasks/1-client.yaml | 33 + .../import-legacy/tasks/2-upgrade.yaml | 54 ++ .../volumes/import-legacy/tasks/3-verify.yaml | 25 + .../volumes/import-legacy/ubuntu_18.04.yaml | 1 + .../rados/mgr/tasks/module_selftest.yaml | 1 + .../rados/perf/distros/ubuntu_16.04.yaml | 1 + .../rados/perf/distros/ubuntu_latest.yaml | 1 + .../rados/perf/supported-random-distro$ | 1 - .../rados/singleton-flat/valgrind-leaks.yaml | 1 + .../crc-failures/bad_map_crc_failure.yaml | 7 + .../rados/thrash/crc-failures/default.yaml | 0 .../rados/verify/validater/valgrind.yaml | 1 + .../suites/rgw/hadoop-s3a/hadoop/default.yaml | 1 + ceph/qa/suites/rgw/hadoop-s3a/hadoop/v27.yaml | 3 - ceph/qa/suites/rgw/hadoop-s3a/hadoop/v28.yaml | 3 - ceph/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml | 3 + ceph/qa/suites/rgw/tools/+ | 0 ceph/qa/suites/rgw/tools/.qa | 1 + ceph/qa/suites/rgw/tools/centos_latest.yaml | 1 + ceph/qa/suites/rgw/tools/cluster.yaml | 9 + ceph/qa/suites/rgw/tools/tasks.yaml | 19 + .../nautilus-p2p-stress-split/.qa | 1 + .../7-final-workload/rbd-python.yaml | 2 +- .../nautilus-p2p-stress-split/objectstore | 1 - .../nautilus-p2p-stress-split/objectstore/.qa | 1 + 
.../objectstore/bluestore-bitmap.yaml | 1 + .../objectstore/default.yaml | 0 .../objectstore/filestore-xfs.yaml | 1 + ceph/qa/tasks/ceph_fuse.py | 20 +- ceph/qa/tasks/ceph_test_case.py | 43 +- ceph/qa/tasks/cephfs/cephfs_test_case.py | 17 +- ceph/qa/tasks/cephfs/fuse_mount.py | 43 +- ceph/qa/tasks/cephfs/kernel_mount.py | 15 +- ceph/qa/tasks/cephfs/mount.py | 24 +- ceph/qa/tasks/cephfs/test_client_limits.py | 75 ++- ceph/qa/tasks/cephfs/test_scrub.py | 26 +- ceph/qa/tasks/cephfs/test_volumes.py | 459 ++++++------- ceph/qa/tasks/mgr/dashboard/helper.py | 1 + ceph/qa/tasks/mgr/dashboard/test_auth.py | 1 + ceph/qa/tasks/mgr/dashboard/test_health.py | 58 +- ceph/qa/tasks/mgr/dashboard/test_pool.py | 288 +++++---- ceph/qa/tasks/mgr/dashboard/test_rbd.py | 2 + ceph/qa/tasks/mgr/dashboard/test_rgw.py | 1 + ceph/qa/tasks/mgr/dashboard/test_settings.py | 1 + ceph/qa/tasks/mgr/test_crash.py | 1 + ceph/qa/tasks/mgr/test_failover.py | 1 + ceph/qa/tasks/mgr/test_insights.py | 1 + ceph/qa/tasks/mgr/test_module_selftest.py | 1 + ceph/qa/tasks/mgr/test_progress.py | 1 + ceph/qa/tasks/mgr/test_prometheus.py | 1 + ceph/qa/tasks/rgw.py | 10 +- ceph/qa/tasks/vstart_runner.py | 32 +- ceph/qa/workunits/ceph-helpers-root.sh | 9 - ceph/qa/workunits/fs/upgrade/volume_client | 110 ++++ ceph/qa/workunits/rados/test.sh | 3 +- .../rados/test_envlibrados_for_rocksdb.sh | 10 +- ceph/qa/workunits/rgw/test_rgw_orphan_list.sh | 512 +++++++++++++++ ceph/selinux/ceph.te | 4 + ceph/src/.git_version | 4 +- .../ceph_volume/devices/raw/common.py | 9 +- .../ceph_volume/devices/raw/list.py | 28 +- .../ceph_volume/devices/raw/prepare.py | 59 +- .../ceph_volume/tests/devices/raw/__init__.py | 0 .../tests/devices/raw/test_prepare.py | 97 +++ ceph/src/ceph_osd.cc | 6 +- ceph/src/ceph_syn.cc | 1 - ceph/src/client/Client.cc | 43 +- ceph/src/client/Client.h | 3 + ceph/src/cls/rgw/cls_rgw_types.cc | 1 + ceph/src/common/config.cc | 1 + ceph/src/common/options.cc | 56 +- ceph/src/include/msgr.h | 7 +- ceph/src/include/rbd/librbd.h | 19 +- ceph/src/include/rbd/librbd.hpp | 10 +- ceph/src/librbd/Watcher.cc | 2 +- ceph/src/librbd/io/ImageRequestWQ.cc | 75 +++ ceph/src/librbd/io/ImageRequestWQ.h | 4 + ceph/src/librbd/librbd.cc | 49 +- ceph/src/mds/Beacon.cc | 8 +- ceph/src/mds/CDir.cc | 32 +- ceph/src/mds/CDir.h | 5 - ceph/src/mds/CInode.cc | 28 +- ceph/src/mds/LogSegment.h | 62 +- ceph/src/mds/MDCache.cc | 96 ++- ceph/src/mds/MDCache.h | 35 +- ceph/src/mds/MDLog.cc | 2 +- ceph/src/mds/MDLog.h | 2 +- ceph/src/mds/MDSRank.cc | 9 + ceph/src/mds/Mutation.h | 11 +- ceph/src/mds/Server.cc | 18 +- ceph/src/mds/SessionMap.h | 4 + ceph/src/mds/events/EMetaBlob.h | 2 +- ceph/src/mds/journal.cc | 81 ++- ceph/src/mgr/DaemonServer.cc | 6 + ceph/src/mon/OSDMonitor.cc | 44 +- ceph/src/mon/OSDMonitor.h | 4 + ceph/src/mon/PGMap.cc | 6 +- ceph/src/msg/CMakeLists.txt | 1 + ceph/src/msg/async/AsyncMessenger.cc | 1 + ceph/src/msg/async/ProtocolV2.cc | 291 +++------ ceph/src/msg/async/ProtocolV2.h | 21 +- ceph/src/msg/async/crypto_onwire.cc | 138 ++-- ceph/src/msg/async/crypto_onwire.h | 25 +- ceph/src/msg/async/frames_v2.cc | 480 ++++++++++++++ ceph/src/msg/async/frames_v2.h | 449 +++++++------ ceph/src/os/CMakeLists.txt | 2 + ceph/src/os/bluestore/Allocator.cc | 32 +- ceph/src/os/bluestore/Allocator.h | 5 +- ceph/src/os/bluestore/AvlAllocator.cc | 422 ++++++++++++ ceph/src/os/bluestore/AvlAllocator.h | 257 ++++++++ ceph/src/os/bluestore/BitmapAllocator.h | 5 +- ceph/src/os/bluestore/BlueStore.cc | 2 +- ceph/src/os/bluestore/HybridAllocator.cc | 222 +++++++ 
ceph/src/os/bluestore/HybridAllocator.h | 48 ++ ceph/src/os/bluestore/KernelDevice.cc | 40 +- ceph/src/os/bluestore/StupidAllocator.cc | 15 +- ceph/src/os/bluestore/StupidAllocator.h | 5 +- .../os/bluestore/fastbmap_allocator_impl.cc | 113 +++- .../os/bluestore/fastbmap_allocator_impl.h | 128 ++-- ceph/src/osd/OSD.cc | 26 +- ceph/src/osd/PG.cc | 56 +- ceph/src/osd/PG.h | 7 +- ceph/src/pybind/cephfs/cephfs.pyx | 10 +- ceph/src/pybind/mgr/alerts/module.py | 5 +- .../mgr/dashboard/controllers/health.py | 3 - .../pybind/mgr/dashboard/controllers/rbd.py | 10 +- .../dist/en-US/2.6b39f7993dea306e22fe.js | 1 + .../dist/en-US/2.94553c8b5ce53156481e.js | 1 - .../dist/en-US/7.6c6f774332e18ad6d7f3.js | 1 + .../dist/en-US/7.da876ba85e66dec4f9dc.js | 1 - .../dashboard/frontend/dist/en-US/index.html | 2 +- ...d231a3.js => main.68c073418034a57dd1dd.js} | 2 +- .../en-US/runtime.8e2dc9ef618a9b9db481.js | 1 - .../en-US/runtime.bae04349ee2a2342490f.js | 1 + .../iscsi-target-list.component.spec.ts | 103 +++ .../iscsi-target-list.component.ts | 2 +- .../rbd-configuration-list.component.html | 9 +- .../rbd-details/rbd-details.component.html | 5 + .../ceph/block/rbd-form/rbd-form.component.ts | 10 + .../block/rbd-list/rbd-list.component.html | 2 +- .../ceph/block/rbd-list/rbd-list.component.ts | 10 +- .../src/app/ceph/block/rbd-list/rbd-model.ts | 7 + .../dashboard/health/health.component.html | 5 +- .../dashboard/health/health.component.scss | 2 +- .../dashboard/health/health.component.spec.ts | 2 +- .../ceph/dashboard/mds-summary.pipe.spec.ts | 28 +- .../app/ceph/dashboard/mds-summary.pipe.ts | 27 +- .../ceph/dashboard/mgr-summary.pipe.spec.ts | 12 +- .../app/ceph/dashboard/mgr-summary.pipe.ts | 17 +- .../mgr/dashboard/services/ceph_service.py | 20 +- ceph/src/pybind/mgr/dashboard/services/rbd.py | 15 + .../mgr/dashboard/tests/test_ceph_service.py | 67 ++ .../mgr/dashboard/tests/test_rbd_service.py | 37 ++ ceph/src/pybind/mgr/k8sevents/module.py | 37 +- ceph/src/pybind/mgr/progress/module.py | 4 + ceph/src/pybind/mgr/prometheus/module.py | 124 +++- ceph/src/pybind/mgr/telemetry/module.py | 8 +- .../mgr/volumes/fs/operations/template.py | 28 - .../fs/operations/versions/subvolume_base.py | 10 +- .../fs/operations/versions/subvolume_v1.py | 76 +-- .../mgr/volumes/fs/operations/volume.py | 39 +- ceph/src/pybind/mgr/volumes/fs/volume.py | 50 +- ceph/src/pybind/mgr/volumes/module.py | 71 ++- ceph/src/pybind/rbd/rbd.pyx | 49 ++ ceph/src/rgw/CMakeLists.txt | 3 + ceph/src/rgw/rgw-orphan-list | 94 +++ ceph/src/rgw/rgw_admin.cc | 36 +- ceph/src/rgw/rgw_auth.cc | 13 +- ceph/src/rgw/rgw_auth.h | 10 + ceph/src/rgw/rgw_auth_filters.h | 4 + ceph/src/rgw/rgw_bucket.cc | 44 +- ceph/src/rgw/rgw_cr_tools.cc | 3 +- ceph/src/rgw/rgw_lc.cc | 2 + ceph/src/rgw/rgw_main.cc | 7 +- ceph/src/rgw/rgw_op.cc | 31 +- ceph/src/rgw/rgw_op.h | 7 + ceph/src/rgw/rgw_opa.cc | 1 + ceph/src/rgw/rgw_orphan.cc | 601 ++++++++++++++++++ ceph/src/rgw/rgw_orphan.h | 76 +++ ceph/src/rgw/rgw_rados.cc | 31 +- ceph/src/rgw/rgw_rados.h | 19 +- ceph/src/rgw/rgw_rest_role.cc | 2 +- ceph/src/rgw/rgw_rest_user.cc | 3 +- ceph/src/rgw/rgw_url.cc | 13 +- ceph/src/rgw/rgw_user.cc | 3 +- ceph/src/test/cli/radosgw-admin/help.t | 1 + ceph/src/test/librbd/test_librbd.cc | 62 ++ ceph/src/test/librbd/test_mock_Watcher.cc | 12 - ceph/src/test/msgr/CMakeLists.txt | 5 + ceph/src/test/msgr/test_frames_v2.cc | 450 +++++++++++++ ceph/src/test/msgr/test_msgr.cc | 116 ++++ ceph/src/test/objectstore/Allocator_bench.cc | 34 +- ceph/src/test/objectstore/Allocator_test.cc | 97 ++- 
ceph/src/test/objectstore/CMakeLists.txt | 22 + .../objectstore/fastbmap_allocator_test.cc | 128 ++++ .../test/objectstore/hybrid_allocator_test.cc | 231 +++++++ ceph/src/test/objectstore/test_bdev.cc | 111 ++++ .../test/objectstore/test_bluestore_types.cc | 2 + ceph/src/test/pybind/test_cephfs.py | 9 + ceph/src/test/pybind/test_rbd.py | 20 + ceph/src/test/rgw/CMakeLists.txt | 2 + ceph/src/test/rgw/rgw_multi/tests_ps.py | 2 +- ceph/src/test/rgw/test-ceph-diff-sorted.sh | 108 ++++ ceph/src/test/rgw/test_rgw_iam_policy.cc | 5 + ceph/src/test/rgw/test_rgw_url.cc | 11 +- ceph/src/test/system/CMakeLists.txt | 21 +- ceph/src/test/system/st_rados_notify.cc | 81 --- ceph/src/test/system/st_rados_watch.cc | 103 --- ceph/src/tools/CMakeLists.txt | 4 + ceph/src/tools/ceph-diff-sorted.cc | 173 +++++ ceph/src/tools/ceph_objectstore_tool.cc | 19 +- ceph/src/tools/cephfs/MDSUtility.cc | 6 +- ceph/src/tools/rbd/action/Import.cc | 28 +- 261 files changed, 8771 insertions(+), 1904 deletions(-) create mode 100644 ceph/doc/man/8/ceph-diff-sorted.rst create mode 100644 ceph/doc/man/8/rgw-orphan-list.rst create mode 100644 ceph/doc/radosgw/orphans.rst create mode 100644 ceph/qa/cephfs/clusters/1-mds-1-client-micro.yaml create mode 100644 ceph/qa/objectstore/bluestore-hybrid.yaml create mode 100755 ceph/qa/standalone/osd/bad-inc-map.sh create mode 120000 ceph/qa/suites/fs/upgrade/volumes/.qa create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/% create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/.qa create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/bluestore-bitmap.yaml create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/.qa create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/1-mds-2-client-micro.yaml create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/conf create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/+ create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/.qa create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/frag_enable.yaml create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/pg-warn.yaml create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/whitelist_health.yaml create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/whitelist_wrongly_marked_down.yaml create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/% create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/.qa create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/0-mimic.yaml create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/1-client.yaml create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/2-upgrade.yaml create mode 100644 ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/3-verify.yaml create mode 120000 ceph/qa/suites/fs/upgrade/volumes/import-legacy/ubuntu_18.04.yaml create mode 120000 ceph/qa/suites/rados/perf/distros/ubuntu_16.04.yaml create mode 120000 ceph/qa/suites/rados/perf/distros/ubuntu_latest.yaml delete mode 120000 ceph/qa/suites/rados/perf/supported-random-distro$ create mode 100644 ceph/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml create mode 100644 ceph/qa/suites/rados/thrash/crc-failures/default.yaml create mode 100644 ceph/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml delete mode 100644 ceph/qa/suites/rgw/hadoop-s3a/hadoop/v27.yaml delete mode 100644 
ceph/qa/suites/rgw/hadoop-s3a/hadoop/v28.yaml create mode 100644 ceph/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml create mode 100644 ceph/qa/suites/rgw/tools/+ create mode 120000 ceph/qa/suites/rgw/tools/.qa create mode 120000 ceph/qa/suites/rgw/tools/centos_latest.yaml create mode 100644 ceph/qa/suites/rgw/tools/cluster.yaml create mode 100644 ceph/qa/suites/rgw/tools/tasks.yaml create mode 120000 ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/.qa delete mode 120000 ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore create mode 120000 ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/.qa create mode 120000 ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/bluestore-bitmap.yaml create mode 100644 ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/default.yaml create mode 120000 ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/filestore-xfs.yaml create mode 100755 ceph/qa/workunits/fs/upgrade/volume_client create mode 100755 ceph/qa/workunits/rgw/test_rgw_orphan_list.sh create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/devices/raw/__init__.py create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/devices/raw/test_prepare.py mode change 100644 => 100755 ceph/src/client/Client.cc mode change 100644 => 100755 ceph/src/mds/CDir.cc create mode 100644 ceph/src/msg/async/frames_v2.cc create mode 100755 ceph/src/os/bluestore/AvlAllocator.cc create mode 100755 ceph/src/os/bluestore/AvlAllocator.h create mode 100644 ceph/src/os/bluestore/HybridAllocator.cc create mode 100644 ceph/src/os/bluestore/HybridAllocator.h create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/2.6b39f7993dea306e22fe.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/2.94553c8b5ce53156481e.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/7.6c6f774332e18ad6d7f3.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/7.da876ba85e66dec4f9dc.js rename ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/{main.3eca3295c9e90ad231a3.js => main.68c073418034a57dd1dd.js} (89%) delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/runtime.8e2dc9ef618a9b9db481.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/runtime.bae04349ee2a2342490f.js create mode 100644 ceph/src/pybind/mgr/dashboard/tests/test_ceph_service.py create mode 100644 ceph/src/pybind/mgr/dashboard/tests/test_rbd_service.py create mode 100755 ceph/src/rgw/rgw-orphan-list create mode 100644 ceph/src/test/msgr/test_frames_v2.cc create mode 100755 ceph/src/test/objectstore/hybrid_allocator_test.cc create mode 100755 ceph/src/test/objectstore/test_bdev.cc create mode 100755 ceph/src/test/rgw/test-ceph-diff-sorted.sh delete mode 100644 ceph/src/test/system/st_rados_notify.cc delete mode 100644 ceph/src/test/system/st_rados_watch.cc create mode 100644 ceph/src/tools/ceph-diff-sorted.cc diff --git a/ceph/CMakeLists.txt b/ceph/CMakeLists.txt index ae684bea2..31b995e95 100644 --- a/ceph/CMakeLists.txt +++ b/ceph/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.5.1) project(ceph CXX C ASM) -set(VERSION 14.2.10) +set(VERSION 14.2.11) if(POLICY CMP0028) cmake_policy(SET CMP0028 NEW) diff --git a/ceph/PendingReleaseNotes b/ceph/PendingReleaseNotes index 87109b088..76cc45fcb 100644 --- a/ceph/PendingReleaseNotes +++ b/ceph/PendingReleaseNotes @@ -1,38 +1,14 @@ -14.2.9 ------- +>= 14.2.11 +---------- -* Bucket notifications now 
support Kafka endpoints. This requires librdkafka of - version 0.9.2 and up. Note that Ubuntu 16.04.6 LTS (Xenial Xerus) has an older - version of librdkafka, and would require an update to the library. +* RGW: The ``radosgw-admin`` sub-commands dealing with orphans -- + ``radosgw-admin orphans find``, ``radosgw-admin orphans finish``, + ``radosgw-admin orphans list-jobs`` -- have been deprecated. They + have not been actively maintained and they store intermediate + results on the cluster, which could fill a nearly-full cluster. + They have been replaced by a tool, currently considered + experimental, ``rgw-orphan-list``. -* The pool parameter ``target_size_ratio``, used by the pg autoscaler, - has changed meaning. It is now normalized across pools, rather than - specifying an absolute ratio. For details, see :ref:`pg-autoscaler`. - If you have set target size ratios on any pools, you may want to set - these pools to autoscale ``warn`` mode to avoid data movement during - the upgrade:: - - ceph osd pool set pg_autoscale_mode warn - -* The behaviour of the ``-o`` argument to the rados tool has been reverted to - its orignal behaviour of indicating an output file. This reverts it to a more - consistent behaviour when compared to other tools. Specifying object size is now - accomplished by using an upper case O ``-O``. - -* The format of MDSs in `ceph fs dump` has changed. - -* Ceph will issue a health warning if a RADOS pool's ``size`` is set to 1 - or in other words the pool is configured with no redundancy. This can - be fixed by setting the pool size to the minimum recommended value - with:: - - ceph osd pool set size - - The warning can be silenced with:: - - ceph config set global mon_warn_on_pool_no_redundancy false - -* RGW: bucket listing performance on sharded bucket indexes has been - notably improved by heuristically -- and significantly, in many - cases -- reducing the number of entries requested from each bucket - index shard. +* Now when noscrub and/or nodeep-scrub flags are set globally or per pool, + scheduled scrubs of the type disabled will be aborted. All user initiated + scrubs are NOT interrupted. 
diff --git a/ceph/alpine/APKBUILD b/ceph/alpine/APKBUILD index 63543c4d5..4cf2351ec 100644 --- a/ceph/alpine/APKBUILD +++ b/ceph/alpine/APKBUILD @@ -1,7 +1,7 @@ # Contributor: John Coyle # Maintainer: John Coyle pkgname=ceph -pkgver=14.2.10 +pkgver=14.2.11 pkgrel=0 pkgdesc="Ceph is a distributed object store and file system" pkgusers="ceph" @@ -64,7 +64,7 @@ makedepends=" xmlstarlet yasm " -source="ceph-14.2.10.tar.bz2" +source="ceph-14.2.11.tar.bz2" subpackages=" $pkgname-base $pkgname-common @@ -117,7 +117,7 @@ _sysconfdir=/etc _udevrulesdir=/etc/udev/rules.d _python_sitelib=/usr/lib/python2.7/site-packages -builddir=$srcdir/ceph-14.2.10 +builddir=$srcdir/ceph-14.2.11 build() { export CEPH_BUILD_VIRTUALENV=$builddir diff --git a/ceph/ceph.spec b/ceph/ceph.spec index b6516fcc9..915bffb95 100644 --- a/ceph/ceph.spec +++ b/ceph/ceph.spec @@ -109,7 +109,7 @@ # main package definition ################################################################################# Name: ceph -Version: 14.2.10 +Version: 14.2.11 Release: 0%{?dist} %if 0%{?fedora} || 0%{?rhel} Epoch: 2 @@ -125,7 +125,7 @@ License: LGPL-2.1 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and Group: System/Filesystems %endif URL: http://ceph.com/ -Source0: %{?_remote_tarball_prefix}ceph-14.2.10.tar.bz2 +Source0: %{?_remote_tarball_prefix}ceph-14.2.11.tar.bz2 %if 0%{?suse_version} # _insert_obs_source_lines_here ExclusiveArch: x86_64 aarch64 ppc64le s390x @@ -1126,7 +1126,7 @@ This package provides Ceph’s default alerts for Prometheus. # common ################################################################################# %prep -%autosetup -p1 -n ceph-14.2.10 +%autosetup -p1 -n ceph-14.2.11 %build # LTO can be enabled as soon as the following GCC bug is fixed: @@ -1470,6 +1470,7 @@ fi %{_mandir}/man8/ceph-authtool.8* %{_mandir}/man8/ceph-conf.8* %{_mandir}/man8/ceph-dencoder.8* +%{_mandir}/man8/ceph-diff-sorted.8* %{_mandir}/man8/ceph-rbdnamer.8* %{_mandir}/man8/ceph-syn.8* %{_mandir}/man8/ceph-post-file.8* @@ -1482,6 +1483,7 @@ fi %{_mandir}/man8/rbd-replay.8* %{_mandir}/man8/rbd-replay-many.8* %{_mandir}/man8/rbd-replay-prep.8* +%{_mandir}/man8/rgw-orphan-list.8* %dir %{_datadir}/ceph/ %{_datadir}/ceph/known_hosts_drop.ceph.com %{_datadir}/ceph/id_rsa_drop.ceph.com @@ -1847,10 +1849,12 @@ fi %{_mandir}/man8/rbd-nbd.8* %files radosgw +%{_bindir}/ceph-diff-sorted %{_bindir}/radosgw %{_bindir}/radosgw-token %{_bindir}/radosgw-es %{_bindir}/radosgw-object-expirer +%{_bindir}/rgw-orphan-list %{_mandir}/man8/radosgw.8* %dir %{_localstatedir}/lib/ceph/radosgw %{_unitdir}/ceph-radosgw@.service diff --git a/ceph/ceph.spec.in b/ceph/ceph.spec.in index 03a73dad0..046111ada 100644 --- a/ceph/ceph.spec.in +++ b/ceph/ceph.spec.in @@ -1470,6 +1470,7 @@ fi %{_mandir}/man8/ceph-authtool.8* %{_mandir}/man8/ceph-conf.8* %{_mandir}/man8/ceph-dencoder.8* +%{_mandir}/man8/ceph-diff-sorted.8* %{_mandir}/man8/ceph-rbdnamer.8* %{_mandir}/man8/ceph-syn.8* %{_mandir}/man8/ceph-post-file.8* @@ -1482,6 +1483,7 @@ fi %{_mandir}/man8/rbd-replay.8* %{_mandir}/man8/rbd-replay-many.8* %{_mandir}/man8/rbd-replay-prep.8* +%{_mandir}/man8/rgw-orphan-list.8* %dir %{_datadir}/ceph/ %{_datadir}/ceph/known_hosts_drop.ceph.com %{_datadir}/ceph/id_rsa_drop.ceph.com @@ -1847,10 +1849,12 @@ fi %{_mandir}/man8/rbd-nbd.8* %files radosgw +%{_bindir}/ceph-diff-sorted %{_bindir}/radosgw %{_bindir}/radosgw-token %{_bindir}/radosgw-es %{_bindir}/radosgw-object-expirer +%{_bindir}/rgw-orphan-list %{_mandir}/man8/radosgw.8* %dir %{_localstatedir}/lib/ceph/radosgw 
%{_unitdir}/ceph-radosgw@.service diff --git a/ceph/changelog.upstream b/ceph/changelog.upstream index 65e170f79..1d9350849 100644 --- a/ceph/changelog.upstream +++ b/ceph/changelog.upstream @@ -1,7 +1,13 @@ -ceph (14.2.10-1xenial) xenial; urgency=medium +ceph (14.2.11-1xenial) xenial; urgency=medium - -- Jenkins Build Slave User Thu, 25 Jun 2020 18:20:02 +0000 + -- Jenkins Build Slave User Mon, 10 Aug 2020 20:49:33 +0000 + +ceph (14.2.11-1) stable; urgency=medium + + * New upstream release + + -- Ceph Release Team Mon, 10 Aug 2020 20:15:20 +0000 ceph (14.2.10-1) stable; urgency=medium diff --git a/ceph/debian/radosgw.install b/ceph/debian/radosgw.install index 329ea0e48..9bb764937 100644 --- a/ceph/debian/radosgw.install +++ b/ceph/debian/radosgw.install @@ -1,6 +1,10 @@ lib/systemd/system/ceph-radosgw* +usr/bin/ceph-diff-sorted usr/bin/radosgw usr/bin/radosgw-es usr/bin/radosgw-object-expirer usr/bin/radosgw-token +usr/bin/rgw-orphan-list +usr/share/man/man8/ceph-diff-sorted.8 usr/share/man/man8/radosgw.8 +usr/share/man/man8/rgw-orphan-list.8 diff --git a/ceph/doc/cephfs/fs-volumes.rst b/ceph/doc/cephfs/fs-volumes.rst index 17e6e0c24..a7f284fac 100644 --- a/ceph/doc/cephfs/fs-volumes.rst +++ b/ceph/doc/cephfs/fs-volumes.rst @@ -174,6 +174,13 @@ The output format is json and contains fields as follows. * path: absolute path of a subvolume * type: subvolume type indicating whether it's clone or subvolume * pool_namespace: RADOS namespace of the subvolume +* features: features supported by the subvolume + +The subvolume "features" are based on the internal version of the subvolume and is a list containing +a subset of the following features, + +* "snapshot-clone": supports cloning using a subvolumes snapshot as the source +* "snapshot-autoprotect": supports automatically protecting snapshots, that are active clone sources, from deletion List subvolumes using:: @@ -195,6 +202,17 @@ List snapshots of a subvolume using:: $ ceph fs subvolume snapshot ls [--group_name ] +Fetch the metadata of a snapshot using:: + + $ ceph fs subvolume snapshot info [--group_name ] + +The output format is json and contains fields as follows. + +* created_at: time of creation of snapshot in the format "YYYY-MM-DD HH:MM:SS:ffffff" +* data_pool: data pool the snapshot belongs to +* has_pending_clones: "yes" if snapshot clone is in progress otherwise "no" +* size: snapshot size in bytes + Cloning Snapshots ----------------- @@ -202,10 +220,20 @@ Subvolumes can be created by cloning subvolume snapshots. Cloning is an asynchro data from a snapshot to a subvolume. Due to this bulk copy nature, cloning is currently inefficient for very huge data sets. -Before starting a clone operation, the snapshot should be protected. Protecting a snapshot ensures that the snapshot -cannot be deleted when a clone operation is in progress. Snapshots can be protected using:: +.. note:: Removing a snapshot (source subvolume) would fail if there are pending or in progress clone operations. + +Protecting snapshots prior to cloning was a pre-requisite in the Nautilus release, and the commands to protect/unprotect +snapshots were introduced for this purpose. This pre-requisite, and hence the commands to protect/unprotect, is being +deprecated in mainline CephFS, and may be removed from a future release. + +The commands being deprecated are:: $ ceph fs subvolume snapshot protect [--group_name ] + $ ceph fs subvolume snapshot unprotect [--group_name ] + +.. 
note:: Using the above commands would not result in an error, but they serve no useful function. + +.. note:: Use subvolume info command to fetch subvolume metadata regarding supported "features" to help decide if protect/unprotect of snapshots is required, based on the "snapshot-autoprotect" feature availability. To initiate a clone operation use:: @@ -231,12 +259,11 @@ A clone can be in one of the following states: #. `pending` : Clone operation has not started #. `in-progress` : Clone operation is in progress -#. `complete` : Clone operation has sucessfully finished +#. `complete` : Clone operation has successfully finished #. `failed` : Clone operation has failed Sample output from an `in-progress` clone operation:: - $ ceph fs subvolume snapshot protect cephfs subvol1 snap1 $ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1 $ ceph fs clone status cephfs clone1 { @@ -254,7 +281,7 @@ Sample output from an `in-progress` clone operation:: .. note:: Cloned subvolumes are accessible only after the clone operation has successfully completed. -For a successsful clone operation, `clone status` would look like so:: +For a successful clone operation, `clone status` would look like so:: $ ceph fs clone status cephfs clone1 { @@ -270,14 +297,6 @@ To delete a partial clone use:: $ ceph fs subvolume rm [--group_name ] --force -When no clone operations are in progress or scheduled, the snaphot can be unprotected. To unprotect a snapshot use:: - - $ ceph fs subvolume snapshot unprotect [--group_name ] - -Note that unprotecting a snapshot would fail if there are pending or in progress clone operations. Also note that, -only unprotected snapshots can be removed. This guarantees that a snapshot cannot be deleted when clones are pending -(or in progress). - .. note:: Cloning only synchronizes directories, regular files and symbolic links. Also, inode timestamps (access and modification times) are synchronized upto seconds granularity. @@ -287,7 +306,6 @@ An `in-progress` or a `pending` clone operation can be canceled. To cancel a clo On successful cancelation, the cloned subvolume is moved to `canceled` state:: - $ ceph fs subvolume snapshot protect cephfs subvol1 snap1 $ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1 $ ceph fs clone cancel cephfs clone1 $ ceph fs clone status cephfs clone1 diff --git a/ceph/doc/cephfs/health-messages.rst b/ceph/doc/cephfs/health-messages.rst index aa93a9d71..b096e124c 100644 --- a/ceph/doc/cephfs/health-messages.rst +++ b/ceph/doc/cephfs/health-messages.rst @@ -59,8 +59,8 @@ by the setting ``mds_log_max_segments``, and when the number of segments exceeds that setting the MDS starts writing back metadata so that it can remove (trim) the oldest segments. If this writeback is happening too slowly, or a software bug is preventing trimming, then this health -message may appear. The threshold for this message to appear is for the -number of segments to be double ``mds_log_max_segments``. +message may appear. The threshold for this message to appear is controlled by +the config option ``mds_log_warn_factor``, the default is 2.0. 
Message: "Client *name* failing to respond to capability release" Code: MDS_HEALTH_CLIENT_LATE_RELEASE, MDS_HEALTH_CLIENT_LATE_RELEASE_MANY diff --git a/ceph/doc/cephfs/mds-config-ref.rst b/ceph/doc/cephfs/mds-config-ref.rst index 4e7a5abc0..b91a44245 100644 --- a/ceph/doc/cephfs/mds-config-ref.rst +++ b/ceph/doc/cephfs/mds-config-ref.rst @@ -2,14 +2,6 @@ MDS Config Reference ====================== -``mon force standby active`` - -:Description: If ``true`` monitors force standby-replay to be active. Set - under ``[mon]`` or ``[global]``. - -:Type: Boolean -:Default: ``true`` - ``mds cache memory limit`` :Description: The memory limit the MDS should enforce for its cache. @@ -540,31 +532,6 @@ :Default: ``0`` -``mds standby for name`` - -:Description: An MDS daemon will standby for another MDS daemon of the name - specified in this setting. - -:Type: String -:Default: N/A - - -``mds standby for rank`` - -:Description: An MDS daemon will standby for an MDS daemon of this rank. -:Type: 32-bit Integer -:Default: ``-1`` - - -``mds standby replay`` - -:Description: Determines whether a ``ceph-mds`` daemon should poll and replay - the log of an active MDS (hot standby). - -:Type: Boolean -:Default: ``false`` - - ``mds min caps per client`` :Description: Set the minimum number of capabilities a client may hold. diff --git a/ceph/doc/dev/msgr2.rst b/ceph/doc/dev/msgr2.rst index 7257a7c7f..b24c06b28 100644 --- a/ceph/doc/dev/msgr2.rst +++ b/ceph/doc/dev/msgr2.rst @@ -1,7 +1,7 @@ .. _msgr2-protocol: -msgr2 protocol -============== +msgr2 protocol (msgr2.0 and msgr2.1) +==================================== This is a revision of the legacy Ceph on-wire protocol that was implemented by the SimpleMessenger. It addresses performance and @@ -20,7 +20,7 @@ This protocol revision has several goals relative to the original protocol: (e.g., padding) that keep computation and memory copies out of the fast path where possible. * *Signing*. We will allow for traffic to be signed (but not - necessarily encrypted). This may not be implemented in the initial version. + necessarily encrypted). This is not implemented. Definitions ----------- @@ -56,10 +56,19 @@ Banner Both the client and server, upon connecting, send a banner:: - "ceph %x %x\n", protocol_features_suppored, protocol_features_required + "ceph v2\n" + __le16 banner payload length + banner payload -The protocol features are a new, distinct namespace. Initially no -features are defined or required, so this will be "ceph 0 0\n". +A banner payload has the form:: + + __le64 peer_supported_features + __le64 peer_required_features + +This is a new, distinct feature bit namespace (CEPH_MSGR2_*). +Currently, only CEPH_MSGR2_FEATURE_REVISION_1 is defined. It is +supported but not required, so that msgr2.0 and msgr2.1 peers +can talk to each other. If the remote party advertises required features we don't support, we can disconnect. @@ -79,27 +88,150 @@ can disconnect. Frame format ------------ -All further data sent or received is contained by a frame. Each frame has -the form:: +After the banners are exchanged, all further communication happens +in frames. The exact format of the frame depends on the connection +mode (msgr2.0-crc, msgr2.0-secure, msgr2.1-crc or msgr2.1-secure). +All connections start in crc mode (either msgr2.0-crc or msgr2.1-crc, +depending on peer_supported_features from the banner). 
- frame_len (le32) - tag (TAG_* le32) - frame_header_checksum (le32) - payload - [payload padding -- only present after stream auth phase] - [signature -- only present after stream auth phase] +Each frame has a 32-byte preamble:: + __u8 tag + __u8 number of segments + { + __le32 segment length + __le16 segment alignment + } * 4 + reserved (2 bytes) + __le32 preamble crc -* The frame_header_checksum is over just the frame_len and tag values (8 bytes). +An empty frame has one empty segment. A non-empty frame can have +between one and four segments, all segments except the last may be +empty. -* frame_len includes everything after the frame_len le32 up to the end of the - frame (all payloads, signatures, and padding). +If there are less than four segments, unused (trailing) segment +length and segment alignment fields are zeroed. -* The payload format and length is determined by the tag. +The reserved bytes are zeroed. -* The signature portion is only present if the authentication phase - has completed (TAG_AUTH_DONE has been sent) and signatures are - enabled. +The preamble checksum is CRC32-C. It covers everything up to +itself (28 bytes) and is calculated and verified irrespective of +the connection mode (i.e. even if the frame is encrypted). + +### msgr2.0-crc mode + +A msgr2.0-crc frame has the form:: + + preamble (32 bytes) + { + segment payload + } * number of segments + epilogue (17 bytes) + +where epilogue is:: + + __u8 late_flags + { + __le32 segment crc + } * 4 + +late_flags is used for frame abortion. After transmitting the +preamble and the first segment, the sender can fill the remaining +segments with zeros and set a flag to indicate that the receiver must +drop the frame. This allows the sender to avoid extra buffering +when a frame that is being put on the wire is revoked (i.e. yanked +out of the messenger): payload buffers can be unpinned and handed +back to the user immediately, without making a copy or blocking +until the whole frame is transmitted. Currently this is used only +by the kernel client, see ceph_msg_revoke(). + +The segment checksum is CRC32-C. For "used" empty segments, it is +set to (__le32)-1. For unused (trailing) segments, it is zeroed. + +The crcs are calculated just to protect against bit errors. +No authenticity guarantees are provided, unlike in msgr1 which +attempted to provide some authenticity guarantee by optionally +signing segment lengths and crcs with the session key. + +Issues: + +1. As part of introducing a structure for a generic frame with + variable number of segments suitable for both control and + message frames, msgr2.0 moved the crc of the first segment of + the message frame (ceph_msg_header2) into the epilogue. + + As a result, ceph_msg_header2 can no longer be safely + interpreted before the whole frame is read off the wire. + This is a regression from msgr1, because in order to scatter + the payload directly into user-provided buffers and thus avoid + extra buffering and copying when receiving message frames, + ceph_msg_header2 must be available in advance -- it stores + the transaction id which the user buffers are keyed on. + The implementation has to choose between forgoing this + optimization or acting on an unverified segment. + +2. late_flags is not covered by any crc. Since it stores the + abort flag, a single bit flip can result in a completed frame + being dropped (causing the sender to hang waiting for a reply) + or, worse, in an aborted frame with garbage segment payloads + being dispatched. 
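For illustration, a minimal Python sketch of the 32-byte preamble layout described above, assuming the third-party ``crc32c`` package for CRC32-C (the tag and segment sizes below are arbitrary placeholders, not values taken from the protocol)::

    import struct
    import crc32c  # assumption: third-party CRC-32C package, not part of this patch

    def build_preamble(tag, segments):
        """Build a 32-byte preamble: tag, segment count, four segment
        descriptors, two reserved bytes, CRC32-C over the first 28 bytes."""
        assert 1 <= len(segments) <= 4
        buf = struct.pack("<BB", tag, len(segments))
        for length, alignment in segments:
            buf += struct.pack("<IH", length, alignment)
        buf += b"\x00" * 6 * (4 - len(segments))  # unused trailing descriptors are zeroed
        buf += b"\x00\x00"                        # reserved bytes, zeroed
        return buf + struct.pack("<I", crc32c.crc32c(buf))  # crc covers the 28 bytes above

    preamble = build_preamble(tag=0x11, segments=[(20, 8), (70, 0)])  # placeholder values
    assert len(preamble) == 32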
+ + This was the case with msgr1 and got carried over to msgr2.0. + +### msgr2.1-crc mode + +Differences from msgr2.0-crc: + +1. The crc of the first segment is stored at the end of the + first segment, not in the epilogue. The epilogue stores up to + three crcs, not up to four. + + If the first segment is empty, (__le32)-1 crc is not generated. + +2. The epilogue is generated only if the frame has more than one + segment (i.e. at least one of second to fourth segments is not + empty). Rationale: If the frame has only one segment, it cannot + be aborted and there are no crcs to store in the epilogue. + +3. Unchecksummed late_flags is replaced with late_status which + builds in bit error detection by using a 4-bit nibble per flag + and two code words that are Hamming Distance = 4 apart (and not + all zeros or ones). This comes at the expense of having only + one reserved flag, of course. + +Some example frames: + +* A 0+0+0+0 frame (empty, no epilogue):: + + preamble (32 bytes) + +* A 20+0+0+0 frame (no epilogue):: + + preamble (32 bytes) + segment1 payload (20 bytes) + __le32 segment1 crc + +* A 0+70+0+0 frame:: + + preamble (32 bytes) + segment2 payload (70 bytes) + epilogue (13 bytes) + +* A 20+70+0+350 frame:: + + preamble (32 bytes) + segment1 payload (20 bytes) + __le32 segment1 crc + segment2 payload (70 bytes) + segment4 payload (350 bytes) + epilogue (13 bytes) + +where epilogue is:: + + __u8 late_status + { + __le32 segment crc + } * 3 Hello ----- @@ -198,47 +330,197 @@ authentication method as the first attempt: Post-auth frame format ---------------------- -The frame format is fixed (see above), but can take three different -forms, depending on the AUTH_DONE flags: +Depending on the negotiated connection mode from TAG_AUTH_DONE, the +connection either stays in crc mode or switches to the corresponding +secure mode (msgr2.0-secure or msgr2.1-secure). -* If neither FLAG_SIGNED or FLAG_ENCRYPTED is specified, things are simple:: +### msgr2.0-secure mode - frame_len - tag - payload - payload_padding (out to auth block_size) +A msgr2.0-secure frame has the form:: - - The padding is some number of bytes < the auth block_size that - brings the total length of the payload + payload_padding to a - multiple of block_size. It does not include the frame_len or tag. Padding - content can be zeros or (better) random bytes. - -* If FLAG_SIGNED has been specified:: - - frame_len - tag - payload - payload_padding (out to auth block_size) - signature (sig_size bytes) - - Here the padding just makes life easier for the signature. It can be - random data to add additional confounder. Note also that the - signature input must include some state from the session key and the - previous message. - -* If FLAG_ENCRYPTED has been specified:: - - frame_len - tag + { + preamble (32 bytes) { - payload - payload_padding (out to auth block_size) - } ^ stream cipher + segment payload + zero padding (out to 16 bytes) + } * number of segments + epilogue (16 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) - Note that the padding ensures that the total frame is a multiple of - the auth method's block_size so that the message can be sent out over - the wire without waiting for the next frame in the stream. +where epilogue is:: + __u8 late_flags + zero padding (15 bytes) + +late_flags has the same meaning as in msgr2.0-crc mode. + +Each segment and the epilogue are zero padded out to 16 bytes. 
+Technically, GCM doesn't require any padding because Counter mode +(the C in GCM) essentially turns a block cipher into a stream cipher. +But, if the overall input length is not a multiple of 16 bytes, some +implicit zero padding would occur internally because GHASH function +used by GCM for generating auth tags only works on 16-byte blocks. + +Issues: + +1. The sender encrypts the whole frame using a single nonce + and generating a single auth tag. Because segment lengths are + stored in the preamble, the receiver has no choice but to decrypt + and interpret the preamble without verifying the auth tag -- it + can't even tell how much to read off the wire to get the auth tag + otherwise! This creates a decryption oracle, which, in conjunction + with Counter mode malleability, could lead to recovery of sensitive + information. + + This issue extends to the first segment of the message frame as + well. As in msgr2.0-crc mode, ceph_msg_header2 cannot be safely + interpreted before the whole frame is read off the wire. + +2. Deterministic nonce construction with a 4-byte counter field + followed by an 8-byte fixed field is used. The initial values are + taken from the connection secret -- a random byte string generated + during the authentication phase. Because the counter field is + only four bytes long, it can wrap and then repeat in under a day, + leading to GCM nonce reuse and therefore a potential complete + loss of both authenticity and confidentiality for the connection. + This was addressed by disconnecting before the counter repeats + (CVE-2020-1759). + +### msgr2.1-secure mode + +Differences from msgr2.0-secure: + +1. The preamble, the first segment and the rest of the frame are + encrypted separately, using separate nonces and generating + separate auth tags. This gets rid of unverified plaintext use + and keeps msgr2.1-secure mode close to msgr2.1-crc mode, allowing + the implementation to receive message frames in a similar fashion + (little to no buffering, same scatter/gather logic, etc). + + In order to reduce the number of en/decryption operations per + frame, the preamble is grown by a fixed size inline buffer (48 + bytes) that the first segment is inlined into, either fully or + partially. The preamble auth tag covers both the preamble and the + inline buffer, so if the first segment is small enough to be fully + inlined, it becomes available after a single decryption operation. + +2. As in msgr2.1-crc mode, the epilogue is generated only if the + frame has more than one segment. The rationale is even stronger, + as it would require an extra en/decryption operation. + +3. For consistency with msgr2.1-crc mode, late_flags is replaced + with late_status (the built-in bit error detection isn't really + needed in secure mode). + +4. In accordance with `NIST Recommendation for GCM`_, deterministic + nonce construction with a 4-byte fixed field followed by an 8-byte + counter field is used. An 8-byte counter field should never repeat + but the nonce reuse protection put in place for msgr2.0-secure mode + is still there. + + The initial values are the same as in msgr2.0-secure mode. + + .. _`NIST Recommendation for GCM`: https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf + +As in msgr2.0-secure mode, each segment is zero padded out to +16 bytes. If the first segment is fully inlined, its padding goes +to the inline buffer. Otherwise, the padding is on the remainder. +The corollary to this is that the inline buffer is consumed in +16-byte chunks. 
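For illustration, a minimal sketch (assuming the Python ``cryptography`` package) of the two en/decryption units described above -- the preamble plus the 48-byte inline buffer in one AES-128-GCM operation, the remaining segments and epilogue in another. The key and nonces here are random placeholders, not Ceph's deterministic nonce construction::

    import os
    from cryptography.hazmat.primitives.ciphers.aead import AESGCM

    def pad16(b):
        return b + b"\x00" * (-len(b) % 16)

    key = AESGCM.generate_key(bit_length=128)          # placeholder session key
    aesgcm = AESGCM(key)
    nonce1, nonce2 = os.urandom(12), os.urandom(12)    # placeholder nonces

    preamble = b"\x00" * 32                            # 32-byte preamble (see earlier sketch)
    segment1 = b"A" * 20                               # small first segment, fully inlined
    inline = pad16(segment1).ljust(48, b"\x00")        # 48-byte inline buffer, unused part zeroed

    # ciphertext || 16-byte auth tag; available to the receiver after a single decryption
    first_unit = aesgcm.encrypt(nonce1, preamble + inline, None)

    # segments 2 and 4 (each zero padded to 16 bytes) plus a 16-byte epilogue
    rest = pad16(b"B" * 70) + pad16(b"D" * 350) + pad16(b"\x01")
    second_unit = aesgcm.encrypt(nonce2, rest, None)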
+ +The unused portion of the inline buffer is zeroed. + +Some example frames: + +* A 0+0+0+0 frame (empty, nothing to inline, no epilogue):: + + { + preamble (32 bytes) + zero padding (48 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + +* A 20+0+0+0 frame (first segment fully inlined, no epilogue):: + + { + preamble (32 bytes) + segment1 payload (20 bytes) + zero padding (28 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + +* A 0+70+0+0 frame (nothing to inline):: + + { + preamble (32 bytes) + zero padding (48 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + { + segment2 payload (70 bytes) + zero padding (10 bytes) + epilogue (16 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + +* A 20+70+0+350 frame (first segment fully inlined):: + + { + preamble (32 bytes) + segment1 payload (20 bytes) + zero padding (28 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + { + segment2 payload (70 bytes) + zero padding (10 bytes) + segment4 payload (350 bytes) + zero padding (2 bytes) + epilogue (16 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + +* A 105+0+0+0 frame (first segment partially inlined, no epilogue):: + + { + preamble (32 bytes) + segment1 payload (48 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + { + segment1 payload remainder (57 bytes) + zero padding (7 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + +* A 105+70+0+350 frame (first segment partially inlined):: + + { + preamble (32 bytes) + segment1 payload (48 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + { + segment1 payload remainder (57 bytes) + zero padding (7 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + { + segment2 payload (70 bytes) + zero padding (10 bytes) + segment4 payload (350 bytes) + zero padding (2 bytes) + epilogue (16 bytes) + } ^ AES-128-GCM cipher + auth tag (16 bytes) + +where epilogue is:: + + __u8 late_status + zero padding (15 bytes) + +late_status has the same meaning as in msgr2.1-crc mode. Message flow handshake ---------------------- diff --git a/ceph/doc/man/8/CMakeLists.txt b/ceph/doc/man/8/CMakeLists.txt index dd3f8af3f..819afc056 100644 --- a/ceph/doc/man/8/CMakeLists.txt +++ b/ceph/doc/man/8/CMakeLists.txt @@ -48,7 +48,9 @@ endif() if(WITH_RADOSGW) list(APPEND man_srcs radosgw.rst - radosgw-admin.rst) + radosgw-admin.rst + rgw-orphan-list.rst + ceph-diff-sorted.rst) endif() if(WITH_RBD) diff --git a/ceph/doc/man/8/ceph-diff-sorted.rst b/ceph/doc/man/8/ceph-diff-sorted.rst new file mode 100644 index 000000000..99e958336 --- /dev/null +++ b/ceph/doc/man/8/ceph-diff-sorted.rst @@ -0,0 +1,71 @@ +:orphan: + +========================================================== + ceph-diff-sorted -- compare two sorted files line by line +========================================================== + +.. program:: ceph-diff-sorted + +Synopsis +======== + +| **ceph-diff-sorted** *file1* *file2* + +Description +=========== + +:program:`ceph-diff-sorted` is a simplifed *diff* utility optimized +for comparing two files with lines that are lexically sorted. + +The output is simplified in comparison to that of the standard `diff` +tool available in POSIX systems. Angle brackets ('<' and '>') are used +to show lines that appear in one file but not the other. The output is +not compatible with the `patch` tool. + +This tool was created in order to perform diffs of large files (e.g., +containing billions of lines) that the standard `diff` tool cannot +handle efficiently. 
Knowing that the lines are sorted allows this to +be done efficiently with minimal memory overhead. + +The sorting of each file needs to be done lexcially. Most POSIX +systems use the *LANG* environment variable to determine the `sort` +tool's sorting order. To sort lexically we would need something such +as: + + $ LANG=C sort some-file.txt >some-file-sorted.txt + +Examples +======== + +Compare two files:: + + $ ceph-diff-sorted fileA.txt fileB.txt + +Exit Status +=========== + +When complete, the exit status will be set to one of the following: + +0 + files same +1 + files different +2 + usage problem (e.g., wrong number of command-line arguments) +3 + problem opening input file +4 + bad file content (e.g., unsorted order or empty lines) + + +Availability +============ + +:program:`ceph-diff-sorted` is part of Ceph, a massively scalable, +open-source, distributed storage system. Please refer to the Ceph +documentation at http://ceph.com/docs for more information. + +See also +======== + +:doc:`rgw-orphan-list `\(8) diff --git a/ceph/doc/man/8/rgw-orphan-list.rst b/ceph/doc/man/8/rgw-orphan-list.rst new file mode 100644 index 000000000..408242da2 --- /dev/null +++ b/ceph/doc/man/8/rgw-orphan-list.rst @@ -0,0 +1,69 @@ +:orphan: + +================================================================== + rgw-orphan-list -- list rados objects that are not indexed by rgw +================================================================== + +.. program:: rgw-orphan-list + +Synopsis +======== + +| **rgw-orphan-list** + +Description +=========== + +:program:`rgw-orphan-list` is an *EXPERIMENTAL* RADOS gateway user +administration utility. It produces a listing of rados objects that +are not directly or indirectly referenced through the bucket indexes +on a pool. It places the results and intermediate files on the local +filesystem rather than on the ceph cluster itself, and therefore will +not itself consume additional cluster storage. + +In theory orphans should not exist. However because ceph evolves +rapidly, bugs do crop up, and they may result in orphans that are left +behind. + +In its current form this utility does not take any command-line +arguments or options. It will list the available pools and prompt the +user to enter the pool they would like to list orphans for. + +Behind the scenes it runs `rados ls` and `radosgw-admin bucket +radoslist ...` and produces a list of those entries that appear in the +former but not the latter. Those entries are presumed to be the +orphans. + +Warnings +======== + +This utility is currently considered *EXPERIMENTAL*. + +This utility will produce false orphan entries for unindexed buckets +since such buckets have no bucket indices that can provide the +starting point for tracing. + +Options +======= + +At present there are no options. + +Examples +======== + +Launch the tool:: + + $ rgw-orphan-list + +Availability +============ + +:program:`radosgw-admin` is part of Ceph, a massively scalable, open-source, +distributed storage system. Please refer to the Ceph documentation at +http://ceph.com/docs for more information. 
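For illustration, a simplified Python sketch of the comparison both tools rely on -- walking two lexically sorted files in step and printing lines unique to either side with '<' and '>' markers. The file names are hypothetical, and the shipped ``ceph-diff-sorted`` binary is the supported implementation::

    def diff_sorted(path_a, path_b):
        """Print lines only in path_a as '< line' and only in path_b as '> line'.
        Both files must already be lexically sorted (e.g. LANG=C sort)."""
        with open(path_a) as fa, open(path_b) as fb:
            a, b = fa.readline(), fb.readline()
            while a and b:
                if a == b:
                    a, b = fa.readline(), fb.readline()
                elif a < b:
                    print("<", a.rstrip("\n"))
                    a = fa.readline()
                else:
                    print(">", b.rstrip("\n"))
                    b = fb.readline()
            for line in [a] + fa.readlines():   # drain whichever file is longer
                if line:
                    print("<", line.rstrip("\n"))
            for line in [b] + fb.readlines():
                if line:
                    print(">", line.rstrip("\n"))

    diff_sorted("fileA-sorted.txt", "fileB-sorted.txt")   # hypothetical inputs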
+ +See also +======== + +:doc:`radosgw-admin `\(8) +:doc:`ceph-diff-sorted `\(8) diff --git a/ceph/doc/man_index.rst b/ceph/doc/man_index.rst index d32fd2fe2..72a539913 100644 --- a/ceph/doc/man_index.rst +++ b/ceph/doc/man_index.rst @@ -40,3 +40,5 @@ man/8/rbd-replay man/8/rbd man/8/rbdmap + man/8/rgw-orphan-list + man/8/ceph-diff-sorted diff --git a/ceph/doc/mgr/prometheus.rst b/ceph/doc/mgr/prometheus.rst index 8dbc44f52..87296be39 100644 --- a/ceph/doc/mgr/prometheus.rst +++ b/ceph/doc/mgr/prometheus.rst @@ -25,11 +25,65 @@ The *prometheus* module is enabled with:: Configuration ------------- -By default the module will accept HTTP requests on port ``9283`` on all -IPv4 and IPv6 addresses on the host. The port and listen address are both +.. note:: + + The Prometheus manager module needs to be restarted for configuration changes to be applied. + +By default the module will accept HTTP requests on port ``9283`` on all IPv4 +and IPv6 addresses on the host. The port and listen address are both configurable with ``ceph config-key set``, with keys -``mgr/prometheus/server_addr`` and ``mgr/prometheus/server_port``. -This port is registered with Prometheus's `registry `_. +``mgr/prometheus/server_addr`` and ``mgr/prometheus/server_port``. This port +is registered with Prometheus's `registry +`_. + +:: + + ceph config set mgr mgr/prometheus/server_addr 0.0.0.0 + ceph config set mgr mgr/prometheus/server_port 9283 + +.. warning:: + + The ``scrape_interval`` of this module should always be set to match + Prometheus' scrape interval to work properly and not cause any issues. + +The Prometheus manager module is, by default, configured with a scrape interval +of 15 seconds. The scrape interval in the module is used for caching purposes +and to determine when a cache is stale. + +It is not recommended to use a scrape interval below 10 seconds. It is +recommended to use 15 seconds as scrape interval, though, in some cases it +might be useful to increase the scrape interval. + +To set a different scrape interval in the Prometheus module, set +``scrape_interval`` to the desired value:: + + ceph config set mgr mgr/prometheus/scrape_interval 20 + +On large clusters (>1000 OSDs), the time to fetch the metrics may become +significant. Without the cache, the Prometheus manager module could, +especially in conjunction with multiple Prometheus instances, overload the +manager and lead to unresponsive or crashing Ceph manager instances. Hence, +the cache is enabled by default and cannot be disabled. This means that there +is a possibility that the cache becomes stale. The cache is considered stale +when the time to fetch the metrics from Ceph exceeds the configured +``scrape_interval``. + +If that is the case, **a warning will be logged** and the module will either + +* respond with a 503 HTTP status code (service unavailable) or, +* it will return the content of the cache, even though it might be stale. + +This behavior can be configured. By default, it will return a 503 HTTP status +code (service unavailable). You can set other options using the ``ceph config +set`` commands. + +To tell the module to respond with possibly stale data, set it to ``return``:: + + ceph config set mgr mgr/prometheus/stale_cache_strategy return + +To tell the module to respond with "service unavailable", set it to ``fail``:: + + ceph config set mgr mgr/prometheus/stale_cache_strategy fail .. 
_prometheus-rbd-io-statistics: @@ -62,7 +116,7 @@ Statistic names and labels ========================== The names of the stats are exactly as Ceph names them, with -illegal characters ``.``, ``-`` and ``::`` translated to ``_``, +illegal characters ``.``, ``-`` and ``::`` translated to ``_``, and ``ceph_`` prefixed to all names. @@ -75,7 +129,7 @@ rocksdb stats. The *cluster* statistics (i.e. those global to the Ceph cluster) -have labels appropriate to what they report on. For example, +have labels appropriate to what they report on. For example, metrics relating to pools have a ``pool_id`` label. @@ -109,7 +163,7 @@ Correlating drive statistics with node_exporter The prometheus output from Ceph is designed to be used in conjunction with the generic host monitoring from the Prometheus node_exporter. -To enable correlation of Ceph OSD statistics with node_exporter's +To enable correlation of Ceph OSD statistics with node_exporter's drive statistics, special series are output like this: :: diff --git a/ceph/doc/mgr/telemetry.rst b/ceph/doc/mgr/telemetry.rst index c0f56dde3..37fa82142 100644 --- a/ceph/doc/mgr/telemetry.rst +++ b/ceph/doc/mgr/telemetry.rst @@ -123,6 +123,16 @@ The see the current configuration:: ceph telemetry status +Manually sending telemetry +-------------------------- + +To ad hoc send telemetry data:: + + ceph telemetry send + +In case telemetry is not enabled (with 'ceph telemetry on'), you need to add +'--license sharing-1-0' to 'ceph telemetry send' command. + Sending telemetry through a proxy --------------------------------- diff --git a/ceph/doc/radosgw/config-ref.rst b/ceph/doc/radosgw/config-ref.rst index 0e20dfd5a..2664b94a7 100644 --- a/ceph/doc/radosgw/config-ref.rst +++ b/ceph/doc/radosgw/config-ref.rst @@ -218,6 +218,14 @@ instances or all radosgw-admin commands can be put into the ``[global]`` or the :Default: ``3600`` +``rgw gc max concurrent io`` + +:Description: The maximum number of concurrent IO operations that the RGW garbage + collection thread will use when purging old data. +:Type: Integer +:Default: ``10`` + + ``rgw s3 success create obj status`` :Description: The alternate success status response for ``create-obj``. diff --git a/ceph/doc/radosgw/index.rst b/ceph/doc/radosgw/index.rst index 0523caadb..e82b837ce 100644 --- a/ceph/doc/radosgw/index.rst +++ b/ceph/doc/radosgw/index.rst @@ -65,6 +65,7 @@ you may write data with one API and retrieve it with the other. Data Layout in RADOS STS Lite Role + Orphan List and Associated Tooliing troubleshooting Manpage radosgw <../../man/8/radosgw> Manpage radosgw-admin <../../man/8/radosgw-admin> diff --git a/ceph/doc/radosgw/opa.rst b/ceph/doc/radosgw/opa.rst index 89f9300b9..74eeb918b 100644 --- a/ceph/doc/radosgw/opa.rst +++ b/ceph/doc/radosgw/opa.rst @@ -46,6 +46,7 @@ Example request:: { "input": { "method": "GET", + "subuser": "subuser", "user_info": { "used_id": "john", "display_name": "John" diff --git a/ceph/doc/radosgw/orphans.rst b/ceph/doc/radosgw/orphans.rst new file mode 100644 index 000000000..9a77d60de --- /dev/null +++ b/ceph/doc/radosgw/orphans.rst @@ -0,0 +1,115 @@ +================================== +Orphan List and Associated Tooling +================================== + +.. version added:: Luminous + +.. contents:: + +Orphans are RADOS objects that are left behind after their associated +RGW objects are removed. Normally these RADOS objects are removed +automatically, either immediately or through a process known as +"garbage collection". 
Over the history of RGW, however, there may have +been bugs that prevented these RADOS objects from being deleted, and +these RADOS objects may be consuming space on the Ceph cluster without +being of any use. From the perspective of RGW, we call such RADOS +objects "orphans". + +Orphans Find -- DEPRECATED +-------------------------- + +The `radosgw-admin` tool has three subcommands to help manage +orphans; however, these subcommands are (or will soon be) +deprecated. These subcommands are: + +:: + # radosgw-admin orphans find ... + # radosgw-admin orphans finish ... + # radosgw-admin orphans list-jobs ... + +There are two key problems with these subcommands. First, +these subcommands have not been actively maintained and therefore have +not tracked RGW as it has evolved in terms of features and updates. As +a result, the confidence that these subcommands can accurately identify +true orphans is presently low. + +Second, these subcommands store intermediate results on the cluster +itself. This can be problematic when cluster administrators are +confronting insufficient storage space and want to remove orphans as a +means of addressing the issue. The intermediate results could strain +the existing cluster storage capacity even further. + +For these reasons "orphans find" has been deprecated. + +Orphan List +----------- + +Because "orphans find" has been deprecated, RGW now includes an +additional tool -- 'rgw-orphan-list'. When run, it will list the +available pools and prompt the user to enter the name of the data +pool. At that point the tool will, perhaps after an extended period of +time, produce a local file containing the RADOS objects from the +designated pool that appear to be orphans. The administrator is free +to examine this file and then decide on a course of action, perhaps +removing those RADOS objects from the designated pool. + +All intermediate results are stored on the local file system rather +than the Ceph cluster, so running the 'rgw-orphan-list' tool should +have no appreciable impact on the amount of cluster storage consumed. + +WARNING: Experimental Status +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The 'rgw-orphan-list' tool is new and therefore currently considered +experimental. The list of orphans produced should be "sanity checked" +before being used for a large delete operation. + +WARNING: Specifying a Data Pool +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a pool other than an RGW data pool is specified, the results of the +tool will be erroneous. All RADOS objects found on such a pool will +falsely be designated as orphans. + +WARNING: Unindexed Buckets +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +RGW allows for unindexed buckets, that is, buckets that do not maintain +an index of their contents. This is not a typical configuration, but +it is supported. Because the 'rgw-orphan-list' tool uses the bucket +indices to determine what RADOS objects should exist, objects in the +unindexed buckets will falsely be listed as orphans. + + +RADOS List +---------- + +One of the sub-steps in computing a list of orphans is to map each RGW +object into its corresponding set of RADOS objects. This is done using +a subcommand of 'radosgw-admin'. + +:: + # radosgw-admin bucket radoslist [--bucket={bucket-name}] + +The subcommand will produce a list of RADOS objects that support all +of the RGW objects. If a bucket is specified then the subcommand will +only produce a list of RADOS objects that correspond back to the RGW +objects in the specified bucket.
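+
+The output of this subcommand can, for example, be compared against a
+listing of the data pool itself to get a rough idea of which RADOS
+objects are unreferenced; this is, in essence, the comparison that the
+'rgw-orphan-list' tool automates. The pool name and file names below
+are only illustrative and must be adjusted for the cluster at hand:
+
+::
+  # radosgw-admin bucket radoslist | sort -u > rados-expected.txt
+  # rados ls -p default.rgw.buckets.data | sort -u > rados-actual.txt
+  # ceph-diff-sorted rados-expected.txt rados-actual.txt
+
+RADOS objects that appear only in the pool listing are the orphan
+candidates; as noted above, such a list should be sanity checked before
+any large delete operation.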
+ +Note: Shared Bucket Markers +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some administrators will be aware of the coding schemes used to name +the RADOS objects that correspond to RGW objects, which include a +"marker" unique to a given bucket. + +RADOS objects that correspond with the contents of one RGW bucket, +however, may contain a marker that specifies a different bucket. This +behavior is a consequence of the "shallow copy" optimization used by +RGW. When larger objects are copied from bucket to bucket, only the +"head" objects are actually copied, and the tail objects are +shared. Those shared objects will contain the marker of the original +bucket. + +.. _Data Layout in RADOS : ../layout +.. _Pool Placement and Storage Classes : ../placement diff --git a/ceph/monitoring/grafana/dashboards/osds-overview.json b/ceph/monitoring/grafana/dashboards/osds-overview.json index 869fdc2fa..4b91df9eb 100644 --- a/ceph/monitoring/grafana/dashboards/osds-overview.json +++ b/ceph/monitoring/grafana/dashboards/osds-overview.json @@ -431,7 +431,7 @@ "strokeWidth": 1, "targets": [ { - "expr": "count by(device_class) (ceph_osd_metadata)", + "expr": "count by (device_class) (ceph_osd_metadata)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device_class}}", diff --git a/ceph/monitoring/grafana/dashboards/rbd-details.json b/ceph/monitoring/grafana/dashboards/rbd-details.json index c822e7dff..68fffad1e 100644 --- a/ceph/monitoring/grafana/dashboards/rbd-details.json +++ b/ceph/monitoring/grafana/dashboards/rbd-details.json @@ -27,7 +27,7 @@ } ] }, - "description": "Detailed Performance of RBD Images (IOPS/Latency)", + "description": "Detailed Performance of RBD Images (IOPS/Throughput/Latency)", "editable": false, "gnetId": null, "graphTooltip": 0, @@ -77,21 +77,21 @@ "expr": "irate(ceph_rbd_write_ops{pool=\"$Pool\", image=\"$Image\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Write {{instance}}", + "legendFormat": "Write", "refId": "A" }, { "expr": "irate(ceph_rbd_read_ops{pool=\"$Pool\", image=\"$Image\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Read {{instance}}", + "legendFormat": "Read", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "IOPS Count", + "title": "IOPS", "tooltip": { "shared": true, "sort": 0, @@ -168,21 +168,21 @@ "expr": "irate(ceph_rbd_write_bytes{pool=\"$Pool\", image=\"$Image\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Read {{instance}}", + "legendFormat": "Write", "refId": "A" }, { "expr": "irate(ceph_rbd_read_bytes{pool=\"$Pool\", image=\"$Image\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Write {{instance}}", + "legendFormat": "Read", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "IO Bytes per Second", + "title": "Throughput", "tooltip": { "shared": true, "sort": 0, @@ -259,21 +259,21 @@ "expr": "irate(ceph_rbd_write_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) / irate(ceph_rbd_write_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Write Latency Sum", + "legendFormat": "Write", "refId": "A" }, { "expr": "irate(ceph_rbd_read_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) / irate(ceph_rbd_read_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Read Latency Sum", + "legendFormat": "Read", "refId": "B" } ], "thresholds": [], 
"timeFrom": null, "timeShift": null, - "title": "Averange Latency", + "title": "Average Latency", "tooltip": { "shared": true, "sort": 0, diff --git a/ceph/monitoring/grafana/dashboards/rbd-overview.json b/ceph/monitoring/grafana/dashboards/rbd-overview.json index f3df003ec..eb15fbcb8 100644 --- a/ceph/monitoring/grafana/dashboards/rbd-overview.json +++ b/ceph/monitoring/grafana/dashboards/rbd-overview.json @@ -416,7 +416,7 @@ ], "targets": [ { - "expr": "topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on(image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))", + "expr": "topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))", "format": "table", "instant": true, "intervalFactor": 1, diff --git a/ceph/qa/cephfs/clusters/1-mds-1-client-micro.yaml b/ceph/qa/cephfs/clusters/1-mds-1-client-micro.yaml new file mode 100644 index 000000000..8b66c3906 --- /dev/null +++ b/ceph/qa/cephfs/clusters/1-mds-1-client-micro.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mds.a, osd.0, osd.1, osd.2, osd.3] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/ceph/qa/objectstore/bluestore-hybrid.yaml b/ceph/qa/objectstore/bluestore-hybrid.yaml new file mode 100644 index 000000000..68b9bc427 --- /dev/null +++ b/ceph/qa/objectstore/bluestore-hybrid.yaml @@ -0,0 +1,40 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: hybrid + bluefs allocator: hybrid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/ceph/qa/standalone/mon/mon-last-epoch-clean.sh b/ceph/qa/standalone/mon/mon-last-epoch-clean.sh index e38663c6a..667c6a702 100755 --- a/ceph/qa/standalone/mon/mon-last-epoch-clean.sh +++ b/ceph/qa/standalone/mon/mon-last-epoch-clean.sh @@ -181,8 +181,8 @@ function TEST_mon_last_clean_epoch() { sleep 5 - ceph tell osd.* injectargs '--osd-beacon-report-interval 10' || exit 1 - ceph tell mon.* injectargs \ + ceph tell 'osd.*' injectargs '--osd-beacon-report-interval 10' || exit 1 + ceph tell 'mon.*' injectargs \ '--mon-min-osdmap-epochs 2 --paxos-service-trim-min 1' || exit 1 create_pool foo 32 diff --git a/ceph/qa/standalone/osd/bad-inc-map.sh b/ceph/qa/standalone/osd/bad-inc-map.sh new file mode 100755 index 000000000..cc3cf27cc --- /dev/null +++ b/ceph/qa/standalone/osd/bad-inc-map.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +mon_port=$(get_unused_port) + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:$mon_port" + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + 
CEPH_ARGS+="--mon-host=$CEPH_MON " + set -e + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bad_inc_map() { + local dir=$1 + + run_mon $dir a + run_mgr $dir x + run_osd $dir 0 + run_osd $dir 1 + run_osd $dir 2 + + ceph config set osd.2 osd_inject_bad_map_crc_probability 1 + + # osd map churn + create_pool foo 8 + ceph osd pool set foo min_size 1 + ceph osd pool set foo min_size 2 + + sleep 5 + + # make sure all the OSDs are still up + TIMEOUT=10 wait_for_osd up 0 + TIMEOUT=10 wait_for_osd up 1 + TIMEOUT=10 wait_for_osd up 2 + + # check for the signature in the log + grep "injecting map crc failure" $dir/osd.2.log || return 1 + grep "bailing because last" $dir/osd.2.log || return 1 + + echo success + + delete_pool foo + kill_daemons $dir || return 1 +} + +main bad-inc-map "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh bad-inc-map.sh" +# End: diff --git a/ceph/qa/standalone/scrub/osd-scrub-repair.sh b/ceph/qa/standalone/scrub/osd-scrub-repair.sh index 5b943dcdd..e1b9fe050 100755 --- a/ceph/qa/standalone/scrub/osd-scrub-repair.sh +++ b/ceph/qa/standalone/scrub/osd-scrub-repair.sh @@ -45,7 +45,7 @@ walk(if type == "object" then del(.mtime) else . end) | walk(if type == "object" then del(.version) else . end) | walk(if type == "object" then del(.prior_version) else . end)' -sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print json.dumps(ud, sort_keys=True, indent=2)' +sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print(json.dumps(ud, sort_keys=True, indent=2))' function run() { local dir=$1 diff --git a/ceph/qa/standalone/scrub/osd-scrub-test.sh b/ceph/qa/standalone/scrub/osd-scrub-test.sh index c530233ea..7fcbfc651 100755 --- a/ceph/qa/standalone/scrub/osd-scrub-test.sh +++ b/ceph/qa/standalone/scrub/osd-scrub-test.sh @@ -187,6 +187,120 @@ function TEST_interval_changes() { teardown $dir || return 1 } +function _scrub_abort() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=1000 + local type=$2 + + TESTDATA="testdata.$$" + if test $type = "scrub"; + then + stopscrub="noscrub" + check="noscrub" + else + stopscrub="nodeep-scrub" + check="nodeep_scrub" + fi + + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_sleep=5.0 \ + --osd_scrub_interval_randomize_ratio=0 || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + local pgid="${poolid}.0" + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_$type $pgid + # deep-scrub won't start without scrub noticing + if [ "$type" = "deep_scrub" ]; + then + CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid + fi + + # Wait for scrubbing to start + set -o pipefail + found="no" + for i in $(seq 0 200) + do + flush_pg_stats + if ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing" + then 
+ found="yes" + #ceph pg dump pgs + break + fi + done + set +o pipefail + + if test $found = "no"; + then + echo "Scrubbing never started" + return 1 + fi + + ceph osd set $stopscrub + + # Wait for scrubbing to end + set -o pipefail + for i in $(seq 0 200) + do + flush_pg_stats + if ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing" + then + continue + fi + #ceph pg dump pgs + break + done + set +o pipefail + + sleep 5 + + if ! grep "$check set, aborting" $dir/osd.${primary}.log + then + echo "Abort not seen in log" + return 1 + fi + + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph osd unset noscrub + TIMEOUT=$(($objects / 2)) + wait_for_scrub $pgid "$last_scrub" || return 1 + + teardown $dir || return 1 +} + +function TEST_scrub_abort() { + local dir=$1 + _scrub_abort $dir scrub +} + +function TEST_deep_scrub_abort() { + local dir=$1 + _scrub_abort $dir deep_scrub +} + main osd-scrub-test "$@" # Local Variables: diff --git a/ceph/qa/standalone/special/ceph_objectstore_tool.py b/ceph/qa/standalone/special/ceph_objectstore_tool.py index 496ae417e..111f43597 100755 --- a/ceph/qa/standalone/special/ceph_objectstore_tool.py +++ b/ceph/qa/standalone/special/ceph_objectstore_tool.py @@ -1034,7 +1034,7 @@ def main(argv): # Specify a bad --op command cmd = (CFSD_PREFIX + "--op oops").format(osd=ONEOSD) - ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log)") + ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)") # Provide just the object param not a command cmd = (CFSD_PREFIX + "object").format(osd=ONEOSD) diff --git a/ceph/qa/suites/fs/basic_functional/tasks/volumes.yaml b/ceph/qa/suites/fs/basic_functional/tasks/volumes.yaml index 7364fd211..e94728f9e 100644 --- a/ceph/qa/suites/fs/basic_functional/tasks/volumes.yaml +++ b/ceph/qa/suites/fs/basic_functional/tasks/volumes.yaml @@ -1,5 +1,8 @@ overrides: ceph: + conf: + mgr: + debug client: 10 log-whitelist: - OSD full dropping all updates - OSD near full diff --git a/ceph/qa/suites/fs/upgrade/volumes/.qa b/ceph/qa/suites/fs/upgrade/volumes/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/% b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/% new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/.qa b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/bluestore-bitmap.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/bluestore-bitmap.yaml new file mode 120000 index 000000000..17ad98e79 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../../../../cephfs/objectstore-ec/bluestore-bitmap.yaml \ No newline at end of file diff --git 
a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/.qa b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/1-mds-2-client-micro.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/1-mds-2-client-micro.yaml new file mode 100644 index 000000000..9b443f7d2 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/clusters/1-mds-2-client-micro.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, mds.a, mds.b, mds.c, osd.0, osd.1, osd.2, osd.3] +- [client.0, client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/conf b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/ \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/+ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/+ new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/.qa b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/frag_enable.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/frag_enable.yaml new file mode 120000 index 000000000..34a39a368 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/frag_enable.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag_enable.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/pg-warn.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/whitelist_health.yaml new file mode 120000 index 000000000..74f39a49b --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/whitelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/whitelist_health.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/whitelist_wrongly_marked_down.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..b4528c0f8 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/% b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/% new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/.qa 
b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/0-mimic.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/0-mimic.yaml new file mode 100644 index 000000000..1ca8973bb --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/0-mimic.yaml @@ -0,0 +1,42 @@ +meta: +- desc: | + install ceph/mimic latest +tasks: +- install: + branch: mimic #tag: v13.2.8 + exclude_packages: + - librados3 + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-diskprediction-cloud + - ceph-mgr-rook + - ceph-mgr-cephadm + - cephadm + extra_packages: ['librados2'] +- print: "**** done installing mimic" +- ceph: + mon_bind_addrvec: false + mon_bind_msgr2: false + log-whitelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd require-osd-release mimic + - ceph osd set-require-min-compat-client mimic +- print: "**** done ceph" diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/1-client.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/1-client.yaml new file mode 100644 index 000000000..82731071f --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/1-client.yaml @@ -0,0 +1,33 @@ +tasks: +- workunit: + clients: + client.0: + - fs/upgrade/volume_client + env: + ACTION: create +- print: "**** fs/volume_client create" +- ceph-fuse: + client.0: + mount_path: /volumes/_nogroup/vol_isolated + mountpoint: mnt.0 + auth_id: vol_data_isolated + client.1: + mount_path: /volumes/_nogroup/vol_default + mountpoint: mnt.1 + auth_id: vol_default +- print: "**** ceph-fuse vol_isolated" +- workunit: + clients: + client.0: + - fs/upgrade/volume_client + env: + ACTION: populate + cleanup: false +- workunit: + clients: + client.1: + - fs/upgrade/volume_client + env: + ACTION: populate + cleanup: false +- print: "**** fs/volume_client populate" diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/2-upgrade.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/2-upgrade.yaml new file mode 100644 index 000000000..fd23132bf --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/2-upgrade.yaml @@ -0,0 +1,54 @@ +overrides: + ceph: + mon_bind_msgr2: false + mon_bind_addrvec: false + log-whitelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) + - slow request + conf: + global: + bluestore warn on legacy statfs: false + bluestore warn on no per pool omap: false + mon: + mon warn on osd down out interval zero: false + +tasks: +- mds_pre_upgrade: +- print: "**** done mds pre-upgrade sequence" +- install.upgrade: + mon.a: +- print: "**** done install.upgrade both hosts" +- ceph.restart: + daemons: [mon.*, mgr.*] + mon-health-to-clog: false + wait-for-healthy: false +- exec: + mon.a: + - ceph config set global mon_warn_on_msgr2_not_enabled false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.stop: [mds.*] +- ceph.restart: + daemons: 
[mds.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph mon enable-msgr2 + - ceph versions + - ceph osd dump -f json-pretty + - ceph config rm global mon_warn_on_msgr2_not_enabled + - ceph osd require-osd-release nautilus + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done + #- ceph osd set-require-min-compat-client nautilus +- ceph.healthy: +- print: "**** done ceph.restart" diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/3-verify.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/3-verify.yaml new file mode 100644 index 000000000..003409ca3 --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/tasks/3-verify.yaml @@ -0,0 +1,25 @@ +overrides: + ceph: + log-whitelist: + - missing required features +tasks: +- exec: + mon.a: + - ceph fs dump --format=json-pretty + - ceph fs volume ls + - ceph fs subvolume ls cephfs +- workunit: + clients: + client.0: + - fs/upgrade/volume_client + env: + ACTION: verify + cleanup: false +- workunit: + clients: + client.1: + - fs/upgrade/volume_client + env: + ACTION: verify + cleanup: false +- print: "**** fs/volume_client verify" diff --git a/ceph/qa/suites/fs/upgrade/volumes/import-legacy/ubuntu_18.04.yaml b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/ubuntu_18.04.yaml new file mode 120000 index 000000000..cfb85f10e --- /dev/null +++ b/ceph/qa/suites/fs/upgrade/volumes/import-legacy/ubuntu_18.04.yaml @@ -0,0 +1 @@ +.qa/distros/all/ubuntu_18.04.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/mgr/tasks/module_selftest.yaml b/ceph/qa/suites/rados/mgr/tasks/module_selftest.yaml index 9fa956b7e..11053d6a2 100644 --- a/ceph/qa/suites/rados/mgr/tasks/module_selftest.yaml +++ b/ceph/qa/suites/rados/mgr/tasks/module_selftest.yaml @@ -19,6 +19,7 @@ tasks: - \(MGR_ZABBIX_ - foo bar - Failed to open Telegraf + - evicting unresponsive client - cephfs_test_runner: modules: - tasks.mgr.test_module_selftest diff --git a/ceph/qa/suites/rados/perf/distros/ubuntu_16.04.yaml b/ceph/qa/suites/rados/perf/distros/ubuntu_16.04.yaml new file mode 120000 index 000000000..a92e40600 --- /dev/null +++ b/ceph/qa/suites/rados/perf/distros/ubuntu_16.04.yaml @@ -0,0 +1 @@ +../../../../distros/supported-all-distro/ubuntu_16.04.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/perf/distros/ubuntu_latest.yaml b/ceph/qa/suites/rados/perf/distros/ubuntu_latest.yaml new file mode 120000 index 000000000..f4d73c110 --- /dev/null +++ b/ceph/qa/suites/rados/perf/distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +../../../../distros/supported-all-distro/ubuntu_latest.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/perf/supported-random-distro$ b/ceph/qa/suites/rados/perf/supported-random-distro$ deleted file mode 120000 index 7cef21eef..000000000 --- a/ceph/qa/suites/rados/perf/supported-random-distro$ +++ /dev/null @@ -1 +0,0 @@ -../basic/supported-random-distro$ \ No newline at end of file diff --git a/ceph/qa/suites/rados/singleton-flat/valgrind-leaks.yaml b/ceph/qa/suites/rados/singleton-flat/valgrind-leaks.yaml index c41f75fce..5a7fff755 100644 --- a/ceph/qa/suites/rados/singleton-flat/valgrind-leaks.yaml +++ b/ceph/qa/suites/rados/singleton-flat/valgrind-leaks.yaml @@ -1,5 +1,6 @@ # see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126 os_type: centos +os_version: '7.8' openstack: - volumes: # attached to each instance diff --git a/ceph/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml 
b/ceph/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml new file mode 100644 index 000000000..1e04fb369 --- /dev/null +++ b/ceph/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + osd: + osd inject bad map crc probability: 0.1 + log-whitelist: + - failed to encode map diff --git a/ceph/qa/suites/rados/thrash/crc-failures/default.yaml b/ceph/qa/suites/rados/thrash/crc-failures/default.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/rados/verify/validater/valgrind.yaml b/ceph/qa/suites/rados/verify/validater/valgrind.yaml index 83eb2add5..933290245 100644 --- a/ceph/qa/suites/rados/verify/validater/valgrind.yaml +++ b/ceph/qa/suites/rados/verify/validater/valgrind.yaml @@ -1,5 +1,6 @@ # see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126 os_type: centos +os_version: '7.8' overrides: install: diff --git a/ceph/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml b/ceph/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/ceph/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml @@ -0,0 +1 @@ + diff --git a/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v27.yaml b/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v27.yaml deleted file mode 100644 index 7fd0dcf94..000000000 --- a/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v27.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - s3a-hadoop: - hadoop-version: '2.7.3' diff --git a/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v28.yaml b/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v28.yaml deleted file mode 100644 index 20a06baab..000000000 --- a/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v28.yaml +++ /dev/null @@ -1,3 +0,0 @@ -overrides: - s3a-hadoop: - hadoop-version: '2.8.0' diff --git a/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml b/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml new file mode 100644 index 000000000..d017b756b --- /dev/null +++ b/ceph/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml @@ -0,0 +1,3 @@ +overrides: + s3a-hadoop: + hadoop-version: '3.2.0' diff --git a/ceph/qa/suites/rgw/tools/+ b/ceph/qa/suites/rgw/tools/+ new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/rgw/tools/.qa b/ceph/qa/suites/rgw/tools/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/ceph/qa/suites/rgw/tools/.qa @@ -0,0 +1 @@ +../.qa \ No newline at end of file diff --git a/ceph/qa/suites/rgw/tools/centos_latest.yaml b/ceph/qa/suites/rgw/tools/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/ceph/qa/suites/rgw/tools/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rgw/tools/cluster.yaml b/ceph/qa/suites/rgw/tools/cluster.yaml new file mode 100644 index 000000000..0eab7ebad --- /dev/null +++ b/ceph/qa/suites/rgw/tools/cluster.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB +overrides: + rgw: + frontend: beast \ No newline at end of file diff --git a/ceph/qa/suites/rgw/tools/tasks.yaml b/ceph/qa/suites/rgw/tools/tasks.yaml new file mode 100644 index 000000000..acceb21c8 --- /dev/null +++ b/ceph/qa/suites/rgw/tools/tasks.yaml @@ -0,0 +1,19 @@ +tasks: +- install: +- ceph: +- rgw: + client.0: + # force rgw_dns_name to be set with the fully qualified host name; + # it will be appended to the empty string + dns-name: '' +- workunit: + clients: + client.0: + - 
rgw/test_rgw_orphan_list.sh +overrides: + ceph: + conf: + client: + debug rgw: 20 + debug ms: 1 + rgw enable static website: false diff --git a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/.qa b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/7-final-workload/rbd-python.yaml b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/7-final-workload/rbd-python.yaml index d87ca8b42..dbfde7f72 100644 --- a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/7-final-workload/rbd-python.yaml +++ b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/7-final-workload/rbd-python.yaml @@ -3,7 +3,7 @@ meta: librbd python api tests tasks: - workunit: - tag: v14.2.2 + tag: v14.2.10 clients: client.0: - rbd/test_librbd_python.sh diff --git a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore deleted file mode 120000 index 58b9a18c2..000000000 --- a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore +++ /dev/null @@ -1 +0,0 @@ -../../../../objectstore \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/.qa b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/bluestore-bitmap.yaml b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/default.yaml b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/default.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/filestore-xfs.yaml b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/filestore-xfs.yaml new file mode 120000 index 000000000..41f2a9d14 --- /dev/null +++ b/ceph/qa/suites/upgrade/nautilus-p2p/nautilus-p2p-stress-split/objectstore/filestore-xfs.yaml @@ -0,0 +1 @@ +.qa/objectstore/filestore-xfs.yaml \ No newline at end of file diff --git a/ceph/qa/tasks/ceph_fuse.py b/ceph/qa/tasks/ceph_fuse.py index e41020c3d..1439ccffd 100644 --- a/ceph/qa/tasks/ceph_fuse.py +++ b/ceph/qa/tasks/ceph_fuse.py @@ -112,30 +112,35 @@ def task(ctx, config): if client_config is None: client_config = {} + auth_id = client_config.get("auth_id", id_) + skip = client_config.get("skip", False) if skip: skipped[id_] = skip continue if id_ not in all_mounts: - fuse_mount = FuseMount(ctx, client_config, testdir, id_, remote) + fuse_mount = FuseMount(ctx, client_config, testdir, auth_id, remote) all_mounts[id_] = fuse_mount else: # Catch bad configs where someone has e.g. 
tried to use ceph-fuse and kcephfs for the same client assert isinstance(all_mounts[id_], FuseMount) if not config.get("disabled", False) and client_config.get('mounted', True): - mounted_by_me[id_] = all_mounts[id_] + mounted_by_me[id_] = {"config": client_config, "mount": all_mounts[id_]} ctx.mounts = all_mounts # Mount any clients we have been asked to (default to mount all) log.info('Mounting ceph-fuse clients...') - for mount in mounted_by_me.values(): - mount.mount() + for info in mounted_by_me.values(): + config = info["config"] + mount_path = config.get("mount_path") + mountpoint = config.get("mountpoint") + info["mount"].mount(mountpoint=mountpoint, mount_path=mount_path) - for mount in mounted_by_me.values(): - mount.wait_until_mounted() + for info in mounted_by_me.values(): + info["mount"].wait_until_mounted() # Umount any pre-existing clients that we have not been asked to mount for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()): @@ -148,7 +153,8 @@ def task(ctx, config): finally: log.info('Unmounting ceph-fuse clients...') - for mount in mounted_by_me.values(): + for info in mounted_by_me.values(): # Conditional because an inner context might have umounted it + mount = info["mount"] if mount.is_mounted(): mount.umount_wait() diff --git a/ceph/qa/tasks/ceph_test_case.py b/ceph/qa/tasks/ceph_test_case.py index e2506436b..9e26439ed 100644 --- a/ceph/qa/tasks/ceph_test_case.py +++ b/ceph/qa/tasks/ceph_test_case.py @@ -1,4 +1,3 @@ - import unittest from unittest import case import time @@ -8,6 +7,8 @@ from teuthology.orchestra.run import CommandFailedError log = logging.getLogger(__name__) +class TestTimeoutError(RuntimeError): + pass class CephTestCase(unittest.TestCase): """ @@ -32,6 +33,8 @@ class CephTestCase(unittest.TestCase): REQUIRE_MEMSTORE = False def setUp(self): + self._mon_configs_set = set() + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", "Starting test {0}".format(self.id())) @@ -43,12 +46,42 @@ class CephTestCase(unittest.TestCase): raise case.SkipTest("Require `memstore` OSD backend (test " \ "would take too long on full sized OSDs") - - def tearDown(self): + self.config_clear() + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", "Ended test {0}".format(self.id())) + def config_clear(self): + for section, key in self._mon_configs_set: + self.config_rm(section, key) + self._mon_configs_set.clear() + + def _fix_key(self, key): + return str(key).replace(' ', '_') + + def config_get(self, section, key): + key = self._fix_key(key) + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "get", section, key).strip() + + def config_show(self, entity, key): + key = self._fix_key(key) + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "show", entity, key).strip() + + def config_minimal(self): + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "generate-minimal-conf").strip() + + def config_rm(self, section, key): + key = self._fix_key(key) + self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "rm", section, key) + # simplification: skip removing from _mon_configs_set; + # let tearDown clear everything again + + def config_set(self, section, key, value): + key = self._fix_key(key) + self._mon_configs_set.add((section, key)) + self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "set", section, key, str(value)) + def assert_cluster_log(self, expected_pattern, invert_match=False, timeout=10, watch_channel=None): """ @@ -142,7 +175,7 @@ class CephTestCase(unittest.TestCase): raise 
RuntimeError("wait_until_equal: forbidden value {0} seen".format(val)) else: if elapsed >= timeout: - raise RuntimeError("Timed out after {0} seconds waiting for {1} (currently {2})".format( + raise TestTimeoutError("Timed out after {0} seconds waiting for {1} (currently {2})".format( elapsed, expect_val, val )) else: @@ -161,7 +194,7 @@ class CephTestCase(unittest.TestCase): return else: if elapsed >= timeout: - raise RuntimeError("Timed out after {0}s".format(elapsed)) + raise TestTimeoutError("Timed out after {0}s".format(elapsed)) else: log.debug("wait_until_true: waiting...") time.sleep(period) diff --git a/ceph/qa/tasks/cephfs/cephfs_test_case.py b/ceph/qa/tasks/cephfs/cephfs_test_case.py index f376bb69a..bee9e7df9 100644 --- a/ceph/qa/tasks/cephfs/cephfs_test_case.py +++ b/ceph/qa/tasks/cephfs/cephfs_test_case.py @@ -10,6 +10,7 @@ from tasks.cephfs.fuse_mount import FuseMount from teuthology.orchestra import run from teuthology.orchestra.run import CommandFailedError +from teuthology.contextutil import safe_while log = logging.getLogger(__name__) @@ -169,8 +170,6 @@ class CephFSTestCase(CephTestCase): self.configs_set = set() def tearDown(self): - super(CephFSTestCase, self).tearDown() - self.mds_cluster.clear_firewall() for m in self.mounts: m.teardown() @@ -181,6 +180,8 @@ class CephFSTestCase(CephTestCase): for subsys, key in self.configs_set: self.mds_cluster.clear_ceph_conf(subsys, key) + return super(CephFSTestCase, self).tearDown() + def set_conf(self, subsys, key, value): self.configs_set.add((subsys, key)) self.mds_cluster.set_ceph_conf(subsys, key, value) @@ -266,6 +267,10 @@ class CephFSTestCase(CephTestCase): if core_dir: # Non-default core_pattern with a directory in it # We have seen a core_pattern that looks like it's from teuthology's coredump # task, so proceed to clear out the core file + if core_dir[0] == '|': + log.info("Piped core dumps to program {0}, skip cleaning".format(core_dir[1:])) + return; + log.info("Clearing core from directory: {0}".format(core_dir)) # Verify that we see the expected single coredump @@ -304,3 +309,11 @@ class CephFSTestCase(CephTestCase): return subtrees time.sleep(pause) raise RuntimeError("rank {0} failed to reach desired subtree state", rank) + + def _wait_until_scrub_complete(self, path="/", recursive=True): + out_json = self.fs.rank_tell(["scrub", "start", path] + ["recursive"] if recursive else []) + with safe_while(sleep=10, tries=10) as proceed: + while proceed(): + out_json = self.fs.rank_tell(["scrub", "status"]) + if out_json['status'] == "no active scrubs running": + break; diff --git a/ceph/qa/tasks/cephfs/fuse_mount.py b/ceph/qa/tasks/cephfs/fuse_mount.py index 68fc387d7..c71b2041d 100644 --- a/ceph/qa/tasks/cephfs/fuse_mount.py +++ b/ceph/qa/tasks/cephfs/fuse_mount.py @@ -27,7 +27,9 @@ class FuseMount(CephFSMount): self.inst = None self.addr = None - def mount(self, mount_path=None, mount_fs_name=None): + def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None): + if mountpoint is not None: + self.mountpoint = mountpoint self.setupfs(name=mount_fs_name) try: @@ -51,14 +53,8 @@ class FuseMount(CephFSMount): log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) - self.client_remote.run( - args=[ - 'mkdir', - '--', - self.mountpoint, - ], - timeout=(15*60) - ) + self.client_remote.run(args=['mkdir', '-p', self.mountpoint], + timeout=(15*60), cwd=self.test_dir) run_cmd = [ 'sudo', @@ -83,6 +79,7 @@ class 
FuseMount(CephFSMount): self.mountpoint, ] + cwd = self.test_dir if self.client_config.get('valgrind') is not None: run_cmd = misc.get_valgrind_args( self.test_dir, @@ -90,17 +87,23 @@ class FuseMount(CephFSMount): run_cmd, self.client_config.get('valgrind'), ) + cwd = None # misc.get_valgrind_args chdir for us run_cmd.extend(fuse_cmd) def list_connections(): + from teuthology.misc import get_system_type + + conn_dir = "/sys/fs/fuse/connections" + + self.client_remote.run(args=['sudo', 'modprobe', 'fuse'], + check_status=False) self.client_remote.run( - args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"], - check_status=False, - timeout=(15*60) - ) + args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir], + check_status=False, timeout=(30)) + try: - ls_str = self.client_remote.sh("ls /sys/fs/fuse/connections", + ls_str = self.client_remote.sh("ls " + conn_dir, stdout=StringIO(), timeout=(15*60)).strip() except CommandFailedError: @@ -118,6 +121,7 @@ class FuseMount(CephFSMount): proc = self.client_remote.run( args=run_cmd, + cwd=cwd, logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), stdin=run.PIPE, wait=False, @@ -186,6 +190,7 @@ class FuseMount(CephFSMount): '--', self.mountpoint, ], + cwd=self.test_dir, stdout=StringIO(), stderr=StringIO(), wait=False, @@ -231,7 +236,7 @@ class FuseMount(CephFSMount): # unrestricted access to the filesystem mount. try: stderr = StringIO() - self.client_remote.run(args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60), stderr=stderr) + self.client_remote.run(args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60), cwd=self.test_dir, stderr=stderr) except run.CommandFailedError: stderr = stderr.getvalue() if "Read-only file system".lower() in stderr.lower(): @@ -240,7 +245,7 @@ class FuseMount(CephFSMount): raise def _mountpoint_exists(self): - return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, timeout=(15*60)).exitstatus == 0 + return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, cwd=self.test_dir, timeout=(15*60)).exitstatus == 0 def umount(self): try: @@ -252,6 +257,7 @@ class FuseMount(CephFSMount): '-u', self.mountpoint, ], + cwd=self.test_dir, timeout=(30*60), ) except run.CommandFailedError: @@ -346,8 +352,10 @@ class FuseMount(CephFSMount): '--', self.mountpoint, ], + cwd=self.test_dir, stderr=stderr, - timeout=(60*5) + timeout=(60*5), + check_status=False, ) except CommandFailedError: if "No such file or directory" in stderr.getvalue(): @@ -396,6 +404,7 @@ class FuseMount(CephFSMount): '-rf', self.mountpoint, ], + cwd=self.test_dir, timeout=(60*5) ) diff --git a/ceph/qa/tasks/cephfs/kernel_mount.py b/ceph/qa/tasks/cephfs/kernel_mount.py index 3df4bdec5..e749c0611 100644 --- a/ceph/qa/tasks/cephfs/kernel_mount.py +++ b/ceph/qa/tasks/cephfs/kernel_mount.py @@ -26,20 +26,16 @@ class KernelMount(CephFSMount): self.ipmi_password = ipmi_password self.ipmi_domain = ipmi_domain - def mount(self, mount_path=None, mount_fs_name=None): + def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None): + if mountpoint is not None: + self.mountpoint = mountpoint self.setupfs(name=mount_fs_name) log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) - self.client_remote.run( - args=[ - 'mkdir', - '--', - self.mountpoint, - ], - timeout=(5*60), - ) + self.client_remote.run(args=['mkdir', '-p', self.mountpoint], + 
timeout=(5*60)) if mount_path is None: mount_path = "/" @@ -187,6 +183,7 @@ class KernelMount(CephFSMount): self.mountpoint, ], timeout=(5*60), + check_status=False, ) def _find_debug_dir(self): diff --git a/ceph/qa/tasks/cephfs/mount.py b/ceph/qa/tasks/cephfs/mount.py index a22ae3dcb..5d8e6d23a 100644 --- a/ceph/qa/tasks/cephfs/mount.py +++ b/ceph/qa/tasks/cephfs/mount.py @@ -28,6 +28,7 @@ class CephFSMount(object): self.client_id = client_id self.client_remote = client_remote self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id) + self._mountpoint = None self.fs = None self.test_files = ['a', 'b', 'c'] @@ -36,8 +37,16 @@ class CephFSMount(object): @property def mountpoint(self): - return os.path.join( - self.test_dir, '{dir_name}'.format(dir_name=self.mountpoint_dir_name)) + if self._mountpoint == None: + self._mountpoint= os.path.join( + self.test_dir, '{dir_name}'.format(dir_name=self.mountpoint_dir_name)) + return self._mountpoint + + @mountpoint.setter + def mountpoint(self, path): + if not isinstance(path, str): + raise RuntimeError('path should be of str type.') + self._mountpoint = path def is_mounted(self): raise NotImplementedError() @@ -51,7 +60,7 @@ class CephFSMount(object): self.fs.wait_for_daemons() log.info('Ready to start {}...'.format(type(self).__name__)) - def mount(self, mount_path=None, mount_fs_name=None): + def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None): raise NotImplementedError() def umount(self): @@ -440,13 +449,14 @@ class CephFSMount(object): n = {count} abs_path = "{abs_path}" - if not os.path.exists(os.path.dirname(abs_path)): - os.makedirs(os.path.dirname(abs_path)) + if not os.path.exists(abs_path): + os.makedirs(abs_path) handles = [] for i in range(0, n): - fname = "{{0}}_{{1}}".format(abs_path, i) - handles.append(open(fname, 'w')) + fname = "file_"+str(i) + path = os.path.join(abs_path, fname) + handles.append(open(path, 'w')) while True: time.sleep(1) diff --git a/ceph/qa/tasks/cephfs/test_client_limits.py b/ceph/qa/tasks/cephfs/test_client_limits.py index 01b5db655..e32259795 100644 --- a/ceph/qa/tasks/cephfs/test_client_limits.py +++ b/ceph/qa/tasks/cephfs/test_client_limits.py @@ -8,6 +8,7 @@ import logging from textwrap import dedent from unittest import SkipTest from teuthology.orchestra.run import CommandFailedError +from tasks.ceph_test_case import TestTimeoutError from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming from tasks.cephfs.fuse_mount import FuseMount import os @@ -39,20 +40,17 @@ class TestClientLimits(CephFSTestCase): :param use_subdir: whether to put test files in a subdir or use root """ - cache_size = open_files // 2 + self.config_set('mds', 'mds_cache_memory_limit', "1K") + self.config_set('mds', 'mds_recall_max_caps', int(open_files/2)) + self.config_set('mds', 'mds_recall_warning_threshold', open_files) - self.set_conf('mds', 'mds cache size', cache_size) - self.set_conf('mds', 'mds_recall_max_caps', open_files // 2) - self.set_conf('mds', 'mds_recall_warning_threshold', open_files) - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() - - mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client")) - mds_recall_warning_decay_rate = float(self.fs.get_config("mds_recall_warning_decay_rate")) - self.assertTrue(open_files >= mds_min_caps_per_client) + mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client")) + self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client) + mds_recall_warning_decay_rate = 
float(self.config_get('mds.a', "mds_recall_warning_decay_rate")) + self.assertGreaterEqual(open_files, mds_min_caps_per_client) mount_a_client_id = self.mount_a.get_global_id() - path = "subdir/mount_a" if use_subdir else "mount_a" + path = "subdir" if use_subdir else "." open_proc = self.mount_a.open_n_background(path, open_files) # Client should now hold: @@ -88,8 +86,6 @@ class TestClientLimits(CephFSTestCase): num_caps = self.get_session(mount_a_client_id)['num_caps'] if num_caps <= mds_min_caps_per_client: return True - elif num_caps < cache_size: - return True else: return False @@ -107,6 +103,53 @@ class TestClientLimits(CephFSTestCase): def test_client_pin_mincaps(self): self._test_client_pin(True, 200) + def test_client_min_caps_working_set(self): + """ + When a client has inodes pinned in its cache (open files), that the MDS + will not warn about the client not responding to cache pressure when + the number of caps is below mds_min_caps_working_set. + """ + + # Set MDS cache memory limit to a low value that will make the MDS to + # ask the client to trim the caps. + cache_memory_limit = "1K" + open_files = 400 + + self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit) + self.config_set('mds', 'mds_recall_max_caps', int(open_files/2)) + self.config_set('mds', 'mds_recall_warning_threshold', open_files) + self.config_set('mds', 'mds_min_caps_working_set', open_files*2) + + mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client")) + mds_recall_warning_decay_rate = float(self.config_get('mds.a', "mds_recall_warning_decay_rate")) + self.assertGreaterEqual(open_files, mds_min_caps_per_client) + + mount_a_client_id = self.mount_a.get_global_id() + self.mount_a.open_n_background("subdir", open_files) + + # Client should now hold: + # `open_files` caps for the open files + # 1 cap for root + # 1 cap for subdir + self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], + open_files + 2, + timeout=600, + reject_fn=lambda x: x > open_files + 2) + + # We can also test that the MDS health warning for oversized + # cache is functioning as intended. + self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2) + + try: + # MDS should not be happy about that but it's not sending + # MDS_CLIENT_RECALL warnings because the client's caps are below + # mds_min_caps_working_set. + self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2) + except TestTimeoutError: + pass + else: + raise RuntimeError("expected no client recall warning") + def test_client_release_bug(self): """ When a client has a bug (which we will simulate) preventing it from releasing caps, @@ -240,11 +283,9 @@ class TestClientLimits(CephFSTestCase): That the MDS will not let a client sit above mds_max_caps_per_client caps. 
""" - mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client")) + mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client")) mds_max_caps_per_client = 2*mds_min_caps_per_client - self.set_conf('mds', 'mds_max_caps_per_client', mds_max_caps_per_client) - self.fs.mds_fail_restart() - self.fs.wait_for_daemons() + self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client) self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True) diff --git a/ceph/qa/tasks/cephfs/test_scrub.py b/ceph/qa/tasks/cephfs/test_scrub.py index 226db8157..1875b5f34 100644 --- a/ceph/qa/tasks/cephfs/test_scrub.py +++ b/ceph/qa/tasks/cephfs/test_scrub.py @@ -75,6 +75,9 @@ class BacktraceWorkload(Workload): self._filesystem.mds_asok(["flush", "journal"]) self._filesystem._write_data_xattr(st['st_ino'], "parent", "") + def create_files(self, nfiles=1000): + self._mount.create_n_files("scrub-new-files/file", nfiles) + class DupInodeWorkload(Workload): """ @@ -89,7 +92,7 @@ class DupInodeWorkload(Workload): def damage(self): temp_bin_path = "/tmp/10000000000.00000000_omap.bin" - self._mount.umount() + self._mount.umount_wait() self._filesystem.mds_asok(["flush", "journal"]) self._filesystem.mds_stop() self._filesystem.rados(["getomapval", "10000000000.00000000", @@ -144,6 +147,27 @@ class TestScrub(CephFSTestCase): errors[0].exception, errors[0].backtrace )) + def _get_damage_count(self, damage_type='backtrace'): + out_json = self.fs.rank_tell(["damage", "ls"]) + self.assertNotEqual(out_json, None) + + damage_count = 0 + for it in out_json: + if it['damage_type'] == damage_type: + damage_count += 1 + return damage_count + + def _scrub_new_files(self, workload): + """ + That scrubbing new files does not lead to errors + """ + workload.create_files(1000) + self._wait_until_scrub_complete() + self.assertEqual(self._get_damage_count(), 0) + + def test_scrub_backtrace_for_new_files(self): + self._scrub_new_files(BacktraceWorkload(self.fs, self.mount_a)) + def test_scrub_backtrace(self): self._scrub(BacktraceWorkload(self.fs, self.mount_a)) diff --git a/ceph/qa/tasks/cephfs/test_volumes.py b/ceph/qa/tasks/cephfs/test_volumes.py index a869b79e4..dc255c1b8 100644 --- a/ceph/qa/tasks/cephfs/test_volumes.py +++ b/ceph/qa/tasks/cephfs/test_volumes.py @@ -29,6 +29,9 @@ class TestVolumes(CephFSTestCase): def _fs_cmd(self, *args): return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + def _raw_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + def __check_clone_state(self, state, clone, clone_group=None, timo=120): check = 0 args = ["clone", "status", self.volname, clone] @@ -105,28 +108,33 @@ class TestVolumes(CephFSTestCase): self._verify_clone_attrs(subvolume, clone, source_group=source_group, clone_group=clone_group) def _generate_random_volume_name(self, count=1): - r = random.sample(range(10000), count) - volumes = ["{0}_{1}".format(TestVolumes.TEST_VOLUME_PREFIX, c) for c in r] + n = self.volume_start + volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] + self.volume_start += count return volumes[0] if count == 1 else volumes def _generate_random_subvolume_name(self, count=1): - r = random.sample(range(10000), count) - subvolumes = ["{0}_{1}".format(TestVolumes.TEST_SUBVOLUME_PREFIX, c) for c in r] + n = self.subvolume_start + subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] + self.subvolume_start += count return subvolumes[0] if count == 1 else 
subvolumes def _generate_random_group_name(self, count=1): - r = random.sample(range(100), count) - groups = ["{0}_{1}".format(TestVolumes.TEST_GROUP_PREFIX, c) for c in r] + n = self.group_start + groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)] + self.group_start += count return groups[0] if count == 1 else groups def _generate_random_snapshot_name(self, count=1): - r = random.sample(range(100), count) - snaps = ["{0}_{1}".format(TestVolumes.TEST_SNAPSHOT_PREFIX, c) for c in r] + n = self.snapshot_start + snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)] + self.snapshot_start += count return snaps[0] if count == 1 else snaps def _generate_random_clone_name(self, count=1): - r = random.sample(range(1000), count) - clones = ["{0}_{1}".format(TestVolumes.TEST_CLONE_PREFIX, c) for c in r] + n = self.clone_start + clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)] + self.clone_start += count return clones[0] if count == 1 else clones def _enable_multi_fs(self): @@ -164,6 +172,14 @@ class TestVolumes(CephFSTestCase): subvol_md = self._fs_cmd(*args) return subvol_md + def _get_subvolume_snapshot_info(self, vol_name, subvol_name, snapname, group_name=None): + args = ["subvolume", "snapshot", "info", vol_name, subvol_name, snapname] + if group_name: + args.append(group_name) + args = tuple(args) + snap_md = self._fs_cmd(*args) + return snap_md + def _delete_test_volume(self): self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") @@ -217,6 +233,12 @@ class TestVolumes(CephFSTestCase): self.vol_created = False self._enable_multi_fs() self._create_or_reuse_test_volume() + self.config_set('mon', 'mon_allow_pool_delete', True) + self.volume_start = random.randint(1, (1<<20)) + self.subvolume_start = random.randint(1, (1<<20)) + self.group_start = random.randint(1, (1<<20)) + self.snapshot_start = random.randint(1, (1<<20)) + self.clone_start = random.randint(1, (1<<20)) def tearDown(self): if self.vol_created: @@ -302,6 +324,52 @@ class TestVolumes(CephFSTestCase): else: raise RuntimeError("expected the 'fs volume rm' command to fail.") + def test_volume_rm_arbitrary_pool_removal(self): + """ + That the arbitrary pool added to the volume out of band is removed + successfully on volume removal. + """ + new_pool = "new_pool" + # add arbitrary data pool + self.fs.add_data_pool(new_pool) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if fs is gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + volnames = [volume['name'] for volume in volumes] + self.assertNotIn(self.volname, volnames) + + #check if osd pools are gone + pools = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json-pretty")) + for pool in pools: + self.assertNotIn(self.volname, pool["application_metadata"].keys()) + + def test_volume_rm_when_mon_delete_pool_false(self): + """ + That the volume can only be removed when mon_allowd_pool_delete is set + to true and verify that the pools are removed after volume deletion. 
+ """ + self.config_set('mon', 'mon_allow_pool_delete', False) + try: + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "expected the 'fs volume rm' command to fail with EPERM, " + "but it failed with {0}".format(ce.exitstatus)) + self.config_set('mon', 'mon_allow_pool_delete', True) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if fs is gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + volnames = [volume['name'] for volume in volumes] + self.assertNotIn(self.volname, volnames, + "volume {0} exists after removal".format(self.volname)) + #check if pools are gone + pools = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json-pretty")) + for pool in pools: + self.assertNotIn(self.volname, pool["application_metadata"].keys(), + "pool {0} exists after volume removal".format(pool["pool_name"])) + ### basic subvolume operations def test_subvolume_create_and_rm(self): @@ -784,7 +852,7 @@ class TestVolumes(CephFSTestCase): subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", - "type", "uid"] + "type", "uid", "features"] # create subvolume subvolume = self._generate_random_subvolume_name() @@ -792,37 +860,34 @@ class TestVolumes(CephFSTestCase): # get subvolume metadata subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) - if len(subvol_info) == 0: - raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of subvolume") + self.assertNotEqual(len(subvol_info), 0, "expected the 'fs subvolume info' command to list metadata of subvolume") for md in subvol_md: - if md not in subvol_info.keys(): - raise RuntimeError("%s not present in the metadata of subvolume" % md) + self.assertIn(md, subvol_info.keys(), "'{0}' key not present in metadata of subvolume".format(md)) - if subvol_info["bytes_pcent"] != "undefined": - raise RuntimeError("bytes_pcent should be set to undefined if quota is not set") + self.assertEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set") + self.assertEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set") + self.assertEqual(subvol_info["pool_namespace"], "", "expected pool namespace to be empty") - if subvol_info["bytes_quota"] != "infinite": - raise RuntimeError("bytes_quota should be set to infinite if quota is not set") - self.assertEqual(subvol_info["pool_namespace"], "") + self.assertEqual(len(subvol_info["features"]), 2, + msg="expected 2 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) nsize = self.DEFAULT_FILE_SIZE*1024*1024 - try: - self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) - except CommandFailedError: - raise RuntimeError("expected the 'fs subvolume resize' command to succeed") + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) # get subvolume metadata after quota set subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) - if len(subvol_info) == 0: - raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of 
subvolume") - if subvol_info["bytes_pcent"] == "undefined": - raise RuntimeError("bytes_pcent should not be set to undefined if quota is set") + self.assertNotEqual(len(subvol_info), 0, "expected the 'fs subvolume info' command to list metadata of subvolume") - if subvol_info["bytes_quota"] == "infinite": - raise RuntimeError("bytes_quota should not be set to infinite if quota is set") + self.assertNotEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is not set") + self.assertNotEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should not be set to infinite if quota is not set") + self.assertEqual(subvol_info["type"], "subvolume", "type should be set to subvolume") - if subvol_info["type"] != "subvolume": - raise RuntimeError("type should be set to subvolume") + self.assertEqual(len(subvol_info["features"]), 2, + msg="expected 2 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) @@ -850,18 +915,12 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) # check clone status self._wait_for_clone_to_complete(clone) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -904,8 +963,7 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolumegroup", "rm", self.volname, group) def test_subvolume_group_create_with_desired_data_pool_layout(self): - group1 = self._generate_random_group_name() - group2 = self._generate_random_group_name() + group1, group2 = self._generate_random_group_name(2) # create group self._fs_cmd("subvolumegroup", "create", self.volname, group1) @@ -966,8 +1024,7 @@ class TestVolumes(CephFSTestCase): raise RuntimeError("expected the 'fs subvolumegroup getpath' command to fail") def test_subvolume_create_with_desired_data_pool_layout_in_group(self): - subvol1 = self._generate_random_subvolume_name() - subvol2 = self._generate_random_subvolume_name() + subvol1, subvol2 = self._generate_random_subvolume_name(2) group = self._generate_random_group_name() # create group. 
this also helps set default pool layout for subvolumes @@ -998,8 +1055,7 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolumegroup", "rm", self.volname, group) def test_subvolume_group_create_with_desired_mode(self): - group1 = self._generate_random_group_name() - group2 = self._generate_random_group_name() + group1, group2 = self._generate_random_group_name(2) # default mode expected_mode1 = "755" # desired mode @@ -1047,9 +1103,8 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname) def test_subvolume_create_with_desired_mode_in_group(self): - subvol1 = self._generate_random_subvolume_name() - subvol2 = self._generate_random_subvolume_name() - subvol3 = self._generate_random_subvolume_name() + subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) + group = self._generate_random_group_name() # default mode expected_mode1 = "755" @@ -1184,6 +1239,42 @@ class TestVolumes(CephFSTestCase): # verify trash dir is clean self._wait_for_trash_empty() + def test_subvolume_snapshot_info(self): + + """ + tests the 'fs subvolume snapshot info' command + """ + + snap_metadata = ["created_at", "data_pool", "has_pending_clones", "size"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=1) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + self.assertNotEqual(len(snap_info), 0) + for md in snap_metadata: + if md not in snap_info: + raise RuntimeError("%s not present in the metadata of subvolume snapshot" % md) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_snapshot_create_idempotence(self): subvolume = self._generate_random_subvolume_name() snapshot = self._generate_random_snapshot_name() @@ -1503,88 +1594,11 @@ class TestVolumes(CephFSTestCase): # verify trash dir is clean self._wait_for_trash_empty() - def test_subvolume_snapshot_protect_unprotect(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - - # create subvolume - self._fs_cmd("subvolume", "create", self.volname, subvolume) - - # protect a nonexistent snapshot - try: - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - except CommandFailedError as ce: - if ce.exitstatus != errno.ENOENT: - raise RuntimeError("invalid error code when protecting a non-existing snapshot") - else: - raise RuntimeError("expected protection of non existent snapshot to fail") - - # snapshot subvolume - self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - - # protecting snapshot again, should return EEXIST - try: - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - except CommandFailedError as ce: - if ce.exitstatus != errno.EEXIST: - raise RuntimeError("invalid error code when protecting a 
protected snapshot") - else: - raise RuntimeError("expected protection of already protected snapshot to fail") - - # remove snapshot should fail since the snapshot is protected - try: - self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - except CommandFailedError as ce: - if ce.exitstatus != errno.EINVAL: - raise RuntimeError("invalid error code when removing a protected snapshot") - else: - raise RuntimeError("expected removal of protected snapshot to fail") - - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - - # remove snapshot - self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - - # remove subvolume - self._fs_cmd("subvolume", "rm", self.volname, subvolume) - - # verify trash dir is clean - self._wait_for_trash_empty() - - def test_subvolume_snapshot_clone_unprotected_snapshot(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() - - # create subvolume - self._fs_cmd("subvolume", "create", self.volname, subvolume) - - # snapshot subvolume - self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - - # clone a non protected snapshot - try: - self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) - except CommandFailedError as ce: - if ce.exitstatus != errno.EINVAL: - raise RuntimeError("invalid error code when cloning a non protected snapshot") - else: - raise RuntimeError("expected cloning of unprotected snapshot to fail") - - # remove snapshot - self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - - # remove subvolumes - self._fs_cmd("subvolume", "rm", self.volname, subvolume) - - # verify trash dir is clean - self._wait_for_trash_empty() - - def test_subvolume_snapshot_clone(self): + def test_subvolume_snapshot_protect_unprotect_sanity(self): + """ + Snapshot protect/unprotect commands are deprecated. This test exists to ensure that + invoking the command does not cause errors, till they are removed from a subsequent release. 
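+
+        A minimal sketch of the expectation, assuming the subvolume and snapshot created
+        below (illustrative only; both commands are kept purely for compatibility):
+
+            self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)    # expected to return success
+            self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)  # expected to return success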
+ """ subvolume = self._generate_random_subvolume_name() snapshot = self._generate_random_snapshot_name() clone = self._generate_random_clone_name() @@ -1604,15 +1618,6 @@ class TestVolumes(CephFSTestCase): # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) - # unprotecting when a clone is in progress should fail - try: - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - except CommandFailedError as ce: - if ce.exitstatus != errno.EEXIST: - raise RuntimeError("invalid error code when unprotecting snapshot during clone") - else: - raise RuntimeError("expected unprotecting a snapshot to fail since it has pending clones") - # check clone status self._wait_for_clone_to_complete(clone) @@ -1632,6 +1637,39 @@ class TestVolumes(CephFSTestCase): # verify trash dir is clean self._wait_for_trash_empty() + def test_subvolume_snapshot_clone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify clone + self._verify_clone(subvolume, clone) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_snapshot_clone_pool_layout(self): subvolume = self._generate_random_subvolume_name() snapshot = self._generate_random_snapshot_name() @@ -1650,18 +1688,12 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, "--pool_layout", new_pool) # check clone status self._wait_for_clone_to_complete(clone) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -1697,18 +1729,12 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) # check clone status self._wait_for_clone_to_complete(clone) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -1736,18 +1762,12 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, 
subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) # check clone status self._wait_for_clone_to_complete(clone1) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -1761,18 +1781,12 @@ class TestVolumes(CephFSTestCase): # snapshot clone -- use same snap name self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone1, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, clone1, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, clone1, snapshot, clone2) # check clone status self._wait_for_clone_to_complete(clone2) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, clone1, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone1, snapshot) @@ -1802,9 +1816,6 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -1814,9 +1825,6 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone, clone_group=group) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -1851,18 +1859,12 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot, group) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--group_name', group) # check clone status self._wait_for_clone_to_complete(clone) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot, group) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) @@ -1898,9 +1900,6 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, s_group) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot, s_group) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--group_name', s_group, '--target_group_name', c_group) @@ -1908,9 +1907,6 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone, clone_group=c_group) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot, s_group) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, s_group) @@ -1943,23 +1939,25 @@ class TestVolumes(CephFSTestCase): self.mount_a.run_shell(['mkdir', '-p', createpath]) # do some 
IO - self._do_subvolume_io(subvolume, number_of_files=32) + self._do_subvolume_io(subvolume, number_of_files=64) # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + # snapshot should not be deletable now + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone") + else: + self.fail("expected removing source snapshot of a clone to fail") + # check clone status self._wait_for_clone_to_complete(clone) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -1987,9 +1985,6 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -1998,7 +1993,7 @@ class TestVolumes(CephFSTestCase): self._get_subvolume_path(self.volname, clone) except CommandFailedError as ce: if ce.exitstatus != errno.EAGAIN: - raise RuntimeError("invalid error code when cloning a non protected snapshot") + raise RuntimeError("invalid error code when fetching path of an pending clone") else: raise RuntimeError("expected fetching path of an pending clone to fail") @@ -2009,8 +2004,50 @@ class TestVolumes(CephFSTestCase): subvolpath = self._get_subvolume_path(self.volname, clone) self.assertNotEqual(subvolpath, None) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify clone + self._verify_clone(subvolume, clone) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # snapshot should not be deletable now + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone") + else: + self.fail("expected removing source snapshot of a clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + 
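+        # (while the clone was still pending, "subvolume getpath" on it would return
+        #  EAGAIN -- see test_subvolume_clone_in_progress_getpath above; once
+        #  _wait_for_clone_to_complete() has returned, the path lookup should succeed)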
subvolpath = self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -2039,9 +2076,6 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -2060,9 +2094,6 @@ class TestVolumes(CephFSTestCase): subvolpath = self._get_subvolume_path(self.volname, clone) self.assertNotEqual(subvolpath, None) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -2111,9 +2142,6 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume1, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume1, snapshot) - # schedule a clone with target as subvolume2 try: self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, subvolume2) @@ -2137,9 +2165,6 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume1, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, snapshot) @@ -2172,9 +2197,6 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # add data pool new_pool = "new_pool" self.fs.add_data_pool(new_pool) @@ -2200,9 +2222,6 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_fail(clone2) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -2237,18 +2256,12 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) # check clone status self._wait_for_clone_to_complete(clone) - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -2276,9 +2289,6 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -2288,9 +2298,6 @@ class TestVolumes(CephFSTestCase): # verify canceled state self._check_clone_canceled(clone) - # now, 
unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -2330,9 +2337,6 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - # schedule clones for clone in clones: self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -2358,9 +2362,6 @@ class TestVolumes(CephFSTestCase): if ce.exitstatus != errno.EINVAL: raise RuntimeError("invalid error code when cancelling on-going clone") - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) - # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) diff --git a/ceph/qa/tasks/mgr/dashboard/helper.py b/ceph/qa/tasks/mgr/dashboard/helper.py index 41bf2153b..854c85096 100644 --- a/ceph/qa/tasks/mgr/dashboard/helper.py +++ b/ceph/qa/tasks/mgr/dashboard/helper.py @@ -149,6 +149,7 @@ class DashboardTestCase(MgrTestCase): cls.login('admin', 'admin') def setUp(self): + super(DashboardTestCase, self).setUp() if not self._loggedin and self.AUTO_AUTHENTICATE: self.login('admin', 'admin') self.wait_for_health_clear(20) diff --git a/ceph/qa/tasks/mgr/dashboard/test_auth.py b/ceph/qa/tasks/mgr/dashboard/test_auth.py index 0de3f2781..0acc64478 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_auth.py +++ b/ceph/qa/tasks/mgr/dashboard/test_auth.py @@ -14,6 +14,7 @@ class AuthTest(DashboardTestCase): AUTO_AUTHENTICATE = False def setUp(self): + super(AuthTest, self).setUp() self.reset_session() def _validate_jwt_token(self, token, username, permissions): diff --git a/ceph/qa/tasks/mgr/dashboard/test_health.py b/ceph/qa/tasks/mgr/dashboard/test_health.py index afc632a6c..65ea21b16 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_health.py +++ b/ceph/qa/tasks/mgr/dashboard/test_health.py @@ -19,6 +19,41 @@ class HealthTest(DashboardTestCase): 'statuses': JObj({}, allow_unknown=True, unknown_schema=int) }) + __mdsmap_schema = JObj({ + 'session_autoclose': int, + 'balancer': str, + 'up': JObj({}, allow_unknown=True), + 'last_failure_osd_epoch': int, + 'in': JList(int), + 'last_failure': int, + 'max_file_size': int, + 'explicitly_allowed_features': int, + 'damaged': JList(int), + 'tableserver': int, + 'failed': JList(int), + 'metadata_pool': int, + 'epoch': int, + 'stopped': JList(int), + 'max_mds': int, + 'compat': JObj({ + 'compat': JObj({}, allow_unknown=True), + 'ro_compat': JObj({}, allow_unknown=True), + 'incompat': JObj({}, allow_unknown=True) + }), + 'min_compat_client': str, + 'data_pools': JList(int), + 'info': JObj({}, allow_unknown=True), + 'fs_name': str, + 'created': str, + 'standby_count_wanted': int, + 'enabled': bool, + 'modified': str, + 'session_timeout': int, + 'flags': int, + 'ever_allowed_features': int, + 'root': int + }) + def test_minimal_health(self): data = self._get('/api/health/minimal') self.assertStatus(200) @@ -40,18 +75,10 @@ class HealthTest(DashboardTestCase): 'fs_map': JObj({ 'filesystems': JList( JObj({ - 'mdsmap': JObj({ - 'info': JObj( - {}, - allow_unknown=True, - unknown_schema=JObj({ - 'state': str - }) - ) - }) + 'mdsmap': self.__mdsmap_schema }), ), - 'standbys': JList(JObj({})), + 'standbys': JList(JObj({}, allow_unknown=True)), }), 'health': JObj({ 
'checks': JList(str), @@ -164,16 +191,7 @@ class HealthTest(DashboardTestCase): 'filesystems': JList( JObj({ 'id': int, - 'mdsmap': JObj({ - # TODO: Expand mdsmap schema - 'info': JObj( - {}, - allow_unknown=True, - unknown_schema=JObj({ - 'state': str - }, allow_unknown=True) - ) - }, allow_unknown=True) + 'mdsmap': self.__mdsmap_schema }), ), 'standbys': JList(JObj({}, allow_unknown=True)), diff --git a/ceph/qa/tasks/mgr/dashboard/test_pool.py b/ceph/qa/tasks/mgr/dashboard/test_pool.py index 40a1e20e1..ad8697956 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_pool.py +++ b/ceph/qa/tasks/mgr/dashboard/test_pool.py @@ -2,9 +2,9 @@ from __future__ import absolute_import import logging - import six import time +from contextlib import contextmanager from .helper import DashboardTestCase, JAny, JList, JObj @@ -37,30 +37,68 @@ class PoolTest(DashboardTestCase): 'wr': pool_list_stat_schema, }, allow_unknown=True) - def _pool_create(self, data): - try: - self._task_post('/api/pool/', data) - self.assertStatus(201) + pool_rbd_conf_schema = JList(JObj(sub_elems={ + 'name': str, + 'value': str, + 'source': int + })) - self._check_pool_properties(data) + @contextmanager + def __create_pool(self, name, data=None): + pool_data = data or { + 'pool': name, + 'pg_num': '4', + 'pool_type': 'replicated', + 'compression_algorithm': 'snappy', + 'compression_mode': 'passive', + 'compression_max_blob_size': '131072', + 'compression_required_ratio': '0.875', + 'application_metadata': ['rbd'], + 'configuration': { + 'rbd_qos_bps_limit': 1024000, + 'rbd_qos_iops_limit': 5000, + } + } + self._task_post('/api/pool/', pool_data) + self.assertStatus(201) + time.sleep(5) + self._validate_pool_properties(pool_data, self._get_pool(name)) + yield pool_data + self._task_delete('/api/pool/' + name) + self.assertStatus(204) - self._task_delete("/api/pool/" + data['pool']) - self.assertStatus(204) - except Exception: - log.exception("test_pool_create: data=%s", data) - raise - - def _check_pool_properties(self, data, pool_name=None): - if not pool_name: - pool_name = data['pool'] - pool = self._get_pool(pool_name) - try: - for k, v in data.items(): - self._check_pool_property(k, v, pool) - - except Exception: - log.exception("test_pool_create: pool=%s", pool) - raise + def _validate_pool_properties(self, data, pool): + for prop, value in data.items(): + if prop == 'pool_type': + self.assertEqual(pool['type'], value) + elif prop == 'size': + self.assertEqual(pool[prop], int(value), + '{}: {} != {}'.format(prop, pool[prop], value)) + elif prop == 'pg_num': + self._check_pg_num(value, pool) + elif prop == 'application_metadata': + self.assertIsInstance(pool[prop], list) + self.assertEqual(value, pool[prop]) + elif prop == 'pool': + self.assertEqual(pool['pool_name'], value) + elif prop.startswith('compression'): + if value is not None: + if prop.endswith('size'): + value = int(value) + elif prop.endswith('ratio'): + value = float(value) + self.assertEqual(pool['options'][prop], value) + else: + self.assertEqual(pool['options'], {}) + elif prop == 'configuration': + # configuration cannot really be checked here for two reasons: + # 1. The default value cannot be given to this method, which becomes relevant + # when resetting a value, because it's not always zero. + # 2. 
The expected `source` cannot be given to this method, and it cannot + # relibably be determined (see 1) + pass + else: + self.assertEqual(pool[prop], value, '{}: {} != {}'.format(prop, pool[prop], value)) health = self._get('/api/health/minimal')['health'] self.assertEqual(health['status'], 'HEALTH_OK', msg='health={}'.format(health)) @@ -71,49 +109,27 @@ class PoolTest(DashboardTestCase): self.assertSchemaBody(self.pool_schema) return pool - def _check_pool_property(self, prop, value, pool): - if prop == 'pool_type': - self.assertEqual(pool['type'], value) - elif prop == 'size': - self.assertEqual(pool[prop], int(value), '{}: {} != {}'.format(prop, pool[prop], value)) - elif prop == 'pg_num': - self._check_pg_num(value, pool) - elif prop == 'application_metadata': - self.assertIsInstance(pool[prop], list) - self.assertEqual(pool[prop], value) - elif prop == 'pool': - self.assertEqual(pool['pool_name'], value) - elif prop.startswith('compression'): - if value is not None: - if prop.endswith('size'): - value = int(value) - elif prop.endswith('ratio'): - value = float(value) - self.assertEqual(pool['options'].get(prop), value) - else: - self.assertEqual(pool[prop], value, '{}: {} != {}'.format(prop, pool[prop], value)) - def _check_pg_num(self, value, pool): - # If both properties have not the same value, the cluster goes into a warning state, - # which will only happen during a pg update on a existing pool. - # The test that does that is currently commented out because - # our QA systems can't deal with the change. - # Feel free to test it locally. - prop = 'pg_num' + """ + If both properties have not the same value, the cluster goes into a warning state, which + will only happen during a pg update on an existing pool. The test that does that is + currently commented out because our QA systems can't deal with the change. Feel free to test + it locally. 
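+
+        As written, the helper simply polls (for up to ~180 seconds) until the pool's
+        pg_placement_num matches the requested pg_num and the cluster reports
+        HEALTH_OK, and then asserts that both values equal the requested value.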
+ """ pgp_prop = 'pg_placement_num' - health = lambda: self._get('/api/health/minimal')['health']['status'] == 'HEALTH_OK' - t = 0; - while (int(value) != pool[pgp_prop] or not health()) and t < 180: + t = 0 + while (int(value) != pool[pgp_prop] or self._get('/api/health/minimal')['health']['status'] + != 'HEALTH_OK') and t < 180: time.sleep(2) t += 2 pool = self._get_pool(pool['pool_name']) - for p in [prop, pgp_prop]: # Should have the same values + for p in ['pg_num', pgp_prop]: # Should have the same values self.assertEqual(pool[p], int(value), '{}: {} != {}'.format(p, pool[p], value)) @classmethod def tearDownClass(cls): super(PoolTest, cls).tearDownClass() - for name in ['dashboard_pool1', 'dashboard_pool2', 'dashboard_pool3', 'dashboard_pool_update1']: + for name in ['dashboard_pool1', 'dashboard_pool2', 'dashboard_pool3']: cls._ceph_cmd(['osd', 'pool', 'delete', name, name, '--yes-i-really-really-mean-it']) cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecprofile']) @@ -188,23 +204,40 @@ class PoolTest(DashboardTestCase): self.assertNotIn('pg_status', pool) self.assertSchema(pool['stats'], self.pool_list_stats_schema) self.assertNotIn('flags_names', pool) + self.assertSchema(pool['configuration'], self.pool_rbd_conf_schema) def test_pool_create(self): self._ceph_cmd(['osd', 'crush', 'rule', 'create-erasure', 'ecrule']) self._ceph_cmd( ['osd', 'erasure-code-profile', 'set', 'ecprofile', 'crush-failure-domain=osd']) - pools = [{ + + pool = { 'pool': 'dashboard_pool1', 'pg_num': '32', 'pool_type': 'replicated', 'application_metadata': ['rbd', 'sth'], - }, { + } + self._task_post('/api/pool/', pool) + self.assertStatus(201) + self._validate_pool_properties(pool, self._get_pool(pool['pool'])) + self._task_delete("/api/pool/" + pool['pool']) + self.assertStatus(204) + + pool = { 'pool': 'dashboard_pool2', 'pg_num': '32', 'pool_type': 'erasure', + 'application_metadata': ['rbd'], 'erasure_code_profile': 'ecprofile', 'crush_rule': 'ecrule', - }, { + } + self._task_post('/api/pool/', pool) + self.assertStatus(201) + self._validate_pool_properties(pool, self._get_pool(pool['pool'])) + self._task_delete("/api/pool/" + pool['pool']) + self.assertStatus(204) + + pool = { 'pool': 'dashboard_pool3', 'pg_num': '32', 'pool_type': 'replicated', @@ -212,64 +245,99 @@ class PoolTest(DashboardTestCase): 'compression_mode': 'aggressive', 'compression_max_blob_size': '10000000', 'compression_required_ratio': '0.8', - }] - for data in pools: - self._pool_create(data) - - def test_update(self): - pool = { - 'pool': 'dashboard_pool_update1', - 'pg_num': '32', - 'pool_type': 'replicated', - 'compression_mode': 'passive', - 'compression_algorithm': 'snappy', - 'compression_max_blob_size': '131072', - 'compression_required_ratio': '0.875', + 'configuration': { + 'rbd_qos_bps_limit': 2048, + 'rbd_qos_iops_limit': None, + }, } - updates = [ - { - 'application_metadata': ['rbd', 'sth'], - }, - # The following test case is currently commented out because - # our QA systems can't deal with the change and will fail because - # they can't recover from the resulting warning state. - # Feel free to test it locally. 
- # { - # 'pg_num': '2', # Decrease PGs - # }, - # { - # 'pg_num': '8', # Increase PGs - # }, - { - 'application_metadata': ['rgw'], - }, - { + expected_configuration = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 1, + 'value': '2048', + }, { + 'name': 'rbd_qos_iops_limit', + 'source': 0, + 'value': '0', + }] + self._task_post('/api/pool/', pool) + self.assertStatus(201) + new_pool = self._get_pool(pool['pool']) + self._validate_pool_properties(pool, new_pool) + for conf in expected_configuration: + self.assertIn(conf, new_pool['configuration']) + + self._task_delete("/api/pool/" + pool['pool']) + self.assertStatus(204) + + def test_pool_update_metadata(self): + pool_name = 'pool_update_metadata' + with self.__create_pool(pool_name): + props = {'application_metadata': ['rbd', 'sth']} + self._task_put('/api/pool/{}'.format(pool_name), props) + time.sleep(5) + self._validate_pool_properties(props, self._get_pool(pool_name)) + + properties = {'application_metadata': ['rgw']} + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + properties = {'application_metadata': ['rbd', 'sth']} + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + properties = {'application_metadata': ['rgw']} + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + def test_pool_update_configuration(self): + pool_name = 'pool_update_configuration' + with self.__create_pool(pool_name): + configuration = { + 'rbd_qos_bps_limit': 1024, + 'rbd_qos_iops_limit': None, + } + expected_configuration = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 1, + 'value': '1024', + }, { + 'name': 'rbd_qos_iops_limit', + 'source': 0, + 'value': '0', + }] + self._task_put('/api/pool/' + pool_name, {'configuration': configuration}) + time.sleep(5) + pool_config = self._get_pool(pool_name)['configuration'] + for conf in expected_configuration: + self.assertIn(conf, pool_config) + + def test_pool_update_compression(self): + pool_name = 'pool_update_compression' + with self.__create_pool(pool_name): + properties = { 'compression_algorithm': 'zstd', 'compression_mode': 'aggressive', 'compression_max_blob_size': '10000000', 'compression_required_ratio': '0.8', - }, - { - 'compression_mode': 'unset' } - ] - self._task_post('/api/pool/', pool) - self.assertStatus(201) - self._check_pool_properties(pool) + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) - for update in updates: - self._task_put('/api/pool/' + pool['pool'], update) - if update.get('compression_mode') == 'unset': - update = { - 'compression_mode': None, - 'compression_algorithm': None, - 'compression_mode': None, - 'compression_max_blob_size': None, - 'compression_required_ratio': None, - } - self._check_pool_properties(update, pool_name=pool['pool']) - self._task_delete("/api/pool/" + pool['pool']) - self.assertStatus(204) + def test_pool_update_unset_compression(self): + pool_name = 'pool_update_unset_compression' + with self.__create_pool(pool_name): + self._task_put('/api/pool/' + pool_name, {'compression_mode': 'unset'}) + time.sleep(5) + self._validate_pool_properties({ + 'compression_algorithm': None, + 'compression_mode': None, + 'compression_max_blob_size': None, + 'compression_required_ratio': None, + }, 
self._get_pool(pool_name)) def test_pool_create_fail(self): data = {'pool_type': u'replicated', 'rule_name': u'dnf', 'pg_num': u'8', 'pool': u'sadfs'} diff --git a/ceph/qa/tasks/mgr/dashboard/test_rbd.py b/ceph/qa/tasks/mgr/dashboard/test_rbd.py index 252ca82df..68af93d9b 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_rbd.py +++ b/ceph/qa/tasks/mgr/dashboard/test_rbd.py @@ -188,6 +188,8 @@ class RbdTest(DashboardTestCase): 'block_name_prefix': JLeaf(str), 'name': JLeaf(str), 'id': JLeaf(str), + 'unique_id': JLeaf(str), + 'image_format': JLeaf(int), 'pool_name': JLeaf(str), 'features': JLeaf(int), 'features_name': JList(JLeaf(str)), diff --git a/ceph/qa/tasks/mgr/dashboard/test_rgw.py b/ceph/qa/tasks/mgr/dashboard/test_rgw.py index dc119b300..5c41b207e 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_rgw.py +++ b/ceph/qa/tasks/mgr/dashboard/test_rgw.py @@ -67,6 +67,7 @@ class RgwApiCredentialsTest(RgwTestCase): AUTH_ROLES = ['rgw-manager'] def setUp(self): + super(RgwApiCredentialsTest, self).setUp() # Restart the Dashboard module to ensure that the connection to the # RGW Admin Ops API is re-established with the new credentials. self.logout() diff --git a/ceph/qa/tasks/mgr/dashboard/test_settings.py b/ceph/qa/tasks/mgr/dashboard/test_settings.py index bc0925df2..2d890484a 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_settings.py +++ b/ceph/qa/tasks/mgr/dashboard/test_settings.py @@ -7,6 +7,7 @@ from .helper import DashboardTestCase, JList, JObj, JAny class SettingsTest(DashboardTestCase): def setUp(self): + super(SettingsTest, self).setUp() self.settings = self._get('/api/settings') def tearDown(self): diff --git a/ceph/qa/tasks/mgr/test_crash.py b/ceph/qa/tasks/mgr/test_crash.py index cb5c7af76..49191127f 100644 --- a/ceph/qa/tasks/mgr/test_crash.py +++ b/ceph/qa/tasks/mgr/test_crash.py @@ -13,6 +13,7 @@ DATEFMT = '%Y-%m-%d %H:%M:%S.%f' class TestCrash(MgrTestCase): def setUp(self): + super(TestCrash, self).setUp() self.setup_mgrs() self._load_module('crash') diff --git a/ceph/qa/tasks/mgr/test_failover.py b/ceph/qa/tasks/mgr/test_failover.py index 5e1d842d5..a4e840883 100644 --- a/ceph/qa/tasks/mgr/test_failover.py +++ b/ceph/qa/tasks/mgr/test_failover.py @@ -12,6 +12,7 @@ class TestFailover(MgrTestCase): MGRS_REQUIRED = 2 def setUp(self): + super(TestFailover, self).setUp() self.setup_mgrs() def test_timeout(self): diff --git a/ceph/qa/tasks/mgr/test_insights.py b/ceph/qa/tasks/mgr/test_insights.py index 22e3abfcf..53a98b9c6 100644 --- a/ceph/qa/tasks/mgr/test_insights.py +++ b/ceph/qa/tasks/mgr/test_insights.py @@ -12,6 +12,7 @@ DATEFMT = '%Y-%m-%d %H:%M:%S.%f' class TestInsights(MgrTestCase): def setUp(self): + super(TestInsights, self).setUp() self.setup_mgrs() self._load_module("insights") self._load_module("selftest") diff --git a/ceph/qa/tasks/mgr/test_module_selftest.py b/ceph/qa/tasks/mgr/test_module_selftest.py index 267034707..3c36a6ebb 100644 --- a/ceph/qa/tasks/mgr/test_module_selftest.py +++ b/ceph/qa/tasks/mgr/test_module_selftest.py @@ -24,6 +24,7 @@ class TestModuleSelftest(MgrTestCase): MGRS_REQUIRED = 1 def setUp(self): + super(TestModuleSelftest, self).setUp() self.setup_mgrs() def _selftest_plugin(self, module_name): diff --git a/ceph/qa/tasks/mgr/test_progress.py b/ceph/qa/tasks/mgr/test_progress.py index 35039f2d0..dad329a06 100644 --- a/ceph/qa/tasks/mgr/test_progress.py +++ b/ceph/qa/tasks/mgr/test_progress.py @@ -61,6 +61,7 @@ class TestProgress(MgrTestCase): return len(osd_map['osds']) def setUp(self): + super(TestProgress, self).setUp() # Ensure we have at least 
four OSDs if self._osd_count() < 4: raise SkipTest("Not enough OSDS!") diff --git a/ceph/qa/tasks/mgr/test_prometheus.py b/ceph/qa/tasks/mgr/test_prometheus.py index e8292b4a2..63a4abe00 100644 --- a/ceph/qa/tasks/mgr/test_prometheus.py +++ b/ceph/qa/tasks/mgr/test_prometheus.py @@ -11,6 +11,7 @@ class TestPrometheus(MgrTestCase): MGRS_REQUIRED = 3 def setUp(self): + super(TestPrometheus, self).setUp() self.setup_mgrs() def test_file_sd_command(self): diff --git a/ceph/qa/tasks/rgw.py b/ceph/qa/tasks/rgw.py index 6056d2200..e747426c3 100644 --- a/ceph/qa/tasks/rgw.py +++ b/ceph/qa/tasks/rgw.py @@ -103,9 +103,10 @@ def start_rgw(ctx, config, clients): kport=keystone_port), ]) - if client_config.get('dns-name'): + + if client_config.get('dns-name') is not None: rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name]) - if client_config.get('dns-s3website-name'): + if client_config.get('dns-s3website-name') is not None: rgw_cmd.extend(['--rgw-dns-s3website-name', endpoint.website_dns_name]) rgw_cmd.extend([ @@ -188,9 +189,8 @@ def assign_endpoints(ctx, config, default_cert): dns_name += remote.hostname website_dns_name = client_config.get('dns-s3website-name') - if website_dns_name: - if len(website_dns_name) == 0 or website_dns_name.endswith('.'): - website_dns_name += remote.hostname + if website_dns_name is not None and (len(website_dns_name) == 0 or website_dns_name.endswith('.')): + website_dns_name += remote.hostname role_endpoints[role] = RGWEndpoint(remote.hostname, port, ssl_certificate, dns_name, website_dns_name) diff --git a/ceph/qa/tasks/vstart_runner.py b/ceph/qa/tasks/vstart_runner.py index 27a2c33b1..315ab8446 100644 --- a/ceph/qa/tasks/vstart_runner.py +++ b/ceph/qa/tasks/vstart_runner.py @@ -308,10 +308,22 @@ class LocalRemote(object): return proc - def sh(self, command, log_limit=1024, cwd=None, env=None): + # XXX: for compatibility keep this method same teuthology.orchestra.remote.sh + def sh(self, script, **kwargs): + """ + Shortcut for run method. 
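+        As in teuthology, the script is passed to run() via the 'args' keyword and
+        stdout is captured into a StringIO unless the caller provides one, so the
+        return value is the command's captured standard output as a string.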
+ + Usage: + my_name = remote.sh('whoami') + remote_date = remote.sh('date') + """ + if 'stdout' not in kwargs: + kwargs['stdout'] = StringIO() + if 'args' not in kwargs: + kwargs['args'] = script + proc = self.run(**kwargs) + return proc.stdout.getvalue() - return misc.sh(command=command, log_limit=log_limit, cwd=cwd, - env=env) class LocalDaemon(object): def __init__(self, daemon_type, daemon_id): @@ -474,16 +486,12 @@ class LocalFuseMount(FuseMount): if self.is_mounted(): super(LocalFuseMount, self).umount() - def mount(self, mount_path=None, mount_fs_name=None): + def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None): + if mountpoint is not None: + self.mountpoint = mountpoint self.setupfs(name=mount_fs_name) - self.client_remote.run( - args=[ - 'mkdir', - '--', - self.mountpoint, - ], - ) + self.client_remote.run(args=['mkdir', '-p', self.mountpoint]) def list_connections(): self.client_remote.run( @@ -559,6 +567,8 @@ class LocalFuseMount(FuseMount): self.gather_mount_info() + self.mounted = True + def _run_python(self, pyscript, py_version='python'): """ Override this to remove the daemon-helper prefix that is used otherwise diff --git a/ceph/qa/workunits/ceph-helpers-root.sh b/ceph/qa/workunits/ceph-helpers-root.sh index 65c2fc3b9..dc81b2b3a 100755 --- a/ceph/qa/workunits/ceph-helpers-root.sh +++ b/ceph/qa/workunits/ceph-helpers-root.sh @@ -59,15 +59,6 @@ function install_cmake3_on_centos7 { sudo yum install -y cmake3 } -function install_cmake3_on_xenial { - install_pkg_on_ubuntu \ - ceph-cmake \ - d278b9d28de0f6b88f56dfe1e8bf684a41577210 \ - xenial \ - force \ - cmake -} - function install_pkg_on_ubuntu { local project=$1 shift diff --git a/ceph/qa/workunits/fs/upgrade/volume_client b/ceph/qa/workunits/fs/upgrade/volume_client new file mode 100755 index 000000000..37ee954cb --- /dev/null +++ b/ceph/qa/workunits/fs/upgrade/volume_client @@ -0,0 +1,110 @@ +#!/bin/bash + +set -ex + +PYTHON="python2" + +function run_payload { + local payload="$1" + sudo "$PYTHON" <&2 + sudo touch -- "$keyring" + sudo ceph-authtool "$keyring" --import-keyring "$T" + rm -f -- "$T" +} + +function conf_keys { + local client="$1" + ls /etc/ceph >&2 + ceph auth get-or-create "client.manila" mds 'allow *' osd 'allow rw' mon 'allow *' | import_key "$client" /etc/ceph/ceph.keyring +} + +function create_data_isolated { + local PAYLOAD=' +vp = VolumePath(None, "vol_isolated") +vc.create_volume(vp, (1<<33), data_isolated=True) +auth_result = vc.authorize(vp, "vol_data_isolated", tenant_id="test") +print("[client.vol_data_isolated]\n\tkey = ", auth_result["auth_key"]) +' + + run_payload "$PAYLOAD" | import_key "vol_data_isolated" +} + +function create_default { + local PAYLOAD=' +vp = VolumePath(None, "vol_default") +vc.create_volume(vp, (1<<33)) +auth_result = vc.authorize(vp, "vol_default", tenant_id="test") +print("[client.vol_default]\n\tkey = ", auth_result["auth_key"]) +' + run_payload "$PAYLOAD" | import_key "vol_default" +} + +function create { + create_data_isolated + create_default +} + +function populate { + pwd + df -h . + ls -l + cp -a /usr/bin . 
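+    # (presumably run from inside the mounted legacy volume: copying /usr/bin into it
+    #  gives the later "verify" phase a known directory tree to stat and list)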
+} + +function verify_data_isolated { + ceph fs subvolume getpath cephfs vol_isolated + stat bin + ls bin | tail +} + +function verify_default { + ceph fs subvolume getpath cephfs vol_default + stat bin + ls bin | tail +} + +function verify { + diff <(ceph fs subvolume ls cephfs | jq -cS 'sort_by(.name)' | tee /dev/stderr) <(printf '[{"name":"vol_isolated"},{"name":"vol_default"}]' | jq -cS 'sort_by(.name)') + verify_data_isolated + verify_default +} + +function main { + if [ "$1" = create ]; then + conf_keys + create + elif [ "$1" = populate ]; then + populate + elif [ "$1" = verify ]; then + # verify (sub)volumes still exist and are configured correctly + verify + else + exit 1 + fi +} + +main "$ACTION" diff --git a/ceph/qa/workunits/rados/test.sh b/ceph/qa/workunits/rados/test.sh index a2f5e0499..a0b2aed54 100755 --- a/ceph/qa/workunits/rados/test.sh +++ b/ceph/qa/workunits/rados/test.sh @@ -31,8 +31,7 @@ for f in \ api_c_read_operations \ list_parallel \ open_pools_parallel \ - delete_pools_parallel \ - watch_notify + delete_pools_parallel do if [ $parallel -eq 1 ]; then r=`printf '%25s' $f` diff --git a/ceph/qa/workunits/rados/test_envlibrados_for_rocksdb.sh b/ceph/qa/workunits/rados/test_envlibrados_for_rocksdb.sh index f342dd186..7099dafb3 100755 --- a/ceph/qa/workunits/rados/test_envlibrados_for_rocksdb.sh +++ b/ceph/qa/workunits/rados/test_envlibrados_for_rocksdb.sh @@ -20,15 +20,7 @@ CURRENT_PATH=`pwd` # for rocksdb case $(distro_id) in ubuntu|debian|devuan) - install git g++ libsnappy-dev zlib1g-dev libbz2-dev libradospp-dev - case $(distro_version) in - *Xenial*) - install_cmake3_on_xenial - ;; - *) - install cmake - ;; - esac + install git g++ libsnappy-dev zlib1g-dev libbz2-dev libradospp-dev cmake ;; centos|fedora|rhel) install git gcc-c++.x86_64 snappy-devel zlib zlib-devel bzip2 bzip2-devel libradospp-devel.x86_64 diff --git a/ceph/qa/workunits/rgw/test_rgw_orphan_list.sh b/ceph/qa/workunits/rgw/test_rgw_orphan_list.sh new file mode 100755 index 000000000..da96ccd32 --- /dev/null +++ b/ceph/qa/workunits/rgw/test_rgw_orphan_list.sh @@ -0,0 +1,512 @@ +#!/usr/bin/env bash + +set -ex + +# if defined, debug messages will be displayed and prepended with the string +# debug="DEBUG" + +huge_size=2222 # in megabytes +big_size=6 # in megabytes + +huge_obj=/tmp/huge_obj.temp.$$ +big_obj=/tmp/big_obj.temp.$$ +empty_obj=/tmp/empty_obj.temp.$$ + +fifo=/tmp/orphan-fifo.$$ +awscli_dir=${HOME}/awscli_temp +export PATH=${PATH}:${awscli_dir} + +rgw_host=$(hostname --fqdn) +if echo "$rgw_host" | grep -q '\.' ; then + : +else + host_domain=".front.sepia.ceph.com" + echo "WARNING: rgw hostname -- $rgw_host -- does not appear to be fully qualified; PUNTING and appending $host_domain" + rgw_host="${rgw_host}${host_domain}" +fi +rgw_port=80 + +echo "Fully Qualified Domain Name: $rgw_host" + +success() { + echo OK. 
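+    # only reached from the very end of the script; any earlier failure aborts
+    # via "set -e" or the explicit orphan-list check further down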
+ exit 0 +} + +######################################################################## +# INSTALL AND CONFIGURE TOOLING + +install_awscli() { + # NB: this does verify authenticity and integrity of downloaded + # file; see + # https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2-linux.html + here="$(pwd)" + cd "$HOME" + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" + unzip awscliv2.zip + mkdir -p $awscli_dir + ./aws/install -i $awscli_dir + cd "$here" +} + +uninstall_awscli() { + here="$(pwd)" + cd "$HOME" + rm -rf $awscli_dir ./aws awscliv2.zip + cd "$here" +} + +sudo dnf install -y s3cmd + +sudo yum install python3-setuptools +sudo yum -y install python3-pip +sudo pip3 install --upgrade setuptools +sudo pip3 install python-swiftclient + +# get ready for transition from s3cmd to awscli +if false ;then + install_awscli + aws --version + uninstall_awscli +fi + +s3config=/tmp/s3config.$$ + +# do not include the port when it is 80; the host base is used in the +# v4 signature and it needs to follow this convention for signatures +# to match +if [ "$rgw_port" -ne 80 ] ;then + s3_host_base="${rgw_host}:${rgw_port}" +else + s3_host_base="$rgw_host" +fi + +cat >${s3config} <$fifo & + set +e # don't allow errors to stop script + while read line ;do + echo "$line" | grep --quiet "part $stop_part " + if [ ${PIPESTATUS[1]} -eq 0 ] ;then + kill -9 $(jobs -p) + break + fi + done <$fifo + set -e + + rm -f $fifo +} + +mys3upload() { + obj=$1 + bucket=$2 + dest_obj=$3 + + mys3cmd put -q $obj s3://${bucket}/$dest_obj +} + +######################################################################## +# PREP + +create_users +dd if=/dev/urandom of=$big_obj bs=1M count=${big_size} +dd if=/dev/urandom of=$huge_obj bs=1M count=${huge_size} +touch $empty_obj + +quick_tests() { + echo TRY A SWIFT COMMAND + myswift upload swift-plain-ctr $big_obj --object-name swift-obj-test + myswift list + myswift list swift-plain-ctr + + echo TRY A RADOSGW-ADMIN COMMAND + radosgw-admin bucket list # make sure rgw is up and running +} + +######################################################################## +# S3 TESTS + +#################################### +# regular multipart test + +mys3cmd mb s3://multipart-bkt +mys3upload $huge_obj multipart-bkt multipart-obj +mys3cmd ls +mys3cmd ls s3://multipart-bkt + +#################################### +# multipart test with incomplete uploads + +bkt="incomplete-mp-bkt-1" + +mys3cmd mb s3://$bkt +mys3uploadkill $huge_obj $bkt incomplete-mp-obj-1 $fifo 20 +mys3uploadkill $huge_obj $bkt incomplete-mp-obj-2 $fifo 100 + +#################################### +# resharded bucket + +bkt=resharded-bkt-1 + +mys3cmd mb s3://$bkt + +for f in $(seq 8) ; do + dest_obj="reshard-obj-${f}" + mys3cmd put -q $big_obj s3://${bkt}/$dest_obj +done + +radosgw-admin bucket reshard --num-shards 3 --bucket=$bkt --yes-i-really-mean-it +radosgw-admin bucket reshard --num-shards 5 --bucket=$bkt --yes-i-really-mean-it + +#################################### +# versioned bucket + +if true ;then + echo "WARNING: versioned bucket test currently turned off" +else + bkt=versioned-bkt-1 + + mys3cmd mb s3://$bkt + + # bucket-enable-versioning $bkt + + for f in $(seq 3) ;do + for g in $(seq 10) ;do + dest_obj="versioned-obj-${g}" + mys3cmd put -q $big_obj s3://${bkt}/$dest_obj + done + done + + for g in $(seq 1 2 10) ;do + dest_obj="versioned-obj-${g}" + mys3cmd rm s3://${bkt}/$dest_obj + done +fi + +############################################################ +# copy small 
objects + +o_bkt="orig-bkt-1" +d_bkt="copy-bkt-1" +mys3cmd mb s3://$o_bkt + +for f in $(seq 4) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-obj-1 s3://${d_bkt}/copied-obj-1 +mys3cmd cp s3://${o_bkt}/orig-obj-3 s3://${d_bkt}/copied-obj-3 + +for f in $(seq 5 6) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${d_bkt}/$dest_obj +done + +############################################################ +# copy small objects and delete original + +o_bkt="orig-bkt-2" +d_bkt="copy-bkt-2" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 4) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-obj-1 s3://${d_bkt}/copied-obj-1 +mys3cmd cp s3://${o_bkt}/orig-obj-3 s3://${d_bkt}/copied-obj-3 + +for f in $(seq 5 6) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://${o_bkt} + +############################################################ +# copy multipart objects + +o_bkt="orig-mp-bkt-3" +d_bkt="copy-mp-bkt-3" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + + +############################################################ +# copy multipart objects and delete original + +o_bkt="orig-mp-bkt-4" +d_bkt="copy-mp-bkt-4" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://$o_bkt + +######################################################################## +# SWIFT TESTS + +# 600MB +segment_size=629145600 + +############################################################ +# plain test + +for f in $(seq 4) ;do + myswift upload swift-plain-ctr $big_obj --object-name swift-obj-$f +done + +############################################################ +# zero-len test + +myswift upload swift-zerolen-ctr $empty_obj --object-name subdir/ +myswift upload swift-zerolen-ctr $big_obj --object-name subdir/abc1 +myswift upload swift-zerolen-ctr $empty_obj --object-name subdir/empty1 +myswift upload swift-zerolen-ctr $big_obj --object-name subdir/xyz1 + +############################################################ +# dlo test + +# upload in 300MB segments +myswift upload swift-dlo-ctr $huge_obj --object-name dlo-obj-1 \ + -S $segment_size + +############################################################ +# slo test + +# upload in 300MB segments +myswift upload swift-slo-ctr $huge_obj --object-name slo-obj-1 \ + -S $segment_size --use-slo + +############################################################ +# large object copy test + +# upload in 300MB segments +o_ctr=swift-orig-ctr +o_obj=slo-orig-obj-1 +d_ctr=swift-copy-ctr +d_obj=slo-copy-obj-1 +myswift upload $o_ctr $big_obj --object-name $o_obj + +myswift copy --destination /${d_ctr}/${d_obj} \ + $o_ctr $o_obj + +myswift delete $o_ctr $o_obj + 
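+# (as with the S3 cases above, every copy/delete combination here is meant to leave
+# behind head, tail and segment rados objects in various states; the rgw-orphan-list
+# pass at the end of the script is expected to report none of them as orphans)
+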
+
+############################################################
+# huge dlo object copy test
+
+o_ctr=swift-orig-dlo-ctr-1
+o_obj=dlo-orig-dlo-obj-1
+d_ctr=swift-copy-dlo-ctr-1
+d_obj=dlo-copy-dlo-obj-1
+
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size
+
+myswift copy --destination /${d_ctr}/${d_obj} \
+    $o_ctr $o_obj
+
+############################################################
+# huge dlo object copy and orig delete
+
+o_ctr=swift-orig-dlo-ctr-2
+o_obj=dlo-orig-dlo-obj-2
+d_ctr=swift-copy-dlo-ctr-2
+d_obj=dlo-copy-dlo-obj-2
+
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size
+
+myswift copy --destination /${d_ctr}/${d_obj} \
+    $o_ctr $o_obj
+
+myswift delete $o_ctr $o_obj
+
+############################################################
+# huge slo object copy test
+
+o_ctr=swift-orig-slo-ctr-1
+o_obj=slo-orig-slo-obj-1
+d_ctr=swift-copy-slo-ctr-1
+d_obj=slo-copy-slo-obj-1
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size --use-slo
+
+myswift copy --destination /${d_ctr}/${d_obj} $o_ctr $o_obj
+
+############################################################
+# huge slo object copy test and orig delete
+
+o_ctr=swift-orig-slo-ctr-2
+o_obj=slo-orig-slo-obj-2
+d_ctr=swift-copy-slo-ctr-2
+d_obj=slo-copy-slo-obj-2
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size --use-slo
+
+myswift copy --destination /${d_ctr}/${d_obj} $o_ctr $o_obj
+
+myswift delete $o_ctr $o_obj
+
+########################################################################
+# FORCE GARBAGE COLLECTION
+
+sleep 6 # the gc age used for testing is 5 secs, so wait just past it
+radosgw-admin gc process --include-all
+
+
+########################################
+# DO ORPHAN LIST
+
+pool="default.rgw.buckets.data"
+
+rgw-orphan-list $pool
+
+# we only expect there to be one output file, but loop just in case
+ol_error=""
+for f in orphan-list-*.out ; do
+    if [ -s "$f" ] ;then # if file non-empty
+        ol_error="${ol_error}:$f"
+        echo "One or more orphans found in $f:"
+        cat "$f"
+    fi
+done
+
+if [ -n "$ol_error" ] ;then
+    echo "ERROR: orphans found when none expected"
+    exit 1
+fi
+
+########################################################################
+# CLEAN UP
+
+rm -f $empty_obj $big_obj $huge_obj $s3config
+
+success
diff --git a/ceph/selinux/ceph.te b/ceph/selinux/ceph.te
index 12fbcc1f5..9a481b096 100644
--- a/ceph/selinux/ceph.te
+++ b/ceph/selinux/ceph.te
@@ -15,6 +15,8 @@ require {
 type nvme_device_t;
 type httpd_config_t;
 type proc_kcore_t;
+ type amqp_port_t;
+ type soundd_port_t;
 class sock_file unlink;
 class tcp_socket name_connect_t;
 class lnk_file { create getattr read unlink };
@@ -89,6 +91,8 @@ corenet_tcp_sendrecv_cyphesis_port(ceph_t)
 allow ceph_t commplex_main_port_t:tcp_socket name_connect;
 allow ceph_t http_cache_port_t:tcp_socket name_connect;
+allow ceph_t amqp_port_t:tcp_socket name_connect;
+allow ceph_t soundd_port_t:tcp_socket name_connect;
 
 corecmd_exec_bin(ceph_t)
 corecmd_exec_shell(ceph_t)
diff --git a/ceph/src/.git_version b/ceph/src/.git_version
index 26f461d67..b351307c5 100644
--- a/ceph/src/.git_version
+++ b/ceph/src/.git_version
@@ -1,2 +1,2 @@
-b340acf629a010a74d90da5782a2c5fe0b54ac20
-v14.2.10
+f7fdb2f52131f54b891a2ec99d8205561242cdaf
+v14.2.11
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/raw/common.py b/ceph/src/ceph-volume/ceph_volume/devices/raw/common.py
index d34a2941d..08cfd0289 100644
--- a/ceph/src/ceph-volume/ceph_volume/devices/raw/common.py
+++ 
b/ceph/src/ceph-volume/ceph_volume/devices/raw/common.py @@ -26,10 +26,6 @@ def create_parser(prog, description): dest='crush_device_class', help='Crush device class to assign this OSD to', ) - parser.add_argument( - '--cluster-fsid', - help='Specify the cluster fsid, useful when no ceph.conf is available', - ) parser.add_argument( '--no-tmpfs', action='store_true', @@ -45,4 +41,9 @@ def create_parser(prog, description): dest='block_wal', help='Path to bluestore block.wal block device' ) + parser.add_argument( + '--dmcrypt', + action='store_true', + help='Enable device encryption via dm-crypt', + ) return parser diff --git a/ceph/src/ceph-volume/ceph_volume/devices/raw/list.py b/ceph/src/ceph-volume/ceph_volume/devices/raw/list.py index b04f55cd8..bb15bf199 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/raw/list.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/raw/list.py @@ -30,8 +30,34 @@ class List(object): if not devs: logger.debug('Listing block devices via lsblk...') devs = [] + # adding '--inverse' allows us to get the mapper devices list in that command output. + # not listing root devices containing partitions shouldn't have side effect since we are + # in `ceph-volume raw` context. + # + # example: + # running `lsblk --paths --nodeps --output=NAME --noheadings` doesn't allow to get the mapper list + # because the output is like following : + # + # $ lsblk --paths --nodeps --output=NAME --noheadings + # /dev/sda + # /dev/sdb + # /dev/sdc + # /dev/sdd + # + # the dmcrypt mappers are hidden because of the `--nodeps` given they are displayed as a dependency. + # + # $ lsblk --paths --output=NAME --noheadings + # /dev/sda + # |-/dev/mapper/ceph-3b52c90d-6548-407d-bde1-efd31809702f-sda-block-dmcrypt + # `-/dev/mapper/ceph-3b52c90d-6548-407d-bde1-efd31809702f-sda-db-dmcrypt + # /dev/sdb + # /dev/sdc + # /dev/sdd + # + # adding `--inverse` is a trick to get around this issue, the counterpart is that we can't list root devices if they contain + # at least one partition but this shouldn't be an issue in `ceph-volume raw` context given we only deal with raw devices. out, err, ret = process.call([ - 'lsblk', '--paths', '--nodeps', '--output=NAME', '--noheadings' + 'lsblk', '--paths', '--nodeps', '--output=NAME', '--noheadings', '--inverse' ]) assert not ret devs = out diff --git a/ceph/src/ceph-volume/ceph_volume/devices/raw/prepare.py b/ceph/src/ceph-volume/ceph_volume/devices/raw/prepare.py index cb5c59ce4..3c96eedac 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/raw/prepare.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/raw/prepare.py @@ -1,15 +1,39 @@ from __future__ import print_function import json import logging +import os from textwrap import dedent from ceph_volume.util import prepare as prepare_utils +from ceph_volume.util import encryption as encryption_utils +from ceph_volume.util import disk from ceph_volume.util import system -from ceph_volume import conf, decorators, terminal +from ceph_volume import decorators, terminal from ceph_volume.devices.lvm.common import rollback_osd from .common import create_parser logger = logging.getLogger(__name__) +def prepare_dmcrypt(key, device, device_type, fsid): + """ + Helper for devices that are encrypted. 
The operations needed for + block, db, wal, or data/journal devices are all the same + """ + if not device: + return '' + kname = disk.lsblk(device)['KNAME'] + mapping = 'ceph-{}-{}-{}-dmcrypt'.format(fsid, kname, device_type) + # format data device + encryption_utils.luks_format( + key, + device + ) + encryption_utils.luks_open( + key, + device, + mapping + ) + + return '/dev/mapper/{}'.format(mapping) def prepare_bluestore(block, wal, db, secrets, osd_id, fsid, tmpfs): """ @@ -22,6 +46,12 @@ def prepare_bluestore(block, wal, db, secrets, osd_id, fsid, tmpfs): """ cephx_secret = secrets.get('cephx_secret', prepare_utils.create_key()) + if secrets.get('dmcrypt_key'): + key = secrets['dmcrypt_key'] + block = prepare_dmcrypt(key, block, 'block', fsid) + wal = prepare_dmcrypt(key, wal, 'wal', fsid) + db = prepare_dmcrypt(key, db, 'db', fsid) + # create the directory prepare_utils.create_osd_path(osd_id, tmpfs=tmpfs) # symlink the block @@ -64,21 +94,20 @@ class Prepare(object): logger.info('will rollback OSD ID creation') rollback_osd(self.args, self.osd_id) raise - terminal.success("ceph-volume raw prepare successful for: %s" % self.args.data) + dmcrypt_log = 'dmcrypt' if args.dmcrypt else 'clear' + terminal.success("ceph-volume raw {} prepare successful for: {}".format(dmcrypt_log, self.args.data)) - def get_cluster_fsid(self): - """ - Allows using --cluster-fsid as an argument, but can fallback to reading - from ceph.conf if that is unset (the default behavior). - """ - if self.args.cluster_fsid: - return self.args.cluster_fsid - - return conf.ceph.get('global', 'fsid') @decorators.needs_root def prepare(self): secrets = {'cephx_secret': prepare_utils.create_key()} + encrypted = 1 if self.args.dmcrypt else 0 + cephx_lockbox_secret = '' if not encrypted else prepare_utils.create_key() + + if encrypted: + secrets['dmcrypt_key'] = os.getenv('CEPH_VOLUME_DMCRYPT_SECRET') + secrets['cephx_lockbox_secret'] = cephx_lockbox_secret # dummy value to make `ceph osd new` not complaining + osd_fsid = system.generate_uuid() crush_device_class = self.args.crush_device_class if crush_device_class: @@ -94,6 +123,7 @@ class Prepare(object): # reuse a given ID if it exists, otherwise create a new ID self.osd_id = prepare_utils.create_id( osd_fsid, json.dumps(secrets)) + prepare_bluestore( self.args.data, wal, @@ -112,8 +142,6 @@ class Prepare(object): Once the OSD is ready, an ad-hoc systemd unit will be enabled so that it can later get activated and the OSD daemon can get started. - Encryption is not supported. - ceph-volume raw prepare --bluestore --data {device} DB and WAL devices are supported. 
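# Editor's note (illustrative sketch, not part of the upstream change): with the
# --dmcrypt flag added above, the passphrase is supplied out-of-band through the
# CEPH_VOLUME_DMCRYPT_SECRET environment variable (enforced by the hunk below),
# and each device is remapped through prepare_dmcrypt() before BlueStore setup.
# Roughly, assuming /dev/vdb as the data device and osd_fsid as the OSD's fsid:
#
#     import os
#     from ceph_volume.devices.raw.prepare import prepare_dmcrypt
#
#     key = os.environ['CEPH_VOLUME_DMCRYPT_SECRET']
#     block = prepare_dmcrypt(key, '/dev/vdb', 'block', osd_fsid)
#     # luks_format()/luks_open() run under the hood; the OSD then uses
#     # '/dev/mapper/ceph-<osd_fsid>-vdb-block-dmcrypt' instead of the raw device.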
@@ -132,5 +160,10 @@ class Prepare(object): if not self.args.bluestore: terminal.error('must specify --bluestore (currently the only supported backend)') raise SystemExit(1) + if self.args.dmcrypt and not os.getenv('CEPH_VOLUME_DMCRYPT_SECRET'): + terminal.error('encryption was requested (--dmcrypt) but environment variable ' \ + 'CEPH_VOLUME_DMCRYPT_SECRET is not set, you must set ' \ + 'this variable to provide a dmcrypt secret.') + raise SystemExit(1) self.safe_prepare(self.args) diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/raw/__init__.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/raw/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/raw/test_prepare.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/raw/test_prepare.py new file mode 100644 index 000000000..e4cf8ce11 --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/raw/test_prepare.py @@ -0,0 +1,97 @@ +import pytest +from ceph_volume.devices import raw +from mock.mock import patch + + +class TestRaw(object): + + def test_main_spits_help_with_no_arguments(self, capsys): + raw.main.Raw([]).main() + stdout, stderr = capsys.readouterr() + assert 'Manage a single-device OSD on a raw block device.' in stdout + + def test_main_shows_activate_subcommands(self, capsys): + raw.main.Raw([]).main() + stdout, stderr = capsys.readouterr() + assert 'activate ' in stdout + assert 'Discover and prepare' in stdout + + def test_main_shows_prepare_subcommands(self, capsys): + raw.main.Raw([]).main() + stdout, stderr = capsys.readouterr() + assert 'prepare ' in stdout + assert 'Format a raw device' in stdout + + +class TestPrepare(object): + + def test_main_spits_help_with_no_arguments(self, capsys): + raw.prepare.Prepare([]).main() + stdout, stderr = capsys.readouterr() + assert 'Prepare an OSD by assigning an ID and FSID' in stdout + + def test_main_shows_full_help(self, capsys): + with pytest.raises(SystemExit): + raw.prepare.Prepare(argv=['--help']).main() + stdout, stderr = capsys.readouterr() + assert 'a raw device to use for the OSD' in stdout + assert 'Crush device class to assign this OSD to' in stdout + assert 'Use BlueStore backend' in stdout + assert 'Path to bluestore block.db block device' in stdout + assert 'Path to bluestore block.wal block device' in stdout + assert 'Enable device encryption via dm-crypt' in stdout + + @patch('ceph_volume.util.arg_validators.ValidDevice.__call__') + def test_prepare_dmcrypt_no_secret_passed(self, m_valid_device, capsys): + m_valid_device.return_value = '/dev/foo' + with pytest.raises(SystemExit): + raw.prepare.Prepare(argv=['--bluestore', '--data', '/dev/foo', '--dmcrypt']).main() + stdout, stderr = capsys.readouterr() + assert 'CEPH_VOLUME_DMCRYPT_SECRET is not set, you must set' in stderr + + @patch('ceph_volume.util.encryption.luks_open') + @patch('ceph_volume.util.encryption.luks_format') + @patch('ceph_volume.util.disk.lsblk') + def test_prepare_dmcrypt_block(self, m_lsblk, m_luks_format, m_luks_open): + m_lsblk.return_value = {'KNAME': 'foo'} + m_luks_format.return_value = True + m_luks_open.return_value = True + result = raw.prepare.prepare_dmcrypt('foo', '/dev/foo', 'block', '123') + m_luks_open.assert_called_with('foo', '/dev/foo', 'ceph-123-foo-block-dmcrypt') + m_luks_format.assert_called_with('foo', '/dev/foo') + assert result == '/dev/mapper/ceph-123-foo-block-dmcrypt' + + @patch('ceph_volume.util.encryption.luks_open') + @patch('ceph_volume.util.encryption.luks_format') + 
@patch('ceph_volume.util.disk.lsblk') + def test_prepare_dmcrypt_db(self, m_lsblk, m_luks_format, m_luks_open): + m_lsblk.return_value = {'KNAME': 'foo'} + m_luks_format.return_value = True + m_luks_open.return_value = True + result = raw.prepare.prepare_dmcrypt('foo', '/dev/foo', 'db', '123') + m_luks_open.assert_called_with('foo', '/dev/foo', 'ceph-123-foo-db-dmcrypt') + m_luks_format.assert_called_with('foo', '/dev/foo') + assert result == '/dev/mapper/ceph-123-foo-db-dmcrypt' + + @patch('ceph_volume.util.encryption.luks_open') + @patch('ceph_volume.util.encryption.luks_format') + @patch('ceph_volume.util.disk.lsblk') + def test_prepare_dmcrypt_wal(self, m_lsblk, m_luks_format, m_luks_open): + m_lsblk.return_value = {'KNAME': 'foo'} + m_luks_format.return_value = True + m_luks_open.return_value = True + result = raw.prepare.prepare_dmcrypt('foo', '/dev/foo', 'wal', '123') + m_luks_open.assert_called_with('foo', '/dev/foo', 'ceph-123-foo-wal-dmcrypt') + m_luks_format.assert_called_with('foo', '/dev/foo') + assert result == '/dev/mapper/ceph-123-foo-wal-dmcrypt' + + @patch('ceph_volume.devices.raw.prepare.rollback_osd') + @patch('ceph_volume.devices.raw.prepare.Prepare.prepare') + @patch('ceph_volume.util.arg_validators.ValidDevice.__call__') + def test_safe_prepare_exception_raised(self, m_valid_device, m_prepare, m_rollback_osd): + m_valid_device.return_value = '/dev/foo' + m_prepare.side_effect=Exception('foo') + m_rollback_osd.return_value = 'foobar' + with pytest.raises(Exception): + raw.prepare.Prepare(argv=['--bluestore', '--data', '/dev/foo']).main() + m_rollback_osd.assert_called() diff --git a/ceph/src/ceph_osd.cc b/ceph/src/ceph_osd.cc index 998f37074..347feab1c 100644 --- a/ceph/src/ceph_osd.cc +++ b/ceph/src/ceph_osd.cc @@ -575,6 +575,9 @@ flushjournal_out: g_conf().get_val("osd_client_message_size_cap"); boost::scoped_ptr client_byte_throttler( new Throttle(g_ceph_context, "osd_client_bytes", message_size)); + uint64_t message_cap = g_conf().get_val("osd_client_message_cap"); + boost::scoped_ptr client_msg_throttler( + new Throttle(g_ceph_context, "osd_client_messages", message_cap)); // All feature bits 0 - 34 should be present from dumpling v0.67 forward uint64_t osd_required = @@ -585,7 +588,7 @@ flushjournal_out: ms_public->set_default_policy(Messenger::Policy::stateless_server(0)); ms_public->set_policy_throttlers(entity_name_t::TYPE_CLIENT, client_byte_throttler.get(), - nullptr); + client_msg_throttler.get()); ms_public->set_policy(entity_name_t::TYPE_MON, Messenger::Policy::lossy_client(osd_required)); ms_public->set_policy(entity_name_t::TYPE_MGR, @@ -757,6 +760,7 @@ flushjournal_out: delete ms_objecter; client_byte_throttler.reset(); + client_msg_throttler.reset(); // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. 
char s[20]; diff --git a/ceph/src/ceph_syn.cc b/ceph/src/ceph_syn.cc index 50e26f281..5c469474d 100644 --- a/ceph/src/ceph_syn.cc +++ b/ceph/src/ceph_syn.cc @@ -63,7 +63,6 @@ int main(int argc, const char **argv, char *envp[]) for (int i=0; ibind(g_conf()->public_addr); mclients[i] = new MonClient(g_ceph_context); mclients[i]->build_initial_monmap(); auto client = new StandaloneClient(messengers[i], mclients[i]); diff --git a/ceph/src/client/Client.cc b/ceph/src/client/Client.cc old mode 100644 new mode 100755 index 3a02b72c2..77c35e564 --- a/ceph/src/client/Client.cc +++ b/ceph/src/client/Client.cc @@ -2083,6 +2083,7 @@ void Client::_closed_mds_session(MetaSession *s) mount_cond.Signal(); remove_session_caps(s); kick_requests_closed(s); + mds_ranks_closing.erase(s->mds_num); mds_sessions.erase(s->mds_num); } @@ -4348,10 +4349,12 @@ void Client::trim_caps(MetaSession *s, uint64_t max) all = false; } } + if (in->ll_ref == 1 && in->ino != MDS_INO_ROOT) { + _schedule_ino_release_callback(in.get()); + } if (all && in->ino != MDS_INO_ROOT) { ldout(cct, 20) << __func__ << " counting as trimmed: " << *in << dendl; trimmed++; - _schedule_ino_release_callback(in.get()); } } } @@ -6006,12 +6009,34 @@ void Client::_close_sessions() for (auto &p : mds_sessions) { if (p.second.state != MetaSession::STATE_CLOSING) { _close_mds_session(&p.second); + mds_ranks_closing.insert(p.first); } } // wait for sessions to close - ldout(cct, 2) << "waiting for " << mds_sessions.size() << " mds sessions to close" << dendl; - mount_cond.Wait(client_lock); + double timo = cct->_conf.get_val("client_shutdown_timeout").count(); + ldout(cct, 2) << "waiting for " << mds_ranks_closing.size() << " mds session(s) to close (timeout: " + << timo << "s)" << dendl; + if (!timo) { + mount_cond.Wait(client_lock); + } else { + int r = 0; + utime_t t; + t.set_from_double(timo); + while (!mds_ranks_closing.empty() && r == 0) { + r = mount_cond.WaitInterval(client_lock, t); + } + if (r != 0) { + ldout(cct, 1) << mds_ranks_closing.size() << " mds(s) did not respond to session close -- timing out." 
<< dendl; + while (!mds_ranks_closing.empty()) { + auto session = mds_sessions.at(*mds_ranks_closing.begin()); + // this prunes entry from mds_sessions and mds_ranks_closing + _closed_mds_session(&session); + } + } + } + + mds_ranks_closing.clear(); } } @@ -8355,7 +8380,7 @@ static int _readdir_single_dirent_cb(void *p, struct dirent *de, struct dirent *Client::readdir(dir_result_t *d) { int ret; - static struct dirent de; + auto& de = d->de; single_readdir sr; sr.de = &de; sr.stx = NULL; @@ -9074,7 +9099,7 @@ int Client::uninline_data(Inode *in, Context *onfinish) int Client::read(int fd, char *buf, loff_t size, loff_t offset) { - std::lock_guard lock(client_lock); + std::unique_lock lock(client_lock); tout(cct) << "read" << std::endl; tout(cct) << fd << std::endl; tout(cct) << size << std::endl; @@ -9096,6 +9121,7 @@ int Client::read(int fd, char *buf, loff_t size, loff_t offset) int r = _read(f, offset, size, &bl); ldout(cct, 3) << "read(" << fd << ", " << (void*)buf << ", " << size << ", " << offset << ") = " << r << dendl; if (r >= 0) { + lock.unlock(); bl.copy(0, bl.length(), buf); r = bl.length(); } @@ -10698,6 +10724,7 @@ Inode *Client::open_snapdir(Inode *diri) in->mtime = diri->mtime; in->ctime = diri->ctime; in->btime = diri->btime; + in->atime = diri->atime; in->size = diri->size; in->change_attr = diri->change_attr; @@ -11538,6 +11565,12 @@ int Client::_setxattr(Inode *in, const char *name, const void *value, return -EROFS; } + if (size == 0) { + value = ""; + } else if (value == NULL) { + return -EINVAL; + } + bool posix_acl_xattr = false; if (acl_type == POSIX_ACL) posix_acl_xattr = !strncmp(name, "system.", 7); diff --git a/ceph/src/client/Client.h b/ceph/src/client/Client.h index 6e34e4ba1..9c4f02c04 100644 --- a/ceph/src/client/Client.h +++ b/ceph/src/client/Client.h @@ -222,6 +222,7 @@ struct dir_result_t { frag_t buffer_frag; vector buffer; + struct dirent de; }; class Client : public Dispatcher, public md_config_obs_t { @@ -1210,6 +1211,8 @@ private: // mds sessions map mds_sessions; // mds -> push seq + + std::set mds_ranks_closing; // mds ranks currently tearing down sessions list waiting_for_mdsmap; // FSMap, for when using mds_command diff --git a/ceph/src/cls/rgw/cls_rgw_types.cc b/ceph/src/cls/rgw/cls_rgw_types.cc index a94c1134b..b533ec055 100644 --- a/ceph/src/cls/rgw/cls_rgw_types.cc +++ b/ceph/src/cls/rgw/cls_rgw_types.cc @@ -261,6 +261,7 @@ bool rgw_cls_bi_entry::get_info(cls_rgw_obj_key *key, { rgw_bucket_dir_entry entry; decode(entry, iter); + account = (account && entry.exists); *key = entry.key; *category = entry.meta.category; accounted_stats->num_entries++; diff --git a/ceph/src/common/config.cc b/ceph/src/common/config.cc index 8024d433b..8816512cd 100644 --- a/ceph/src/common/config.cc +++ b/ceph/src/common/config.cc @@ -661,6 +661,7 @@ int md_config_t::parse_argv(ConfigValues& values, set_val_or_die(values, tracker, "daemonize", "false"); } else if (ceph_argparse_flag(args, i, "-d", (char*)NULL)) { + set_val_or_die(values, tracker, "fuse_debug", "true"); set_val_or_die(values, tracker, "daemonize", "false"); set_val_or_die(values, tracker, "log_file", ""); set_val_or_die(values, tracker, "log_to_stderr", "true"); diff --git a/ceph/src/common/options.cc b/ceph/src/common/options.cc index e490cb370..f6366ddf2 100644 --- a/ceph/src/common/options.cc +++ b/ceph/src/common/options.cc @@ -2508,7 +2508,7 @@ std::vector