Mirror of https://git.proxmox.com/git/ceph.git (synced 2025-04-30 12:16:12 +00:00)

commit 47fdce5df8, parent ca55da0300

    import ceph pacific 16.2.15 source

    Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10.2)
 # remove cmake/modules/FindPython* once 3.12 is required
 
 project(ceph
-  VERSION 16.2.14
+  VERSION 16.2.15
   LANGUAGES CXX C ASM)
 
 foreach(policy
@@ -32,6 +32,29 @@
 in certain recovery scenarios, e.g., monitor database lost and rebuilt, and
 the restored file system is expected to have the same ID as before.
 
+>=16.2.15
+----------
+* `ceph config dump --format <json|xml>` output will display the localized
+  option names instead of its normalized version. For e.g.,
+  "mgr/prometheus/x/server_port" will be displayed instead of
+  "mgr/prometheus/server_port". This matches the output of the non pretty-print
+  formatted version of the command.
+
+* CEPHFS: MDS evicts clients which are not advancing their request tids which causes
+  a large buildup of session metadata resulting in the MDS going read-only due to
+  the RADOS operation exceeding the size threshold. `mds_session_metadata_threshold`
+  config controls the maximum size that a (encoded) session metadata can grow.
+
+* RADOS: `get_pool_is_selfmanaged_snaps_mode` C++ API has been deprecated
+  due to being prone to false negative results. It's safer replacement is
+  `pool_is_in_selfmanaged_snaps_mode`.
+
+* RBD: When diffing against the beginning of time (`fromsnapname == NULL`) in
+  fast-diff mode (`whole_object == true` with `fast-diff` image feature enabled
+  and valid), diff-iterate is now guaranteed to execute locally if exclusive
+  lock is available. This brings a dramatic performance improvement for QEMU
+  live disk synchronization and backup use cases.
+
 >= 16.2.14
 ----------
 
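For context on the notes above, the new behaviour can be exercised roughly as follows; the threshold value is an arbitrary illustration, not a recommendation from this release::

    # Localized option names such as mgr/prometheus/x/server_port now also
    # appear in the structured output of "ceph config dump".
    ceph config dump --format json-pretty

    # Cap the encoded per-session MDS metadata size (example value only).
    ceph config set mds mds_session_metadata_threshold 16777216
    ceph config get mds mds_session_metadata_threshold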
@@ -132,6 +155,10 @@
 * CEPHFS: After recovering a Ceph File System post following the disaster recovery
   procedure, the recovered files under `lost+found` directory can now be deleted.
 * core: cache-tiering is now deprecated.
+* mgr/snap_schedule: The snap-schedule mgr module now retains one less snapshot
+  than the number mentioned against the config tunable `mds_max_snaps_per_dir`
+  so that a new snapshot can be created and retained during the next schedule
+  run.
 
 >=16.2.8
 --------
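The snap-schedule change above is tied to the per-directory snapshot cap; a quick way to check the cap it works against (assuming default settings) is::

    # Defaults to 100; the module now retains one snapshot fewer than this.
    ceph config get mds mds_max_snaps_per_dir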
@@ -1,4 +1,4 @@
-Sphinx == 4.4.0
+Sphinx == 5.0.2
 git+https://github.com/ceph/sphinx-ditaa.git@py3#egg=sphinx-ditaa
 breathe >= 4.20.0
 Jinja2
@@ -135,7 +135,7 @@
 # main package definition
 #################################################################################
 Name: ceph
-Version: 16.2.14
+Version: 16.2.15
 Release: 0%{?dist}
 %if 0%{?fedora} || 0%{?rhel}
 Epoch: 2
@@ -151,7 +151,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
 Group: System/Filesystems
 %endif
 URL: http://ceph.com/
-Source0: %{?_remote_tarball_prefix}ceph-16.2.14.tar.bz2
+Source0: %{?_remote_tarball_prefix}ceph-16.2.15.tar.bz2
 %if 0%{?suse_version}
 # _insert_obs_source_lines_here
 ExclusiveArch: x86_64 aarch64 ppc64le s390x
@@ -1208,7 +1208,7 @@ This package provides Ceph default alerts for Prometheus.
 # common
 #################################################################################
 %prep
-%autosetup -p1 -n ceph-16.2.14
+%autosetup -p1 -n ceph-16.2.15
 
 %build
 # Disable lto on systems that do not support symver attribute
@@ -1,7 +1,13 @@
-ceph (16.2.14-1focal) focal; urgency=medium
+ceph (16.2.15-1focal) focal; urgency=medium
 
 
- -- Jenkins Build Slave User <jenkins-build@braggi13.front.sepia.ceph.com> Tue, 29 Aug 2023 16:38:35 +0000
+ -- Jenkins Build Slave User <jenkins-build@braggi16.front.sepia.ceph.com> Mon, 26 Feb 2024 19:34:01 +0000
 
+ceph (16.2.15-1) stable; urgency=medium
+
+  * New upstream release
+
+ -- Ceph Release Team <ceph-maintainers@ceph.io> Mon, 26 Feb 2024 19:21:07 +0000
+
 ceph (16.2.14-1) stable; urgency=medium
 
@@ -56,12 +56,13 @@ function(build_rocksdb)
   endif()
   include(CheckCXXCompilerFlag)
   check_cxx_compiler_flag("-Wno-deprecated-copy" HAS_WARNING_DEPRECATED_COPY)
+  set(rocksdb_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
   if(HAS_WARNING_DEPRECATED_COPY)
-    set(rocksdb_CXX_FLAGS -Wno-deprecated-copy)
+    string(APPEND rocksdb_CXX_FLAGS " -Wno-deprecated-copy")
   endif()
   check_cxx_compiler_flag("-Wno-pessimizing-move" HAS_WARNING_PESSIMIZING_MOVE)
   if(HAS_WARNING_PESSIMIZING_MOVE)
-    set(rocksdb_CXX_FLAGS "${rocksdb_CXX_FLAGS} -Wno-pessimizing-move")
+    string(APPEND rocksdb_CXX_FLAGS " -Wno-pessimizing-move")
   endif()
   if(rocksdb_CXX_FLAGS)
     list(APPEND rocksdb_CMAKE_ARGS -DCMAKE_CXX_FLAGS='${rocksdb_CXX_FLAGS}')
@@ -15,7 +15,7 @@ creation of multiple file systems use ``ceph fs flag set enable_multiple true``.
 
 ::
 
-    fs new <file system name> <metadata pool name> <data pool name>
+    ceph fs new <file system name> <metadata pool name> <data pool name>
 
 This command creates a new file system. The file system name and metadata pool
 name are self-explanatory. The specified data pool is the default data pool and
@@ -25,13 +25,13 @@ to accommodate the new file system.
 
 ::
 
-    fs ls
+    ceph fs ls
 
 List all file systems by name.
 
 ::
 
-    fs dump [epoch]
+    ceph fs dump [epoch]
 
 This dumps the FSMap at the given epoch (default: current) which includes all
 file system settings, MDS daemons and the ranks they hold, and the list of
@@ -40,7 +40,7 @@ standby MDS daemons.
 
 ::
 
-    fs rm <file system name> [--yes-i-really-mean-it]
+    ceph fs rm <file system name> [--yes-i-really-mean-it]
 
 Destroy a CephFS file system. This wipes information about the state of the
 file system from the FSMap. The metadata pool and data pools are untouched and
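As an illustration of the renamed commands in the hunks above, a minimal session might look like this (pool and file system names are invented for the example)::

    # Create the pools, then the file system that uses them.
    ceph osd pool create cephfs_metadata
    ceph osd pool create cephfs_data
    ceph fs new cephfs cephfs_metadata cephfs_data

    # Confirm the file system exists and inspect the FSMap.
    ceph fs ls
    ceph fs dump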
@@ -48,28 +48,28 @@ must be destroyed separately.
 
 ::
 
-    fs get <file system name>
+    ceph fs get <file system name>
 
 Get information about the named file system, including settings and ranks. This
-is a subset of the same information from the ``fs dump`` command.
+is a subset of the same information from the ``ceph fs dump`` command.
 
 ::
 
-    fs set <file system name> <var> <val>
+    ceph fs set <file system name> <var> <val>
 
 Change a setting on a file system. These settings are specific to the named
 file system and do not affect other file systems.
 
 ::
 
-    fs add_data_pool <file system name> <pool name/id>
+    ceph fs add_data_pool <file system name> <pool name/id>
 
 Add a data pool to the file system. This pool can be used for file layouts
 as an alternate location to store file data.
 
 ::
 
-    fs rm_data_pool <file system name> <pool name/id>
+    ceph fs rm_data_pool <file system name> <pool name/id>
 
 This command removes the specified pool from the list of data pools for the
 file system. If any files have layouts for the removed data pool, the file
@@ -82,7 +82,7 @@ Settings
 
 ::
 
-    fs set <fs name> max_file_size <size in bytes>
+    ceph fs set <fs name> max_file_size <size in bytes>
 
 CephFS has a configurable maximum file size, and it's 1TB by default.
 You may wish to set this limit higher if you expect to store large files
@@ -116,13 +116,13 @@ Taking a CephFS cluster down is done by setting the down flag:
 
 ::
 
-    fs set <fs_name> down true
+    ceph fs set <fs_name> down true
 
 To bring the cluster back online:
 
 ::
 
-    fs set <fs_name> down false
+    ceph fs set <fs_name> down false
 
 This will also restore the previous value of max_mds. MDS daemons are brought
 down in a way such that journals are flushed to the metadata pool and all
@@ -133,11 +133,11 @@ Taking the cluster down rapidly for deletion or disaster recovery
 -----------------------------------------------------------------
 
 To allow rapidly deleting a file system (for testing) or to quickly bring the
-file system and MDS daemons down, use the ``fs fail`` command:
+file system and MDS daemons down, use the ``ceph fs fail`` command:
 
 ::
 
-    fs fail <fs_name>
+    ceph fs fail <fs_name>
 
 This command sets a file system flag to prevent standbys from
 activating on the file system (the ``joinable`` flag).
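A sketch of the shutdown workflow documented above, with an assumed file system name::

    # Cleanly take the file system down, then bring it back online.
    ceph fs set cephfs down true
    ceph fs set cephfs down false

    # Or, for rapid teardown or disaster recovery, fail it outright.
    ceph fs fail cephfs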
@@ -146,7 +146,7 @@ This process can also be done manually by doing the following:
 
 ::
 
-    fs set <fs_name> joinable false
+    ceph fs set <fs_name> joinable false
 
 Then the operator can fail all of the ranks which causes the MDS daemons to
 respawn as standbys. The file system will be left in a degraded state.
@@ -154,7 +154,7 @@ respawn as standbys. The file system will be left in a degraded state.
 ::
 
     # For all ranks, 0-N:
-    mds fail <fs_name>:<n>
+    ceph mds fail <fs_name>:<n>
 
 Once all ranks are inactive, the file system may also be deleted or left in
 this state for other purposes (perhaps disaster recovery).
@@ -163,7 +163,7 @@ To bring the cluster back up, simply set the joinable flag:
 
 ::
 
-    fs set <fs_name> joinable true
+    ceph fs set <fs_name> joinable true
 
 
 Daemons
@@ -182,34 +182,35 @@ Commands to manipulate MDS daemons:
 
 ::
 
-    mds fail <gid/name/role>
+    ceph mds fail <gid/name/role>
 
 Mark an MDS daemon as failed. This is equivalent to what the cluster
 would do if an MDS daemon had failed to send a message to the mon
 for ``mds_beacon_grace`` second. If the daemon was active and a suitable
-standby is available, using ``mds fail`` will force a failover to the standby.
+standby is available, using ``ceph mds fail`` will force a failover to the
+standby.
 
-If the MDS daemon was in reality still running, then using ``mds fail``
+If the MDS daemon was in reality still running, then using ``ceph mds fail``
 will cause the daemon to restart. If it was active and a standby was
 available, then the "failed" daemon will return as a standby.
 
 
 ::
 
-    tell mds.<daemon name> command ...
+    ceph tell mds.<daemon name> command ...
 
 Send a command to the MDS daemon(s). Use ``mds.*`` to send a command to all
 daemons. Use ``ceph tell mds.* help`` to learn available commands.
 
 ::
 
-    mds metadata <gid/name/role>
+    ceph mds metadata <gid/name/role>
 
 Get metadata about the given MDS known to the Monitors.
 
 ::
 
-    mds repaired <role>
+    ceph mds repaired <role>
 
 Mark the file system rank as repaired. Unlike the name suggests, this command
 does not change a MDS; it manipulates the file system rank which has been
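To illustrate the daemon commands above, assuming an MDS daemon named ``a`` holding rank 0 of file system ``cephfs``::

    # Force a failover, either by role or by daemon name.
    ceph mds fail cephfs:0
    ceph mds fail a

    # Discover and run daemon commands, and fetch daemon metadata.
    ceph tell mds.* help
    ceph tell mds.a session ls
    ceph mds metadata a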
@@ -228,14 +229,14 @@ Commands to manipulate required client features of a file system:
 
 ::
 
-    fs required_client_features <fs name> add reply_encoding
-    fs required_client_features <fs name> rm reply_encoding
+    ceph fs required_client_features <fs name> add reply_encoding
+    ceph fs required_client_features <fs name> rm reply_encoding
 
 To list all CephFS features
 
 ::
 
-    fs feature ls
+    ceph fs feature ls
 
 Clients that are missing newly added features will be evicted automatically.
 
@@ -330,7 +331,7 @@ Global settings
 
 ::
 
-    fs flag set <flag name> <flag val> [<confirmation string>]
+    ceph fs flag set <flag name> <flag val> [<confirmation string>]
 
 Sets a global CephFS flag (i.e. not specific to a particular file system).
 Currently, the only flag setting is 'enable_multiple' which allows having
@@ -352,13 +353,13 @@ file system.
 
 ::
 
-    mds rmfailed
+    ceph mds rmfailed
 
 This removes a rank from the failed set.
 
 ::
 
-    fs reset <file system name>
+    ceph fs reset <file system name>
 
 This command resets the file system state to defaults, except for the name and
 pools. Non-zero ranks are saved in the stopped set.
@@ -366,7 +367,7 @@ pools. Non-zero ranks are saved in the stopped set.
 
 ::
 
-    fs new <file system name> <metadata pool name> <data pool name> --fscid <fscid> --force
+    ceph fs new <file system name> <metadata pool name> <data pool name> --fscid <fscid> --force
 
 This command creates a file system with a specific **fscid** (file system cluster ID).
 You may want to do this when an application expects the file system's ID to be
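An assumed example of the feature commands above, again using a file system named ``cephfs``::

    # List known CephFS features, then require (and later drop) one of them.
    ceph fs feature ls
    ceph fs required_client_features cephfs add reply_encoding
    ceph fs required_client_features cephfs rm reply_encoding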
@@ -37,7 +37,7 @@ Options :
 .. code:: bash
 
     [build]$ python3 -m venv venv && source venv/bin/activate && pip3 install cmd2
-    [build]$ source vstart_environment.sh && source venv/bin/activate && python3 ../src/tools/cephfs/cephfs-shell
+    [build]$ source vstart_environment.sh && source venv/bin/activate && python3 ../src/tools/cephfs/shell/cephfs-shell
 
 Commands
 ========
@@ -24,6 +24,16 @@ that directory.
 To restrict clients to only mount and work within a certain directory, use
 path-based MDS authentication capabilities.
 
+Note that this restriction *only* impacts the filesystem hierarchy -- the metadata
+tree managed by the MDS. Clients will still be able to access the underlying
+file data in RADOS directly. To segregate clients fully, you must also isolate
+untrusted clients in their own RADOS namespace. You can place a client's
+filesystem subtree in a particular namespace using `file layouts`_ and then
+restrict their RADOS access to that namespace using `OSD capabilities`_
+
+.. _file layouts: ./file-layouts
+.. _OSD capabilities: ../rados/operations/user-management/#authorization-capabilities
+
 Syntax
 ------
 
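A rough sketch of the isolation pattern described in the added note; the client name, mount point, pool and namespace below are assumptions for illustration only::

    # Pin a client's subtree to its own RADOS namespace via a file layout.
    setfattr -n ceph.dir.layout.pool_namespace -v ns-alice /mnt/cephfs/alice

    # Restrict that client's OSD access to the same namespace.
    ceph auth caps client.alice \
        mds 'allow rw path=/alice' \
        mon 'allow r' \
        osd 'allow rw pool=cephfs_data namespace=ns-alice'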
|
@ -38,6 +38,13 @@ below). By default
|
|||||||
the start time is last midnight. So when a snapshot schedule with repeat
|
the start time is last midnight. So when a snapshot schedule with repeat
|
||||||
interval `1h` is added at 13:50
|
interval `1h` is added at 13:50
|
||||||
with the default start time, the first snapshot will be taken at 14:00.
|
with the default start time, the first snapshot will be taken at 14:00.
|
||||||
|
The time zone is assumed to be UTC if none is explicitly included in the string.
|
||||||
|
An explicit time zone will be mapped to UTC at execution.
|
||||||
|
The start time must be in ISO8601 format. Examples below:
|
||||||
|
|
||||||
|
UTC: 2022-08-08T05:30:00 i.e. 5:30 AM UTC, without explicit time zone offset
|
||||||
|
IDT: 2022-08-08T09:00:00+03:00 i.e. 6:00 AM UTC
|
||||||
|
EDT: 2022-08-08T05:30:00-04:00 i.e. 9:30 AM UTC
|
||||||
|
|
||||||
Retention specifications are identified by path and the retention spec itself. A
|
Retention specifications are identified by path and the retention spec itself. A
|
||||||
retention spec consists of either a number and a time period separated by a
|
retention spec consists of either a number and a time period separated by a
|
||||||
@ -155,6 +162,11 @@ Examples::
|
|||||||
snapshot creation is accounted for in the "created_count" field, which is a
|
snapshot creation is accounted for in the "created_count" field, which is a
|
||||||
cumulative count of the total number of snapshots created so far.
|
cumulative count of the total number of snapshots created so far.
|
||||||
|
|
||||||
|
.. note: The maximum number of snapshots to retain per directory is limited by the
|
||||||
|
config tunable `mds_max_snaps_per_dir`. This tunable defaults to 100.
|
||||||
|
To ensure a new snapshot can be created, one snapshot less than this will be
|
||||||
|
retained. So by default, a maximum of 99 snapshots will be retained.
|
||||||
|
|
||||||
Active and inactive schedules
|
Active and inactive schedules
|
||||||
-----------------------------
|
-----------------------------
|
||||||
Snapshot schedules can be added for a path that doesn't exist yet in the
|
Snapshot schedules can be added for a path that doesn't exist yet in the
|
||||||
|
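Tying the two snap-schedule additions together, an illustrative session (path, start time and retention are assumed values)::

    # Hourly snapshots from 05:30 UTC (ISO8601; UTC is assumed when no
    # offset is given), keeping 24 hourly and 7 daily snapshots.
    ceph fs snap-schedule add / 1h 2022-08-08T05:30:00
    ceph fs snap-schedule retention add / 24h7d
    ceph fs snap-schedule status /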
@@ -60,6 +60,8 @@ Possible -op commands::
 * meta-list
 * get-osdmap
 * set-osdmap
+* get-superblock
+* set-superblock
 * get-inc-osdmap
 * set-inc-osdmap
 * mark-complete
@@ -414,7 +416,7 @@ Options
 
 .. option:: --op arg
 
-    Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log]
+    Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-superblock, set-superblock, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log]
 
 .. option:: --epoch arg
 
@@ -422,7 +424,7 @@ Options
 
 .. option:: --file arg
 
-    path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap
+    path of file to export, export-remove, import, get-osdmap, set-osdmap, get-superblock, set-superblock, get-inc-osdmap or set-inc-osdmap
 
 .. option:: --mon-store-path arg
 
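The new superblock operations can be exercised roughly as follows; the OSD path and file name are placeholders, and the OSD must be stopped first::

    # Dump the OSD superblock to a file, then write it back.
    ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 \
        --op get-superblock --file superblock.bin
    ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 \
        --op set-superblock --file superblock.bin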
@@ -1314,7 +1314,7 @@ Subcommand ``cache-mode`` specifies the caching mode for cache tier <pool>.
 
 Usage::
 
-    ceph osd tier cache-mode <poolname> writeback|readproxy|readonly|none
+    ceph osd tier cache-mode <poolname> writeback|proxy|readproxy|readonly|none
 
 Subcommand ``remove`` removes the tier <tierpool> (the second one) from base pool
 <pool> (the first one).
@@ -264,8 +264,8 @@ Pool specific commands
 :command:`append` *name* *infile*
   Append object name to the cluster with contents from infile.
 
-:command:`rm` *name*
-  Remove object name.
+:command:`rm` [--force-full] *name* ...
+  Remove object(s) with name(s). With ``--force-full`` will remove when cluster is marked full.
 
 :command:`listwatchers` *name*
   List the watchers of object name.
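A short, assumed example of the extended ``rm`` syntax documented above::

    # Remove several objects at once; --force-full overrides the full-cluster guard.
    rados -p mypool rm obj-a obj-b
    rados -p mypool rm --force-full obj-a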
@@ -333,7 +333,7 @@ OSD and run the following command:
 
     ceph-bluestore-tool \
       --path <data path> \
-      --sharding="m(3) p(3,0-12) o(3,0-13)=block_cache={type=binned_lru} l p" \
+      --sharding="m(3) p(3,0-12) O(3,0-13)=block_cache={type=binned_lru} L P" \
       reshard
 
 
@@ -109,17 +109,6 @@ Async messenger options
 :Default: ``3``
 
 
-``ms_async_max_op_threads``
-
-:Description: Maximum number of worker threads used by each Async Messenger instance.
-              Set to lower values when your machine has limited CPU count, and increase
-              when your CPUs are underutilized (i. e. one or more of CPUs are
-              constantly on 100% load during I/O operations).
-:Type: 64-bit Unsigned Integer
-:Required: No
-:Default: ``5``
-
-
 ``ms_async_send_inline``
 
 :Description: Send messages directly from the thread that generated them instead of
@@ -129,5 +118,3 @@ Async messenger options
 :Type: Boolean
 :Required: No
 :Default: ``false``
-
-
@@ -4,12 +4,41 @@
 
 .. index:: pools; configuration
 
-Ceph uses default values to determine how many placement groups (PGs) will be
-assigned to each pool. We recommend overriding some of the defaults.
-Specifically, we recommend setting a pool's replica size and overriding the
-default number of placement groups. You can set these values when running
-`pool`_ commands. You can also override the defaults by adding new ones in the
-``[global]`` section of your Ceph configuration file.
+The number of placement groups that the CRUSH algorithm assigns to each pool is
+determined by the values of variables in the centralized configuration database
+in the monitor cluster.
+
+Both containerized deployments of Ceph (deployments made using ``cephadm`` or
+Rook) and non-containerized deployments of Ceph rely on the values in the
+central configuration database in the monitor cluster to assign placement
+groups to pools.
+
+Example Commands
+----------------
+
+To see the value of the variable that governs the number of placement groups in a given pool, run a command of the following form:
+
+.. prompt:: bash
+
+   ceph config get osd osd_pool_default_pg_num
+
+To set the value of the variable that governs the number of placement groups in a given pool, run a command of the following form:
+
+.. prompt:: bash
+
+   ceph config set osd osd_pool_default_pg_num
+
+Manual Tuning
+-------------
+In some cases, it might be advisable to override some of the defaults. For
+example, you might determine that it is wise to set a pool's replica size and
+to override the default number of placement groups in the pool. You can set
+these values when running `pool`_ commands.
+
+See Also
+--------
+
+See :ref:`pg-autoscaler`.
 
 
 .. literalinclude:: pool-pg.conf
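Filling in the values omitted in the examples above purely for illustration (the numbers are assumptions, not recommendations)::

    # Inspect and change the defaults applied to newly created pools.
    ceph config get osd osd_pool_default_pg_num
    ceph config set osd osd_pool_default_pg_num 128
    ceph config set osd osd_pool_default_size 3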
@@ -1404,6 +1404,31 @@ other performance issue with the OSDs.
 The exact size of the snapshot trim queue is reported by the ``snaptrimq_len``
 field of ``ceph pg ls -f json-detail``.
 
+Stretch Mode
+------------
+
+INCORRECT_NUM_BUCKETS_STRETCH_MODE
+__________________________________
+
+Stretch mode currently only support 2 dividing buckets with OSDs, this warning suggests
+that the number of dividing buckets is not equal to 2 after stretch mode is enabled.
+You can expect unpredictable failures and MON assertions until the condition is fixed.
+
+We encourage you to fix this by removing additional dividing buckets or bump the
+number of dividing buckets to 2.
+
+UNEVEN_WEIGHTS_STRETCH_MODE
+___________________________
+
+The 2 dividing buckets must have equal weights when stretch mode is enabled.
+This warning suggests that the 2 dividing buckets have uneven weights after
+stretch mode is enabled. This is not immediately fatal, however, you can expect
+Ceph to be confused when trying to process transitions between dividing buckets.
+
+We encourage you to fix this by making the weights even on both dividing buckets.
+This can be done by making sure the combined weight of the OSDs on each dividing
+bucket are the same.
+
 Miscellaneous
 -------------
 
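A hedged sketch of how the two new warnings might be checked in practice::

    # Both warning codes end in _STRETCH_MODE.
    ceph health detail | grep STRETCH_MODE

    # Compare the combined CRUSH weights of the two dividing buckets.
    ceph osd crush tree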
@@ -127,6 +127,14 @@ Options
 :Type: Integer
 :Default: ``65000``
 
+``max_header_size``
+
+:Description: The maximum number of header bytes available for a single request.
+
+:Type: Integer
+:Default: ``16384``
+:Maximum: ``65536``
+
 
 Civetweb
 ========
@@ -55,7 +55,7 @@ download_from() {
 exit
 fi
 url=$url_base/$fname
-wget -c --no-verbose -O $fname $url
+wget --no-verbose -O $fname $url
 if [ $? != 0 -o ! -e $fname ]; then
 echo "Download of $url failed"
 elif [ $(sha256sum $fname | awk '{print $1}') != $sha256 ]; then
@@ -183,8 +183,7 @@ download_boost $boost_version 4eb3b8d442b426dc35346235c8733b5ae35ba431690e38c6a8
 https://boostorg.jfrog.io/artifactory/main/release/$boost_version/source \
 https://downloads.sourceforge.net/project/boost/boost/$boost_version \
 https://download.ceph.com/qa
-download_liburing 0.7 8e2842cfe947f3a443af301bdd6d034455536c38a455c7a700d0c1ad165a7543 \
+download_liburing 0.7 05d0cf8493d573c76b11abfcf34aabc7153affebe17ff95f9ae88b0de062a59d \
-https://github.com/axboe/liburing/archive \
 https://git.kernel.dk/cgit/liburing/snapshot
 pmdk_version=1.10
 download_pmdk $pmdk_version 08dafcf94db5ac13fac9139c92225d9aa5f3724ea74beee4e6ca19a01a2eb20c \
@@ -342,7 +342,7 @@ local g = import 'grafonnet/grafana.libsonnet';
 $.graphPanelSchema({},
 title,
 description,
-'null',
+'null as zero',
 false,
 formatY1,
 'short',
@@ -133,7 +133,7 @@ local u = import 'utils.libsonnet';
 $.graphPanelSchema({},
 title,
 '',
-'null',
+'null as zero',
 false,
 formatY1,
 'short',
@@ -140,7 +140,7 @@ local u = import 'utils.libsonnet';
 {},
 title,
 description,
-'null',
+'null as zero',
 false,
 formatY1,
 formatY2,
@@ -658,7 +658,7 @@ local u = import 'utils.libsonnet';
 $.graphPanelSchema(aliasColors,
 title,
 description,
-'null',
+'null as zero',
 false,
 formatY1,
 formatY2,
|
@ -87,7 +87,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -185,7 +185,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -283,7 +283,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -400,7 +400,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -498,7 +498,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -596,7 +596,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
|
@ -93,7 +93,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -186,7 +186,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -285,7 +285,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
|
@ -87,7 +87,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -180,7 +180,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -266,7 +266,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -352,7 +352,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -445,7 +445,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -531,7 +531,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -636,7 +636,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -754,7 +754,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -893,7 +893,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -1000,7 +1000,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
|
@ -80,7 +80,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -173,7 +173,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
@ -266,7 +266,7 @@
|
|||||||
"lines": true,
|
"lines": true,
|
||||||
"linewidth": 1,
|
"linewidth": 1,
|
||||||
"links": [ ],
|
"links": [ ],
|
||||||
"nullPointMode": "null",
|
"nullPointMode": "null as zero",
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pointradius": 5,
|
"pointradius": 5,
|
||||||
"points": false,
|
"points": false,
|
||||||
|
@@ -518,7 +518,7 @@ groups:
 annotations:
 description: "Pool '{{ $labels.name }}' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours."
 summary: "Pool growth rate may soon exceed capacity"
-expr: "(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id) group_right ceph_pool_metadata) >= 95"
+expr: "(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id, instance) group_right() ceph_pool_metadata) >= 95"
 labels:
 oid: "1.3.6.1.4.1.50495.1.2.1.9.2"
 severity: "warning"
@@ -1499,35 +1499,44 @@ tests:
 # trigger percent full prediction on pools 1 and 2 only
 - interval: 12h
 input_series:
-- series: 'ceph_pool_percent_used{pool_id="1"}'
+- series: 'ceph_pool_percent_used{pool_id="1", instance="9090"}'
-values: '70 75 80 87 92'
-- series: 'ceph_pool_percent_used{pool_id="2"}'
-values: '22 22 23 23 24'
-- series: 'ceph_pool_metadata{pool_id="1",name="rbd",type="replicated"}'
 values: '1 1 1 1 1'
-- series: 'ceph_pool_metadata{pool_id="2",name="default.rgw.index",type="replicated"}'
+- series: 'ceph_pool_percent_used{pool_id="1", instance="8090"}'
+values: '78 89 79 98 78'
+- series: 'ceph_pool_percent_used{pool_id="2", instance="9090"}'
+values: '1 1 1 1 1'
+- series: 'ceph_pool_percent_used{pool_id="2", instance="8090"}'
+values: '22 22 23 23 24'
+- series: 'ceph_pool_metadata{pool_id="1" , instance="9090" ,name="rbd",type="replicated"}'
+values: '1 1 1 1 1'
+- series: 'ceph_pool_metadata{pool_id="1", instance="8090",name="default.rgw.index",type="replicated"}'
+values: '1 1 1 1 1'
+- series: 'ceph_pool_metadata{pool_id="2" , instance="9090" ,name="rbd",type="replicated"}'
+values: '1 1 1 1 1'
+- series: 'ceph_pool_metadata{pool_id="2", instance="8090",name="default.rgw.index",type="replicated"}'
 values: '1 1 1 1 1'
 promql_expr_test:
 - expr: |
-(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id)
+(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id, instance)
-group_right ceph_pool_metadata) >= 95
+group_right() ceph_pool_metadata) >= 95
 eval_time: 36h
 exp_samples:
-- labels: '{name="rbd",pool_id="1",type="replicated"}'
+- labels: '{instance="8090",name="default.rgw.index",pool_id="1",type="replicated"}'
-value: 1.424E+02 # 142%
+value: 1.435E+02 # 142%
 alert_rule_test:
 - eval_time: 48h
 alertname: CephPoolGrowthWarning
 exp_alerts:
 - exp_labels:
-name: rbd
+instance: 8090
+name: default.rgw.index
 pool_id: 1
 severity: warning
 type: ceph_default
 oid: 1.3.6.1.4.1.50495.1.2.1.9.2
 exp_annotations:
 summary: Pool growth rate may soon exceed capacity
-description: Pool 'rbd' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours.
+description: Pool 'default.rgw.index' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours.
 - interval: 1m
 input_series:
 - series: 'ceph_health_detail{name="POOL_BACKFILLFULL"}'
|
@ -3,6 +3,7 @@ overrides:
|
|||||||
conf:
|
conf:
|
||||||
mds:
|
mds:
|
||||||
debug mds: 20
|
debug mds: 20
|
||||||
|
debug mds balancer: 20
|
||||||
debug ms: 1
|
debug ms: 1
|
||||||
mds debug frag: true
|
mds debug frag: true
|
||||||
mds debug scatterstat: true
|
mds debug scatterstat: true
|
||||||
|
@ -2,7 +2,10 @@ overrides:
|
|||||||
ceph:
|
ceph:
|
||||||
log-ignorelist:
|
log-ignorelist:
|
||||||
- overall HEALTH_
|
- overall HEALTH_
|
||||||
|
- \(CEPHADM_STRAY_DAEMON\)
|
||||||
- \(FS_DEGRADED\)
|
- \(FS_DEGRADED\)
|
||||||
|
- FS_
|
||||||
|
- \(CEPHADM_
|
||||||
- \(MDS_FAILED\)
|
- \(MDS_FAILED\)
|
||||||
- \(MDS_DEGRADED\)
|
- \(MDS_DEGRADED\)
|
||||||
- \(FS_WITH_FAILED_MDS\)
|
- \(FS_WITH_FAILED_MDS\)
|
||||||
@ -10,3 +13,10 @@ overrides:
|
|||||||
- \(MDS_ALL_DOWN\)
|
- \(MDS_ALL_DOWN\)
|
||||||
- \(MDS_UP_LESS_THAN_MAX\)
|
- \(MDS_UP_LESS_THAN_MAX\)
|
||||||
- \(FS_INLINE_DATA_DEPRECATED\)
|
- \(FS_INLINE_DATA_DEPRECATED\)
|
||||||
|
- \(PG_DEGRADED\)
|
||||||
|
- Degraded data redundancy
|
||||||
|
- \(PG_
|
||||||
|
- acting
|
||||||
|
- MDS_INSUFFICIENT_STANDBY
|
||||||
|
- deprecated feature inline_data
|
||||||
|
- compat changed unexpectedly
|
||||||
|
@ -2,8 +2,10 @@ overrides:
|
|||||||
ceph:
|
ceph:
|
||||||
log-ignorelist:
|
log-ignorelist:
|
||||||
- overall HEALTH_
|
- overall HEALTH_
|
||||||
- \(OSD_DOWN\)
|
- OSD_DOWN
|
||||||
- \(OSD_
|
- OSD_
|
||||||
- but it is still running
|
- but it is still running
|
||||||
# MDS daemon 'b' is not responding, replacing it as rank 0 with standby 'a'
|
# MDS daemon 'b' is not responding, replacing it as rank 0 with standby 'a'
|
||||||
- is not responding
|
- is not responding
|
||||||
|
- is down
|
||||||
|
- osds down
|
||||||
|
6
ceph/qa/distros/all/rhel_8.5.yaml
Normal file
6
ceph/qa/distros/all/rhel_8.5.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
os_type: rhel
|
||||||
|
os_version: "8.5"
|
||||||
|
overrides:
|
||||||
|
selinux:
|
||||||
|
whitelist:
|
||||||
|
- scontext=system_u:system_r:logrotate_t:s0
|
6
ceph/qa/distros/all/rhel_8.6.yaml
Normal file
6
ceph/qa/distros/all/rhel_8.6.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
os_type: rhel
|
||||||
|
os_version: "8.6"
|
||||||
|
overrides:
|
||||||
|
selinux:
|
||||||
|
whitelist:
|
||||||
|
- scontext=system_u:system_r:logrotate_t:s0
|
@ -1 +1 @@
|
|||||||
rhel_8.4.yaml
|
rhel_8.6.yaml
|
@ -1 +0,0 @@
|
|||||||
.qa/distros/podman/rhel_8.4_container_tools_3.0.yaml
|
|
@ -1 +0,0 @@
|
|||||||
.qa/distros/podman/rhel_8.4_container_tools_rhel8.yaml
|
|
@ -0,0 +1 @@
|
|||||||
|
.qa/distros/podman/rhel_8.6_container_tools_3.0.yaml
|
@ -0,0 +1 @@
|
|||||||
|
.qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml
|
@ -1,5 +1,5 @@
|
|||||||
os_type: rhel
|
os_type: rhel
|
||||||
os_version: "8.4"
|
os_version: "8.6"
|
||||||
overrides:
|
overrides:
|
||||||
selinux:
|
selinux:
|
||||||
whitelist:
|
whitelist:
|
@ -1,5 +1,5 @@
|
|||||||
os_type: rhel
|
os_type: rhel
|
||||||
os_version: "8.4"
|
os_version: "8.6"
|
||||||
overrides:
|
overrides:
|
||||||
selinux:
|
selinux:
|
||||||
whitelist:
|
whitelist:
|
@@ -1691,6 +1691,29 @@ function test_wait_for_peered() {
 
 #######################################################################
 
+##
+# Wait until the cluster's health condition disappeared.
+# $TIMEOUT default
+#
+# @param string to grep for in health detail
+# @return 0 if the cluster health doesn't matches request,
+# 1 otherwise if after $TIMEOUT seconds health condition remains.
+#
+function wait_for_health_gone() {
+    local grepstr=$1
+    local -a delays=($(get_timeout_delays $TIMEOUT .1))
+    local -i loop=0
+
+    while ceph health detail | grep "$grepstr" ; do
+        if (( $loop >= ${#delays[*]} )) ; then
+            ceph health detail
+            return 1
+        fi
+        sleep ${delays[$loop]}
+        loop+=1
+    done
+}
+
 ##
 # Wait until the cluster has health condition passed as arg
 # again for $TIMEOUT seconds.
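A minimal usage sketch for the helper added above, inside a standalone test; the health string is only an example::

    # Wait for a transient warning to clear before continuing the test.
    wait_for_health_gone "PG_DEGRADED" || return 1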
ceph/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh (new executable file, 148 lines)
@@ -0,0 +1,148 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
|
||||||
|
function run() {
|
||||||
|
local dir=$1
|
||||||
|
shift
|
||||||
|
|
||||||
|
export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
|
||||||
|
export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
|
||||||
|
export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
|
||||||
|
export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
|
||||||
|
export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
|
||||||
|
export CEPH_ARGS
|
||||||
|
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
|
||||||
|
|
||||||
|
export BASE_CEPH_ARGS=$CEPH_ARGS
|
||||||
|
CEPH_ARGS+="--mon-host=$CEPH_MON_A"
|
||||||
|
|
||||||
|
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
|
||||||
|
for func in $funcs ; do
|
||||||
|
setup $dir || return 1
|
||||||
|
$func $dir || return 1
|
||||||
|
teardown $dir || return 1
|
||||||
|
done
|
||||||
|
}
|
||||||
|
TEST_stretched_cluster_failover_add_three_osds(){
|
||||||
|
local dir=$1
|
||||||
|
local OSDS=8
|
||||||
|
setup $dir || return 1
|
||||||
|
|
||||||
|
run_mon $dir a --public-addr $CEPH_MON_A || return 1
|
||||||
|
wait_for_quorum 300 1 || return 1
|
||||||
|
|
||||||
|
run_mon $dir b --public-addr $CEPH_MON_B || return 1
|
||||||
|
CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B"
|
||||||
|
wait_for_quorum 300 2 || return 1
|
||||||
|
|
||||||
|
run_mon $dir c --public-addr $CEPH_MON_C || return 1
|
||||||
|
CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C"
|
||||||
|
    wait_for_quorum 300 3 || return 1

    run_mon $dir d --public-addr $CEPH_MON_D || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D"
    wait_for_quorum 300 4 || return 1

    run_mon $dir e --public-addr $CEPH_MON_E || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E"
    wait_for_quorum 300 5 || return 1

    ceph mon set election_strategy connectivity
    ceph mon add disallowed_leader e

    run_mgr $dir x || return 1
    run_mgr $dir y || return 1
    run_mgr $dir z || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for zone in iris pze
    do
        ceph osd crush add-bucket $zone zone
        ceph osd crush move $zone root=default
    done

    ceph osd crush add-bucket node-2 host
    ceph osd crush add-bucket node-3 host
    ceph osd crush add-bucket node-4 host
    ceph osd crush add-bucket node-5 host

    ceph osd crush move node-2 zone=iris
    ceph osd crush move node-3 zone=iris
    ceph osd crush move node-4 zone=pze
    ceph osd crush move node-5 zone=pze

    ceph osd crush move osd.0 host=node-2
    ceph osd crush move osd.1 host=node-2
    ceph osd crush move osd.2 host=node-3
    ceph osd crush move osd.3 host=node-3
    ceph osd crush move osd.4 host=node-4
    ceph osd crush move osd.5 host=node-4
    ceph osd crush move osd.6 host=node-5
    ceph osd crush move osd.7 host=node-5

    ceph mon set_location a zone=iris host=node-2
    ceph mon set_location b zone=iris host=node-3
    ceph mon set_location c zone=pze host=node-4
    ceph mon set_location d zone=pze host=node-5

    hostname=$(hostname -s)
    ceph osd crush remove $hostname || return 1
    ceph osd getcrushmap > crushmap || return 1
    crushtool --decompile crushmap > crushmap.txt || return 1
    sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1
    cat >> crushmap_modified.txt << EOF
rule stretch_rule {
        id 1
        type replicated
        min_size 1
        max_size 10
        step take iris
        step chooseleaf firstn 2 type host
        step emit
        step take pze
        step chooseleaf firstn 2 type host
        step emit
}

# end crush map
EOF

    crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1
    ceph osd setcrushmap -i crushmap.bin || return 1
    local stretched_poolname=stretched_rbdpool
    ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1
    ceph osd pool set $stretched_poolname size 4 || return 1

    sleep 3

    ceph mon set_location e zone=arbiter host=node-1
    ceph mon enable_stretch_mode e stretch_rule zone

    kill_daemons $dir KILL mon.c || return 1
    kill_daemons $dir KILL mon.d || return 1

    kill_daemons $dir KILL osd.4 || return 1
    kill_daemons $dir KILL osd.5 || return 1
    kill_daemons $dir KILL osd.6 || return 1
    kill_daemons $dir KILL osd.7 || return 1

    ceph -s

    sleep 3

    run_osd $dir 8 || return 1
    run_osd $dir 9 || return 1
    run_osd $dir 10 || return 1

    ceph -s

    sleep 3

    teardown $dir || return 1
}

main mon-stretch-fail-recovery "$@"
ceph/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh (new executable file, 145 lines)
@ -0,0 +1,145 @@
#!/usr/bin/env bash

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one
    export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one
    export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one
    export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one
    export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "

    export BASE_CEPH_ARGS=$CEPH_ARGS
    CEPH_ARGS+="--mon-host=$CEPH_MON_A"

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

TEST_stretched_cluster_uneven_weight() {
    local dir=$1
    local OSDS=4
    local weight=0.09000
    setup $dir || return 1

    run_mon $dir a --public-addr $CEPH_MON_A || return 1
    wait_for_quorum 300 1 || return 1

    run_mon $dir b --public-addr $CEPH_MON_B || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B"
    wait_for_quorum 300 2 || return 1

    run_mon $dir c --public-addr $CEPH_MON_C || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C"
    wait_for_quorum 300 3 || return 1

    run_mon $dir d --public-addr $CEPH_MON_D || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D"
    wait_for_quorum 300 4 || return 1

    run_mon $dir e --public-addr $CEPH_MON_E || return 1
    CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E"
    wait_for_quorum 300 5 || return 1

    ceph mon set election_strategy connectivity
    ceph mon add disallowed_leader e

    run_mgr $dir x || return 1
    run_mgr $dir y || return 1
    run_mgr $dir z || return 1

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd || return 1
    done

    for zone in iris pze
    do
        ceph osd crush add-bucket $zone zone
        ceph osd crush move $zone root=default
    done

    ceph osd crush add-bucket node-2 host
    ceph osd crush add-bucket node-3 host
    ceph osd crush add-bucket node-4 host
    ceph osd crush add-bucket node-5 host

    ceph osd crush move node-2 zone=iris
    ceph osd crush move node-3 zone=iris
    ceph osd crush move node-4 zone=pze
    ceph osd crush move node-5 zone=pze

    ceph osd crush move osd.0 host=node-2
    ceph osd crush move osd.1 host=node-3
    ceph osd crush move osd.2 host=node-4
    ceph osd crush move osd.3 host=node-5

    ceph mon set_location a zone=iris host=node-2
    ceph mon set_location b zone=iris host=node-3
    ceph mon set_location c zone=pze host=node-4
    ceph mon set_location d zone=pze host=node-5

    hostname=$(hostname -s)
    ceph osd crush remove $hostname || return 1
    ceph osd getcrushmap > crushmap || return 1
    crushtool --decompile crushmap > crushmap.txt || return 1
    sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1
    cat >> crushmap_modified.txt << EOF
rule stretch_rule {
        id 1
        type replicated
        min_size 1
        max_size 10
        step take iris
        step chooseleaf firstn 2 type host
        step emit
        step take pze
        step chooseleaf firstn 2 type host
        step emit
}
# end crush map
EOF

    crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1
    ceph osd setcrushmap -i crushmap.bin || return 1
    local stretched_poolname=stretched_rbdpool
    ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1
    ceph osd pool set $stretched_poolname size 4 || return 1

    ceph mon set_location e zone=arbiter host=node-1 || return 1
    ceph mon enable_stretch_mode e stretch_rule zone || return 1 # Enter stretch mode

    # reweight to a more round decimal.
    ceph osd crush reweight osd.0 $weight
    ceph osd crush reweight osd.1 $weight
    ceph osd crush reweight osd.2 $weight
    ceph osd crush reweight osd.3 $weight

    # Firstly, we test for stretch mode buckets != 2
    ceph osd crush add-bucket sham zone || return 1
    ceph osd crush move sham root=default || return 1
    wait_for_health "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1

    ceph osd crush rm sham # clear the health warn
    wait_for_health_gone "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1

    # Next, we test for uneven weights across buckets
    ceph osd crush reweight osd.0 0.07000
    wait_for_health "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1

    ceph osd crush reweight osd.0 $weight # clear the health warn
    wait_for_health_gone "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1

    teardown $dir || return 1
}

main mon-stretched-cluster-uneven-weight "$@"
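The test above drives two stretch-mode health warnings: INCORRECT_NUM_BUCKETS_STRETCH_MODE (by adding a third `sham` zone) and UNEVEN_WEIGHTS_STRETCH_MODE (by reweighting osd.0 from 0.09 to 0.07). As a rough, hypothetical illustration of the conditions being exercised — not the monitor's actual implementation — the sketch below sums CRUSH weights per zone and reports the corresponding warning names.

```python
# Hypothetical sketch of the conditions the standalone test exercises;
# not Ceph monitor code. osd_weights maps zone -> list of OSD CRUSH weights.

def stretch_mode_warnings(osd_weights):
    zone_totals = {zone: round(sum(ws), 5) for zone, ws in osd_weights.items()}
    warnings = []
    if len(zone_totals) != 2:
        # the test adds a third "sham" zone to trigger this
        warnings.append("INCORRECT_NUM_BUCKETS_STRETCH_MODE")
    elif len(set(zone_totals.values())) != 1:
        # the test reweights osd.0 from 0.09 to 0.07 to trigger this
        warnings.append("UNEVEN_WEIGHTS_STRETCH_MODE")
    return warnings

# After `ceph osd crush reweight osd.0 0.07000` in the test:
print(stretch_mode_warnings({"iris": [0.07, 0.09], "pze": [0.09, 0.09]}))
# -> ['UNEVEN_WEIGHTS_STRETCH_MODE']
```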
@ -19,6 +19,7 @@ overrides:
      - MDS_READ_ONLY
      - force file system read-only
      - with standby daemon mds
+      - MDS abort because newly corrupt dentry
tasks:
- cephfs_test_runner:
    modules:

@ -0,0 +1,6 @@
# Lengthen the timeout for thrashed MDS
overrides:
  ceph:
    conf:
      client:
        client_shutdown_timeout: 120

@ -0,0 +1,6 @@
# Lengthen the timeout for thrashed MDS
overrides:
  ceph:
    conf:
      client:
        client_shutdown_timeout: 120

@ -0,0 +1,13 @@
tasks:
- check-counter:
    counters:
      mgr:
        - name: "finisher-volumes.complete_latency.avgcount"
          min: 4
        - name: "finisher-volumes.queue_len"
          expected_val: 0

- cephfs_test_runner:
    fail_on_skip: false
    modules:
      - tasks.cephfs.test_volumes.TestPerModuleFinsherThread
ceph/qa/suites/krbd/singleton-msgr-failures/% (new file, 0 lines)

@ -0,0 +1 @@
.qa/objectstore/bluestore-bitmap.yaml

ceph/qa/suites/krbd/singleton-msgr-failures/conf.yaml (new file, 7 lines)
@ -0,0 +1,7 @@
overrides:
  ceph:
    conf:
      global:
        ms die on skipped message: false
      client:
        rbd default features: 37

ceph/qa/suites/krbd/singleton-msgr-failures/ms_mode$/.qa (new symbolic link)
@ -0,0 +1 @@
../.qa/

@ -0,0 +1,5 @@
overrides:
  ceph:
    conf:
      client:
        rbd default map options: ms_mode=crc,rxbounce

@ -0,0 +1,5 @@
overrides:
  ceph:
    conf:
      client:
        rbd default map options: ms_mode=crc

@ -0,0 +1,5 @@
overrides:
  ceph:
    conf:
      client:
        rbd default map options: ms_mode=legacy,rxbounce

@ -0,0 +1,5 @@
overrides:
  ceph:
    conf:
      client:
        rbd default map options: ms_mode=legacy

@ -0,0 +1,5 @@
overrides:
  ceph:
    conf:
      client:
        rbd default map options: ms_mode=secure

ceph/qa/suites/krbd/singleton-msgr-failures/msgr-failures/.qa (new symbolic link)
@ -0,0 +1 @@
../.qa/

ceph/qa/suites/krbd/singleton-msgr-failures/tasks/.qa (new symbolic link)
@ -0,0 +1 @@
../.qa/
@ -2,6 +2,7 @@ overrides:
  ceph:
    conf:
      global:
+        mon warn on pool no app: false
        ms die on skipped message: false
      client:
        rbd default features: 37

ceph/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml (new file, 19 lines)
@ -0,0 +1,19 @@
overrides:
  ceph:
    conf:
      global:
        osd pool default size: 1
      osd:
        osd shutdown pgref assert: true
roles:
- [mon.a, mgr.x, osd.0, client.0]

tasks:
- install:
    extra_system_packages:
      - fio
- ceph:
- workunit:
    clients:
      all:
        - rbd/krbd_watch_errors.sh
|
|||||||
|
overrides:
|
||||||
|
ceph:
|
||||||
|
log-ignorelist:
|
||||||
|
- \(HOST_IN_MAINTENANCE\)
|
||||||
|
- \(OSD_DOWN\)
|
||||||
|
- \(MON_DOWN\)
|
||||||
|
- down
|
||||||
|
- overall HEALTH_
|
||||||
|
- \(CEPHADM_STRAY_DAEMON\)
|
||||||
|
- stray daemon
|
||||||
|
- \(FS_DEGRADED\)
|
||||||
|
- \(MDS_FAILED\)
|
||||||
|
- \(MDS_DEGRADED\)
|
||||||
|
- \(FS_WITH_FAILED_MDS\)
|
||||||
|
- \(MDS_DAMAGE\)
|
||||||
|
- \(MDS_ALL_DOWN\)
|
||||||
|
- \(MDS_UP_LESS_THAN_MAX\)
|
||||||
|
- \(FS_INLINE_DATA_DEPRECATED\)
|
||||||
|
- \(PG_DEGRADED\)
|
||||||
|
- Degraded data redundancy
|
||||||
|
- \(PG_
|
||||||
|
- acting
|
||||||
|
- MDS_INSUFFICIENT_STANDBY
|
||||||
|
- deprecated feature inline_data
|
||||||
|
- compat changed unexpectedly
|
||||||
roles:
|
roles:
|
||||||
# 3 osd roles on host.a is required for cephadm task. It checks if the cluster is healthy.
|
# 3 osd roles on host.a is required for cephadm task. It checks if the cluster is healthy.
|
||||||
# More daemons will be deployed on both hosts in e2e tests.
|
# More daemons will be deployed on both hosts in e2e tests.
|
||||||
|
@ -24,6 +24,21 @@ openstack:
|
|||||||
size: 10 # GB
|
size: 10 # GB
|
||||||
overrides:
|
overrides:
|
||||||
ceph:
|
ceph:
|
||||||
|
log-ignorelist:
|
||||||
|
- slow requests
|
||||||
|
- \(PG_
|
||||||
|
- PG_
|
||||||
|
- \(CEPHADM_STRAY_DAEMON\)
|
||||||
|
- slow request
|
||||||
|
- \(MDS_
|
||||||
|
- MDS_
|
||||||
|
- osds down
|
||||||
|
- OSD_
|
||||||
|
- \(OSD_
|
||||||
|
- client
|
||||||
|
- FS_
|
||||||
|
- \(FS_
|
||||||
|
- degraded
|
||||||
conf:
|
conf:
|
||||||
osd:
|
osd:
|
||||||
osd shutdown pgref assert: true
|
osd shutdown pgref assert: true
|
||||||
|
@ -1,3 +1,10 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - \(OSD_DOWN\)
+      - \(PG_
+      - but it is still running
tasks:
- cephadm.shell:
    host.a:

@ -1,3 +1,10 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - \(OSD_DOWN\)
+      - \(PG_
+      - but it is still running
tasks:
- cephadm.shell:
    host.a:

@ -1,3 +1,10 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - \(OSD_DOWN\)
+      - \(PG_
+      - but it is still running
tasks:
- cephadm.shell:
    host.a:

@ -1,3 +1,10 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - \(OSD_DOWN\)
+      - \(PG_
+      - but it is still running
tasks:
- cephadm.shell:
    host.a:

@ -1,3 +1,11 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - \(OSD_DOWN\)
+      - \(PG_
+      - but it is still running
+      - \(CEPHADM_STRAY_DAEMON\)
tasks:
- cephadm.shell:
    host.a:

@ -1,3 +1,11 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - \(PG_AVAILABILITY\)
+      - mon down
+      - mons down
+      - out of quorum
tasks:
- cephadm:
    conf:

@ -3,6 +3,23 @@ overrides:
    log-ignorelist:
      - but it is still running
      - objects unfound and apparently lost
+      - \(MON_DOWN\)
+      - \(OSDMAP_FLAGS\)
+      - flag\(s\) set
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(CACHE_
+      - \(PG_
+      - \(OSD_
+      - \(POOL_
+      - \(CEPHADM_STRAY_DAEMON\)
+      - PG_
+      - CACHE_
+      - degraded
+      - backfill
+      - mons down
+      - OSD_
+      - is down
+      - acting
    conf:
      osd:
        osd debug reject backfill probability: .3

@ -1,3 +1,14 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - \(PG_
+      - mons down
+      - pg inactive
+      - out of quorum
+      - \(OSD_
+      - osds down
+      - osd down
tasks:
- cephadm.shell:
    env: [sha1]

@ -1,3 +1,9 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - Replacing daemon mds
+      - FS_DEGRADED
+      - \(CEPHADM_STRAY_DAEMON\)
roles:
- - host.a
  - osd.0

@ -1,3 +1,10 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - \(OSD_DOWN\)
+      - \(CEPHADM_PAUSED\)
+      - mons down
roles:
- - host.a
  - osd.0

@ -1,3 +1,10 @@
+overrides:
+  ceph:
+    log-ignorelist:
+      - \(MON_DOWN\)
+      - mons down
+      - \(MGR_DOWN\)
+      - out of quorum
roles:
- - host.a
  - osd.0
@ -11,6 +11,15 @@ overrides:
      - \(POOL_APP_NOT_ENABLED\)
      - \(PG_AVAILABILITY\)
      - \(PG_DEGRADED\)
+      - \(MON_DOWN\)
+      - \(CEPHADM_STRAY_DAEMON\)
+      - missing hit_sets
+      - do not have an application enabled
+      - application not enabled on pool
+      - pool application
+      - mons down
+      - out of quorum
+      - needs hit_set_type to be set but it is not
    conf:
      client:
        debug ms: 1

@ -2,6 +2,7 @@ overrides:
  ceph:
    log-ignorelist:
      - \(PG_AVAILABILITY\)
+      - \(POOL_APP_NOT_ENABLED\)
    conf:
      osd:
        osd_class_load_list: "*"

@ -8,6 +8,13 @@ overrides:
      - \(OSD_
      - \(OBJECT_
      - \(POOL_APP_NOT_ENABLED\)
+      - \(MON_DOWN\)
+      - mons down
+      - application not enabled on pool
+      - do not have an application enabled
+      - pool application
+      - out of quorum
+      - needs hit_set_type to be set but it is not
tasks:
- workunit:
    clients:
@ -0,0 +1,43 @@
tasks:
- install:
- ceph:
    wait-for-scrub: false
- check-counter:
    counters:
      mgr:
        - name: "finisher-balancer.complete_latency.avgcount"
          min: 1
        - name: "finisher-balancer.queue_len"
          expected_val: 0
        - name: "finisher-crash.complete_latency.avgcount"
          min: 2
        - name: "finisher-crash.queue_len"
          expected_val: 0
        - name: "finisher-devicehealth.complete_latency.avgcount"
          min: 1
        - name: "finisher-devicehealth.queue_len"
          expected_val: 0
        - name: "finisher-iostat.complete_latency.avgcount"
          min: 1
        - name: "finisher-iostat.queue_len"
          expected_val: 0
        - name: "finisher-pg_autoscaler.complete_latency.avgcount"
          min: 1
        - name: "finisher-pg_autoscaler.queue_len"
          expected_val: 0
        - name: "finisher-progress.complete_latency.avgcount"
          min: 2
        - name: "finisher-progress.queue_len"
          expected_val: 0
        - name: "finisher-status.complete_latency.avgcount"
          min: 2
        - name: "finisher-status.queue_len"
          expected_val: 0
        - name: "finisher-telemetry.complete_latency.avgcount"
          min: 1
        - name: "finisher-telemetry.queue_len"
          expected_val: 0
- workunit:
    clients:
      client.0:
        - mgr/test_per_module_finisher.sh
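The check-counter block above asserts, after mgr/test_per_module_finisher.sh has run, that each per-module finisher completed at least `min` operations and left an empty queue. The sketch below shows roughly how such `min` / `expected_val` assertions can be evaluated against a flattened perf-counter dump; the helper and the dump format are illustrative assumptions, not the teuthology check-counter task itself.

```python
# Rough sketch of applying min / expected_val specs to a flat counter dump;
# illustrative only, not the check-counter task's actual code.

def check_counters(dump, specs):
    """dump: {"finisher-balancer.complete_latency.avgcount": 3, ...}
    specs: list of dicts shaped like the YAML entries above."""
    failures = []
    for spec in specs:
        val = dump.get(spec["name"], 0)
        if "min" in spec and val < spec["min"]:
            failures.append(f'{spec["name"]}={val} is below min {spec["min"]}')
        if "expected_val" in spec and val != spec["expected_val"]:
            failures.append(f'{spec["name"]}={val} != {spec["expected_val"]}')
    return failures

print(check_counters(
    {"finisher-balancer.complete_latency.avgcount": 3,
     "finisher-balancer.queue_len": 0},
    [{"name": "finisher-balancer.complete_latency.avgcount", "min": 1},
     {"name": "finisher-balancer.queue_len", "expected_val": 0}]))
# -> [] (all assertions satisfied)
```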
@ -13,4 +13,4 @@ tasks:
- workunit:
    clients:
      client.0:
-        - mgr
+        - mgr/test_localpool.sh

ceph/qa/suites/rados/standalone/workloads/mon-stretch.yaml (new file, 18 lines)
@ -0,0 +1,18 @@
roles:
- - mon.a
  - mgr.x
  - osd.0
  - osd.1
  - osd.2
  - client.0
openstack:
  - volumes: # attached to each instance
      count: 3
      size: 10 # GB
tasks:
- install:
- workunit:
    basedir: qa/standalone
    clients:
      all:
        - mon-stretch

@ -4,6 +4,8 @@ overrides:
    osd:
      osd_class_load_list: "*"
      osd_class_default_list: "*"
+    log-ignorelist:
+      - \(POOL_APP_NOT_ENABLED\)
tasks:
- workunit:
    clients:

@ -0,0 +1,13 @@
overrides:
  ceph:
    conf:
      mgr:
        debug rbd: 20
tasks:
- install:
    extra_system_packages:
      - fio
- workunit:
    clients:
      client.0:
        - rbd/rbd_support_module_recovery.sh

ceph/qa/suites/rgw/verify/tasks/bucket-check.yaml (new file, 5 lines)
@ -0,0 +1,5 @@
tasks:
- workunit:
    clients:
      client.0:
        - rgw/run-bucket-check.sh
@ -6,7 +6,7 @@ workload:
  - sequential:
    - ragweed:
        client.1:
-          default-branch: ceph-pacific
+          default-branch: ceph-nautilus
          rgw_server: client.1
          stages: prepare
    - print: "**** done rgw ragweed prepare 2-workload"

@ -5,7 +5,7 @@ rgw-final-workload:
  full_sequential:
  - ragweed:
      client.1:
-        default-branch: ceph-pacific
+        default-branch: ceph-nautilus
        rgw_server: client.1
        stages: check
  - print: "**** done ragweed check 4-final-workload"

@ -5,7 +5,7 @@ rgw-final-workload:
  full_sequential:
  - ragweed:
      client.1:
-        default-branch: ceph-pacific
+        default-branch: ceph-octopus
        rgw_server: client.1
        stages: check
  - print: "**** done ragweed check 4-final-workload"

@ -123,7 +123,7 @@ workload_pacific:
      - rados/test.sh
      - cls
    env:
-      CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.snapshots_namespaces'
+      CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot'
  - print: "**** done rados/test.sh & cls workload_pacific"
  - sequential:
    - rgw: [client.0]

@ -7,4 +7,6 @@ stress-tasks:
    clients:
      client.0:
      - cls/test_cls_rbd.sh
+    env:
+      CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot'
- print: "**** done cls/test_cls_rbd.sh 4-workload"

@ -3,7 +3,7 @@ meta:
  librbd python api tests
tasks:
- workunit:
-    tag: v16.2.7
+    branch: pacific
    clients:
      client.0:
      - rbd/test_librbd_python.sh
@ -232,6 +232,7 @@ class OSDThrasher(Thrasher):
        self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0)
        self.random_eio = self.config.get('random_eio')
        self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3)
+        self.chance_reset_purged_snaps_last = self.config.get('chance_reset_purged_snaps_last', 0.3)

        num_osds = self.in_osds + self.out_osds
        self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds)

@ -798,6 +799,19 @@ class OSDThrasher(Thrasher):
        else:
            self.cancel_force_recovery()

+    def reset_purged_snaps_last(self):
+        """
+        Run reset_purged_snaps_last
+        """
+        self.log('reset_purged_snaps_last')
+        for osd in self.in_osds:
+            try:
+                self.ceph_manager.raw_cluster_cmd(
+                    'tell', "osd.%s" % (str(osd)),
+                    'reset_purged_snaps_last')
+            except CommandFailedError:
+                self.log('Failed to reset_purged_snaps_last, ignoring')
+
    def all_up(self):
        """
        Make sure all osds are up and not out.

@ -1248,6 +1262,8 @@ class OSDThrasher(Thrasher):
            actions.append((self.thrash_pg_upmap_items, self.chance_thrash_pg_upmap_items,))
        if self.chance_force_recovery > 0:
            actions.append((self.force_cancel_recovery, self.chance_force_recovery))
+        if self.chance_reset_purged_snaps_last > 0:
+            actions.append((self.reset_purged_snaps_last, self.chance_reset_purged_snaps_last))

        for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
            for scenario in [
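The thrasher registers the new `reset_purged_snaps_last` method as another `(action, weight)` pair next to `force_cancel_recovery`, gated by `chance_reset_purged_snaps_last` (default 0.3). The sketch below shows one conventional way such weighted pairs can be sampled; it is illustrative only and does not reproduce the actual OSDThrasher selection code.

```python
# Sketch of weighted selection over (action, chance) pairs like the ones the
# diff appends above; not the real OSDThrasher implementation.
import random

def choose_action(actions):
    """actions: list of (callable, weight) pairs, e.g.
    [(reset_purged_snaps_last, 0.3), (force_cancel_recovery, 0.3), ...]"""
    total = sum(weight for _, weight in actions)
    roll = random.uniform(0, total)
    for action, weight in actions:
        roll -= weight
        if roll <= 0:
            return action
    return actions[-1][0]

# Usage example with stand-in callables:
picked = choose_action([(lambda: "reset_purged_snaps_last", 0.3),
                        (lambda: "force_cancel_recovery", 0.3)])
print(picked())
```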
@ -2,6 +2,8 @@
# make logging friendly to teuthology
log_to_file = true
log_to_stderr = false
+log to journald = false
+mon cluster log to file = true
mon cluster log file level = debug

mon clock drift allowed = 1.000
@ -811,7 +811,7 @@ class CephFSMount(object):
        ))
        p.wait()

-    def open_background(self, basename="background_file", write=True):
+    def open_background(self, basename="background_file", write=True, content="content"):
        """
        Open a file for writing, then block such that the client
        will hold a capability.

@ -828,12 +828,11 @@ class CephFSMount(object):
                import time

                with open("{path}", 'w') as f:
-                    f.write('content')
+                    f.write("{content}")
                    f.flush()
-                    f.write('content2')
                    while True:
                        time.sleep(1)
-                """).format(path=path)
+                """).format(path=path, content=content)
        else:
            pyscript = dedent("""
                import time

@ -849,6 +848,9 @@ class CephFSMount(object):
        # This wait would not be sufficient if the file had already
        # existed, but it's simple and in practice users of open_background
        # are not using it on existing files.
+        if write:
+            self.wait_for_visible(basename, size=len(content))
+        else:
            self.wait_for_visible(basename)

        return rproc

@ -887,17 +889,25 @@ class CephFSMount(object):
        if nr_links == 2:
            return

-    def wait_for_visible(self, basename="background_file", timeout=30):
+    def wait_for_visible(self, basename="background_file", size=None, timeout=30):
        i = 0
+        args = ['stat']
+        if size is not None:
+            args += ['--printf=%s']
+        args += [os.path.join(self.hostfs_mntpt, basename)]
        while i < timeout:
-            r = self.client_remote.run(args=[
-                'stat', os.path.join(self.hostfs_mntpt, basename)
-            ], check_status=False)
-            if r.exitstatus == 0:
-                log.debug("File {0} became visible from {1} after {2}s".format(
-                    basename, self.client_id, i))
-                return
-            else:
+            p = self.client_remote.run(args=args, stdout=StringIO(), check_status=False)
+            if p.exitstatus == 0:
+                if size is not None:
+                    s = p.stdout.getvalue().strip()
+                    if int(s) == size:
+                        log.info(f"File {basename} became visible with size {size} from {self.client_id} after {i}s")
+                        return
+                    else:
+                        log.error(f"File {basename} became visible but with size {int(s)} not {size}")
+                else:
+                    log.info(f"File {basename} became visible from {self.client_id} after {i}s")
+                    return
            time.sleep(1)
            i += 1
@ -1,6 +1,8 @@
"""
-Before running this testsuite, add path to cephfs-shell module to $PATH and
-export $PATH.
+NOTE: For running this tests locally (using vstart_runner.py), export the
+path to src/tools/cephfs/shell/cephfs-shell module to $PATH. Running
+"export PATH=$PATH:$(cd ../src/tools/cephfs/shell && pwd)" from the build dir
+will update the environment without hassles of typing the path correctly.
"""
from io import StringIO
from os import path

@ -9,7 +9,9 @@ from textwrap import dedent
from tasks.ceph_test_case import TestTimeoutError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
from tasks.cephfs.fuse_mount import FuseMount
+from teuthology.exceptions import CommandFailedError
import os
+from io import StringIO


log = logging.getLogger(__name__)
@ -157,29 +159,49 @@ class TestClientLimits(CephFSTestCase):
        a fraction of second (0.5) by default when throttling condition is met.
        """

-        max_caps_per_client = 500
-        cap_acquisition_throttle = 250
+        subdir_count = 4
+        files_per_dir = 25

-        self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client)
-        self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle)
+        # throttle in a way so that two dir reads are already hitting it.
+        throttle_value = (files_per_dir * 3) // 2

-        # Create 1500 files split across 6 directories, 250 each.
-        for i in range(1, 7):
-            self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True)
+        # activate throttling logic by setting max per client to a low value
+        self.config_set('mds', 'mds_max_caps_per_client', 1)
+        self.config_set('mds', 'mds_session_cap_acquisition_throttle', throttle_value)
+
+        # Create files split across {subdir_count} directories, {per_dir_count} in each dir
+        for i in range(1, subdir_count+1):
+            self.mount_a.create_n_files("dir{0}/file".format(i), files_per_dir, sync=True)

        mount_a_client_id = self.mount_a.get_global_id()

-        # recursive readdir
-        self.mount_a.run_shell_payload("find | wc")
-
-        # validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250
-        cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
-        self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle)
+        # recursive readdir. macOs wants an explicit directory for `find`.
+        proc = self.mount_a.run_shell_payload("find . | wc", stderr=StringIO())
+        # return code may be None if the command got interrupted
+        self.assertTrue(proc.returncode is None or proc.returncode == 0, proc.stderr.getvalue())

        # validate the throttle condition to be hit atleast once
        cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
        self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)

+        # validate cap_acquisition decay counter after readdir to NOT exceed the throttle value
+        # plus one batch that could have been taken immediately before querying
+        # assuming the batch is equal to the per dir file count.
+        cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
+        self.assertLessEqual(cap_acquisition_value, files_per_dir + throttle_value)
+
+        # make sure that the throttle was reported in the events
+        def historic_ops_have_event(expected_event):
+            ops_dump = self.fs.rank_tell(['dump_historic_ops'])
+            # reverse the events and the ops assuming that later ops would be throttled
+            for op in reversed(ops_dump['ops']):
+                for ev in reversed(op.get('type_data', {}).get('events', [])):
+                    if ev['event'] == expected_event:
+                        return True
+            return False
+
+        self.assertTrue(historic_ops_have_event('cap_acquisition_throttle'))
+
    def test_client_release_bug(self):
        """
        When a client has a bug (which we will simulate) preventing it from releasing caps,
@ -219,6 +241,55 @@ class TestClientLimits(CephFSTestCase):
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        rproc.wait()

+    def test_client_blocklisted_oldest_tid(self):
+        """
+        that a client is blocklisted when its encoded session metadata exceeds the
+        configured threshold (due to ever growing `completed_requests` caused due
+        to an unidentified bug (in the client or the MDS)).
+        """
+
+        # num of requests client issues
+        max_requests = 10000
+
+        # The debug hook to inject the failure only exists in the fuse client
+        if not isinstance(self.mount_a, FuseMount):
+            self.skipTest("Require FUSE client to inject client release failure")
+
+        self.config_set('client', 'client inject fixed oldest tid', 'true')
+        self.mount_a.teardown()
+        self.mount_a.mount_wait()
+
+        self.config_set('mds', 'mds_max_completed_requests', max_requests);
+
+        # Create lots of files
+        self.mount_a.create_n_files("testdir/file1", max_requests + 100)
+
+        # Create a few files synchronously. This makes sure previous requests are completed
+        self.mount_a.create_n_files("testdir/file2", 5, True)
+
+        # Wait for the health warnings. Assume mds can handle 10 request per second at least
+        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10, check_in_detail=str(self.mount_a.client_id))
+
+        # set the threshold low so that it has a high probability of
+        # hitting.
+        self.config_set('mds', 'mds_session_metadata_threshold', 5000);
+
+        # Create lot many files synchronously. This would hit the session metadata threshold
+        # causing the client to get blocklisted.
+        with self.assertRaises(CommandFailedError):
+            self.mount_a.create_n_files("testdir/file2", 100000, True)
+
+        self.mds_cluster.is_addr_blocklisted(self.mount_a.get_global_addr())
+        # the mds should bump up the relevant perf counter
+        pd = self.perf_dump()
+        self.assertGreater(pd['mds_sessions']['mdthresh_evicted'], 0)
+
+        # reset the config
+        self.config_set('client', 'client inject fixed oldest tid', 'false')
+
+        self.mount_a.kill_cleanup()
+        self.mount_a.mount_wait()
+
    def test_client_oldest_tid(self):
        """
        When a client does not advance its oldest tid, the MDS should notice that
@ -10,8 +10,10 @@ from textwrap import dedent
import time
import distutils.version as version
import re
+import string
import os

+from teuthology import contextutil
from teuthology.orchestra import run
from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.fuse_mount import FuseMount

@ -221,8 +223,10 @@ class TestClientRecovery(CephFSTestCase):
        # Capability release from stale session
        # =====================================
        if write:
-            cap_holder = self.mount_a.open_background()
+            content = ''.join(random.choices(string.ascii_uppercase + string.digits, k=16))
+            cap_holder = self.mount_a.open_background(content=content)
        else:
+            content = ''
            self.mount_a.run_shell(["touch", "background_file"])
            self.mount_a.umount_wait()
            self.mount_a.mount_wait()

@ -233,7 +237,7 @@ class TestClientRecovery(CephFSTestCase):

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
-        self.mount_b.wait_for_visible()
+        self.mount_b.wait_for_visible(size=len(content))

        # Simulate client death
        self.mount_a.suspend_netns()

@ -264,11 +268,9 @@ class TestClientRecovery(CephFSTestCase):
                    "Capability handover took {0}, expected approx {1}".format(
                        cap_waited, session_timeout
                    ))
-
-            self.mount_a._kill_background(cap_holder)
        finally:
-            # teardown() doesn't quite handle this case cleanly, so help it out
-            self.mount_a.resume_netns()
+            self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable
+            self.mount_a._kill_background(cap_holder)

    def test_stale_read_caps(self):
        self._test_stale_caps(False)

@ -319,9 +321,9 @@ class TestClientRecovery(CephFSTestCase):
                    cap_waited, session_timeout / 2.0
                ))
-
-            self.mount_a._kill_background(cap_holder)
        finally:
-            self.mount_a.resume_netns()
+            self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable
+            self.mount_a._kill_background(cap_holder)

    def test_trim_caps(self):
        # Trim capability when reconnecting MDS

@ -387,7 +389,6 @@ class TestClientRecovery(CephFSTestCase):

        self.mount_b.check_filelock(do_flock=flockable)

-        # Tear down the background process
        self.mount_a._kill_background(lock_holder)

    def test_filelock_eviction(self):

@ -416,7 +417,6 @@ class TestClientRecovery(CephFSTestCase):
            # succeed
            self.wait_until_true(lambda: lock_taker.finished, timeout=10)
        finally:
-            # Tear down the background process
            self.mount_a._kill_background(lock_holder)

            # teardown() doesn't quite handle this case cleanly, so help it out
@ -751,24 +751,27 @@ class TestClientOnLaggyOSD(CephFSTestCase):
            # it takes time to have laggy clients entries in cluster log,
            # wait for 6 minutes to see if it is visible, finally restart
            # the client
-            tries = 6
-            while True:
-                try:
-                    with self.assert_cluster_log("1 client(s) laggy due to laggy OSDs",
-                                                 timeout=55):
-                        # make sure clients weren't evicted
-                        self.assert_session_count(2)
-                        break
-                except AssertionError:
-                    tries -= 1
-                    if tries:
-                        continue
-                    raise
+            with contextutil.safe_while(sleep=5, tries=6) as proceed:
+                while proceed():
+                    try:
+                        with self.assert_cluster_log("1 client(s) laggy due to"
+                                                     " laggy OSDs",
+                                                     timeout=55):
+                            # make sure clients weren't evicted
+                            self.assert_session_count(2)
+                            break
+                    except (AssertionError, CommandFailedError) as e:
+                        log.debug(f'{e}, retrying')
+
+            # clear lagginess, expect to get the warning cleared and make sure
+            # client gets evicted
+            self.clear_laggy_params(osd)
+            self.wait_for_health_clear(60)
+            self.assert_session_count(1)
        finally:
            self.mount_a.kill_cleanup()
            self.mount_a.mount_wait()
            self.mount_a.create_destroy()
-            self.clear_laggy_params(osd)

    def test_client_eviction_if_config_is_unset(self):
        """

@ -800,6 +803,11 @@ class TestClientOnLaggyOSD(CephFSTestCase):

            time.sleep(session_timeout)
            self.assert_session_count(1)
+
+            # make sure warning wasn't seen in cluster log
+            with self.assert_cluster_log("laggy due to laggy OSDs",
+                                         timeout=120, present=False):
+                pass
        finally:
            self.mount_a.kill_cleanup()
            self.mount_a.mount_wait()
@ -608,6 +608,7 @@ class TestDamage(CephFSTestCase):
        self.fs.flush()
        self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0")
        time.sleep(5) # for conf to percolate
+        with self.assert_cluster_log("MDS abort because newly corrupt dentry"):
            p = self.mount_a.run_shell_payload("timeout 60 mv a/b a/z", wait=False)
            self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout)
        self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first")

@ -642,6 +643,7 @@ class TestDamage(CephFSTestCase):
        rank0 = self.fs.get_rank()
        self.fs.rank_freeze(True, rank=0)
        # so now we want to trigger commit but this will crash, so:
+        with self.assert_cluster_log("MDS abort because newly corrupt dentry"):
            c = ['--connect-timeout=60', 'tell', f"mds.{fscid}:0", "flush", "journal"]
            p = self.ceph_cluster.mon_manager.run_cluster_cmd(args=c, wait=False, timeoutcmd=30)
            self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout)
@ -14,9 +14,12 @@ class TestClusterAffinity(CephFSTestCase):
    CLIENTS_REQUIRED = 0
    MDSS_REQUIRED = 4

-    def _verify_join_fs(self, target, status=None):
+    def _verify_join_fs(self, target, status=None, fs=None):
+        fs_select = fs
+        if fs_select is None:
+            fs_select = self.fs
        if status is None:
-            status = self.fs.wait_for_daemons(timeout=30)
+            status = fs_select.wait_for_daemons(timeout=30)
        log.debug("%s", status)
        target = sorted(target, key=operator.itemgetter('name'))
        log.info("target = %s", target)

@ -37,11 +40,14 @@ class TestClusterAffinity(CephFSTestCase):
                return
        self.fail("no entity")

-    def _verify_init(self):
-        status = self.fs.status()
+    def _verify_init(self, fs=None):
+        fs_select = fs
+        if fs_select is None:
+            fs_select = self.fs
+        status = fs_select.status()
        log.info("status = {0}".format(status))
        target = [{'join_fscid': -1, 'name': info['name']} for info in status.get_all()]
-        self._verify_join_fs(target, status=status)
+        self._verify_join_fs(target, status=status, fs=fs_select)
        return (status, target)

    def _reach_target(self, target):

@ -109,12 +115,21 @@ class TestClusterAffinity(CephFSTestCase):
        fs2 = self.mds_cluster.newfs(name="cephfs2")
        status, target = self._verify_init()
        active = self.fs.get_active_names(status=status)[0]
+        status2, _ = self._verify_init(fs=fs2)
+        active2 = fs2.get_active_names(status=status2)[0]
        standbys = [info['name'] for info in status.get_standbys()]
        victim = standbys.pop()
        # Set a bogus fs on the others
        for mds in standbys:
            self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2')
            self._change_target_state(target, mds, {'join_fscid': fs2.id})
+        # The active MDS for cephfs2 will be replaced by the MDS for which
+        # file system affinity has been set. Also, set the affinity for
+        # the earlier active MDS so that it is not chosen by the monitors
+        # as an active MDS for the existing file system.
+        log.info(f'assigning affinity to cephfs2 for active mds (mds.{active2})')
+        self.config_set(f'mds.{active2}', 'mds_join_fs', 'cephfs2')
+        self._change_target_state(target, active2, {'join_fscid': fs2.id})
        self.fs.rank_fail()
        self._change_target_state(target, victim, {'state': 'up:active'})
        self._reach_target(target)
Some files were not shown because too many files have changed in this diff.