import 15.2.9

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Thomas Lamprecht 2021-02-25 16:56:55 +01:00
parent 8b89984e92
commit adb31ebba1
386 changed files with 10834 additions and 2135 deletions

View File

@ -13,7 +13,8 @@ foreach(policy
CMP0054
CMP0056
CMP0065
CMP0075)
CMP0075
CMP0093)
if(POLICY ${policy})
cmake_policy(SET ${policy} NEW)
endif()
@ -667,4 +668,4 @@ add_custom_target(tags DEPENDS ctags)
find_package(CppCheck)
find_package(IWYU)
set(VERSION 15.2.8)
set(VERSION 15.2.9)

View File

@ -1,5 +1,13 @@
15.2.9
------
* MGR: progress module can now be turned on/off, using the commands:
``ceph progress on`` and ``ceph progress off``.
15.2.8
------
* $pid expansion in config paths like `admin_socket` will now properly expand
to the daemon pid for commands like `ceph-mds` or `ceph-osd`. Previously only
`ceph-fuse`/`rbd-nbd` expanded `$pid` with the actual daemon pid.
* ceph-volume: The ``lvm batch`` subcommand received a major rewrite. This closes
a number of bugs and improves usability in terms of size specification and

View File

@ -59,7 +59,7 @@ cd build-doc
if [ ! -e $vdir ]; then
virtualenv --python=python3 $vdir
fi
$vdir/bin/pip install --quiet -r $TOPDIR/admin/doc-requirements.txt
$vdir/bin/pip install --use-feature=2020-resolver --quiet -r $TOPDIR/admin/doc-requirements.txt -r $TOPDIR/admin/doc-python-common-requirements.txt
install -d -m0755 \
$TOPDIR/build-doc/output/html \

View File

@ -0,0 +1,3 @@
pcpp
Jinja2
-e../src/python-common

View File

@ -1,7 +1,7 @@
# Contributor: John Coyle <dx9err@gmail.com>
# Maintainer: John Coyle <dx9err@gmail.com>
pkgname=ceph
pkgver=15.2.8
pkgver=15.2.9
pkgrel=0
pkgdesc="Ceph is a distributed object store and file system"
pkgusers="ceph"
@ -63,7 +63,7 @@ makedepends="
xmlstarlet
yasm
"
source="ceph-15.2.8.tar.bz2"
source="ceph-15.2.9.tar.bz2"
subpackages="
$pkgname-base
$pkgname-common
@ -116,7 +116,7 @@ _sysconfdir=/etc
_udevrulesdir=/etc/udev/rules.d
_python_sitelib=/usr/lib/python2.7/site-packages
builddir=$srcdir/ceph-15.2.8
builddir=$srcdir/ceph-15.2.9
build() {
export CEPH_BUILD_VIRTUALENV=$builddir

View File

@ -98,7 +98,7 @@
# main package definition
#################################################################################
Name: ceph
Version: 15.2.8
Version: 15.2.9
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
@ -114,7 +114,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
Group: System/Filesystems
%endif
URL: http://ceph.com/
Source0: %{?_remote_tarball_prefix}ceph-15.2.8.tar.bz2
Source0: %{?_remote_tarball_prefix}ceph-15.2.9.tar.bz2
%if 0%{?suse_version}
# _insert_obs_source_lines_here
ExclusiveArch: x86_64 aarch64 ppc64le s390x
@ -414,10 +414,8 @@ Base is the package that includes all the files shared amongst ceph servers
%package -n cephadm
Summary: Utility to bootstrap Ceph clusters
BuildArch: noarch
Requires: lvm2
%if 0%{?suse_version}
Requires: apparmor-abstractions
%endif
Requires: python%{python3_pkgversion}
%if 0%{?weak_deps}
Recommends: podman
@ -477,8 +475,12 @@ Provides: ceph-test:/usr/bin/ceph-monstore-tool
Requires: ceph-base = %{_epoch_prefix}%{version}-%{release}
%if 0%{?weak_deps}
Recommends: nvme-cli
%if 0%{?suse_version}
Requires: smartmontools
%else
Recommends: smartmontools
%endif
%endif
%description mon
ceph-mon is the cluster monitor daemon for the Ceph distributed file
system. One or more instances of ceph-mon form a Paxos part-time
@ -757,8 +759,12 @@ Requires: libstoragemgmt
Requires: python%{python3_pkgversion}-ceph-common = %{_epoch_prefix}%{version}-%{release}
%if 0%{?weak_deps}
Recommends: nvme-cli
%if 0%{?suse_version}
Requires: smartmontools
%else
Recommends: smartmontools
%endif
%endif
%description osd
ceph-osd is the object storage daemon for the Ceph distributed file
system. It is responsible for storing objects on a local file system
@ -1134,7 +1140,7 @@ This package provides Cephs default alerts for Prometheus.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-15.2.8
%autosetup -p1 -n ceph-15.2.9
%build
# LTO can be enabled as soon as the following GCC bug is fixed:
@ -1309,7 +1315,7 @@ ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
# sudoers.d
install -m 0600 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl
install -m 0440 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl
%if 0%{?rhel} >= 8
pathfix.py -pni "%{__python3} %{py3_shbang_opts}" %{buildroot}%{_bindir}/*

View File

@ -414,10 +414,8 @@ Base is the package that includes all the files shared amongst ceph servers
%package -n cephadm
Summary: Utility to bootstrap Ceph clusters
BuildArch: noarch
Requires: lvm2
%if 0%{?suse_version}
Requires: apparmor-abstractions
%endif
Requires: python%{python3_pkgversion}
%if 0%{?weak_deps}
Recommends: podman
@ -477,8 +475,12 @@ Provides: ceph-test:/usr/bin/ceph-monstore-tool
Requires: ceph-base = %{_epoch_prefix}%{version}-%{release}
%if 0%{?weak_deps}
Recommends: nvme-cli
%if 0%{?suse_version}
Requires: smartmontools
%else
Recommends: smartmontools
%endif
%endif
%description mon
ceph-mon is the cluster monitor daemon for the Ceph distributed file
system. One or more instances of ceph-mon form a Paxos part-time
@ -757,8 +759,12 @@ Requires: libstoragemgmt
Requires: python%{python3_pkgversion}-ceph-common = %{_epoch_prefix}%{version}-%{release}
%if 0%{?weak_deps}
Recommends: nvme-cli
%if 0%{?suse_version}
Requires: smartmontools
%else
Recommends: smartmontools
%endif
%endif
%description osd
ceph-osd is the object storage daemon for the Ceph distributed file
system. It is responsible for storing objects on a local file system
@ -1309,7 +1315,7 @@ ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
# sudoers.d
install -m 0600 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl
install -m 0440 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl
%if 0%{?rhel} >= 8
pathfix.py -pni "%{__python3} %{py3_shbang_opts}" %{buildroot}%{_bindir}/*

View File

@ -1,7 +1,13 @@
ceph (15.2.8-1bionic) bionic; urgency=medium
ceph (15.2.9-1bionic) bionic; urgency=medium
-- Jenkins Build Slave User <jenkins-build@braggi12.front.sepia.ceph.com> Wed, 16 Dec 2020 18:29:12 +0000
-- Jenkins Build Slave User <jenkins-build@braggi11.front.sepia.ceph.com> Tue, 23 Feb 2021 14:23:03 +0000
ceph (15.2.9-1) stable; urgency=medium
* New upstream release
-- Ceph Release Team <ceph-maintainers@ceph.com> Tue, 23 Feb 2021 14:10:13 +0000
ceph (15.2.8-1) stable; urgency=medium

View File

@ -24,6 +24,7 @@ check_function_exists(strerror_r HAVE_Strerror_R)
check_function_exists(name_to_handle_at HAVE_NAME_TO_HANDLE_AT)
check_function_exists(pipe2 HAVE_PIPE2)
check_function_exists(accept4 HAVE_ACCEPT4)
check_function_exists(sigdescr_np HAVE_SIGDESCR_NP)
include(CMakePushCheckState)
cmake_push_check_state(RESET)

View File

@ -61,7 +61,7 @@ override_dh_auto_install:
install -D -m 644 udev/50-rbd.rules $(DESTDIR)/lib/udev/rules.d/50-rbd.rules
install -D -m 644 src/etc-rbdmap $(DESTDIR)/etc/ceph/rbdmap
install -D -m 644 etc/sysctl/90-ceph-osd.conf $(DESTDIR)/etc/sysctl.d/30-ceph-osd.conf
install -D -m 600 sudoers.d/ceph-osd-smartctl $(DESTDIR)/etc/sudoers.d/ceph-osd-smartctl
install -D -m 440 sudoers.d/ceph-osd-smartctl $(DESTDIR)/etc/sudoers.d/ceph-osd-smartctl
install -m 755 src/cephadm/cephadm $(DESTDIR)/usr/sbin/cephadm

View File

@ -17,8 +17,10 @@ if [ -r /etc/os-release ]; then
case "$ID" in
fedora)
PYBUILD="3.7"
if [ "$VERSION_ID" -ge "32" ] ; then
if [ "$VERSION_ID" -eq "32" ] ; then
PYBUILD="3.8"
elif [ "$VERSION_ID" -ge "33" ] ; then
PYBUILD="3.9"
fi
;;
rhel|centos)

View File

@ -319,7 +319,7 @@ This can be described with two layouts.
db_devices:
model: MC-55-44-XZ
limit: 2 (db_slots is actually to be favoured here, but it's not implemented yet)
---
service_type: osd
service_id: osd_spec_ssd
placement:
@ -376,8 +376,7 @@ You can use the 'host_pattern' key in the layout to target certain nodes. Salt t
rotational: 1
db_devices:
rotational: 0
---
service_type: osd
service_id: osd_spec_six_to_ten
placement:

View File

@ -37,4 +37,5 @@ versions of Ceph.
Client Setup <client-setup>
DriveGroups <drivegroups>
troubleshooting
concepts
concepts
Cephadm Feature Planning <../dev/cephadm/index>

View File

@ -72,8 +72,20 @@ monitoring by following the steps below.
ceph orch apply grafana 1
Cephadm handles the prometheus, grafana, and alertmanager
configurations automatically.
Cephadm takes care of the configuration of Prometheus, Grafana, and Alertmanager
automatically.
However, there is one exception to this rule. In some setups, the Dashboard
user's browser might not be able to access the Grafana URL configured in Ceph
Dashboard. One such scenario is when the cluster and the accessing user are each
in a different DNS zone.
For this case, there is an extra configuration option for Ceph Dashboard, which
can be used to configure the URL for accessing Grafana by the user's browser.
This value will never be altered by cephadm. To set this configuration option,
issue the following command::
$ ceph dashboard set-grafana-frontend-api-url <grafana-server-api>
It may take a minute or two for services to be deployed. Once
completed, you should see something like this from ``ceph orch ls``
@ -88,6 +100,37 @@ completed, you should see something like this from ``ceph orch ls``
node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present
prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present
Configuring SSL/TLS for Grafana
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``cephadm`` will deploy Grafana using the certificate defined in the ceph
key/value store. If a certificate is not specified, ``cephadm`` will generate a
self-signed certificate during deployment of the Grafana service.
A custom certificate can be configured using the following commands.
.. code-block:: bash
ceph config-key set mgr/cephadm/grafana_key -i $PWD/key.pem
ceph config-key set mgr/cephadm/grafana_crt -i $PWD/certificate.pem
The ``cephadm`` manager module needs to be restarted to be able to read updates
to these keys.
.. code-block:: bash
ceph orch restart mgr
If you already deployed Grafana, you need to redeploy the service for the
configuration to be updated.
.. code-block:: bash
ceph orch redeploy grafana
The ``redeploy`` command also takes care of setting the right URL for Ceph
Dashboard.
Using custom images
~~~~~~~~~~~~~~~~~~~
@ -120,7 +163,7 @@ For example
you have set the custom image for automatically. You will need to
manually update the configuration (image name and tag) to be able to
install updates.
If you choose to go with the recommendations instead, you can reset the
custom image you have set before. After that, the default value will be
used again. Use ``ceph config rm`` to reset the configuration option
@ -135,6 +178,86 @@ For example
ceph config rm mgr mgr/cephadm/container_image_prometheus
Using custom configuration files
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By overriding cephadm templates, it is possible to completely customize the
configuration files for monitoring services.
Internally, cephadm already uses `Jinja2
<https://jinja.palletsprojects.com/en/2.11.x/>`_ templates to generate the
configuration files for all monitoring components. To customize the
configuration of Prometheus, Grafana, or Alertmanager, you can store a Jinja2
template for each service, which will then be used for configuration
generation instead. This template will be evaluated every time a service of that
kind is deployed or reconfigured. That way, the custom configuration is
preserved and automatically applied on future deployments of these services.
.. note::
The configuration of the custom template is also preserved when the default
configuration of cephadm changes. If the updated configuration is to be used,
the custom template needs to be migrated *manually*.
Option names
""""""""""""
The following templates for files that will be generated by cephadm can be
overridden. These are the names to be used when storing with ``ceph config-key
set``:
- ``alertmanager_alertmanager.yml``
- ``grafana_ceph-dashboard.yml``
- ``grafana_grafana.ini``
- ``prometheus_prometheus.yml``
You can look up the file templates that are currently used by cephadm in
``src/pybind/mgr/cephadm/templates``:
- ``services/alertmanager/alertmanager.yml.j2``
- ``services/grafana/ceph-dashboard.yml.j2``
- ``services/grafana/grafana.ini.j2``
- ``services/prometheus/prometheus.yml.j2``
Usage
"""""
The following command applies a single line value:
.. code-block:: bash
ceph config-key set mgr/cephadm/<option_name> <value>
To set contents of files as template use the ``-i`` argument:
.. code-block:: bash
ceph config-key set mgr/cephadm/<option_name> -i $PWD/<filename>
.. note::
When using files as input to ``config-key`` an absolute path to the file must
be used.
It is required to restart the cephadm mgr module after a configuration option
has been set. Then the configuration file for the service needs to be recreated.
This is done using `redeploy`. For more details see the following example.
Example
"""""""
.. code-block:: bash
# set the contents of ./prometheus.yml.j2 as template
ceph config-key set mgr/cephadm/services_prometheus_prometheus.yml \
-i $PWD/prometheus.yml.j2
# restart cephadm mgr module
ceph orch restart mgr
# redeploy the prometheus service
ceph orch redeploy prometheus
Disabling monitoring
--------------------

View File

@ -305,3 +305,53 @@ the cluster, create an initial ``ceph.conf`` file. For example::
Then, run bootstrap referencing this file::
cephadm bootstrap -c /root/ceph.conf ...
.. _cephadm-removing-hosts:
Removing Hosts
==============
If the node that you want to remove is running OSDs, make sure you remove the OSDs from the node.
To remove a host from a cluster, do the following:
For all Ceph service types, except for ``node-exporter`` and ``crash``, remove
the host from the placement specification file (for example, cluster.yml).
For example, if you are removing the host named host2, remove all occurrences of
``- host2`` from all ``placement:`` sections.
Update:
.. code-block:: yaml
service_type: rgw
placement:
hosts:
- host1
- host2
To:
.. code-block:: yaml
service_type: rgw
placement:
hosts:
- host1
Remove the host from cephadm's environment:
.. code-block:: bash
ceph orch host rm host2
See also :ref:`orchestrator-cli-host-management`.
If the host is running ``node-exporter`` and ``crash`` services, remove them by running
the following command on the host:
.. code-block:: bash
cephadm rm-daemon --fsid CLUSTER_ID --name SERVICE_NAME
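For illustration, a hedged example with placeholder values (the fsid below is made up, and the daemon name assumes the usual ``<type>.<hostname>`` naming convention):
.. code-block:: bash
# remove the node-exporter daemon from host2 (fsid is a placeholder)
cephadm rm-daemon --fsid d5a9cc0e-1111-2222-3333-444444444444 --name node-exporter.host2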

View File

@ -28,7 +28,7 @@ The other dimension to MDS performance is the available RAM for caching. The
MDS necessarily manages a distributed and cooperative metadata cache among all
clients and other active MDSs. Therefore it is essential to provide the MDS
with sufficient RAM to enable faster metadata access and mutation. The default
MDS cache size (see also :doc:`/cephfs/cache-size-limits`) is 4GB. It is
MDS cache size (see also :doc:`/cephfs/cache-configuration`) is 4GB. It is
recommended to provision at least 8GB of RAM for the MDS to support this cache
size.

View File

@ -0,0 +1,181 @@
=======================
MDS Cache Configuration
=======================
The Metadata Server coordinates a distributed cache among all MDS and CephFS
clients. The cache serves to improve metadata access latency and allow clients
to safely (coherently) mutate metadata state (e.g. via `chmod`). The MDS issues
**capabilities** and **directory entry leases** to indicate what state clients
may cache and what manipulations clients may perform (e.g. writing to a file).
The MDS and clients both try to enforce a cache size. The mechanism for
specifying the MDS cache size is described below. Note that the MDS cache size
is not a hard limit. The MDS always allows clients to look up new metadata,
which is loaded into the cache. This is an essential policy, as it avoids
deadlock in client requests (some requests may rely on held capabilities before
capabilities are released).
When the MDS cache is too large, the MDS will **recall** client state so cache
items become unpinned and eligible to be dropped. The MDS can only drop cache
state when no clients refer to the metadata to be dropped. Also described below
is how to configure the MDS recall settings for your workload's needs. This is
necessary if the internal throttles on the MDS recall can not keep up with the
client workload.
MDS Cache Size
--------------
You can limit the size of the Metadata Server (MDS) cache by a byte count. This
is done through the `mds_cache_memory_limit` configuration. For example::
ceph config set mds mds_cache_memory_limit 8GB
In addition, you can specify a cache reservation by using the
`mds_cache_reservation` parameter for MDS operations. The cache reservation is
expressed as a percentage of the memory limit and is set to 5% by default. The intent
of this parameter is to have the MDS maintain an extra reserve of memory for
its cache for new metadata operations to use. As a consequence, the MDS should
in general operate below its memory limit because it will recall old state from
clients in order to drop unused metadata in its cache.
If the MDS cannot keep its cache under the target size, the MDS will send a
health alert to the Monitors indicating the cache is too large. This is
controlled by the `mds_health_cache_threshold` configuration which is by
default 150% of the maximum cache size.
Because the cache limit is not a hard limit, potential bugs in the CephFS
client, MDS, or misbehaving applications might cause the MDS to exceed its
cache size. The health warnings are intended to help the operator detect this
situation and make necessary adjustments or investigate buggy clients.
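As a rough back-of-the-envelope sketch of the numbers above (assuming the reservation is applied as a fraction of the memory limit and the health threshold as a multiple of it), an 8GB limit works out as follows:
.. code-block:: bash
# hypothetical arithmetic only: trim target and health-warning size for an 8GB limit
awk 'BEGIN {
    limit       = 8 * 2^30;   # mds_cache_memory_limit = 8GB
    reservation = 0.05;       # mds_cache_reservation (5%)
    health      = 1.5;        # mds_health_cache_threshold (150%)
    printf "trim target : %.2f GB\n", limit * (1 - reservation) / 2^30;
    printf "health warn : %.2f GB\n", limit * health / 2^30;
}'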
MDS Cache Trimming
------------------
There are two configurations for throttling the rate of cache trimming in the MDS:
::
mds_cache_trim_threshold (default 64k)
and
::
mds_cache_trim_decay_rate (default 1)
The intent of the throttle is to prevent the MDS from spending too much time
trimming its cache. This may limit its ability to handle client requests or
perform other upkeep.
The trim configurations control an internal **decay counter**. Anytime metadata
is trimmed from the cache, the counter is incremented. The threshold sets the
maximum size of the counter while the decay rate indicates the exponential half
life for the counter. If the MDS is continually removing items from its cache,
it will reach a steady state of ``-ln(0.5)/rate*threshold`` items removed per
second.
The defaults are conservative and may need to be changed for a production MDS with
a large cache size.
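As a minimal sketch of that steady-state formula, assuming the decay rate is expressed in seconds, the defaults work out to roughly 45k items removed per second:
.. code-block:: bash
# -ln(0.5)/rate*threshold with the defaults quoted above (ln(2) = -ln(0.5))
awk 'BEGIN {
    threshold = 64 * 1024;   # mds_cache_trim_threshold (64k)
    rate      = 1;           # mds_cache_trim_decay_rate
    printf "%.0f items/sec\n", log(2) / rate * threshold;
}'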
MDS Recall
----------
MDS limits its recall of client state (capabilities/leases) to prevent creating
too much work for itself handling release messages from clients. This is controlled
via the following configurations:
The maximum number of capabilities to recall from a single client in a given recall
event::
mds_recall_max_caps (default: 5000)
The threshold and decay rate for the decay counter on a session::
mds_recall_max_decay_threshold (default: 16k)
and::
mds_recall_max_decay_rate (default: 2.5 seconds)
The session decay counter controls the rate of recall for an individual
session. The behavior of the counter works the same as for cache trimming
above. Each capability that is recalled increments the counter.
There is also a global decay counter that throttles for all session recall::
mds_recall_global_max_decay_threshold (default: 64k)
its decay rate is the same as ``mds_recall_max_decay_rate``. Any recalled
capability for any session also increments this counter.
If clients are slow to release state, the warning "failing to respond to cache
pressure" or ``MDS_HEALTH_CLIENT_RECALL`` will be reported. Each session's rate
of release is monitored by another decay counter configured by::
mds_recall_warning_threshold (default: 32k)
and::
mds_recall_warning_decay_rate (default: 60.0 seconds)
Each time a capability is released, the counter is incremented. If clients do
not release capabilities quickly enough and there is cache pressure, the
counter will indicate if the client is slow to release state.
Some workloads and client behaviors may require faster recall of client state
to keep up with capability acquisition. It is recommended to increase the above
counters as needed to resolve any slow recall warnings in the cluster health
state.
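If these warnings persist, the throttles named above can be raised using the same ``ceph config set mds ...`` pattern shown earlier; the values below are purely illustrative, not recommendations:
.. code-block:: bash
# illustrative values only; tune to your workload
ceph config set mds mds_recall_max_caps 10000
ceph config set mds mds_recall_max_decay_threshold 32768
ceph config set mds mds_recall_global_max_decay_threshold 131072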
Session Liveness
----------------
The MDS also keeps track of whether sessions are quiescent. If a client session
is not utilizing its capabilities or is otherwise quiet, the MDS will begin
recalling state from the session even if it is not under cache pressure. This
helps the MDS avoid future work when the cluster workload is hot and cache
pressure is forcing the MDS to recall state. The expectation is that a client
not utilizing its capabilities is unlikely to use those capabilities anytime
in the near future.
Determining whether a given session is quiescent is controlled by the following
configuration variables::
mds_session_cache_liveness_magnitude (default: 10)
and::
mds_session_cache_liveness_decay_rate (default: 5min)
The configuration ``mds_session_cache_liveness_decay_rate`` indicates the
half-life for the decay counter tracking the use of capabilities by the client.
Each time a client manipulates or acquires a capability, the MDS will increment
the counter. This is a rough but effective way to monitor utilization of the
client cache.
The ``mds_session_cache_liveness_magnitude`` is a base-2 magnitude difference
of the liveness decay counter and the number of capabilities outstanding for
the session. So if the client has ``1*2^20`` (1M) capabilities outstanding and
only uses **less** than ``1*2^(20-mds_session_cache_liveness_magnitude)`` (1K
using defaults), the MDS will consider the client to be quiescent and begin
recall.
Capability Limit
----------------
The MDS also tries to prevent a single client from acquiring too many
capabilities. This helps prevent recovery from taking a long time in some
situations. It is not generally necessary for a client to have such a large
cache. The limit is configured via::
mds_max_caps_per_client (default: 1M)
It is not recommended to set this value above 5M but it may be helpful with
some workloads.
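Raising the limit follows the same ``ceph config set`` pattern; the value below is only an example within the guidance above:
.. code-block:: bash
# raise the per-client cap limit to 2M (illustrative; stays below the suggested 5M ceiling)
ceph config set mds mds_max_caps_per_client 2097152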

View File

@ -1,14 +0,0 @@
Understanding MDS Cache Size Limits
===================================
This section describes ways to limit MDS cache size.
You can limit the size of the Metadata Server (MDS) cache by:
* *A memory limit*: A new behavior introduced in the Luminous release. Use the `mds_cache_memory_limit` parameters.
In addition, you can specify a cache reservation by using the `mds_cache_reservation` parameter for MDS operations. The cache reservation is limited as a percentage of the memory and is set to 5% by default. The intent of this parameter is to have the MDS maintain an extra reserve of memory for its cache for new metadata operations to use. As a consequence, the MDS should in general operate below its memory limit because it will recall old state from clients in order to drop unused metadata in its cache.
The `mds_cache_reservation` parameter replaces the `mds_health_cache_threshold` in all situations except when MDS nodes sends a health alert to the Monitors indicating the cache is too large. By default, `mds_health_cache_threshold` is 150% of the maximum cache size.
Be aware that the cache limit is not a hard limit. Potential bugs in the CephFS client or MDS or misbehaving applications might cause the MDS to exceed its cache size. The `mds_health_cache_threshold` configures the cluster health warning message so that operators can investigate why the MDS cannot shrink its cache.

View File

@ -85,7 +85,7 @@ FS Subvolume groups
Create a subvolume group using::
$ ceph fs subvolumegroup create <vol_name> <group_name> [--pool_layout <data_pool_name> --uid <uid> --gid <gid> --mode <octal_mode>]
$ ceph fs subvolumegroup create <vol_name> <group_name> [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>]
The command succeeds even if the subvolume group already exists.
@ -111,12 +111,8 @@ List subvolume groups using::
$ ceph fs subvolumegroup ls <vol_name>
Create a snapshot (see :doc:`/cephfs/experimental-features`) of a
subvolume group using::
$ ceph fs subvolumegroup snapshot create <vol_name> <group_name> <snap_name>
This implicitly snapshots all the subvolumes under the subvolume group.
.. note:: Subvolume group snapshot feature is no longer supported in mainline CephFS (existing group
snapshots can still be listed and deleted)
Remove a snapshot of a subvolume group using::
@ -135,7 +131,7 @@ FS Subvolumes
Create a subvolume using::
$ ceph fs subvolume create <vol_name> <subvol_name> [--size <size_in_bytes> --group_name <subvol_group_name> --pool_layout <data_pool_name> --uid <uid> --gid <gid> --mode <octal_mode> --namespace-isolated]
$ ceph fs subvolume create <vol_name> <subvol_name> [--size <size_in_bytes>] [--group_name <subvol_group_name>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>] [--namespace-isolated]
The command succeeds even if the subvolume already exists.
@ -150,16 +146,24 @@ its parent directory and no size limit.
Remove a subvolume using::
$ ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name> --force]
$ ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name>] [--force] [--retain-snapshots]
The command removes the subvolume and its contents. It does this in two steps.
First, it move the subvolume to a trash folder, and then asynchronously purges
First, it moves the subvolume to a trash folder, and then asynchronously purges
its contents.
The removal of a subvolume fails if it has snapshots, or is non-existent.
The '--force' flag allows the remove command to succeed even if the subvolume does not exist.
A subvolume can be removed retaining existing snapshots of the subvolume using the
'--retain-snapshots' option. If snapshots are retained, the subvolume is considered
empty for all operations not involving the retained snapshots.
.. note:: Snapshot retained subvolumes can be recreated using 'ceph fs subvolume create'
.. note:: Retained snapshots can be used as a clone source to recreate the subvolume, or clone to a newer subvolume.
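As an illustration of the notes above, a hedged sequence (volume, subvolume and snapshot names are placeholders) that removes a subvolume while retaining a snapshot and then recreates it:
.. code-block:: bash
# all names are placeholders for the example
ceph fs subvolume snapshot create cephfs sub0 snap0
ceph fs subvolume rm cephfs sub0 --retain-snapshots
# the retained snapshot is still listable, and the subvolume can be recreated
ceph fs subvolume snapshot ls cephfs sub0
ceph fs subvolume create cephfs sub0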
Resize a subvolume using::
$ ceph fs subvolume resize <vol_name> <subvol_name> <new_size> [--group_name <subvol_group_name>] [--no_shrink]
@ -195,17 +199,32 @@ The output format is json and contains fields as follows.
* type: subvolume type indicating whether it's clone or subvolume
* pool_namespace: RADOS namespace of the subvolume
* features: features supported by the subvolume
* state: current state of the subvolume
If a subvolume has been removed retaining its snapshots, the output only contains fields as follows.
* type: subvolume type indicating whether it's clone or subvolume
* features: features supported by the subvolume
* state: current state of the subvolume
The subvolume "features" are based on the internal version of the subvolume and is a list containing
a subset of the following features,
* "snapshot-clone": supports cloning using a subvolumes snapshot as the source
* "snapshot-autoprotect": supports automatically protecting snapshots, that are active clone sources, from deletion
* "snapshot-retention": supports removing subvolume contents, retaining any existing snapshots
The subvolume "state" is based on the current state of the subvolume and contains one of the following values.
* "complete": subvolume is ready for all operations
* "snapshot-retained": subvolume is removed but its snapshots are retained
List subvolumes using::
$ ceph fs subvolume ls <vol_name> [--group_name <subvol_group_name>]
.. note:: subvolumes that are removed but have snapshots retained, are also listed.
Create a snapshot of a subvolume using::
$ ceph fs subvolume snapshot create <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
@ -213,11 +232,13 @@ Create a snapshot of a subvolume using::
Remove a snapshot of a subvolume using::
$ ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name> --force]
$ ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>] [--force]
Using the '--force' flag allows the command to succeed even if the snapshot
does not exist.
.. note:: if the last snapshot within a snapshot retained subvolume is removed, the subvolume is also removed
List snapshots of a subvolume using::
$ ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]

View File

@ -79,7 +79,7 @@ Administration
Administrative commands <administration>
Provision/Add/Remove MDS(s) <add-remove-mds>
MDS failover and standby configuration <standby>
MDS Cache Size Limits <cache-size-limits>
MDS Cache Configuration <cache-configuration>
MDS Configuration Settings <mds-config-ref>
Manual: ceph-mds <../../man/8/ceph-mds>
Export over NFS <nfs>

View File

@ -0,0 +1,121 @@
================
Compliance Check
================
The stability and reliability of a Ceph cluster depend not just upon the Ceph daemons, but
also on the OS and hardware that Ceph is installed on. This document is intended to promote a design
discussion for providing a "compliance" feature within mgr/cephadm, which would be responsible for
identifying common platform-related issues that could impact Ceph stability and operation.
The ultimate goal of these checks is to identify issues early and raise a healthcheck WARN
event, to alert the Administrator to the issue.
Prerequisites
=============
In order to effectively analyse the hosts that Ceph is deployed to, this feature requires a cache
of host-related metadata. The metadata is already available from cephadm's HostFacts class and the
``gather-facts`` cephadm command. For the purposes of this document, we will assume that this
data is available within the mgr/cephadm "cache" structure.
Some checks will require that the host status is also populated e.g. ONLINE, OFFLINE, MAINTENANCE
Administrator Interaction
=========================
Not all users will require this feature, so they must be able to 'opt out'. For this reason,
mgr/cephadm must provide controls such as the following:
.. code-block::
ceph cephadm compliance enable | disable | status [--format json]
ceph cephadm compliance ls [--format json]
ceph cephadm compliance enable-check <name>
ceph cephadm compliance disable-check <name>
ceph cephadm compliance set-check-interval <int>
ceph cephadm compliance get-check-interval
The status option would show the enabled/disabled state of the feature, along with the
check-interval.
The ``ls`` subcommand would show all checks in the following format;
``check-name status description``
Proposed Integration
====================
The compliance checks are not required to run all the time, but instead should run at discrete
intervals. The interval would be configurable via the :code:`set-check-interval`
subcommand (the default would be every 12 hours).
mgr/cephadm currently executes an event driven (time based) serve loop to act on deploy/remove and
reconcile activity. In order to execute the compliance checks, the compliance check code would be
called from this main serve loop - when the :code:`set-check-interval` is met.
Proposed Checks
===============
All checks would push any errors to a list, so multiple issues can be escalated to the Admin at
the same time. The list below provides a description of each check, with the text following the
name indicating a shortname version *(the shortname is the reference for command Interaction
when enabling or disabling a check)*
OS Consistency (OS)
___________________
* all hosts must use same vendor
* all hosts must be on the same major release (this check would only be applicable to distributions that
offer a long-term-support strategy, such as RHEL, CentOS, SLES or Ubuntu)
*src: gather-facts output*
Linux Kernel Security Mode (LSM)
________________________________
* All hosts should have a consistent SELINUX/AppArmor configuration
*src: gather-facts output*
Services Check (SERVICES)
_________________________
Hosts that are in an ONLINE state should adhere to the following;
* all daemons (systemd units) should be enabled
* all daemons should be running (not dead)
*src: list_daemons output*
Support Status (SUPPORT)
________________________
If support status has been detected, it should be consistent across all hosts. At this point
support status is available only for Red Hat machines.
*src: gather-facts output*
Network : MTU (MTU)
________________________________
All network interfaces on the same Ceph network (public/cluster) should have the same MTU
*src: gather-facts output*
Network : LinkSpeed (LINKSPEED)
____________________________________________
All network interfaces on the same Ceph network (public/cluster) should have the same Linkspeed
*src: gather-facts output*
Network : Consistency (INTERFACE)
______________________________________________
All hosts with OSDs should have consistent network configuration - eg. if some hosts do
not separate cluster/public traffic but others do, that is an anomaly that would generate a
compliance check warning.
*src: gather-facts output*
Notification Strategy
=====================
If any of the checks fail, mgr/cephadm would raise a WARN level alert
Futures
=======
The checks highlighted here serve only as a starting point, and we should expect to expand
on the checks over time.

View File

@ -0,0 +1,104 @@
================
Host Maintenance
================
All hosts that support Ceph daemons need to support maintenance activity, whether the host
is physical or virtual. This means that management workflows should provide
a simple and consistent way to support this operational requirement. This document defines
the maintenance strategy that could be implemented in cephadm and mgr/cephadm.
High Level Design
=================
Placing a host into maintenance adopts the following workflow:
#. confirm that the removal of the host does not impact data availability (the following
steps will assume it is safe to proceed)
* orch host ok-to-stop <host> would be used here
#. if the host has osd daemons, apply noout to the host subtree to prevent data migration
from triggering during the planned maintenance slot.
#. Stop the ceph target (all daemons stop)
#. Disable the ceph target on that host, to prevent a reboot from automatically starting
ceph services again
Exiting maintenance is basically the reverse of the above sequence; a rough shell sketch of the enter workflow is shown below.
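The block below is only a hedged sketch of the enter-maintenance steps, not the actual mgr/cephadm implementation; the host name is a placeholder and the ``ceph osd set-group`` call assumes a CRUSH bucket named after the host:
.. code-block:: bash
HOST=host2                                      # placeholder host name
ceph orch host ok-to-stop "$HOST"               # 1. confirm data availability
ceph osd set-group noout "$HOST"                # 2. apply noout to the host subtree (assumption)
ssh "$HOST" sudo systemctl stop ceph.target     # 3. stop all ceph daemons on the host
ssh "$HOST" sudo systemctl disable ceph.target  # 4. keep them from starting on reboot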
Admin Interaction
=================
The ceph orch command will be extended to support maintenance.
.. code-block::
ceph orch host enter-maintenance <host> [ --check ]
ceph orch host exit-maintenance <host>
.. note:: In addition, the host's status should be updated to reflect whether it
is in maintenance or not.
The 'check' Option
__________________
The orch host ok-to-stop command focuses on ceph daemons (mon, osd, mds), which
provides the first check. However, a ceph cluster also uses other types of daemons
for monitoring, management and non-native protocol support which means the
logic will need to consider service impact too. The 'check' option provides
this additional layer to alert the user of service impact to *secondary*
daemons.
The list below shows some of these additional daemons.
* mgr (not included in ok-to-stop checks)
* prometheus, grafana, alertmanager
* rgw
* haproxy
* iscsi gateways
* ganesha gateways
By using the --check option first, the Admin can choose whether to proceed. This
workflow is obviously optional for the CLI user, but could be integrated into the
UI workflow to help less experienced Administrators manage the cluster.
By adopting this two-phase approach, a UI based workflow would look something
like this.
#. User selects a host to place into maintenance
* orchestrator checks for data **and** service impact
#. If potential impact is shown, the next steps depend on the impact type
* **data availability** : maintenance is denied, informing the user of the issue
* **service availability** : user is provided a list of affected services and
asked to confirm
Components Impacted
===================
Implementing this capability will require changes to the following;
* cephadm
* Add maintenance subcommand with the following 'verbs'; enter, exit, check
* mgr/cephadm
* add methods to CephadmOrchestrator for enter/exit and check
* data gathering would be skipped for hosts in a maintenance state
* mgr/orchestrator
* add CLI commands to OrchestratorCli which expose the enter/exit and check interaction
Ideas for Future Work
=====================
#. When a host is placed into maintenance, the time of the event could be persisted. This
would allow the orchestrator layer to establish a maintenance window for the task and
alert if the maintenance window has been exceeded.
#. The maintenance process could support plugins to allow other integration tasks to be
initiated as part of the transition to and from maintenance. This plugin capability could
support actions like;
* alert suppression to 3rd party monitoring framework(s)
* service level reporting, to record outage windows

View File

@ -0,0 +1,13 @@
===================================
CEPHADM Developer Documentation
===================================
.. rubric:: Contents
.. toctree::
:maxdepth: 1
host-maintenance
compliance-check

View File

@ -732,24 +732,32 @@ Per client instance `rbd device map` options:
* noshare - Disable sharing of client instances with other mappings.
* crc - Enable CRC32C checksumming for data writes (default).
* crc - Enable CRC32C checksumming for msgr1 on-the-wire protocol (default).
For msgr2.1 protocol this option is ignored: full checksumming is always on
in 'crc' mode and always off in 'secure' mode.
* nocrc - Disable CRC32C checksumming for data writes.
* nocrc - Disable CRC32C checksumming for msgr1 on-the-wire protocol. Note
that only payload checksumming is disabled, header checksumming is always on.
For msgr2.1 protocol this option is ignored.
* cephx_require_signatures - Require cephx message signing (since 3.19,
default).
* cephx_require_signatures - Require msgr1 message signing feature (since 3.19,
default). This option is deprecated and will be removed in the future as the
feature has been supported since the Bobtail release.
* nocephx_require_signatures - Don't require cephx message signing (since
3.19).
* nocephx_require_signatures - Don't require msgr1 message signing feature
(since 3.19). This option is deprecated and will be removed in the future.
* tcp_nodelay - Disable Nagle's algorithm on client sockets (since 4.0,
default).
* notcp_nodelay - Enable Nagle's algorithm on client sockets (since 4.0).
* cephx_sign_messages - Enable message signing (since 4.4, default).
* cephx_sign_messages - Enable message signing for msgr1 on-the-wire protocol
(since 4.4, default). For msgr2.1 protocol this option is ignored: message
signing is built into 'secure' mode and not offered in 'crc' mode.
* nocephx_sign_messages - Disable message signing (since 4.4).
* nocephx_sign_messages - Disable message signing for msgr1 on-the-wire protocol
(since 4.4). For msgr2.1 protocol this option is ignored.
* mount_timeout=x - A timeout on various steps in `rbd device map` and
`rbd device unmap` sequences (default is 60 seconds). In particular,
@ -844,6 +852,25 @@ Per mapping (block device) `rbd device map` options:
backend that the data is incompressible, disabling compression in aggressive
mode (since 5.8).
* ms_mode=legacy - Use msgr1 on-the-wire protocol (since 5.11, default).
* ms_mode=crc - Use msgr2.1 on-the-wire protocol, select 'crc' mode, also
referred to as plain mode (since 5.11). If the daemon denies 'crc' mode,
fail the connection.
* ms_mode=secure - Use msgr2.1 on-the-wire protocol, select 'secure' mode
(since 5.11). 'secure' mode provides full in-transit encryption ensuring
both confidentiality and authenticity. If the daemon denies 'secure' mode,
fail the connection.
* ms_mode=prefer-crc - Use msgr2.1 on-the-wire protocol, select 'crc'
mode (since 5.11). If the daemon denies 'crc' mode in favor of 'secure'
mode, agree to 'secure' mode.
* ms_mode=prefer-secure - Use msgr2.1 on-the-wire protocol, select 'secure'
mode (since 5.11). If the daemon denies 'secure' mode in favor of 'crc'
mode, agree to 'crc' mode.
* udev - Wait for udev device manager to finish executing all matching
"add" rules and release the device before exiting (default). This option
is not passed to the kernel.
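For example, the msgr2.1 modes above can be selected per mapping, or made the default the way the QA overrides later in this commit do (pool and image names are placeholders):
.. code-block:: bash
# per-mapping: prefer msgr2.1 'crc' mode, falling back as the daemon dictates
rbd device map -o ms_mode=prefer-crc rbd/myimage
# or set a default for all mappings in the [client] section of ceph.conf:
#   rbd default map options = ms_mode=prefer-crc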

View File

@ -270,6 +270,34 @@ commands::
$ ceph dashboard ac-user-create <username> <password> administrator
Account Lock-out
^^^^^^^^^^^^^^^^
Ceph Dashboard disables a user account if the user repeatedly enters wrong
credentials. This feature is enabled by default to prevent brute-force and
dictionary attacks. The administrator can get or set the number of lock-out
attempts using these commands respectively::
$ ceph dashboard get-account-lockout-attempts
$ ceph dashboard set-account-lockout-attempts <value:int>
.. warning::
This feature can be disabled by setting the number of lock-out attempts to 0.
However, doing so leaves accounts more vulnerable to brute-force or
dictionary based attacks. To disable the feature::
$ ceph dashboard set-account-lockout-attempts 0
Enable a Locked User
^^^^^^^^^^^^^^^^^^^^
If a user account is disabled as a result of multiple invalid login attempts, then
it needs to be manually enabled by the administrator. This can be done by the following
command::
$ ceph dashboard ac-user-enable <username>
Accessing the Dashboard
^^^^^^^^^^^^^^^^^^^^^^^
@ -479,7 +507,8 @@ will not be visible in Prometheus.
After you have set up Grafana and Prometheus, you will need to configure the
connection information that the Ceph Dashboard will use to access Grafana.
You need to tell the dashboard on which url Grafana instance is running/deployed::
You need to tell the dashboard on which URL the Grafana instance is
running/deployed::
$ ceph dashboard set-grafana-api-url <grafana-server-url> # default: ''
@ -503,6 +532,38 @@ e.g. caused by certificates signed by unknown CA or not matching the host name::
You can directly access Grafana Instance as well to monitor your cluster.
Alternative URL for Browsers
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The Ceph Dashboard backend requires the Grafana URL to be able to verify the
existence of Grafana Dashboards before the frontend even loads them. Due to the
nature of how Grafana is implemented in Ceph Dashboard, this means that two
working connections are required in order to be able to see Grafana graphs in
Ceph Dashboard:
- The backend (Ceph Mgr module) needs to verify the existence of the requested
graph. If this request succeeds, it lets the frontend know that it can safely
access Grafana.
- The frontend then requests the Grafana graphs directly from the user's
browser using an iframe. The Grafana instance is accessed directly without any
detour through Ceph Dashboard.
Now, it might be the case that your environment makes it difficult for the
user's browser to directly access the URL configured in Ceph Dashboard. To solve
this issue, a separate URL can be configured which will solely be used to tell
the frontend (the user's browser) which URL it should use to access Grafana.
This setting won't ever be changed automatically, unlike the GRAFANA_API_URL
which is set by :ref:`cephadm` (only if cephadm is used to deploy monitoring
services).
To change the URL that is returned to the frontend issue the following command::
$ ceph dashboard set-grafana-frontend-api-url <grafana-server-url>
If no value is set for that option, it will simply fall back to the value of the
GRAFANA_API_URL option. If set, it will instruct the browser to use this URL to
access Grafana.
.. _dashboard-sso-support:
Enabling Single Sign-On (SSO)

View File

@ -58,6 +58,8 @@ Status
Show current orchestrator mode and high-level status (whether the orchestrator
plugin is available and operational)
.. _orchestrator-cli-host-management:
Host Management
===============
@ -70,7 +72,7 @@ Add and remove hosts::
ceph orch host add <hostname> [<addr>] [<labels>...]
ceph orch host rm <hostname>
For cephadm, see also :ref:`cephadm-fqdn`.
For cephadm, see also :ref:`cephadm-fqdn` and :ref:`cephadm-removing-hosts`.
Host Specification
------------------

View File

@ -1127,7 +1127,7 @@ Miscellaneous
when osd data is on HDD and osd journal is on SSD.
:Type: Float
:Default: ``2``
:Default: ``1``
``osd command max records``

View File

@ -88,7 +88,7 @@ Where:
:Description: Number of OSDs requested to send data during recovery of
a single chunk. *d* needs to be chosen such that
k+1 <= d <= k+m-1. Larger the *d*, the better the savings.
k+1 <= d <= k+m-1. The larger the *d*, the better the savings.
:Type: Integer
:Required: No.

View File

@ -1175,3 +1175,16 @@ This warning can silenced by setting the
``mon_warn_on_osd_down_out_interval_zero`` to false::
ceph config set global mon_warn_on_osd_down_out_interval_zero false
DASHBOARD_DEBUG
_______________
The Dashboard debug mode is enabled. This means that if there is an error
while processing a REST API request, the HTTP error response contains
a Python traceback. This behaviour should be disabled in production
environments because such a traceback might expose sensitive
information.
The debug mode can be disabled with::
ceph dashboard debug disable

View File

@ -365,7 +365,8 @@ else
--enable rhel-7-server-devtools-rpms
dts_ver=8
elif test $ID = centos -a $MAJOR_VERSION = 8 ; then
$SUDO dnf config-manager --set-enabled PowerTools
# Enable 'powertools' or 'PowerTools' repo
$SUDO dnf config-manager --set-enabled $(dnf repolist --all 2>/dev/null|gawk 'tolower($0) ~ /^powertools\s/{print $1}')
# before EPEL8 and PowerTools provide all dependencies, we use sepia for the dependencies
$SUDO dnf config-manager --add-repo http://apt-mirror.front.sepia.ceph.com/lab-extras/8/
$SUDO dnf config-manager --setopt=apt-mirror.front.sepia.ceph.com_lab-extras_8_.gpgcheck=0 --save

View File

@ -126,7 +126,7 @@
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": "0",
"min": null,
"show": true
},
{

View File

@ -230,8 +230,8 @@ groups:
- alert: pool filling up
expr: |
(
predict_linear(ceph_pool_stored[2d], 3600 * 24 * 5) >=
ceph_pool_max_avail
predict_linear(ceph_pool_stored[2d], 3600 * 24 * 5)
>= ceph_pool_stored + ceph_pool_max_avail
) * on(pool_id) group_left(name) ceph_pool_metadata
labels:
severity: warning
@ -241,3 +241,15 @@ groups:
description: >
Pool {{ $labels.name }} will be full in less than 5 days
assuming the average fill-up rate of the past 48 hours.
- name: healthchecks
rules:
- alert: Slow OSD Ops
expr: ceph_healthcheck_slow_ops > 0
for: 30s
labels:
severity: warning
type: ceph_default
annotations:
description: >
{{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)

View File

@ -9,4 +9,4 @@ tasks:
- echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list
- sudo apt update
- sudo apt -y install podman
- echo -e "[registries.search]\nregistries = ['docker.io']" | sudo tee /etc/containers/registries.conf
- echo -e "[[registry]]\nlocation = 'docker.io'\n\n[[registry.mirror]]\nlocation='docker-mirror.front.sepia.ceph.com:5000'\n" | sudo tee /etc/containers/registries.conf

View File

@ -222,6 +222,17 @@ function TEST_profile_k_sanity() {
m=1 || return 1
}
function TEST_invalid_crush_failure_domain() {
local dir=$1
run_mon $dir a || return 1
local profile=ec_profile
local crush_failure_domain=invalid_failure_domain
! ceph osd erasure-code-profile set $profile k=4 m=2 crush-failure-domain=$crush_failure_domain 2>&1 || return 1
}
main osd-erasure-code-profile "$@"
# Local Variables:

View File

@ -15,5 +15,6 @@ overrides:
tasks:
- cephfs_test_runner:
fail_on_skip: false
modules:
- tasks.cephfs.test_volumes

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -3,5 +3,3 @@ overrides:
conf:
global:
ms die on skipped message: false
client:
rbd default map options: read_from_replica=balance

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc,read_from_replica=balance

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy,read_from_replica=balance

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=prefer-crc,read_from_replica=balance

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure,read_from_replica=balance

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=prefer-crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -0,0 +1 @@
../.qa/

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=legacy

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=prefer-crc

View File

@ -0,0 +1,5 @@
overrides:
ceph:
conf:
client:
rbd default map options: ms_mode=secure

View File

@ -1,5 +0,0 @@
tasks:
- workunit:
clients:
all:
- rbd/krbd_stable_pages_required.sh

View File

@ -0,0 +1,5 @@
tasks:
- workunit:
clients:
all:
- rbd/krbd_stable_writes.sh

View File

@ -11,3 +11,4 @@ tasks:
- ceph orch ls
- ceph orch host ls
- ceph orch device ls
- ceph orch ls --format yaml

View File

@ -22,6 +22,7 @@ tasks:
- \(OSD_HOST_DOWN\)
- \(POOL_APP_NOT_ENABLED\)
- \(OSDMAP_FLAGS\)
- \(OSD_FLAGS\)
- pauserd,pausewr flag\(s\) set
- Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running
- evicting unresponsive client .+

View File

@ -0,0 +1,44 @@
roles:
- - mon.a
- mgr.x
- osd.0
- osd.1
- osd.2
- osd.3
- client.0
- - mon.b
- mon.c
- osd.4
- osd.5
- osd.6
- osd.7
openstack:
- volumes: # attached to each instance
count: 4
size: 10 # GB
tasks:
- install:
- ceph:
create_rbd_pool: false
pre-mgr-commands:
- sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force
log-ignorelist:
- overall HEALTH_
- \(OSDMAP_FLAGS\)
- \(OSD_
- \(PG_
- \(POOL_
- \(CACHE_POOL_
- \(OBJECT_
- \(SLOW_OPS\)
- \(REQUEST_SLOW\)
- \(TOO_FEW_PGS\)
- slow request
- exec:
client.0:
- ceph progress off
- workunit:
clients:
all:
- mon/pg_autoscaler.sh

View File

@ -13,5 +13,6 @@ overrides:
rgw curl low speed time: 300
rgw md log max shards: 4
rgw data log num shards: 4
rgw sync obj etag verify: true
rgw:
compression type: random

View File

@ -4,6 +4,7 @@ overrides:
- \(MON_DOWN\)
- \(MGR_DOWN\)
- slow request
- evicting unresponsive client
meta:
- desc: install ceph/nautilus latest
tasks:

View File

@ -26,6 +26,7 @@ tasks:
- Monitor daemon marked osd
- Behind on trimming
- Manager daemon
- evicting unresponsive client
conf:
global:
mon warn on pool no app: false

View File

@ -14,6 +14,8 @@ tasks:
bluestore_warn_on_legacy_statfs: false
bluestore warn on no per pool omap: false
mon pg warn min per osd: 0
log-whitelist:
- evicting unresponsive client
- exec:
osd.0:
- ceph osd require-osd-release nautilus

View File

@ -155,13 +155,12 @@ def ceph_log(ctx, config):
while not self.stop_event.is_set():
self.stop_event.wait(timeout=30)
try:
run.wait(
ctx.cluster.run(
args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'
],
wait=False,
)
procs = ctx.cluster.run(
args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'],
wait=False,
stderr=StringIO()
)
run.wait(procs)
except exceptions.ConnectionLostError as e:
# Some tests may power off nodes during test, in which
# case we will see connection errors that we should ignore.
@ -175,6 +174,14 @@ def ceph_log(ctx, config):
log.debug("Missed logrotate, EOFError")
except SSHException:
log.debug("Missed logrotate, SSHException")
except run.CommandFailedError as e:
for p in procs:
if p.finished and p.exitstatus != 0:
err = p.stderr.getvalue()
if 'error: error renaming temp state file' in err:
log.info('ignoring transient state error: %s', e)
else:
raise
except socket.error as e:
if e.errno in (errno.EHOSTUNREACH, errno.ECONNRESET):
log.debug("Missed logrotate, host unreachable")

View File

@ -1317,13 +1317,17 @@ def registries_add_mirror_to_docker_io(conf, mirror):
}
else:
v2 = config # type: ignore
dockers = [r for r in v2['registry'] if r['prefix'] == 'docker.io']
dockers = [
r for r in v2['registry'] if
r.get('prefix') == 'docker.io' or r.get('location') == 'docker.io'
]
if dockers:
docker = dockers[0]
docker['mirror'] = [{
"location": mirror,
"insecure": True,
}]
if 'mirror' not in docker:
docker['mirror'] = [{
"location": mirror,
"insecure": True,
}]
return v2

View File

@ -227,6 +227,9 @@ class CephFSTestCase(CephTestCase):
def _session_by_id(self, session_ls):
return dict([(s['id'], s) for s in session_ls])
def perf_dump(self, rank=None, status=None):
return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status)
def wait_until_evicted(self, client_id, timeout=30):
def is_client_evicted():
ls = self._session_list()

View File

@ -201,7 +201,7 @@ class CephFSMount(object):
return self.run_shell(["bash", "-c", Raw(f"'{payload}'")], **kwargs)
def run_shell(self, args, wait=True, stdin=None, check_status=True,
omit_sudo=True):
omit_sudo=True, timeout=10800):
if isinstance(args, str):
args = args.split()
@ -209,7 +209,8 @@ class CephFSMount(object):
return self.client_remote.run(args=args, stdout=StringIO(),
stderr=StringIO(), wait=wait,
stdin=stdin, check_status=check_status,
omit_sudo=omit_sudo)
omit_sudo=omit_sudo,
timeout=timeout)
def open_no_data(self, basename):
"""

View File

@ -156,6 +156,36 @@ class TestClientLimits(CephFSTestCase):
else:
raise RuntimeError("expected no client recall warning")
def test_cap_acquisition_throttle_readdir(self):
"""
Mostly, readdir acquires caps faster than the MDS recalls them, so cap
acquisition via readdir is throttled by retrying the readdir after
a fraction of a second (0.5 by default) when the throttling condition is met.
"""
max_caps_per_client = 500
cap_acquisition_throttle = 250
self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client)
self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle)
# Create 1500 files split across 6 directories, 250 each.
for i in range(1, 7):
self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True)
mount_a_client_id = self.mount_a.get_global_id()
# recursive readdir
self.mount_a.run_shell_payload("find | wc")
# validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250
cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle)
# validate that the throttle condition was hit at least once
cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)
def test_client_release_bug(self):
"""
When a client has a bug (which we will simulate) preventing it from releasing caps,

View File

@ -425,12 +425,14 @@ class TestNFS(MgrTestCase):
'''
self._test_create_cluster()
info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id))
info_ip = info_output[self.cluster_id][0].pop("ip")
host_details = {self.cluster_id: [{
"hostname": self._sys_cmd(['hostname']).decode("utf-8").strip(),
"ip": list(set(self._sys_cmd(['hostname', '-I']).decode("utf-8").split())),
"port": 2049
}]}
host_ip = self._sys_cmd(['hostname', '-I']).decode("utf-8").split()
self.assertDictEqual(info_output, host_details)
self.assertTrue(any([ip in info_ip for ip in host_ip]))
self._test_delete_cluster()
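
For reference, the dict shape the relaxed assertion now works against (hostname and addresses are placeholders):

    # Sketch of the `nfs cluster info` structure this test dissects.
    info_output = {
        'mycluster': [{
            'hostname': 'host1',              # placeholder
            'ip': ['10.0.0.5', '192.0.2.5'],  # placeholder addresses
            'port': 2049,
        }]
    }
    # "ip" is popped and compared separately: the reported address only has to
    # match one of the addresses returned by `hostname -I` on the host.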
def test_cluster_set_reset_user_config(self):

View File

@ -856,7 +856,7 @@ vc.disconnect()
volume_id = "volumeid"
# Create auth_id
out = self.fs.mon_manager.raw_cluster_cmd(
self.fs.mon_manager.raw_cluster_cmd(
"auth", "get-or-create", "client.guest1",
"mds", "allow *",
"osd", "allow rw",
@ -918,7 +918,7 @@ vc.disconnect()
volume_id = "volumeid"
# Create auth_id
out = self.fs.mon_manager.raw_cluster_cmd(
self.fs.mon_manager.raw_cluster_cmd(
"auth", "get-or-create", "client.guest1",
"mds", "allow *",
"osd", "allow rw",

File diff suppressed because it is too large

View File

@ -96,18 +96,19 @@ class DashboardTestCase(MgrTestCase):
cls._ceph_cmd(set_roles_args)
@classmethod
def login(cls, username, password):
def login(cls, username, password, set_cookies=False):
if cls._loggedin:
cls.logout()
cls._post('/api/auth', {'username': username, 'password': password})
cls._post('/api/auth', {'username': username,
'password': password}, set_cookies=set_cookies)
cls._assertEq(cls._resp.status_code, 201)
cls._token = cls.jsonBody()['token']
cls._loggedin = True
@classmethod
def logout(cls):
def logout(cls, set_cookies=False):
if cls._loggedin:
cls._post('/api/auth/logout')
cls._post('/api/auth/logout', set_cookies=set_cookies)
cls._assertEq(cls._resp.status_code, 200)
cls._token = None
cls._loggedin = False
@ -195,29 +196,49 @@ class DashboardTestCase(MgrTestCase):
def tearDownClass(cls):
super(DashboardTestCase, cls).tearDownClass()
# pylint: disable=inconsistent-return-statements
# pylint: disable=inconsistent-return-statements, too-many-branches
@classmethod
def _request(cls, url, method, data=None, params=None):
def _request(cls, url, method, data=None, params=None, set_cookies=False):
url = "{}{}".format(cls._base_uri, url)
log.info("Request %s to %s", method, url)
headers = {}
cookies = {}
if cls._token:
headers['Authorization'] = "Bearer {}".format(cls._token)
if set_cookies:
cookies['token'] = cls._token
else:
headers['Authorization'] = "Bearer {}".format(cls._token)
if method == 'GET':
cls._resp = cls._session.get(url, params=params, verify=False,
headers=headers)
elif method == 'POST':
cls._resp = cls._session.post(url, json=data, params=params,
verify=False, headers=headers)
elif method == 'DELETE':
cls._resp = cls._session.delete(url, json=data, params=params,
verify=False, headers=headers)
elif method == 'PUT':
cls._resp = cls._session.put(url, json=data, params=params,
verify=False, headers=headers)
if set_cookies:
if method == 'GET':
cls._resp = cls._session.get(url, params=params, verify=False,
headers=headers, cookies=cookies)
elif method == 'POST':
cls._resp = cls._session.post(url, json=data, params=params,
verify=False, headers=headers, cookies=cookies)
elif method == 'DELETE':
cls._resp = cls._session.delete(url, json=data, params=params,
verify=False, headers=headers, cookies=cookies)
elif method == 'PUT':
cls._resp = cls._session.put(url, json=data, params=params,
verify=False, headers=headers, cookies=cookies)
else:
assert False
else:
assert False
if method == 'GET':
cls._resp = cls._session.get(url, params=params, verify=False,
headers=headers)
elif method == 'POST':
cls._resp = cls._session.post(url, json=data, params=params,
verify=False, headers=headers)
elif method == 'DELETE':
cls._resp = cls._session.delete(url, json=data, params=params,
verify=False, headers=headers)
elif method == 'PUT':
cls._resp = cls._session.put(url, json=data, params=params,
verify=False, headers=headers)
else:
assert False
try:
if not cls._resp.ok:
# Output response for easier debugging.
@ -231,8 +252,8 @@ class DashboardTestCase(MgrTestCase):
raise ex
@classmethod
def _get(cls, url, params=None):
return cls._request(url, 'GET', params=params)
def _get(cls, url, params=None, set_cookies=False):
return cls._request(url, 'GET', params=params, set_cookies=set_cookies)
@classmethod
def _view_cache_get(cls, url, retries=5):
@ -253,16 +274,16 @@ class DashboardTestCase(MgrTestCase):
return res
@classmethod
def _post(cls, url, data=None, params=None):
cls._request(url, 'POST', data, params)
def _post(cls, url, data=None, params=None, set_cookies=False):
cls._request(url, 'POST', data, params, set_cookies=set_cookies)
@classmethod
def _delete(cls, url, data=None, params=None):
cls._request(url, 'DELETE', data, params)
def _delete(cls, url, data=None, params=None, set_cookies=False):
cls._request(url, 'DELETE', data, params, set_cookies=set_cookies)
@classmethod
def _put(cls, url, data=None, params=None):
cls._request(url, 'PUT', data, params)
def _put(cls, url, data=None, params=None, set_cookies=False):
cls._request(url, 'PUT', data, params, set_cookies=set_cookies)
@classmethod
def _assertEq(cls, v1, v2):
@ -281,8 +302,8 @@ class DashboardTestCase(MgrTestCase):
# pylint: disable=too-many-arguments
@classmethod
def _task_request(cls, method, url, data, timeout):
res = cls._request(url, method, data)
def _task_request(cls, method, url, data, timeout, set_cookies=False):
res = cls._request(url, method, data, set_cookies=set_cookies)
cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 400, 403, 404])
if cls._resp.status_code == 403:
@ -334,16 +355,16 @@ class DashboardTestCase(MgrTestCase):
return res_task['exception']
@classmethod
def _task_post(cls, url, data=None, timeout=60):
return cls._task_request('POST', url, data, timeout)
def _task_post(cls, url, data=None, timeout=60, set_cookies=False):
return cls._task_request('POST', url, data, timeout, set_cookies=set_cookies)
@classmethod
def _task_delete(cls, url, timeout=60):
return cls._task_request('DELETE', url, None, timeout)
def _task_delete(cls, url, timeout=60, set_cookies=False):
return cls._task_request('DELETE', url, None, timeout, set_cookies=set_cookies)
@classmethod
def _task_put(cls, url, data=None, timeout=60):
return cls._task_request('PUT', url, data, timeout)
def _task_put(cls, url, data=None, timeout=60, set_cookies=False):
return cls._task_request('PUT', url, data, timeout, set_cookies=set_cookies)
@classmethod
def cookies(cls):

View File

@ -30,6 +30,7 @@ class AuthTest(DashboardTestCase):
self.assertIn('delete', perms)
def test_a_set_login_credentials(self):
# test with Authorization header
self.create_user('admin2', 'admin2', ['administrator'])
self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'})
self.assertStatus(201)
@ -37,7 +38,16 @@ class AuthTest(DashboardTestCase):
self._validate_jwt_token(data['token'], "admin2", data['permissions'])
self.delete_user('admin2')
# test with Cookies set
self.create_user('admin2', 'admin2', ['administrator'])
self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}, set_cookies=True)
self.assertStatus(201)
data = self.jsonBody()
self._validate_jwt_token(data['token'], "admin2", data['permissions'])
self.delete_user('admin2')
def test_login_valid(self):
# test with Authorization header
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
data = self.jsonBody()
@ -51,7 +61,22 @@ class AuthTest(DashboardTestCase):
}, allow_unknown=False))
self._validate_jwt_token(data['token'], "admin", data['permissions'])
# test with Cookies set
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
'token': JLeaf(str),
'username': JLeaf(str),
'permissions': JObj(sub_elems={}, allow_unknown=True),
'sso': JLeaf(bool),
'pwdExpirationDate': JLeaf(int, none=True),
'pwdUpdateRequired': JLeaf(bool)
}, allow_unknown=False))
self._validate_jwt_token(data['token'], "admin", data['permissions'])
def test_login_invalid(self):
# test with Authorization header
self._post("/api/auth", {'username': 'admin', 'password': 'inval'})
self.assertStatus(400)
self.assertJsonBody({
@ -60,7 +85,17 @@ class AuthTest(DashboardTestCase):
"detail": "Invalid credentials"
})
# test with Cookies set
self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True)
self.assertStatus(400)
self.assertJsonBody({
"component": "auth",
"code": "invalid_credentials",
"detail": "Invalid credentials"
})
def test_login_without_password(self):
# test with Authorization header
self.create_user('admin2', '', ['administrator'])
self._post("/api/auth", {'username': 'admin2', 'password': ''})
self.assertStatus(400)
@ -71,7 +106,70 @@ class AuthTest(DashboardTestCase):
})
self.delete_user('admin2')
# test with Cookies set
self.create_user('admin2', '', ['administrator'])
self._post("/api/auth", {'username': 'admin2', 'password': ''}, set_cookies=True)
self.assertStatus(400)
self.assertJsonBody({
"component": "auth",
"code": "invalid_credentials",
"detail": "Invalid credentials"
})
self.delete_user('admin2')
def test_lockout_user(self):
# test with Authorization header
self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3'])
for _ in range(3):
self._post("/api/auth", {'username': 'admin', 'password': 'inval'})
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(400)
self.assertJsonBody({
"component": "auth",
"code": "invalid_credentials",
"detail": "Invalid credentials"
})
self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
'token': JLeaf(str),
'username': JLeaf(str),
'permissions': JObj(sub_elems={}, allow_unknown=True),
'sso': JLeaf(bool),
'pwdExpirationDate': JLeaf(int, none=True),
'pwdUpdateRequired': JLeaf(bool)
}, allow_unknown=False))
self._validate_jwt_token(data['token'], "admin", data['permissions'])
# test with Cookies set
self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3'])
for _ in range(3):
self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True)
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(400)
self.assertJsonBody({
"component": "auth",
"code": "invalid_credentials",
"detail": "Invalid credentials"
})
self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
'token': JLeaf(str),
'username': JLeaf(str),
'permissions': JObj(sub_elems={}, allow_unknown=True),
'sso': JLeaf(bool),
'pwdExpirationDate': JLeaf(int, none=True),
'pwdUpdateRequired': JLeaf(bool)
}, allow_unknown=False))
self._validate_jwt_token(data['token'], "admin", data['permissions'])
def test_logout(self):
# test with Authorization header
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
data = self.jsonBody()
@ -86,7 +184,23 @@ class AuthTest(DashboardTestCase):
self.assertStatus(401)
self.set_jwt_token(None)
# test with Cookies set
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
data = self.jsonBody()
self._validate_jwt_token(data['token'], "admin", data['permissions'])
self.set_jwt_token(data['token'])
self._post("/api/auth/logout", set_cookies=True)
self.assertStatus(200)
self.assertJsonBody({
"redirect_url": "#/login"
})
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
self.set_jwt_token(None)
def test_token_ttl(self):
# test with Authorization header
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
@ -99,7 +213,21 @@ class AuthTest(DashboardTestCase):
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
self.set_jwt_token(None)
# test with Cookies set
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
self._get("/api/host", set_cookies=True)
self.assertStatus(200)
time.sleep(6)
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
self.set_jwt_token(None)
def test_remove_from_blacklist(self):
# test with Authorization header
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
self.assertStatus(201)
@ -119,11 +247,37 @@ class AuthTest(DashboardTestCase):
self._post("/api/auth/logout")
self.assertStatus(200)
# test with Cookies set
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
# the following call adds the token to the blocklist
self._post("/api/auth/logout", set_cookies=True)
self.assertStatus(200)
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
time.sleep(6)
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
self.set_jwt_token(None)
self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
# the following call removes expired tokens from the blocklist
self._post("/api/auth/logout", set_cookies=True)
self.assertStatus(200)
def test_unauthorized(self):
# test with Authorization header
self._get("/api/host")
self.assertStatus(401)
# test with Cookies set
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
def test_invalidate_token_by_admin(self):
# test with Authorization header
self._get("/api/host")
self.assertStatus(401)
self.create_user('user', 'user', ['read-only'])
@ -147,7 +301,32 @@ class AuthTest(DashboardTestCase):
self.assertStatus(200)
self.delete_user("user")
# test with Cookies set
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
self.create_user('user', 'user', ['read-only'])
time.sleep(1)
self._post("/api/auth", {'username': 'user', 'password': 'user'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
self._get("/api/host", set_cookies=True)
self.assertStatus(200)
time.sleep(1)
self._ceph_cmd(['dashboard', 'ac-user-set-password', '--force-password',
'user', 'user2'])
time.sleep(1)
self._get("/api/host", set_cookies=True)
self.assertStatus(401)
self.set_jwt_token(None)
self._post("/api/auth", {'username': 'user', 'password': 'user2'}, set_cookies=True)
self.assertStatus(201)
self.set_jwt_token(self.jsonBody()['token'])
self._get("/api/host", set_cookies=True)
self.assertStatus(200)
self.delete_user("user")
def test_check_token(self):
# test with Authorization header
self.login("admin", "admin")
self._post("/api/auth/check", {"token": self.jsonBody()["token"]})
self.assertStatus(200)
@ -160,7 +339,21 @@ class AuthTest(DashboardTestCase):
}, allow_unknown=False))
self.logout()
# test with Cookies set
self.login("admin", "admin", set_cookies=True)
self._post("/api/auth/check", {"token": self.jsonBody()["token"]}, set_cookies=True)
self.assertStatus(200)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
"username": JLeaf(str),
"permissions": JObj(sub_elems={}, allow_unknown=True),
"sso": JLeaf(bool),
"pwdUpdateRequired": JLeaf(bool)
}, allow_unknown=False))
self.logout(set_cookies=True)
def test_check_wo_token(self):
# test with Authorization header
self.login("admin", "admin")
self._post("/api/auth/check", {"token": ""})
self.assertStatus(200)
@ -169,3 +362,13 @@ class AuthTest(DashboardTestCase):
"login_url": JLeaf(str)
}, allow_unknown=False))
self.logout()
# test with Cookies set
self.login("admin", "admin", set_cookies=True)
self._post("/api/auth/check", {"token": ""}, set_cookies=True)
self.assertStatus(200)
data = self.jsonBody()
self.assertSchema(data, JObj(sub_elems={
"login_url": JLeaf(str)
}, allow_unknown=False))
self.logout(set_cookies=True)
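
A minimal sketch of the two transports the set_cookies switch toggles, using plain requests outside the helper (base URL and credentials are placeholders):

    # Sketch only: dashboard token sent as a cookie instead of a bearer header.
    import requests

    base = 'https://localhost:8443'           # placeholder dashboard URL
    resp = requests.post(f'{base}/api/auth',
                         json={'username': 'admin', 'password': 'admin'},
                         verify=False)
    token = resp.json()['token']

    # Header-based request (previous behaviour)...
    requests.get(f'{base}/api/host', verify=False,
                 headers={'Authorization': f'Bearer {token}'})
    # ...versus the cookie-based request these tests now also exercise.
    requests.get(f'{base}/api/host', verify=False, cookies={'token': token})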

View File

@ -237,36 +237,139 @@ class OsdTest(DashboardTestCase):
class OsdFlagsTest(DashboardTestCase):
def __init__(self, *args, **kwargs):
super(OsdFlagsTest, self).__init__(*args, **kwargs)
self._initial_flags = sorted( # These flags cannot be unset
['sortbitwise', 'recovery_deletes', 'purged_snapdirs',
'pglog_hardlimit'])
self._initial_flags = ['sortbitwise', 'recovery_deletes', 'purged_snapdirs',
'pglog_hardlimit'] # These flags cannot be unset
@classmethod
def _get_cluster_osd_flags(cls):
return sorted(
json.loads(cls._ceph_cmd(['osd', 'dump',
'--format=json']))['flags_set'])
def _put_flags(cls, flags, ids=None):
url = '/api/osd/flags'
data = {'flags': flags}
@classmethod
def _put_flags(cls, flags):
cls._put('/api/osd/flags', data={'flags': flags})
return sorted(cls._resp.json())
if ids:
url = url + '/individual'
data['ids'] = ids
cls._put(url, data=data)
return cls._resp.json()
def test_list_osd_flags(self):
flags = self._get('/api/osd/flags')
self.assertStatus(200)
self.assertEqual(len(flags), 4)
self.assertEqual(sorted(flags), self._initial_flags)
self.assertCountEqual(flags, self._initial_flags)
def test_add_osd_flag(self):
flags = self._put_flags([
'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout',
'pause', 'pglog_hardlimit'
])
self.assertEqual(flags, sorted([
self.assertCountEqual(flags, [
'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout',
'pause', 'pglog_hardlimit'
]))
])
# Restore flags
self._put_flags(self._initial_flags)
def test_get_indiv_flag(self):
initial = self._get('/api/osd/flags/individual')
self.assertStatus(200)
self.assertSchema(initial, JList(JObj({
'osd': int,
'flags': JList(str)
})))
self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2'])
flags_added = self._get('/api/osd/flags/individual')
self.assertStatus(200)
for osd in flags_added:
if osd['osd'] in [0, 1, 2]:
self.assertIn('noout', osd['flags'])
self.assertIn('noin', osd['flags'])
for osd_initial in initial:
if osd['osd'] == osd_initial['osd']:
self.assertGreater(len(osd['flags']), len(osd_initial['flags']))
self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2'])
flags_removed = self._get('/api/osd/flags/individual')
self.assertStatus(200)
for osd in flags_removed:
if osd['osd'] in [0, 1, 2]:
self.assertNotIn('noout', osd['flags'])
self.assertNotIn('noin', osd['flags'])
def test_add_indiv_flag(self):
flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': True}
svc_id = 0
resp = self._put_flags(flags_update, [svc_id])
self._check_indiv_flags_resp(resp, [svc_id], ['noout'], [], ['noup', 'nodown', 'noin'])
self._check_indiv_flags_osd([svc_id], ['noout'], ['noup', 'nodown', 'noin'])
self._ceph_cmd(['osd', 'unset-group', 'noout', 'osd.{}'.format(svc_id)])
def test_add_multiple_indiv_flags(self):
flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True}
svc_id = 0
resp = self._put_flags(flags_update, [svc_id])
self._check_indiv_flags_resp(resp, [svc_id], ['noout', 'noin'], [], ['noup', 'nodown'])
self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown'])
self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.{}'.format(svc_id)])
def test_add_multiple_indiv_flags_multiple_osds(self):
flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True}
svc_id = [0, 1, 2]
resp = self._put_flags(flags_update, svc_id)
self._check_indiv_flags_resp(resp, svc_id, ['noout', 'noin'], [], ['noup', 'nodown'])
self._check_indiv_flags_osd(svc_id, ['noout', 'noin'], ['noup', 'nodown'])
self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2'])
def test_remove_indiv_flag(self):
flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': False}
svc_id = 0
self._ceph_cmd(['osd', 'set-group', 'noout', 'osd.{}'.format(svc_id)])
resp = self._put_flags(flags_update, [svc_id])
self._check_indiv_flags_resp(resp, [svc_id], [], ['noout'], ['noup', 'nodown', 'noin'])
self._check_indiv_flags_osd([svc_id], [], ['noup', 'nodown', 'noin', 'noout'])
def test_remove_multiple_indiv_flags(self):
flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False}
svc_id = 0
self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.{}'.format(svc_id)])
resp = self._put_flags(flags_update, [svc_id])
self._check_indiv_flags_resp(resp, [svc_id], [], ['noout', 'noin'], ['noup', 'nodown'])
self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown'])
def test_remove_multiple_indiv_flags_multiple_osds(self):
flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False}
svc_id = [0, 1, 2]
self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2'])
resp = self._put_flags(flags_update, svc_id)
self._check_indiv_flags_resp(resp, svc_id, [], ['noout', 'noin'], ['noup', 'nodown'])
self._check_indiv_flags_osd(svc_id, [], ['noout', 'noin', 'noup', 'nodown'])
def _check_indiv_flags_resp(self, resp, ids, added, removed, ignored):
self.assertStatus(200)
self.assertCountEqual(resp['ids'], ids)
self.assertCountEqual(resp['added'], added)
self.assertCountEqual(resp['removed'], removed)
for flag in ignored:
self.assertNotIn(flag, resp['added'])
self.assertNotIn(flag, resp['removed'])
def _check_indiv_flags_osd(self, ids, activated_flags, deactivated_flags):
osds = json.loads(self._ceph_cmd(['osd', 'dump', '--format=json']))['osds']
for osd in osds:
if osd['osd'] in ids:
for flag in activated_flags:
self.assertIn(flag, osd['state'])
for flag in deactivated_flags:
self.assertNotIn(flag, osd['state'])
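
The individual-flag endpoint exercised by _put_flags takes the flag map plus the target OSD ids; a minimal request sketch (URL and token are placeholders):

    # Sketch of the payload _put_flags() sends for per-OSD flags.
    import requests

    payload = {
        'flags': {'noout': True, 'noin': True, 'noup': None, 'nodown': None},
        'ids': [0, 1, 2],
    }
    requests.put('https://localhost:8443/api/osd/flags/individual',
                 json=payload, verify=False,
                 headers={'Authorization': 'Bearer <token>'})  # placeholder token
    # The response reports 'ids', 'added' and 'removed', which is what
    # _check_indiv_flags_resp() asserts on above.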

View File

@ -1,6 +1,8 @@
import json
import logging
from unittest import SkipTest
from teuthology import misc
from tasks.ceph_test_case import CephTestCase
@ -99,7 +101,7 @@ class MgrTestCase(CephTestCase):
assert cls.mgr_cluster is not None
if len(cls.mgr_cluster.mgr_ids) < cls.MGRS_REQUIRED:
cls.skipTest(
raise SkipTest(
"Only have {0} manager daemons, {1} are required".format(
len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED))

View File

@ -28,6 +28,16 @@ class TestDashboard(MgrTestCase):
"mgr/dashboard/standby_error_status_code",
"500")
def wait_until_webserver_available(self, url):
def _check_connection():
try:
requests.get(url, allow_redirects=False, verify=False)
return True
except requests.ConnectionError:
pass
return False
self.wait_until_true(_check_connection, timeout=30)
def test_standby(self):
original_active_id = self.mgr_cluster.get_active_id()
original_uri = self._get_uri("dashboard")
@ -48,6 +58,9 @@ class TestDashboard(MgrTestCase):
self.assertNotEqual(original_uri, failed_over_uri)
# Wait until web server of the standby node is settled.
self.wait_until_webserver_available(original_uri)
# The original active daemon should have come back up as a standby
# and be doing redirects to the new active daemon.
r = requests.get(original_uri, allow_redirects=False, verify=False)
@ -55,7 +68,7 @@ class TestDashboard(MgrTestCase):
self.assertEqual(r.headers['Location'], failed_over_uri)
# Ensure that every URL redirects to the active daemon.
r = requests.get("{}/runtime.js".format(original_uri),
r = requests.get("{}/runtime.js".format(original_uri.strip('/')),
allow_redirects=False,
verify=False)
self.assertEqual(r.status_code, 303)
@ -85,6 +98,9 @@ class TestDashboard(MgrTestCase):
self.assertNotEqual(original_uri, failed_over_uri)
# Wait until web server of the standby node is settled.
self.wait_until_webserver_available(original_uri)
# Redirection should be disabled now, instead a 500 must be returned.
r = requests.get(original_uri, allow_redirects=False, verify=False)
self.assertEqual(r.status_code, 500)

View File

@ -44,6 +44,88 @@ class TestProgress(MgrTestCase):
log.info(json.dumps(p, indent=2))
return p['events']
def _completed_events(self):
"""
This function returns all events that are completed
"""
p = self._get_progress()
log.info(json.dumps(p, indent=2))
return p['completed']
def is_osd_marked_out(self, ev):
return ev['message'].endswith('marked out')
def is_osd_marked_in(self, ev):
return ev['message'].endswith('marked in')
def _get_osd_in_out_events(self, marked='both'):
"""
Return the events that deal with OSDs being
marked in, out or both
"""
marked_in_events = []
marked_out_events = []
events_in_progress = self._events_in_progress()
for ev in events_in_progress:
if self.is_osd_marked_out(ev):
marked_out_events.append(ev)
elif self.is_osd_marked_in(ev):
marked_in_events.append(ev)
if marked == 'both':
return [marked_in_events] + [marked_out_events]
elif marked == 'in':
return marked_in_events
else:
return marked_out_events
def _osd_in_out_events_count(self, marked='both'):
"""
Count the number of ongoing recovery events that deal with
OSDs being marked in, out or both.
"""
events_in_progress = self._events_in_progress()
marked_in_count = 0
marked_out_count = 0
for ev in events_in_progress:
if self.is_osd_marked_out(ev):
marked_out_count += 1
elif self.is_osd_marked_in(ev):
marked_in_count += 1
if marked == 'both':
return marked_in_count + marked_out_count
elif marked == 'in':
return marked_in_count
else:
return marked_out_count
def _setup_pool(self, size=None):
self.mgr_cluster.mon_manager.create_pool(self.POOL)
if size is not None:
@ -105,9 +187,10 @@ class TestProgress(MgrTestCase):
'osd', 'out', str(osd_id))
# Wait for a progress event to pop up
self.wait_until_equal(lambda: len(self._all_events()), 1,
timeout=self.EVENT_CREATION_PERIOD)
ev = self._all_events()[0]
self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
timeout=self.EVENT_CREATION_PERIOD*2,
period=1)
ev = self._get_osd_in_out_events('out')[0]
log.info(json.dumps(ev, indent=1))
self.assertIn("Rebalancing after osd.0 marked out", ev['message'])
@ -125,8 +208,9 @@ class TestProgress(MgrTestCase):
try:
# Wait for progress event marked in to pop up
self.wait_until_equal(lambda: len(self._events_in_progress()), 1,
timeout=self.EVENT_CREATION_PERIOD)
self.wait_until_equal(lambda: self._osd_in_out_events_count('in'), 1,
timeout=self.EVENT_CREATION_PERIOD*2,
period=1)
except RuntimeError as ex:
if not "Timed out after" in str(ex):
raise ex
@ -134,12 +218,17 @@ class TestProgress(MgrTestCase):
log.info("There was no PGs affected by osd being marked in")
return None
new_event = self._events_in_progress()[0]
log.info(json.dumps(new_event, indent=1))
self.assertIn("Rebalancing after osd.0 marked in", new_event['message'])
new_event = self._get_osd_in_out_events('in')[0]
return new_event
def _no_events_anywhere(self):
"""
Whether there are no live or completed events anywhere
"""
p = self._get_progress()
total_events = len(p['events']) + len(p['completed'])
return total_events == 0
def _is_quiet(self):
"""
Whether any progress events are live.
@ -261,4 +350,53 @@ class TestProgress(MgrTestCase):
# Check that no event is created
time.sleep(self.EVENT_CREATION_PERIOD)
self.assertEqual(len(self._all_events()), osd_count - pool_size)
self.assertEqual(
self._osd_in_out_completed_events_count('out'),
osd_count - pool_size)
def test_turn_off_module(self):
"""
When the module is turned off, there should not be any ongoing or
completed events. The module should also not accept any kind of remote
event coming in from other modules; however, once it is turned back on,
creating an event should work as before.
"""
pool_size = 3
self._setup_pool(size=pool_size)
self._write_some_data(self.WRITE_PERIOD)
self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off")
self.mgr_cluster.mon_manager.raw_cluster_cmd(
'osd', 'out', '0')
time.sleep(self.EVENT_CREATION_PERIOD)
self.mgr_cluster.mon_manager.raw_cluster_cmd(
'osd', 'in', '0')
time.sleep(self.EVENT_CREATION_PERIOD)
self.assertTrue(self._no_events_anywhere())
self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on")
self._write_some_data(self.WRITE_PERIOD)
self.mgr_cluster.mon_manager.raw_cluster_cmd(
'osd', 'out', '0')
# Wait for a progress event to pop up
self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
timeout=self.EVENT_CREATION_PERIOD*2,
period=1)
ev1 = self._get_osd_in_out_events('out')[0]
log.info(json.dumps(ev1, indent=1))
self.wait_until_true(lambda: self._is_complete(ev1['id']),
timeout=self.RECOVERY_PERIOD)
self.assertTrue(self._is_quiet())
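
For orientation, the progress report the helpers above parse keeps live and completed events in separate buckets; a trimmed sketch (ids and messages are placeholders):

    # Sketch of the structure _get_progress() returns, trimmed to the fields
    # the assertions above read.
    progress = {
        'events': [
            {'id': 'deadbeef',
             'message': 'Rebalancing after osd.0 marked out'},
        ],
        'completed': [
            {'id': 'cafef00d',
             'message': 'Rebalancing after osd.1 marked in'},
        ],
    }
    no_events_anywhere = not (progress['events'] or progress['completed'])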

View File

@ -464,6 +464,11 @@ def task(ctx, config):
assert out['usage']['rgw.main']['num_objects'] == 1
assert out['usage']['rgw.main']['size_kb'] > 0
# TESTCASE 'bucket-stats6', 'bucket', 'stats', 'non-existent bucket', 'fails, 'bucket not found error'
(ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : 'doesnotexist'})
assert ret == 404
assert out['Code'] == 'NoSuchBucket'
# reclaim it
key.delete()

View File

@ -1268,7 +1268,10 @@ class LocalContext(object):
self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id)
def __del__(self):
shutil.rmtree(self.teuthology_config['test_path'])
test_path = self.teuthology_config['test_path']
# opt_create_cluster_only does not create the test path
if test_path:
shutil.rmtree(test_path)
def teardown_cluster():
log.info('\ntearing down the cluster...')

View File

@ -0,0 +1,63 @@
#!/bin/sh -x
expect_failure() {
if "$@"; then return 1; else return 0; fi
}
set -e
mkdir group
mkdir group/subvol1
setfattr -n ceph.dir.subvolume -v 1 group/subvol1
# rename subvolume
mv group/subvol1 group/subvol2
# move file out of the subvolume
touch group/subvol2/file1
expect_failure python3 -c "import os; os.rename('group/subvol2/file1', 'group/file1')"
# move file into the subvolume
touch group/file2
expect_failure python3 -c "import os; os.rename('group/file2', 'group/subvol2/file2')"
# create hardlink within subvolume
ln group/subvol2/file1 group/subvol2/file1_
# create hardlink out of subvolume
expect_failure ln group/subvol2/file1 group/file1_
expect_failure ln group/file2 group/subvol1/file2_
# create snapshot at subvolume root
mkdir group/subvol2/.snap/s1
# create snapshot at descendent dir of subvolume
mkdir group/subvol2/dir
expect_failure mkdir group/subvol2/dir/.snap/s2
mkdir group/subvol3
setfattr -n ceph.dir.subvolume -v 1 group/subvol3
# move file across subvolumes
expect_failure python3 -c "import os; os.rename('group/subvol2/file1', 'group/subvol3/file1')"
# create hardlink across subvolumes
expect_failure ln group/subvol2/file1 group/subvol3/file1
# create subvolume inside existing subvolume
expect_failure setfattr -n ceph.dir.subvolume -v 1 group/subvol2/dir
# clear subvolume flag
setfattr -n ceph.dir.subvolume -v 0 group/subvol2
mkdir group/subvol2/dir/.snap/s2
# parent subvolume override child subvolume
setfattr -n ceph.dir.subvolume -v 1 group/subvol2/dir
setfattr -n ceph.dir.subvolume -v 1 group/subvol2
expect_failure mkdir group/subvol2/dir/.snap/s3
rmdir group/subvol2/.snap/s1
rmdir group/subvol2/dir/.snap/s2
rm -rf group
echo OK

View File

@ -8,7 +8,7 @@ function assert_dm() {
local devno
devno=$(sudo dmsetup info -c --noheadings -o Major,Minor $name)
grep -q $val /sys/dev/block/$devno/bdi/stable_pages_required
grep -q $val /sys/dev/block/$devno/queue/stable_writes
}
function dmsetup_reload() {
@ -22,7 +22,7 @@ function dmsetup_reload() {
sudo dmsetup resume $name
}
IMAGE_NAME="stable-pages-required-test"
IMAGE_NAME="stable-writes-test"
rbd create --size 1 $IMAGE_NAME
DEV=$(sudo rbd map $IMAGE_NAME)
@ -31,11 +31,11 @@ fallocate -l 1M loopfile
LOOP_DEV=$(sudo losetup -f --show loopfile)
[[ $(blockdev --getsize64 $DEV) -eq 1048576 ]]
grep -q 1 /sys/block/${DEV#/dev/}/bdi/stable_pages_required
grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes
rbd resize --size 2 $IMAGE_NAME
[[ $(blockdev --getsize64 $DEV) -eq 2097152 ]]
grep -q 1 /sys/block/${DEV#/dev/}/bdi/stable_pages_required
grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes
cat <<EOF | sudo dmsetup create tbl
0 1024 linear $LOOP_DEV 0

View File

@ -261,7 +261,7 @@ peer_add()
peer_uuid=$(rbd mirror pool info --cluster ${cluster} --pool ${pool} --format xml | \
xmlstarlet sel -t -v "//peers/peer[site_name='${remote_cluster}']/uuid")
rbd --cluster ${cluster} --pool ${pool} mirror pool peer remove ${peer_uuid}
CEPH_ARGS='' rbd --cluster ${cluster} --pool ${pool} mirror pool peer remove ${peer_uuid}
else
test $error_code -eq 0
if [ -n "$uuid_var_name" ]; then

View File

@ -1,2 +1,2 @@
bdf3eebcd22d7d0b3dd4d5501bee5bac354d5b55
15.2.8
357616cbf726abb779ca75a551e8d02568e15b17
15.2.9

View File

@ -42,7 +42,7 @@ class Create(object):
Create an OSD by assigning an ID and FSID, registering them with the
cluster with an ID and FSID, formatting and mounting the volume, adding
all the metadata to the logical volumes using LVM tags, and starting
the OSD daemon. This is a convinience command that combines the prepare
the OSD daemon. This is a convenience command that combines the prepare
and activate steps.
Encryption is supported via dmcrypt and the --dmcrypt flag.

View File

@ -167,9 +167,8 @@ int main(int argc, const char **argv, const char *envp[]) {
}
{
g_ceph_context->_conf.finalize_reexpand_meta();
common_init_finish(g_ceph_context);
init_async_signal_handler();
register_async_signal_handler(SIGHUP, sighup_handler);

View File

@ -48,7 +48,6 @@ import os
import platform
import pwd
import random
import re
import select
import shutil
import socket
@ -59,6 +58,7 @@ import tempfile
import time
import errno
import struct
from enum import Enum
try:
from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO
except ImportError:
@ -93,7 +93,7 @@ if sys.version_info > (3, 0):
container_path = ''
cached_stdin = None
DATEFMT = '%Y-%m-%dT%H:%M:%S.%f'
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
# Log and console output config
logging_config = {
@ -924,12 +924,22 @@ class FileLock(object):
##################################
# Popen wrappers, lifted from ceph-volume
def call(command, # type: List[str]
desc=None, # type: Optional[str]
verbose=False, # type: bool
verbose_on_failure=True, # type: bool
timeout=DEFAULT_TIMEOUT, # type: Optional[int]
**kwargs):
class CallVerbosity(Enum):
SILENT = 0
# log stdout/stderr to logger.debug
DEBUG = 1
# On a non-zero exit status, it will forcefully set
# logging ON for the terminal
VERBOSE_ON_FAILURE = 2
# log at info (instead of debug) level.
VERBOSE = 3
def call(command: List[str],
desc: Optional[str] = None,
verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
timeout: Optional[int] = DEFAULT_TIMEOUT,
**kwargs) -> Tuple[str, str, int]:
"""
Wrap subprocess.Popen to
@ -937,14 +947,12 @@ def call(command, # type: List[str]
- decode utf-8
- cleanly return out, err, returncode
If verbose=True, log at info (instead of debug) level.
:param verbose_on_failure: On a non-zero exit status, it will forcefully set
logging ON for the terminal
:param timeout: timeout in seconds
"""
if not desc:
if desc is None:
desc = command[0]
if desc:
desc += ': '
timeout = timeout or args.timeout
logger.debug("Running command: %s" % ' '.join(command))
@ -977,7 +985,7 @@ def call(command, # type: List[str]
if end_time and (time.time() >= end_time):
stop = True
if process.poll() is None:
logger.info(desc + ':timeout after %s seconds' % timeout)
logger.info(desc + 'timeout after %s seconds' % timeout)
process.kill()
if reads and process.poll() is not None:
# we want to stop, but first read off anything remaining
@ -1007,55 +1015,58 @@ def call(command, # type: List[str]
lines = message.split('\n')
out_buffer = lines.pop()
for line in lines:
if verbose:
logger.info(desc + ':stdout ' + line)
else:
logger.debug(desc + ':stdout ' + line)
if verbosity == CallVerbosity.VERBOSE:
logger.info(desc + 'stdout ' + line)
elif verbosity != CallVerbosity.SILENT:
logger.debug(desc + 'stdout ' + line)
elif fd == process.stderr.fileno():
err += message
message = err_buffer + message
lines = message.split('\n')
err_buffer = lines.pop()
for line in lines:
if verbose:
logger.info(desc + ':stderr ' + line)
else:
logger.debug(desc + ':stderr ' + line)
if verbosity == CallVerbosity.VERBOSE:
logger.info(desc + 'stderr ' + line)
elif verbosity != CallVerbosity.SILENT:
logger.debug(desc + 'stderr ' + line)
else:
assert False
except (IOError, OSError):
pass
if verbose:
logger.debug(desc + ':profile rt=%s, stop=%s, exit=%s, reads=%s'
if verbosity == CallVerbosity.VERBOSE:
logger.debug(desc + 'profile rt=%s, stop=%s, exit=%s, reads=%s'
% (time.time()-start_time, stop, process.poll(), reads))
returncode = process.wait()
if out_buffer != '':
if verbose:
logger.info(desc + ':stdout ' + out_buffer)
else:
logger.debug(desc + ':stdout ' + out_buffer)
if verbosity == CallVerbosity.VERBOSE:
logger.info(desc + 'stdout ' + out_buffer)
elif verbosity != CallVerbosity.SILENT:
logger.debug(desc + 'stdout ' + out_buffer)
if err_buffer != '':
if verbose:
logger.info(desc + ':stderr ' + err_buffer)
else:
logger.debug(desc + ':stderr ' + err_buffer)
if verbosity == CallVerbosity.VERBOSE:
logger.info(desc + 'stderr ' + err_buffer)
elif verbosity != CallVerbosity.SILENT:
logger.debug(desc + 'stderr ' + err_buffer)
if returncode != 0 and verbose_on_failure and not verbose:
if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
# dump stdout + stderr
logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command)))
for line in out.splitlines():
logger.info(desc + ':stdout ' + line)
logger.info(desc + 'stdout ' + line)
for line in err.splitlines():
logger.info(desc + ':stderr ' + line)
logger.info(desc + 'stderr ' + line)
return out, err, returncode
def call_throws(command, **kwargs):
# type: (List[str], Any) -> Tuple[str, str, int]
out, err, ret = call(command, **kwargs)
def call_throws(command: List[str],
desc: Optional[str] = None,
verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
timeout: Optional[int] = DEFAULT_TIMEOUT,
**kwargs) -> Tuple[str, str, int]:
out, err, ret = call(command, desc, verbosity, timeout, **kwargs)
if ret:
raise RuntimeError('Failed command: %s' % ' '.join(command))
return out, err, ret
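
A minimal sketch of how call sites inside cephadm pick a verbosity after this change (the commands shown are illustrative):

    # Sketch only: the verbosity knob replaces the old verbose/verbose_on_failure flags.
    out, err, code = call(['systemctl', 'is-active', 'ceph.target'],
                          verbosity=CallVerbosity.DEBUG)          # never escalate to info
    out, err, code = call_throws(['ls', '/var/lib/ceph'],
                                 verbosity=CallVerbosity.VERBOSE)  # always log at info
    # The default, CallVerbosity.VERBOSE_ON_FAILURE, only dumps stdout/stderr at
    # info level when the command exits non-zero; SILENT logs neither stream.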
@ -1166,7 +1177,7 @@ def get_file_timestamp(fn):
return datetime.datetime.fromtimestamp(
mt, tz=datetime.timezone.utc
).strftime(DATEFMT)
except Exception as e:
except Exception:
return None
@ -1188,11 +1199,11 @@ def try_convert_datetime(s):
p = re.compile(r'(\.[\d]{6})[\d]*')
s = p.sub(r'\1', s)
# replace trailling Z with -0000, since (on python 3.6.8) it won't parse
# replace trailing Z with -0000, since (on python 3.6.8) it won't parse
if s and s[-1] == 'Z':
s = s[:-1] + '-0000'
# cut off the redundnat 'CST' part that strptime can't parse, if
# cut off the redundant 'CST' part that strptime can't parse, if
# present.
v = s.split(' ')
s = ' '.join(v[0:3])
@ -1409,13 +1420,16 @@ def get_last_local_ceph_image():
[container_path, 'images',
'--filter', 'label=ceph=True',
'--filter', 'dangling=false',
'--format', '{{.Repository}} {{.Tag}}'])
for line in out.splitlines():
if len(line.split()) == 2:
repository, tag = line.split()
r = '{}:{}'.format(repository, tag)
logger.info('Using recent ceph image %s' % r)
return r
'--format', '{{.Repository}}@{{.Digest}}'])
return _filter_last_local_ceph_image(out)
def _filter_last_local_ceph_image(out):
# str -> Optional[str]
for image in out.splitlines():
if image and not image.endswith('@'):
logger.info('Using recent ceph image %s' % image)
return image
return None
@ -1627,7 +1641,7 @@ def check_unit(unit_name):
installed = False
try:
out, err, code = call(['systemctl', 'is-enabled', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
if code == 0:
enabled = True
installed = True
@ -1641,7 +1655,7 @@ def check_unit(unit_name):
state = 'unknown'
try:
out, err, code = call(['systemctl', 'is-active', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
out = out.strip()
if out in ['active']:
state = 'running'
@ -2177,10 +2191,10 @@ def _write_container_cmd_to_bash(file_obj, container, comment=None, background=F
# unit file, makes it easier to read and grok.
file_obj.write('# ' + comment + '\n')
# Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
file_obj.write('! '+ ' '.join(container.rm_cmd()) + '\n')
file_obj.write('! '+ ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
# Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
if 'podman' in container_path:
file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + '\n')
file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + ' 2> /dev/null\n')
# container run command
file_obj.write(' '.join(container.run_cmd()) + (' &' if background else '') + '\n')
@ -2292,9 +2306,9 @@ def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
unit_name = get_unit_name(fsid, daemon_type, daemon_id)
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
if enable:
call_throws(['systemctl', 'enable', unit_name])
if start:
@ -2339,7 +2353,7 @@ class Firewalld(object):
else:
return
out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbose_on_failure=False)
out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
if ret:
logger.info('Enabling firewalld service %s in current zone...' % svc)
out, err, ret = call([self.cmd, '--permanent', '--add-service', svc])
@ -2357,7 +2371,7 @@ class Firewalld(object):
for port in fw_ports:
tcp_port = str(port) + '/tcp'
out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False)
out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
if ret:
logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port])
@ -2367,6 +2381,7 @@ class Firewalld(object):
else:
logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False)
def apply_rules(self):
# type: () -> None
if not self.available:
@ -2485,7 +2500,6 @@ Before=ceph-{fsid}.target
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStartPre=-{container_path} rm ceph-{fsid}-%i
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
@ -2792,7 +2806,14 @@ def command_bootstrap():
'--allow-overwrite to overwrite' % f)
dirname = os.path.dirname(f)
if dirname and not os.path.exists(dirname):
raise Error('%s directory %s does not exist' % (f, dirname))
fname = os.path.basename(f)
logger.info(f"Creating directory {dirname} for {fname}")
try:
# use makedirs to create intermediate missing dirs
os.makedirs(dirname, 0o755)
except PermissionError:
raise Error(f"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.")
if not args.skip_prepare_host:
command_prepare_host()
@ -3608,7 +3629,7 @@ def command_ceph_volume():
privileged=True,
volume_mounts=mounts,
)
out, err, code = call_throws(c.run_cmd(), verbose=True)
out, err, code = call_throws(c.run_cmd(), verbosity=CallVerbosity.VERBOSE)
if not code:
print(out)
@ -3626,7 +3647,10 @@ def command_unit():
call_throws([
'systemctl',
args.command,
unit_name])
unit_name],
verbosity=CallVerbosity.VERBOSE,
desc=''
)
##################################
@ -3813,7 +3837,7 @@ def list_daemons(detail=True, legacy_dir=None):
'--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
'ceph-%s-%s' % (fsid, j)
],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
if not code:
(container_id, image_name, image_id, start,
version) = out.strip().split(',')
@ -3975,7 +3999,7 @@ class AdoptOsd(object):
args=['lvm', 'list', '--format=json'],
privileged=True
)
out, err, code = call_throws(c.run_cmd(), verbose=False)
out, err, code = call_throws(c.run_cmd())
if not code:
try:
js = json.loads(out)
@ -4305,11 +4329,11 @@ def command_rm_daemon():
'this command may destroy precious data!')
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'disable', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
if daemon_type in ['mon', 'osd', 'prometheus'] and \
not args.force_delete_data:
@ -4344,25 +4368,25 @@ def command_rm_cluster():
continue
unit_name = get_unit_name(args.fsid, d['name'])
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'disable', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
# cluster units
for unit_name in ['ceph-%s.target' % args.fsid]:
call(['systemctl', 'stop', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'reset-failed', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
call(['systemctl', 'disable', unit_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
'\\x2d'))
call(['systemctl', 'stop', slice_name],
verbose_on_failure=False)
verbosity=CallVerbosity.DEBUG)
# rm units
call_throws(['rm', '-f', args.unit_dir +
@ -4655,13 +4679,13 @@ class Apt(Packager):
def install(self, ls):
logger.info('Installing packages %s...' % ls)
call_throws(['apt', 'install', '-y'] + ls)
call_throws(['apt-get', 'install', '-y'] + ls)
def install_podman(self):
if self.distro == 'ubuntu':
logger.info('Setting up repo for podman...')
self.add_kubic_repo()
call_throws(['apt', 'update'])
call_throws(['apt-get', 'update'])
logger.info('Attempting podman install...')
try:
@ -5436,7 +5460,6 @@ class HostFacts():
up_secs, _ = raw_time.split()
return float(up_secs)
@property
def kernel_security(self):
# type: () -> Dict[str, str]
"""Determine the security features enabled in the kernel - SELinux, AppArmor"""
@ -5501,6 +5524,23 @@ class HostFacts():
"description": "Linux Security Module framework is not available"
}
@property
def kernel_parameters(self):
# type: () -> Dict[str, str]
"""Get kernel parameters required/used in Ceph clusters"""
k_param = {}
out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
if out:
param_list = out.split('\n')
param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list}
# return only desired parameters
if 'net.ipv4.ip_nonlocal_bind' in param_dict:
k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']
return k_param
def dump(self):
# type: () -> str
"""Return the attributes of this HostFacts object as json"""

View File

@ -278,6 +278,15 @@ default via fe80::2480:28ec:5097:3fe2 dev wlp2s0 proto ra metric 20600 pref medi
result = cd.dict_get_join({'a': 1}, 'a')
assert result == 1
def test_last_local_images(self):
out = '''
docker.io/ceph/daemon-base@
docker.io/ceph/ceph:v15.2.5
docker.io/ceph/daemon-base:octopus
'''
image = cd._filter_last_local_ceph_image(out)
assert image == 'docker.io/ceph/ceph:v15.2.5'
class TestCustomContainer(unittest.TestCase):
cc: cd.CustomContainer

View File

@ -124,6 +124,8 @@
#define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)
using namespace TOPNSPC::common;
void client_flush_set_callback(void *p, ObjectCacher::ObjectSet *oset)
{
Client *client = static_cast<Client*>(p);
@ -150,9 +152,11 @@ int Client::CommandHook::call(
std::lock_guard l{m_client->client_lock};
if (command == "mds_requests")
m_client->dump_mds_requests(f);
else if (command == "mds_sessions")
m_client->dump_mds_sessions(f);
else if (command == "dump_cache")
else if (command == "mds_sessions") {
bool cap_dump = false;
cmd_getval(cmdmap, "cap_dump", cap_dump);
m_client->dump_mds_sessions(f, cap_dump);
} else if (command == "dump_cache")
m_client->dump_cache(f);
else if (command == "kick_stale_sessions")
m_client->_kick_stale_sessions();
@ -468,6 +472,7 @@ void Client::dump_status(Formatter *f)
f->dump_int("osd_epoch", osd_epoch);
f->dump_int("osd_epoch_barrier", cap_epoch_barrier);
f->dump_bool("blacklisted", blacklisted);
f->dump_string("fs_name", mdsmap->get_fs_name());
}
}
@ -519,7 +524,8 @@ void Client::_finish_init()
lderr(cct) << "error registering admin socket command: "
<< cpp_strerror(-ret) << dendl;
}
ret = admin_socket->register_command("mds_sessions",
ret = admin_socket->register_command("mds_sessions "
"name=cap_dump,type=CephBool,req=false",
&m_command_hook,
"show mds session state");
if (ret < 0) {
@ -1534,7 +1540,7 @@ void Client::connect_mds_targets(mds_rank_t mds)
}
}
void Client::dump_mds_sessions(Formatter *f)
void Client::dump_mds_sessions(Formatter *f, bool cap_dump)
{
f->dump_int("id", get_nodeid().v);
entity_inst_t inst(messenger->get_myname(), messenger->get_myaddr_legacy());
@ -1544,7 +1550,7 @@ void Client::dump_mds_sessions(Formatter *f)
f->open_array_section("sessions");
for (const auto &p : mds_sessions) {
f->open_object_section("session");
p.second.dump(f);
p.second.dump(f, cap_dump);
f->close_section();
}
f->close_section();
@ -1912,6 +1918,7 @@ void Client::encode_dentry_release(Dentry *dn, MetaRequest *req,
rel.item.dname_len = dn->name.length();
rel.item.dname_seq = dn->lease_seq;
rel.dname = dn->name;
dn->lease_mds = -1;
}
ldout(cct, 25) << __func__ << " exit(dn:"
<< dn << ")" << dendl;
@ -4211,7 +4218,7 @@ void Client::remove_session_caps(MetaSession *s, int err)
int Client::_do_remount(bool retry_on_error)
{
uint64_t max_retries = g_conf().get_val<uint64_t>("mds_max_retries_on_remount_failure");
uint64_t max_retries = cct->_conf.get_val<uint64_t>("mds_max_retries_on_remount_failure");
errno = 0;
int r = remount_cb(callback_handle);
@ -7345,7 +7352,7 @@ unsigned Client::statx_to_mask(unsigned int flags, unsigned int want)
mask |= CEPH_CAP_AUTH_SHARED;
if (want & (CEPH_STATX_NLINK|CEPH_STATX_CTIME|CEPH_STATX_VERSION))
mask |= CEPH_CAP_LINK_SHARED;
if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION))
if (want & (CEPH_STATX_NLINK|CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION))
mask |= CEPH_CAP_FILE_SHARED;
if (want & (CEPH_STATX_VERSION|CEPH_STATX_CTIME))
mask |= CEPH_CAP_XATTR_SHARED;
@ -8130,6 +8137,7 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
string dn_name;
while (true) {
int mask = caps;
if (!dirp->inode->is_complete_and_ordered())
return -EAGAIN;
if (pd == dir->readdir_cache.end())
@ -8147,7 +8155,10 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
}
int idx = pd - dir->readdir_cache.begin();
int r = _getattr(dn->inode, caps, dirp->perms);
if (dn->inode->is_dir()) {
mask |= CEPH_STAT_RSTAT;
}
int r = _getattr(dn->inode, mask, dirp->perms);
if (r < 0)
return r;
@ -8231,7 +8242,7 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
uint64_t next_off = 1;
int r;
r = _getattr(diri, caps, dirp->perms);
r = _getattr(diri, caps | CEPH_STAT_RSTAT, dirp->perms);
if (r < 0)
return r;
@ -8264,7 +8275,7 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
in = diri->get_first_parent()->dir->parent_inode;
int r;
r = _getattr(in, caps, dirp->perms);
r = _getattr(in, caps | CEPH_STAT_RSTAT, dirp->perms);
if (r < 0)
return r;
@ -8330,7 +8341,11 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
int r;
if (check_caps) {
r = _getattr(entry.inode, caps, dirp->perms);
int mask = caps;
if(entry.inode->is_dir()){
mask |= CEPH_STAT_RSTAT;
}
r = _getattr(entry.inode, mask, dirp->perms);
if (r < 0)
return r;
}
@ -9224,7 +9239,7 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
int want, have = 0;
bool movepos = false;
std::unique_ptr<C_SaferCond> onuninline;
int64_t r = 0;
int64_t rc = 0;
const auto& conf = cct->_conf;
Inode *in = f->inode.get();
utime_t lat;
@ -9242,8 +9257,9 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
loff_t start_pos = offset;
if (in->inline_version == 0) {
r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true);
auto r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true);
if (r < 0) {
rc = r;
goto done;
}
ceph_assert(in->inline_version > 0);
@ -9254,9 +9270,12 @@ retry:
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
r = get_caps(f, CEPH_CAP_FILE_RD, want, &have, -1);
if (r < 0) {
goto done;
{
auto r = get_caps(f, CEPH_CAP_FILE_RD, want, &have, -1);
if (r < 0) {
rc = r;
goto done;
}
}
if (f->flags & O_DIRECT)
have &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO);
@ -9278,12 +9297,12 @@ retry:
bl->substr_of(in->inline_data, offset, len - offset);
bl->append_zero(endoff - len);
}
r = endoff - offset;
rc = endoff - offset;
} else if ((uint64_t)offset < endoff) {
bl->append_zero(endoff - offset);
r = endoff - offset;
rc = endoff - offset;
} else {
r = 0;
rc = 0;
}
goto success;
}
@ -9296,27 +9315,31 @@ retry:
if (f->flags & O_RSYNC) {
_flush_range(in, offset, size);
}
r = _read_async(f, offset, size, bl);
if (r < 0)
rc = _read_async(f, offset, size, bl);
if (rc < 0)
goto done;
} else {
if (f->flags & O_DIRECT)
_flush_range(in, offset, size);
bool checkeof = false;
r = _read_sync(f, offset, size, bl, &checkeof);
if (r < 0)
rc = _read_sync(f, offset, size, bl, &checkeof);
if (rc < 0)
goto done;
if (checkeof) {
offset += r;
size -= r;
offset += rc;
size -= rc;
put_cap_ref(in, CEPH_CAP_FILE_RD);
have = 0;
// reverify size
r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
if (r < 0)
goto done;
{
auto r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
if (r < 0) {
rc = r;
goto done;
}
}
// eof? short read.
if ((uint64_t)offset < in->size)
@ -9325,10 +9348,10 @@ retry:
}
success:
ceph_assert(r >= 0);
ceph_assert(rc >= 0);
if (movepos) {
// adjust fd pos
f->pos = start_pos + r;
f->pos = start_pos + rc;
}
lat = ceph_clock_now();
@ -9348,7 +9371,7 @@ done:
in->mark_caps_dirty(CEPH_CAP_FILE_WR);
check_caps(in, 0);
} else
r = ret;
rc = ret;
}
if (have) {
put_cap_ref(in, CEPH_CAP_FILE_RD);
@ -9356,7 +9379,7 @@ done:
if (movepos) {
unlock_fh_pos(f);
}
return r;
return rc;
}
Client::C_Readahead::C_Readahead(Client *c, Fh *f) :
@ -9874,6 +9897,8 @@ int Client::ftruncate(int fd, loff_t length, const UserPerm& perms)
if (f->flags & O_PATH)
return -EBADF;
#endif
if ((f->mode & CEPH_FILE_MODE_WR) == 0)
return -EBADF;
struct stat attr;
attr.st_size = length;
return _setattr(f->inode, &attr, CEPH_SETATTR_SIZE, perms);
@ -11463,6 +11488,9 @@ int Client::_getxattr(Inode *in, const char *name, void *value, size_t size,
if (vxattr->flags & VXATTR_RSTAT) {
flags |= CEPH_STAT_RSTAT;
}
if (vxattr->flags & VXATTR_DIRSTAT) {
flags |= CEPH_CAP_FILE_SHARED;
}
r = _getattr(in, flags, perms, true);
if (r != 0) {
// Error from getattr!
@ -11998,18 +12026,21 @@ size_t Client::_vxattrcb_snap_btime(Inode *in, char *val, size_t size)
(long unsigned)in->snap_btime.nsec());
}
size_t Client::_vxattrcb_cluster_fsid(Inode *in, char *val, size_t size)
{
return snprintf(val, size, "%s", monclient->get_fsid().to_string().c_str());
}
size_t Client::_vxattrcb_client_id(Inode *in, char *val, size_t size)
{
auto name = messenger->get_myname();
return snprintf(val, size, "%s%ld", name.type_str(), name.num());
}
#define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2
#define XATTR_NAME_CEPH(_type, _name) \
{ \
name: CEPH_XATTR_NAME(_type, _name), \
getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
readonly: true, \
exists_cb: NULL, \
flags: 0, \
}
#define XATTR_NAME_CEPH2(_type, _name, _flags) \
#define XATTR_NAME_CEPH(_type, _name, _flags) \
{ \
name: CEPH_XATTR_NAME(_type, _name), \
getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
@ -12047,14 +12078,14 @@ const Client::VXattr Client::_dir_vxattrs[] = {
XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
XATTR_NAME_CEPH(dir, entries),
XATTR_NAME_CEPH(dir, files),
XATTR_NAME_CEPH(dir, subdirs),
XATTR_NAME_CEPH2(dir, rentries, VXATTR_RSTAT),
XATTR_NAME_CEPH2(dir, rfiles, VXATTR_RSTAT),
XATTR_NAME_CEPH2(dir, rsubdirs, VXATTR_RSTAT),
XATTR_NAME_CEPH2(dir, rbytes, VXATTR_RSTAT),
XATTR_NAME_CEPH2(dir, rctime, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, entries, VXATTR_DIRSTAT),
XATTR_NAME_CEPH(dir, files, VXATTR_DIRSTAT),
XATTR_NAME_CEPH(dir, subdirs, VXATTR_DIRSTAT),
XATTR_NAME_CEPH(dir, rentries, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rfiles, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rsubdirs, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rbytes, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rctime, VXATTR_RSTAT),
{
name: "ceph.quota",
getxattr_cb: &Client::_vxattrcb_quota,
@ -12104,6 +12135,24 @@ const Client::VXattr Client::_file_vxattrs[] = {
{ name: "" } /* Required table terminator */
};
const Client::VXattr Client::_common_vxattrs[] = {
{
name: "ceph.cluster_fsid",
getxattr_cb: &Client::_vxattrcb_cluster_fsid,
readonly: true,
exists_cb: nullptr,
flags: 0,
},
{
name: "ceph.client_id",
getxattr_cb: &Client::_vxattrcb_client_id,
readonly: true,
exists_cb: nullptr,
flags: 0,
},
{ name: "" } /* Required table terminator */
};
const Client::VXattr *Client::_get_vxattrs(Inode *in)
{
if (in->is_dir())
@ -12124,7 +12173,16 @@ const Client::VXattr *Client::_match_vxattr(Inode *in, const char *name)
vxattr++;
}
}
// for common vxattrs
vxattr = _common_vxattrs;
while (!vxattr->name.empty()) {
if (vxattr->name == name)
return vxattr;
vxattr++;
}
}
return NULL;
}
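
The new _common_vxattrs table above exposes ceph.cluster_fsid and ceph.client_id on every inode served by this userspace client (ceph-fuse/libcephfs). A hedged usage sketch from a client host, assuming a CephFS mount at /mnt/cephfs (the path is illustrative) and the Linux getxattr(2) call:

#include <sys/xattr.h>
#include <cstdio>

int main() {
  char buf[128];
  // any file or directory inside the mount works; the path is an assumption
  ssize_t n = getxattr("/mnt/cephfs", "ceph.cluster_fsid", buf, sizeof(buf));
  if (n >= 0)
    std::printf("cluster fsid: %.*s\n", static_cast<int>(n), buf);

  n = getxattr("/mnt/cephfs", "ceph.client_id", buf, sizeof(buf));
  if (n >= 0)
    std::printf("client id:    %.*s\n", static_cast<int>(n), buf);
  return 0;
}
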

View File

@ -778,7 +778,7 @@ protected:
void _sync_write_commit(Inode *in);
void dump_mds_requests(Formatter *f);
void dump_mds_sessions(Formatter *f);
void dump_mds_sessions(Formatter *f, bool cap_dump=false);
int make_request(MetaRequest *req, const UserPerm& perms,
InodeRef *ptarget = 0, bool *pcreated = 0,
@ -1007,9 +1007,11 @@ private:
/* Flags for VXattr */
static const unsigned VXATTR_RSTAT = 0x1;
static const unsigned VXATTR_DIRSTAT = 0x2;
static const VXattr _dir_vxattrs[];
static const VXattr _file_vxattrs[];
static const VXattr _common_vxattrs[];
@ -1159,6 +1161,9 @@ private:
bool _vxattrcb_snap_btime_exists(Inode *in);
size_t _vxattrcb_snap_btime(Inode *in, char *val, size_t size);
size_t _vxattrcb_cluster_fsid(Inode *in, char *val, size_t size);
size_t _vxattrcb_client_id(Inode *in, char *val, size_t size);
static const VXattr *_get_vxattrs(Inode *in);
static const VXattr *_match_vxattr(Inode *in, const char *name);

View File

@ -5,6 +5,7 @@
#include "messages/MClientCapRelease.h"
#include "MetaSession.h"
#include "Inode.h"
#include "common/Formatter.h"
@ -21,7 +22,7 @@ const char *MetaSession::get_state_name() const
}
}
void MetaSession::dump(Formatter *f) const
void MetaSession::dump(Formatter *f, bool cap_dump) const
{
f->dump_int("mds", mds_num);
f->dump_object("addrs", addrs);
@ -31,6 +32,13 @@ void MetaSession::dump(Formatter *f) const
f->dump_stream("last_cap_renew_request") << last_cap_renew_request;
f->dump_unsigned("cap_renew_seq", cap_renew_seq);
f->dump_int("num_caps", caps.size());
if (cap_dump) {
f->open_array_section("caps");
for (const auto& cap : caps) {
f->dump_object("cap", *cap);
}
f->close_section();
}
f->dump_string("state", get_state_name());
}
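
The dump() change above makes per-cap output opt-in through the new cap_dump flag, which defaults to false so existing dumps keep their size. A hypothetical caller-side sketch, not part of this diff, assuming ceph's JSONFormatter from common/Formatter.h:

#include "common/Formatter.h"
#include "MetaSession.h"
#include <iostream>

// dump one session, including the per-cap array added above
void dump_session_with_caps(const MetaSession& s) {
  ceph::JSONFormatter f(true);      // pretty-printed JSON
  f.open_object_section("session");
  s.dump(&f, /*cap_dump=*/true);    // new optional argument
  f.close_section();
  f.flush(std::cout);
}
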

View File

@ -66,7 +66,7 @@ struct MetaSession {
const char *get_state_name() const;
void dump(Formatter *f) const;
void dump(Formatter *f, bool cap_dump=false) const;
void enqueue_cap_release(inodeno_t ino, uint64_t cap_id, ceph_seq_t iseq,
ceph_seq_t mseq, epoch_t osd_barrier);

Some files were not shown because too many files have changed in this diff.