import ceph pacific 16.2.5

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Thomas Lamprecht 2021-07-12 14:38:07 +02:00
parent a912ff2c95
commit b3b6e05ebb
861 changed files with 29320 additions and 15257 deletions


@@ -1,3 +1,6 @@
+api-change:
+- src/pybind/mgr/dashboard/openapi.yaml
 build/ops:
 - "**/CMakeLists.txt"
 - admin/**


@@ -52,6 +52,7 @@ if(MINGW)
   set(CMAKE_CXX_LINK_EXECUTABLE
     "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> ${CMAKE_GNULD_IMAGE_VERSION} <LINK_LIBRARIES>")
+  link_directories(${MINGW_LINK_DIRECTORIES})
 endif()
 option(WITH_CCACHE "Build with ccache.")
@@ -716,4 +717,4 @@ add_tags(ctags
   EXCLUDES "*.js" "*.css" ".tox" "python-common/build")
 add_custom_target(tags DEPENDS ctags)
-set(VERSION 16.2.4)
+set(VERSION 16.2.5)


@@ -11,6 +11,43 @@
 >=16.0.0
 --------
+* `ceph-mgr-modules-core` debian package does not recommend `ceph-mgr-rook`
+  anymore. As the latter depends on `python3-numpy` which cannot be imported in
+  different Python sub-interpreters multi-times if the version of
+  `python3-numpy` is older than 1.19. Since `apt-get` installs the `Recommends`
+  packages by default, `ceph-mgr-rook` was always installed along with
+  `ceph-mgr` debian package as an indirect dependency. If your workflow depends
+  on this behavior, you might want to install `ceph-mgr-rook` separately.
+
+* mgr/nfs: ``nfs`` module is moved out of volumes plugin. Prior using the
+  ``ceph nfs`` commands, ``nfs`` mgr module must be enabled.
+
+* volumes/nfs: The ``cephfs`` cluster type has been removed from the
+  ``nfs cluster create`` subcommand. Clusters deployed by cephadm can
+  support an NFS export of both ``rgw`` and ``cephfs`` from a single
+  NFS cluster instance.
+
+* The ``nfs cluster update`` command has been removed. You can modify
+  the placement of an existing NFS service (and/or its associated
+  ingress service) using ``orch ls --export`` and ``orch apply -i
+  ...``.
+
+* The ``orch apply nfs`` command no longer requires a pool or
+  namespace argument. We strongly encourage users to use the defaults
+  so that the ``nfs cluster ls`` and related commands will work
+  properly.
+
+* The ``nfs cluster delete`` and ``nfs export delete`` commands are
+  deprecated and will be removed in a future release. Please use
+  ``nfs cluster rm`` and ``nfs export rm`` instead.
+
+* mgr-pg_autoscaler: Autoscaler will now start out by scaling each
+  pool to have a full complements of pgs from the start and will only
+  decrease it when other pools need more pgs due to increased usage.
+  This improves out of the box performance of Ceph by allowing more PGs
+  to be created for a given pool.
+
 * CephFS: Disabling allow_standby_replay on a file system will also stop all
   standby-replay daemons for that file system.
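A purely illustrative aside on the two packaging/module notes in the hunk above (the package and module names are taken from the notes themselves; the commands are standard Ceph and Debian tooling):

    # the nfs mgr module must be enabled before any `ceph nfs ...` command is used
    ceph mgr module enable nfs
    # ceph-mgr-rook is no longer pulled in via Recommends; install it explicitly if needed
    apt install ceph-mgr-rook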


@@ -122,7 +122,7 @@
 # main package definition
 #################################################################################
 Name: ceph
-Version: 16.2.4
+Version: 16.2.5
 Release: 0%{?dist}
 %if 0%{?fedora} || 0%{?rhel}
 Epoch: 2
@@ -138,7 +138,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
 Group: System/Filesystems
 %endif
 URL: http://ceph.com/
-Source0: %{?_remote_tarball_prefix}ceph-16.2.4.tar.bz2
+Source0: %{?_remote_tarball_prefix}ceph-16.2.5.tar.bz2
 %if 0%{?suse_version}
 # _insert_obs_source_lines_here
 ExclusiveArch: x86_64 aarch64 ppc64le s390x
@@ -1205,7 +1205,7 @@ This package provides Ceph default alerts for Prometheus.
 # common
 #################################################################################
 %prep
-%autosetup -p1 -n ceph-16.2.4
+%autosetup -p1 -n ceph-16.2.5
 %build
 # LTO can be enabled as soon as the following GCC bug is fixed:
@@ -1767,6 +1767,7 @@ fi
 %{_datadir}/ceph/mgr/localpool
 %{_datadir}/ceph/mgr/mds_autoscaler
 %{_datadir}/ceph/mgr/mirroring
+%{_datadir}/ceph/mgr/nfs
 %{_datadir}/ceph/mgr/orchestrator
 %{_datadir}/ceph/mgr/osd_perf_query
 %{_datadir}/ceph/mgr/osd_support


@@ -1767,6 +1767,7 @@ fi
 %{_datadir}/ceph/mgr/localpool
 %{_datadir}/ceph/mgr/mds_autoscaler
 %{_datadir}/ceph/mgr/mirroring
+%{_datadir}/ceph/mgr/nfs
 %{_datadir}/ceph/mgr/orchestrator
 %{_datadir}/ceph/mgr/osd_perf_query
 %{_datadir}/ceph/mgr/osd_support


@@ -1,7 +1,13 @@
-ceph (16.2.4-1focal) focal; urgency=medium
+ceph (16.2.5-1focal) focal; urgency=medium
 
- -- Jenkins Build Slave User <jenkins-build@braggi18.front.sepia.ceph.com>  Thu, 13 May 2021 17:30:29 +0000
+ -- Jenkins Build Slave User <jenkins-build@braggi17.front.sepia.ceph.com>  Thu, 08 Jul 2021 14:16:59 +0000
+
+ceph (16.2.5-1) stable; urgency=medium
+
+  * New upstream release
+
+ -- Ceph Release Team <ceph-maintainers@ceph.com>  Thu, 08 Jul 2021 14:03:54 +0000
 
 ceph (16.2.4-1) stable; urgency=medium


@@ -7,6 +7,7 @@ usr/share/ceph/mgr/insights
 usr/share/ceph/mgr/iostat
 usr/share/ceph/mgr/localpool
 usr/share/ceph/mgr/mirroring
+usr/share/ceph/mgr/nfs
 usr/share/ceph/mgr/orchestrator
 usr/share/ceph/mgr/osd_perf_query
 usr/share/ceph/mgr/osd_support


@@ -226,7 +226,6 @@ Package: ceph-mgr
 Architecture: linux-any
 Depends: ceph-base (= ${binary:Version}),
          ceph-mgr-modules-core (= ${binary:Version}),
-         libsqlite3-mod-ceph,
          python3-bcrypt,
          python3-cherrypy3,
          python3-distutils,
@@ -302,7 +301,6 @@ Depends: ${misc:Depends},
          python3-openssl,
 Replaces: ceph-mgr (<< 15.1.0)
 Breaks: ceph-mgr (<< 15.1.0)
-Recommends: ceph-mgr-rook
 Description: ceph manager modules which are always enabled
  Ceph is a massively scalable, open-source, distributed
  storage system that runs on commodity hardware and delivers object,


@@ -0,0 +1,37 @@
libcephsqlite.so libsqlite3-mod-ceph #MINVER#
_ZGVN18SimpleRADOSStriper7biglockB5cxx11E@Base 15.2.0-1
_ZGVN18SimpleRADOSStriper8lockdescB5cxx11E@Base 15.2.0-1
_ZN18SimpleRADOSStriper10XATTR_EXCLE@Base 15.2.0-1
_ZN18SimpleRADOSStriper10XATTR_SIZEE@Base 15.2.0-1
_ZN18SimpleRADOSStriper12recover_lockEv@Base 15.2.0-1
_ZN18SimpleRADOSStriper12set_metadataEmb@Base 15.2.0-1
_ZN18SimpleRADOSStriper12shrink_allocEm@Base 15.2.0-1
_ZN18SimpleRADOSStriper13XATTR_VERSIONE@Base 15.2.0-1
_ZN18SimpleRADOSStriper13config_loggerEPN4ceph6common11CephContextESt17basic_string_viewIcSt11char_traitsIcEEPSt10shared_ptrINS1_12PerfCountersEE@Base 15.2.0-1
_ZN18SimpleRADOSStriper13print_lockersERSo@Base 15.2.0-1
_ZN18SimpleRADOSStriper13wait_for_aiosEb@Base 15.2.0-1
_ZN18SimpleRADOSStriper15XATTR_ALLOCATEDE@Base 15.2.0-1
_ZN18SimpleRADOSStriper16lock_keeper_mainEv@Base 15.2.0-1
_ZN18SimpleRADOSStriper18maybe_shrink_allocEv@Base 15.2.0-1
_ZN18SimpleRADOSStriper24XATTR_LAYOUT_OBJECT_SIZEE@Base 15.2.0-1
_ZN18SimpleRADOSStriper24XATTR_LAYOUT_STRIPE_UNITE@Base 15.2.0-1
_ZN18SimpleRADOSStriper25XATTR_LAYOUT_STRIPE_COUNTE@Base 15.2.0-1
_ZN18SimpleRADOSStriper4lockEm@Base 15.2.0-1
_ZN18SimpleRADOSStriper4openEv@Base 15.2.0-1
_ZN18SimpleRADOSStriper4readEPvmm@Base 15.2.0-1
_ZN18SimpleRADOSStriper4statEPm@Base 15.2.0-1
_ZN18SimpleRADOSStriper5flushEv@Base 15.2.0-1
_ZN18SimpleRADOSStriper5writeEPKvmm@Base 15.2.0-1
_ZN18SimpleRADOSStriper6createEv@Base 15.2.0-1
_ZN18SimpleRADOSStriper6removeEv@Base 15.2.0-1
_ZN18SimpleRADOSStriper6str2blESt17basic_string_viewIcSt11char_traitsIcEE@Base 15.2.0-1
_ZN18SimpleRADOSStriper6unlockEv@Base 15.2.0-1
_ZN18SimpleRADOSStriper7biglockB5cxx11E@Base 15.2.0-1
_ZN18SimpleRADOSStriper7uint2blEm@Base 15.2.0-1
_ZN18SimpleRADOSStriper8lockdescB5cxx11E@Base 15.2.0-1
_ZN18SimpleRADOSStriper8truncateEm@Base 15.2.0-1
_ZN18SimpleRADOSStriperD1Ev@Base 15.2.0-1
_ZN18SimpleRADOSStriperD2Ev@Base 15.2.0-1
_ZNK18SimpleRADOSStriper15get_next_extentEmm@Base 15.2.0-1
cephsqlite_setcct@Base 15.2.0-1
sqlite3_cephsqlite_init@Base 15.2.0-1


@@ -126,8 +126,8 @@ Adoption process
    This will perform a ``cephadm check-host`` on each host before adding it;
    this check ensures that the host is functioning properly. The IP address
-   argument is required only if DNS does not allow you to connect to each host
-   by its short name.
+   argument is recommended; if not provided, then the host name will be resolved
+   via DNS.
 
 #. Verify that the adopted monitor and manager daemons are visible:


@@ -3,6 +3,8 @@
 Compatibility and Stability
 ===========================
 
+.. _cephadm-compatibility-with-podman:
+
 Compatibility with Podman Versions
 ----------------------------------
 
@@ -25,6 +27,10 @@ Those versions are expected to work:
 | >= 16.2.1 | False | True  | True  | False | True  |
 +-----------+-------+-------+-------+-------+-------+
 
+.. warning::
+
+    Only podman versions that are 2.0.0 and higher work with Ceph Pacific, with
+    the exception of podman version 2.2.1, which does not work with Ceph Pacific.
+    kubic stable is known to work with Ceph Pacific, but it must be run with a
+    newer kernel.
+
 .. _cephadm-stability:
 
 Stability


@@ -37,14 +37,27 @@ To add each new host to the cluster, perform two steps:
 
   .. prompt:: bash #
 
-    ceph orch host add *newhost*
+    ceph orch host add *<newhost>* [*<ip>*] [*<label1> ...*]
 
   For example:
 
   .. prompt:: bash #
 
-    ceph orch host add host2
-    ceph orch host add host3
+    ceph orch host add host2 10.10.0.102
+    ceph orch host add host3 10.10.0.103
+
+  It is best to explicitly provide the host IP address.  If an IP is
+  not provided, then the host name will be immediately resolved via
+  DNS and that IP will be used.
+
+  One or more labels can also be included to immediately label the
+  new host.  For example, by default the ``_admin`` label will make
+  cephadm maintain a copy of the ``ceph.conf`` file and a
+  ``client.admin`` keyring file in ``/etc/ceph``:
+
+  .. prompt:: bash #
+
+    ceph orch host add host4 10.10.0.104 --labels _admin
 
 .. _cephadm-removing-hosts:
 
@@ -118,6 +131,33 @@ To remove a label, run::
 
         ceph orch host label rm my_hostname my_label
 
+.. _cephadm-special-host-labels:
+
+Special host labels
+-------------------
+
+The following host labels have a special meaning to cephadm.  All start with ``_``.
+
+* ``_no_schedule``: *Do not schedule or deploy daemons on this host*.
+
+  This label prevents cephadm from deploying daemons on this host.  If it is added to
+  an existing host that already contains Ceph daemons, it will cause cephadm to move
+  those daemons elsewhere (except OSDs, which are not removed automatically).
+
+* ``_no_autotune_memory``: *Do not autotune memory on this host*.
+
+  This label will prevent daemon memory from being tuned even when the
+  ``osd_memory_target_autotune`` or similar option is enabled for one or more daemons
+  on that host.
+
+* ``_admin``: *Distribute client.admin and ceph.conf to this host*.
+
+  By default, an ``_admin`` label is applied to the first host in the cluster (where
+  bootstrap was originally run), and the ``client.admin`` key is set to be distributed
+  to that host via the ``ceph orch client-keyring ...`` function.  Adding this label
+  to additional hosts will normally cause cephadm to deploy config and keyring files
+  in ``/etc/ceph``.
+
 Maintenance Mode
 ================
 
@@ -138,21 +178,21 @@ Many hosts can be added at once using
 
     ---
     service_type: host
-    addr: node-00
     hostname: node-00
+    addr: 192.168.0.10
     labels:
     - example1
     - example2
     ---
     service_type: host
-    addr: node-01
     hostname: node-01
+    addr: 192.168.0.11
     labels:
     - grafana
     ---
     service_type: host
-    addr: node-02
     hostname: node-02
+    addr: 192.168.0.12
 
 This can be combined with service specifications (below) to create a cluster spec
 file to deploy a whole cluster in one command. see ``cephadm bootstrap --apply-spec``
 
@@ -250,24 +290,12 @@ There are two ways to customize this configuration for your environment:
 Fully qualified domain names vs bare host names
 ===============================================
 
-cephadm has very minimal requirements when it comes to resolving host
-names etc. When cephadm initiates an ssh connection to a remote host,
-the host name can be resolved in four different ways:
-
-- a custom ssh config resolving the name to an IP
-- via an externally maintained ``/etc/hosts``
-- via explicitly providing an IP address to cephadm: ``ceph orch host add <hostname> <IP>``
-- automatic name resolution via DNS.
-
-Ceph itself uses the command ``hostname`` to determine the name of the
-current host.
-
 .. note::
 
   cephadm demands that the name of the host given via ``ceph orch host add``
   equals the output of ``hostname`` on remote hosts.
 
-  Otherwise cephadm can't be sure, the host names returned by
+  Otherwise cephadm can't be sure that names returned by
   ``ceph * metadata`` match the hosts known to cephadm. This might result
   in a :ref:`cephadm-stray-host` warning.
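A brief, hypothetical illustration of the special host labels documented in the hunk above (the host names and IP are invented for the example):

    # keep an admin ceph.conf and client.admin keyring on a second host
    ceph orch host add host5 10.10.0.105 --labels _admin
    # stop cephadm from scheduling new daemons on a host
    ceph orch host label add host6 _no_schedule
    # opt a host out of OSD memory autotuning
    ceph orch host label add host7 _no_autotune_memory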


@@ -23,6 +23,13 @@ Requirements
 Any modern Linux distribution should be sufficient. Dependencies
 are installed automatically by the bootstrap process below.
 
+See the section :ref:`Compatibility With Podman
+Versions<cephadm-compatibility-with-podman>` for a table of Ceph versions that
+are compatible with Podman. Not every version of Podman is compatible with
+Ceph.
+
 .. _get-cephadm:
 
 Install cephadm
@@ -66,8 +73,8 @@ curl-based installation
 * Although the standalone script is sufficient to get a cluster started, it is
   convenient to have the ``cephadm`` command installed on the host. To install
-  the packages that provide the ``cephadm`` command for the Octopus release,
-  run the following commands:
+  the packages that provide the ``cephadm`` command, run the following
+  commands:
 
   .. prompt:: bash #
     :substitutions:
@@ -148,11 +155,14 @@ This command will:
   host.
 * Generate a new SSH key for the Ceph cluster and add it to the root
   user's ``/root/.ssh/authorized_keys`` file.
+* Write a copy of the public key to ``/etc/ceph/ceph.pub``.
 * Write a minimal configuration file to ``/etc/ceph/ceph.conf``. This
   file is needed to communicate with the new cluster.
 * Write a copy of the ``client.admin`` administrative (privileged!)
   secret key to ``/etc/ceph/ceph.client.admin.keyring``.
-* Write a copy of the public key to ``/etc/ceph/ceph.pub``.
+* Add the ``_admin`` label to the bootstrap host. By default, any host
+  with this label will (also) get a copy of ``/etc/ceph/ceph.conf`` and
+  ``/etc/ceph/ceph.client.admin.keyring``.
 
 Further information about cephadm bootstrap
 -------------------------------------------
@@ -184,7 +194,13 @@ available options.
 * You can pass any initial Ceph configuration options to the new
   cluster by putting them in a standard ini-style configuration file
-  and using the ``--config *<config-file>*`` option.
+  and using the ``--config *<config-file>*`` option. For example::
+
+      $ cat <<EOF > initial-ceph.conf
+      [global]
+      osd crush chooseleaf type = 0
+      EOF
+      $ ./cephadm bootstrap --config initial-ceph.conf ...
 
 * The ``--ssh-user *<user>*`` option makes it possible to choose which ssh
   user cephadm will use to connect to hosts. The associated ssh key will be
@@ -266,6 +282,16 @@ Adding Hosts
 Next, add all hosts to the cluster by following :ref:`cephadm-adding-hosts`.
 
+By default, a ``ceph.conf`` file and a copy of the ``client.admin`` keyring
+are maintained in ``/etc/ceph`` on all hosts with the ``_admin`` label, which is initially
+applied only to the bootstrap host.  We usually recommend that one or more other hosts be
+given the ``_admin`` label so that the Ceph CLI (e.g., via ``cephadm shell``) is easily
+accessible on multiple hosts.  To add the ``_admin`` label to additional host(s),
+
+  .. prompt:: bash #
+
+    ceph orch host label add *<host>* _admin
+
 Adding additional MONs
 ======================
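A hedged sketch of a bootstrap invocation that combines the options discussed above (the monitor IP and ssh user are assumptions; ``initial-ceph.conf`` refers to the example file shown in the hunk above):

    # bootstrap with an initial config file and a non-root ssh user
    cephadm bootstrap --mon-ip 10.10.0.101 --config initial-ceph.conf --ssh-user cephadm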


@@ -41,59 +41,75 @@ Manager <https://prometheus.io/docs/alerting/alertmanager/>`_ and `Grafana
 
 Deploying monitoring with cephadm
 ---------------------------------
 
-By default, bootstrap will deploy a basic monitoring stack. If you
-did not do this (by passing ``--skip-monitoring-stack``, or if you
-converted an existing cluster to cephadm management, you can set up
-monitoring by following the steps below.
-
-#. Enable the prometheus module in the ceph-mgr daemon. This exposes the internal Ceph metrics so that prometheus can scrape them.
-
-   .. code-block:: bash
+The default behavior of ``cephadm`` is to deploy a basic monitoring stack.  It
+is however possible that you have a Ceph cluster without a monitoring stack,
+and you would like to add a monitoring stack to it. (Here are some ways that
+you might have come to have a Ceph cluster without a monitoring stack: You
+might have passed the ``--skip-monitoring stack`` option to ``cephadm`` during
+the installation of the cluster, or you might have converted an existing
+cluster (which had no monitoring stack) to cephadm management.)
+
+To set up monitoring on a Ceph cluster that has no monitoring, follow the
+steps below:
+
+#. Enable the Prometheus module in the ceph-mgr daemon. This exposes the internal Ceph metrics so that Prometheus can scrape them:
+
+   .. prompt:: bash #
 
     ceph mgr module enable prometheus
 
-#. Deploy a node-exporter service on every node of the cluster. The node-exporter provides host-level metrics like CPU and memory utilization.
+#. Deploy a node-exporter service on every node of the cluster. The node-exporter provides host-level metrics like CPU and memory utilization:
 
-   .. code-block:: bash
+   .. prompt:: bash #
 
    ceph orch apply node-exporter '*'
 
-#. Deploy alertmanager
+#. Deploy alertmanager:
 
-   .. code-block:: bash
+   .. prompt:: bash #
 
    ceph orch apply alertmanager 1
 
-#. Deploy prometheus. A single prometheus instance is sufficient, but
-   for HA you may want to deploy two.
-
-   .. code-block:: bash
-
-    ceph orch apply prometheus 1    # or 2
-
-#. Deploy grafana
-
-   .. code-block:: bash
+#. Deploy Prometheus. A single Prometheus instance is sufficient, but
+   for high availablility (HA) you might want to deploy two:
+
+   .. prompt:: bash #
+
+    ceph orch apply prometheus 1
+
+   or
+
+   .. prompt:: bash #
+
+    ceph orch apply prometheus 2
+
+#. Deploy grafana:
+
+   .. prompt:: bash #
 
    ceph orch apply grafana 1
 
-Cephadm takes care of the configuration of Prometheus, Grafana, and Alertmanager
-automatically.
-
-However, there is one exception to this rule. In a some setups, the Dashboard
-user's browser might not be able to access the Grafana URL configured in Ceph
-Dashboard. One such scenario is when the cluster and the accessing user are each
-in a different DNS zone.
-
-For this case, there is an extra configuration option for Ceph Dashboard, which
-can be used to configure the URL for accessing Grafana by the user's browser.
-This value will never be altered by cephadm. To set this configuration option,
-issue the following command::
-
-  $ ceph dashboard set-grafana-frontend-api-url <grafana-server-api>
-
-It may take a minute or two for services to be deployed. Once
-completed, you should see something like this from ``ceph orch ls``
+Manually setting the Grafana URL
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Cephadm automatically configures Prometheus, Grafana, and Alertmanager in
+all cases except one.
+
+In a some setups, the Dashboard user's browser might not be able to access the
+Grafana URL that is configured in Ceph Dashboard. This can happen when the
+cluster and the accessing user are in different DNS zones.
+
+If this is the case, you can use a configuration option for Ceph Dashboard
+to set the URL that the user's browser will use to access Grafana. This
+value will never be altered by cephadm. To set this configuration option,
+issue the following command:
+
+.. prompt:: bash $
+
+  ceph dashboard set-grafana-frontend-api-url <grafana-server-api>
+
+It might take a minute or two for services to be deployed. After the
+services have been deployed, you should see something like this when you issue the command ``ceph orch ls``:
 
 .. code-block:: console
 
@@ -108,26 +124,43 @@ completed, you should see something like this from ``ceph orch ls``
 
 Configuring SSL/TLS for Grafana
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-``cephadm`` will deploy Grafana using the certificate defined in the ceph
-key/value store. If a certificate is not specified, ``cephadm`` will generate a
-self-signed certificate during deployment of the Grafana service.
+``cephadm`` deploys Grafana using the certificate defined in the ceph
+key/value store. If no certificate is specified, ``cephadm`` generates a
+self-signed certificate during the deployment of the Grafana service.
 
-A custom certificate can be configured using the following commands.
+A custom certificate can be configured using the following commands:
 
-.. code-block:: bash
+.. prompt:: bash #
 
   ceph config-key set mgr/cephadm/grafana_key -i $PWD/key.pem
   ceph config-key set mgr/cephadm/grafana_crt -i $PWD/certificate.pem
 
-If you already deployed Grafana, you need to ``reconfig`` the service for the
-configuration to be updated.
+If you have already deployed Grafana, run ``reconfig`` on the service to
+update its configuration:
 
-.. code-block:: bash
+.. prompt:: bash #
 
   ceph orch reconfig grafana
 
-The ``reconfig`` command also takes care of setting the right URL for Ceph
-Dashboard.
+The ``reconfig`` command also sets the proper URL for Ceph Dashboard.
+
+Networks and Ports
+~~~~~~~~~~~~~~~~~~
+
+All monitoring services can have the network and port they bind to configured with a yaml service specification
+
+example spec file:
+
+.. code-block:: yaml
+
+    service_type: grafana
+    service_name: grafana
+    placement:
+      count: 1
+    networks:
+    - 192.169.142.0/24
+    spec:
+      port: 4200
 
 Using custom images
 ~~~~~~~~~~~~~~~~~~~
 
@@ -153,6 +186,17 @@ For example
 
      ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1
 
+If there were already running monitoring stack daemon(s) of the type whose
+image you've changed, you must redeploy the daemon(s) in order to have them
+actually use the new image.
+
+For example, if you had changed the prometheus image
+
+.. prompt:: bash #
+
+  ceph orch redeploy prometheus
+
 .. note::
 
      By setting a custom image, the default value will be overridden (but not
 
@@ -256,16 +300,15 @@ Example
 
 Disabling monitoring
 --------------------
 
-If you have deployed monitoring and would like to remove it, you can do
-so with
-
-.. code-block:: bash
-
-  ceph orch rm grafana
-  ceph orch rm prometheus --force   # this will delete metrics data collected so far
-  ceph orch rm node-exporter
-  ceph orch rm alertmanager
-  ceph mgr module disable prometheus
+To disable monitoring and remove the software that supports it, run the following commands:
+
+.. code-block:: console
+
+  $ ceph orch rm grafana
+  $ ceph orch rm prometheus --force   # this will delete metrics data collected so far
+  $ ceph orch rm node-exporter
+  $ ceph orch rm alertmanager
+  $ ceph mgr module disable prometheus
 
 Deploying monitoring manually
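A concrete, purely illustrative instance of the Grafana URL setting discussed above (the URL is an assumption):

    # this value is only consumed by Ceph Dashboard; cephadm never alters it
    ceph dashboard set-grafana-frontend-api-url https://grafana.example.com:3000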


@@ -1,42 +1,43 @@
+.. _deploy-cephadm-nfs-ganesha:
+
 ===========
 NFS Service
 ===========
 
 .. note:: Only the NFSv4 protocol is supported.
 
-.. _deploy-cephadm-nfs-ganesha:
+The simplest way to manage NFS is via the ``ceph nfs cluster ...``
+commands; see :ref:`cephfs-nfs`. This document covers how to manage the
+cephadm services directly, which should only be necessary for unusual NFS
+configurations.
 
 Deploying NFS ganesha
 =====================
 
-Cephadm deploys NFS Ganesha using a pre-defined RADOS *pool*
-and optional *namespace*
+Cephadm deploys NFS Ganesha daemon (or set of daemons). The configuration for
+NFS is stored in the ``nfs-ganesha`` pool and exports are managed via the
+``ceph nfs export ...`` commands and via the dashboard.
 
 To deploy a NFS Ganesha gateway, run the following command:
 
 .. prompt:: bash #
 
-    ceph orch apply nfs *<svc_id>* *<pool>* *<namespace>* --placement="*<num-daemons>* [*<host1>* ...]"
+    ceph orch apply nfs *<svc_id>* [--port *<port>*] [--placement ...]
 
-For example, to deploy NFS with a service id of *foo*, that will use the RADOS
-pool *nfs-ganesha* and namespace *nfs-ns*:
+For example, to deploy NFS with a service id of *foo* on the default
+port 2049 with the default placement of a single daemon:
 
 .. prompt:: bash #
 
-    ceph orch apply nfs foo nfs-ganesha nfs-ns
+    ceph orch apply nfs foo
 
-.. note::
-   Create the *nfs-ganesha* pool first if it doesn't exist.
-
-See :ref:`orchestrator-cli-placement-spec` for details of the placement specification.
+See :ref:`orchestrator-cli-placement-spec` for the details of the placement
+specification.
 
 Service Specification
 =====================
 
-Alternatively, an NFS service can also be applied using a YAML specification.
-A service of type ``nfs`` requires a pool name and may contain
-an optional namespace:
+Alternatively, an NFS service can be applied using a YAML specification.
 
 .. code-block:: yaml
 
@@ -47,15 +48,66 @@ an optional namespace:
     - host1
     - host2
   spec:
-    pool: mypool
-    namespace: mynamespace
+    port: 12345
 
-where ``pool`` is a RADOS pool where NFS client recovery data is stored
-and ``namespace`` is a RADOS namespace where NFS client recovery
-data is stored in the pool.
+In this example, we run the server on the non-default ``port`` of
+12345 (instead of the default 2049) on ``host1`` and ``host2``.
 
-The specification can then be applied using:
+The specification can then be applied by running the following command:
 
 .. prompt:: bash #
 
     ceph orch apply -i nfs.yaml
+
+High-availability NFS
+=====================
+
+Deploying an *ingress* service for an existing *nfs* service will provide:
+
+* a stable, virtual IP that can be used to access the NFS server
+* fail-over between hosts if there is a host failure
+* load distribution across multiple NFS gateways (although this is rarely necessary)
+
+Ingress for NFS can be deployed for an existing NFS service
+(``nfs.mynfs`` in this example) with the following specification:
+
+.. code-block:: yaml
+
+    service_type: ingress
+    service_id: nfs.mynfs
+    placement:
+      count: 2
+    spec:
+      backend_service: nfs.mynfs
+      frontend_port: 2049
+      monitor_port: 9000
+      virtual_ip: 10.0.0.123/24
+
+A few notes:
+
+  * The *virtual_ip* must include a CIDR prefix length, as in the
+    example above.  The virtual IP will normally be configured on the
+    first identified network interface that has an existing IP in the
+    same subnet.  You can also specify a *virtual_interface_networks*
+    property to match against IPs in other networks; see
+    :ref:`ingress-virtual-ip` for more information.
+  * The *monitor_port* is used to access the haproxy load status
+    page.  The user is ``admin`` by default, but can be modified by
+    via an *admin* property in the spec.  If a password is not
+    specified via a *password* property in the spec, the auto-generated password
+    can be found with:
+
+    .. prompt:: bash #
+
+      ceph config-key get mgr/cephadm/ingress.*{svc_id}*/monitor_password
+
+    For example:
+
+    .. prompt:: bash #
+
+      ceph config-key get mgr/cephadm/ingress.nfs.myfoo/monitor_password
+
+  * The backend service (``nfs.mynfs`` in this example) should include
+    a *port* property that is not 2049 to avoid conflicting with the
+    ingress service, which could be placed on the same host(s).
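Putting the NFS and ingress pieces above together, a sketch of a single multi-document spec (the service id, backend port and virtual IP are illustrative) that could be saved to a file and applied with ``ceph orch apply -i <file>``:

    service_type: nfs
    service_id: mynfs
    placement:
      count: 2
    spec:
      # non-default port so the ingress service can own 2049
      port: 12049
    ---
    service_type: ingress
    service_id: nfs.mynfs
    placement:
      count: 2
    spec:
      backend_service: nfs.mynfs
      frontend_port: 2049
      monitor_port: 9000
      virtual_ip: 10.0.0.123/24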


@@ -299,35 +299,74 @@ CEPHADM_CHECK_KERNEL_VERSION
 The OS kernel version (maj.min) is checked for consistency across the hosts. Once again, the
 majority of the hosts is used as the basis of identifying anomalies.
 
+Client keyrings and configs
+===========================
+
+Cephadm can distribute copies of the ``ceph.conf`` and client keyring
+files to hosts.  For example, it is usually a good idea to store a
+copy of the config and ``client.admin`` keyring on any hosts that will
+be used to administer the cluster via the CLI.  By default, cephadm will do
+this for any nodes with the ``_admin`` label (which normally includes the bootstrap
+host).
+
+When a client keyring is placed under management, cephadm will:
+
+  - build a list of target hosts based on the specified placement spec (see :ref:`orchestrator-cli-placement-spec`)
+  - store a copy of the ``/etc/ceph/ceph.conf`` file on the specified host(s)
+  - store a copy of the keyring file on the specified host(s)
+  - update the ``ceph.conf`` file as needed (e.g., due to a change in the cluster monitors)
+  - update the keyring file if the entity's key is changed (e.g., via ``ceph auth ...`` commands)
+  - ensure the keyring file has the specified ownership and mode
+  - remove the keyring file when client keyring management is disabled
+  - remove the keyring file from old hosts if the keyring placement spec is updated (as needed)
+
+To view which client keyrings are currently under management::
+
+  ceph orch client-keyring ls
+
+To place a keyring under management::
+
+  ceph orch client-keyring set <entity> <placement> [--mode=<mode>] [--owner=<uid>.<gid>] [--path=<path>]
+
+- By default, the *path* will be ``/etc/ceph/client.{entity}.keyring``, which is where
+  Ceph looks by default.  Be careful specifying alternate locations as existing files
+  may be overwritten.
+- A placement of ``*`` (all hosts) is common.
+- The mode defaults to ``0600`` and ownership to ``0:0`` (user root, group root).
+
+For example, to create and deploy a ``client.rbd`` key to hosts with the ``rbd-client`` label and group readable by uid/gid 107 (qemu),::
+
+  ceph auth get-or-create-key client.rbd mon 'profile rbd' mgr 'profile rbd' osd 'profile rbd pool=my_rbd_pool'
+  ceph orch client-keyring set client.rbd label:rbd-client --owner 107:107 --mode 640
+
+The resulting keyring file is::
+
+  -rw-r-----. 1 qemu qemu 156 Apr 21 08:47 /etc/ceph/client.client.rbd.keyring
+
+To disable management of a keyring file::
+
+  ceph orch client-keyring rm <entity>
+
+Note that this will delete any keyring files for this entity that were previously written
+to cluster nodes.
+
 /etc/ceph/ceph.conf
 ===================
 
-Cephadm distributes a minimized ``ceph.conf`` that only contains
-a minimal set of information to connect to the Ceph cluster.
-
-To update the configuration settings, instead of manually editing
-the ``ceph.conf`` file, use the config database instead::
-
-  ceph config set ...
-
-See :ref:`ceph-conf-database` for details.
-
-By default, cephadm does not deploy that minimized ``ceph.conf`` across the
-cluster. To enable the management of ``/etc/ceph/ceph.conf`` files on all
-hosts, please enable this by running::
+It may also be useful to distribute ``ceph.conf`` files to hosts without an associated
+client keyring file.  By default, cephadm only deploys a ``ceph.conf`` file to hosts where a client keyring
+is also distributed (see above).  To write config files to hosts without client keyrings::
 
   ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf true
 
-To set up an initial configuration before bootstrapping
-the cluster, create an initial ``ceph.conf`` file. For example::
-
-  cat <<EOF > /etc/ceph/ceph.conf
-  [global]
-  osd crush chooseleaf type = 0
-  EOF
-
-Then, run bootstrap referencing this file::
-
-  cephadm bootstrap -c /root/ceph.conf ...
+By default, the configs are written to all hosts (i.e., those listed
+by ``ceph orch host ls``).  To specify which hosts get a ``ceph.conf``::
+
+  ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts <placement spec>
+
+For example, to distribute configs to hosts with the ``bare_config`` label,::
+
+  ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts label:bare_config
+
+(See :ref:`orchestrator-cli-placement-spec` for more information about placement specs.)
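A short, hypothetical variation on the ``client.rbd`` example above: a read-only keyring distributed to every host, plus bare config files on hosts carrying a custom label (the entity name and label are assumptions):

    ceph auth get-or-create client.ro mon 'allow r' mgr 'allow r' osd 'allow r'
    ceph orch client-keyring set client.ro '*' --mode 640
    ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf true
    ceph config set mgr mgr/cephadm/manage_etc_ceph_ceph_conf_hosts label:bare_config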


@@ -171,15 +171,27 @@ For example:
 
 Declarative State
 -----------------
 
-Note that the effect of ``ceph orch apply`` is persistent; that is, drives which are added to the system
-or become available (say, by zapping) after the command is complete will be automatically found and added to the cluster.
-
-That is, after using::
+The effect of ``ceph orch apply`` is persistent. This means that drives that
+are added to the system after the ``ceph orch apply`` command completes will be
+automatically found and added to the cluster. It also means that drives that
+become available (by zapping, for example) after the ``ceph orch apply``
+command completes will be automatically found and added to the cluster.
+
+We will examine the effects of the following command:
+
+.. prompt:: bash #
 
   ceph orch apply osd --all-available-devices
 
-* If you add new disks to the cluster they will automatically be used to create new OSDs.
-* A new OSD will be created automatically if you remove an OSD and clean the LVM physical volume.
+After running the above command:
+
+* If you add new disks to the cluster, they will automatically be used to
+  create new OSDs.
+* If you remove an OSD and clean the LVM physical volume, a new OSD will be
+  created automatically.
+
+To disable the automatic creation of OSD on available devices, use the
+``unmanaged`` parameter:
 
 If you want to avoid this behavior (disable automatic creation of OSD on available devices), use the ``unmanaged`` parameter:
 
@@ -187,6 +199,16 @@ If you want to avoid this behavior (disable automatic creation of OSD on availab
 
   ceph orch apply osd --all-available-devices --unmanaged=true
 
+.. note::
+
+  Keep these three facts in mind:
+
+  - The default behavior of ``ceph orch apply`` causes cephadm constantly to reconcile. This means that cephadm creates OSDs as soon as new drives are detected.
+
+  - Setting ``unmanaged: True`` disables the creation of OSDs. If ``unmanaged: True`` is set, nothing will happen even if you apply a new OSD service.
+
+  - ``ceph orch daemon add`` creates OSDs, but does not add an OSD service.
+
 * For cephadm, see also :ref:`cephadm-spec-unmanaged`.
 
@@ -246,7 +268,7 @@ It is possible to stop queued OSD removals by using the following command:
 
 .. prompt:: bash #
 
-    ceph orch osd rm stop <svc_id(s)>
+    ceph orch osd rm stop <osd_id(s)>
 
 Example:
 
@@ -266,7 +288,7 @@ Replacing an OSD
 
 .. prompt:: bash #
 
-    orch osd rm <svc_id(s)> --replace [--force]
+    orch osd rm <osd_id(s)> --replace [--force]
 
 Example:
 
@@ -341,25 +363,75 @@ Example command:
 
   device. To disable this behavior, see :ref:`cephadm-osd-declarative`.
 
+.. _osd_autotune:
+
+Automatically tuning OSD memory
+===============================
+
+OSD daemons will adjust their memory consumption based on the
+``osd_memory_target`` config option (several gigabytes, by
+default).  If Ceph is deployed on dedicated nodes that are not sharing
+memory with other services, cephadm can automatically adjust the per-OSD
+memory consumption based on the total amount of RAM and the number of deployed
+OSDs.
+
+This option is enabled globally with::
+
+  ceph config set osd osd_memory_target_autotune true
+
+Cephadm will start with a fraction
+(``mgr/cephadm/autotune_memory_target_ratio``, which defaults to
+``.7``) of the total RAM in the system, subtract off any memory
+consumed by non-autotuned daemons (non-OSDs, for OSDs for which
+``osd_memory_target_autotune`` is false), and then divide by the
+remaining OSDs.
+
+The final targets are reflected in the config database with options like::
+
+  WHO   MASK      LEVEL   OPTION             VALUE
+  osd   host:foo  basic   osd_memory_target  126092301926
+  osd   host:bar  basic   osd_memory_target  6442450944
+
+Both the limits and the current memory consumed by each daemon are visible from
+the ``ceph orch ps`` output in the ``MEM LIMIT`` column::
+
+  NAME   HOST  PORTS  STATUS         REFRESHED  AGE  MEM USED  MEM LIMIT  VERSION                IMAGE ID      CONTAINER ID
+  osd.1  dael         running (3h)   10s ago    3h   72857k    117.4G     17.0.0-3781-gafaed750  7015fda3cd67  9e183363d39c
+  osd.2  dael         running (81m)  10s ago    81m  63989k    117.4G     17.0.0-3781-gafaed750  7015fda3cd67  1f0cc479b051
+  osd.3  dael         running (62m)  10s ago    62m  64071k    117.4G     17.0.0-3781-gafaed750  7015fda3cd67  ac5537492f27
+
+To exclude an OSD from memory autotuning, disable the autotune option
+for that OSD and also set a specific memory target.  For example,
+
+  .. prompt:: bash #
+
+    ceph config set osd.123 osd_memory_target_autotune false
+    ceph config set osd.123 osd_memory_target 16G
+
 .. _drivegroups:
 
 Advanced OSD Service Specifications
 ===================================
 
-:ref:`orchestrator-cli-service-spec` of type ``osd`` are a way to describe a cluster layout using the properties of disks.
-It gives the user an abstract way tell ceph which disks should turn into an OSD
-with which configuration without knowing the specifics of device names and paths.
-
-Instead of doing this
+:ref:`orchestrator-cli-service-spec`\s of type ``osd`` are a way to describe a
+cluster layout, using the properties of disks. Service specifications give the
+user an abstract way to tell Ceph which disks should turn into OSDs with which
+configurations, without knowing the specifics of device names and paths.
+
+Service specifications make it possible to define a yaml or json file that can
+be used to reduce the amount of manual work involved in creating OSDs.
+
+For example, instead of running the following command:
 
 .. prompt:: bash [monitor.1]#
 
   ceph orch daemon add osd *<host>*:*<path-to-device>*
 
-for each device and each host, we can define a yaml|json file that allows us to describe
-the layout. Here's the most basic example.
-
-Create a file called i.e. osd_spec.yml
+for each device and each host, we can define a yaml or json file that allows us
+to describe the layout. Here's the most basic example.
+
+Create a file called (for example) ``osd_spec.yml``:
 
 .. code-block:: yaml
 
@@ -370,58 +442,60 @@ Create a file called i.e. osd_spec.yml
   data_devices:              <- the type of devices you are applying specs to
     all: true                <- a filter, check below for a full list
 
-This would translate to:
-
-Turn any available(ceph-volume decides what 'available' is) into an OSD on all hosts that match
-the glob pattern '*'. (The glob pattern matches against the registered hosts from `host ls`)
-There will be a more detailed section on host_pattern down below.
-
-and pass it to `osd create` like so
+This means :
+
+#. Turn any available device (ceph-volume decides what 'available' is) into an
+   OSD on all hosts that match the glob pattern '*'. (The glob pattern matches
+   against the registered hosts from `host ls`) A more detailed section on
+   host_pattern is available below.
+
+#. Then pass it to `osd create` like this:
 
 .. prompt:: bash [monitor.1]#
 
   ceph orch apply osd -i /path/to/osd_spec.yml
 
-This will go out on all the matching hosts and deploy these OSDs.
-
-Since we want to have more complex setups, there are more filters than just the 'all' filter.
-
-Also, there is a `--dry-run` flag that can be passed to the `apply osd` command, which gives you a synopsis
-of the proposed layout.
+This instruction will be issued to all the matching hosts, and will deploy
+these OSDs.
+
+Setups more complex than the one specified by the ``all`` filter are
+possible. See :ref:`osd_filters` for details.
+
+A ``--dry-run`` flag can be passed to the ``apply osd`` command to display a
+synopsis of the proposed layout.
 
 Example
 
 .. prompt:: bash [monitor.1]#
 
-  [monitor.1]# ceph orch apply osd -i /path/to/osd_spec.yml --dry-run
+  ceph orch apply osd -i /path/to/osd_spec.yml --dry-run
+
+.. _osd_filters:
 
 Filters
 -------
 
 .. note::
 
-  Filters are applied using a `AND` gate by default. This essentially means that a drive needs to fulfill all filter
-  criteria in order to get selected.
-  If you wish to change this behavior you can adjust this behavior by setting
-
-  `filter_logic: OR`  # valid arguments are `AND`, `OR`
-
-  in the OSD Specification.
-
-You can assign disks to certain groups by their attributes using filters.
-The attributes are based off of ceph-volume's disk query. You can retrieve the information
-with
+  Filters are applied using an `AND` gate by default. This means that a drive
+  must fulfill all filter criteria in order to get selected. This behavior can
+  be adjusted by setting ``filter_logic: OR`` in the OSD specification.
+
+Filters are used to assign disks to groups, using their attributes to group
+them.
+
+The attributes are based off of ceph-volume's disk query. You can retrieve
+information about the attributes with this command:
 
 .. code-block:: bash
 
   ceph-volume inventory </path/to/disk>
 
-Vendor or Model:
-^^^^^^^^^^^^^^^^
-
-You can target specific disks by their Vendor or by their Model
+Vendor or Model
+^^^^^^^^^^^^^^^
+
+Specific disks can be targeted by vendor or model:
 
 .. code-block:: yaml
 
@@ -434,19 +508,19 @@ or
 
   vendor: disk_vendor_name
 
-Size:
-^^^^^
-
-You can also match by disk `Size`.
+Size
+^^^^
+
+Specific disks can be targeted by `Size`:
 
 .. code-block:: yaml
 
   size: size_spec
 
-Size specs:
-___________
-
-Size specification of format can be of form:
+Size specs
+__________
+
+Size specifications can be of the following forms:
 
 * LOW:HIGH
 * :HIGH
 
@@ -455,38 +529,38 @@ Size specification of format can be of form:
 
 Concrete examples:
 
-Includes disks of an exact size
+To include disks of an exact size
 
 .. code-block:: yaml
 
   size: '10G'
 
-Includes disks which size is within the range
+To include disks within a given range of size:
 
 .. code-block:: yaml
 
   size: '10G:40G'
 
-Includes disks less than or equal to 10G in size
+To include disks that are less than or equal to 10G in size:
 
 .. code-block:: yaml
 
  size: ':10G'
 
-Includes disks equal to or greater than 40G in size
+To include disks equal to or greater than 40G in size:
 
 .. code-block:: yaml
 
  size: '40G:'
 
-Sizes don't have to be exclusively in Gigabyte(G).
-Supported units are Megabyte(M), Gigabyte(G) and Terrabyte(T). Also appending the (B) for byte is supported. MB, GB, TB
-
-Rotational:
-^^^^^^^^^^^
+Sizes don't have to be specified exclusively in Gigabytes(G).
+
+Other units of size are supported: Megabyte(M), Gigabyte(G) and Terrabyte(T).
+Appending the (B) for byte is also supported: ``MB``, ``GB``, ``TB``.
+
+Rotational
+^^^^^^^^^^
 
 This operates on the 'rotational' attribute of the disk.
 
@@ -499,8 +573,8 @@ This operates on the 'rotational' attribute of the disk.
 
   `0` to match all disks that are non-rotational (SSD, NVME etc)
 
-All:
-^^^^
+All
+^^^
 
 This will take all disks that are 'available'
 
@@ -511,17 +585,17 @@ Note: This is exclusive for the data_devices section.
 
   all: true
 
-Limiter:
-^^^^^^^^
-
-When you specified valid filters but want to limit the amount of matching disks you can use the 'limit' directive.
+Limiter
+^^^^^^^
+
+If you have specified some valid filters but want to limit the number of disks that they match, use the ``limit`` directive:
 
 .. code-block:: yaml
 
   limit: 2
 
-For example, if you used `vendor` to match all disks that are from `VendorA` but only want to use the first two
-you could use `limit`.
+For example, if you used `vendor` to match all disks that are from `VendorA`
+but want to use only the first two, you could use `limit`:
 
 .. code-block:: yaml
 
@@ -529,7 +603,7 @@ you could use `limit`.
 
   vendor: VendorA
   limit: 2
 
-Note: Be aware that `limit` is really just a last resort and shouldn't be used if it can be avoided.
+Note: `limit` is a last resort and shouldn't be used if it can be avoided.
 
 Additional Options
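To pull the filters above together, a hedged example spec (the service id and size threshold are invented) that would turn at most a few large non-rotational drives into OSDs on every matching host; it can be previewed with ``ceph orch apply osd -i osd_spec.yml --dry-run``:

    service_type: osd
    service_id: fast_osds
    placement:
      host_pattern: '*'
    data_devices:
      rotational: 0     # non-rotational devices only (SSD/NVMe)
      size: '1T:'       # 1 terabyte or larger
      limit: 4          # last-resort cap, as noted above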


@@ -112,18 +112,21 @@ elected as master, and the virtual IP will be moved to that node.
 The active haproxy acts like a load balancer, distributing all RGW requests
 between all the RGW daemons available.
 
-**Prerequisites:**
+Prerequisites
+-------------
 
 * An existing RGW service, without SSL. (If you want SSL service, the certificate
   should be configured on the ingress service, not the RGW service.)
 
-**Deploy of the high availability service for RGW**
+Deploying
+---------
 
 Use the command::
 
     ceph orch apply -i <ingress_spec_file>
 
-**Service specification file:**
+Service specification
+---------------------
 
 It is a yaml format file with the following properties:
 
@@ -171,7 +174,10 @@ where the properties of this service specification are:
     SSL certificate, if SSL is to be enabled. This must contain the both the certificate and
     private key blocks in .pem format.
 
-**Selecting ethernet interfaces for the virtual IP:**
+.. _ingress-virtual-ip:
+
+Selecting ethernet interfaces for the virtual IP
+------------------------------------------------
 
 You cannot simply provide the name of the network interface on which
 to configure the virtual IP because interface names tend to vary
 
@@ -204,7 +210,8 @@ configuring a "dummy" IP address is an unroutable network on the correct interfa
 and reference that dummy network in the networks list (see above).
 
-**Useful hints for ingress:**
+Useful hints for ingress
+------------------------
 
-* Good to have at least 3 RGW daemons
-* Use at least 3 hosts for the ingress
+* It is good to have at least 3 RGW daemons.
+* We recommend at least 3 hosts for the ingress service.
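For illustration only (the realm/zone name, ports and virtual IP are assumptions), an ingress spec for an existing RGW service using the properties described in this file might look like:

    service_type: ingress
    service_id: rgw.myrealm.myzone
    placement:
      count: 2
    spec:
      backend_service: rgw.myrealm.myzone
      virtual_ip: 192.168.0.100/24
      frontend_port: 8080
      monitor_port: 1967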


@ -5,39 +5,60 @@ Service Management
Service Status Service Status
============== ==============
A service is a group of daemons that are configured together. A service is a group of daemons configured together. To see the status of one
of the services running in the Ceph cluster, do the following:
Print a list of services known to the orchestrator. The list can be limited to #. Use the command line to print a list of services.
services on a particular host with the optional --host parameter and/or #. Locate the service whose status you want to check.
services of a particular type via optional --type parameter #. Print the status of the service.
(mon, osd, mgr, mds, rgw):
:: The following command prints a list of services known to the orchestrator. To
limit the output to services only on a specified host, use the optional
``--host`` parameter. To limit the output to services of only a particular
type, use the optional ``--type`` parameter (mon, osd, mgr, mds, rgw):
.. prompt:: bash #
ceph orch ls [--service_type type] [--service_name name] [--export] [--format f] [--refresh] ceph orch ls [--service_type type] [--service_name name] [--export] [--format f] [--refresh]
Discover the status of a particular service or daemons:: Discover the status of a particular service or daemon:
.. prompt:: bash #
ceph orch ls --service_type type --service_name <name> [--refresh] ceph orch ls --service_type type --service_name <name> [--refresh]
Export the service specs known to the orchestrator as yaml in format To export the service specifications knows to the orchestrator, run the following command.
that is compatible to ``ceph orch apply -i``::
.. prompt:: bash #
ceph orch ls --export ceph orch ls --export
For examples about retrieving specs of single services see :ref:`orchestrator-cli-service-spec-retrieve`. The service specifications exported with this command will be exported as yaml
and that yaml can be used with the ``ceph orch apply -i`` command.
For information about retrieving the specifications of single services (including examples of commands), see :ref:`orchestrator-cli-service-spec-retrieve`.
Daemon Status Daemon Status
============= =============
A daemon is a running systemd unit and is part of a service. A daemon is a systemd unit that is running and part of a service.
Print a list of all daemons known to the orchestrator:: To see the status of a daemon, do the following:
#. Print a list of all daemons known to the orchestrator.
#. Query the status of the target daemon.
First, print a list of all daemons known to the orchestrator:
.. prompt:: bash #
ceph orch ps [--hostname host] [--daemon_type type] [--service_name name] [--daemon_id id] [--format f] [--refresh] ceph orch ps [--hostname host] [--daemon_type type] [--service_name name] [--daemon_id id] [--format f] [--refresh]
Query the status of a particular service instance (mon, osd, mds, rgw). For OSDs Then query the status of a particular service instance (mon, osd, mds, rgw).
the id is the numeric OSD ID, for MDS services it is the file system name:: For OSDs the id is the numeric OSD ID. For MDS services the id is the file
system name:
.. prompt:: bash #
ceph orch ps --daemon_type osd --daemon_id 0 ceph orch ps --daemon_type osd --daemon_id 0
@ -46,8 +67,8 @@ the id is the numeric OSD ID, for MDS services it is the file system name::
Service Specification Service Specification
===================== =====================
A *Service Specification* is a data structure A *Service Specification* is a data structure that is used to specify the
to specify the deployment of services. For example in YAML: deployment of services. Here is an example of a service specification in YAML:
.. code-block:: yaml .. code-block:: yaml
@ -61,7 +82,7 @@ to specify the deployment of services. For example in YAML:
unmanaged: false unmanaged: false
... ...
where the properties of a service specification are: In this example, the properties of this service specification are:
* ``service_type`` * ``service_type``
The type of the service. Needs to be either a Ceph The type of the service. Needs to be either a Ceph
@ -73,21 +94,20 @@ where the properties of a service specification are:
The name of the service. The name of the service.
* ``placement`` * ``placement``
See :ref:`orchestrator-cli-placement-spec`. See :ref:`orchestrator-cli-placement-spec`.
* ``unmanaged`` * ``unmanaged`` If set to ``true``, the orchestrator will not deploy nor remove
If set to ``true``, the orchestrator will not deploy nor any daemon associated with this service. Placement and all other properties
remove any daemon associated with this service. Placement and all other will be ignored. This is useful, if you do not want this service to be
properties will be ignored. This is useful, if this service should not managed temporarily. For cephadm, See :ref:`cephadm-spec-unmanaged`
be managed temporarily. For cephadm, See :ref:`cephadm-spec-unmanaged`
Each service type can have additional service specific properties. Each service type can have additional service-specific properties.
Service specifications of type ``mon``, ``mgr``, and the monitoring Service specifications of type ``mon``, ``mgr``, and the monitoring
types do not require a ``service_id``. types do not require a ``service_id``.
A service of type ``osd`` is described in :ref:`drivegroups` A service of type ``osd`` is described in :ref:`drivegroups`
Many service specifications can be applied at once using Many service specifications can be applied at once using ``ceph orch apply -i``
``ceph orch apply -i`` by submitting a multi-document YAML file:: by submitting a multi-document YAML file::
cat <<EOF | ceph orch apply -i - cat <<EOF | ceph orch apply -i -
service_type: mon service_type: mon
@ -114,7 +134,9 @@ Retrieving the running Service Specification
If the services have been started via ``ceph orch apply...``, then directly changing If the services have been started via ``ceph orch apply...``, then directly changing
the Services Specification is complicated. Instead of attempting to directly change the Services Specification is complicated. Instead of attempting to directly change
the Services Specification, we suggest exporting the running Service Specification by the Services Specification, we suggest exporting the running Service Specification by
following these instructions:: following these instructions:
.. prompt:: bash #
ceph orch ls --service-name rgw.<realm>.<zone> --export > rgw.<realm>.<zone>.yaml ceph orch ls --service-name rgw.<realm>.<zone> --export > rgw.<realm>.<zone>.yaml
ceph orch ls --service-type mgr --export > mgr.yaml ceph orch ls --service-type mgr --export > mgr.yaml
@ -132,10 +154,16 @@ For the orchestrator to deploy a *service*, it needs to know where to deploy
specification. Placement specifications can either be passed as command line arguments specification. Placement specifications can either be passed as command line arguments
or in a YAML files. or in a YAML files.
.. note::
cephadm will not deploy daemons on hosts with the ``_no_schedule`` label; see :ref:`cephadm-special-host-labels`.
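For example, a host can be taken out of scheduling by adding that label explicitly (``host2`` is a placeholder):

.. prompt:: bash #

   ceph orch host label add host2 _no_schedule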
Explicit placements Explicit placements
------------------- -------------------
Daemons can be explicitly placed on hosts by simply specifying them:: Daemons can be explicitly placed on hosts by simply specifying them:
.. prompt:: bash #
orch apply prometheus --placement="host1 host2 host3" orch apply prometheus --placement="host1 host2 host3"
@ -150,7 +178,9 @@ Or in YAML:
- host2 - host2
- host3 - host3
MONs and other services may require some enhanced network specifications:: MONs and other services may require some enhanced network specifications:
.. prompt:: bash #
orch daemon add mon --placement="myhost:[v2:1.2.3.4:3300,v1:1.2.3.4:6789]=name" orch daemon add mon --placement="myhost:[v2:1.2.3.4:3300,v1:1.2.3.4:6789]=name"
@ -162,7 +192,9 @@ and ``=name`` specifies the name of the new monitor.
Placement by labels Placement by labels
------------------- -------------------
Daemons can be explicitly placed on hosts that match a specific label:: Daemons can be explicitly placed on hosts that match a specific label:
.. prompt:: bash #
orch apply prometheus --placement="label:mylabel" orch apply prometheus --placement="label:mylabel"
@ -179,7 +211,9 @@ Or in YAML:
Placement by pattern matching Placement by pattern matching
----------------------------- -----------------------------
Daemons can be placed on hosts as well:: Daemons can be placed on hosts as well:
.. prompt:: bash #
orch apply prometheus --placement='myhost[1-3]' orch apply prometheus --placement='myhost[1-3]'
@ -191,7 +225,9 @@ Or in YAML:
placement: placement:
host_pattern: "myhost[1-3]" host_pattern: "myhost[1-3]"
To place a service on *all* hosts, use ``"*"``:: To place a service on *all* hosts, use ``"*"``:
.. prompt:: bash #
orch apply node-exporter --placement='*' orch apply node-exporter --placement='*'
@ -207,21 +243,27 @@ Or in YAML:
Setting a limit Setting a limit
--------------- ---------------
By specifying ``count``, only that number of daemons will be created:: By specifying ``count``, only the number of daemons specified will be created:
.. prompt:: bash #
orch apply prometheus --placement=3 orch apply prometheus --placement=3
To deploy *daemons* on a subset of hosts, also specify the count:: To deploy *daemons* on a subset of hosts, specify the count:
.. prompt:: bash #
orch apply prometheus --placement="2 host1 host2 host3" orch apply prometheus --placement="2 host1 host2 host3"
If the count is bigger than the amount of hosts, cephadm deploys one per host:: If the count is bigger than the amount of hosts, cephadm deploys one per host:
.. prompt:: bash #
orch apply prometheus --placement="3 host1 host2" orch apply prometheus --placement="3 host1 host2"
results in two Prometheus daemons. The command immediately above results in two Prometheus daemons.
Or in YAML: YAML can also be used to specify limits, in the following way:
.. code-block:: yaml .. code-block:: yaml
@ -229,7 +271,7 @@ Or in YAML:
placement: placement:
count: 3 count: 3
Or with hosts: YAML can also be used to specify limits on hosts:
.. code-block:: yaml .. code-block:: yaml
@ -249,15 +291,21 @@ service in a ``ServiceSpec``. For certain operations, like updating
the RGW HTTP port, we need to update the existing the RGW HTTP port, we need to update the existing
specification. specification.
1. List the current ``ServiceSpec``:

   .. prompt:: bash #

      ceph orch ls --service_name=<service-name> --export > myservice.yaml

2. Update the yaml file (an example of an edited specification is sketched below):

   .. prompt:: bash #

      vi myservice.yaml

3. Apply the new ``ServiceSpec``:

   .. prompt:: bash #

      ceph orch apply -i myservice.yaml [--dry-run]
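For illustration only, an exported and then edited RGW specification might look roughly like the following; the realm, zone, label and port values are placeholders, not values taken from this document:

.. code-block:: yaml

    service_type: rgw
    service_id: myrealm.myzone
    placement:
      count: 2
      label: rgw
    spec:
      rgw_realm: myrealm
      rgw_zone: myzone
      rgw_frontend_port: 8080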
@ -268,22 +316,25 @@ Cephadm uses a declarative state to define the layout of the cluster. This
state consists of a list of service specifications containing placement state consists of a list of service specifications containing placement
specifications (See :ref:`orchestrator-cli-service-spec` ). specifications (See :ref:`orchestrator-cli-service-spec` ).
Cephadm constantly compares list of actually running daemons in the cluster Cephadm continually compares a list of daemons actually running in the cluster
with the desired service specifications and will either add or remove new against the list in the service specifications. Cephadm adds new daemons and
daemons. removes old daemons as necessary in order to conform to the service
specifications.
First, cephadm will select a list of candidate hosts. It first looks for Cephadm does the following to maintain compliance with the service
explicit host names and will select those. In case there are no explicit hosts specifications.
defined, cephadm looks for a label specification. If there is no label defined
in the specification, cephadm will select hosts based on a host pattern. If
there is no pattern defined, cepham will finally select all known hosts as
candidates.
Then, cephadm will consider existing daemons of this services and will try to Cephadm first selects a list of candidate hosts. Cephadm seeks explicit host
avoid moving any daemons. names and selects them. If cephadm finds no explicit host names, it looks for
label specifications. If no label is defined in the specification, cephadm
selects hosts based on a host pattern. If no host pattern is defined, as a last
resort, cephadm selects all known hosts as candidates.
Cephadm supports the deployment of a specific amount of services. Let's Cephadm is aware of existing daemons running services and tries to avoid moving
consider a service specification like so: them.
Cephadm supports the deployment of a specific amount of services.
Consider the following service specification:
.. code-block:: yaml .. code-block:: yaml
@ -293,34 +344,39 @@ consider a service specification like so:
count: 3 count: 3
label: myfs label: myfs
This service specification instructs cephadm to deploy three daemons on hosts
labeled ``myfs`` across the cluster.
Then, in case there are less than three daemons deployed on the candidate If there are fewer than three daemons deployed on the candidate hosts, cephadm
hosts, cephadm will then randomly choose hosts for deploying new daemons. randomly chooses hosts on which to deploy new daemons.
In case there are more than three daemons deployed, cephadm will remove If there are more than three daemons deployed on the candidate hosts, cephadm
existing daemons. removes existing daemons.
Finally, cephadm will remove daemons on hosts that are outside of the list of Finally, cephadm removes daemons on hosts that are outside of the list of
candidate hosts. candidate hosts.
.. note::

   There is a special case that cephadm must consider.

   If there are fewer hosts selected by the placement specification than
   demanded by ``count``, cephadm will deploy only on the selected hosts.
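To make the special case concrete, here is a sketch with placeholder host and service names: the specification asks for three daemons, but only two hosts are selected, so only two daemons are deployed.

.. code-block:: yaml

    service_type: mds
    service_id: myfs
    placement:
      count: 3
      hosts:
        - host1
        - host2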
.. _cephadm-spec-unmanaged: .. _cephadm-spec-unmanaged:
Disable automatic deployment of daemons Disabling automatic deployment of daemons
======================================= =========================================
Cephadm supports disabling the automated deployment and removal of daemons on a
per service basis. The CLI supports two commands for this.

Disabling automatic management of daemons
-----------------------------------------

To disable the automatic management of daemons, set ``unmanaged=True`` in the
:ref:`orchestrator-cli-service-spec` (``mgr.yaml``).
``mgr.yaml``: ``mgr.yaml``:
@ -331,36 +387,45 @@ the :ref:`orchestrator-cli-service-spec` with ``unmanaged=True``.
placement: placement:
label: mgr label: mgr
.. code-block:: bash
.. prompt:: bash #
ceph orch apply -i mgr.yaml ceph orch apply -i mgr.yaml
.. note:: .. note::
cephadm will no longer deploy any new daemons, if the placement After you apply this change in the Service Specification, cephadm will no
specification matches additional hosts. longer deploy any new daemons (even if the placement specification matches
additional hosts).
To manually deploy a daemon on a host, please execute: Deploying a daemon on a host manually
-------------------------------------
.. code-block:: bash To manually deploy a daemon on a host, run a command of the following form:
.. prompt:: bash #
ceph orch daemon add <daemon-type> --placement=<placement spec> ceph orch daemon add <daemon-type> --placement=<placement spec>
For example For example :
.. code-block:: bash .. prompt:: bash #
ceph orch daemon add mgr --placement=my_host ceph orch daemon add mgr --placement=my_host
To manually remove a daemon, please run: Removing a daemon from a host manually
--------------------------------------
.. code-block:: bash To manually remove a daemon, run a command of the following form:
.. prompt:: bash #
ceph orch daemon rm <daemon name>... [--force] ceph orch daemon rm <daemon name>... [--force]
For example For example:
.. code-block:: bash .. prompt:: bash #
ceph orch daemon rm mgr.my_host.xyzxyz ceph orch daemon rm mgr.my_host.xyzxyz
@ -369,5 +434,8 @@ For example
For managed services (``unmanaged=False``), cephadm will automatically For managed services (``unmanaged=False``), cephadm will automatically
deploy a new daemon a few seconds later. deploy a new daemon a few seconds later.
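If a removed daemon should stay removed, one approach (a sketch combining the mechanisms described above; the service and daemon names are placeholders) is to mark the service unmanaged first:

.. prompt:: bash #

   ceph orch ls --service-name mgr --export > mgr.yaml
   vi mgr.yaml   # set "unmanaged: true"
   ceph orch apply -i mgr.yaml
   ceph orch daemon rm mgr.my_host.xyzxyz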
See also
--------
* See :ref:`cephadm-osd-declarative` for special handling of unmanaged OSDs. * See :ref:`cephadm-osd-declarative` for special handling of unmanaged OSDs.
* See also :ref:`cephadm-pause` * See also :ref:`cephadm-pause`
@ -2,9 +2,9 @@
Upgrading Ceph Upgrading Ceph
============== ==============
Cephadm is capable of safely upgrading Ceph from one bugfix release to Cephadm can safely upgrade Ceph from one bugfix release to the next. For
another. For example, you can upgrade from v15.2.0 (the first Octopus example, you can upgrade from v15.2.0 (the first Octopus release) to the next
release) to the next point release v15.2.1. point release, v15.2.1.
The automated upgrade process follows Ceph best practices. For example: The automated upgrade process follows Ceph best practices. For example:
@ -13,54 +13,72 @@ The automated upgrade process follows Ceph best practices. For example:
will remain available. will remain available.
Keep in mind that the Ceph cluster health status is likely to switch to Keep in mind that the Ceph cluster health status is likely to switch to
`HEALTH_WARNING` during the upgrade. ``HEALTH_WARNING`` during the upgrade.
Starting the upgrade Starting the upgrade
==================== ====================
Before you start, you should verify that all hosts are currently online Before you begin using cephadm to upgrade Ceph, verify that all hosts are currently online and that your cluster is healthy:
and your cluster is healthy.
:: .. prompt:: bash #
# ceph -s ceph -s
To upgrade (or downgrade) to a specific release:: To upgrade (or downgrade) to a specific release:
# ceph orch upgrade start --ceph-version <version> .. prompt:: bash #
For example, to upgrade to v15.2.1:: ceph orch upgrade start --ceph-version <version>
# ceph orch upgrade start --ceph-version 15.2.1 For example, to upgrade to v15.2.1:
.. prompt:: bash #
ceph orch upgrade start --ceph-version 15.2.1
Monitoring the upgrade Monitoring the upgrade
====================== ======================
Determine whether an upgrade is in process and what version the cluster is Determine (1) whether an upgrade is in progress and (2) which version the
upgrading to with:: cluster is upgrading to by running the following command:
# ceph orch upgrade status .. prompt:: bash #
While the upgrade is underway, you will see a progress bar in the ceph ceph orch upgrade status
status output. For example::
Watching the progress bar during a Ceph upgrade
-----------------------------------------------
During the upgrade, a progress bar is visible in the ceph status output. It
looks like this:
.. code-block:: console
# ceph -s # ceph -s
[...] [...]
progress: progress:
Upgrade to docker.io/ceph/ceph:v15.2.1 (00h 20m 12s) Upgrade to docker.io/ceph/ceph:v15.2.1 (00h 20m 12s)
[=======.....................] (time remaining: 01h 43m 31s) [=======.....................] (time remaining: 01h 43m 31s)
You can also watch the cephadm log with:: Watching the cephadm log during an upgrade
------------------------------------------
# ceph -W cephadm Watch the cephadm log by running the following command:
.. prompt:: bash #
ceph -W cephadm
Canceling an upgrade Canceling an upgrade
==================== ====================
You can stop the upgrade process at any time with:: You can stop the upgrade process at any time with:
.. prompt:: bash #
# ceph orch upgrade stop # ceph orch upgrade stop
@ -73,46 +91,57 @@ There are a few health alerts that can arise during the upgrade process.
UPGRADE_NO_STANDBY_MGR UPGRADE_NO_STANDBY_MGR
---------------------- ----------------------
Ceph requires an active and standby manager daemon in order to proceed, but This alert means that Ceph requires an active and standby manager daemon in
there is currently no standby. order to proceed, but there is currently no standby.
You can ensure that Cephadm is configured to run 2 (or more) managers with:: You can ensure that Cephadm is configured to run 2 (or more) managers by running the following command:
# ceph orch apply mgr 2 # or more .. prompt:: bash #
You can check the status of existing mgr daemons with:: ceph orch apply mgr 2 # or more
# ceph orch ps --daemon-type mgr You can check the status of existing mgr daemons by running the following command:
If an existing mgr daemon has stopped, you can try restarting it with:: .. prompt:: bash #
# ceph orch daemon restart <name> ceph orch ps --daemon-type mgr
If an existing mgr daemon has stopped, you can try to restart it by running the following command:
.. prompt:: bash #
ceph orch daemon restart <name>
UPGRADE_FAILED_PULL UPGRADE_FAILED_PULL
------------------- -------------------
Ceph was unable to pull the container image for the target version. This alert means that Ceph was unable to pull the container image for the
This can happen if you specify an version or container image that does target version. This can happen if you specify a version or container image
not exist (e.g., 1.2.3), or if the container registry is not reachable from that does not exist (e.g. "1.2.3"), or if the container registry can not
one or more hosts in the cluster. be reached by one or more hosts in the cluster.
You can cancel the existing upgrade and specify a different target version with:: To cancel the existing upgrade and to specify a different target version, run the following commands:
# ceph orch upgrade stop .. prompt:: bash #
# ceph orch upgrade start --ceph-version <version>
ceph orch upgrade stop
ceph orch upgrade start --ceph-version <version>
Using customized container images Using customized container images
================================= =================================
For most users, simplify specifying the Ceph version is sufficient. For most users, upgrading requires nothing more complicated than specifying the
Cephadm will locate the specific Ceph container image to use by Ceph version number to upgrade to. In such cases, cephadm locates the specific
combining the ``container_image_base`` configuration option (default: Ceph container image to use by combining the ``container_image_base``
``docker.io/ceph/ceph``) with a tag of ``vX.Y.Z``. configuration option (default: ``docker.io/ceph/ceph``) with a tag of
``vX.Y.Z``.
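If you mirror Ceph images into a private registry, one plausible way to point cephadm at it is via the cephadm module option (the registry URL below is a placeholder):

.. prompt:: bash #

   ceph config set mgr mgr/cephadm/container_image_base registry.example.com/ceph/ceph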
You can also upgrade to an arbitrary container image. For example, to But it is possible to upgrade to an arbitrary container image, if that's what
upgrade to a development build:: you need. For example, the following command upgrades to a development build:
# ceph orch upgrade start --image quay.io/ceph-ci/ceph:recent-git-branch-name .. prompt:: bash #
ceph orch upgrade start --image quay.io/ceph-ci/ceph:recent-git-branch-name
For more information about available container images, see :ref:`containers`. For more information about available container images, see :ref:`containers`.
@ -216,30 +216,15 @@ does not change a MDS; it manipulates the file system rank which has been
marked damaged. marked damaged.
Required Client Features
------------------------

It is sometimes desirable to set features that clients must support to talk to
CephFS. Clients without those features may disrupt other clients or behave in
surprising ways. Or, you may want to require newer features to prevent older
and possibly buggy clients from connecting.

Commands to manipulate required client features of a file system:

::
@ -252,8 +237,9 @@ To list all CephFS features
fs feature ls fs feature ls
Clients that are missing newly added features will be evicted automatically.
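As a hedged example (the file system and feature names are illustrative; any feature listed in the table below can be used), requiring a feature and later dropping the requirement might look like this:

::

    fs required_client_features cephfs add metric_collect
    fs required_client_features cephfs rm metric_collect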
CephFS features and first release they came out. Here are the current CephFS features and first release they came out:
+------------------+--------------+-----------------+ +------------------+--------------+-----------------+
| Feature | Ceph release | Upstream Kernel | | Feature | Ceph release | Upstream Kernel |
@ -278,6 +264,8 @@ CephFS features and first release they came out.
+------------------+--------------+-----------------+ +------------------+--------------+-----------------+
| metric_collect | pacific | N/A | | metric_collect | pacific | N/A |
+------------------+--------------+-----------------+ +------------------+--------------+-----------------+
| alternate_name | pacific | PLANNED |
+------------------+--------------+-----------------+
CephFS Feature Descriptions CephFS Feature Descriptions
@ -328,6 +316,13 @@ delegated inode numbers is a prerequisite for client to do async file creation.
Clients can send performance metric to MDS if MDS support this feature. Clients can send performance metric to MDS if MDS support this feature.
::
alternate_name
Clients can set and understand "alternate names" for directory entries. This is
to be used for encrypted file name support.
Global settings Global settings
--------------- ---------------
@ -0,0 +1,385 @@
.. _cephfs-mirroring:
=========================
CephFS Snapshot Mirroring
=========================
CephFS supports asynchronous replication of snapshots to a remote CephFS file system via
`cephfs-mirror` tool. Snapshots are synchronized by mirroring snapshot data followed by
creating a snapshot with the same name (for a given directory on the remote file system) as
the snapshot being synchronized.
Requirements
------------
The primary (local) and secondary (remote) Ceph clusters must be running version Pacific or later.
Creating Users
--------------
Start by creating a user (on the primary/local cluster) for the mirror daemon. This user
requires write capability on the metadata pool to create RADOS objects (index objects)
for watch/notify operation and read capability on the data pool(s)::
$ ceph auth get-or-create client.mirror mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'
Create a user for each file system peer (on the secondary/remote cluster). This user needs
to have full capabilities on the MDS (to take snapshots) and the OSDs::
$ ceph fs authorize <fs_name> client.mirror_remote / rwps
This user should be used (as part of peer specification) when adding a peer.
Starting Mirror Daemon
----------------------
Mirror daemon should be spawned using `systemctl(1)` unit files::
$ systemctl enable cephfs-mirror@mirror
$ systemctl start cephfs-mirror@mirror
`cephfs-mirror` daemon can be run in foreground using::
$ cephfs-mirror --id mirror --cluster site-a -f
.. note:: User used here is `mirror` created in the `Creating Users` section.
Interface
---------
`Mirroring` module (manager plugin) provides interfaces for managing directory snapshot
mirroring. Manager interfaces are (mostly) wrappers around monitor commands for managing
file system mirroring and are the recommended control interface.
Mirroring Module
----------------
The mirroring module is responsible for assigning directories to mirror daemons for
synchronization. Multiple mirror daemons can be spawned to achieve concurrency in
directory snapshot synchronization. When mirror daemons are spawned (or terminated),
the mirroring module discovers the modified set of mirror daemons and rebalances
the directory assignment amongst the new set thus providing high-availability.
.. note:: Running multiple mirror daemons is currently untested. Only a single mirror daemon
          is recommended.
Mirroring module is disabled by default. To enable mirroring use::
$ ceph mgr module enable mirroring
Mirroring module provides a family of commands to control mirroring of directory
snapshots. To add or remove directories, mirroring needs to be enabled for a given
file system. To enable mirroring use::
$ ceph fs snapshot mirror enable <fs_name>
.. note:: Mirroring module commands use the `fs snapshot mirror` prefix as compared to
          the monitor commands, which use the `fs mirror` prefix. Make sure to use the
          module commands.
To disable mirroring, use::
$ ceph fs snapshot mirror disable <fs_name>
Once mirroring is enabled, add a peer to which directory snapshots are to be mirrored.
Peers follow `<client>@<cluster>` specification and get assigned a unique-id (UUID)
when added. See `Creating Users` section on how to create Ceph users for mirroring.
To add a peer use::
$ ceph fs snapshot mirror peer_add <fs_name> <remote_cluster_spec> [<remote_fs_name>] [<remote_mon_host>] [<cephx_key>]
`<remote_fs_name>` is optional, and defaults to `<fs_name>` (on the remote cluster).
This requires the remote cluster ceph configuration and user keyring to be available in
the primary cluster. See `Bootstrap Peers` section to avoid this. `peer_add` additionally
supports passing the remote cluster monitor address and the user key. However, bootstrapping
a peer is the recommended way to add a peer.
.. note:: Only a single peer is supported right now.
To remove a peer use::
$ ceph fs snapshot mirror peer_remove <fs_name> <peer_uuid>
To list file system mirror peers use::
$ ceph fs snapshot mirror peer_list <fs_name>
To configure a directory for mirroring, use::
$ ceph fs snapshot mirror add <fs_name> <path>
To stop mirroring directory snapshots, use::
$ ceph fs snapshot mirror remove <fs_name> <path>
Only absolute directory paths are allowed. Also, paths are normalized by the mirroring
module, therefore, `/a/b/../b` is equivalent to `/a/b`.
$ mkdir -p /d0/d1/d2
$ ceph fs snapshot mirror add cephfs /d0/d1/d2
{}
$ ceph fs snapshot mirror add cephfs /d0/d1/../d1/d2
Error EEXIST: directory /d0/d1/d2 is already tracked
Once a directory is added for mirroring, its subdirectories or ancestor directories
cannot be added for mirroring::
$ ceph fs snapshot mirror add cephfs /d0/d1
Error EINVAL: /d0/d1 is a ancestor of tracked path /d0/d1/d2
$ ceph fs snapshot mirror add cephfs /d0/d1/d2/d3
Error EINVAL: /d0/d1/d2/d3 is a subtree of tracked path /d0/d1/d2
Commands to check directory mapping (to mirror daemons) and directory distribution are
detailed in `Mirroring Status` section.
Bootstrap Peers
---------------
Adding a peer (via `peer_add`) requires the peer cluster configuration and user keyring
to be available in the primary cluster (manager host and hosts running the mirror daemon).
This can be avoided by bootstrapping and importing a peer token. Peer bootstrap involves
creating a bootstrap token on the peer cluster via::
$ ceph fs snapshot mirror peer_bootstrap create <fs_name> <client_entity> <site-name>
e.g.::
$ ceph fs snapshot mirror peer_bootstrap create backup_fs client.mirror_remote site-remote
{"token": "eyJmc2lkIjogIjBkZjE3MjE3LWRmY2QtNDAzMC05MDc5LTM2Nzk4NTVkNDJlZiIsICJmaWxlc3lzdGVtIjogImJhY2t1cF9mcyIsICJ1c2VyIjogImNsaWVudC5taXJyb3JfcGVlcl9ib290c3RyYXAiLCAic2l0ZV9uYW1lIjogInNpdGUtcmVtb3RlIiwgImtleSI6ICJBUUFhcDBCZ0xtRmpOeEFBVnNyZXozai9YYUV0T2UrbUJEZlJDZz09IiwgIm1vbl9ob3N0IjogIlt2MjoxOTIuMTY4LjAuNTo0MDkxOCx2MToxOTIuMTY4LjAuNTo0MDkxOV0ifQ=="}
`site-name` refers to a user-defined string to identify the remote filesystem. In context
of `peer_add` interface, `site-name` is the passed in `cluster` name from `remote_cluster_spec`.
Import the bootstrap token in the primary cluster via::
$ ceph fs snapshot mirror peer_bootstrap import <fs_name> <token>
e.g.::
$ ceph fs snapshot mirror peer_bootstrap import cephfs eyJmc2lkIjogIjBkZjE3MjE3LWRmY2QtNDAzMC05MDc5LTM2Nzk4NTVkNDJlZiIsICJmaWxlc3lzdGVtIjogImJhY2t1cF9mcyIsICJ1c2VyIjogImNsaWVudC5taXJyb3JfcGVlcl9ib290c3RyYXAiLCAic2l0ZV9uYW1lIjogInNpdGUtcmVtb3RlIiwgImtleSI6ICJBUUFhcDBCZ0xtRmpOeEFBVnNyZXozai9YYUV0T2UrbUJEZlJDZz09IiwgIm1vbl9ob3N0IjogIlt2MjoxOTIuMTY4LjAuNTo0MDkxOCx2MToxOTIuMTY4LjAuNTo0MDkxOV0ifQ==
Mirroring Status
----------------
CephFS mirroring module provides `mirror daemon status` interface to check mirror daemon status::
$ ceph fs snapshot mirror daemon status <fs_name>
[
{
"daemon_id": 284167,
"filesystems": [
{
"filesystem_id": 1,
"name": "a",
"directory_count": 1,
"peers": [
{
"uuid": "02117353-8cd1-44db-976b-eb20609aa160",
"remote": {
"client_name": "client.mirror_remote",
"cluster_name": "ceph",
"fs_name": "backup_fs"
},
"stats": {
"failure_count": 1,
"recovery_count": 0
}
}
]
}
]
}
]
An entry per mirror daemon instance is displayed along with information such as configured
peers and basic stats. For more detailed stats, use the admin socket interface as detailed
below.
CephFS mirror daemons provide admin socket commands for querying mirror status. To check
available commands for mirror status use::
$ ceph --admin-daemon /path/to/mirror/daemon/admin/socket help
{
....
....
"fs mirror status cephfs@360": "get filesystem mirror status",
....
....
}
Commands with `fs mirror status` prefix provide mirror status for mirror enabled
file systems. Note that `cephfs@360` is of format `filesystem-name@filesystem-id`.
This format is required since mirror daemons get asynchronously notified regarding
file system mirror status (A file system can be deleted and recreated with the same
name).
Right now, the command provides minimal information regarding mirror status::
$ ceph --admin-daemon /var/run/ceph/cephfs-mirror.asok fs mirror status cephfs@360
{
"rados_inst": "192.168.0.5:0/1476644347",
"peers": {
"a2dc7784-e7a1-4723-b103-03ee8d8768f8": {
"remote": {
"client_name": "client.mirror_remote",
"cluster_name": "site-a",
"fs_name": "backup_fs"
}
}
},
"snap_dirs": {
"dir_count": 1
}
}
`Peers` section in the command output above shows the peer information such as unique
peer-id (UUID) and specification. The peer-id is required to remove an existing peer
as mentioned in the `Mirror Module and Interface` section.
Commands with the `fs mirror peer status` prefix provide peer synchronization status. This
command is of the format `filesystem-name@filesystem-id peer-uuid`::
$ ceph --admin-daemon /var/run/ceph/cephfs-mirror.asok fs mirror peer status cephfs@360 a2dc7784-e7a1-4723-b103-03ee8d8768f8
{
"/d0": {
"state": "idle",
"last_synced_snap": {
"id": 120,
"name": "snap1",
"sync_duration": 0.079997898999999997,
"sync_time_stamp": "274900.558797s"
},
"snaps_synced": 2,
"snaps_deleted": 0,
"snaps_renamed": 0
}
}
Synchronization stats such as `snaps_synced`, `snaps_deleted` and `snaps_renamed` are reset
on daemon restart and/or when a directory is reassigned to another mirror daemon (when
multiple mirror daemons are deployed).
A directory can be in one of the following states::
- `idle`: The directory is currently not being synchronized
- `syncing`: The directory is currently being synchronized
- `failed`: The directory has hit upper limit of consecutive failures
When a directory hits a configured number of consecutive synchronization failures, the
mirror daemon marks it as `failed`. Synchronization for these directories is retried.
By default, the number of consecutive failures before a directory is marked as failed
is controlled by `cephfs_mirror_max_consecutive_failures_per_directory` configuration
option (default: 10) and the retry interval for failed directories is controlled via
`cephfs_mirror_retry_failed_directories_interval` configuration option (default: 60s).
E.g., adding a regular file for synchronization would result in failed status::
$ ceph fs snapshot mirror add cephfs /f0
$ ceph --admin-daemon /var/run/ceph/cephfs-mirror.asok fs mirror peer status cephfs@360 a2dc7784-e7a1-4723-b103-03ee8d8768f8
{
"/d0": {
"state": "idle",
"last_synced_snap": {
"id": 120,
"name": "snap1",
"sync_duration": 0.079997898999999997,
"sync_time_stamp": "274900.558797s"
},
"snaps_synced": 2,
"snaps_deleted": 0,
"snaps_renamed": 0
},
"/f0": {
"state": "failed",
"snaps_synced": 0,
"snaps_deleted": 0,
"snaps_renamed": 0
}
}
This allows a user to add a non-existent directory for synchronization. The mirror daemon
will mark such a directory as failed and retry (less frequently). When the directory comes
into existence, the mirror daemon will clear the failed state upon successful snapshot
synchronization.
When mirroring is disabled, the respective `fs mirror status` command for the file system
will not show up in command help.
Configuration Options
---------------------
``cephfs_mirror_max_concurrent_directory_syncs``
:Description: Maximum number of directory snapshots that can be synchronized concurrently by
cephfs-mirror daemon. Controls the number of synchronization threads.
:Type: 64-bit Integer Unsigned
:Default: ``3``
``cephfs_mirror_action_update_interval``
:Description: Interval in seconds to process pending mirror update actions.
:Type: Float
:Default: ``2``
``cephfs_mirror_restart_mirror_on_blocklist_interval``
:Description: Interval in seconds to restart blocklisted mirror instances. Setting to zero (0)
disables restarting blocklisted instances.
:Type: Float
:Default: ``30``
``cephfs_mirror_max_snapshot_sync_per_cycle``
:Description: Maximum number of snapshots to mirror when a directory is picked up for mirroring
by worker threads.
:Type: 64-bit Integer Unsigned
:Default: ``3``
``cephfs_mirror_directory_scan_interval``
:Description: Interval in seconds to scan configured directories for snapshot mirroring.
:Type: 64-bit Integer Unsigned
:Default: ``10``
``cephfs_mirror_max_consecutive_failures_per_directory``
:Description: Number of consecutive snapshot synchronization failures to mark a directory as
              "failed". Failed directories are retried for synchronization less frequently.
:Type: 64-bit Integer Unsigned
:Default: ``10``
``cephfs_mirror_retry_failed_directories_interval``
:Description: Interval in seconds to retry synchronization for failed directories.
:Type: 64-bit Integer Unsigned
:Default: ``60``
``cephfs_mirror_restart_mirror_on_failure_interval``
:Description: Interval in seconds to restart failed mirror instances. Setting to zero (0)
disables restarting failed mirror instances.
:Type: Float
:Default: ``20``
``cephfs_mirror_mount_timeout``
:Description: Timeout in seconds for mounting primary or secondary (remote) ceph file system
by the cephfs-mirror daemon. Setting this to a higher value could result in the
mirror daemon getting stalled when mounting a file system if the cluster is not
reachable. This option is used to override the usual client_mount_timeout.
:Type: Float
:Default: ``10``
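These options can be overridden like any other Ceph option. A minimal sketch, assuming the mirror daemon reads the ``client`` section of the centralized configuration (adjust the section to match your daemon's name if needed)::

    $ ceph config set client cephfs_mirror_max_snapshot_sync_per_cycle 5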
Re-adding Peers
---------------
When re-adding (reassigning) a peer to a file system in another cluster, ensure that
all mirror daemons have stopped synchronization to the peer. This can be checked
via `fs mirror status` admin socket command (the `Peer UUID` should not show up
in the command output). Also, it is recommended to purge synchronized directories
from the peer before re-adding it to another file system (especially those directories
which might exist in the new primary file system). This is not required if re-adding
a peer to the same primary file system it was earlier synchronized from.
@ -1,3 +1,5 @@
.. _cephfs-nfs:
======================= =======================
CephFS Exports over NFS CephFS Exports over NFS
======================= =======================
@ -11,12 +13,14 @@ Requirements
- ``nfs-ganesha``, ``nfs-ganesha-ceph``, ``nfs-ganesha-rados-grace`` and - ``nfs-ganesha``, ``nfs-ganesha-ceph``, ``nfs-ganesha-rados-grace`` and
``nfs-ganesha-rados-urls`` packages (version 3.3 and above) ``nfs-ganesha-rados-urls`` packages (version 3.3 and above)
.. note:: From Pacific, the nfs mgr module must be enabled prior to use.
Create NFS Ganesha Cluster Create NFS Ganesha Cluster
========================== ==========================
.. code:: bash .. code:: bash
$ ceph nfs cluster create <type> <clusterid> [<placement>] $ ceph nfs cluster create <clusterid> [<placement>] [--ingress --virtual-ip <ip>]
This creates a common recovery pool for all NFS Ganesha daemons, new user based on This creates a common recovery pool for all NFS Ganesha daemons, new user based on
``clusterid``, and a common NFS Ganesha config RADOS object. ``clusterid``, and a common NFS Ganesha config RADOS object.
@ -28,10 +32,6 @@ This creates a common recovery pool for all NFS Ganesha daemons, new user based
Currently, NFS Ganesha daemon deployed by cephadm listens on the standard Currently, NFS Ganesha daemon deployed by cephadm listens on the standard
port. So only one daemon will be deployed on a host. port. So only one daemon will be deployed on a host.
``<type>`` signifies the export type, which corresponds to the NFS Ganesha file
system abstraction layer (FSAL). Permissible values are ``"cephfs`` or
``rgw``, but currently only ``cephfs`` is supported.
``<clusterid>`` is an arbitrary string by which this NFS Ganesha cluster will be ``<clusterid>`` is an arbitrary string by which this NFS Ganesha cluster will be
known. known.
@ -49,24 +49,20 @@ cluster)::
"2 host1,host2" "2 host1,host2"
To deploy NFS with an HA front-end (virtual IP and load balancer), add the
``--ingress`` flag and specify a virtual IP address. This will deploy a combination
of keepalived and haproxy to provide a high-availability NFS frontend for the NFS
service.
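For example (the cluster name, hosts and virtual IP are placeholders; whether the address needs a CIDR suffix depends on your network setup):

.. code:: bash

    $ ceph nfs cluster create mynfs "2 host1,host2" --ingress --virtual-ip 10.0.0.10/24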
For more details, refer :ref:`orchestrator-cli-placement-spec` but keep For more details, refer :ref:`orchestrator-cli-placement-spec` but keep
in mind that specifying the placement via a YAML file is not supported. in mind that specifying the placement via a YAML file is not supported.
Update NFS Ganesha Cluster
==========================
.. code:: bash
$ ceph nfs cluster update <clusterid> <placement>
This updates the deployed cluster according to the placement value.
Delete NFS Ganesha Cluster Delete NFS Ganesha Cluster
========================== ==========================
.. code:: bash .. code:: bash
$ ceph nfs cluster delete <clusterid> $ ceph nfs cluster rm <clusterid>
This deletes the deployed cluster. This deletes the deployed cluster.
@ -160,8 +156,8 @@ This removes the user defined configuration.
Create CephFS Export Create CephFS Export
==================== ====================
.. warning:: Currently, the volume/nfs interface is not integrated with dashboard. Both .. warning:: Currently, the nfs interface is not integrated with dashboard. Both
dashboard and volume/nfs interface have different export requirements and dashboard and nfs interface have different export requirements and
create exports differently. Management of dashboard created exports is not create exports differently. Management of dashboard created exports is not
supported. supported.
@ -186,12 +182,14 @@ path is '/'. It need not be unique. Subvolume path can be fetched using:
$ ceph fs subvolume getpath <vol_name> <subvol_name> [--group_name <subvol_group_name>] $ ceph fs subvolume getpath <vol_name> <subvol_name> [--group_name <subvol_group_name>]
.. note:: Export creation is supported only for NFS Ganesha clusters deployed using nfs interface.
Delete CephFS Export Delete CephFS Export
==================== ====================
.. code:: bash .. code:: bash
$ ceph nfs export delete <clusterid> <binding> $ ceph nfs export rm <clusterid> <binding>
This deletes an export in an NFS Ganesha cluster, where: This deletes an export in an NFS Ganesha cluster, where:
@ -323,4 +321,23 @@ grace period. The exports can be mounted by
.. note:: Only NFS v4.0+ is supported. .. note:: Only NFS v4.0+ is supported.
Troubleshooting
===============
Checking NFS-Ganesha logs with
1) ``cephadm``
.. code:: bash
$ cephadm logs --fsid <fsid> --name nfs.<cluster_id>.hostname
2) ``rook``
.. code:: bash
$ kubectl logs -n rook-ceph rook-ceph-nfs-<cluster_id>-<node_id> nfs-ganesha
The log level can be changed using the `nfs cluster config set` command.
.. _NFS-Ganesha NFS Server: https://github.com/nfs-ganesha/nfs-ganesha/wiki .. _NFS-Ganesha NFS Server: https://github.com/nfs-ganesha/nfs-ganesha/wiki
@ -92,7 +92,7 @@ Administration
Upgrading old file systems <upgrading> Upgrading old file systems <upgrading>
CephFS Top Utility <cephfs-top> CephFS Top Utility <cephfs-top>
Scheduled Snapshots <snap-schedule> Scheduled Snapshots <snap-schedule>
CephFS Snapshot Mirroring <cephfs-mirroring>
.. raw:: html .. raw:: html
@ -162,10 +162,12 @@ your metadata throughput with no other administrative intervention.
Presently, there are two types of ephemeral pinning: Presently, there are two types of ephemeral pinning:
**Distributed Ephemeral Pins**: This policy indicates that **all** of a **Distributed Ephemeral Pins**: This policy causes a directory to fragment
directory's immediate children should be ephemerally pinned. The canonical (even well below the normal fragmentation thresholds) and distribute its
example would be the ``/home`` directory: we want every user's home directory fragments as ephemerally pinned subtrees. This has the effect of distributing
to be spread across the entire MDS cluster. This can be set via: immediate children across a range of MDS ranks. The canonical example use-case
would be the ``/home`` directory: we want every user's home directory to be
spread across the entire MDS cluster. This can be set via:
:: ::
@ -219,18 +221,3 @@ For the reverse situation:
The ``home/patrick`` directory and its children will be pinned to rank 2 The ``home/patrick`` directory and its children will be pinned to rank 2
because its export pin overrides the policy on ``home``. because its export pin overrides the policy on ``home``.
If a directory has an export pin and an ephemeral pin policy, the export pin
applies to the directory itself and the policy to its children. So:
::
mkdir -p home/{patrick,john}
setfattr -n ceph.dir.pin -v 0 home
setfattr -n ceph.dir.pin.distributed -v 1 home
The home directory inode (and all of its directory fragments) will always be
located on rank 0. All children including ``home/patrick`` and ``home/john``
will be ephemerally pinned according to the distributed policy. This may only
matter for some obscure performance advantages. All the same, it's mentioned
here so the override policy is clear.
@ -89,9 +89,9 @@ path.
Examples:: Examples::
ceph fs snap-schedule status / ceph fs snap-schedule status /
ceph fs snap-schedule status /foo/bar format=json ceph fs snap-schedule status /foo/bar --format=json
ceph fs snap-schedule list / ceph fs snap-schedule list /
ceph fs snap-schedule list / recursive=true # list all schedules in the tree ceph fs snap-schedule list / --recursive=true # list all schedules in the tree
Add and remove schedules Add and remove schedules
@ -115,7 +115,7 @@ Examples::
ceph fs snap-schedule add / 1h 11:55 ceph fs snap-schedule add / 1h 11:55
ceph fs snap-schedule add / 2h 11:55 ceph fs snap-schedule add / 2h 11:55
ceph fs snap-schedule remove / 1h 11:55 # removes one single schedule ceph fs snap-schedule remove / 1h 11:55 # removes one single schedule
ceph fs snap-schedule remove / 1h # removes all schedules with repeat=1h ceph fs snap-schedule remove / 1h # removes all schedules with --repeat=1h
ceph fs snap-schedule remove / # removes all schedules on path / ceph fs snap-schedule remove / # removes all schedules on path /
Add and remove retention policies Add and remove retention policies
@ -71,6 +71,12 @@ command. Older versions of Ceph require you to stop these daemons manually.
ceph fs set <fs_name> max_mds <old_max_mds> ceph fs set <fs_name> max_mds <old_max_mds>
9. Restore setting for ``allow_standby_replay`` (if applicable):
::
ceph fs set <fs_name> allow_standby_replay true
Upgrading pre-Firefly file systems past Jewel Upgrading pre-Firefly file systems past Jewel
============================================= =============================================
@ -10,6 +10,7 @@ vstart --cephadm
- Start a cluster with vstart, with cephadm configured - Start a cluster with vstart, with cephadm configured
- Manage any additional daemons with cephadm - Manage any additional daemons with cephadm
- Requires compiled ceph binaries
In this case, the mon and manager at a minimum are running in the usual In this case, the mon and manager at a minimum are running in the usual
vstart way, not managed by cephadm. But cephadm is enabled and the local vstart way, not managed by cephadm. But cephadm is enabled and the local
@ -87,6 +88,26 @@ When you're done, you can tear down the cluster with::
sudo ../src/ckill.sh # or, sudo ../src/ckill.sh # or,
sudo ../src/cephadm/cephadm rm-cluster --force --fsid `cat fsid` sudo ../src/cephadm/cephadm rm-cluster --force --fsid `cat fsid`
cephadm bootstrap --shared_ceph_folder
======================================
Cephadm can also be used directly without compiled ceph binaries.
Run cephadm like so::
sudo ./cephadm bootstrap --mon-ip 127.0.0.1 \
--ssh-private-key /home/<user>/.ssh/id_rsa \
--skip-mon-network \
--skip-monitoring-stack --single-host-defaults \
--skip-dashboard \
--shared_ceph_folder /home/<user>/path/to/ceph/
- ``~/.ssh/id_rsa`` is used as the cluster key. It is assumed that
this key is authorized to ssh with no passphrase to root@`hostname`.
Source code changes made in the ``pybind/mgr/`` directory then
require a daemon restart to take effect.
Note regarding network calls from CLI handlers Note regarding network calls from CLI handlers
============================================== ==============================================
@ -102,3 +123,126 @@ another handler is executing.
This means we should do very few synchronous calls to remote hosts. This means we should do very few synchronous calls to remote hosts.
As a guideline, cephadm should do at most ``O(1)`` network calls in CLI handlers. As a guideline, cephadm should do at most ``O(1)`` network calls in CLI handlers.
Everything else should be done asynchronously in other threads, like ``serve()``. Everything else should be done asynchronously in other threads, like ``serve()``.
Kcli: a virtualization management tool to make easy orchestrators development
=============================================================================
`Kcli <https://github.com/karmab/kcli>`_ is meant to interact with existing
virtualization providers (libvirt, KubeVirt, oVirt, OpenStack, VMware vSphere,
GCP and AWS) and to easily deploy and customize VMs from cloud images.
It allows you to set up an environment with several VMs with your preferred
configuration (memory, CPUs, disks) and OS flavor.
Main advantages:
----------------

- It is fast. Typically you can have a completely new Ceph cluster ready to debug
  and develop orchestrator features in less than 5 minutes.
- It is a "near production" lab. The lab created with kcli is very close to "real"
  clusters in QE labs or even in production, so it is easy to test "real things" in
  an almost "real environment".
- It is safe and isolated. It does not depend on what you have installed on your
  machine, and the VMs are isolated from your environment.
- It provides an easy "dev" environment. For non-compiled software pieces,
  for example any mgr module, it is an environment that allows you to test your
  changes interactively.
Installation:
-------------
Complete documentation is in the `kcli installation <https://kcli.readthedocs.io/en/latest/#installation>`_
guide, but we strongly suggest using the container image approach.

So, the things to do are:
- 1. Review the `requirements <https://kcli.readthedocs.io/en/latest/#libvirt-hypervisor-requisites>`_
  and install/configure whatever you need to meet them.

- 2. Get the kcli image and create an alias for executing the kcli command
::
# podman pull quay.io/karmab/kcli
# alias kcli='podman run --net host -it --rm --security-opt label=disable -v $HOME/.ssh:/root/.ssh -v $HOME/.kcli:/root/.kcli -v /var/lib/libvirt/images:/var/lib/libvirt/images -v /var/run/libvirt:/var/run/libvirt -v $PWD:/workdir -v /var/tmp:/ignitiondir quay.io/karmab/kcli'
.. note:: ``/var/lib/libvirt/images`` can be customized; be sure that you are
          using this folder for your OS images.

.. note:: Once you have used your kcli tool to create and use different labs, we
          suggest that you "save" and use your own kcli image. Why? kcli is under active
          development and it changes (for the moment only one tag, ``latest``, exists).
          Because the current functionality is more than enough for our purposes, and what
          we want is overall stability, we suggest storing the kcli image you are using in
          a safe place and updating your kcli alias to use your own image.
Test your kcli installation:
----------------------------
See the kcli `basic usage workflow <https://kcli.readthedocs.io/en/latest/#basic-workflow>`_
Create a Ceph lab cluster
-------------------------
To make this task easy we are going to use a kcli plan.

A kcli plan is a file where you can define the different settings you want to
have in a set of VMs.
You can define hardware parameters (CPU, memory, disks ...) and the operating system, and
it also allows you to automate the installation and configuration of any
software you want to have.

There is a `repository <https://github.com/karmab/kcli-plans>`_ with a collection of
plans that can be used for different purposes, and there are predefined plans to
install Ceph clusters using Ceph ansible or cephadm. Let's create our first Ceph
cluster using cephadm::
# kcli create plan -u https://github.com/karmab/kcli-plans/blob/master/ceph/ceph_cluster.yml
This will create a set of three VMs using the plan file pointed to by the url.
After a few minutes (depending on your machine's power), let's examine the cluster:

* Take a look at the VMs created::
# kcli list vms
* Log in to the bootstrap node::
# kcli ssh ceph-node-00
* Take a look at the Ceph cluster installed::
[centos@ceph-node-00 ~]$ sudo -i
[root@ceph-node-00 ~]# cephadm version
[root@ceph-node-00 ~]# cephadm shell
[ceph: root@ceph-node-00 /]# ceph orch host ls
Create a Ceph cluster to make developing mgr modules easy (Orchestrators and Dashboard)
------------------------------------------------------------------------------------------
The cephadm kcli plan (and cephadm) are prepared to do that.
The idea behind this method is to replace several python mgr folders in each of
the ceph daemons with the source code folders on your host machine.
This "trick" allows you to make changes in any orchestrator or dashboard
module and test them immediately (you only need to disable/enable the mgr
module, for example as sketched below).
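For example, after changing code of an mgr module inside the VMs, reloading the module is typically enough (the module name here is just an example)::

    # ceph mgr module disable dashboard
    # ceph mgr module enable dashboard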
So, in order to create a ceph cluster for development purposes, you must use the
same cephadm plan but with a new parameter pointing to your Ceph source code folder::
# kcli create plan -u https://github.com/karmab/kcli-plans/blob/master/ceph/ceph_cluster.yml -P ceph_dev_folder=/home/mycodefolder/ceph
Ceph Dashboard development
--------------------------
The Ceph dashboard module is not going to be loaded if you have not previously
generated the frontend bundle.

For now, in order to load the Ceph Dashboard module properly and to apply frontend
changes, you have to run "ng build" on your laptop::
# Start local frontend build with watcher (in background):
sudo dnf install -y nodejs
cd <path-to-your-ceph-repo>
cd src/pybind/mgr/dashboard/frontend
sudo chown -R <your-user>:root dist node_modules
NG_CLI_ANALYTICS=false npm ci
npm run build -- --deleteOutputPath=false --watch &
After saving your changes, the frontend bundle will be built again.
When completed, you'll see::
"Localized bundle generation complete."
Then you can reload your Dashboard browser tab.
@ -115,7 +115,7 @@ Mirroring module provides a family of commands to control mirroring of directory
snapshots. To add or remove directories, mirroring needs to be enabled for a given snapshots. To add or remove directories, mirroring needs to be enabled for a given
file system. To enable mirroring use:: file system. To enable mirroring use::
$ ceph fs snapshot mirror enable <fs> $ ceph fs snapshot mirror enable <fs_name>
.. note:: Mirroring module commands use `fs snapshot mirror` prefix as compared to .. note:: Mirroring module commands use `fs snapshot mirror` prefix as compared to
the monitor commands which `fs mirror` prefix. Make sure to use module the monitor commands which `fs mirror` prefix. Make sure to use module
@ -123,7 +123,7 @@ file system. To enable mirroring use::
To disable mirroring, use:: To disable mirroring, use::
$ ceph fs snapshot mirror disable <fs> $ ceph fs snapshot mirror disable <fs_name>
Once mirroring is enabled, add a peer to which directory snapshots are to be mirrored. Once mirroring is enabled, add a peer to which directory snapshots are to be mirrored.
Peers follow `<client>@<cluster>` specification and get assigned a unique-id (UUID) Peers follow `<client>@<cluster>` specification and get assigned a unique-id (UUID)
@ -131,9 +131,9 @@ when added. See `Creating Users` section on how to create Ceph users for mirrori
To add a peer use:: To add a peer use::
$ ceph fs snapshot mirror peer_add <fs> <remote_cluster_spec> [<remote_fs_name>] [<remote_mon_host>] [<cephx_key>] $ ceph fs snapshot mirror peer_add <fs_name> <remote_cluster_spec> [<remote_fs_name>] [<remote_mon_host>] [<cephx_key>]
`<remote_fs_name>` is optional, and default to `<fs>` (on the remote cluster). `<remote_fs_name>` is optional, and default to `<fs_name>` (on the remote cluster).
This requires the remote cluster ceph configuration and user keyring to be available in This requires the remote cluster ceph configuration and user keyring to be available in
the primary cluster. See `Bootstrap Peers` section to avoid this. `peer_add` additionally the primary cluster. See `Bootstrap Peers` section to avoid this. `peer_add` additionally
@ -144,21 +144,21 @@ a peer is the recommended way to add a peer.
To remove a peer use:: To remove a peer use::
$ ceph fs snapshot mirror peer_remove <fs> <peer_uuid> $ ceph fs snapshot mirror peer_remove <fs_name> <peer_uuid>
.. note:: See `Mirror Daemon Status` section on how to figure out Peer UUID. .. note:: See `Mirror Daemon Status` section on how to figure out Peer UUID.
To list file system mirror peers use:: To list file system mirror peers use::
$ ceph fs snapshot mirror peer_list <fs> $ ceph fs snapshot mirror peer_list <fs_name>
To configure a directory for mirroring, use:: To configure a directory for mirroring, use::
$ ceph fs snapshot mirror add <fs> <path> $ ceph fs snapshot mirror add <fs_name> <path>
To stop a mirroring directory snapshots use:: To stop a mirroring directory snapshots use::
$ ceph fs snapshot mirror remove <fs> <path> $ ceph fs snapshot mirror remove <fs_name> <path>
Only absolute directory paths are allowed. Also, paths are normalized by the mirroring Only absolute directory paths are allowed. Also, paths are normalized by the mirroring
module, therfore, `/a/b/../b` is equivalent to `/a/b`. module, therfore, `/a/b/../b` is equivalent to `/a/b`.
@ -210,8 +210,49 @@ Mirror Daemon Status
-------------------- --------------------
Mirror daemons get asynchronously notified about changes in file system mirroring status Mirror daemons get asynchronously notified about changes in file system mirroring status
and/or peer updates. CephFS mirror daemons provide admin socket commands for querying and/or peer updates.
mirror status. To check available commands for mirror status use::
CephFS mirroring module provides `mirror daemon status` interface to check mirror daemon
status::
$ ceph fs snapshot mirror daemon status <fs_name>
E.g::
$ ceph fs snapshot mirror daemon status a | jq
[
{
"daemon_id": 284167,
"filesystems": [
{
"filesystem_id": 1,
"name": "a",
"directory_count": 1,
"peers": [
{
"uuid": "02117353-8cd1-44db-976b-eb20609aa160",
"remote": {
"client_name": "client.mirror_remote",
"cluster_name": "ceph",
"fs_name": "backup_fs"
},
"stats": {
"failure_count": 1,
"recovery_count": 0
}
}
]
}
]
}
]
An entry per mirror daemon instance is displayed along with information such as configured
peers and basic stats. For more detailed stats, use the admin socket interface as detailed
below.
CephFS mirror daemons provide admin socket commands for querying mirror status. To check
available commands for mirror status use::
$ ceph --admin-daemon /path/to/mirror/daemon/admin/socket help $ ceph --admin-daemon /path/to/mirror/daemon/admin/socket help
{ {
@ -423,6 +423,26 @@ Note:
When using docker, as your device, you might need to run the script with sudo When using docker, as your device, you might need to run the script with sudo
permissions. permissions.
run-cephadm-e2e-tests.sh
.........................
``run-cephadm-e2e-tests.sh`` runs a subset of E2E tests to verify that the Dashboard and cephadm as
Orchestrator backend behave correctly.
Prerequisites: you need to install `KCLI
<https://kcli.readthedocs.io/en/latest/>`_ in your local machine.
Note:
This script is intended to be run as a jenkins job, so the cleanup is triggered only in a jenkins
environment. When run locally, the user shuts down the cluster when desired (i.e. after debugging).
Start E2E tests by running::
$ cd <your/ceph/repo/dir>
$ sudo chown -R $(id -un) src/pybind/mgr/dashboard/frontend/dist src/pybind/mgr/dashboard/frontend/node_modules
$ ./src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh
$ kcli delete plan -y ceph # After tests finish.
Other running options Other running options
..................... .....................
@ -1466,6 +1486,25 @@ same applies to other request types:
| DELETE | Yes | delete | 204 | | DELETE | Yes | delete | 204 |
+--------------+------------+----------------+-------------+ +--------------+------------+----------------+-------------+
To use a custom endpoint for the methods listed above, you can
use ``@RESTController.MethodMap``, for example on a (hypothetical) ``Ping`` controller:
.. code-block:: python

  import cherrypy
  from ..tools import ApiController, RESTController

  # hypothetical controller used for illustration
  @ApiController('ping')
  class Ping(RESTController):

      @RESTController.MethodMap(version='0.1')
      def create(self):
          return {"msg": "Hello"}
This decorator supports three parameters to customize the
endpoint:

* ``resource``: resource id.
* ``status=200``: set the HTTP status response code
* ``version``: the API version the endpoint belongs to
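As a rough illustration (assuming the hypothetical ``Ping`` controller above and the
dashboard's versioned ``application/vnd.ceph.api`` media type convention; authentication
is omitted here), a client selects the endpoint version through the ``Accept`` header::

    $ curl -k -X POST -H 'Accept: application/vnd.ceph.api.v0.1+json' \
        https://<mgr>:8443/api/ping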
How to use a custom API endpoint in a RESTController? How to use a custom API endpoint in a RESTController?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -1494,7 +1533,7 @@ used. To use a custom endpoint inside a restricted ``RESTController`` use
def some_post_endpoint(self, **data): def some_post_endpoint(self, **data):
return {"msg": data} return {"msg": data}
Both decorators also support four parameters to customize the Both decorators also support five parameters to customize the
endpoint: endpoint:
* ``method="GET"``: the HTTP method allowed to access this endpoint. * ``method="GET"``: the HTTP method allowed to access this endpoint.
@ -1503,6 +1542,7 @@ endpoint:
* ``status=200``: set the HTTP status response code * ``status=200``: set the HTTP status response code
* ``query_params=[]``: list of method parameter names that correspond to URL * ``query_params=[]``: list of method parameter names that correspond to URL
query parameters. query parameters.
* ``version``: the API version the endpoint belongs to
How to restrict access to a controller? How to restrict access to a controller?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -25,9 +25,12 @@ There are three ways to get packages:
Install packages with cephadm Install packages with cephadm
============================= =============================
#. Download the cephadm script:: #. Download the cephadm script
curl --silent --remote-name --location https://github.com/ceph/ceph/raw/octopus/src/cephadm/cephadm .. prompt:: bash $
:substitutions:
curl --silent --remote-name --location https://github.com/ceph/ceph/raw/|stable-release|/src/cephadm/cephadm
chmod +x cephadm chmod +x cephadm
#. Configure the Ceph repository based on the release name:: #. Configure the Ceph repository based on the release name::

View File

@ -36,6 +36,42 @@ Options
Perform a selftest. This mode performs a sanity check of ``stats`` module. Perform a selftest. This mode performs a sanity check of ``stats`` module.
Descriptions of fields
======================
.. describe:: chit
cap hit rate
.. describe:: rlat
read latency
.. describe:: wlat
write latency
.. describe:: mlat
metadata latency
.. describe:: dlease
dentry lease rate
.. describe:: ofiles
number of opened files
.. describe:: oicaps
number of pinned caps
.. describe:: oinodes
number of opened inodes
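A minimal usage sketch (assuming the ``stats`` mgr module and the ``client.fstop``
credentials that the utility expects by default are available)::

    ceph mgr module enable stats
    cephfs-top

The fields described above are then shown per client in the interactive display.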
Availability Availability
============ ============

View File

@ -1,31 +1,23 @@
MDS Autoscaler Module MDS Autoscaler Module
===================== =====================
The MDS Autoscaler Module monitors ``fsmap`` update notifications from the mgr The MDS Autoscaler Module monitors file systems to ensure sufficient MDS
daemon and takes action to spawn or kill MDS daemons for a file-system as per daemons are available. It works by adjusting the placement specification for
changes to the: the orchestrator backend of the MDS service. To enable, use:
- ``max_mds`` config value .. sh:
- ``standby_count_wanted`` config value
- standby promotions to active MDS state in case of active MDS rank death
Bumping up the ``max_mds`` config option value causes a standby mds to be promoted ceph mgr module enable mds_autoscaler
to hold an active rank. This leads to a drop in standby mds count. The MDS
Autoscaler module detects this deficit and the orchestrator module is notified
about the required MDS count. The orchestrator back-end then takes necessary
measures to spawn standby MDSs.
Dropping the ``max_mds`` config option causes the orchestrator back-end to kill The module will monitor the following file system settings to inform
standby mds to achieve the new reduced count. Preferably standby mds are chosen placement count adjustments:
to be killed when the ``max_mds`` count is dropped.
An increment and decrement of the ``standby_count_wanted`` config option value - ``max_mds`` file system setting
has a similar effect on the total MDS count. The orchestrator is notified about - ``standby_count_wanted`` file system setting
the change and necessary action to spawn or kill standby MDSs is taken.
A death of an active MDS rank also causes promotion of a standby mds to occupy The Ceph monitor daemons are still responsible for promoting or stopping MDS
the required active rank. The MDS Autoscaler notices the change in the standby according to these settings. The ``mds_autoscaler`` simply adjusts the
mds count and a message is passed to the orchestrator to maintain the necessary number of MDS which are spawned by the orchestrator.
MDS count.
NOTE: There is no CLI associated with the MDS Autoscaler Module. .. note: There is no CLI or module configurations as of now. Enable or disable
the module to turn on or off.

View File

@ -39,6 +39,7 @@ To optimize the performance of your cluster, refer to the following:
mon-lookup-dns mon-lookup-dns
Heartbeat Settings <mon-osd-interaction> Heartbeat Settings <mon-osd-interaction>
OSD Settings <osd-config-ref> OSD Settings <osd-config-ref>
DmClock Settings <mclock-config-ref>
BlueStore Settings <bluestore-config-ref> BlueStore Settings <bluestore-config-ref>
FileStore Settings <filestore-config-ref> FileStore Settings <filestore-config-ref>
Journal Settings <journal-ref> Journal Settings <journal-ref>

View File

@ -0,0 +1,368 @@
========================
mClock Config Reference
========================
.. index:: mclock; configuration
mClock profiles mask the low-level details from users, making it
easier for them to configure mclock.
To use mclock, you must provide the following input parameters:
* total capacity of each OSD
* an mclock profile to enable
Using the settings in the specified profile, the OSD determines and applies the
lower-level mclock and Ceph parameters. The parameters applied by the mclock
profile make it possible to tune the QoS between client I/O, recovery/backfill
operations, and other background operations (for example, scrub, snap trim, and
PG deletion). These background activities are considered best-effort internal
clients of Ceph.
.. index:: mclock; profile definition
mClock Profiles - Definition and Purpose
========================================
A mclock profile is *“a configuration setting that when applied on a running
Ceph cluster enables the throttling of the operations (IOPS) belonging to
different client classes (background recovery, scrub, snaptrim, client op,
osd subop)”*.
The mclock profile uses the capacity limits and the mclock profile selected by
the user to determine the low-level mclock resource control parameters.
Depending on the profile, lower-level mclock resource-control parameters and
some Ceph-configuration parameters are transparently applied.
The low-level mclock resource control parameters are the *reservation*,
*limit*, and *weight* that provide control of the resource shares, as
described in the :ref:`dmclock-qos` section.
.. index:: mclock; profile types
mClock Profile Types
====================
mclock profiles can be broadly classified into two types:
- **Built-in**: Users can choose between the following built-in profile types:
- **high_client_ops** (*default*):
This profile allocates more reservation and limit to external-client ops
as compared to background recoveries and other internal clients within
Ceph. This profile is enabled by default.
- **high_recovery_ops**:
This profile allocates more reservation to background recoveries as
compared to external clients and other internal clients within Ceph. For
example, an admin may enable this profile temporarily to speed-up background
recoveries during non-peak hours.
- **balanced**:
This profile allocates equal reservation to client ops and background
recovery ops.
- **Custom**: This profile gives users complete control over all the mclock
configuration parameters. Using this profile is not recommended without
a deep understanding of mclock and related Ceph-configuration options.
.. note:: Across the built-in profiles, internal clients of mclock (for example
"scrub", "snap trim", and "pg deletion") are given slightly lower
reservations, but higher weight and no limit. This ensures that
these operations are able to complete quickly if there are no other
competing services.
.. index:: mclock; built-in profiles
mClock Built-in Profiles
========================
When a built-in profile is enabled, the mClock scheduler calculates the low
level mclock parameters [*reservation*, *weight*, *limit*] based on the profile
enabled for each client type. The mclock parameters are calculated based on
the max OSD capacity provided beforehand. As a result, the following mclock
config parameters cannot be modified when using any of the built-in profiles:
- ``osd_mclock_scheduler_client_res``
- ``osd_mclock_scheduler_client_wgt``
- ``osd_mclock_scheduler_client_lim``
- ``osd_mclock_scheduler_background_recovery_res``
- ``osd_mclock_scheduler_background_recovery_wgt``
- ``osd_mclock_scheduler_background_recovery_lim``
- ``osd_mclock_scheduler_background_best_effort_res``
- ``osd_mclock_scheduler_background_best_effort_wgt``
- ``osd_mclock_scheduler_background_best_effort_lim``
The following Ceph options will not be modifiable by the user:
- ``osd_max_backfills``
- ``osd_recovery_max_active``
This is because the above options are internally modified by the mclock
scheduler in order to maximize the impact of the set profile.
By default, the *high_client_ops* profile is enabled to ensure that a larger
chunk of the bandwidth allocation goes to client ops. Background recovery ops
are given lower allocation (and therefore take a longer time to complete). But
there might be instances that necessitate giving higher allocations to either
client ops or recovery ops. In order to deal with such a situation, you can
enable one of the alternate built-in profiles mentioned above.
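For instance, before switching profiles, the currently active cluster-wide profile can be
checked with a command of the following form (a minimal sketch; per-OSD overrides, if any,
take precedence):

.. prompt:: bash #

   ceph config get osd osd_mclock_profile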
If any mClock profile (including "custom") is active, the following Ceph config
sleep options will be disabled:
- ``osd_recovery_sleep``
- ``osd_recovery_sleep_hdd``
- ``osd_recovery_sleep_ssd``
- ``osd_recovery_sleep_hybrid``
- ``osd_scrub_sleep``
- ``osd_delete_sleep``
- ``osd_delete_sleep_hdd``
- ``osd_delete_sleep_ssd``
- ``osd_delete_sleep_hybrid``
- ``osd_snap_trim_sleep``
- ``osd_snap_trim_sleep_hdd``
- ``osd_snap_trim_sleep_ssd``
- ``osd_snap_trim_sleep_hybrid``
The above sleep options are disabled to ensure that mclock scheduler is able to
determine when to pick the next op from its operation queue and transfer it to
the operation sequencer. This results in the desired QoS being provided across
all its clients.
.. index:: mclock; enable built-in profile
Steps to Enable mClock Profile
==============================
The following sections outline the steps required to enable a mclock profile.
Determining OSD Capacity Using Benchmark Tests
----------------------------------------------
To allow mclock to fulfill its QoS goals across its clients, it is most
important to have a good understanding of each OSD's capacity in terms of its
baseline throughputs (IOPS) across the Ceph nodes. To determine this capacity,
you must perform appropriate benchmarking tests. The steps for performing these
benchmarking tests are broadly outlined below.
Any existing benchmarking tool can be used for this purpose. The following
steps use the *Ceph Benchmarking Tool* (cbt_). Regardless of the tool
used, the steps described below remain the same.
As already described in the :ref:`dmclock-qos` section, the number of
shards and the bluestore's throttle parameters have an impact on the mclock op
queues. Therefore, it is critical to set these values carefully in order to
maximize the impact of the mclock scheduler.
:Number of Operational Shards:
We recommend using the default number of shards as defined by the
configuration options ``osd_op_num_shards``, ``osd_op_num_shards_hdd``, and
``osd_op_num_shards_ssd``. In general, a lower number of shards will increase
the impact of the mclock queues.
:Bluestore Throttle Parameters:
We recommend using the default values as defined by
``bluestore_throttle_bytes`` and ``bluestore_throttle_deferred_bytes``. But
these parameters may also be determined during the benchmarking phase as
described below.
Benchmarking Test Steps Using CBT
`````````````````````````````````
The steps below use the default shards and detail the steps used to determine the
correct bluestore throttle values.
.. note:: These steps, although manual in April 2021, will be automated in the future.
1. On the Ceph node hosting the OSDs, download cbt_ from git.
2. Install cbt and all the dependencies mentioned on the cbt github page.
3. Construct the Ceph configuration file and the cbt yaml file.
4. Ensure that the bluestore throttle options (i.e.
``bluestore_throttle_bytes`` and ``bluestore_throttle_deferred_bytes``) are
set to the default values.
5. Ensure that the test is performed on similar device types to get reliable
OSD capacity data.
6. The OSDs can be grouped together with the desired replication factor for the
test to ensure reliability of OSD capacity data.
7. After ensuring that the OSDs nodes are in the desired configuration, run a
simple 4KiB random write workload on the OSD(s) for 300 secs.
8. Note the overall throughput (IOPS) obtained from the cbt output file. This
value is the baseline throughput (IOPS) when the default bluestore
throttle options are in effect.
9. If the intent is to determine the bluestore throttle values for your
environment, then set the two options, ``bluestore_throttle_bytes`` and
``bluestore_throttle_deferred_bytes`` to 32 KiB (32768 bytes) each to begin
with. Otherwise, you may skip to the next section.
10. Run the 4KiB random write workload as before on the OSD(s) for 300 secs.
11. Note the overall throughput from the cbt log files and compare the value
against the baseline throughput in step 8.
12. If the throughput doesn't match with the baseline, increment the bluestore
throttle options by 2x and repeat steps 9 through 11 until the obtained
throughput is very close to the baseline value.
For example, during benchmarking on a machine with NVMe SSDs, a value of 256 KiB for
both bluestore throttle and deferred bytes was determined to maximize the impact
of mclock. For HDDs, the corresponding value was 40 MiB, where the overall
throughput was roughly equal to the baseline throughput. Note that in general
for HDDs, the bluestore throttle values are expected to be higher when compared
to SSDs.
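As an illustrative sketch of steps 9 through 12 (the values below are examples only, not
recommendations), the throttle options can be bumped at runtime and the workload re-run:

.. prompt:: bash #

   ceph config set osd bluestore_throttle_bytes 65536
   ceph config set osd bluestore_throttle_deferred_bytes 65536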
.. _cbt: https://github.com/ceph/cbt
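Before overriding the defaults described in the next section, a rough per-OSD IOPS figure can
also be obtained without cbt by using the OSD's built-in bench command (a quick sanity check
only, not a substitute for a proper benchmark); the example below writes 12 MiB in 4 KiB IOs
to ``osd.0`` and reports the achieved IOPS:

.. prompt:: bash #

   ceph tell osd.0 bench 12288000 4096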
Specifying Max OSD Capacity
----------------------------
The steps in this section may be performed only if the max osd capacity is
different from the default values (SSDs: 21500 IOPS and HDDs: 315 IOPS). The
option ``osd_mclock_max_capacity_iops_[hdd, ssd]`` can be set by specifying it
in either the **[global]** section or in a specific OSD section (**[osd.x]** of
your Ceph configuration file).
Alternatively, commands of the following form may be used:
.. prompt:: bash #
ceph config set [global, osd] osd_mclock_max_capacity_iops_[hdd,ssd] <value>
For example, the following command sets the max capacity for all the OSDs in a
Ceph node whose underlying device type is SSDs:
.. prompt:: bash #
ceph config set osd osd_mclock_max_capacity_iops_ssd 25000
To set the capacity for a specific OSD (for example "osd.0") whose underlying
device type is HDD, use a command like this:
.. prompt:: bash #
ceph config set osd.0 osd_mclock_max_capacity_iops_hdd 350
Specifying Which mClock Profile to Enable
-----------------------------------------
As already mentioned, the default mclock profile is set to *high_client_ops*.
The other values for the built-in profiles include *balanced* and
*high_recovery_ops*.
If there is a requirement to change the default profile, then the option
``osd_mclock_profile`` may be set in the **[global]** or **[osd]** section of
your Ceph configuration file before bringing up your cluster.
Alternatively, to change the profile during runtime, use the following command:
.. prompt:: bash #
ceph config set [global,osd] osd_mclock_profile <value>
For example, to change the profile to allow faster recoveries, the following
command can be used to switch to the *high_recovery_ops* profile:
.. prompt:: bash #
ceph config set osd osd_mclock_profile high_recovery_ops
.. note:: The *custom* profile is not recommended unless you are an advanced user.
And that's it! You are ready to run workloads on the cluster and check if the
QoS requirements are being met.
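To verify that the intended profile is active on a given OSD and to inspect the mclock-related
values derived from it, commands of the following form can be used (a sketch; the exact set of
reported options may vary by release):

.. prompt:: bash #

   ceph config show osd.0 osd_mclock_profile
   ceph config show osd.0 | grep osd_mclock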
.. index:: mclock; config settings
mClock Config Options
=====================
``osd_mclock_profile``
:Description: This sets the type of mclock profile to use for providing QoS
based on operations belonging to different classes (background
recovery, scrub, snaptrim, client op, osd subop). Once a built-in
profile is enabled, the lower level mclock resource control
parameters [*reservation, weight, limit*] and some Ceph
configuration parameters are set transparently. Note that the
above does not apply for the *custom* profile.
:Type: String
:Valid Choices: high_client_ops, high_recovery_ops, balanced, custom
:Default: ``high_client_ops``
``osd_mclock_max_capacity_iops``
:Description: Max IOPS capacity (at 4KiB block size) to consider per OSD
(overrides _ssd and _hdd if non-zero)
:Type: Float
:Default: ``0.0``
``osd_mclock_max_capacity_iops_hdd``
:Description: Max IOPS capacity (at 4KiB block size) to consider per OSD (for
rotational media)
:Type: Float
:Default: ``315.0``
``osd_mclock_max_capacity_iops_ssd``
:Description: Max IOPS capacity (at 4KiB block size) to consider per OSD (for
solid state media)
:Type: Float
:Default: ``21500.0``
``osd_mclock_cost_per_io_usec``
:Description: Cost per IO in microseconds to consider per OSD (overrides _ssd
and _hdd if non-zero)
:Type: Float
:Default: ``0.0``
``osd_mclock_cost_per_io_usec_hdd``
:Description: Cost per IO in microseconds to consider per OSD (for rotational
media)
:Type: Float
:Default: ``25000.0``
``osd_mclock_cost_per_io_usec_ssd``
:Description: Cost per IO in microseconds to consider per OSD (for solid state
media)
:Type: Float
:Default: ``50.0``
``osd_mclock_cost_per_byte_usec``
:Description: Cost per byte in microseconds to consider per OSD (overrides _ssd
and _hdd if non-zero)
:Type: Float
:Default: ``0.0``
``osd_mclock_cost_per_byte_usec_hdd``
:Description: Cost per byte in microseconds to consider per OSD (for rotational
media)
:Type: Float
:Default: ``5.2``
``osd_mclock_cost_per_byte_usec_ssd``
:Description: Cost per byte in microseconds to consider per OSD (for solid state
media)
:Type: Float
:Default: ``0.011``
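As a purely illustrative sketch (the values below are arbitrary and not recommendations),
switching to the *custom* profile and then adjusting some of the low-level parameters listed
earlier might look like:

.. prompt:: bash #

   ceph config set osd osd_mclock_profile custom
   ceph config set osd osd_mclock_scheduler_client_res 20
   ceph config set osd osd_mclock_scheduler_background_recovery_lim 80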

View File

@ -569,8 +569,8 @@ Operations
QoS Based on mClock QoS Based on mClock
------------------- -------------------
Ceph's use of mClock is currently experimental and should Ceph's use of mClock is now more refined and can be used by following the
be approached with an exploratory mindset. steps as described in `mClock Config Reference`_.
Core Concepts Core Concepts
````````````` `````````````
@ -597,7 +597,7 @@ words, the share of each type of service is controlled by three tags:
#. weight: the proportional share of capacity if extra capacity or system #. weight: the proportional share of capacity if extra capacity or system
oversubscribed. oversubscribed.
In Ceph operations are graded with "cost". And the resources allocated In Ceph, operations are graded with "cost". And the resources allocated
for serving various services are consumed by these "costs". So, for for serving various services are consumed by these "costs". So, for
example, the more reservation a service has, the more resource it is
guaranteed to possess, as long as it requires. Assuming there are 2 guaranteed to possess, as long as it requires. Assuming there are 2
@ -619,10 +619,9 @@ competitor "1". In the case of client ops, it is not clamped by the
limit setting, so it can make use of all the resources if there is no limit setting, so it can make use of all the resources if there is no
recovery ongoing. recovery ongoing.
CURRENT IMPLEMENTATION NOTE: the current experimental implementation CURRENT IMPLEMENTATION NOTE: the current implementation enforces the limit
does not enforce the limit values. As a first approximation we decided values. Therefore, if a service crosses the enforced limit, the op remains
not to prevent operations that would otherwise enter the operation in the operation queue until the limit is restored.
sequencer from doing so.
Subtleties of mClock Subtleties of mClock
```````````````````` ````````````````````
@ -644,7 +643,7 @@ means if *W* is sufficiently large and therefore *1/W* is sufficiently
small, the calculated tag may never be assigned as it will get a value small, the calculated tag may never be assigned as it will get a value
of the current time. The ultimate lesson is that values for weight of the current time. The ultimate lesson is that values for weight
should not be too large. They should be under the number of requests should not be too large. They should be under the number of requests
one expects to ve serviced each second. one expects to be serviced each second.
Caveats Caveats
``````` ```````
@ -1125,3 +1124,4 @@ Miscellaneous
.. _Pool & PG Config Reference: ../pool-pg-config-ref .. _Pool & PG Config Reference: ../pool-pg-config-ref
.. _Journal Config Reference: ../journal-ref .. _Journal Config Reference: ../journal-ref
.. _cache target dirty high ratio: ../../operations/pools#cache-target-dirty-high-ratio .. _cache target dirty high ratio: ../../operations/pools#cache-target-dirty-high-ratio
.. _mClock Config Reference: ../mclock-config-ref

View File

@ -116,7 +116,7 @@ The import-only live-migration process is initiated by running the same
and providing a JSON-encoded ``source-spec`` to describe how to access and providing a JSON-encoded ``source-spec`` to describe how to access
the source image data. This ``source-spec`` can either be passed the source image data. This ``source-spec`` can either be passed
directly via the `--source-spec` optional, or via a file or STDIN via the directly via the `--source-spec` optional, or via a file or STDIN via the
`--source-spec-file` optional:: `--source-spec-path` optional::
$ rbd migration prepare --import-only --source-spec "<JSON>" migration_target $ rbd migration prepare --import-only --source-spec "<JSON>" migration_target
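As a hypothetical sketch (the ``raw``/``file`` stream shown here is just one possible source
format; adjust the spec to your actual source), the same spec can be stored in a file and
passed via ``--source-spec-path``::

    $ cat /tmp/spec.json
    {
      "type": "raw",
      "stream": {
        "type": "file",
        "file_path": "/mnt/image.raw"
      }
    }
    $ rbd migration prepare --import-only --source-spec-path /tmp/spec.json migration_target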

View File

@ -47,8 +47,8 @@ need to be enabled.::
rbd persistent cache mode = {cache-mode} rbd persistent cache mode = {cache-mode}
rbd plugins = pwl_cache rbd plugins = pwl_cache
Value of {cache-mode} can be ``rwl`` or ``ssd``. By default it is Value of {cache-mode} can be ``rwl``, ``ssd`` or ``disabled``. By default the
``disabled`` cache is disabled.
Here are some cache configuration settings: Here are some cache configuration settings:
@ -56,10 +56,12 @@ Here are some cache configuration settings:
have DAX enabled (see `DAX`_) when using ``rwl`` mode to avoid performance have DAX enabled (see `DAX`_) when using ``rwl`` mode to avoid performance
degradation. degradation.
- ``rbd_persistent_cache_size`` The cache size per image. - ``rbd_persistent_cache_size`` The cache size per image. The minimum cache
size is 1 GB.
- ``rbd_persistent_cache_log_periodic_stats`` This is a debug option. It is - ``rbd_persistent_cache_log_periodic_stats`` This is a debug option. It is
used to emit periodic perf stats to the debug log. used to emit periodic perf stats to the debug log if ``debug rbd pwl`` is
set to ``1`` or higher.
The above configurations can be set per-host, per-pool, per-image etc. Eg, to The above configurations can be set per-host, per-pool, per-image etc. Eg, to
set per-host, add the overrides to the appropriate `section`_ in the host's set per-host, add the overrides to the appropriate `section`_ in the host's
@ -70,21 +72,21 @@ Cache Status
------------ ------------
The persistent write-back cache is enabled when the exclusive lock is acquired, The persistent write-back cache is enabled when the exclusive lock is acquired,
and it is closed when the exclusive lock is released. To check the transient and it is closed when the exclusive lock is released. To check the cache status,
cache status, users may use the command ``rbd status``. :: users may use the command ``rbd status``. ::
rbd status {pool-name}/{image-name} rbd status {pool-name}/{image-name}
The status of the cache is shown, including present, clean, cache size and the The status of the cache is shown, including present, clean, cache size and the
position. location. Currently the status is updated only at the time the cache is opened
and closed and therefore may appear to be out of date (e.g. show that the cache
is clean when it is actually dirty).
For example:: For example::
$ rbd status rbd/foo $ rbd status rbd/foo
Watchers: none Watchers: none
image cache state: Image cache state: {"present":"true","empty":"false","clean":"true","cache_type":"ssd","pwl_host":"sceph9","pwl_path":"/tmp/rbd-pwl.rbd.abcdef123456.pool","pwl_size":1073741824}
clean: false size: 1 GiB host: sceph9 path: /tmp
Discard Cache Discard Cache
------------- -------------

View File

@ -1,7 +1,21 @@
#!/bin/bash -e #!/bin/bash -e
SCRIPTNAME="$(basename "${0}")"
BASEDIR="$(readlink -f "$(dirname "${0}")")"
if [ ! -d .git ]; then if [ ! -d .git ]; then
echo "no .git present. run this from the base dir of the git checkout." echo "$SCRIPTNAME: Full path to the script: $BASEDIR/$SCRIPTNAME"
echo "$SCRIPTNAME: No .git present. Run this from the base dir of the git checkout."
exit 1
fi
# Running the script from a directory containing a colon anywhere in the path
# will expose us to the dreaded "[BUG] npm run [command] failed if the directory
# path contains colon" bug https://github.com/npm/cli/issues/633
# (see https://tracker.ceph.com/issues/39556 for details)
if [[ "$BASEDIR" == *:* ]] ; then
echo "$SCRIPTNAME: Full path to the script: $BASEDIR/$SCRIPTNAME"
echo "$SCRIPTNAME: The path to the script contains a colon. Their presence has been known to break the script."
exit 1 exit 1
fi fi

View File

@ -37,7 +37,7 @@
"gnetId": null, "gnetId": null,
"graphTooltip": 0, "graphTooltip": 0,
"id": null, "id": null,
"iteration": 1557386759572, "iteration": 1615564911000,
"links": [], "links": [],
"panels": [ "panels": [
{ {
@ -182,7 +182,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts).*\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts).*\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts).*\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts).*\"}[1m]))\n) * 100", "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{mode}}", "legendFormat": "{{mode}}",
@ -283,14 +283,14 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "(node_memory_MemTotal{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]].*\"})- (\n (node_memory_MemFree{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n (node_memory_Cached{instance=~\"[[ceph_hosts]].*\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n (node_memory_Buffers{instance=~\"[[ceph_hosts]].*\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]].*\"}) +\n (node_memory_Slab{instance=~\"[[ceph_hosts]].*\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]].*\"})\n )\n \n", "expr": "(node_memory_MemTotal{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"})\n )\n \n",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "used", "legendFormat": "used",
"refId": "D" "refId": "D"
}, },
{ {
"expr": "node_memory_MemFree{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]].*\"} ", "expr": "node_memory_MemFree{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} ",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"intervalFactor": 1, "intervalFactor": 1,
@ -298,7 +298,7 @@
"refId": "A" "refId": "A"
}, },
{ {
"expr": "(node_memory_Cached{instance=~\"[[ceph_hosts]].*\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n(node_memory_Buffers{instance=~\"[[ceph_hosts]].*\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]].*\"}) +\n(node_memory_Slab{instance=~\"[[ceph_hosts]].*\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]].*\"}) \n", "expr": "(node_memory_Cached{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) \n",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"intervalFactor": 1, "intervalFactor": 1,
@ -306,7 +306,7 @@
"refId": "C" "refId": "C"
}, },
{ {
"expr": "node_memory_MemTotal{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]].*\"} ", "expr": "node_memory_MemTotal{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} ",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"intervalFactor": 1, "intervalFactor": 1,
@ -401,7 +401,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m])\n)", "expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{device}}.rx", "legendFormat": "{{device}}.rx",
@ -410,7 +410,7 @@
"textEditor": true "textEditor": true
}, },
{ {
"expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m])\n)", "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{device}}.tx", "legendFormat": "{{device}}.tx",
@ -501,7 +501,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "irate(node_network_receive_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])", "expr": "irate(node_network_receive_drop{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
"format": "time_series", "format": "time_series",
"instant": false, "instant": false,
"intervalFactor": 1, "intervalFactor": 1,
@ -509,7 +509,7 @@
"refId": "A" "refId": "A"
}, },
{ {
"expr": "irate(node_network_transmit_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])", "expr": "irate(node_network_transmit_drop{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{device}}.tx", "legendFormat": "{{device}}.tx",
@ -621,7 +621,7 @@
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts).*\"})", "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"refId": "A", "refId": "A",
@ -685,7 +685,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "irate(node_network_receive_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])", "expr": "irate(node_network_receive_errs{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
"format": "time_series", "format": "time_series",
"instant": false, "instant": false,
"intervalFactor": 1, "intervalFactor": 1,
@ -693,7 +693,7 @@
"refId": "A" "refId": "A"
}, },
{ {
"expr": "irate(node_network_transmit_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])", "expr": "irate(node_network_transmit_errs{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{device}}.tx", "legendFormat": "{{device}}.tx",
@ -798,7 +798,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts).*\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )", "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) writes", "legendFormat": "{{device}}({{ceph_daemon}}) writes",
@ -807,7 +807,7 @@
"textEditor": true "textEditor": true
}, },
{ {
"expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts).*\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )", "expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"intervalFactor": 1, "intervalFactor": 1,
@ -899,14 +899,14 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", "expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) write", "legendFormat": "{{device}}({{ceph_daemon}}) write",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", "expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"format": "time_series", "format": "time_series",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) read", "legendFormat": "{{device}}({{ceph_daemon}}) read",
@ -992,7 +992,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance,device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", "expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"intervalFactor": 1, "intervalFactor": 1,
@ -1083,7 +1083,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts).*\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts).*\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")", "expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"intervalFactor": 1, "intervalFactor": 1,

View File

@ -131,7 +131,6 @@
"#d44a3a" "#d44a3a"
], ],
"datasource": "$datasource", "datasource": "$datasource",
"decimals": 0,
"description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster", "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster",
"decimals": 2, "decimals": 2,
"format": "percentunit", "format": "percentunit",
@ -215,7 +214,6 @@
"#d44a3a" "#d44a3a"
], ],
"datasource": "$datasource", "datasource": "$datasource",
"decimals": 0,
"description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)", "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)",
"decimals": 2, "decimals": 2,
"format": "percentunit", "format": "percentunit",
@ -433,7 +431,7 @@
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr" : "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)", "expr" : "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device, ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 1, "intervalFactor": 1,

File diff suppressed because it is too large

View File

@ -0,0 +1,8 @@
roles:
- [mon.a, mgr.x, mds.a, osd.0, client.0]
openstack:
- volumes: # attached to each instance
count: 1
size: 5 # GB
- machine:
disk: 10 # GB

View File

@ -0,0 +1,6 @@
os_type: rhel
os_version: "8.4"
overrides:
selinux:
whitelist:
- scontext=system_u:system_r:logrotate_t:s0

View File

@ -1 +1 @@
rhel_8.3.yaml rhel_8.4.yaml

View File

@ -8,9 +8,11 @@ overrides:
tasks: tasks:
- pexec: - pexec:
all: all:
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
- sudo dnf -y module disable container-tools - sudo dnf -y module disable container-tools
- sudo dnf -y install 'dnf-command(copr)' - sudo dnf -y install 'dnf-command(copr)'
- sudo dnf -y copr enable rhcontainerbot/container-selinux - sudo dnf -y copr enable rhcontainerbot/container-selinux
- sudo curl -L -o /etc/yum.repos.d/devel:kubic:libcontainers:stable.repo https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/CentOS_8/devel:kubic:libcontainers:stable.repo - sudo curl -L -o /etc/yum.repos.d/devel:kubic:libcontainers:stable.repo https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/CentOS_8/devel:kubic:libcontainers:stable.repo
- sudo dnf remove -y podman - sudo dnf remove -y podman
- sudo dnf -y install podman - sudo dnf -y install podman
- sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf

View File

@ -8,9 +8,11 @@ overrides:
tasks: tasks:
- pexec: - pexec:
all: all:
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
- sudo dnf -y module disable container-tools - sudo dnf -y module disable container-tools
- sudo dnf -y install 'dnf-command(copr)' - sudo dnf -y install 'dnf-command(copr)'
- sudo dnf -y copr enable rhcontainerbot/container-selinux - sudo dnf -y copr enable rhcontainerbot/container-selinux
- sudo curl -L -o /etc/yum.repos.d/devel:kubic:libcontainers:stable.repo https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/CentOS_8/devel:kubic:libcontainers:stable.repo - sudo curl -L -o /etc/yum.repos.d/devel:kubic:libcontainers:stable.repo https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/CentOS_8/devel:kubic:libcontainers:stable.repo
- sudo dnf remove -y podman - sudo dnf remove -y podman
- sudo dnf -y install podman - sudo dnf -y install podman
- sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf

View File

@ -5,8 +5,9 @@ os_version: "18.04"
tasks: tasks:
- pexec: - pexec:
all: all:
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
- curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/Release.key | sudo apt-key add - - curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/Release.key | sudo apt-key add -
- echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list - echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list
- sudo apt update - sudo apt update
- sudo apt -y install podman - sudo apt -y install podman
- echo -e "[[registry]]\nlocation = 'docker.io'\n\n[[registry.mirror]]\nlocation='docker-mirror.front.sepia.ceph.com:5000'\n" | sudo tee /etc/containers/registries.conf - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf

View File

@ -5,8 +5,9 @@ os_version: "20.04"
tasks: tasks:
- pexec: - pexec:
all: all:
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
- curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_20.04/Release.key | sudo apt-key add - - curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_20.04/Release.key | sudo apt-key add -
- echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_20.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list - echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_20.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list
- sudo apt update - sudo apt update
- sudo apt -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" install podman containernetworking-plugins - sudo apt -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" install podman containernetworking-plugins
- echo -e "[[registry]]\nlocation = 'docker.io'\n\n[[registry.mirror]]\nlocation='docker-mirror.front.sepia.ceph.com:5000'\n" | sudo tee /etc/containers/registries.conf - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf

View File

@ -5,8 +5,9 @@ os_version: "20.04"
tasks: tasks:
- pexec: - pexec:
all: all:
- sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
- curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/testing/xUbuntu_20.04/Release.key | sudo apt-key add - - curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/testing/xUbuntu_20.04/Release.key | sudo apt-key add -
- echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/testing/xUbuntu_20.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:testing.list - echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/testing/xUbuntu_20.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:testing.list
- sudo apt update - sudo apt update
- sudo apt -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" install podman containernetworking-plugins - sudo apt -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" install podman containernetworking-plugins
- echo -e "[[registry]]\nlocation = 'docker.io'\n\n[[registry.mirror]]\nlocation='docker-mirror.front.sepia.ceph.com:5000'\n" | sudo tee /etc/containers/registries.conf - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf

View File

@ -1 +1 @@
../all/ubuntu_18.04.yaml ../all/ubuntu_20.04.yaml

View File

@ -1 +1 @@
../all/ubuntu_18.04.yaml ../all/ubuntu_20.04.yaml

View File

@ -0,0 +1 @@
.qa/distros/supported/centos_latest.yaml

View File

@ -0,0 +1 @@
.qa/cephfs/begin.yaml

View File

@ -0,0 +1 @@
.qa/cephfs/clusters/1-node-1-mds-1-osd.yaml

ceph/qa/suites/fs/full/conf Symbolic link
View File

@ -0,0 +1 @@
.qa/cephfs/conf

View File

@ -0,0 +1 @@
.qa/distros/supported-random-distro$

View File

@ -0,0 +1,2 @@
tasks:
- ceph-fuse:

View File

@ -0,0 +1 @@
.qa/objectstore/bluestore-bitmap.yaml

View File

@ -0,0 +1,19 @@
overrides:
ceph:
conf:
mgr:
debug client: 20
log-ignorelist:
- OSD full dropping all updates
- OSD near full
- pausewr flag
- failsafe engaged, dropping updates
- failsafe disengaged, no longer dropping
- is full \(reached quota
- POOL_FULL
- POOL_NEARFULL
- POOL_BACKFILLFULL
- PG_DEGRADED
- OSD_OUT_OF_ORDER_FULL
- OSD_NEARFULL
- OSD_FULL

View File

@ -0,0 +1 @@
.qa/cephfs/overrides/frag_enable.yaml

View File

@ -0,0 +1 @@
.qa/overrides/no_client_pidfile.yaml

View File

@ -0,0 +1 @@
.qa/cephfs/overrides/whitelist_health.yaml

View File

@ -0,0 +1 @@
.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml

View File

@ -0,0 +1,21 @@
overrides:
ceph:
conf:
global:
osd_pool_default_size: 1
osd_pool_default_min_size: 1
client:
debug ms: 1
debug client: 20
mds:
debug ms: 1
debug mds: 20
osd: # force bluestore since it's required for ec overwrites
osd objectstore: bluestore
bluestore block size: 1073741824
tasks:
- workunit:
cleanup: false
clients:
client.0:
- fs/full/subvolume_rm.sh

View File

@ -1 +0,0 @@
./.qa/cephfs/overrides/whitelist_health.yaml

View File

@ -0,0 +1,14 @@
overrides:
ceph:
log-ignorelist:
- overall HEALTH_
- \(FS_DEGRADED\)
- \(MDS_FAILED\)
- \(MDS_DEGRADED\)
- \(FS_WITH_FAILED_MDS\)
- \(MDS_DAMAGE\)
- \(MDS_ALL_DOWN\)
- \(MDS_UP_LESS_THAN_MAX\)
- \(FS_INLINE_DATA_DEPRECATED\)
- Reduced data availability
- Degraded data redundancy

View File

@ -1,7 +1,7 @@
# Right now, cephfs-shell is only available as a package on Ubuntu # Right now, cephfs-shell is only available as a package on Ubuntu
# This overrides the random distribution that's chosen in the other yaml fragments. # This overrides the random distribution that's chosen in the other yaml fragments.
os_type: ubuntu os_type: ubuntu
os_version: "18.04" os_version: "20.04"
tasks: tasks:
- cephfs_test_runner: - cephfs_test_runner:
modules: modules:

View File

@ -0,0 +1 @@
.qa/distros/supported/centos_latest.yaml

View File

@ -11,6 +11,11 @@ tasks:
- ceph-mgr-rook - ceph-mgr-rook
- ceph-mgr-cephadm - ceph-mgr-cephadm
- cephadm - cephadm
- ceph-immutable-object-cache
- python3-rados
- python3-rgw
- python3-rbd
- python3-cephfs
extra_packages: ['librados2'] extra_packages: ['librados2']
- print: "**** done installing nautilus" - print: "**** done installing nautilus"
- ceph: - ceph:

View File

@ -0,0 +1 @@
.qa/distros/supported/centos_latest.yaml

View File

@ -11,6 +11,11 @@ tasks:
- ceph-mgr-rook - ceph-mgr-rook
- ceph-mgr-cephadm - ceph-mgr-cephadm
- cephadm - cephadm
- ceph-immutable-object-cache
- python3-rados
- python3-rgw
- python3-rbd
- python3-cephfs
extra_packages: ['librados2'] extra_packages: ['librados2']
- print: "**** done installing nautilus" - print: "**** done installing nautilus"
- ceph: - ceph:

View File

@ -11,6 +11,11 @@ tasks:
- ceph-mgr-rook - ceph-mgr-rook
- ceph-mgr-cephadm - ceph-mgr-cephadm
- cephadm - cephadm
- ceph-immutable-object-cache
- python3-rados
- python3-rgw
- python3-rbd
- python3-cephfs
extra_packages: ['librados2'] extra_packages: ['librados2']
- print: "**** done installing nautilus" - print: "**** done installing nautilus"
- ceph: - ceph:

View File

@ -1 +0,0 @@
.qa/cephfs/overrides/whitelist_health.yaml

View File

@ -0,0 +1,14 @@
overrides:
ceph:
log-ignorelist:
- overall HEALTH_
- \(FS_DEGRADED\)
- \(MDS_FAILED\)
- \(MDS_DEGRADED\)
- \(FS_WITH_FAILED_MDS\)
- \(MDS_DAMAGE\)
- \(MDS_ALL_DOWN\)
- \(MDS_UP_LESS_THAN_MAX\)
- \(FS_INLINE_DATA_DEPRECATED\)
- Reduced data availability
- Degraded data redundancy

View File

@ -1 +0,0 @@
.qa/distros/supported-random-distro$/

View File

@ -0,0 +1,3 @@
overrides:
kclient:
mntopts: ["nowsync"]

View File

@ -0,0 +1,3 @@
overrides:
kclient:
mntopts: ["wsync"]

Some files were not shown because too many files have changed in this diff