mirror of https://git.proxmox.com/git/ceph.git
synced 2025-04-28 15:01:36 +00:00

import sources for Ceph Quincy 17.2.8

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>

This commit is contained in:
parent eccf199d63
commit b009440314

ceph/.github/CODEOWNERS (vendored, 1 line changed)
@ -138,5 +138,6 @@ README* @ceph/doc-writers
/src/test/run-rbd* @ceph/rbd
/src/test/test_rbd* @ceph/rbd
/src/tools/rbd* @ceph/rbd
/systemd/ceph-rbd-mirror* @ceph/rbd
/systemd/rbdmap.service.in @ceph/rbd
/udev/50-rbd.rules @ceph/rbd

ceph/.github/labeler.yml (vendored, 54 lines changed)
@ -198,21 +198,60 @@ CI:
  - .github/**

rbd:
  - doc/dev/rbd*
  - doc/man/8/ceph-rbdnamer.rst
  - doc/man/8/rbd*
  - doc/rbd/**
  - doc/start/quick-rbd.rst
  - examples/librbd/**
  - examples/rbd-replay/**
  - qa/rbd/**
  - qa/run_xfstests*
  - qa/suites/krbd/**
  - qa/suites/rbd/**
  - qa/tasks/ceph_iscsi_client.py
  - qa/tasks/metadata.yaml
  - qa/tasks/qemu.py
  - qa/tasks/rbd*
  - qa/tasks/userdata*
  - qa/workunits/cls/test_cls_journal.sh
  - qa/workunits/cls/test_cls_lock.sh
  - qa/workunits/cls/test_cls_rbd.sh
  - qa/workunits/rbd/**
  - qa/workunits/windows/**
  - src/ceph-rbdnamer
  - src/cls/journal/**
  - src/cls/lock/**
  - src/cls/rbd/**
  - src/common/options/rbd*
  - src/etc-rbdmap
  - src/include/krbd.h
  - src/include/rbd*
  - src/include/rbd/**
  - src/journal/**
  - src/krbd.cc
  - src/librbd/**
  - src/ocf/**
  - src/pybind/mgr/rbd_support/**
  - src/pybind/rbd/**
  - src/rbd*
  - src/rbd*/**
  - src/test/cli/rbd/**
  - src/test/cli-integration/rbd/**
  - src/test/cls_journal/**
  - src/test/cls_lock/**
  - src/test/cls_rbd/**
  - src/test/journal/**
  - src/test/librbd/**
  - src/test/rbd_mirror/**
  - src/tools/rbd/**
  - src/tools/rbd_ggate/**
  - src/tools/rbd_mirror/**
  - src/tools/rbd_nbd/**
  - src/tools/rbd_wnbd/**
  - src/test/pybind/test_rbd.py
  - src/test/rbd*
  - src/test/rbd*/**
  - src/test/run-rbd*
  - src/test/test_rbd*
  - src/tools/rbd*/**
  - systemd/ceph-rbd-mirror*
  - systemd/rbdmap.service.in
  - udev/50-rbd.rules

rgw:
  - qa/suites/rgw/**
@ -239,8 +278,7 @@ ceph-volume:
  - src/python-common/ceph/deployment/drive_selection/**

tests:
  - qa/tasks/**
  - qa/workunits/**
  - qa/**
  - src/test/**

nfs:

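As a rough illustration of how these glob patterns drive labelling, here is a
hypothetical sketch only; the real GitHub labeler action uses its own glob
matcher, and the patterns below are just a sample of the list above:

    # Hypothetical sketch: approximates glob matching with fnmatch.
    import fnmatch

    RBD_PATTERNS = [
        "doc/rbd/**",
        "qa/suites/rbd/**",
        "src/librbd/**",
        "src/test/rbd*",
        "systemd/rbdmap.service.in",
    ]

    def labels_for(path):
        """Return {'rbd'} if the changed path matches any rbd pattern."""
        if any(fnmatch.fnmatch(path, pattern) for pattern in RBD_PATTERNS):
            return {"rbd"}
        return set()

    print(labels_for("src/librbd/ImageCtx.cc"))  # {'rbd'}
    print(labels_for("src/mon/Monitor.cc"))      # set()
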
ceph/CMakeLists.txt

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.16)

project(ceph
  VERSION 17.2.7
  VERSION 17.2.8
  LANGUAGES CXX C ASM)

cmake_policy(SET CMP0028 NEW)
@ -243,6 +243,15 @@ set(HAVE_LIBURING ${WITH_LIBURING})
CMAKE_DEPENDENT_OPTION(WITH_SYSTEM_LIBURING "Require and build with system liburing" OFF
  "HAVE_LIBAIO;WITH_BLUESTORE" OFF)

if(WITH_LIBURING)
  if(WITH_SYSTEM_LIBURING)
    find_package(uring REQUIRED)
  else()
    include(Builduring)
    build_uring()
  endif()
endif()

CMAKE_DEPENDENT_OPTION(WITH_BLUESTORE_PMEM "Enable PMDK libraries" OFF
  "WITH_BLUESTORE" OFF)

@ -657,7 +666,7 @@ if(WITH_SYSTEM_NPM)
    message(FATAL_ERROR "Can't find npm.")
  endif()
endif()
set(DASHBOARD_FRONTEND_LANGS "" CACHE STRING
set(DASHBOARD_FRONTEND_LANGS "ALL" CACHE STRING
  "List of comma separated ceph-dashboard frontend languages to build. \
  Use value `ALL` to build all languages")
CMAKE_DEPENDENT_OPTION(WITH_MGR_ROOK_CLIENT "Enable the mgr's Rook support" ON

ceph/PendingReleaseNotes

@ -1,3 +1,22 @@
>=17.2.8
--------

* RADOS: `get_pool_is_selfmanaged_snaps_mode` C++ API has been deprecated
  due to being prone to false negative results. Its safer replacement is
  `pool_is_in_selfmanaged_snaps_mode`.
* RBD: When diffing against the beginning of time (`fromsnapname == NULL`) in
  fast-diff mode (`whole_object == true` with `fast-diff` image feature enabled
  and valid), diff-iterate is now guaranteed to execute locally if exclusive
  lock is available. This brings a dramatic performance improvement for QEMU
  live disk synchronization and backup use cases.
* RBD: The option ``--image-id`` has been added to the `rbd children` CLI command,
  so it can be run for images in the trash.
* RBD: `RBD_IMAGE_OPTION_CLONE_FORMAT` option has been exposed in Python
  bindings via the `clone_format` optional parameter to the `clone`, `deep_copy` and
  `migration_prepare` methods.
* RBD: `RBD_IMAGE_OPTION_FLATTEN` option has been exposed in Python bindings via
  the `flatten` optional parameter to the `deep_copy` and `migration_prepare`
  methods (see the sketch after this list).

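As a rough illustration of the new Python binding parameters and the local
diff-iterate path, here is a hypothetical sketch; pool, image and snapshot
names are made up, and the keyword arguments are assumed to match the notes
above:

    # Hypothetical sketch: pool/image/snapshot names are made up.
    import rados
    import rbd

    def report(offset, length, exists):
        # diff_iterate callback: called once per reported extent
        print("extent %d~%d exists=%s" % (offset, length, exists))

    with rados.Rados(conffile="/etc/ceph/ceph.conf") as cluster:
        with cluster.open_ioctx("rbd") as ioctx:
            with rbd.Image(ioctx, "parent") as image:
                # Diff against the beginning of time in fast-diff mode; per
                # the note above this now runs locally when the exclusive
                # lock is available.
                image.diff_iterate(0, image.size(), None, report,
                                   whole_object=True)
                # deep_copy() gained optional clone_format/flatten keyword
                # arguments per the notes above.
                image.deep_copy(ioctx, "parent-copy", flatten=True)
            # clone() gained an optional clone_format keyword argument.
            rbd.RBD().clone(ioctx, "parent", "snap1", ioctx, "child",
                            clone_format=2)
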
>=17.2.7
--------

@ -43,6 +62,19 @@

* CEPHFS: After recovering a Ceph File System by following the disaster recovery
  procedure, the recovered files under the `lost+found` directory can now be deleted.
* `ceph config dump --format <json|xml>` output will display the localized
  option names instead of their normalized versions. For example,
  "mgr/prometheus/x/server_port" will be displayed instead of
  "mgr/prometheus/server_port". This matches the output of the non pretty-print
  formatted version of the command (see the sketch after this list).
* RADOS: For bug 62338 (https://tracker.ceph.com/issues/62338), we did not choose
  to condition the fix on a server flag in order to simplify backporting. As
  a result, in rare cases it may be possible for a PG to flip between two acting
  sets while an upgrade to a version with the fix is in progress. If you observe
  this behavior, you should be able to work around it by completing the upgrade or
  by disabling async recovery by setting osd_async_recovery_min_cost to a very
  large value on all OSDs until the upgrade is complete:
  ``ceph config set osd osd_async_recovery_min_cost 1099511627776``

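A hypothetical snippet showing one way to read the localized option names from
the JSON output; the field names ("section", "name", "value") are assumptions
about typical `ceph config dump -f json` output, not a documented schema:

    # Hypothetical sketch: field names are assumptions.
    import json
    import subprocess

    out = subprocess.run(["ceph", "config", "dump", "--format", "json"],
                         capture_output=True, text=True, check=True).stdout
    for entry in json.loads(out):
        # With this change the localized name (e.g.
        # "mgr/prometheus/x/server_port") is shown as-is.
        print(entry.get("section"), entry.get("name"), entry.get("value"))
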
>=17.2.6
--------

@ -105,6 +137,142 @@
  recommend that users with versioned buckets, especially those that existed
  on prior releases, use these new tools to check whether their buckets are
  affected and to clean them up accordingly.
* mgr/snap-schedule: For clusters with multiple CephFS file systems, all the
  snap-schedule commands now expect the '--fs' argument.
* The `mon_cluster_log_file_level` and `mon_cluster_log_to_syslog_level` options
  have been removed. Henceforth, users should use the new generic option
  `mon_cluster_log_level` to control the cluster log level verbosity for the cluster
  log file as well as for all external entities.
* RGW: Fixed an S3 Object Lock bug with PutObjectRetention requests that specify
  a RetainUntilDate after the year 2106. This date was truncated to 32 bits when
  stored, so a much earlier date was used for object lock enforcement. This does
  not affect PutBucketObjectLockConfiguration where a duration is given in Days.
  The RetainUntilDate encoding is fixed for new PutObjectRetention requests, but
  cannot repair the dates of existing object locks. Such objects can be identified
  with a HeadObject request based on the x-amz-object-lock-retain-until-date
  response header.
* RADOS: `get_pool_is_selfmanaged_snaps_mode` C++ API has been deprecated
  due to being prone to false negative results. Its safer replacement is
  `pool_is_in_selfmanaged_snaps_mode`.
* RADOS: For bug 62338 (https://tracker.ceph.com/issues/62338), we did not choose
  to condition the fix on a server flag in order to simplify backporting. As
  a result, in rare cases it may be possible for a PG to flip between two acting
  sets while an upgrade to a version with the fix is in progress. If you observe
  this behavior, you should be able to work around it by completing the upgrade or
  by disabling async recovery by setting osd_async_recovery_min_cost to a very
  large value on all OSDs until the upgrade is complete:
  ``ceph config set osd osd_async_recovery_min_cost 1099511627776``
* RADOS: A detailed version of the `balancer status` CLI command in the balancer
  module is now available. Users may run `ceph balancer status detail` to see more
  details about which PGs were updated in the balancer's last optimization.
  See https://docs.ceph.com/en/latest/rados/operations/balancer/ for more information.
* CephFS: Full support for subvolumes and subvolume groups is now available
  for the snap_schedule Manager module.
* RGW: The SNS CreateTopic API now enforces the same topic naming requirements as AWS:
  Topic names must be made up of only uppercase and lowercase ASCII letters, numbers,
  underscores, and hyphens, and must be between 1 and 256 characters long.
* RBD: When diffing against the beginning of time (`fromsnapname == NULL`) in
  fast-diff mode (`whole_object == true` with `fast-diff` image feature enabled
  and valid), diff-iterate is now guaranteed to execute locally if exclusive
  lock is available. This brings a dramatic performance improvement for QEMU
  live disk synchronization and backup use cases.
* RBD: The ``try-netlink`` mapping option for rbd-nbd has become the default
  and is now deprecated. If the NBD netlink interface is not supported by the
  kernel, then the mapping is retried using the legacy ioctl interface.
* RADOS: Read balancing may now be managed automatically via the balancer
  manager module. Users may choose between two new modes: ``upmap-read``, which
  offers upmap and read optimization simultaneously, or ``read``, which may be used
  to only optimize reads. For more detailed information see
  https://docs.ceph.com/en/latest/rados/operations/read-balancer/#online-optimization.
* CephFS: MDS log trimming is now driven by a separate thread which tries to
  trim the log every second (`mds_log_trim_upkeep_interval` config). Also,
  a couple of configs govern how much time the MDS spends in trimming its
  logs. These configs are `mds_log_trim_threshold` and `mds_log_trim_decay_rate`.
* RGW: Notification topics are now owned by the user that created them.
  By default, only the owner can read/write their topics. Topic policy documents
  are now supported to grant these permissions to other users. Preexisting topics
  are treated as if they have no owner, and any user can read/write them using the SNS API.
  If such a topic is recreated with CreateTopic, the issuing user becomes the new owner.
  For backward compatibility, all users still have permission to publish bucket
  notifications to topics owned by other users. A new configuration parameter,
  ``rgw_topic_require_publish_policy``, can be enabled to deny ``sns:Publish``
  permissions unless explicitly granted by topic policy.
* RGW: Fixed an issue with persistent notifications: changes made to a topic's
  parameters while persistent notifications are still in the queue are now
  reflected in those notifications. For example, if a user sets up a topic with
  an incorrect configuration (password/SSL) that causes delivery to the broker
  to fail, the incorrect topic attribute can now be corrected, and the new
  configuration will be used on the next delivery attempt.
* RBD: The option ``--image-id`` has been added to the `rbd children` CLI command,
  so it can be run for images in the trash.
* PG dump: The default output of `ceph pg dump --format json` has changed. The
  default json format produces a rather massive output in large clusters and
  isn't scalable, so we have removed the 'network_ping_times' section from
  the output. Details in the tracker: https://tracker.ceph.com/issues/57460
* mgr/REST: The REST manager module will trim requests based on the 'max_requests' option.
  Without this feature, and in the absence of manual deletion of old requests,
  the accumulation of requests in the array can lead to Out Of Memory (OOM) issues,
  resulting in the Manager crashing.

* CephFS: The `subvolume snapshot clone` command now depends on the config option
  `snapshot_clone_no_wait`, which is used to reject the clone operation when
  all the cloner threads are busy. This config option is enabled by default, which means
  that if no cloner threads are free, the clone request errors out with EAGAIN.
  The value of the config option can be fetched by using:
  `ceph config get mgr mgr/volumes/snapshot_clone_no_wait`
  and it can be disabled by using:
  `ceph config set mgr mgr/volumes/snapshot_clone_no_wait false`
* RBD: `RBD_IMAGE_OPTION_CLONE_FORMAT` option has been exposed in Python
  bindings via the `clone_format` optional parameter to the `clone`, `deep_copy` and
  `migration_prepare` methods.
* RBD: `RBD_IMAGE_OPTION_FLATTEN` option has been exposed in Python bindings via
  the `flatten` optional parameter to the `deep_copy` and `migration_prepare` methods.

* CephFS: The commands "ceph mds fail" and "ceph fs fail" now require a
  confirmation flag when some MDSs exhibit the health warning MDS_TRIM or
  MDS_CACHE_OVERSIZED. This is to prevent accidental MDS failover causing
  further delays in recovery.
* CephFS: fixes to the implementation of the ``root_squash`` mechanism enabled
  via cephx ``mds`` caps on a client credential require a new client feature
  bit, ``client_mds_auth_caps``. Clients using credentials with ``root_squash``
  without this feature will trigger the MDS to raise a HEALTH_ERR on the
  cluster, MDS_CLIENTS_BROKEN_ROOTSQUASH. See the documentation on this warning
  and the new feature bit for more information.
* CephFS: Expanded removexattr support for cephfs virtual extended attributes.
  Previously one had to use setxattr to restore the default in order to "remove".
  You may now properly use removexattr to remove them. You can also now remove
  the layout on the root inode, which restores it to the default layout.

* cls_cxx_gather is marked as deprecated.
* CephFS: cephfs-journal-tool is guarded against running on an online file system.
  The 'cephfs-journal-tool --rank <fs_name>:<mds_rank> journal reset' and
  'cephfs-journal-tool --rank <fs_name>:<mds_rank> journal reset --force'
  commands require '--yes-i-really-really-mean-it'.

* Dashboard: Rearranged Navigation Layout: The navigation layout has been reorganized
  for improved usability and easier access to key features.
* Dashboard: CephFS Improvements
  * Support for managing CephFS snapshots and clones, as well as snapshot schedule
    management
  * Manage authorization capabilities for CephFS resources
  * Helpers on mounting a CephFS volume
* Dashboard: RGW Improvements
  * Support for managing bucket policies
  * Add/Remove bucket tags
  * ACL Management
  * Several UI/UX Improvements to the bucket form
* Monitoring: Grafana dashboards are now loaded into the container at runtime rather than
  building a grafana image with the grafana dashboards. Official Ceph grafana images
  can be found in quay.io/ceph/grafana
* Monitoring: RGW S3 Analytics: A new Grafana dashboard is now available, enabling you to
  visualize per bucket and user analytics data, including total GETs, PUTs, Deletes,
  Copies, and list metrics.
* RBD: `Image::access_timestamp` and `Image::modify_timestamp` Python APIs now
  return timestamps in UTC (see the sketch after this list).
* RBD: Support for cloning from non-user type snapshots is added. This is
  intended primarily as a building block for cloning new groups from group
  snapshots created with the `rbd group snap create` command, but has also been
  exposed via the new `--snap-id` option for the `rbd clone` command.
* RBD: The output of the `rbd snap ls --all` command now includes the original
  type for trashed snapshots.

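As a rough illustration of the UTC timestamp change, here is a hypothetical
sketch; the image name is made up:

    # Hypothetical sketch: image name is made up.
    import rados
    import rbd

    with rados.Rados(conffile="/etc/ceph/ceph.conf") as cluster:
        with cluster.open_ioctx("rbd") as ioctx:
            with rbd.Image(ioctx, "img0") as image:
                # Both values are now reported in UTC.
                print("accessed:", image.access_timestamp())
                print("modified:", image.modify_timestamp())
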
>=18.0.0

ceph/README.md

@ -45,19 +45,21 @@ out the git submodules associated with it:

## Build Prerequisites

*section last updated 27 Jul 2023*
*section last updated 06 Sep 2024*

Make sure that ``curl`` is installed. The Debian and Ubuntu ``apt`` command is
provided here, but if you use a system with a different package manager, then
you must use whatever command is the proper counterpart of this one:
We provide the Debian and Ubuntu ``apt`` commands in this procedure. If you use
a system with a different package manager, then you will have to use different
commands.

#. Install ``curl``:

       apt install curl

Install Debian or RPM package dependencies by running the following command:
#. Install package dependencies by running the ``install-deps.sh`` script:

       ./install-deps.sh

Install the ``python3-routes`` package:
#. Install the ``python3-routes`` package:

       apt install python3-routes

@ -70,12 +72,24 @@ we recommend that you build `.deb` or `.rpm` packages, or refer to
``ceph.spec.in`` or ``debian/rules`` to see which configuration options are
specified for production builds.

To build Ceph, make sure that you are in the top-level `ceph` directory that
contains `do_cmake.sh` and `CONTRIBUTING.rst` and run the following commands:
To build Ceph, follow this procedure:

    ./do_cmake.sh
    cd build
    ninja

1. Make sure that you are in the top-level `ceph` directory that
   contains `do_cmake.sh` and `CONTRIBUTING.rst`.
2. Run the `do_cmake.sh` script:

       ./do_cmake.sh

   ``do_cmake.sh`` by default creates a "debug build" of Ceph, which can be
   up to five times slower than a non-debug build. Pass
   ``-DCMAKE_BUILD_TYPE=RelWithDebInfo`` to ``do_cmake.sh`` to create a
   non-debug build.
3. Move into the `build` directory:

       cd build

4. Use the `ninja` buildsystem to build the development environment:

   >``ninja``

``do_cmake.sh`` by default creates a "debug build" of Ceph, which can be up to
five times slower than a non-debug build. Pass
@ -93,21 +107,21 @@ Using the `-j` option with an argument appropriate to the hardware on which the
to limit the job number to 3, run the command `ninja -j 3`. On average, each
`ninja` job run in parallel needs approximately 2.5 GiB of RAM.

This documentation assumes that your build directory is a subdirectory of the
`ceph.git` checkout. If the build directory is located elsewhere, point
`CEPH_GIT_DIR` to the correct path of the checkout. Additional CMake args can
be specified by setting ARGS before invoking ``do_cmake.sh``. See [cmake
options](#cmake-options) for more details. For example:
This documentation assumes that your build directory is a subdirectory of
the `ceph.git` checkout. If the build directory is located elsewhere, point
`CEPH_GIT_DIR` to the correct path of the checkout. Additional CMake args
can be specified by setting ARGS before invoking ``do_cmake.sh``.
See [cmake options](#cmake-options) for more details. For example:

    ARGS="-DCMAKE_C_COMPILER=gcc-7" ./do_cmake.sh
    ARGS="-DCMAKE_C_COMPILER=gcc-7" ./do_cmake.sh

To build only certain targets, run a command of the following form:
To build only certain targets, run a command of the following form:

    ninja [target name]
    ninja [target name]

To install:
5. Install the vstart cluster:

    ninja install
    ninja install

### CMake Options

ceph/SubmittingPatches-backports.rst

@ -121,14 +121,11 @@ If you do not have sufficient permissions to modify any field of the tracker
issue, just add a comment describing what changes you would like to make.
Someone with permissions will make the necessary modifications on your behalf.

For straightforward backports, that's all that you (as the developer of the fix)
need to do. Volunteers from the `Stable Releases and Backports team`_ will
proceed to create Backport issues to track the necessary backports and stage the
backports by opening GitHub PRs with the cherry-picks. If you don't want to
wait, and provided you have sufficient permissions at https://tracker.ceph.com,
you can `create Backport tracker issues` and `stage backports`_ yourself. In
that case, read on.

Authors of pull requests are responsible for creating associated backport pull
requests. As long as you have sufficient permissions at
https://tracker.ceph.com, you can `create Backport tracker issues` and `stage
backports`_ yourself. Read these linked sections to learn how to create
backport tracker issues and how to stage backports:

.. _`create backport tracker issues`:
.. _`backport tracker issue`:
@ -144,12 +141,9 @@ issue.
Once the entire `Tracker workflow`_ has been completed for the master issue,
issues can be created in the Backport tracker for tracking the backporting work.

Under ordinary circumstances, the developer who merges the master PR will flag
the master tracker issue for backport by changing the Status to "Pending
Backport", and volunteers from the `Stable Releases and Backports team`_
periodically create backport tracker issues by running the
``backport-create-issue`` script. They also do the actual backporting. But that
does take time and you may not want to wait.
Under ordinary circumstances, the developer who merges the ``main`` PR will flag
the ``main`` branch tracker issue for backport by changing the Status to "Pending
Backport".

You might be tempted to forge ahead and create the backport issues yourself.
Please don't do that - it is difficult (bordering on impossible) to get all the
@ -360,19 +354,10 @@ Once the backport PR is open, the first order of business is to set the
Milestone tag to the stable release the backport PR is targeting. For example,
if the PR is targeting "nautilus", set the Milestone tag to "nautilus".

If you don't have sufficient GitHub permissions to set the Milestone, don't
worry. Members of the `Stable Releases and Backports team`_ periodically run
a script (``ceph-backport.sh --milestones``) which scans all PRs targetting stable
branches and automatically adds the correct Milestone tag if it is missing.

Next, check which component label was applied to the master PR corresponding to
this backport, and double-check that that label is applied to the backport PR as
well. For example, if the master PR carries the component label "core", the
backport PR should also get that label.

In general, it is the responsibility of the `Stable Releases and Backports
team`_ to ensure that backport PRs are properly labelled. If in doubt, just
leave the labelling to them.
Next, check which component label was applied to the ``main`` PR corresponding
to this backport, and double-check that that label is applied to the backport
PR as well. For example, if the master PR carries the component label "core",
the backport PR should also get that label.

.. _`backport PR reviewing`:
.. _`backport PR testing`:
@ -381,9 +366,8 @@ leave the labelling to them.
Reviewing, testing, and merging of backport PRs
-----------------------------------------------

Once your backport PR is open and the Milestone is set properly, the
`Stable Releases and Backports team` will take care of getting the PR
reviewed and tested. Once the PR is reviewed and tested, it will be merged.
Once your backport PR is open, it will be reviewed and tested. When the PR has
been reviewed and tested, it will be merged.

If you would like to facilitate this process, you can solicit reviews and run
integration tests on the PR. In this case, add comments to the PR describing the
@ -394,22 +378,3 @@ it will be merged. Even if you have sufficient GitHub permissions to merge the
PR, please do *not* merge it yourself. (Uncontrolled merging to stable branches
unnecessarily complicates the release preparation process, which is done by
volunteers.)


Stable Releases and Backports team
----------------------------------

Ceph has a `Stable Releases and Backports`_ team, staffed by volunteers,
which is charged with maintaining the stable releases and backporting bugfixes
from the master branch to them. (That team maintains a wiki, accessible by
clicking the `Stable Releases and Backports`_ link, which describes various
workflows in the backporting lifecycle.)

.. _`Stable Releases and Backports`: http://tracker.ceph.com/projects/ceph-releases/wiki

Ordinarily, it is enough to fill out the "Backport" field in the bug (tracker
issue). The volunteers from the Stable Releases and Backports team will
backport the fix, run regression tests on it, and include it in one or more
future point releases.

@ -1,4 +1,4 @@
Sphinx == 4.4.0
Sphinx == 5.0.2
git+https://github.com/ceph/sphinx-ditaa.git@py3#egg=sphinx-ditaa
git+https://github.com/vlasovskikh/funcparserlib.git
breathe >= 4.20.0

@ -1,6 +1,6 @@
ceph-menv

Environment assistant for use in conjuction with multiple ceph vstart (or more accurately mstart) clusters. Eliminates the need to specify the cluster that is being used with each and every command. Can provide a shell prompt feedback about the currently used cluster.
Environment assistant for use in conjunction with multiple Ceph vstart (or more accurately mstart) clusters. Eliminates the need to specify the cluster that is being used with each and every command. Can provide a shell prompt feedback about the currently used cluster.

Usage:

ceph/ceph.spec (105 lines changed)
@ -35,8 +35,8 @@
%else
%bcond_with rbd_rwl_cache
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?rhel} < 9
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%if 0%{?rhel} < 9 || 0%{?openEuler}
%bcond_with system_pmdk
%else
%ifarch s390x aarch64
@ -108,7 +108,7 @@
%endif
%bcond_with system_arrow
%bcond_with system_utf8proc
%if 0%{?fedora} || 0%{?suse_version} || 0%{?rhel} >= 8
%if 0%{?fedora} || 0%{?suse_version} || 0%{?rhel} >= 8 || 0%{?openEuler}
%global weak_deps 1
%endif
%if %{with selinux}
@ -166,7 +166,7 @@
# main package definition
#################################################################################
Name: ceph
Version: 17.2.7
Version: 17.2.8
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
@ -182,7 +182,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
Group: System/Filesystems
%endif
URL: http://ceph.com/
Source0: %{?_remote_tarball_prefix}ceph-17.2.7.tar.bz2
Source0: %{?_remote_tarball_prefix}ceph-17.2.8.tar.bz2
%if 0%{?suse_version}
# _insert_obs_source_lines_here
ExclusiveArch: x86_64 aarch64 ppc64le s390x
@ -206,7 +206,7 @@ BuildRequires: selinux-policy-devel
BuildRequires: gperf
BuildRequires: cmake > 3.5
BuildRequires: fuse-devel
%if 0%{?fedora} || 0%{?suse_version} > 1500 || 0%{?rhel} == 9
%if 0%{?fedora} || 0%{?suse_version} > 1500 || 0%{?rhel} == 9 || 0%{?openEuler}
BuildRequires: gcc-c++ >= 11
%endif
%if 0%{?suse_version} == 1500
@ -219,12 +219,12 @@ BuildRequires: %{gts_prefix}-build
BuildRequires: %{gts_prefix}-libatomic-devel
%endif
%endif
%if 0%{?fedora} || 0%{?rhel} == 9
%if 0%{?fedora} || 0%{?rhel} == 9 || 0%{?openEuler}
BuildRequires: libatomic
%endif
%if 0%{with tcmalloc}
# libprofiler did not build on ppc64le until 2.7.90
%if 0%{?fedora} || 0%{?rhel} >= 8
%if 0%{?fedora} || 0%{?rhel} >= 8 || 0%{?openEuler}
BuildRequires: gperftools-devel >= 2.7.90
%endif
%if 0%{?rhel} && 0%{?rhel} < 8
@ -371,7 +371,7 @@ BuildRequires: liblz4-devel >= 1.7
BuildRequires: golang-github-prometheus-prometheus
BuildRequires: jsonnet
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
Requires: systemd
BuildRequires: boost-random
BuildRequires: nss-devel
@ -392,7 +392,7 @@ BuildRequires: lz4-devel >= 1.7
# distro-conditional make check dependencies
%if 0%{with make_check}
BuildRequires: golang
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
BuildRequires: golang-github-prometheus
BuildRequires: libtool-ltdl-devel
BuildRequires: xmlsec1
@ -409,9 +409,9 @@ BuildRequires: python%{python3_pkgversion}-scipy
BuildRequires: python%{python3_pkgversion}-werkzeug
BuildRequires: python%{python3_pkgversion}-pyOpenSSL
%endif
BuildRequires: jsonnet
%if 0%{?suse_version}
BuildRequires: golang-github-prometheus-prometheus
BuildRequires: jsonnet
BuildRequires: libxmlsec1-1
BuildRequires: libxmlsec1-nss1
BuildRequires: libxmlsec1-openssl1
@ -426,7 +426,7 @@ BuildRequires: xmlsec1-openssl-devel
%endif
# lttng and babeltrace for rbd-replay-prep
%if %{with lttng}
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
BuildRequires: lttng-ust-devel
BuildRequires: libbabeltrace-devel
%endif
@ -438,15 +438,18 @@ BuildRequires: babeltrace-devel
%if 0%{?suse_version}
BuildRequires: libexpat-devel
%endif
%if 0%{?rhel} || 0%{?fedora}
%if 0%{?rhel} || 0%{?fedora} || 0%{?openEuler}
BuildRequires: expat-devel
%endif
#hardened-cc1
%if 0%{?fedora} || 0%{?rhel}
BuildRequires: redhat-rpm-config
%endif
%if 0%{?openEuler}
BuildRequires: openEuler-rpm-config
%endif
%if 0%{with seastar}
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
BuildRequires: cryptopp-devel
BuildRequires: numactl-devel
%endif
@ -534,7 +537,7 @@ Requires: python%{python3_pkgversion}-cephfs = %{_epoch_prefix}%{version}-%{rele
Requires: python%{python3_pkgversion}-rgw = %{_epoch_prefix}%{version}-%{release}
Requires: python%{python3_pkgversion}-ceph-argparse = %{_epoch_prefix}%{version}-%{release}
Requires: python%{python3_pkgversion}-ceph-common = %{_epoch_prefix}%{version}-%{release}
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
Requires: python%{python3_pkgversion}-prettytable
%endif
%if 0%{?suse_version}
@ -606,7 +609,7 @@ Requires: ceph-mgr = %{_epoch_prefix}%{version}-%{release}
Requires: ceph-grafana-dashboards = %{_epoch_prefix}%{version}-%{release}
Requires: ceph-prometheus-alerts = %{_epoch_prefix}%{version}-%{release}
Requires: python%{python3_pkgversion}-setuptools
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
Requires: python%{python3_pkgversion}-cherrypy
Requires: python%{python3_pkgversion}-jwt
Requires: python%{python3_pkgversion}-routes
@ -636,7 +639,7 @@ Group: System/Filesystems
%endif
Requires: ceph-mgr = %{_epoch_prefix}%{version}-%{release}
Requires: python%{python3_pkgversion}-numpy
%if 0%{?fedora} || 0%{?suse_version}
%if 0%{?fedora} || 0%{?suse_version} || 0%{?openEuler}
Requires: python%{python3_pkgversion}-scikit-learn
%endif
Requires: python3-scipy
@ -651,12 +654,13 @@ BuildArch: noarch
Group: System/Filesystems
%endif
Requires: python%{python3_pkgversion}-bcrypt
Requires: python%{python3_pkgversion}-packaging
Requires: python%{python3_pkgversion}-pecan
Requires: python%{python3_pkgversion}-pyOpenSSL
Requires: python%{python3_pkgversion}-requests
Requires: python%{python3_pkgversion}-dateutil
Requires: python%{python3_pkgversion}-setuptools
%if 0%{?fedora} || 0%{?rhel} >= 8
%if 0%{?fedora} || 0%{?rhel} >= 8 || 0%{?openEuler}
Requires: python%{python3_pkgversion}-cherrypy
Requires: python%{python3_pkgversion}-pyyaml
Requires: python%{python3_pkgversion}-werkzeug
@ -713,7 +717,7 @@ Requires: openssh
Requires: python%{python3_pkgversion}-CherryPy
Requires: python%{python3_pkgversion}-Jinja2
%endif
%if 0%{?rhel} || 0%{?fedora}
%if 0%{?rhel} || 0%{?fedora} || 0%{?openEuler}
Requires: openssh-clients
Requires: python%{python3_pkgversion}-cherrypy
Requires: python%{python3_pkgversion}-jinja2
@ -805,7 +809,7 @@ Requires: ceph-selinux = %{_epoch_prefix}%{version}-%{release}
%endif
Requires: librados2 = %{_epoch_prefix}%{version}-%{release}
Requires: librgw2 = %{_epoch_prefix}%{version}-%{release}
%if 0%{?rhel} || 0%{?fedora}
%if 0%{?rhel} || 0%{?fedora} || 0%{?openEuler}
Requires: mailcap
%endif
%if 0%{?weak_deps}
@ -885,6 +889,7 @@ Requires: parted
Requires: util-linux
Requires: xfsprogs
Requires: python%{python3_pkgversion}-setuptools
Requires: python%{python3_pkgversion}-packaging
Requires: python%{python3_pkgversion}-ceph-common = %{_epoch_prefix}%{version}-%{release}
%description volume
This package contains a tool to deploy OSD with different devices like
@ -896,7 +901,7 @@ Summary: RADOS distributed object store client library
%if 0%{?suse_version}
Group: System/Libraries
%endif
%if 0%{?rhel} || 0%{?fedora}
%if 0%{?rhel} || 0%{?fedora} || 0%{?openEuler}
Obsoletes: ceph-libs < %{_epoch_prefix}%{version}-%{release}
%endif
%description -n librados2
@ -1043,7 +1048,7 @@ Requires: librados2 = %{_epoch_prefix}%{version}-%{release}
%if 0%{?suse_version}
Requires(post): coreutils
%endif
%if 0%{?rhel} || 0%{?fedora}
%if 0%{?rhel} || 0%{?fedora} || 0%{?openEuler}
Obsoletes: ceph-libs < %{_epoch_prefix}%{version}-%{release}
%endif
%description -n librbd1
@ -1087,7 +1092,7 @@ Summary: Ceph distributed file system client library
Group: System/Libraries
%endif
Obsoletes: libcephfs1 < %{_epoch_prefix}%{version}-%{release}
%if 0%{?rhel} || 0%{?fedora}
%if 0%{?rhel} || 0%{?fedora} || 0%{?openEuler}
Obsoletes: ceph-libs < %{_epoch_prefix}%{version}-%{release}
Obsoletes: ceph-libcephfs
%endif
@ -1140,7 +1145,7 @@ descriptions, and submitting the command to the appropriate daemon.

%package -n python%{python3_pkgversion}-ceph-common
Summary: Python 3 utility libraries for Ceph
%if 0%{?fedora} || 0%{?rhel} >= 8
%if 0%{?fedora} || 0%{?rhel} >= 8 || 0%{?openEuler}
Requires: python%{python3_pkgversion}-pyyaml
%endif
%if 0%{?suse_version}
@ -1274,7 +1279,7 @@ This package provides Ceph default alerts for Prometheus.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-17.2.7
%autosetup -p1 -n ceph-17.2.8

%build
# Disable lto on systems that do not support symver attribute
@ -1449,7 +1454,7 @@ install -m 0755 %{buildroot}%{_bindir}/crimson-osd %{buildroot}%{_bindir}/ceph-o
%endif

install -m 0644 -D src/etc-rbdmap %{buildroot}%{_sysconfdir}/ceph/rbdmap
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
install -m 0644 -D etc/sysconfig/ceph %{buildroot}%{_sysconfdir}/sysconfig/ceph
%endif
%if 0%{?suse_version}
@ -1484,7 +1489,7 @@ install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
# sudoers.d
install -m 0440 -D sudoers.d/ceph-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-smartctl

%if 0%{?rhel} >= 8
%if 0%{?rhel} >= 8 || 0%{?openEuler}
pathfix.py -pni "%{__python3} %{py3_shbang_opts}" %{buildroot}%{_bindir}/*
pathfix.py -pni "%{__python3} %{py3_shbang_opts}" %{buildroot}%{_sbindir}/*
%endif
@ -1518,7 +1523,7 @@ install -m 644 -D monitoring/ceph-mixin/prometheus_alerts.yml %{buildroot}/etc/p
%fdupes %{buildroot}%{_prefix}
%endif

%if 0%{?rhel} == 8
%if 0%{?rhel} == 8 || 0%{?openEuler}
%py_byte_compile %{__python3} %{buildroot}%{python3_sitelib}
%endif

@ -1559,7 +1564,7 @@ rm -rf %{_vpath_builddir}
%{_libdir}/libosd_tp.so*
%endif
%config(noreplace) %{_sysconfdir}/logrotate.d/ceph
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%config(noreplace) %{_sysconfdir}/sysconfig/ceph
%endif
%if 0%{?suse_version}
@ -1592,7 +1597,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph.target ceph-crash.service >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph.target ceph-crash.service
%endif
if [ $1 -eq 1 ] ; then
@ -1603,7 +1608,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph.target ceph-crash.service
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph.target ceph-crash.service
%endif

@ -1696,7 +1701,7 @@ exit 0
%pre common
CEPH_GROUP_ID=167
CEPH_USER_ID=167
%if 0%{?rhel} || 0%{?fedora}
%if 0%{?rhel} || 0%{?fedora} || 0%{?openEuler}
/usr/sbin/groupadd ceph -g $CEPH_GROUP_ID -o -r 2>/dev/null || :
/usr/sbin/useradd ceph -u $CEPH_USER_ID -o -r -g ceph -s /sbin/nologin -c "Ceph daemons" -d %{_localstatedir}/lib/ceph 2>/dev/null || :
%endif
@ -1742,7 +1747,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph-mds@\*.service ceph-mds.target >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph-mds@\*.service ceph-mds.target
%endif
if [ $1 -eq 1 ] ; then
@ -1753,7 +1758,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph-mds@\*.service ceph-mds.target
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph-mds@\*.service ceph-mds.target
%endif

@ -1787,7 +1792,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph-mgr@\*.service ceph-mgr.target >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph-mgr@\*.service ceph-mgr.target
%endif
if [ $1 -eq 1 ] ; then
@ -1798,7 +1803,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph-mgr@\*.service ceph-mgr.target
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph-mgr@\*.service ceph-mgr.target
%endif

@ -1927,7 +1932,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph-mon@\*.service ceph-mon.target >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph-mon@\*.service ceph-mon.target
%endif
if [ $1 -eq 1 ] ; then
@ -1938,7 +1943,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph-mon@\*.service ceph-mon.target
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph-mon@\*.service ceph-mon.target
%endif

@ -1976,7 +1981,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset cephfs-mirror@\*.service cephfs-mirror.target >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post cephfs-mirror@\*.service cephfs-mirror.target
%endif
if [ $1 -eq 1 ] ; then
@ -1987,7 +1992,7 @@ fi
%if 0%{?suse_version}
%service_del_preun cephfs-mirror@\*.service cephfs-mirror.target
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun cephfs-mirror@\*.service cephfs-mirror.target
%endif

@ -2024,7 +2029,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph-rbd-mirror@\*.service ceph-rbd-mirror.target >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph-rbd-mirror@\*.service ceph-rbd-mirror.target
%endif
if [ $1 -eq 1 ] ; then
@ -2035,7 +2040,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph-rbd-mirror@\*.service ceph-rbd-mirror.target
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph-rbd-mirror@\*.service ceph-rbd-mirror.target
%endif

@ -2065,7 +2070,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
%endif
if [ $1 -eq 1 ] ; then
@ -2076,7 +2081,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
%endif

@ -2122,7 +2127,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph-radosgw@\*.service ceph-radosgw.target >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph-radosgw@\*.service ceph-radosgw.target
%endif
if [ $1 -eq 1 ] ; then
@ -2133,7 +2138,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph-radosgw@\*.service ceph-radosgw.target
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph-radosgw@\*.service ceph-radosgw.target
%endif

@ -2174,7 +2179,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph-osd@\*.service ceph-osd.target >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph-osd@\*.service ceph-osd.target
%endif
if [ $1 -eq 1 ] ; then
@ -2190,7 +2195,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph-osd@\*.service ceph-osd.target
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph-osd@\*.service ceph-osd.target
%endif

@ -2229,7 +2234,7 @@ if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph-volume@\*.service >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_post ceph-volume@\*.service
%endif

@ -2237,7 +2242,7 @@ fi
%if 0%{?suse_version}
%service_del_preun ceph-volume@\*.service
%endif
%if 0%{?fedora} || 0%{?rhel}
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
%systemd_preun ceph-volume@\*.service
%endif
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_preun ceph-rbd-mirror@\*.service ceph-rbd-mirror.target
|
||||
%endif
|
||||
|
||||
@ -2065,7 +2070,7 @@ if [ $1 -eq 1 ] ; then
|
||||
/usr/bin/systemctl preset ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target >/dev/null 2>&1 || :
|
||||
fi
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_post ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
|
||||
%endif
|
||||
if [ $1 -eq 1 ] ; then
|
||||
@ -2076,7 +2081,7 @@ fi
|
||||
%if 0%{?suse_version}
|
||||
%service_del_preun ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_preun ceph-immutable-object-cache@\*.service ceph-immutable-object-cache.target
|
||||
%endif
|
||||
|
||||
@ -2122,7 +2127,7 @@ if [ $1 -eq 1 ] ; then
|
||||
/usr/bin/systemctl preset ceph-radosgw@\*.service ceph-radosgw.target >/dev/null 2>&1 || :
|
||||
fi
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_post ceph-radosgw@\*.service ceph-radosgw.target
|
||||
%endif
|
||||
if [ $1 -eq 1 ] ; then
|
||||
@ -2133,7 +2138,7 @@ fi
|
||||
%if 0%{?suse_version}
|
||||
%service_del_preun ceph-radosgw@\*.service ceph-radosgw.target
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_preun ceph-radosgw@\*.service ceph-radosgw.target
|
||||
%endif
|
||||
|
||||
@ -2174,7 +2179,7 @@ if [ $1 -eq 1 ] ; then
|
||||
/usr/bin/systemctl preset ceph-osd@\*.service ceph-osd.target >/dev/null 2>&1 || :
|
||||
fi
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_post ceph-osd@\*.service ceph-osd.target
|
||||
%endif
|
||||
if [ $1 -eq 1 ] ; then
|
||||
@ -2190,7 +2195,7 @@ fi
|
||||
%if 0%{?suse_version}
|
||||
%service_del_preun ceph-osd@\*.service ceph-osd.target
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_preun ceph-osd@\*.service ceph-osd.target
|
||||
%endif
|
||||
|
||||
@ -2229,7 +2234,7 @@ if [ $1 -eq 1 ] ; then
|
||||
/usr/bin/systemctl preset ceph-volume@\*.service >/dev/null 2>&1 || :
|
||||
fi
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_post ceph-volume@\*.service
|
||||
%endif
|
||||
|
||||
@ -2237,7 +2242,7 @@ fi
|
||||
%if 0%{?suse_version}
|
||||
%service_del_preun ceph-volume@\*.service
|
||||
%endif
|
||||
%if 0%{?fedora} || 0%{?rhel}
|
||||
%if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler}
|
||||
%systemd_preun ceph-volume@\*.service
|
||||
%endif
|
||||
|
||||
@ -1,3 +1,9 @@
ceph (17.2.8-1) stable; urgency=medium

* New upstream release

-- Ceph Release Team <ceph-maintainers@ceph.io> Mon, 11 Nov 2024 21:31:47 +0000

ceph (17.2.7-1) stable; urgency=medium

* New upstream release

@ -156,14 +156,10 @@ function(do_build_boost root_dir version)
set(boost_version 1.75.0)
set(boost_sha256 953db31e016db7bb207f11432bef7df100516eeb746843fa0486a222e3fd49cb)
string(REPLACE "." "_" boost_version_underscore ${boost_version} )
set(boost_url
https://boostorg.jfrog.io/artifactory/main/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2)
if(CMAKE_VERSION VERSION_GREATER 3.7)
set(boost_url
"${boost_url} http://downloads.sourceforge.net/project/boost/boost/${boost_version}/boost_${boost_version_underscore}.tar.bz2")
set(boost_url
"${boost_url} https://download.ceph.com/qa/boost_${boost_version_underscore}.tar.bz2")
endif()
list(APPEND boost_url
https://boostorg.jfrog.io/artifactory/main/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2
https://downloads.sourceforge.net/project/boost/boost/${boost_version}/boost_${boost_version_underscore}.tar.bz2
https://download.ceph.com/qa/boost_${boost_version_underscore}.tar.bz2)
set(source_dir
URL ${boost_url}
URL_HASH SHA256=${boost_sha256}
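The hunk above replaces the space-concatenated `boost_url` string with a proper CMake list of mirrors, which ExternalProject tries in order and validates against `URL_HASH`. A sketch of the same mirror-fallback-plus-checksum idea in Python (URLs and hash taken from the hunk; this is not part of the Ceph build):

# Try each mirror in order and accept the tarball only if its SHA-256
# matches the expected value -- the behaviour the URL/URL_HASH pair gives
# ExternalProject. Illustration only.
import hashlib
import urllib.request

MIRRORS = [
    "https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.bz2",
    "https://downloads.sourceforge.net/project/boost/boost/1.75.0/boost_1_75_0.tar.bz2",
    "https://download.ceph.com/qa/boost_1_75_0.tar.bz2",
]
EXPECTED_SHA256 = "953db31e016db7bb207f11432bef7df100516eeb746843fa0486a222e3fd49cb"

def fetch_with_fallback(mirrors, expected_sha256, dest="boost.tar.bz2"):
    for url in mirrors:
        try:
            urllib.request.urlretrieve(url, dest)
        except OSError:
            continue  # mirror unreachable or returned an error, try the next one
        with open(dest, "rb") as f:
            if hashlib.sha256(f.read()).hexdigest() == expected_sha256:
                return dest
    raise RuntimeError("no mirror produced a tarball with the expected hash")
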
@ -11,6 +11,13 @@ function(build_rocksdb)
|
||||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE})
|
||||
endif()
|
||||
|
||||
list(APPEND rocksdb_CMAKE_ARGS -DWITH_LIBURING=${WITH_LIBURING})
|
||||
if(WITH_LIBURING)
|
||||
list(APPEND rocksdb_CMAKE_ARGS -During_INCLUDE_DIR=${URING_INCLUDE_DIR})
|
||||
list(APPEND rocksdb_CMAKE_ARGS -During_LIBRARIES=${URING_LIBRARY_DIR})
|
||||
list(APPEND rocksdb_INTERFACE_LINK_LIBRARIES uring::uring)
|
||||
endif()
|
||||
|
||||
if(ALLOCATOR STREQUAL "jemalloc")
|
||||
list(APPEND rocksdb_CMAKE_ARGS -DWITH_JEMALLOC=ON)
|
||||
list(APPEND rocksdb_INTERFACE_LINK_LIBRARIES JeMalloc::JeMalloc)
|
||||
@ -52,12 +59,13 @@ function(build_rocksdb)
|
||||
endif()
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("-Wno-deprecated-copy" HAS_WARNING_DEPRECATED_COPY)
|
||||
set(rocksdb_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
if(HAS_WARNING_DEPRECATED_COPY)
|
||||
set(rocksdb_CXX_FLAGS -Wno-deprecated-copy)
|
||||
string(APPEND rocksdb_CXX_FLAGS " -Wno-deprecated-copy")
|
||||
endif()
|
||||
check_cxx_compiler_flag("-Wno-pessimizing-move" HAS_WARNING_PESSIMIZING_MOVE)
|
||||
if(HAS_WARNING_PESSIMIZING_MOVE)
|
||||
set(rocksdb_CXX_FLAGS "${rocksdb_CXX_FLAGS} -Wno-pessimizing-move")
|
||||
string(APPEND rocksdb_CXX_FLAGS " -Wno-pessimizing-move")
|
||||
endif()
|
||||
if(rocksdb_CXX_FLAGS)
|
||||
list(APPEND rocksdb_CMAKE_ARGS -DCMAKE_CXX_FLAGS='${rocksdb_CXX_FLAGS}')
|
||||
@ -84,6 +92,9 @@ function(build_rocksdb)
|
||||
INSTALL_COMMAND ""
|
||||
LIST_SEPARATOR !)
|
||||
|
||||
# make sure all the link libraries are built first
|
||||
add_dependencies(rocksdb_ext ${rocksdb_INTERFACE_LINK_LIBRARIES})
|
||||
|
||||
add_library(RocksDB::RocksDB STATIC IMPORTED)
|
||||
add_dependencies(RocksDB::RocksDB rocksdb_ext)
|
||||
set(rocksdb_INCLUDE_DIR "${rocksdb_SOURCE_DIR}/include")
|
||||
|
@ -32,6 +32,8 @@ function(build_uring)
ExternalProject_Get_Property(liburing_ext source_dir)
set(URING_INCLUDE_DIR "${source_dir}/src/include")
set(URING_LIBRARY_DIR "${source_dir}/src")
set(URING_INCLUDE_DIR ${URING_INCLUDE_DIR} PARENT_SCOPE)
set(URING_LIBRARY_DIR ${URING_LIBRARY_DIR} PARENT_SCOPE)

add_library(uring::uring STATIC IMPORTED GLOBAL)
add_dependencies(uring::uring liburing_ext)

206
ceph/container/Containerfile
Normal file
@ -0,0 +1,206 @@
|
||||
ARG FROM_IMAGE="quay.io/centos/centos:stream9"
|
||||
FROM $FROM_IMAGE
|
||||
|
||||
# allow FROM_IMAGE to be visible inside this stage
|
||||
ARG FROM_IMAGE
|
||||
|
||||
# Ceph branch name
|
||||
ARG CEPH_REF="main"
|
||||
|
||||
# Ceph SHA1
|
||||
ARG CEPH_SHA1
|
||||
|
||||
# Ceph git repo (ceph-ci.git or ceph.git)
|
||||
ARG CEPH_GIT_REPO
|
||||
|
||||
# (optional) Define the baseurl= for the ganesha.repo
|
||||
ARG GANESHA_REPO_BASEURL="https://buildlogs.centos.org/centos/\$releasever-stream/storage/\$basearch/nfsganesha-5/"
|
||||
|
||||
# (optional) Set to "crimson" to install crimson packages.
|
||||
ARG OSD_FLAVOR="default"
|
||||
|
||||
# (optional) Should be 'true' for CI builds (pull from shaman, etc.)
|
||||
ARG CI_CONTAINER="true"
|
||||
|
||||
RUN /bin/echo -e "\
|
||||
FROM_IMAGE: ${FROM_IMAGE}\n\
|
||||
CEPH_REF: ${CEPH_REF}\n\
|
||||
GANESHA_REPO_BASEURL: ${GANESHA_REPO_BASEURL} \n\
|
||||
OSD_FLAVOR: ${OSD_FLAVOR} \n\
|
||||
CI_CONTAINER: ${CI_CONTAINER}"
|
||||
|
||||
# Other labels are set automatically by container/build github action
|
||||
# See: https://github.com/opencontainers/image-spec/blob/main/annotations.md
|
||||
LABEL org.opencontainers.image.authors="Ceph Release Team <ceph-maintainers@ceph.io>" \
|
||||
org.opencontainers.image.documentation="https://docs.ceph.com/"
|
||||
|
||||
LABEL \
|
||||
FROM_IMAGE=${FROM_IMAGE} \
|
||||
CEPH_REF=${CEPH_REF} \
|
||||
CEPH_SHA1=${CEPH_SHA1} \
|
||||
CEPH_GIT_REPO=${CEPH_GIT_REPO} \
|
||||
GANESHA_REPO_BASEURL=${GANESHA_REPO_BASEURL} \
|
||||
OSD_FLAVOR=${OSD_FLAVOR}
|
||||
|
||||
|
||||
#===================================================================================================
|
||||
# Install ceph and dependencies, and clean up
|
||||
# IMPORTANT: in official builds, use '--squash' build option to keep image as small as possible
|
||||
# keeping run steps separate makes local rebuilds quick, but images are big without squash option
|
||||
#===================================================================================================
|
||||
|
||||
# Pre-reqs
|
||||
RUN dnf install -y --setopt=install_weak_deps=False epel-release jq
|
||||
|
||||
# Add NFS-Ganesha repo
|
||||
RUN \
|
||||
echo "[ganesha]" > /etc/yum.repos.d/ganesha.repo && \
|
||||
echo "name=ganesha" >> /etc/yum.repos.d/ganesha.repo && \
|
||||
echo "baseurl=${GANESHA_REPO_BASEURL}" >> /etc/yum.repos.d/ganesha.repo && \
|
||||
echo "gpgcheck=0" >> /etc/yum.repos.d/ganesha.repo && \
|
||||
echo "enabled=1" >> /etc/yum.repos.d/ganesha.repo
|
||||
|
||||
# ISCSI repo
|
||||
RUN set -x && \
|
||||
curl -s -L https://shaman.ceph.com/api/repos/tcmu-runner/main/latest/centos/9/repo?arch=$(arch) -o /etc/yum.repos.d/tcmu-runner.repo && \
|
||||
case "${CEPH_REF}" in \
|
||||
quincy|reef) \
|
||||
curl -s -L https://download.ceph.com/ceph-iscsi/3/rpm/el9/ceph-iscsi.repo -o /etc/yum.repos.d/ceph-iscsi.repo ;\
|
||||
;;\
|
||||
main|*) \
|
||||
curl -s -L https://shaman.ceph.com/api/repos/ceph-iscsi/main/latest/centos/9/repo -o /etc/yum.repos.d/ceph-iscsi.repo ;\
|
||||
;;\
|
||||
esac
|
||||
|
||||
# Ceph repo
|
||||
RUN set -x && \
|
||||
rpm --import 'https://download.ceph.com/keys/release.asc' && \
|
||||
ARCH=$(arch); if [ "${ARCH}" == "aarch64" ]; then ARCH="arm64"; fi ;\
|
||||
IS_RELEASE=0 ;\
|
||||
if [[ "${CI_CONTAINER}" == "true" ]] ; then \
|
||||
# TODO: this can return different ceph builds (SHA1) for x86 vs. arm runs. is it important to fix?
|
||||
REPO_URL=$(curl -s "https://shaman.ceph.com/api/search/?project=ceph&distros=centos/9/${ARCH}&flavor=${OSD_FLAVOR}&ref=${CEPH_REF}&sha1=latest" | jq -r .[0].url) ;\
|
||||
else \
|
||||
IS_RELEASE=1 ;\
|
||||
REPO_URL="http://download.ceph.com/rpm-${CEPH_REF}/el9/" ;\
|
||||
fi && \
|
||||
rpm -Uvh "$REPO_URL/noarch/ceph-release-1-${IS_RELEASE}.el9.noarch.rpm"
|
||||
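The "Ceph repo" step above asks shaman for the latest build matching the ref, flavor, and architecture, then installs `ceph-release` from the returned repo URL. The same lookup sketched in Python for clarity (the query parameters mirror the Containerfile; the image build itself keeps using curl and jq):

#!/usr/bin/env python3
# Ask shaman for the latest CI build and take the repo URL of the first
# result -- the equivalent of `curl ... | jq -r .[0].url` above.
import json
import urllib.request

def shaman_repo_url(ref="main", flavor="default", arch="x86_64"):
    query = ("https://shaman.ceph.com/api/search/"
             f"?project=ceph&distros=centos/9/{arch}"
             f"&flavor={flavor}&ref={ref}&sha1=latest")
    with urllib.request.urlopen(query) as resp:
        return json.load(resp)[0]["url"]

if __name__ == "__main__":
    print(shaman_repo_url())
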
|
||||
# Copr repos
|
||||
# scikit for mgr-diskprediction-local
|
||||
# ref: https://github.com/ceph/ceph-container/pull/1821
|
||||
RUN \
|
||||
dnf install -y --setopt=install_weak_deps=False dnf-plugins-core && \
|
||||
dnf copr enable -y tchaikov/python-scikit-learn
|
||||
|
||||
# Update package mgr
|
||||
RUN dnf update -y --setopt=install_weak_deps=False
|
||||
|
||||
# Define and install packages
|
||||
# General
|
||||
RUN echo "ca-certificates" > packages.txt
|
||||
# Ceph
|
||||
# TODO: remove lua-devel and luarocks once they are present in ceph.spec.in
|
||||
# ref: https://github.com/ceph/ceph/pull/54575#discussion_r1401199635
|
||||
RUN echo \
|
||||
"ceph-common \
|
||||
ceph-exporter \
|
||||
ceph-grafana-dashboards \
|
||||
ceph-immutable-object-cache \
|
||||
ceph-mds \
|
||||
ceph-mgr-cephadm \
|
||||
ceph-mgr-dashboard \
|
||||
ceph-mgr-diskprediction-local \
|
||||
ceph-mgr-k8sevents \
|
||||
ceph-mgr-rook \
|
||||
ceph-mgr \
|
||||
ceph-mon \
|
||||
ceph-osd \
|
||||
ceph-radosgw lua-devel luarocks \
|
||||
ceph-volume \
|
||||
cephfs-mirror \
|
||||
cephfs-top \
|
||||
kmod \
|
||||
libradosstriper1 \
|
||||
rbd-mirror" \
|
||||
>> packages.txt
|
||||
|
||||
# Optional crimson package(s)
|
||||
RUN if [ "${OSD_FLAVOR}" == "crimson" ]; then \
|
||||
echo "ceph-crimson-osd" >> packages.txt ; \
|
||||
fi
|
||||
|
||||
# Ceph "Recommends"
|
||||
RUN echo "nvme-cli python3-saml smartmontools" >> packages.txt
|
||||
# NFS-Ganesha
|
||||
RUN echo "\
|
||||
dbus-daemon \
|
||||
nfs-ganesha-ceph \
|
||||
nfs-ganesha-rados-grace \
|
||||
nfs-ganesha-rados-urls \
|
||||
nfs-ganesha-rgw \
|
||||
nfs-ganesha \
|
||||
rpcbind \
|
||||
sssd-client" >> packages.txt
|
||||
|
||||
# ISCSI
|
||||
RUN echo "ceph-iscsi tcmu-runner python3-rtslib" >> packages.txt
|
||||
|
||||
# Ceph-CSI
|
||||
# TODO: coordinate with @Madhu-1 to have Ceph-CSI install these itself if unused by ceph
|
||||
# @adk3798 does cephadm use these?
|
||||
RUN echo "attr ceph-fuse rbd-nbd" >> packages.txt
|
||||
|
||||
# Rook (only if packages must be in ceph container image)
|
||||
RUN echo "systemd-udev" >> packages.txt
|
||||
|
||||
# Util packages (should be kept to only utils that are truly very useful)
|
||||
# 'sgdisk' (from gdisk) is used in docs and scripts for clearing disks (could be a risk? @travisn @guits @ktdreyer ?)
|
||||
# 'ps' (from procps-ng) and 'hostname' are very valuable for debugging and CI
|
||||
# TODO: remove sg3_utils once they are moved to ceph.spec.in with libstoragemgmt
|
||||
# ref: https://github.com/ceph/ceph-container/pull/2013#issuecomment-1248606472
|
||||
RUN echo "gdisk hostname procps-ng sg3_utils e2fsprogs lvm2 gcc" >> packages.txt
|
||||
|
||||
# scikit
|
||||
RUN echo "python3-scikit-learn" >> packages.txt
|
||||
|
||||
RUN echo "=== PACKAGES TO BE INSTALLED ==="; cat packages.txt
|
||||
RUN echo "=== INSTALLING ===" ; \
|
||||
dnf install -y --setopt=install_weak_deps=False --setopt=skip_missing_names_on_install=False --enablerepo=crb $(cat packages.txt)
|
||||
|
||||
# XXX why isn't this done in the ganesha package?
|
||||
RUN mkdir -p /var/run/ganesha
|
||||
|
||||
# Disable sync with udev since the container can not contact udev
|
||||
RUN \
|
||||
sed -i -e 's/udev_rules = 1/udev_rules = 0/' \
|
||||
-e 's/udev_sync = 1/udev_sync = 0/' \
|
||||
-e 's/obtain_device_list_from_udev = 1/obtain_device_list_from_udev = 0/' \
|
||||
/etc/lvm/lvm.conf && \
|
||||
# validate the sed command worked as expected
|
||||
grep -sqo "udev_sync = 0" /etc/lvm/lvm.conf && \
|
||||
grep -sqo "udev_rules = 0" /etc/lvm/lvm.conf && \
|
||||
grep -sqo "obtain_device_list_from_udev = 0" /etc/lvm/lvm.conf
|
||||
|
||||
# CLEAN UP!
|
||||
RUN set -x && \
|
||||
dnf clean all && \
|
||||
rm -rf /var/cache/dnf/* && \
|
||||
rm -rf /var/lib/dnf/* && \
|
||||
rm -f /var/lib/rpm/__db* && \
|
||||
# remove unnecessary files with big impact
|
||||
rm -rf /etc/selinux /usr/share/{doc,man,selinux} && \
|
||||
# don't keep compiled python binaries
|
||||
find / -xdev \( -name "*.pyc" -o -name "*.pyo" \) -delete
|
||||
|
||||
# Verify that the packages installed haven't been accidentally cleaned, then
|
||||
# clean the package list and re-clean unnecessary RPM database files
|
||||
RUN rpm -q $(cat packages.txt) && rm -f /var/lib/rpm/__db* && rm -f *packages.txt
|
||||
|
||||
#
|
||||
# Set some envs in the container for quickly inspecting details about the build at runtime
|
||||
ENV CEPH_IS_DEVEL="${CI_CONTAINER}" \
|
||||
CEPH_REF="${CEPH_REF}" \
|
||||
CEPH_OSD_FLAVOR="${OSD_FLAVOR}" \
|
||||
FROM_IMAGE="${FROM_IMAGE}"
|
||||
|
175
ceph/container/build.sh
Executable file
@ -0,0 +1,175 @@
|
||||
#!/bin/bash -ex
|
||||
# vim: ts=4 sw=4 expandtab
|
||||
|
||||
# repo auth with write perms must be present (this script does not log into
|
||||
# CONTAINER_REPO_HOSTNAME and CONTAINER_REPO_ORGANIZATION).
|
||||
# If NO_PUSH is set, no login is necessary
|
||||
|
||||
|
||||
CFILE=${1:-Containerfile}
|
||||
shift || true
|
||||
|
||||
usage() {
|
||||
cat << EOF
|
||||
$0 [containerfile] (defaults to 'Containerfile')
|
||||
For a CI build (from ceph-ci.git, built and pushed to shaman):
|
||||
CI_CONTAINER: must be 'true'
|
||||
FLAVOR (OSD flavor, default or crimson)
|
||||
BRANCH (of Ceph. <remote>/<ref>)
|
||||
CEPH_SHA1 (of Ceph)
|
||||
ARCH (of build host, and resulting container)
|
||||
CONTAINER_REPO_HOSTNAME (quay.ceph.io, for CI, for instance)
|
||||
CONTAINER_REPO_ORGANIZATION (ceph-ci, for CI, for instance)
|
||||
CONTAINER_REPO_USERNAME
|
||||
CONTAINER_REPO_PASSWORD
|
||||
|
||||
For a release build: (from ceph.git, built and pushed to download.ceph.com)
|
||||
CI_CONTAINER: must be 'false'
|
||||
and you must also add
|
||||
VERSION (for instance, 19.1.0) for tagging the image
|
||||
|
||||
You can avoid the push step (for testing) by setting NO_PUSH to anything
|
||||
EOF
|
||||
}
|
||||
|
||||
CI_CONTAINER=${CI_CONTAINER:-false}
|
||||
FLAVOR=${FLAVOR:-default}
|
||||
# default: current checked-out branch
|
||||
BRANCH=${BRANCH:-$(git rev-parse --abbrev-ref HEAD)}
|
||||
# default: current checked-out branch
|
||||
CEPH_SHA1=${CEPH_SHA1:-$(git rev-parse HEAD)}
|
||||
# default: build host arch
|
||||
ARCH=${ARCH:-$(arch)}
|
||||
if [[ "${ARCH}" == "aarch64" ]] ; then ARCH=arm64; fi
|
||||
if [[ ${CI_CONTAINER} == "true" ]] ; then
|
||||
CONTAINER_REPO_HOSTNAME=${CONTAINER_REPO_HOSTNAME:-quay.ceph.io}
|
||||
CONTAINER_REPO_ORGANIZATION=${CONTAINER_REPO_ORGANIZATION:-ceph/ceph-${ARCH}}
|
||||
else
|
||||
CONTAINER_REPO_HOSTNAME=${CONTAINER_REPO_HOSTNAME:-quay.io}
|
||||
CONTAINER_REPO_ORGANIZATION=${CONTAINER_REPO_ORGANIZATION:-ceph/ceph}
|
||||
# default: most-recent annotated tag
|
||||
VERSION=${VERSION:-$(git describe --abbrev=0)}
|
||||
fi
|
||||
|
||||
# check for existence of all required variables
|
||||
: "${CI_CONTAINER:?}"
|
||||
: "${FLAVOR:?}"
|
||||
: "${BRANCH:?}"
|
||||
: "${CEPH_SHA1:?}"
|
||||
: "${ARCH:?}"
|
||||
: "${CONTAINER_REPO_HOSTNAME:?}"
|
||||
: "${CONTAINER_REPO_ORGANIZATION:?}"
|
||||
: "${CONTAINER_REPO_USERNAME:?}"
|
||||
: "${CONTAINER_REPO_PASSWORD:?}"
|
||||
if [[ ${CI_CONTAINER} != "true" ]] ; then : "${VERSION:?}"; fi
|
||||
|
||||
# check for valid repo auth (if pushing)
|
||||
ORGURL=${CONTAINER_REPO_HOSTNAME}/${CONTAINER_REPO_ORGANIZATION}
|
||||
MINIMAL_IMAGE=${ORGURL}/ceph:minimal-test
|
||||
if [[ ${NO_PUSH} != "true" ]] ; then
|
||||
podman rmi ${MINIMAL_IMAGE} || true
|
||||
echo "FROM scratch" | podman build -f - -t ${MINIMAL_IMAGE}
|
||||
if ! podman push ${MINIMAL_IMAGE} ; then
|
||||
echo "Not authenticated to ${ORGURL}; need docker/podman login?"
|
||||
exit 1
|
||||
fi
|
||||
podman rmi ${MINIMAL_IMAGE} || true
|
||||
fi
|
||||
|
||||
if [[ -z "${CEPH_GIT_REPO}" ]] ; then
|
||||
if [[ ${CI_CONTAINER} == "true" ]]; then
|
||||
CEPH_GIT_REPO=https://github.com/ceph/ceph-ci.git
|
||||
else
|
||||
CEPH_GIT_REPO=https://github.com/ceph/ceph.git
|
||||
fi
|
||||
fi
|
||||
|
||||
# BRANCH will be, say, origin/main. remove <remote>/
|
||||
BRANCH=${BRANCH##*/}
|
||||
|
||||
podman build --pull=newer --squash -f $CFILE -t build.sh.output \
|
||||
--build-arg FROM_IMAGE=${FROM_IMAGE:-quay.io/centos/centos:stream9} \
|
||||
--build-arg CEPH_SHA1=${CEPH_SHA1} \
|
||||
--build-arg CEPH_GIT_REPO=${CEPH_GIT_REPO} \
|
||||
--build-arg CEPH_REF=${BRANCH:-main} \
|
||||
--build-arg OSD_FLAVOR=${FLAVOR:-default} \
|
||||
--build-arg CI_CONTAINER=${CI_CONTAINER:-default} \
|
||||
2>&1
|
||||
|
||||
image_id=$(podman image ls localhost/build.sh.output --format '{{.ID}}')
|
||||
|
||||
# grab useful image attributes for building the tag
|
||||
#
|
||||
# the variable settings are prefixed with "export CEPH_CONTAINER_" so that
|
||||
# an eval or . can be used to put them into the environment
|
||||
#
|
||||
# PATH is removed from the output as it would cause problems for this
|
||||
# parent script and its children
|
||||
#
|
||||
# notes:
|
||||
#
|
||||
# we want .Architecture and everything in .Config.Env
|
||||
#
|
||||
# printf will not accept "\n" (is this a podman bug?)
|
||||
# so construct vars with two calls to podman inspect, joined by a newline,
|
||||
# so that vars will get the output of the first command, newline, output
|
||||
# of the second command
|
||||
#
|
||||
vars="$(podman inspect -f '{{printf "export CEPH_CONTAINER_ARCH=%v" .Architecture}}' ${image_id})
|
||||
$(podman inspect -f '{{range $index, $value := .Config.Env}}export CEPH_CONTAINER_{{$value}}{{println}}{{end}}' ${image_id})"
|
||||
vars="$(echo "${vars}" | grep -v PATH)"
|
||||
eval ${vars}
|
||||
|
||||
# remove everything up to and including the last slash
|
||||
fromtag=${CEPH_CONTAINER_FROM_IMAGE##*/}
|
||||
# translate : to -
|
||||
fromtag=${fromtag/:/-}
|
||||
builddate=$(date +%Y%m%d)
|
||||
local_tag=${fromtag}-${CEPH_CONTAINER_CEPH_REF}-${CEPH_CONTAINER_ARCH}-${builddate}
|
||||
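The parameter expansions above assemble the local tag from the base image name, the Ceph ref, the architecture, and the build date. The same string manipulation restated in Python for readability (values in the example are illustrative, not fixed outputs):

# Python restatement of the shell parameter expansion used for local_tag.
from datetime import date

def local_tag(from_image, ceph_ref, arch):
    fromtag = from_image.rsplit("/", 1)[-1]       # ${CEPH_CONTAINER_FROM_IMAGE##*/}
    fromtag = fromtag.replace(":", "-", 1)        # ${fromtag/:/-}
    builddate = date.today().strftime("%Y%m%d")   # $(date +%Y%m%d)
    return f"{fromtag}-{ceph_ref}-{arch}-{builddate}"

# local_tag("quay.io/centos/centos:stream9", "main", "amd64")
#   -> e.g. "centos-stream9-main-amd64-20241111"
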
|
||||
repopath=${CONTAINER_REPO_HOSTNAME}/${CONTAINER_REPO_ORGANIZATION}
|
||||
|
||||
if [[ ${CI_CONTAINER} == "true" ]] ; then
|
||||
# ceph-ci conventions for remote tags:
|
||||
# requires ARCH, BRANCH, CEPH_SHA1, FLAVOR
|
||||
full_repo_tag=$repopath/ceph:${BRANCH}-${fromtag}-${ARCH}-devel
|
||||
branch_repo_tag=$repopath/ceph:${BRANCH}
|
||||
sha1_repo_tag=$repopath/ceph:${CEPH_SHA1}
|
||||
|
||||
if [[ "${ARCH}" == "arm64" ]] ; then
|
||||
branch_repo_tag=${branch_repo_tag}-arm64
|
||||
sha1_repo_tag=${sha1_repo_tag}-arm64
|
||||
fi
|
||||
|
||||
podman tag ${image_id} ${full_repo_tag}
|
||||
podman tag ${image_id} ${branch_repo_tag}
|
||||
podman tag ${image_id} ${sha1_repo_tag}
|
||||
|
||||
if [[ ${FLAVOR} == "crimson" && ${ARCH} == "x86_64" ]] ; then
|
||||
sha1_flavor_repo_tag=${sha1_repo_tag}-${FLAVOR}
|
||||
podman tag ${image_id} ${sha1_flavor_repo_tag}
|
||||
if [[ -z "${NO_PUSH}" ]] ; then
|
||||
podman push ${sha1_flavor_repo_tag}
|
||||
fi
|
||||
exit
|
||||
fi
|
||||
|
||||
if [[ -z "${NO_PUSH}" ]] ; then
|
||||
podman push ${full_repo_tag}
|
||||
podman push ${branch_repo_tag}
|
||||
podman push ${sha1_repo_tag}
|
||||
fi
|
||||
else
|
||||
#
|
||||
# non-CI build. Tags are like v19.1.0-20240701
|
||||
# push to quay.ceph.io/ceph/prerelease
|
||||
#
|
||||
version_tag=${repopath}/prerelease/ceph-${ARCH}:${VERSION}-${builddate}
|
||||
|
||||
podman tag ${image_id} ${version_tag}
|
||||
if [[ -z "${NO_PUSH}" ]] ; then
|
||||
podman push ${image_id} ${version_tag}
|
||||
fi
|
||||
fi
|
||||
|
||||
|
164
ceph/container/make-manifest-list.py
Executable file
@ -0,0 +1,164 @@
|
||||
#!/usr/bin/python3
|
||||
#
|
||||
# make a combined "manifest-list" container out of two arch-specific containers
|
||||
# searches for latest tags on HOST/{AMD,ARM}64_REPO, makes sure they refer
|
||||
# to the same Ceph SHA1, and creates a manifest-list ("fat") image on
|
||||
# MANIFEST_HOST/MANIFEST_REPO with the 'standard' set of tags.
|
||||
#
|
||||
# uses scratch local manifest LOCALMANIFEST, will be destroyed if present
|
||||
|
||||
from datetime import datetime
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# optional env vars (will default if not set)
|
||||
|
||||
OPTIONAL_VARS = (
|
||||
'HOST',
|
||||
'AMD64_REPO',
|
||||
'ARM64_REPO',
|
||||
'MANIFEST_HOST',
|
||||
'MANIFEST_REPO',
|
||||
)
|
||||
|
||||
# Manifest image. Will be destroyed if already present.
|
||||
LOCALMANIFEST = 'localhost/m'
|
||||
|
||||
|
||||
def dump_vars(names, vardict):
|
||||
for name in names:
|
||||
print(f'{name}: {vardict[name]}', file=sys.stderr)
|
||||
|
||||
|
||||
def run_command(args):
|
||||
print(f'running {args}', file=sys.stderr)
|
||||
if not isinstance(args, list):
|
||||
args = args.split()
|
||||
try:
|
||||
result = subprocess.run(
|
||||
args,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True)
|
||||
return True, result.stdout, result.stderr
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Command '{e.cmd}' returned {e.returncode}")
|
||||
print("Error output:")
|
||||
print(e.stderr)
|
||||
return False, e.stdout, e.stderr
|
||||
|
||||
|
||||
def get_command_output(args):
|
||||
success, stdout, stderr = run_command(args)
|
||||
return (stdout if success else None)
|
||||
|
||||
|
||||
def run_command_show_failure(args):
|
||||
success, stdout, stderr = run_command(args)
|
||||
if not success:
|
||||
print(f'{args} failed:', file=sys.stderr)
|
||||
print(f'stdout:\n{stdout}')
|
||||
print(f'stderr:\n{stderr}')
|
||||
return success
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def get_latest_tag(path):
|
||||
latest_tag = json.loads(
|
||||
get_command_output(f'skopeo list-tags docker://{path}')
|
||||
)['Tags'][-1]
|
||||
return latest_tag
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def get_image_inspect(path):
|
||||
info = json.loads(
|
||||
get_command_output(f'skopeo inspect docker://{path}')
|
||||
)
|
||||
return info
|
||||
|
||||
|
||||
def get_sha1(info):
|
||||
return info['Labels']['GIT_COMMIT']
|
||||
|
||||
|
||||
def main():
|
||||
host = os.environ.get('HOST', 'quay.io')
|
||||
amd64_repo = os.environ.get('AMD64_REPO', 'ceph/ceph-amd64')
|
||||
arm64_repo = os.environ.get('ARM64_REPO', 'ceph/ceph-arm64')
|
||||
manifest_host = os.environ.get('MANIFEST_HOST', host)
|
||||
manifest_repo = os.environ.get('MANIFEST_REPO', 'ceph/ceph')
|
||||
dump_vars(
|
||||
('host',
|
||||
'amd64_repo',
|
||||
'arm64_repo',
|
||||
'manifest_host',
|
||||
'manifest_repo',
|
||||
),
|
||||
locals())
|
||||
|
||||
repopaths = (
|
||||
f'{host}/{amd64_repo}',
|
||||
f'{host}/{arm64_repo}',
|
||||
)
|
||||
tags = [get_latest_tag(p) for p in repopaths]
|
||||
print(f'latest tags: amd64:{tags[0]} arm64:{tags[1]}')
|
||||
|
||||
# check that version of latest tag matches
|
||||
version_re = \
|
||||
r'v(?P<major>\d+)\.(?P<minor>\d+)\.(?P<micro>\d+)-(?P<date>\d+)'
|
||||
versions = list()
|
||||
for tag in tags:
|
||||
mo = re.match(version_re, tag)
|
||||
ver = f'{mo.group("major")}.{mo.group("minor")}.{mo.group("micro")}'
|
||||
versions.append(ver)
|
||||
if versions[0] != versions[1]:
|
||||
print(
|
||||
f'version mismatch: amd64:{versions[0]} arm64:{versions[1]}',
|
||||
file=sys.stderr,
|
||||
)
|
||||
return(1)
|
||||
|
||||
major, minor, micro = mo.group(1), mo.group(2), mo.group(3)
|
||||
print(f'Ceph version: {major}.{minor}.{micro}', file=sys.stderr)
|
||||
|
||||
# check that ceph sha1 of two arch images matches
|
||||
paths_with_tags = [f'{p}:{t}' for (p, t) in zip(repopaths, tags)]
|
||||
info = [get_image_inspect(p) for p in paths_with_tags]
|
||||
sha1s = [get_sha1(i) for i in info]
|
||||
if sha1s[0] != sha1s[1]:
|
||||
print(
|
||||
f'sha1 mismatch: amd64: {sha1s[0]} arm64: {sha1s[1]}',
|
||||
file=sys.stderr,
|
||||
)
|
||||
builddate = [i['Created'] for i in info]
|
||||
print(
|
||||
f'Build dates: amd64: {builddate[0]} arm64: {builddate[1]}',
|
||||
file=sys.stderr,
|
||||
)
|
||||
return(1)
|
||||
|
||||
# create manifest list image with the standard list of tags
|
||||
# ignore failure on manifest rm
|
||||
run_command(f'podman manifest rm localhost/m')
|
||||
run_command_show_failure(f'podman manifest create localhost/m')
|
||||
for p in paths_with_tags:
|
||||
run_command_show_failure(f'podman manifest add m {p}')
|
||||
base = f'{manifest_host}/{manifest_repo}'
|
||||
for t in (
|
||||
f'v{major}',
|
||||
f'v{major}.{minor}',
|
||||
f'v{major}.{minor}.{micro}',
|
||||
f'v{major}.{minor}.{micro}-{datetime.today().strftime("%Y%m%d")}',
|
||||
):
|
||||
run_command_show_failure(
|
||||
f'podman manifest push localhost/m {base}:{t}')
|
||||
|
||||
|
||||
if (__name__ == '__main__'):
|
||||
sys.exit(main())
|
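A worked example of the `version_re` used in `main()` above, assuming a repo tag of the `v<major>.<minor>.<micro>-<date>` form mentioned in build.sh (the tag value here is illustrative):

# The named groups split a tag such as "v19.1.0-20240701" into its parts.
import re

version_re = r'v(?P<major>\d+)\.(?P<minor>\d+)\.(?P<micro>\d+)-(?P<date>\d+)'
mo = re.match(version_re, 'v19.1.0-20240701')
print(mo.group('major'), mo.group('minor'), mo.group('micro'), mo.group('date'))
# -> 19 1 0 20240701
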
@ -305,6 +305,7 @@ Depends: ${misc:Depends},
|
||||
${shlibs:Depends},
|
||||
python3-dateutil,
|
||||
python3-openssl,
|
||||
python3-packaging,
|
||||
Replaces: ceph-mgr (<< 15.1.0)
|
||||
Breaks: ceph-mgr (<< 15.1.0)
|
||||
Description: ceph manager modules which are always enabled
|
||||
|
@ -29,7 +29,7 @@ if [ -r /etc/os-release ]; then
|
||||
PYBUILD="3.7"
|
||||
fi
|
||||
;;
|
||||
rhel|centos)
|
||||
almalinux|rocky|rhel|centos)
|
||||
MAJOR_VER=$(echo "$VERSION_ID" | sed -e 's/\..*$//')
|
||||
if [ "$MAJOR_VER" -ge "9" ] ; then
|
||||
PYBUILD="3.9"
|
||||
|
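The hunk above extends the EL branch of the distro `case` statement to AlmaLinux and Rocky. A small Python sketch of the same decision, reading `ID` and `VERSION_ID` from `/etc/os-release` (the `< 9` branch is not shown in this hunk, so it is left open here):

# Pick the Python build version the way install-deps.sh does for EL-family
# distros; other branches of the case statement are out of scope here.
def pybuild_for(os_release="/etc/os-release"):
    fields = {}
    with open(os_release) as f:
        for line in f:
            key, sep, value = line.strip().partition("=")
            if sep:
                fields[key] = value.strip('"')
    if fields.get("ID") in ("almalinux", "rocky", "rhel", "centos"):
        major = int(fields.get("VERSION_ID", "0").split(".")[0])
        if major >= 9:
            return "3.9"
    return None  # handled by the other branches of the case statement
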
357
ceph/doc/_static/js/pgcalc.js
vendored
Normal file
@ -0,0 +1,357 @@
|
||||
var _____WB$wombat$assign$function_____ = function(name) {return (self._wb_wombat && self._wb_wombat.local_init && self._wb_wombat.local_init(name)) || self[name]; };
|
||||
if (!self.__WB_pmw) { self.__WB_pmw = function(obj) { this.__WB_source = obj; return this; } }
|
||||
{
|
||||
let window = _____WB$wombat$assign$function_____("window");
|
||||
let self = _____WB$wombat$assign$function_____("self");
|
||||
let document = _____WB$wombat$assign$function_____("document");
|
||||
let location = _____WB$wombat$assign$function_____("location");
|
||||
let top = _____WB$wombat$assign$function_____("top");
|
||||
let parent = _____WB$wombat$assign$function_____("parent");
|
||||
let frames = _____WB$wombat$assign$function_____("frames");
|
||||
let opener = _____WB$wombat$assign$function_____("opener");
|
||||
|
||||
var pow2belowThreshold = 0.25
|
||||
var key_values={};
|
||||
key_values['poolName'] ={'name':'Pool Name','default':'newPool','description': 'Name of the pool in question. Typical pool names are included below.', 'width':'30%; text-align: left'};
|
||||
key_values['size'] ={'name':'Size','default': 3, 'description': 'Number of replicas the pool will have. Default value of 3 is pre-filled.', 'width':'10%', 'global':1};
|
||||
key_values['osdNum'] ={'name':'OSD #','default': 100, 'description': 'Number of OSDs which this Pool will have PGs in. Typically, this is the entire Cluster OSD count, but could be less based on CRUSH rules. (e.g. Separate SSD and SATA disk sets)', 'width':'10%', 'global':1};
|
||||
key_values['percData'] ={'name':'%Data', 'default': 5, 'description': 'This value represents the approximate percentage of data which will be contained in this pool for that specific OSD set. Examples are pre-filled below for guidance.','width':'10%'};
|
||||
key_values['targPGsPerOSD'] ={'name':'Target PGs per OSD', 'default':100, 'description': 'This value should be populated based on the following guidance:', 'width':'10%', 'global':1, 'options': [ ['100','If the cluster OSD count is not expected to increase in the foreseeable future.'], ['200', 'If the cluster OSD count is expected to increase (up to double the size) in the foreseeable future.']]}
|
||||
|
||||
var notes ={
|
||||
'totalPerc':'<b>"Total Data Percentage"</b> below table should be a multiple of 100%.',
|
||||
'totalPGs':'<b>"Total PG Count"</b> below table will be the count of Primary PG copies. However, when calculating total PGs per OSD average, you must include all copies.',
|
||||
'noDecrease':'It\'s also important to know that the PG count can be increased, but <b>NEVER</b> decreased without destroying / recreating the pool. However, increasing the PG Count of a pool is one of the most impactful events in a Ceph Cluster, and should be avoided for production clusters if possible.',
|
||||
};
|
||||
|
||||
var presetTables={};
|
||||
presetTables['All-in-One']=[
|
||||
{ 'poolName' : 'rbd', 'size' : '3', 'osdNum' : '100', 'percData' : '100', 'targPGsPerOSD' : '100'},
|
||||
];
|
||||
presetTables['OpenStack']=[
|
||||
{ 'poolName' : 'cinder-backup', 'size' : '3', 'osdNum' : '100', 'percData' : '25', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'cinder-volumes', 'size' : '3', 'osdNum' : '100', 'percData' : '53', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'ephemeral-vms', 'size' : '3', 'osdNum' : '100', 'percData' : '15', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'glance-images', 'size' : '3', 'osdNum' : '100', 'percData' : '7', 'targPGsPerOSD' : '100'},
|
||||
];
|
||||
presetTables['OpenStack w RGW - Jewel and later']=[
|
||||
{ 'poolName' : '.rgw.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.control', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.data.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.gc', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.intent-log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.meta', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.usage', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.users.keys', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.users.email', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.users.swift', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.users.uid', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.buckets.extra', 'size' : '3', 'osdNum' : '100', 'percData' : '1.0', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.buckets.index', 'size' : '3', 'osdNum' : '100', 'percData' : '3.0', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.buckets.data', 'size' : '3', 'osdNum' : '100', 'percData' : '19', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'cinder-backup', 'size' : '3', 'osdNum' : '100', 'percData' : '18', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'cinder-volumes', 'size' : '3', 'osdNum' : '100', 'percData' : '42.8', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'ephemeral-vms', 'size' : '3', 'osdNum' : '100', 'percData' : '10', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'glance-images', 'size' : '3', 'osdNum' : '100', 'percData' : '5', 'targPGsPerOSD' : '100'},
|
||||
];
|
||||
|
||||
presetTables['Rados Gateway Only - Jewel and later']=[
|
||||
{ 'poolName' : '.rgw.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.control', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.data.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.gc', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.intent-log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.meta', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.usage', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.users.keys', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.users.email', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.users.swift', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.users.uid', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.buckets.extra', 'size' : '3', 'osdNum' : '100', 'percData' : '1.0', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.buckets.index', 'size' : '3', 'osdNum' : '100', 'percData' : '3.0', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'default.rgw.buckets.data', 'size' : '3', 'osdNum' : '100', 'percData' : '94.8', 'targPGsPerOSD' : '100'},
|
||||
];
|
||||
|
||||
presetTables['OpenStack w RGW - Infernalis and earlier']=[
|
||||
{ 'poolName' : '.intent-log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.buckets', 'size' : '3', 'osdNum' : '100', 'percData' : '18', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.buckets.extra', 'size' : '3', 'osdNum' : '100', 'percData' : '1.0', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.buckets.index', 'size' : '3', 'osdNum' : '100', 'percData' : '3.0', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.control', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.gc', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.usage', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.users', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.users.email', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.users.swift', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.users.uid', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'cinder-backup', 'size' : '3', 'osdNum' : '100', 'percData' : '19', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'cinder-volumes', 'size' : '3', 'osdNum' : '100', 'percData' : '42.9', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'ephemeral-vms', 'size' : '3', 'osdNum' : '100', 'percData' : '10', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'glance-images', 'size' : '3', 'osdNum' : '100', 'percData' : '5', 'targPGsPerOSD' : '100'},
|
||||
];
|
||||
|
||||
presetTables['Rados Gateway Only - Infernalis and earlier']=[
|
||||
{ 'poolName' : '.intent-log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.buckets', 'size' : '3', 'osdNum' : '100', 'percData' : '94.9', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.buckets.extra', 'size' : '3', 'osdNum' : '100', 'percData' : '1.0', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.buckets.index', 'size' : '3', 'osdNum' : '100', 'percData' : '3.0', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.control', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.gc', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.rgw.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.usage', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.users', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.users.email', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.users.swift', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : '.users.uid', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'},
|
||||
];
|
||||
presetTables['RBD and libRados']=[
|
||||
{ 'poolName' : 'rbd', 'size' : '3', 'osdNum' : '100', 'percData' : '75', 'targPGsPerOSD' : '100'},
|
||||
{ 'poolName' : 'myObjects', 'size' : '3', 'osdNum' : '100', 'percData' : '25', 'targPGsPerOSD' : '100'},
|
||||
];
|
||||
|
||||
$(function() {
|
||||
$("#presetType").on("change",changePreset);
|
||||
$("#btnAddPool").on("click",addPool);
|
||||
$("#btnGenCommands").on("click",generateCommands);
|
||||
$.each(presetTables,function(index,value) {
|
||||
selIndex='';
|
||||
if ( index == 'OpenStack w RGW - Jewel and later' )
|
||||
selIndex=' selected';
|
||||
$("#presetType").append("<option value=\""+index+"\""+selIndex+">"+index+"</option>");
|
||||
});
|
||||
changePreset();
|
||||
$("#beforeTable").html("<fieldset id='keyFieldset'><legend>Key</legend><dl class='table-display' id='keyDL'></dl></fieldset>");
|
||||
$.each(key_values, function(index, value) {
|
||||
pre='';
|
||||
post='';
|
||||
if ('global' in value) {
|
||||
pre='<a href="javascript://" onClick="globalChange(\''+index+'\');" title="Change the \''+value['name']+'\' parameter globally">';
|
||||
post='</a>'
|
||||
}
|
||||
|
||||
var dlAdd="<dt id='dt_"+index+"'>"+pre+value['name']+post+"</dt><dd id='dd_"+index+"'>"+value['description'];
|
||||
if ( 'options' in value ) {
|
||||
dlAdd+="<dl class='sub-table'>";
|
||||
$.each(value['options'], function (subIndex, subValue) {
|
||||
dlAdd+="<dt><a href=\"javascript://\" onClick=\"massUpdate('"+index+"','"+subValue[0]+"');\" title=\"Set all '"+value['name']+"' fields to '"+subValue[0]+"'.\">"+subValue[0]+"</a></dt><dd>"+subValue[1]+"</dd>";
|
||||
});
|
||||
dlAdd+="</dl>";
|
||||
}
|
||||
dlAdd+="</dd>";
|
||||
$("#keyDL").append(dlAdd);
|
||||
});
|
||||
$("#afterTable").html("<fieldset id='notesFieldset'><legend>Notes</legend><ul id='notesUL'>\n<ul></fieldset>");
|
||||
$.each(notes,function(index, value) {
|
||||
$("#notesUL").append("\t<li id=\"li_"+index+"\">"+value+"</li>\n");
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
function changePreset() {
|
||||
resetTable();
|
||||
fillTable($("#presetType").val());
|
||||
}
|
||||
|
||||
function resetTable() {
|
||||
$("#pgsperpool").html("");
|
||||
$("#pgsperpool").append("<tr id='headerRow'>\n</tr>\n");
|
||||
$("#headerRow").append("\t<th> </th>\n");
|
||||
var fieldCount=0;
|
||||
var percDataIndex=0;
|
||||
$.each(key_values, function(index, value) {
|
||||
fieldCount++;
|
||||
pre='';
|
||||
post='';
|
||||
var widthAdd='';
|
||||
if ( index == 'percData' )
|
||||
percDataIndex=fieldCount;
|
||||
if ('width' in value)
|
||||
widthAdd=' style=\'width: '+value['width']+'\'';
|
||||
if ('global' in value) {
|
||||
pre='<a href="javascript://" onClick="globalChange(\''+index+'\');" title="Change the \''+value['name']+'\' parameter globally">';
|
||||
post='</a>'
|
||||
}
|
||||
$("#headerRow").append("\t<th"+widthAdd+">"+pre+value['name']+post+"</th>\n");
|
||||
});
|
||||
percDataIndex++;
|
||||
$("#headerRow").append("\t<th class='center'>Suggested PG Count</th>\n");
|
||||
$("#pgsperpool").append("<tr id='totalRow'><td colspan='"+percDataIndex+"' id='percTotal' style='text-align: right; margin-right: 10px;'><strong>Total Data Percentage:</strong> <span id='percTotalValue'>0</span>%</td><td> </td><td id='pgTotal' class='bold pgcount' style='text-align: right;'>PG Total Count: <span id='pgTotalValue'>0</span></td></tr>");
|
||||
}
|
||||
|
||||
function nearestPow2( aSize ){
|
||||
var tmp=Math.pow(2, Math.round(Math.log(aSize)/Math.log(2)));
|
||||
if(tmp<(aSize*(1-pow2belowThreshold)))
|
||||
tmp*=2;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
function globalChange(field) {
|
||||
dialogHTML='<div title="Change \''+key_values[field]['name']+'\' Globally"><form>';
|
||||
dialogHTML+='<label for="value">New '+key_values[field]['name']+' value:</label><br />\n';
|
||||
dialogHTML+='<input type="text" name="globalValue" id="globalValue" value="'+$("#row0_"+field+"_input").val()+'" style="text-align: right;"/>';
|
||||
dialogHTML+='<input type="hidden" name="globalField" id="globalField" value="'+field+'"/>';
|
||||
dialogHTML+='<input type="submit" tabindex="-1" style="position:absolute; top:-1000px">';
|
||||
dialogHTML+='</form>';
|
||||
globalDialog=$(dialogHTML).dialog({
|
||||
autoOpen: true,
|
||||
width: 350,
|
||||
show: 'fold',
|
||||
hide: 'fold',
|
||||
modal: true,
|
||||
buttons: {
|
||||
"Update Value": function() { massUpdate($("#globalField").val(),$("#globalValue").val()); globalDialog.dialog("close"); setTimeout(function() { globalDialog.dialog("destroy"); }, 1000); },
|
||||
"Cancel": function() { globalDialog.dialog("close"); setTimeout(function() { globalDialog.dialog("destroy"); }, 1000); }
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
var rowCount=0;
|
||||
function fillTable(presetType) {
|
||||
rowCount=0;
|
||||
$.each(presetTables[presetType], function(index,value) {
|
||||
addTableRow(value);
|
||||
});
|
||||
}
|
||||
|
||||
function addPool() {
|
||||
dialogHTML='<div title="Add Pool"><form>';
|
||||
$.each(key_values, function(index,value) {
|
||||
dialogHTML+='<br /><label for="new'+index+'">'+value['name']+':</label><br />\n';
|
||||
classAdd='right';
|
||||
if ( index == 'poolName' )
|
||||
classAdd='left';
|
||||
dialogHTML+='<input type="text" name="new'+index+'" id="new'+index+'" value="'+value['default']+'" class="'+classAdd+'"/><br />';
|
||||
});
|
||||
dialogHTML+='<input type="submit" tabindex="-1" style="position:absolute; top:-1000px">';
|
||||
dialogHTML+='</form>';
|
||||
addPoolDialog=$(dialogHTML).dialog({
|
||||
autoOpen: true,
|
||||
width: 350,
|
||||
show: 'fold',
|
||||
hide: 'fold',
|
||||
modal: true,
|
||||
buttons: {
|
||||
"Add Pool": function() {
|
||||
var newPoolValues={};
|
||||
$.each(key_values,function(index,value) {
|
||||
newPoolValues[index]=$("#new"+index).val();
|
||||
});
|
||||
addTableRow(newPoolValues);
|
||||
addPoolDialog.dialog("close");
|
||||
setTimeout(function() { addPoolDialog.dialog("destroy"); }, 1000); },
|
||||
"Cancel": function() { addPoolDialog.dialog("close"); setTimeout(function() { addPoolDialog.dialog("destroy"); }, 1000); }
|
||||
}
|
||||
});
|
||||
|
||||
// addTableRow({'poolName':'newPool','size':3, 'osdNum':100,'targPGsPerOSD': 100, 'percData':0});
|
||||
}
|
||||
|
||||
function addTableRow(rowValues) {
|
||||
rowAdd="<tr id='row"+rowCount+"'>\n";
|
||||
rowAdd+="\t<td width='15px' class='inputColor'><a href='javascript://' title='Remove Pool' onClick='$(\"#row"+rowCount+"\").remove();updateTotals();'><span class='ui-icon ui-icon-trash'></span></a></td>\n";
|
||||
$.each(key_values, function(index,value) {
|
||||
classAdd=' center';
|
||||
modifier='';
|
||||
if ( index == 'percData' ) {
|
||||
classAdd='" style="text-align: right;';
|
||||
// modifier=' %';
|
||||
} else if ( index == 'poolName' )
|
||||
classAdd=' left';
|
||||
rowAdd+="\t<td id=\"row"+rowCount+"_"+index+"\"><input type=\"text\" class=\"inputColor "+index+classAdd+"\" id=\"row"+rowCount+"_"+index+"_input\" value=\""+rowValues[index]+"\" onFocus=\"focusMe("+rowCount+",'"+index+"');\" onKeyUp=\"keyMe("+rowCount+",'"+index+"');\" onBlur=\"blurMe("+rowCount+",'"+index+"');\">"+modifier+"</td>\n";
|
||||
});
|
||||
rowAdd+="\t<td id=\"row"+rowCount+"_pgCount\" class='pgcount' style='text-align: right;'>0</td></tr>";
|
||||
$("#totalRow").before(rowAdd);
|
||||
updatePGCount(rowCount);
|
||||
$("[id$='percData_input']").each(function() { var fieldVal=parseFloat($(this).val()); $(this).val(fieldVal.toFixed(2)); });
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
function updatePGCount(rowID) {
|
||||
if(rowID==-1) {
|
||||
for(var i=0;i<rowCount;i++) {
|
||||
updatePGCount(i);
|
||||
}
|
||||
} else {
|
||||
minValue=nearestPow2(Math.floor($("#row"+rowID+"_osdNum_input").val()/$("#row"+rowID+"_size_input").val())+1);
|
||||
if(minValue<$("#row"+rowID+"_osdNum_input").val())
|
||||
minValue*=2;
|
||||
calcValue=nearestPow2(Math.floor(($("#row"+rowID+"_targPGsPerOSD_input").val()*$("#row"+rowID+"_osdNum_input").val()*$("#row"+rowID+"_percData_input").val())/(100*$("#row"+rowID+"_size_input").val())));
|
||||
if(minValue>calcValue)
|
||||
$("#row"+rowID+"_pgCount").html(minValue);
|
||||
else
|
||||
$("#row"+rowID+"_pgCount").html(calcValue);
|
||||
}
|
||||
updateTotals();
|
||||
}
|
||||
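`updatePGCount()` above applies the usual PG sizing rule: target PGs per OSD times the OSD count times the pool's data percentage, divided by 100 times the replica size, rounded to the nearest power of two and never allowed below a floor derived from the OSD count. The same rule as a small Python sketch (minor rounding differences between `Math.round` and Python's `round` are possible at exact half-way points):

# Python restatement of nearestPow2() and updatePGCount() from the page above.
import math

POW2_BELOW_THRESHOLD = 0.25  # mirrors pow2belowThreshold

def nearest_pow2(size):
    value = 2 ** round(math.log2(size))
    if value < size * (1 - POW2_BELOW_THRESHOLD):
        value *= 2
    return value

def suggested_pg_count(osd_num, size, perc_data, targ_pgs_per_osd):
    min_value = nearest_pow2(osd_num // size + 1)
    if min_value < osd_num:
        min_value *= 2
    calc_value = nearest_pow2(
        (targ_pgs_per_osd * osd_num * perc_data) // (100 * size))
    return max(min_value, calc_value)

# e.g. suggested_pg_count(100, 3, 100, 100) -> 4096
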
|
||||
function focusMe(rowID,field) {
|
||||
$("#row"+rowID+"_"+field+"_input").toggleClass('inputColor');
|
||||
$("#row"+rowID+"_"+field+"_input").toggleClass('highlightColor');
|
||||
$("#dt_"+field).toggleClass('highlightColor');
|
||||
$("#dd_"+field).toggleClass('highlightColor');
|
||||
updatePGCount(rowID);
|
||||
}
|
||||
|
||||
function blurMe(rowID,field) {
|
||||
focusMe(rowID,field);
|
||||
$("[id$='percData_input']").each(function() { var fieldVal=parseFloat($(this).val()); $(this).val(fieldVal.toFixed(2)); });
|
||||
}
|
||||
|
||||
function keyMe(rowID,field) {
|
||||
updatePGCount(rowID);
|
||||
}
|
||||
|
||||
function massUpdate(field,value) {
|
||||
$("[id$='_"+field+"_input']").val(value);
|
||||
key_values[field]['default']=value;
|
||||
updatePGCount(-1);
|
||||
}
|
||||
|
||||
function updateTotals() {
|
||||
var totalPerc=0;
|
||||
var totalPGs=0;
|
||||
$("[id$='percData_input']").each(function() {
|
||||
totalPerc+=parseFloat($(this).val());
|
||||
if ( parseFloat($(this).val()) > 100 )
|
||||
$(this).addClass('ui-state-error');
|
||||
else
|
||||
$(this).removeClass('ui-state-error');
|
||||
});
|
||||
$("[id$='_pgCount']").each(function() {
|
||||
totalPGs+=parseInt($(this).html());
|
||||
});
|
||||
$("#percTotalValue").html(totalPerc.toFixed(2));
|
||||
$("#pgTotalValue").html(totalPGs);
|
||||
if(parseFloat(totalPerc.toFixed(2)) % 100 != 0) {
|
||||
$("#percTotalValue").addClass('ui-state-error');
|
||||
$("#li_totalPerc").addClass('ui-state-error');
|
||||
} else {
|
||||
$("#percTotalValue").removeClass('ui-state-error');
|
||||
$("#li_totalPerc").removeClass('ui-state-error');
|
||||
}
|
||||
$("#commandCode").html("");
|
||||
}
|
||||
|
||||
function generateCommands() {
|
||||
outputCommands="## Note: The 'while' loops below pause between pools to allow all\n\
|
||||
## PGs to be created. This is a safety mechanism to prevent\n\
|
||||
## saturating the Monitor nodes.\n\
|
||||
## -------------------------------------------------------------------\n\n";
|
||||
for(i=0;i<rowCount;i++) {
|
||||
console.log(i);
|
||||
outputCommands+="ceph osd pool create "+$("#row"+i+"_poolName_input").val()+" "+$("#row"+i+"_pgCount").html()+"\n";
|
||||
outputCommands+="ceph osd pool set "+$("#row"+i+"_poolName_input").val()+" size "+$("#row"+i+"_size_input").val()+"\n";
|
||||
outputCommands+="while [ $(ceph -s | grep creating -c) -gt 0 ]; do echo -n .;sleep 1; done\n\n";
|
||||
}
|
||||
window.location.href = "data:application/download," + encodeURIComponent(outputCommands);
|
||||
}
|
||||
|
||||
|
||||
}
|
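For reference, `generateCommands()` above emits plain `ceph osd pool create` / `ceph osd pool set` commands with a wait loop between pools so that PG creation settles before the next pool is created. A minimal Python equivalent of that generator (a sketch, not part of the page):

# Emit the same command sequence from a list of (pool_name, pg_count, size).
def generate_commands(pools):
    lines = []
    for name, pg_count, size in pools:
        lines.append(f"ceph osd pool create {name} {pg_count}")
        lines.append(f"ceph osd pool set {name} size {size}")
        lines.append("while [ $(ceph -s | grep creating -c) -gt 0 ]; "
                     "do echo -n .; sleep 1; done")
    return "\n".join(lines)

print(generate_commands([("rbd", 4096, 3)]))
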
@ -19,9 +19,14 @@ The Ceph Storage Cluster
|
||||
========================
|
||||
|
||||
Ceph provides an infinitely scalable :term:`Ceph Storage Cluster` based upon
|
||||
:abbr:`RADOS (Reliable Autonomic Distributed Object Store)`, which you can read
|
||||
about in `RADOS - A Scalable, Reliable Storage Service for Petabyte-scale
|
||||
Storage Clusters`_.
|
||||
:abbr:`RADOS (Reliable Autonomic Distributed Object Store)`, a reliable,
|
||||
distributed storage service that uses the intelligence in each of its nodes to
|
||||
secure the data it stores and to provide that data to :term:`client`\s. See
|
||||
Sage Weil's "`The RADOS Object Store
|
||||
<https://ceph.io/en/news/blog/2009/the-rados-distributed-object-store/>`_" blog
|
||||
post for a brief explanation of RADOS and see `RADOS - A Scalable, Reliable
|
||||
Storage Service for Petabyte-scale Storage Clusters`_ for an exhaustive
|
||||
explanation of :term:`RADOS`.
|
||||
|
||||
A Ceph Storage Cluster consists of multiple types of daemons:
|
||||
|
||||
@ -30,12 +35,13 @@ A Ceph Storage Cluster consists of multiple types of daemons:
|
||||
- :term:`Ceph Manager`
|
||||
- :term:`Ceph Metadata Server`
|
||||
|
||||
Ceph Monitors maintain the master copy of the cluster map, which they provide
|
||||
to Ceph clients. Provisioning multiple monitors within the Ceph cluster ensures
|
||||
availability in the event that one of the monitor daemons or its host fails.
|
||||
The Ceph monitor provides copies of the cluster map to storage cluster clients.
|
||||
.. _arch_monitor:
|
||||
|
||||
A Ceph OSD Daemon checks its own state and the state of other OSDs and reports
|
||||
Ceph Monitors maintain the master copy of the cluster map, which they provide
|
||||
to Ceph clients. The existence of multiple monitors in the Ceph cluster ensures
|
||||
availability if one of the monitor daemons or its host fails.
|
||||
|
||||
A Ceph OSD Daemon checks its own state and the state of other OSDs and reports
|
||||
back to monitors.
|
||||
|
||||
A Ceph Manager serves as an endpoint for monitoring, orchestration, and plug-in
|
||||
@ -45,10 +51,11 @@ A Ceph Metadata Server (MDS) manages file metadata when CephFS is used to
|
||||
provide file services.
|
||||
|
||||
Storage cluster clients and :term:`Ceph OSD Daemon`\s use the CRUSH algorithm
|
||||
to compute information about data location. This means that clients and OSDs
|
||||
are not bottlenecked by a central lookup table. Ceph's high-level features
|
||||
include a native interface to the Ceph Storage Cluster via ``librados``, and a
|
||||
number of service interfaces built on top of ``librados``.
|
||||
to compute information about the location of data. Use of the CRUSH algorithm
|
||||
means that clients and OSDs are not bottlenecked by a central lookup table.
|
||||
Ceph's high-level features include a native interface to the Ceph Storage
|
||||
Cluster via ``librados``, and a number of service interfaces built on top of
|
||||
``librados``.
|
||||
|
||||
Storing Data
|
||||
------------
|
||||
@ -59,7 +66,7 @@ comes through a :term:`Ceph Block Device`, :term:`Ceph Object Storage`, the
|
||||
``librados``. The data received by the Ceph Storage Cluster is stored as RADOS
|
||||
objects. Each object is stored on an :term:`Object Storage Device` (this is
|
||||
also called an "OSD"). Ceph OSDs control read, write, and replication
|
||||
operations on storage drives. The default BlueStore back end stores objects
|
||||
operations on storage drives. The default BlueStore back end stores objects
|
||||
in a monolithic, database-like fashion.
|
||||
|
||||
.. ditaa::
|
||||
@ -67,7 +74,7 @@ in a monolithic, database-like fashion.
|
||||
/------\ +-----+ +-----+
|
||||
| obj |------>| {d} |------>| {s} |
|
||||
\------/ +-----+ +-----+
|
||||
|
||||
|
||||
Object OSD Drive
|
||||
|
||||
Ceph OSD Daemons store data as objects in a flat namespace. This means that
|
||||
@ -83,10 +90,10 @@ created date, and the last modified date.
|
||||
/------+------------------------------+----------------\
|
||||
| ID | Binary Data | Metadata |
|
||||
+------+------------------------------+----------------+
|
||||
| 1234 | 0101010101010100110101010010 | name1 = value1 |
|
||||
| 1234 | 0101010101010100110101010010 | name1 = value1 |
|
||||
| | 0101100001010100110101010010 | name2 = value2 |
|
||||
| | 0101100001010100110101010010 | nameN = valueN |
|
||||
\------+------------------------------+----------------/
|
||||
\------+------------------------------+----------------/
|
||||
|
||||
.. note:: An object ID is unique across the entire cluster, not just the local
|
||||
filesystem.
|
||||
@ -126,8 +133,8 @@ massive scale by distributing the work to all the OSD daemons in the cluster
|
||||
and all the clients that communicate with them. CRUSH uses intelligent data
|
||||
replication to ensure resiliency, which is better suited to hyper-scale
|
||||
storage. The following sections provide additional details on how CRUSH works.
|
||||
For a detailed discussion of CRUSH, see `CRUSH - Controlled, Scalable,
|
||||
Decentralized Placement of Replicated Data`_.
|
||||
For an in-depth, academic discussion of CRUSH, see `CRUSH - Controlled,
|
||||
Scalable, Decentralized Placement of Replicated Data`_.
|
||||
|
||||
.. index:: architecture; cluster map
|
||||
|
||||
@ -145,14 +152,14 @@ five maps that constitute the cluster map are:
|
||||
the address, and the TCP port of each monitor. The monitor map specifies the
|
||||
current epoch, the time of the monitor map's creation, and the time of the
|
||||
monitor map's last modification. To view a monitor map, run ``ceph mon
|
||||
dump``.
|
||||
|
||||
dump``.
|
||||
|
||||
#. **The OSD Map:** Contains the cluster ``fsid``, the time of the OSD map's
|
||||
creation, the time of the OSD map's last modification, a list of pools, a
|
||||
list of replica sizes, a list of PG numbers, and a list of OSDs and their
|
||||
statuses (for example, ``up``, ``in``). To view an OSD map, run ``ceph
|
||||
osd dump``.
|
||||
|
||||
osd dump``.
|
||||
|
||||
#. **The PG Map:** Contains the PG version, its time stamp, the last OSD map
|
||||
epoch, the full ratios, and the details of each placement group. This
|
||||
includes the PG ID, the `Up Set`, the `Acting Set`, the state of the PG (for
|
||||
@ -166,8 +173,8 @@ five maps that constitute the cluster map are:
|
||||
{decomp-crushmap-filename}``. Use a text editor or ``cat`` to view the
|
||||
decompiled map.
|
||||
|
||||
#. **The MDS Map:** Contains the current MDS map epoch, when the map was
|
||||
created, and the last time it changed. It also contains the pool for
|
||||
#. **The MDS Map:** Contains the current MDS map epoch, when the map was
|
||||
created, and the last time it changed. It also contains the pool for
|
||||
storing metadata, a list of metadata servers, and which metadata servers
|
||||
are ``up`` and ``in``. To view an MDS map, execute ``ceph fs dump``.
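
Putting the five maps above together, each one can be inspected directly from
the CLI. This is only a brief sketch; the file names used for the CRUSH map are
arbitrary::

   # Monitor, OSD, and MDS maps print directly to standard output
   ceph mon dump
   ceph osd dump
   ceph fs dump

   # The PG map is reported by "ceph pg dump"
   ceph pg dump

   # The CRUSH map is binary; decompile it before reading
   ceph osd getcrushmap -o crushmap.bin
   crushtool -d crushmap.bin -o crushmap.txt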
|
||||
|
||||
@ -210,13 +217,13 @@ High Availability Authentication
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The ``cephx`` authentication system is used by Ceph to authenticate users and
|
||||
daemons and to protect against man-in-the-middle attacks.
|
||||
daemons and to protect against man-in-the-middle attacks.
|
||||
|
||||
.. note:: The ``cephx`` protocol does not address data encryption in transport
|
||||
.. note:: The ``cephx`` protocol does not address data encryption in transport
|
||||
(for example, SSL/TLS) or encryption at rest.
|
||||
|
||||
``cephx`` uses shared secret keys for authentication. This means that both the
|
||||
client and the monitor cluster keep a copy of the client's secret key.
|
||||
client and the monitor cluster keep a copy of the client's secret key.
|
||||
|
||||
The ``cephx`` protocol makes it possible for each party to prove to the other
|
||||
that it has a copy of the key without revealing it. This provides mutual
|
||||
@ -233,7 +240,7 @@ Direct interactions between Ceph clients and OSDs require authenticated
|
||||
connections. The ``cephx`` authentication system establishes and sustains these
|
||||
authenticated connections.
|
||||
|
||||
The ``cephx`` protocol operates in a manner similar to `Kerberos`_.
|
||||
The ``cephx`` protocol operates in a manner similar to `Kerberos`_.
|
||||
|
||||
A user invokes a Ceph client to contact a monitor. Unlike Kerberos, each
|
||||
monitor can authenticate users and distribute keys, which means that there is
|
||||
@ -246,7 +253,7 @@ Monitors. The client then uses the session key to request services from the
|
||||
monitors, and the monitors provide the client with a ticket that authenticates
|
||||
the client against the OSDs that actually handle data. Ceph Monitors and OSDs
|
||||
share a secret, which means that the clients can use the ticket provided by the
|
||||
monitors to authenticate against any OSD or metadata server in the cluster.
|
||||
monitors to authenticate against any OSD or metadata server in the cluster.
|
||||
|
||||
Like Kerberos tickets, ``cephx`` tickets expire. An attacker cannot use an
|
||||
expired ticket or session key that has been obtained surreptitiously. This form
|
||||
@ -262,8 +269,8 @@ subsystem generates the username and key, stores a copy on the monitor(s), and
|
||||
transmits the user's secret back to the ``client.admin`` user. This means that
|
||||
the client and the monitor share a secret key.
|
||||
|
||||
.. note:: The ``client.admin`` user must provide the user ID and
|
||||
secret key to the user in a secure manner.
|
||||
.. note:: The ``client.admin`` user must provide the user ID and
|
||||
secret key to the user in a secure manner.
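
As a hedged sketch of this key-creation step, the following command creates a
user and writes its secret key to a keyring file. The user name
``client.john``, the capabilities, and the pool name are illustrative only::

   # Create (or fetch) a user and store its secret key in a keyring file
   ceph auth get-or-create client.john \
       mon 'allow r' \
       osd 'allow rw pool=liverpool' \
       -o /etc/ceph/ceph.client.john.keyring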
|
||||
|
||||
.. ditaa::
|
||||
|
||||
@ -273,7 +280,7 @@ the client and the monitor share a secret key.
|
||||
| request to |
|
||||
| create a user |
|
||||
|-------------->|----------+ create user
|
||||
| | | and
|
||||
| | | and
|
||||
|<--------------|<---------+ store key
|
||||
| transmit key |
|
||||
| |
|
||||
@ -296,25 +303,25 @@ and uses it to sign requests to OSDs and to metadata servers in the cluster.
|
||||
+---------+ +---------+
|
||||
| authenticate |
|
||||
|-------------->|----------+ generate and
|
||||
| | | encrypt
|
||||
| | | encrypt
|
||||
|<--------------|<---------+ session key
|
||||
| transmit |
|
||||
| encrypted |
|
||||
| session key |
|
||||
| |
|
||||
| |
|
||||
|-----+ decrypt |
|
||||
| | session |
|
||||
|<----+ key |
|
||||
| | session |
|
||||
|<----+ key |
|
||||
| |
|
||||
| req. ticket |
|
||||
|-------------->|----------+ generate and
|
||||
| | | encrypt
|
||||
| | | encrypt
|
||||
|<--------------|<---------+ ticket
|
||||
| recv. ticket |
|
||||
| |
|
||||
| |
|
||||
|-----+ decrypt |
|
||||
| | ticket |
|
||||
|<----+ |
|
||||
| | ticket |
|
||||
|<----+ |
|
||||
|
||||
|
||||
The ``cephx`` protocol authenticates ongoing communications between the clients
|
||||
@ -329,7 +336,7 @@ between the client and the daemon.
|
||||
| Client | | Monitor | | MDS | | OSD |
|
||||
+---------+ +---------+ +-------+ +-------+
|
||||
| request to | | |
|
||||
| create a user | | |
|
||||
| create a user | | |
|
||||
|-------------->| mon and | |
|
||||
|<--------------| client share | |
|
||||
| receive | a secret. | |
|
||||
@ -337,7 +344,7 @@ between the client and the daemon.
|
||||
| |<------------>| |
|
||||
| |<-------------+------------>|
|
||||
| | mon, mds, | |
|
||||
| authenticate | and osd | |
|
||||
| authenticate | and osd | |
|
||||
|-------------->| share | |
|
||||
|<--------------| a secret | |
|
||||
| session key | | |
|
||||
@ -353,7 +360,7 @@ between the client and the daemon.
|
||||
| receive response (CephFS only) |
|
||||
| |
|
||||
| make request |
|
||||
|------------------------------------------->|
|
||||
|------------------------------------------->|
|
||||
|<-------------------------------------------|
|
||||
receive response
|
||||
|
||||
@ -362,7 +369,7 @@ daemons. The authentication is not extended beyond the Ceph client. If a user
|
||||
accesses the Ceph client from a remote host, cephx authentication will not be
|
||||
applied to the connection between the user's host and the client host.
|
||||
|
||||
See `Cephx Config Guide`_ for more on configuration details.
|
||||
See `Cephx Config Guide`_ for more on configuration details.
|
||||
|
||||
See `User Management`_ for more on user management.
|
||||
|
||||
@ -416,7 +423,7 @@ the greater cluster provides several benefits:
|
||||
Monitors receive no such message after a configurable period of time,
|
||||
then they mark the OSD ``down``. This mechanism is a failsafe, however.
|
||||
Normally, Ceph OSD Daemons determine if a neighboring OSD is ``down`` and
|
||||
report it to the Ceph Monitors. This contributes to making Ceph Monitors
|
||||
report it to the Ceph Monitors. This contributes to making Ceph Monitors
|
||||
lightweight processes. See `Monitoring OSDs`_ and `Heartbeats`_ for
|
||||
additional details.
|
||||
|
||||
@ -463,7 +470,7 @@ the greater cluster provides several benefits:
|
||||
Write (2) | | | | Write (3)
|
||||
+------+ | | +------+
|
||||
| +------+ +------+ |
|
||||
| | Ack (4) Ack (5)| |
|
||||
| | Ack (4) Ack (5)| |
|
||||
v * * v
|
||||
+---------------+ +---------------+
|
||||
| Secondary OSD | | Tertiary OSD |
|
||||
@ -490,7 +497,7 @@ About Pools
|
||||
|
||||
The Ceph storage system supports the notion of 'Pools', which are logical
|
||||
partitions for storing objects.
|
||||
|
||||
|
||||
Ceph Clients retrieve a `Cluster Map`_ from a Ceph Monitor, and write RADOS
|
||||
objects to pools. The way that Ceph places the data in the pools is determined
|
||||
by the pool's ``size`` or number of replicas, the CRUSH rule, and the number of
|
||||
@ -511,12 +518,12 @@ placement groups in the pool.
|
||||
+--------+ +---------------+
|
||||
| Pool |---------->| CRUSH Rule |
|
||||
+--------+ Selects +---------------+
|
||||
|
||||
|
||||
|
||||
Pools set at least the following parameters:
|
||||
|
||||
- Ownership/Access to Objects
|
||||
- The Number of Placement Groups, and
|
||||
- The Number of Placement Groups, and
|
||||
- The CRUSH Rule to Use.
|
||||
|
||||
See `Set Pool Values`_ for details.
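
For example, these pool parameters can be set from the command line. The pool
name, PG count, and replica count below are illustrative, not recommendations::

   # Create a replicated pool with 64 placement groups
   ceph osd pool create liverpool 64

   # Set the number of replicas (the pool "size")
   ceph osd pool set liverpool size 3

   # Assign a CRUSH rule to the pool
   ceph osd pool set liverpool crush_rule replicated_rule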
|
||||
@ -529,12 +536,12 @@ Mapping PGs to OSDs
|
||||
|
||||
Each pool has a number of placement groups (PGs) within it. CRUSH dynamically
|
||||
maps PGs to OSDs. When a Ceph Client stores objects, CRUSH maps each RADOS
|
||||
object to a PG.
|
||||
object to a PG.
|
||||
|
||||
This mapping of RADOS objects to PGs implements an abstraction and indirection
|
||||
layer between Ceph OSD Daemons and Ceph Clients. The Ceph Storage Cluster must
|
||||
be able to grow (or shrink) and redistribute data adaptively when the internal
|
||||
topology changes.
|
||||
topology changes.
|
||||
|
||||
If the Ceph Client "knew" which Ceph OSD Daemons were storing which objects, a
|
||||
tight coupling would exist between the Ceph Client and the Ceph OSD Daemon.
|
||||
@ -563,11 +570,11 @@ placement groups, and how it maps placement groups to OSDs.
|
||||
+------+------+-------------+ |
|
||||
| | | |
|
||||
v v v v
|
||||
/----------\ /----------\ /----------\ /----------\
|
||||
/----------\ /----------\ /----------\ /----------\
|
||||
| | | | | | | |
|
||||
| OSD #1 | | OSD #2 | | OSD #3 | | OSD #4 |
|
||||
| | | | | | | |
|
||||
\----------/ \----------/ \----------/ \----------/
|
||||
\----------/ \----------/ \----------/ \----------/
|
||||
|
||||
The client uses its copy of the cluster map and the CRUSH algorithm to compute
|
||||
precisely which OSD it will use when reading or writing a particular object.
|
||||
@ -581,11 +588,11 @@ When a Ceph Client binds to a Ceph Monitor, it retrieves the latest version of
|
||||
the `Cluster Map`_. When a client has been equipped with a copy of the cluster
|
||||
map, it is aware of all the monitors, OSDs, and metadata servers in the
|
||||
cluster. **However, even equipped with a copy of the latest version of the
|
||||
cluster map, the client doesn't know anything about object locations.**
|
||||
cluster map, the client doesn't know anything about object locations.**
|
||||
|
||||
**Object locations must be computed.**
|
||||
|
||||
The client requies only the object ID and the name of the pool in order to
|
||||
The client requires only the object ID and the name of the pool in order to
|
||||
compute the object location.
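
This computation can be observed from the command line: ``ceph osd map``
reports the placement group and the acting set that CRUSH selects for a given
pool and object name. The names below are only an example::

   # Show which PG and which OSDs the object "john" in pool "liverpool" maps to
   ceph osd map liverpool john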
|
||||
|
||||
Ceph stores data in named pools (for example, "liverpool"). When a client
|
||||
@ -624,7 +631,7 @@ persists, you may need to refer to the `Troubleshooting Peering Failure`_
|
||||
section.
|
||||
|
||||
.. Note:: PGs that agree on the state of the cluster do not necessarily have
|
||||
the current data yet.
|
||||
the current data yet.
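
In practice, peering progress can be checked with the standard status
commands. A short sketch (the PG ID is an example)::

   # Summary of PG states across the cluster
   ceph pg stat

   # List PGs that are stuck inactive (not yet peered)
   ceph pg dump_stuck inactive

   # Detailed peering information for a single PG, e.g. 1.1f
   ceph pg 1.1f query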
|
||||
|
||||
The Ceph Storage Cluster was designed to store at least two copies of an object
|
||||
(that is, ``size = 2``), which is the minimum requirement for data safety. For
|
||||
@ -654,7 +661,7 @@ epoch.
|
||||
The Ceph OSD daemons that are part of an *Acting Set* might not always be
|
||||
``up``. When an OSD in the *Acting Set* is ``up``, it is part of the *Up Set*.
|
||||
The *Up Set* is an important distinction, because Ceph can remap PGs to other
|
||||
Ceph OSD Daemons when an OSD fails.
|
||||
Ceph OSD Daemons when an OSD fails.
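
The *Up Set* and *Acting Set* of any placement group can be inspected
directly; for example (the PG ID shown is a placeholder)::

   # Print the up and acting sets for PG 1.1f
   ceph pg map 1.1f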
|
||||
|
||||
.. note:: Consider a hypothetical *Acting Set* for a PG that contains
|
||||
``osd.25``, ``osd.32`` and ``osd.61``. The first OSD (``osd.25``), is the
|
||||
@ -674,7 +681,7 @@ process (albeit rather crudely, since it is substantially less impactful with
|
||||
large clusters) where some, but not all of the PGs migrate from existing OSDs
|
||||
(OSD 1, and OSD 2) to the new OSD (OSD 3). Even when rebalancing, CRUSH is
|
||||
stable. Many of the placement groups remain in their original configuration,
|
||||
and each OSD gets some added capacity, so there are no load spikes on the
|
||||
and each OSD gets some added capacity, so there are no load spikes on the
|
||||
new OSD after rebalancing is complete.
|
||||
|
||||
|
||||
@ -732,7 +739,8 @@ of ``K+M`` so that each chunk is stored in an OSD in the acting set. The rank of
|
||||
the chunk is stored as an attribute of the object.
|
||||
|
||||
For instance an erasure coded pool can be created to use five OSDs (``K+M = 5``) and
|
||||
sustain the loss of two of them (``M = 2``).
|
||||
sustain the loss of two of them (``M = 2``). Data may be unavailable until (``K+1``)
|
||||
shards are restored.
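
A hedged example of creating such a pool follows. The profile uses ``k=3`` and
``m=2`` to match the ``K+M = 5`` case above; the profile name, pool name, and
PG count are illustrative::

   # Define an erasure-code profile with K=3 data chunks and M=2 coding chunks
   ceph osd erasure-code-profile set ec-k3m2 k=3 m=2 crush-failure-domain=host

   # Create an erasure-coded pool that uses the profile
   ceph osd pool create ecpool 32 32 erasure ec-k3m2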
|
||||
|
||||
Reading and Writing Encoded Chunks
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -821,7 +829,7 @@ account.
|
||||
| | | |
|
||||
| +-------+-------+ |
|
||||
| ^ |
|
||||
| | |
|
||||
| | |
|
||||
| | |
|
||||
+--+---+ +------+ +---+--+ +---+--+
|
||||
name | NYAN | | NYAN | | NYAN | | NYAN |
|
||||
@ -874,7 +882,7 @@ version 1).
|
||||
.. ditaa::
|
||||
|
||||
Primary OSD
|
||||
|
||||
|
||||
+-------------+
|
||||
| OSD 1 | +-------------+
|
||||
| log | Write Full | |
|
||||
@ -919,7 +927,7 @@ as ``D2v2`` ) while others are acknowledged and persisted to storage drives
|
||||
.. ditaa::
|
||||
|
||||
Primary OSD
|
||||
|
||||
|
||||
+-------------+
|
||||
| OSD 1 |
|
||||
| log |
|
||||
@ -928,11 +936,11 @@ as ``D2v2`` ) while others are acknowledged and persisted to storage drives
|
||||
| +----+ +<------------+ Ceph Client |
|
||||
| | v2 | |
|
||||
| +----+ | +-------------+
|
||||
| |D1v1| 1,1 |
|
||||
| +----+ |
|
||||
+------+------+
|
||||
|
|
||||
|
|
||||
| |D1v1| 1,1 |
|
||||
| +----+ |
|
||||
+------+------+
|
||||
|
|
||||
|
|
||||
| +------+------+
|
||||
| | OSD 2 |
|
||||
| +------+ | log |
|
||||
@ -960,7 +968,7 @@ the logs' ``last_complete`` pointer can move from ``1,1`` to ``1,2``.
|
||||
.. ditaa::
|
||||
|
||||
Primary OSD
|
||||
|
||||
|
||||
+-------------+
|
||||
| OSD 1 |
|
||||
| log |
|
||||
@ -969,10 +977,10 @@ the logs' ``last_complete`` pointer can move from ``1,1`` to ``1,2``.
|
||||
| +----+ +<------------+ Ceph Client |
|
||||
| | v2 | |
|
||||
| +----+ | +-------------+
|
||||
| |D1v1| 1,1 |
|
||||
| +----+ |
|
||||
+------+------+
|
||||
|
|
||||
| |D1v1| 1,1 |
|
||||
| +----+ |
|
||||
+------+------+
|
||||
|
|
||||
| +-------------+
|
||||
| | OSD 2 |
|
||||
| | log |
|
||||
@ -984,7 +992,7 @@ the logs' ``last_complete`` pointer can move from ``1,1`` to ``1,2``.
|
||||
| | |D2v1| 1,1 |
|
||||
| | +----+ |
|
||||
| +-------------+
|
||||
|
|
||||
|
|
||||
| +-------------+
|
||||
| | OSD 3 |
|
||||
| | log |
|
||||
@ -1005,7 +1013,7 @@ on **OSD 3**.
|
||||
.. ditaa::
|
||||
|
||||
Primary OSD
|
||||
|
||||
|
||||
+-------------+
|
||||
| OSD 1 |
|
||||
| log |
|
||||
@ -1048,7 +1056,7 @@ will be the head of the new authoritative log.
|
||||
| (down) |
|
||||
| c333 |
|
||||
+------+------+
|
||||
|
|
||||
|
|
||||
| +-------------+
|
||||
| | OSD 2 |
|
||||
| | log |
|
||||
@ -1057,7 +1065,7 @@ will be the head of the new authoritative log.
|
||||
| | +----+ |
|
||||
| | |
|
||||
| +-------------+
|
||||
|
|
||||
|
|
||||
| +-------------+
|
||||
| | OSD 3 |
|
||||
| | log |
|
||||
@ -1077,20 +1085,20 @@ will be the head of the new authoritative log.
|
||||
| 1,1 |
|
||||
| |
|
||||
+------+------+
|
||||
|
||||
|
||||
|
||||
|
||||
The log entry 1,2 found on **OSD 3** is divergent from the new authoritative log
|
||||
provided by **OSD 4**: it is discarded and the file containing the ``C1v2``
|
||||
chunk is removed. The ``D1v1`` chunk is rebuilt with the ``decode`` function of
|
||||
the erasure coding library during scrubbing and stored on the new primary
|
||||
the erasure coding library during scrubbing and stored on the new primary
|
||||
**OSD 4**.
|
||||
|
||||
|
||||
.. ditaa::
|
||||
|
||||
Primary OSD
|
||||
|
||||
|
||||
+-------------+
|
||||
| OSD 4 |
|
||||
| log |
|
||||
@ -1136,7 +1144,7 @@ configured to act as a cache tier, and a backing pool of either erasure-coded
|
||||
or relatively slower/cheaper devices configured to act as an economical storage
|
||||
tier. The Ceph objecter handles where to place the objects and the tiering
|
||||
agent determines when to flush objects from the cache to the backing storage
|
||||
tier. So the cache tier and the backing storage tier are completely transparent
|
||||
tier. So the cache tier and the backing storage tier are completely transparent
|
||||
to Ceph clients.
|
||||
|
||||
|
||||
@ -1146,14 +1154,14 @@ to Ceph clients.
|
||||
| Ceph Client |
|
||||
+------+------+
|
||||
^
|
||||
Tiering is |
|
||||
Tiering is |
|
||||
Transparent | Faster I/O
|
||||
to Ceph | +---------------+
|
||||
Client Ops | | |
|
||||
Client Ops | | |
|
||||
| +----->+ Cache Tier |
|
||||
| | | |
|
||||
| | +-----+---+-----+
|
||||
| | | ^
|
||||
| | | ^
|
||||
v v | | Active Data in Cache Tier
|
||||
+------+----+--+ | |
|
||||
| Objecter | | |
|
||||
@ -1194,11 +1202,11 @@ operations on the outbound data and return the data to the client.
|
||||
|
||||
A Ceph class for a content management system that presents pictures of a
|
||||
particular size and aspect ratio could take an inbound bitmap image, crop it
|
||||
to a particular aspect ratio, resize it and embed an invisible copyright or
|
||||
watermark to help protect the intellectual property; then, save the
|
||||
to a particular aspect ratio, resize it and embed an invisible copyright or
|
||||
watermark to help protect the intellectual property; then, save the
|
||||
resulting bitmap image to the object store.
|
||||
|
||||
See ``src/objclass/objclass.h``, ``src/fooclass.cc`` and ``src/barclass`` for
|
||||
See ``src/objclass/objclass.h``, ``src/fooclass.cc`` and ``src/barclass`` for
|
||||
exemplary implementations.
|
||||
|
||||
|
||||
@ -1275,7 +1283,7 @@ synchronization/communication channel.
|
||||
+----------+ +----------+ +----------+ +---------------+
|
||||
| | | |
|
||||
| | | |
|
||||
| | Watch Object | |
|
||||
| | Watch Object | |
|
||||
|--------------------------------------------------->|
|
||||
| | | |
|
||||
|<---------------------------------------------------|
|
||||
@ -1291,7 +1299,7 @@ synchronization/communication channel.
|
||||
| | | |
|
||||
| | |<-----------------|
|
||||
| | | Ack/Commit |
|
||||
| | Notify | |
|
||||
| | Notify | |
|
||||
|--------------------------------------------------->|
|
||||
| | | |
|
||||
|<---------------------------------------------------|
|
||||
@ -1301,7 +1309,7 @@ synchronization/communication channel.
|
||||
| | Notify | |
|
||||
| | |<-----------------|
|
||||
| | | Notify |
|
||||
| | Ack | |
|
||||
| | Ack | |
|
||||
|----------------+---------------------------------->|
|
||||
| | | |
|
||||
| | Ack | |
|
||||
@ -1309,7 +1317,7 @@ synchronization/communication channel.
|
||||
| | | |
|
||||
| | | Ack |
|
||||
| | |----------------->|
|
||||
| | | |
|
||||
| | | |
|
||||
|<---------------+----------------+------------------|
|
||||
| Complete
|
||||
|
||||
@ -1327,13 +1335,13 @@ volume'. Ceph's striping offers the throughput of RAID 0 striping, the
|
||||
reliability of n-way RAID mirroring and faster recovery.
|
||||
|
||||
Ceph provides three types of clients: Ceph Block Device, Ceph File System, and
|
||||
Ceph Object Storage. A Ceph Client converts its data from the representation
|
||||
Ceph Object Storage. A Ceph Client converts its data from the representation
|
||||
format it provides to its users (a block device image, RESTful objects, CephFS
|
||||
filesystem directories) into objects for storage in the Ceph Storage Cluster.
|
||||
filesystem directories) into objects for storage in the Ceph Storage Cluster.
|
||||
|
||||
.. tip:: The objects Ceph stores in the Ceph Storage Cluster are not striped.
|
||||
Ceph Object Storage, Ceph Block Device, and the Ceph File System stripe their
|
||||
data over multiple Ceph Storage Cluster objects. Ceph Clients that write
|
||||
.. tip:: The objects Ceph stores in the Ceph Storage Cluster are not striped.
|
||||
Ceph Object Storage, Ceph Block Device, and the Ceph File System stripe their
|
||||
data over multiple Ceph Storage Cluster objects. Ceph Clients that write
|
||||
directly to the Ceph Storage Cluster via ``librados`` must perform the
|
||||
striping (and parallel I/O) for themselves to obtain these benefits.
|
||||
|
||||
@ -1376,7 +1384,7 @@ diagram depicts the simplest form of striping:
|
||||
| End cCCC | | End cCCC |
|
||||
| Object 0 | | Object 1 |
|
||||
\-----------/ \-----------/
|
||||
|
||||
|
||||
|
||||
If you anticipate large image sizes, large S3 or Swift objects (e.g., video),
|
||||
or large CephFS directories, you may see considerable read/write performance
|
||||
@ -1416,16 +1424,16 @@ stripe (``stripe unit 16``) in the first object in the new object set (``object
|
||||
+-----------------+--------+--------+-----------------+
|
||||
| | | | +--\
|
||||
v v v v |
|
||||
/-----------\ /-----------\ /-----------\ /-----------\ |
|
||||
/-----------\ /-----------\ /-----------\ /-----------\ |
|
||||
| Begin cCCC| | Begin cCCC| | Begin cCCC| | Begin cCCC| |
|
||||
| Object 0 | | Object 1 | | Object 2 | | Object 3 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 0 | | unit 1 | | unit 2 | | unit 3 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | +-\
|
||||
| stripe | | stripe | | stripe | | stripe | +-\
|
||||
| unit 4 | | unit 5 | | unit 6 | | unit 7 | | Object
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ +- Set
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ +- Set
|
||||
| stripe | | stripe | | stripe | | stripe | | 1
|
||||
| unit 8 | | unit 9 | | unit 10 | | unit 11 | +-/
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
@ -1433,36 +1441,36 @@ stripe (``stripe unit 16``) in the first object in the new object set (``object
|
||||
| unit 12 | | unit 13 | | unit 14 | | unit 15 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| End cCCC | | End cCCC | | End cCCC | | End cCCC | |
|
||||
| Object 0 | | Object 1 | | Object 2 | | Object 3 | |
|
||||
| Object 0 | | Object 1 | | Object 2 | | Object 3 | |
|
||||
\-----------/ \-----------/ \-----------/ \-----------/ |
|
||||
|
|
||||
+--/
|
||||
|
||||
|
||||
+--\
|
||||
|
|
||||
/-----------\ /-----------\ /-----------\ /-----------\ |
|
||||
/-----------\ /-----------\ /-----------\ /-----------\ |
|
||||
| Begin cCCC| | Begin cCCC| | Begin cCCC| | Begin cCCC| |
|
||||
| Object 4 | | Object 5 | | Object 6 | | Object 7 | |
|
||||
| Object 4 | | Object 5 | | Object 6 | | Object 7 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 16 | | unit 17 | | unit 18 | | unit 19 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | +-\
|
||||
| stripe | | stripe | | stripe | | stripe | +-\
|
||||
| unit 20 | | unit 21 | | unit 22 | | unit 23 | | Object
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ +- Set
|
||||
| stripe | | stripe | | stripe | | stripe | | 2
|
||||
| stripe | | stripe | | stripe | | stripe | | 2
|
||||
| unit 24 | | unit 25 | | unit 26 | | unit 27 | +-/
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 28 | | unit 29 | | unit 30 | | unit 31 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| End cCCC | | End cCCC | | End cCCC | | End cCCC | |
|
||||
| Object 4 | | Object 5 | | Object 6 | | Object 7 | |
|
||||
| Object 4 | | Object 5 | | Object 6 | | Object 7 | |
|
||||
\-----------/ \-----------/ \-----------/ \-----------/ |
|
||||
|
|
||||
+--/
|
||||
|
||||
Three important variables determine how Ceph stripes data:
|
||||
Three important variables determine how Ceph stripes data:
|
||||
|
||||
- **Object Size:** Objects in the Ceph Storage Cluster have a maximum
|
||||
configurable size (e.g., 2MB, 4MB, etc.). The object size should be large
|
||||
@ -1470,24 +1478,24 @@ Three important variables determine how Ceph stripes data:
|
||||
the stripe unit.
|
||||
|
||||
- **Stripe Width:** Stripes have a configurable unit size (e.g., 64kb).
|
||||
The Ceph Client divides the data it will write to objects into equally
|
||||
sized stripe units, except for the last stripe unit. A stripe width,
|
||||
should be a fraction of the Object Size so that an object may contain
|
||||
The Ceph Client divides the data it will write to objects into equally
|
||||
sized stripe units, except for the last stripe unit. A stripe width
|
||||
should be a fraction of the Object Size so that an object may contain
|
||||
many stripe units.
|
||||
|
||||
- **Stripe Count:** The Ceph Client writes a sequence of stripe units
|
||||
over a series of objects determined by the stripe count. The series
|
||||
of objects is called an object set. After the Ceph Client writes to
|
||||
over a series of objects determined by the stripe count. The series
|
||||
of objects is called an object set. After the Ceph Client writes to
|
||||
the last object in the object set, it returns to the first object in
|
||||
the object set.
|
||||
|
||||
|
||||
.. important:: Test the performance of your striping configuration before
|
||||
putting your cluster into production. You CANNOT change these striping
|
||||
parameters after you stripe the data and write it to objects.
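
With RBD, for example, these three variables correspond to image creation
options. The following sketch is illustrative only (pool, image name, and
sizes are examples, not tuning advice)::

   # 4 MiB objects, 64 KiB stripe unit, 4 objects per object set
   rbd create liverpool/striped-image --size 10G \
       --object-size 4M --stripe-unit 64K --stripe-count 4

   # Confirm the striping parameters
   rbd info liverpool/striped-image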
|
||||
|
||||
Once the Ceph Client has striped data to stripe units and mapped the stripe
|
||||
units to objects, Ceph's CRUSH algorithm maps the objects to placement groups,
|
||||
and the placement groups to Ceph OSD Daemons before the objects are stored as
|
||||
and the placement groups to Ceph OSD Daemons before the objects are stored as
|
||||
files on a storage drive.
|
||||
|
||||
.. note:: Since a client writes to a single pool, all data striped into objects
|
||||
@ -1509,23 +1517,23 @@ Ceph Clients include a number of service interfaces. These include:
|
||||
that uses ``librbd`` directly--avoiding the kernel object overhead for
|
||||
virtualized systems.
|
||||
|
||||
- **Object Storage:** The :term:`Ceph Object Storage` (a.k.a., RGW) service
|
||||
- **Object Storage:** The :term:`Ceph Object Storage` (a.k.a., RGW) service
|
||||
provides RESTful APIs with interfaces that are compatible with Amazon S3
|
||||
and OpenStack Swift.
|
||||
|
||||
- **Filesystem**: The :term:`Ceph File System` (CephFS) service provides
|
||||
a POSIX compliant filesystem usable with ``mount`` or as
|
||||
and OpenStack Swift.
|
||||
|
||||
- **Filesystem**: The :term:`Ceph File System` (CephFS) service provides
|
||||
a POSIX compliant filesystem usable with ``mount`` or as
|
||||
a filesystem in user space (FUSE).
|
||||
|
||||
Ceph can run additional instances of OSDs, MDSs, and monitors for scalability
|
||||
and high availability. The following diagram depicts the high-level
|
||||
architecture.
|
||||
architecture.
|
||||
|
||||
.. ditaa::
|
||||
|
||||
+--------------+ +----------------+ +-------------+
|
||||
| Block Device | | Object Storage | | CephFS |
|
||||
+--------------+ +----------------+ +-------------+
|
||||
+--------------+ +----------------+ +-------------+
|
||||
|
||||
+--------------+ +----------------+ +-------------+
|
||||
| librbd | | librgw | | libcephfs |
|
||||
@ -1557,10 +1565,10 @@ another application.
|
||||
.. topic:: S3/Swift Objects and Store Cluster Objects Compared
|
||||
|
||||
Ceph's Object Storage uses the term *object* to describe the data it stores.
|
||||
S3 and Swift objects are not the same as the objects that Ceph writes to the
|
||||
S3 and Swift objects are not the same as the objects that Ceph writes to the
|
||||
Ceph Storage Cluster. Ceph Object Storage objects are mapped to Ceph Storage
|
||||
Cluster objects. The S3 and Swift objects do not necessarily
|
||||
correspond in a 1:1 manner with an object stored in the storage cluster. It
|
||||
Cluster objects. The S3 and Swift objects do not necessarily
|
||||
correspond in a 1:1 manner with an object stored in the storage cluster. It
|
||||
is possible for an S3 or Swift object to map to multiple Ceph objects.
|
||||
|
||||
See `Ceph Object Storage`_ for details.
|
||||
@ -1576,7 +1584,7 @@ Ceph Storage Cluster, where each object gets mapped to a placement group and
|
||||
distributed, and the placement groups are spread across separate ``ceph-osd``
|
||||
daemons throughout the cluster.
|
||||
|
||||
.. important:: Striping allows RBD block devices to perform better than a single
|
||||
.. important:: Striping allows RBD block devices to perform better than a single
|
||||
server could!
|
||||
|
||||
Thin-provisioned snapshottable Ceph Block Devices are an attractive option for
|
||||
@ -1585,7 +1593,8 @@ typically deploy a Ceph Block Device with the ``rbd`` network storage driver in
|
||||
QEMU/KVM, where the host machine uses ``librbd`` to provide a block device
|
||||
service to the guest. Many cloud computing stacks use ``libvirt`` to integrate
|
||||
with hypervisors. You can use thin-provisioned Ceph Block Devices with QEMU and
|
||||
``libvirt`` to support OpenStack and CloudStack among other solutions.
|
||||
``libvirt`` to support OpenStack, OpenNebula and CloudStack
|
||||
among other solutions.
|
||||
|
||||
While we do not provide ``librbd`` support with other hypervisors at this time,
|
||||
you may also use Ceph Block Device kernel objects to provide a block device to a
|
||||
@ -1610,7 +1619,7 @@ a Filesystem in User Space (FUSE).
|
||||
|
||||
+-----------------------+ +------------------------+
|
||||
| CephFS Kernel Object | | CephFS FUSE |
|
||||
+-----------------------+ +------------------------+
|
||||
+-----------------------+ +------------------------+
|
||||
|
||||
+---------------------------------------------------+
|
||||
| CephFS Library (libcephfs) |
|
||||
@ -1639,9 +1648,9 @@ CephFS separates the metadata from the data, storing the metadata in the MDS,
|
||||
and storing the file data in one or more objects in the Ceph Storage Cluster.
|
||||
The Ceph filesystem aims for POSIX compatibility. ``ceph-mds`` can run as a
|
||||
single process, or it can be distributed out to multiple physical machines,
|
||||
either for high availability or for scalability.
|
||||
either for high availability or for scalability.
|
||||
|
||||
- **High Availability**: The extra ``ceph-mds`` instances can be `standby`,
|
||||
- **High Availability**: The extra ``ceph-mds`` instances can be `standby`,
|
||||
ready to take over the duties of any failed ``ceph-mds`` that was
|
||||
`active`. This is easy because all the data, including the journal, is
|
||||
stored on RADOS. The transition is triggered automatically by ``ceph-mon``.
|
||||
|
@ -3,18 +3,20 @@
|
||||
``activate``
|
||||
============
|
||||
|
||||
Once :ref:`ceph-volume-lvm-prepare` is completed, and all the various steps
|
||||
that entails are done, the volume is ready to get "activated".
|
||||
After :ref:`ceph-volume-lvm-prepare` has completed its run, the volume can be
|
||||
activated.
|
||||
|
||||
This activation process enables a systemd unit that persists the OSD ID and its
|
||||
UUID (also called ``fsid`` in Ceph CLI tools), so that at boot time it can
|
||||
understand what OSD is enabled and needs to be mounted.
|
||||
Activating the volume involves enabling a ``systemd`` unit that persists the
|
||||
``OSD ID`` and its ``UUID`` (which is also called the ``fsid`` in the Ceph CLI
|
||||
tools). After this information has been persisted, the cluster can determine
|
||||
which OSD is enabled and must be mounted.
|
||||
|
||||
.. note:: The execution of this call is fully idempotent, and there is no
|
||||
side-effects when running multiple times
|
||||
.. note:: The execution of this call is fully idempotent. This means that the
|
||||
call can be executed multiple times without changing the result of its first
|
||||
successful execution.
|
||||
|
||||
For OSDs deployed by cephadm, please refer to :ref:`cephadm-osd-activate`
|
||||
instead.
|
||||
For information about OSDs deployed by cephadm, refer to
|
||||
:ref:`cephadm-osd-activate`.
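
As a brief sketch of the activation call itself (the OSD ID and UUID below are
placeholders)::

   # Activate a single prepared OSD by ID and UUID (fsid)
   ceph-volume lvm activate 0 0263644D-0BF1-4D6D-BC34-28BD98AE3BC8

   # Or activate every OSD that ceph-volume can discover on this host
   ceph-volume lvm activate --all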
|
||||
|
||||
New OSDs
|
||||
--------
|
||||
|
@ -9,3 +9,48 @@ Logical volume name format is vg/lv. Fails if OSD has already got attached DB.
|
||||
Attach vgname/lvname as a DB volume to OSD 1::
|
||||
|
||||
ceph-volume lvm new-db --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db
|
||||
|
||||
Reversing BlueFS Spillover to Slow Devices
|
||||
------------------------------------------
|
||||
|
||||
Under certain circumstances, OSD RocksDB databases spill onto slow storage and
|
||||
the Ceph cluster returns specifics regarding BlueFS spillover warnings. ``ceph
|
||||
health detail`` returns these spillover warnings. Here is an example of a
|
||||
spillover warning::
|
||||
|
||||
osd.76 spilled over 128 KiB metadata from 'db' device (56 GiB used of 60 GiB) to slow device
|
||||
|
||||
To move this DB metadata from the slower device to the faster device, take the
|
||||
following steps:
|
||||
|
||||
#. Expand the database's logical volume (LV):
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
lvextend -l ${size} ${lv}/${db} ${ssd_dev}
|
||||
|
||||
#. Stop the OSD:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm unit --fsid $cid --name osd.${osd} stop
|
||||
|
||||
#. Run the ``bluefs-bdev-expand`` command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm shell --fsid $cid --name osd.${osd} -- ceph-bluestore-tool bluefs-bdev-expand --path /var/lib/ceph/osd/ceph-${osd}
|
||||
|
||||
#. Run the ``bluefs-bdev-migrate`` command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm shell --fsid $cid --name osd.${osd} -- ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-${osd} --devs-source /var/lib/ceph/osd/ceph-${osd}/block --dev-target /var/lib/ceph/osd/ceph-${osd}/block.db
|
||||
|
||||
#. Restart the OSD:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm unit --fsid $cid --name osd.${osd} start
|
||||
|
||||
.. note:: *The above procedure was developed by Chris Dunlop on the [ceph-users] mailing list, and can be seen in its original context here:* `[ceph-users] Re: Fixing BlueFS spillover (pacific 16.2.14) <https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/message/POPUFSZGXR3P2RPYPJ4WJ4HGHZ3QESF6/>`_
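
After the OSD restarts, the spillover warning for that OSD should clear. A
simple check, assuming the same shell variables used in the steps above::

   # No BlueFS spillover warnings should remain for osd.${osd}
   ceph health detail | grep -i spillover || echo "no spillover warnings"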
|
||||
|
@ -23,20 +23,20 @@ Preparation
|
||||
#. Make sure that the ``cephadm`` command line tool is available on each host
|
||||
in the existing cluster. See :ref:`get-cephadm` to learn how.
|
||||
|
||||
#. Prepare each host for use by ``cephadm`` by running this command:
|
||||
#. Prepare each host for use by ``cephadm`` by running this command on that host:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm prepare-host
|
||||
|
||||
#. Choose a version of Ceph to use for the conversion. This procedure will work
|
||||
with any release of Ceph that is Octopus (15.2.z) or later, inclusive. The
|
||||
with any release of Ceph that is Octopus (15.2.z) or later. The
|
||||
latest stable release of Ceph is the default. You might be upgrading from an
|
||||
earlier Ceph release at the same time that you're performing this
|
||||
conversion; if you are upgrading from an earlier release, make sure to
|
||||
conversion. If you are upgrading from an earlier release, make sure to
|
||||
follow any upgrade-related instructions for that release.
|
||||
|
||||
Pass the image to cephadm with the following command:
|
||||
Pass the Ceph container image to cephadm with the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -51,25 +51,27 @@ Preparation
|
||||
|
||||
cephadm ls
|
||||
|
||||
Before starting the conversion process, ``cephadm ls`` shows all existing
|
||||
daemons to have a style of ``legacy``. As the adoption process progresses,
|
||||
adopted daemons will appear with a style of ``cephadm:v1``.
|
||||
Before starting the conversion process, ``cephadm ls`` reports all existing
|
||||
daemons with the style ``legacy``. As the adoption process progresses,
|
||||
adopted daemons will appear with the style ``cephadm:v1``.
|
||||
|
||||
|
||||
Adoption process
|
||||
----------------
|
||||
|
||||
#. Make sure that the ceph configuration has been migrated to use the cluster
|
||||
config database. If the ``/etc/ceph/ceph.conf`` is identical on each host,
|
||||
then the following command can be run on one single host and will affect all
|
||||
hosts:
|
||||
#. Make sure that the ceph configuration has been migrated to use the cluster's
|
||||
central config database. If ``/etc/ceph/ceph.conf`` is identical on all
|
||||
hosts, then the following command can be run on one host and will take
|
||||
effect for all hosts:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config assimilate-conf -i /etc/ceph/ceph.conf
|
||||
|
||||
If there are configuration variations between hosts, you will need to repeat
|
||||
this command on each host. During this adoption process, view the cluster's
|
||||
this command on each host, taking care that if there are conflicting option
|
||||
settings across hosts, the values from the last host will be used. During this
|
||||
adoption process, view the cluster's central
|
||||
configuration to confirm that it is complete by running the following
|
||||
command:
|
||||
|
||||
@ -77,36 +79,36 @@ Adoption process
|
||||
|
||||
ceph config dump
|
||||
|
||||
#. Adopt each monitor:
|
||||
#. Adopt each Monitor:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm adopt --style legacy --name mon.<hostname>
|
||||
|
||||
Each legacy monitor should stop, quickly restart as a cephadm
|
||||
Each legacy Monitor will stop, quickly restart as a cephadm
|
||||
container, and rejoin the quorum.
|
||||
|
||||
#. Adopt each manager:
|
||||
#. Adopt each Manager:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm adopt --style legacy --name mgr.<hostname>
|
||||
|
||||
#. Enable cephadm:
|
||||
#. Enable cephadm orchestration:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph mgr module enable cephadm
|
||||
ceph orch set backend cephadm
|
||||
|
||||
#. Generate an SSH key:
|
||||
#. Generate an SSH key for cephadm:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph cephadm generate-key
|
||||
ceph cephadm get-pub-key > ~/ceph.pub
|
||||
|
||||
#. Install the cluster SSH key on each host in the cluster:
|
||||
#. Install the cephadm SSH key on each host in the cluster:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -119,9 +121,10 @@ Adoption process
|
||||
SSH keys.
|
||||
|
||||
.. note::
|
||||
It is also possible to have cephadm use a non-root user to SSH
|
||||
It is also possible to arrange for cephadm to use a non-root user to SSH
|
||||
into cluster hosts. This user needs to have passwordless sudo access.
|
||||
Use ``ceph cephadm set-user <user>`` and copy the SSH key to that user.
|
||||
Use ``ceph cephadm set-user <user>`` and copy the SSH key to that user's
|
||||
home directory on each host.
|
||||
See :ref:`cephadm-ssh-user`
|
||||
|
||||
#. Tell cephadm which hosts to manage:
|
||||
@ -130,10 +133,10 @@ Adoption process
|
||||
|
||||
ceph orch host add <hostname> [ip-address]
|
||||
|
||||
This will perform a ``cephadm check-host`` on each host before adding it;
|
||||
this check ensures that the host is functioning properly. The IP address
|
||||
argument is recommended; if not provided, then the host name will be resolved
|
||||
via DNS.
|
||||
This will run ``cephadm check-host`` on each host before adding it.
|
||||
This check ensures that the host is functioning properly. The IP address
|
||||
argument is recommended. If the address is not provided, then the host name
|
||||
will be resolved via DNS.
|
||||
|
||||
#. Verify that the adopted monitor and manager daemons are visible:
|
||||
|
||||
@ -154,8 +157,8 @@ Adoption process
|
||||
cephadm adopt --style legacy --name osd.1
|
||||
cephadm adopt --style legacy --name osd.2
|
||||
|
||||
#. Redeploy MDS daemons by telling cephadm how many daemons to run for
|
||||
each file system. List file systems by name with the command ``ceph fs
|
||||
#. Redeploy CephFS MDS daemons (if deployed) by telling cephadm how many daemons to run for
|
||||
each file system. List CephFS file systems by name with the command ``ceph fs
|
||||
ls``. Run the following command on the master nodes to redeploy the MDS
|
||||
daemons:
|
||||
|
||||
@ -190,19 +193,19 @@ Adoption process
|
||||
systemctl stop ceph-mds.target
|
||||
rm -rf /var/lib/ceph/mds/ceph-*
|
||||
|
||||
#. Redeploy RGW daemons. Cephadm manages RGW daemons by zone. For each
|
||||
zone, deploy new RGW daemons with cephadm:
|
||||
#. Redeploy Ceph Object Gateway RGW daemons if deployed. Cephadm manages RGW
|
||||
daemons by zone. For each zone, deploy new RGW daemons with cephadm:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply rgw <svc_id> [--realm=<realm>] [--zone=<zone>] [--port=<port>] [--ssl] [--placement=<placement>]
|
||||
|
||||
where *<placement>* can be a simple daemon count, or a list of
|
||||
specific hosts (see :ref:`orchestrator-cli-placement-spec`), and the
|
||||
specific hosts (see :ref:`orchestrator-cli-placement-spec`). The
|
||||
zone and realm arguments are needed only for a multisite setup.
|
||||
|
||||
After the daemons have started and you have confirmed that they are
|
||||
functioning, stop and remove the old, legacy daemons:
|
||||
functioning, stop and remove the legacy daemons:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
|
@ -1,36 +1,36 @@
|
||||
=======================
|
||||
Basic Ceph Client Setup
|
||||
=======================
|
||||
Client machines require some basic configuration to interact with
|
||||
Ceph clusters. This section describes how to configure a client machine
|
||||
so that it can interact with a Ceph cluster.
|
||||
Client hosts require basic configuration to interact with
|
||||
Ceph clusters. This section describes how to perform this configuration.
|
||||
|
||||
.. note::
|
||||
Most client machines need to install only the `ceph-common` package
|
||||
and its dependencies. Such a setup supplies the basic `ceph` and
|
||||
`rados` commands, as well as other commands including `mount.ceph`
|
||||
and `rbd`.
|
||||
Most client hosts need to install only the ``ceph-common`` package
|
||||
and its dependencies. Such an installation supplies the basic ``ceph`` and
|
||||
``rados`` commands, as well as other commands including ``mount.ceph``
|
||||
and ``rbd``.
|
||||
|
||||
Config File Setup
|
||||
=================
|
||||
Client machines usually require smaller configuration files (here
|
||||
sometimes called "config files") than do full-fledged cluster members.
|
||||
Client hosts usually require smaller configuration files (here
|
||||
sometimes called "config files") than do back-end cluster hosts.
|
||||
To generate a minimal config file, log into a host that has been
|
||||
configured as a client or that is running a cluster daemon, and then run the following command:
|
||||
configured as a client or that is running a cluster daemon, then
|
||||
run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config generate-minimal-conf
|
||||
|
||||
This command generates a minimal config file that tells the client how
|
||||
to reach the Ceph monitors. The contents of this file should usually
|
||||
be installed in ``/etc/ceph/ceph.conf``.
|
||||
to reach the Ceph Monitors. This file should usually
|
||||
be copied to ``/etc/ceph/ceph.conf`` on each client host.
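
One possible way to do this, shown only as a sketch (``client01`` is a
placeholder host name and root SSH access to the client is assumed)::

   # Generate the minimal config on a cluster host and copy it to the client
   ceph config generate-minimal-conf > /tmp/minimal.ceph.conf
   scp /tmp/minimal.ceph.conf root@client01:/etc/ceph/ceph.conf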
|
||||
|
||||
Keyring Setup
|
||||
=============
|
||||
Most Ceph clusters run with authentication enabled. This means that
|
||||
the client needs keys in order to communicate with the machines in the
|
||||
cluster. To generate a keyring file with credentials for `client.fs`,
|
||||
the client needs keys in order to communicate with Ceph daemons.
|
||||
To generate a keyring file with credentials for ``client.fs``,
|
||||
log into a running cluster member and run the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
@ -40,6 +40,10 @@ log into an running cluster member and run the following command:
|
||||
The resulting output is directed into a keyring file, typically
|
||||
``/etc/ceph/ceph.keyring``.
|
||||
|
||||
To gain a broader understanding of client keyring distribution and administration, you should read :ref:`client_keyrings_and_configs`.
|
||||
To gain a broader understanding of client keyring distribution and administration,
|
||||
you should read :ref:`client_keyrings_and_configs`.
|
||||
|
||||
To see an example that explains how to distribute ``ceph.conf`` configuration files to hosts that are tagged with the ``bare_config`` label, you should read the section called "Distributing ceph.conf to hosts tagged with bare_config" in the section called :ref:`etc_ceph_conf_distribution`.
|
||||
To see an example that explains how to distribute ``ceph.conf`` configuration
|
||||
files to hosts that are tagged with the ``bare_config`` label, you should read
|
||||
the subsection named "Distributing ceph.conf to hosts tagged with bare_config"
|
||||
under the heading :ref:`etc_ceph_conf_distribution`.
|
||||
|
@ -30,8 +30,8 @@ This table shows which version pairs are expected to work or not work together:
|
||||
|
||||
.. note::
|
||||
|
||||
While not all podman versions have been actively tested against
|
||||
all Ceph versions, there are no known issues with using podman
|
||||
While not all Podman versions have been actively tested against
|
||||
all Ceph versions, there are no known issues with using Podman
|
||||
version 3.0 or greater with Ceph Quincy and later releases.
|
||||
|
||||
.. warning::
|
||||
|
@ -71,9 +71,9 @@ To add each new host to the cluster, perform two steps:
|
||||
ceph orch host add host2 10.10.0.102
|
||||
ceph orch host add host3 10.10.0.103
|
||||
|
||||
It is best to explicitly provide the host IP address. If an IP is
|
||||
It is best to explicitly provide the host IP address. If an address is
|
||||
not provided, then the host name will be immediately resolved via
|
||||
DNS and that IP will be used.
|
||||
DNS and the result will be used.
|
||||
|
||||
One or more labels can also be included to immediately label the
|
||||
new host. For example, by default the ``_admin`` label will make
|
||||
@ -98,10 +98,33 @@ To drain all daemons from a host, run a command of the following form:
|
||||
|
||||
ceph orch host drain *<host>*
|
||||
|
||||
The ``_no_schedule`` label will be applied to the host. See
|
||||
:ref:`cephadm-special-host-labels`.
|
||||
The ``_no_schedule`` and ``_no_conf_keyring`` labels will be applied to the
|
||||
host. See :ref:`cephadm-special-host-labels`.
|
||||
|
||||
All OSDs on the host will be scheduled to be removed. You can check the progress of the OSD removal operation with the following command:
|
||||
If you only want to drain daemons but leave managed ceph conf and keyring
|
||||
files on the host, you may pass the ``--keep-conf-keyring`` flag to the
|
||||
drain command.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host drain *<host>* --keep-conf-keyring
|
||||
|
||||
This will apply the ``_no_schedule`` label to the host but not the
|
||||
``_no_conf_keyring`` label.
|
||||
|
||||
If you want to drain daemons but leave managed `ceph.conf` and keyring
|
||||
files on the host, you may pass the ``--keep-conf-keyring`` flag to the
|
||||
drain command.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host drain *<host>* --keep-conf-keyring
|
||||
|
||||
This will apply the ``_no_schedule`` label to the host but not the
|
||||
``_no_conf_keyring`` label.
|
||||
|
||||
All OSDs on the host will be scheduled to be removed. You can check
|
||||
progress of the OSD removal operation with the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -109,6 +132,14 @@ All OSDs on the host will be scheduled to be removed. You can check the progress
|
||||
|
||||
See :ref:`cephadm-osd-removal` for more details about OSD removal.
|
||||
|
||||
The ``orch host drain`` command also supports a ``--zap-osd-devices``
|
||||
flag. Setting this flag while draining a host will cause cephadm to zap
|
||||
the devices of the OSDs it is removing as part of the drain process.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host drain *<host>* --zap-osd-devices
|
||||
|
||||
Use the following command to determine whether any daemons are still on the
|
||||
host:
|
||||
|
||||
@ -126,7 +157,7 @@ cluster by running the following command:
|
||||
Offline host removal
|
||||
--------------------
|
||||
|
||||
Even if a host is offline and can not be recovered, it can be removed from the
|
||||
If a host is offline and can not be recovered, it can be removed from the
|
||||
cluster by running a command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
@ -180,6 +211,12 @@ The following host labels have a special meaning to cephadm. All start with ``_
|
||||
an existing host that already contains Ceph daemons, it will cause cephadm to move
|
||||
those daemons elsewhere (except OSDs, which are not removed automatically).
|
||||
|
||||
* ``_no_conf_keyring``: *Do not deploy config files or keyrings on this host*.
|
||||
|
||||
This label is effectively the same as ``_no_schedule`` but instead of working for
|
||||
daemons it works for client keyrings and ceph conf files that are being managed
|
||||
by cephadm.
|
||||
|
||||
* ``_no_autotune_memory``: *Do not autotune memory on this host*.
|
||||
|
||||
This label will prevent daemon memory from being tuned even when the
|
||||
@ -222,8 +259,8 @@ Rescanning Host Devices
|
||||
=======================
|
||||
|
||||
Some servers and external enclosures may not register device removal or insertion with the
|
||||
kernel. In these scenarios, you'll need to perform a host rescan. A rescan is typically
|
||||
non-disruptive, and can be performed with the following CLI command:
|
||||
kernel. In these scenarios, you'll need to perform a device rescan on the appropriate host.
|
||||
A rescan is typically non-disruptive, and can be performed with the following CLI command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -274,7 +311,10 @@ Setting the initial CRUSH location of host
|
||||
==========================================
|
||||
|
||||
Hosts can contain a ``location`` identifier which will instruct cephadm to
|
||||
create a new CRUSH host located in the specified hierarchy.
|
||||
create a new CRUSH host bucket located in the specified hierarchy.
|
||||
You can specify more than one element of the tree when doing so (for
|
||||
instance if you want to ensure that the rack that a host is being
|
||||
added to is also added to the default bucket), for example:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
@ -282,23 +322,48 @@ create a new CRUSH host located in the specified hierarchy.
|
||||
hostname: node-00
|
||||
addr: 192.168.0.10
|
||||
location:
|
||||
root: default
|
||||
rack: rack1
|
||||
|
||||
.. note::
|
||||
|
||||
The ``location`` attribute will be only affect the initial CRUSH location. Subsequent
|
||||
changes of the ``location`` property will be ignored. Also, removing a host will not remove
|
||||
any CRUSH buckets.
|
||||
The ``location`` attribute will only affect the initial CRUSH location.
Subsequent changes of the ``location`` property will be ignored. Also,
removing a host will not remove an associated CRUSH bucket unless the
``--rm-crush-entry`` flag is provided to the ``orch host rm`` command.
|
||||
|
||||
See also :ref:`crush_map_default_types`.
|
||||
|
||||
Removing a host from the CRUSH map
|
||||
==================================
|
||||
|
||||
The ``ceph orch host rm`` command has support for removing the associated host bucket
|
||||
from the CRUSH map. This is done by providing the ``--rm-crush-entry`` flag.
|
||||
|
||||
.. prompt:: bash [ceph:root@host1/]#
|
||||
|
||||
ceph orch host rm host1 --rm-crush-entry
|
||||
|
||||
When this flag is specified, cephadm will attempt to remove the host bucket
|
||||
from the CRUSH map as part of the host removal process. Note that if
|
||||
it fails to do so, cephadm will report the failure and the host will remain under
|
||||
cephadm control.
|
||||
|
||||
.. note::
|
||||
|
||||
Removal from the CRUSH map will fail if there are OSDs deployed on the
|
||||
host. If you would like to remove all the host's OSDs as well, please start
|
||||
by using the ``ceph orch host drain`` command to do so. Once the OSDs
|
||||
have been removed, then you may direct cephadm to remove the CRUSH bucket
|
||||
along with the host using the ``--rm-crush-entry`` flag.
|
||||
|
||||
OS Tuning Profiles
|
||||
==================
|
||||
|
||||
Cephadm can be used to manage operating-system-tuning profiles that apply sets
|
||||
of sysctl settings to sets of hosts.
|
||||
Cephadm can be used to manage operating system tuning profiles that apply
|
||||
``sysctl`` settings to sets of hosts.
|
||||
|
||||
Create a YAML spec file in the following format:
|
||||
To do so, create a YAML spec file in the following format:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
@ -317,18 +382,21 @@ Apply the tuning profile with the following command:
|
||||
|
||||
ceph orch tuned-profile apply -i <tuned-profile-file-name>
|
||||
|
||||
This profile is written to ``/etc/sysctl.d/`` on each host that matches the
|
||||
hosts specified in the placement block of the yaml, and ``sysctl --system`` is
|
||||
This profile is written to a file under ``/etc/sysctl.d/`` on each host
|
||||
specified in the ``placement`` block, then ``sysctl --system`` is
|
||||
run on the host.
|
||||
|
||||
.. note::
|
||||
|
||||
The exact filename that the profile is written to within ``/etc/sysctl.d/``
|
||||
is ``<profile-name>-cephadm-tuned-profile.conf``, where ``<profile-name>`` is
|
||||
the ``profile_name`` setting that you specify in the YAML spec. Because
|
||||
the ``profile_name`` setting that you specify in the YAML spec. We suggest
|
||||
naming these profiles following the usual ``sysctl.d`` `NN-xxxxx` convention. Because
|
||||
sysctl settings are applied in lexicographical order (sorted by the filename
|
||||
in which the setting is specified), you may want to set the ``profile_name``
|
||||
in your spec so that it is applied before or after other conf files.
|
||||
in which the setting is specified), you may want to carefully choose
|
||||
the ``profile_name`` in your spec so that it is applied before or after other
|
||||
conf files. Careful selection ensures that values supplied here override or
|
||||
do not override those in other ``sysctl.d`` files as desired.
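
For example, a minimal sketch of such a spec and its application, assuming the
usual ``profile_name``/``placement``/``settings`` layout and using an ``NN-``
prefix to control ordering (the profile name, hosts, and settings below are
illustrative):

.. prompt:: bash #

   cat > 80-example-tuned-profile.yaml <<'EOF'
   profile_name: 80-example-tuned-profile
   placement:
     hosts:
       - host1
       - host2
   settings:
     fs.file-max: '1000000'
     vm.swappiness: '10'
   EOF
   ceph orch tuned-profile apply -i 80-example-tuned-profile.yaml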
|
||||
|
||||
.. note::
|
||||
|
||||
@ -337,7 +405,7 @@ run on the host.
|
||||
|
||||
.. note::
|
||||
|
||||
Applying tuned profiles is idempotent when the ``--no-overwrite`` option is
|
||||
Applying tuning profiles is idempotent when the ``--no-overwrite`` option is
|
||||
passed. Moreover, if the ``--no-overwrite`` option is passed, existing
|
||||
profiles with the same name are not overwritten.
|
||||
|
||||
@ -497,12 +565,28 @@ There are two ways to customize this configuration for your environment:
|
||||
|
||||
We do *not recommend* this approach. The path name must be
|
||||
visible to *any* mgr daemon, and cephadm runs all daemons as
|
||||
containers. That means that the file either need to be placed
|
||||
containers. That means that the file must either be placed
|
||||
inside a customized container image for your deployment, or
|
||||
manually distributed to the mgr data directory
|
||||
(``/var/lib/ceph/<cluster-fsid>/mgr.<id>`` on the host, visible at
|
||||
``/var/lib/ceph/mgr/ceph-<id>`` from inside the container).
|
||||
|
||||
|
||||
Setting up CA signed keys for the cluster
|
||||
-----------------------------------------
|
||||
|
||||
Cephadm also supports using CA signed keys for SSH authentication
|
||||
across cluster nodes. In this setup, instead of needing a private
|
||||
key and public key, we instead need a private key and certificate
|
||||
created by signing that private key with a CA key. For more info
|
||||
on setting up nodes for authentication using a CA signed key, see
|
||||
:ref:`cephadm-bootstrap-ca-signed-keys`. Once you have your private
|
||||
key and signed cert, they can be set up for cephadm to use by running:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config-key set mgr/cephadm/ssh_identity_key -i <private-key-file>
|
||||
ceph config-key set mgr/cephadm/ssh_identity_cert -i <signed-cert-file>
|
||||
|
||||
.. _cephadm-fqdn:
|
||||
|
||||
Fully qualified domain names vs bare host names
|
||||
@ -534,8 +618,8 @@ Note that ``man hostname`` recommends ``hostname`` to return the bare
|
||||
host name:
|
||||
|
||||
The FQDN (Fully Qualified Domain Name) of the system is the
|
||||
name that the resolver(3) returns for the host name, such as,
|
||||
ursula.example.com. It is usually the hostname followed by the DNS
|
||||
name that the resolver(3) returns for the host name, for example
|
||||
``ursula.example.com``. It is usually the short hostname followed by the DNS
|
||||
domain name (the part after the first dot). You can check the FQDN
|
||||
using ``hostname --fqdn`` or the domain name using ``dnsdomainname``.
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
Deploying a new Ceph cluster
|
||||
============================
|
||||
|
||||
Cephadm creates a new Ceph cluster by "bootstrapping" on a single
|
||||
Cephadm creates a new Ceph cluster by bootstrapping a single
|
||||
host, expanding the cluster to encompass any additional hosts, and
|
||||
then deploying the needed services.
|
||||
|
||||
@ -18,7 +18,7 @@ Requirements
|
||||
- Python 3
|
||||
- Systemd
|
||||
- Podman or Docker for running containers
|
||||
- Time synchronization (such as chrony or NTP)
|
||||
- Time synchronization (such as Chrony or the legacy ``ntpd``)
|
||||
- LVM2 for provisioning storage devices
|
||||
|
||||
Any modern Linux distribution should be sufficient. Dependencies
|
||||
@ -41,54 +41,8 @@ There are two ways to install ``cephadm``:
|
||||
#. a :ref:`curl-based installation<cephadm_install_curl>` method
|
||||
#. :ref:`distribution-specific installation methods<cephadm_install_distros>`
|
||||
|
||||
|
||||
.. _cephadm_install_curl:
|
||||
|
||||
curl-based installation
|
||||
-----------------------
|
||||
|
||||
* Use ``curl`` to fetch the most recent version of the
|
||||
standalone script.
|
||||
|
||||
.. prompt:: bash #
|
||||
:substitutions:
|
||||
|
||||
curl --silent --remote-name --location https://github.com/ceph/ceph/raw/|stable-release|/src/cephadm/cephadm
|
||||
|
||||
Make the ``cephadm`` script executable:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
chmod +x cephadm
|
||||
|
||||
This script can be run directly from the current directory:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
./cephadm <arguments...>
|
||||
|
||||
* Although the standalone script is sufficient to get a cluster started, it is
|
||||
convenient to have the ``cephadm`` command installed on the host. To install
|
||||
the packages that provide the ``cephadm`` command, run the following
|
||||
commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
:substitutions:
|
||||
|
||||
./cephadm add-repo --release |stable-release|
|
||||
./cephadm install
|
||||
|
||||
Confirm that ``cephadm`` is now in your PATH by running ``which``:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
which cephadm
|
||||
|
||||
A successful ``which cephadm`` command will return this:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
/usr/sbin/cephadm
|
||||
.. note:: In Quincy, cephadm is not distributed as an executable compiled from
|
||||
source code. That feature was introduced in the Reef release.
|
||||
|
||||
.. _cephadm_install_distros:
|
||||
|
||||
@ -128,6 +82,69 @@ that case, you can install cephadm directly. For example:
|
||||
zypper install -y cephadm
|
||||
|
||||
|
||||
.. _cephadm_install_curl:
|
||||
|
||||
curl-based installation
|
||||
-----------------------
|
||||
|
||||
* First, determine what version of Ceph you wish to install. You can use the releases
|
||||
page to find the `latest active releases <https://docs.ceph.com/en/latest/releases/#active-releases>`_.
|
||||
For example, we might find that ``18.2.1`` is the latest
|
||||
active release.
|
||||
|
||||
* Use ``curl`` to fetch a build of cephadm for that release.
|
||||
|
||||
.. prompt:: bash #
|
||||
:substitutions:
|
||||
|
||||
CEPH_RELEASE=18.2.1 # replace this with the active release
|
||||
curl --silent --remote-name --location https://download.ceph.com/rpm-${CEPH_RELEASE}/el9/noarch/cephadm
|
||||
|
||||
Ensure the ``cephadm`` file is executable:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
chmod +x cephadm
|
||||
|
||||
This file can be run directly from the current directory:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
./cephadm <arguments...>
|
||||
|
||||
* If you encounter any issues with running cephadm due to errors including
|
||||
the message ``bad interpreter``, then you may not have Python or
|
||||
the correct version of Python installed. The cephadm tool requires Python 3.6
|
||||
or later. You can manually run cephadm with a particular version of Python by
|
||||
prefixing the command with your installed Python version. For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
:substitutions:
|
||||
|
||||
python3.8 ./cephadm <arguments...>
|
||||
|
||||
* Although the standalone cephadm is sufficient to bootstrap a cluster, it is
|
||||
best to have the ``cephadm`` command installed on the host. To install
|
||||
the packages that provide the ``cephadm`` command, run the following
|
||||
commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
:substitutions:
|
||||
|
||||
./cephadm add-repo --release |stable-release|
|
||||
./cephadm install
|
||||
|
||||
Confirm that ``cephadm`` is now in your PATH by running ``which``:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
which cephadm
|
||||
|
||||
A successful ``which cephadm`` command will return this:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
/usr/sbin/cephadm
|
||||
|
||||
Bootstrap a new cluster
|
||||
=======================
|
||||
@ -138,7 +155,7 @@ What to know before you bootstrap
|
||||
The first step in creating a new Ceph cluster is running the ``cephadm
|
||||
bootstrap`` command on the Ceph cluster's first host. The act of running the
|
||||
``cephadm bootstrap`` command on the Ceph cluster's first host creates the Ceph
|
||||
cluster's first "monitor daemon", and that monitor daemon needs an IP address.
|
||||
cluster's first Monitor daemon.
|
||||
You must pass the IP address of the Ceph cluster's first host to the ``cephadm
|
||||
bootstrap`` command, so you'll need to know the IP address of that host.
|
||||
|
||||
@ -148,6 +165,39 @@ bootstrap`` command, so you'll need to know the IP address of that host.
|
||||
.. note:: If there are multiple networks and interfaces, be sure to choose one
|
||||
that will be accessible by any host accessing the Ceph cluster.
|
||||
|
||||
.. important:: When installing the Quincy release of Ceph, you might see the
|
||||
following error message:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
Deploying ceph-exporter service with default
|
||||
placement... Non-zero exit code 22
|
||||
|
||||
If you see this error message, add the ``--skip-monitoring-stack`` flag to
|
||||
your ``cephadm bootstrap`` command. To do this, run a command of the
|
||||
following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
cephadm bootstrap --mon-ip {monitor IP address} --skip-monitoring-stack
|
||||
|
||||
This command should result in a successfully-deployed cluster. Expand the
|
||||
successfully deployed cluster by deploying the monitoring stack without
|
||||
``ceph-exporter`` by running the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch apply prometheus
|
||||
ceph orch apply grafana
|
||||
ceph orch apply node-exporter
|
||||
ceph orch apply alertmanager
|
||||
|
||||
This error occurs because some Quincy releases contain a faulty
|
||||
``ceph-exporter``. After this fault was discovered, ``ceph-exporter`` was
|
||||
removed from Quincy. If your binary of ``cephadm`` is from the release with
|
||||
the fault, you will hit this error.
|
||||
|
||||
|
||||
Running the bootstrap command
|
||||
-----------------------------
|
||||
|
||||
@ -159,23 +209,25 @@ Run the ``ceph bootstrap`` command:
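
The exact options vary by environment; a minimal invocation (with a
placeholder monitor IP address) looks like this:

.. prompt:: bash #

   cephadm bootstrap --mon-ip *<mon-ip>*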
|
||||
|
||||
This command will:
|
||||
|
||||
* Create a monitor and manager daemon for the new cluster on the local
|
||||
* Create a Monitor and a Manager daemon for the new cluster on the local
|
||||
host.
|
||||
* Generate a new SSH key for the Ceph cluster and add it to the root
|
||||
user's ``/root/.ssh/authorized_keys`` file.
|
||||
* Write a copy of the public key to ``/etc/ceph/ceph.pub``.
|
||||
* Write a minimal configuration file to ``/etc/ceph/ceph.conf``. This
|
||||
file is needed to communicate with the new cluster.
|
||||
file is needed to communicate with Ceph daemons.
|
||||
* Write a copy of the ``client.admin`` administrative (privileged!)
|
||||
secret key to ``/etc/ceph/ceph.client.admin.keyring``.
|
||||
* Add the ``_admin`` label to the bootstrap host. By default, any host
|
||||
with this label will (also) get a copy of ``/etc/ceph/ceph.conf`` and
|
||||
``/etc/ceph/ceph.client.admin.keyring``.
|
||||
|
||||
.. _cephadm-bootstrap-further-info:
|
||||
|
||||
Further information about cephadm bootstrap
|
||||
-------------------------------------------
|
||||
|
||||
The default bootstrap behavior will work for most users. But if you'd like
|
||||
The default bootstrap process will work for most users. But if you'd like
|
||||
to know more about ``cephadm bootstrap`` right away, read the list below.
|
||||
|
||||
Also, you can run ``cephadm bootstrap -h`` to see all of ``cephadm``'s
|
||||
@ -186,15 +238,15 @@ available options.
|
||||
journald. If you want Ceph to write traditional log files to ``/var/log/ceph/$fsid``,
|
||||
use the ``--log-to-file`` option during bootstrap.
|
||||
|
||||
* Larger Ceph clusters perform better when (external to the Ceph cluster)
|
||||
* Larger Ceph clusters perform best when (external to the Ceph cluster)
|
||||
public network traffic is separated from (internal to the Ceph cluster)
|
||||
cluster traffic. The internal cluster traffic handles replication, recovery,
|
||||
and heartbeats between OSD daemons. You can define the :ref:`cluster
|
||||
network<cluster-network>` by supplying the ``--cluster-network`` option to the ``bootstrap``
|
||||
subcommand. This parameter must define a subnet in CIDR notation (for example
|
||||
subcommand. This parameter must be a subnet in CIDR notation (for example
|
||||
``10.90.90.0/24`` or ``fe80::/64``).
|
||||
|
||||
* ``cephadm bootstrap`` writes to ``/etc/ceph`` the files needed to access
|
||||
* ``cephadm bootstrap`` writes to ``/etc/ceph`` files needed to access
|
||||
the new cluster. This central location makes it possible for Ceph
|
||||
packages installed on the host (e.g., packages that give access to the
|
||||
cephadm command line interface) to find these files.
|
||||
@ -215,12 +267,12 @@ available options.
|
||||
EOF
|
||||
$ ./cephadm bootstrap --config initial-ceph.conf ...
|
||||
|
||||
* The ``--ssh-user *<user>*`` option makes it possible to choose which SSH
|
||||
* The ``--ssh-user *<user>*`` option makes it possible to designate which SSH
|
||||
user cephadm will use to connect to hosts. The associated SSH key will be
|
||||
added to ``/home/*<user>*/.ssh/authorized_keys``. The user that you
|
||||
designate with this option must have passwordless sudo access.
|
||||
|
||||
* If you are using a container on an authenticated registry that requires
|
||||
* If you are using a container image from a registry that requires
|
||||
login, you may add the argument:
|
||||
|
||||
* ``--registry-json <path to json file>``
|
||||
@ -231,7 +283,7 @@ available options.
|
||||
|
||||
Cephadm will attempt to log in to this registry so it can pull your container
|
||||
and then store the login info in its config database. Other hosts added to
|
||||
the cluster will then also be able to make use of the authenticated registry.
|
||||
the cluster will then also be able to make use of the authenticated container registry.
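
A minimal sketch of such a registry JSON file, assuming the conventional
``url``, ``username``, and ``password`` fields (all values below are
placeholders), together with the corresponding bootstrap call:

.. prompt:: bash #

   cat > registry.json <<'EOF'
   {
     "url": "registry.example.com",
     "username": "myregistryuser",
     "password": "myregistrypassword"
   }
   EOF
   cephadm bootstrap --mon-ip *<mon-ip>* --registry-json registry.json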
|
||||
|
||||
* See :ref:`cephadm-deployment-scenarios` for additional examples for using ``cephadm bootstrap``.
|
||||
|
||||
@ -296,7 +348,7 @@ Add all hosts to the cluster by following the instructions in
|
||||
|
||||
By default, a ``ceph.conf`` file and a copy of the ``client.admin`` keyring are
|
||||
maintained in ``/etc/ceph`` on all hosts that have the ``_admin`` label. This
|
||||
label is initially applied only to the bootstrap host. We usually recommend
|
||||
label is initially applied only to the bootstrap host. We recommend
|
||||
that one or more other hosts be given the ``_admin`` label so that the Ceph CLI
|
||||
(for example, via ``cephadm shell``) is easily accessible on multiple hosts. To add
|
||||
the ``_admin`` label to additional host(s), run a command of the following form:
|
||||
@ -309,9 +361,10 @@ the ``_admin`` label to additional host(s), run a command of the following form:
|
||||
Adding additional MONs
|
||||
======================
|
||||
|
||||
A typical Ceph cluster has three or five monitor daemons spread
|
||||
A typical Ceph cluster has three or five Monitor daemons spread
|
||||
across different hosts. We recommend deploying five
|
||||
monitors if there are five or more nodes in your cluster.
|
||||
Monitors if there are five or more nodes in your cluster. Most clusters do not
|
||||
benefit from seven or more Monitors.
|
||||
|
||||
Please follow :ref:`deploy_additional_monitors` to deploy additional MONs.
|
||||
|
||||
@ -336,12 +389,12 @@ See :ref:`osd_autotune`.
|
||||
|
||||
To deploy hyperconverged Ceph with TripleO, please refer to the TripleO documentation: `Scenario: Deploy Hyperconverged Ceph <https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/features/cephadm.html#scenario-deploy-hyperconverged-ceph>`_
|
||||
|
||||
In other cases where the cluster hardware is not exclusively used by Ceph (hyperconverged),
|
||||
In other cases where the cluster hardware is not exclusively used by Ceph (converged infrastructure),
|
||||
reduce the memory consumption of Ceph like so:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
# hyperconverged only:
|
||||
# converged only:
|
||||
ceph config set mgr mgr/cephadm/autotune_memory_target_ratio 0.2
|
||||
|
||||
Then enable memory autotuning:
|
||||
@ -370,9 +423,11 @@ Different deployment scenarios
|
||||
Single host
|
||||
-----------
|
||||
|
||||
To configure a Ceph cluster to run on a single host, use the
|
||||
``--single-host-defaults`` flag when bootstrapping. For use cases of this, see
|
||||
:ref:`one-node-cluster`.
|
||||
To deploy a Ceph cluster running on a single host, use the
|
||||
``--single-host-defaults`` flag when bootstrapping. For use cases, see
|
||||
:ref:`one-node-cluster`. Such clusters are generally not suitable for
|
||||
production.
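
For example, a single-node bootstrap might look like this (the monitor IP
address is a placeholder):

.. prompt:: bash #

   cephadm bootstrap --mon-ip *<mon-ip>* --single-host-defaults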
|
||||
|
||||
|
||||
The ``--single-host-defaults`` flag sets the following configuration options::
|
||||
|
||||
@ -389,8 +444,8 @@ Deployment in an isolated environment
|
||||
-------------------------------------
|
||||
|
||||
You might need to install cephadm in an environment that is not connected
|
||||
directly to the internet (such an environment is also called an "isolated
|
||||
environment"). This can be done if a custom container registry is used. Either
|
||||
directly to the Internet (an "isolated" or "airgapped"
|
||||
environment). This requires the use of a custom container registry. Either
|
||||
of two kinds of custom container registry can be used in this scenario: (1) a
|
||||
Podman-based or Docker-based insecure registry, or (2) a secure registry.
|
||||
|
||||
@ -445,3 +500,104 @@ have access to all hosts that you plan to add to the cluster.
|
||||
cephadm --image *<hostname>*:5000/ceph/ceph bootstrap --mon-ip *<mon-ip>*
|
||||
|
||||
.. _cluster network: ../rados/configuration/network-config-ref#cluster-network
|
||||
|
||||
.. _cephadm-bootstrap-custom-ssh-keys:
|
||||
|
||||
Deployment with custom SSH keys
|
||||
-------------------------------
|
||||
|
||||
Bootstrap allows users to create their own private/public SSH key pair
|
||||
rather than having cephadm generate them automatically.
|
||||
|
||||
To use custom SSH keys, pass the ``--ssh-private-key`` and ``--ssh-public-key``
|
||||
fields to bootstrap. Both parameters require a path to the file where the
|
||||
keys are stored:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm bootstrap --mon-ip <ip-addr> --ssh-private-key <private-key-filepath> --ssh-public-key <public-key-filepath>
|
||||
|
||||
This setup allows users to use a key that has already been distributed to hosts
|
||||
the user wants in the cluster before bootstrap.
|
||||
|
||||
.. note:: In order for cephadm to connect to other hosts you'd like to add
|
||||
to the cluster, make sure the public key of the key pair provided is set up
|
||||
as an authorized key for the ssh user being used, typically root. If you'd
|
||||
like more info on using a non-root user as the ssh user, see :ref:`cephadm-bootstrap-further-info`
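
A minimal sketch of that workflow, assuming ``root`` is the SSH user and using
``ssh-copy-id`` to distribute the public key (the key file names and host name
are illustrative):

.. prompt:: bash #

   ssh-keygen -t ed25519 -f cephadm-key -N ""
   ssh-copy-id -f -i cephadm-key.pub root@host2   # repeat for each prospective cluster host
   cephadm bootstrap --mon-ip *<mon-ip>* --ssh-private-key cephadm-key --ssh-public-key cephadm-key.pub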
|
||||
|
||||
.. _cephadm-bootstrap-ca-signed-keys:
|
||||
|
||||
Deployment with CA signed SSH keys
|
||||
----------------------------------
|
||||
|
||||
As an alternative to standard public key authentication, cephadm also supports
|
||||
deployment using CA signed keys. Before bootstrapping it's recommended to set up
|
||||
the CA public key as a trusted CA key on hosts you'd like to eventually add to
|
||||
the cluster. For example:
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
# we will act as our own CA, therefore we'll need to make a CA key
|
||||
[root@host1 ~]# ssh-keygen -t rsa -f ca-key -N ""
|
||||
|
||||
# make the ca key trusted on the host we've generated it on
|
||||
# this requires adding in a line in our /etc/sshd_config
|
||||
# to mark this key as trusted
|
||||
[root@host1 ~]# cp ca-key.pub /etc/ssh
|
||||
[root@host1 ~]# vi /etc/ssh/sshd_config
|
||||
[root@host1 ~]# cat /etc/ssh/sshd_config | grep ca-key
|
||||
TrustedUserCAKeys /etc/ssh/ca-key.pub
|
||||
# now restart sshd so it picks up the config change
|
||||
[root@host1 ~]# systemctl restart sshd
|
||||
|
||||
# now, on all other hosts we want in the cluster, also install the CA key
|
||||
[root@host1 ~]# scp /etc/ssh/ca-key.pub host2:/etc/ssh/
|
||||
|
||||
# on other hosts, make the same changes to the sshd_config
|
||||
[root@host2 ~]# vi /etc/ssh/sshd_config
|
||||
[root@host2 ~]# cat /etc/ssh/sshd_config | grep ca-key
|
||||
TrustedUserCAKeys /etc/ssh/ca-key.pub
|
||||
# and restart sshd so it picks up the config change
|
||||
[root@host2 ~]# systemctl restart sshd
|
||||
|
||||
Once the CA key has been installed and marked as a trusted key, you are ready
|
||||
to use a private key/CA signed cert combination for SSH. Continuing with our
|
||||
current example, we will create a new key-pair for host access and then
|
||||
sign it with our CA key:
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
# make a new key pair
|
||||
[root@host1 ~]# ssh-keygen -t rsa -f cephadm-ssh-key -N ""
|
||||
# sign the private key. This will create a new cephadm-ssh-key-cert.pub
|
||||
# note here we're using user "root". If you'd like to use a non-root
|
||||
# user the arguments to the -I and -n params would need to be adjusted
|
||||
# Additionally, note the -V param indicates how long until the cert
|
||||
# this creates will expire
|
||||
[root@host1 ~]# ssh-keygen -s ca-key -I user_root -n root -V +52w cephadm-ssh-key
|
||||
[root@host1 ~]# ls
|
||||
ca-key ca-key.pub cephadm-ssh-key cephadm-ssh-key-cert.pub cephadm-ssh-key.pub
|
||||
|
||||
# verify our signed key is working. To do this, make sure the generated private
|
||||
# key ("cephadm-ssh-key" in our example) and the newly signed cert are stored
|
||||
# in the same directory. Then try to ssh using the private key
|
||||
[root@host1 ~]# ssh -i cephadm-ssh-key host2
|
||||
|
||||
Once you have your private key and corresponding CA signed cert and have tested
|
||||
that SSH authentication using that key works, you can pass those keys to bootstrap
|
||||
in order to have cephadm use them for SSH connections between cluster hosts:
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
[root@host1 ~]# cephadm bootstrap --mon-ip <ip-addr> --ssh-private-key cephadm-ssh-key --ssh-signed-cert cephadm-ssh-key-cert.pub
|
||||
|
||||
Note that this setup does not require installing the corresponding public key
|
||||
from the private key passed to bootstrap on other nodes. In fact, cephadm will
|
||||
reject the ``--ssh-public-key`` argument when passed along with ``--ssh-signed-cert``.
|
||||
This is not because having the public key would break anything, but rather because it is not needed
|
||||
and its absence helps the bootstrap command determine whether the user wants the CA signed
|
||||
key setup or standard public key authentication. This means that SSH key rotation
|
||||
would simply be a matter of getting another key signed by the same CA and providing
|
||||
cephadm with the new private key and signed cert. No additional distribution of
|
||||
keys to cluster nodes is needed after the initial setup of the CA key as a trusted key,
|
||||
no matter how many new private key/signed cert pairs are rotated in.
|
||||
|
@ -328,15 +328,15 @@ You can disable this health warning by running the following command:
|
||||
|
||||
Cluster Configuration Checks
|
||||
----------------------------
|
||||
Cephadm periodically scans each of the hosts in the cluster in order
|
||||
to understand the state of the OS, disks, NICs etc. These facts can
|
||||
then be analysed for consistency across the hosts in the cluster to
|
||||
Cephadm periodically scans each host in the cluster in order
|
||||
to understand the state of the OS, disks, network interfaces, etc. This information can
|
||||
then be analyzed for consistency across the hosts in the cluster to
|
||||
identify any configuration anomalies.
|
||||
|
||||
Enabling Cluster Configuration Checks
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The configuration checks are an **optional** feature, and are enabled
|
||||
These configuration checks are an **optional** feature, and are enabled
|
||||
by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
@ -346,7 +346,7 @@ by running the following command:
|
||||
States Returned by Cluster Configuration Checks
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The configuration checks are triggered after each host scan (1m). The
|
||||
Configuration checks are triggered after each host scan. The
|
||||
cephadm log entries will show the current state and outcome of the
|
||||
configuration checks as follows:
|
||||
|
||||
@ -383,14 +383,14 @@ To list all the configuration checks and their current states, run the following
|
||||
# ceph cephadm config-check ls
|
||||
|
||||
NAME HEALTHCHECK STATUS DESCRIPTION
|
||||
kernel_security CEPHADM_CHECK_KERNEL_LSM enabled checks SELINUX/Apparmor profiles are consistent across cluster hosts
|
||||
os_subscription CEPHADM_CHECK_SUBSCRIPTION enabled checks subscription states are consistent for all cluster hosts
|
||||
public_network CEPHADM_CHECK_PUBLIC_MEMBERSHIP enabled check that all hosts have a NIC on the Ceph public_network
|
||||
kernel_security CEPHADM_CHECK_KERNEL_LSM enabled check that SELINUX/Apparmor profiles are consistent across cluster hosts
|
||||
os_subscription CEPHADM_CHECK_SUBSCRIPTION enabled check that subscription states are consistent for all cluster hosts
|
||||
public_network CEPHADM_CHECK_PUBLIC_MEMBERSHIP enabled check that all hosts have a network interface on the Ceph public_network
|
||||
osd_mtu_size CEPHADM_CHECK_MTU enabled check that OSD hosts share a common MTU setting
|
||||
osd_linkspeed CEPHADM_CHECK_LINKSPEED enabled check that OSD hosts share a common linkspeed
|
||||
network_missing CEPHADM_CHECK_NETWORK_MISSING enabled checks that the cluster/public networks defined exist on the Ceph hosts
|
||||
ceph_release CEPHADM_CHECK_CEPH_RELEASE enabled check for Ceph version consistency - ceph daemons should be on the same release (unless upgrade is active)
|
||||
kernel_version CEPHADM_CHECK_KERNEL_VERSION enabled checks that the MAJ.MIN of the kernel on Ceph hosts is consistent
|
||||
osd_linkspeed CEPHADM_CHECK_LINKSPEED enabled check that OSD hosts share a common network link speed
|
||||
network_missing CEPHADM_CHECK_NETWORK_MISSING enabled check that the cluster/public networks as defined exist on the Ceph hosts
|
||||
ceph_release CEPHADM_CHECK_CEPH_RELEASE enabled check for Ceph version consistency: all Ceph daemons should be the same release unless upgrade is in progress
|
||||
kernel_version CEPHADM_CHECK_KERNEL_VERSION enabled checks that the maj.min version of the kernel is consistent across Ceph hosts
|
||||
|
||||
The name of each configuration check can be used to enable or disable a specific check by running a command of the following form:
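
For example, assuming ``enable`` and ``disable`` subcommands that parallel
``ls`` (the check name below is taken from the listing above):

.. prompt:: bash #

   ceph cephadm config-check disable kernel_security
   ceph cephadm config-check enable kernel_security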
|
||||
|
||||
@ -414,31 +414,31 @@ flagged as an anomaly and a healthcheck (WARNING) state raised.
|
||||
|
||||
CEPHADM_CHECK_SUBSCRIPTION
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
This check relates to the status of vendor subscription. This check is
|
||||
performed only for hosts using RHEL, but helps to confirm that all hosts are
|
||||
This check relates to the status of OS vendor subscription. This check is
|
||||
performed only for hosts using RHEL and helps to confirm that all hosts are
|
||||
covered by an active subscription, which ensures that patches and updates are
|
||||
available.
|
||||
|
||||
CEPHADM_CHECK_PUBLIC_MEMBERSHIP
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
All members of the cluster should have NICs configured on at least one of the
|
||||
All members of the cluster should have a network interface configured on at least one of the
|
||||
public network subnets. Hosts that are not on the public network will rely on
|
||||
routing, which may affect performance.
|
||||
|
||||
CEPHADM_CHECK_MTU
|
||||
~~~~~~~~~~~~~~~~~
|
||||
The MTU of the NICs on OSDs can be a key factor in consistent performance. This
|
||||
The MTU of the network interfaces on OSD hosts can be a key factor in consistent performance. This
|
||||
check examines hosts that are running OSD services to ensure that the MTU is
|
||||
configured consistently within the cluster. This is determined by establishing
|
||||
configured consistently within the cluster. This is determined by identifying
|
||||
the MTU setting that the majority of hosts is using. Any anomalies result in a
|
||||
Ceph health check.
|
||||
health check.
|
||||
|
||||
CEPHADM_CHECK_LINKSPEED
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
This check is similar to the MTU check. Linkspeed consistency is a factor in
|
||||
consistent cluster performance, just as the MTU of the NICs on the OSDs is.
|
||||
This check determines the linkspeed shared by the majority of OSD hosts, and a
|
||||
health check is run for any hosts that are set at a lower linkspeed rate.
|
||||
This check is similar to the MTU check. Link speed consistency is a factor in
|
||||
consistent cluster performance, as is the MTU of the OSD node network interfaces.
|
||||
This check determines the link speed shared by the majority of OSD hosts, and a
|
||||
health check is run for any hosts that are set at a lower link speed rate.
|
||||
|
||||
CEPHADM_CHECK_NETWORK_MISSING
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -448,15 +448,14 @@ a health check is raised.
|
||||
|
||||
CEPHADM_CHECK_CEPH_RELEASE
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Under normal operations, the Ceph cluster runs daemons under the same ceph
|
||||
release (that is, the Ceph cluster runs all daemons under (for example)
|
||||
Octopus). This check determines the active release for each daemon, and
|
||||
Under normal operations, the Ceph cluster runs daemons that are of the same Ceph
|
||||
release (for example, Reef). This check determines the active release for each daemon, and
|
||||
reports any anomalies as a healthcheck. *This check is bypassed if an upgrade
|
||||
process is active within the cluster.*
|
||||
is in progress.*
|
||||
|
||||
CEPHADM_CHECK_KERNEL_VERSION
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
The OS kernel version (maj.min) is checked for consistency across the hosts.
|
||||
The OS kernel version (maj.min) is checked for consistency across hosts.
|
||||
The kernel version of the majority of the hosts is used as the basis for
|
||||
identifying anomalies.
|
||||
|
||||
@ -533,6 +532,13 @@ The resulting keyring file is:
|
||||
|
||||
-rw-r-----. 1 qemu qemu 156 Apr 21 08:47 /etc/ceph/client.client.rbd.keyring
|
||||
|
||||
By default, cephadm will also manage ``/etc/ceph/ceph.conf`` on hosts where it writes the keyrings.
|
||||
This feature can be suppressed by passing ``--no-ceph-conf`` when setting the keyring.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch client-keyring set client.foo label:foo 0:0 --no-ceph-conf
|
||||
|
||||
Disabling Management of a Keyring File
|
||||
--------------------------------------
|
||||
|
||||
|
@ -347,7 +347,9 @@ Or in YAML:
|
||||
Placement by pattern matching
|
||||
-----------------------------
|
||||
|
||||
Daemons can be placed on hosts as well:
|
||||
Daemons can be placed on hosts using a host pattern as well.
|
||||
By default, the host pattern is matched using fnmatch which supports
|
||||
UNIX shell-style wildcards (see https://docs.python.org/3/library/fnmatch.html):
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -375,6 +377,26 @@ Or in YAML:
|
||||
placement:
|
||||
host_pattern: "*"
|
||||
|
||||
The host pattern also has support for using a regex. To use a regex, you
|
||||
must either add "regex: " to the start of the pattern when using the
|
||||
command line, or specify a ``pattern_type`` field to be "regex"
|
||||
when using YAML.
|
||||
|
||||
On the command line:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply prometheus --placement='regex:FOO[0-9]|BAR[0-9]'
|
||||
|
||||
In YAML:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
service_type: prometheus
|
||||
placement:
|
||||
host_pattern:
|
||||
pattern: 'FOO[0-9]|BAR[0-9]'
|
||||
pattern_type: regex
|
||||
|
||||
Changing the number of daemons
|
||||
------------------------------
|
||||
|
@ -125,65 +125,101 @@ example spec file:
|
||||
spec:
|
||||
port: 4200
|
||||
|
||||
.. _cephadm_default_images:
|
||||
|
||||
Default images
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
|
||||
``cephadm`` stores a local copy of the ``cephadm`` binary in
|
||||
``/var/lib/ceph/{FSID}/cephadm.{DIGEST}``, where ``{DIGEST}`` is an alphanumeric
|
||||
string representing the currently-running version of Ceph.
|
||||
|
||||
To see the default container images, run a command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
grep -E "_IMAGE =" /var/lib/ceph/{FSID}/cephadm.{DIGEST}
|
||||
|
||||
::
|
||||
|
||||
DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.51.0'
|
||||
DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.9.5'
|
||||
DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.9.5'
|
||||
DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.7.0'
|
||||
DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.27.0'
|
||||
DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/grafana:10.4.0'
|
||||
|
||||
Default monitoring images are specified in
|
||||
``/src/cephadm/cephadmlib/constants.py`` and in
|
||||
``/src/pybind/mgr/cephadm/module.py``.
|
||||
|
||||
*The information in this section was developed by Eugen Block in a thread on
|
||||
the [ceph-users] mailing list in April of 2024. The thread can be viewed here:*
|
||||
`[ceph-users] discussion about default monitoring images
|
||||
<https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/QGC66QIFBKRTPZAQMQEYFXOGZJ7RLWBN/>`_.
|
||||
|
||||
.. _cephadm_monitoring-images:
|
||||
|
||||
Using custom images
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It is possible to install or upgrade monitoring components based on other
|
||||
images. To do so, the name of the image to be used needs to be stored in the
|
||||
configuration first. The following configuration options are available.
|
||||
images. The ID of the image that you plan to use must be stored in the
|
||||
configuration. The following configuration options are available:
|
||||
|
||||
- ``container_image_prometheus``
|
||||
- ``container_image_grafana``
|
||||
- ``container_image_alertmanager``
|
||||
- ``container_image_node_exporter``
|
||||
|
||||
Custom images can be set with the ``ceph config`` command
|
||||
Custom images can be set with the ``ceph config`` command. To set custom images, run a command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
.. code-block:: bash
|
||||
ceph config set mgr mgr/cephadm/<option_name> <value>
|
||||
|
||||
ceph config set mgr mgr/cephadm/<option_name> <value>
|
||||
|
||||
For example
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1
|
||||
|
||||
If there were already running monitoring stack daemon(s) of the type whose
|
||||
image you've changed, you must redeploy the daemon(s) in order to have them
|
||||
actually use the new image.
|
||||
|
||||
For example, if you had changed the prometheus image
|
||||
For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch redeploy prometheus
|
||||
ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1
|
||||
|
||||
If you were already running monitoring stack daemon(s) of the same image type
|
||||
that you changed, then you must redeploy the daemon(s) in order to make them
|
||||
use the new image.
|
||||
|
||||
For example, if you changed the Prometheus image, you would have to run the
|
||||
following command in order to pick up the changes:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch redeploy prometheus
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
By setting a custom image, the default value will be overridden (but not
|
||||
overwritten). The default value changes when updates become available.
|
||||
By setting a custom image, you will not be able to update the component
|
||||
you have set the custom image for automatically. You will need to
|
||||
manually update the configuration (image name and tag) to be able to
|
||||
install updates.
|
||||
overwritten). The default value will change when an update becomes
|
||||
available. If you set a custom image, you will not be able to automatically
|
||||
update the component that you have modified with the custom image. You will
|
||||
need to manually update the configuration (that includes the image name
|
||||
and the tag) to be able to install updates.
|
||||
|
||||
If you choose to go with the recommendations instead, you can reset the
|
||||
custom image you have set before. After that, the default value will be
|
||||
used again. Use ``ceph config rm`` to reset the configuration option
|
||||
If you choose to accept the recommendations, you can reset the custom
|
||||
image that you have set before. If you do this, the default value will be
|
||||
used again. Use ``ceph config rm`` to reset the configuration option, in
|
||||
a command of the following form:
|
||||
|
||||
.. code-block:: bash
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config rm mgr mgr/cephadm/<option_name>
|
||||
ceph config rm mgr mgr/cephadm/<option_name>
|
||||
|
||||
For example
|
||||
For example:
|
||||
|
||||
.. code-block:: bash
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config rm mgr mgr/cephadm/container_image_prometheus
|
||||
ceph config rm mgr mgr/cephadm/container_image_prometheus
|
||||
|
||||
See also :ref:`cephadm-airgap`.
|
||||
|
||||
@ -203,7 +239,7 @@ definition and management of the embedded Prometheus service. The endpoint liste
|
||||
``https://<mgr-ip>:8765/sd/`` (the port is
|
||||
configurable through the variable ``service_discovery_port``) and returns scrape target
|
||||
information in `http_sd_config format
|
||||
<https://prometheus.io/docs/prometheus/latest/configuration/configuration/#http_sd_config/>`_
|
||||
<https://prometheus.io/docs/prometheus/latest/configuration/configuration/#http_sd_config>`_
|
||||
|
||||
Customers with an external monitoring stack can use the `ceph-mgr` service discovery endpoint
|
||||
to get the scraping configuration. The root certificate of the server can be obtained by the
|
||||
@ -316,9 +352,9 @@ Here's an example prometheus job definition that uses the cephadm service discov
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
- job_name: 'ceph-exporter'
|
||||
- job_name: 'haproxy'
|
||||
http_sd_configs:
|
||||
- url: http://<mgr-ip>:8765/sd/prometheus/sd-config?service=ceph-exporter
|
||||
- url: http://<mgr-ip>:8765/sd/prometheus/sd-config?service=haproxy
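
To sanity-check the endpoint from the Prometheus host, a quick query such as
the following may help (the service name is illustrative and ``jq`` is used
only for pretty-printing):

.. prompt:: bash #

   curl -s "http://<mgr-ip>:8765/sd/prometheus/sd-config?service=haproxy" | jq .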
|
||||
|
||||
|
||||
* To enable the dashboard's prometheus-based alerting, see :ref:`dashboard-alerting`.
|
||||
|
@ -15,7 +15,7 @@ Deploying NFS ganesha
|
||||
=====================
|
||||
|
||||
Cephadm deploys NFS Ganesha daemon (or set of daemons). The configuration for
|
||||
NFS is stored in the ``nfs-ganesha`` pool and exports are managed via the
|
||||
NFS is stored in the ``.nfs`` pool and exports are managed via the
|
||||
``ceph nfs export ...`` commands and via the dashboard.
|
||||
|
||||
To deploy a NFS Ganesha gateway, run the following command:
|
||||
|
@ -1,7 +1,6 @@
|
||||
***********
|
||||
OSD Service
|
||||
***********
|
||||
.. _device management: ../rados/operations/devices
|
||||
.. _libstoragemgmt: https://github.com/libstorage/libstoragemgmt
|
||||
|
||||
List Devices
|
||||
@ -15,10 +14,9 @@ To print a list of devices discovered by ``cephadm``, run this command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch device ls [--hostname=...] [--wide] [--refresh]
|
||||
ceph orch device ls [--hostname=...] [--wide] [--refresh]
|
||||
|
||||
Example
|
||||
::
|
||||
Example::
|
||||
|
||||
Hostname Path Type Serial Size Health Ident Fault Available
|
||||
srv-01 /dev/sdb hdd 15P0A0YFFRD6 300G Unknown N/A N/A No
|
||||
@ -44,7 +42,7 @@ enable cephadm's "enhanced device scan" option as follows;
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/device_enhanced_scan true
|
||||
ceph config set mgr mgr/cephadm/device_enhanced_scan true
|
||||
|
||||
.. warning::
|
||||
Although the libstoragemgmt library performs standard SCSI inquiry calls,
|
||||
@ -80,12 +78,45 @@ like this:
|
||||
|
||||
In this example, libstoragemgmt has confirmed the health of the drives and the ability to
|
||||
interact with the Identification and Fault LEDs on the drive enclosures. For further
|
||||
information about interacting with these LEDs, refer to `device management`_.
|
||||
information about interacting with these LEDs, refer to :ref:`devices`.
|
||||
|
||||
.. note::
|
||||
The current release of `libstoragemgmt`_ (1.8.8) supports SCSI, SAS, and SATA based
|
||||
local disks only. There is no official support for NVMe devices (PCIe)
|
||||
|
||||
Retrieve Exact Size of Block Devices
|
||||
====================================
|
||||
|
||||
Run a command of the following form to discover the exact size of a block
|
||||
device. The value returned here is used by the orchestrator when comparing high
|
||||
and low values:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm shell ceph-volume inventory </dev/sda> --format json | jq .sys_api.human_readable_size
|
||||
|
||||
The exact size in GB is the size reported in TB, multiplied by 1000.
|
||||
|
||||
Example
|
||||
-------
|
||||
The following provides a specific example of this command based upon the
|
||||
general form of the command above:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm shell ceph-volume inventory /dev/sdc --format json | jq .sys_api.human_readable_size
|
||||
|
||||
::
|
||||
|
||||
"3.64 TB"
|
||||
|
||||
This means that the exact device size is 3.64 * 1000, or 3640GB.
|
||||
|
||||
This procedure was developed by Frédéric Nass. See `this thread on the
|
||||
[ceph-users] mailing list
|
||||
<https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/message/5BAAYFCQAZZDRSNCUPCVBNEPGJDARRZA/>`_
|
||||
for discussion of this matter.
|
||||
|
||||
.. _cephadm-deploy-osds:
|
||||
|
||||
Deploy OSDs
|
||||
@ -161,16 +192,16 @@ will happen without actually creating the OSDs.
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply osd --all-available-devices --dry-run
|
||||
ceph orch apply osd --all-available-devices --dry-run
|
||||
|
||||
::
|
||||
::
|
||||
|
||||
NAME HOST DATA DB WAL
|
||||
all-available-devices node1 /dev/vdb - -
|
||||
all-available-devices node2 /dev/vdc - -
|
||||
all-available-devices node3 /dev/vdd - -
|
||||
NAME HOST DATA DB WAL
|
||||
all-available-devices node1 /dev/vdb - -
|
||||
all-available-devices node2 /dev/vdc - -
|
||||
all-available-devices node3 /dev/vdd - -
|
||||
|
||||
.. _cephadm-osd-declarative:
|
||||
|
||||
@ -185,9 +216,9 @@ command completes will be automatically found and added to the cluster.
|
||||
|
||||
We will examine the effects of the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply osd --all-available-devices
|
||||
ceph orch apply osd --all-available-devices
|
||||
|
||||
After running the above command:
|
||||
|
||||
@ -203,17 +234,17 @@ If you want to avoid this behavior (disable automatic creation of OSD on availab
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply osd --all-available-devices --unmanaged=true
|
||||
ceph orch apply osd --all-available-devices --unmanaged=true
|
||||
|
||||
.. note::
|
||||
|
||||
Keep these three facts in mind:
|
||||
Keep these three facts in mind:
|
||||
|
||||
- The default behavior of ``ceph orch apply`` causes cephadm constantly to reconcile. This means that cephadm creates OSDs as soon as new drives are detected.
|
||||
- The default behavior of ``ceph orch apply`` causes cephadm constantly to reconcile. This means that cephadm creates OSDs as soon as new drives are detected.
|
||||
|
||||
- Setting ``unmanaged: True`` disables the creation of OSDs. If ``unmanaged: True`` is set, nothing will happen even if you apply a new OSD service.
|
||||
- Setting ``unmanaged: True`` disables the creation of OSDs. If ``unmanaged: True`` is set, nothing will happen even if you apply a new OSD service.
|
||||
|
||||
- ``ceph orch daemon add`` creates OSDs, but does not add an OSD service.
|
||||
- ``ceph orch daemon add`` creates OSDs, but does not add an OSD service.
|
||||
|
||||
* For cephadm, see also :ref:`cephadm-spec-unmanaged`.
|
||||
|
||||
@ -224,7 +255,7 @@ Remove an OSD
|
||||
|
||||
Removing an OSD from a cluster involves two steps:
|
||||
|
||||
#. evacuating all placement groups (PGs) from the cluster
|
||||
#. evacuating all placement groups (PGs) from the OSD
|
||||
#. removing the PG-free OSD from the cluster
|
||||
|
||||
The following command performs these two steps:
|
||||
@ -241,7 +272,7 @@ Example:
|
||||
|
||||
Expected output::
|
||||
|
||||
Scheduled OSD(s) for removal
|
||||
Scheduled OSD(s) for removal
|
||||
|
||||
OSDs that are not safe to destroy will be rejected.
|
||||
|
||||
@ -264,14 +295,14 @@ You can query the state of OSD operation with the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch osd rm status
|
||||
ceph orch osd rm status
|
||||
|
||||
Expected output::
|
||||
|
||||
OSD_ID HOST STATE PG_COUNT REPLACE FORCE STARTED_AT
|
||||
2 cephadm-dev done, waiting for purge 0 True False 2020-07-17 13:01:43.147684
|
||||
3 cephadm-dev draining 17 False True 2020-07-17 13:01:45.162158
|
||||
4 cephadm-dev started 42 False True 2020-07-17 13:01:45.162158
|
||||
OSD_ID HOST STATE PG_COUNT REPLACE FORCE STARTED_AT
|
||||
2 cephadm-dev done, waiting for purge 0 True False 2020-07-17 13:01:43.147684
|
||||
3 cephadm-dev draining 17 False True 2020-07-17 13:01:45.162158
|
||||
4 cephadm-dev started 42 False True 2020-07-17 13:01:45.162158
|
||||
|
||||
|
||||
When no PGs are left on the OSD, it will be decommissioned and removed from the cluster.
|
||||
@ -293,11 +324,11 @@ Example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch osd rm stop 4
|
||||
ceph orch osd rm stop 4
|
||||
|
||||
Expected output::
|
||||
|
||||
Stopped OSD(s) removal
|
||||
Stopped OSD(s) removal
|
||||
|
||||
This resets the initial state of the OSD and takes it off the removal queue.
|
||||
|
||||
@ -318,7 +349,7 @@ Example:
|
||||
|
||||
Expected output::
|
||||
|
||||
Scheduled OSD(s) for replacement
|
||||
Scheduled OSD(s) for replacement
|
||||
|
||||
This follows the same procedure as the procedure in the "Remove OSD" section, with
|
||||
one exception: the OSD is not permanently removed from the CRUSH hierarchy, but is
|
||||
@ -425,10 +456,10 @@ the ``ceph orch ps`` output in the ``MEM LIMIT`` column::
|
||||
To exclude an OSD from memory autotuning, disable the autotune option
|
||||
for that OSD and also set a specific memory target. For example,
|
||||
|
||||
.. prompt:: bash #
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set osd.123 osd_memory_target_autotune false
|
||||
ceph config set osd.123 osd_memory_target 16G
|
||||
ceph config set osd.123 osd_memory_target_autotune false
|
||||
ceph config set osd.123 osd_memory_target 16G
|
||||
|
||||
|
||||
.. _drivegroups:
|
||||
@ -491,7 +522,7 @@ Example
|
||||
|
||||
.. prompt:: bash [monitor.1]#
|
||||
|
||||
ceph orch apply -i /path/to/osd_spec.yml --dry-run
|
||||
ceph orch apply -i /path/to/osd_spec.yml --dry-run
|
||||
|
||||
|
||||
|
||||
@ -501,9 +532,9 @@ Filters
|
||||
-------
|
||||
|
||||
.. note::
|
||||
Filters are applied using an `AND` gate by default. This means that a drive
|
||||
must fulfill all filter criteria in order to get selected. This behavior can
|
||||
be adjusted by setting ``filter_logic: OR`` in the OSD specification.
|
||||
Filters are applied using an `AND` gate by default. This means that a drive
|
||||
must fulfill all filter criteria in order to get selected. This behavior can
|
||||
be adjusted by setting ``filter_logic: OR`` in the OSD specification.
|
||||
|
||||
Filters are used to assign disks to groups, using their attributes to group
|
||||
them.
|
||||
@ -513,7 +544,7 @@ information about the attributes with this command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
ceph-volume inventory </path/to/disk>
|
||||
ceph-volume inventory </path/to/disk>
|
||||
|
||||
Vendor or Model
|
||||
^^^^^^^^^^^^^^^
|
||||
@ -622,9 +653,9 @@ but want to use only the first two, you could use `limit`:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
data_devices:
|
||||
vendor: VendorA
|
||||
limit: 2
|
||||
data_devices:
|
||||
vendor: VendorA
|
||||
limit: 2
|
||||
|
||||
.. note:: `limit` is a last resort and shouldn't be used if it can be avoided.
|
||||
|
||||
@ -845,8 +876,8 @@ See :ref:`orchestrator-cli-placement-spec`
|
||||
|
||||
.. note::
|
||||
|
||||
Assuming each host has a unique disk layout, each OSD
|
||||
spec needs to have a different service id
|
||||
Assuming each host has a unique disk layout, each OSD
|
||||
spec needs to have a different service id
|
||||
|
||||
|
||||
Dedicated wal + db
|
||||
@ -976,7 +1007,7 @@ activates all existing OSDs on a host.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph cephadm osd activate <host>...
|
||||
ceph cephadm osd activate <host>...
|
||||
|
||||
This will scan all existing disks for OSDs and deploy corresponding daemons.
|
||||
|
||||
|
@ -74,6 +74,33 @@ example spec file:
|
||||
spec:
|
||||
rgw_frontend_port: 8080
|
||||
|
||||
Passing Frontend Extra Arguments
|
||||
--------------------------------
|
||||
|
||||
The RGW service specification can be used to pass extra arguments to the rgw frontend by using
|
||||
the `rgw_frontend_extra_args` arguments list.
|
||||
|
||||
example spec file:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
service_type: rgw
|
||||
service_id: foo
|
||||
placement:
|
||||
label: rgw
|
||||
count_per_host: 2
|
||||
spec:
|
||||
rgw_realm: myrealm
|
||||
rgw_zone: myzone
|
||||
rgw_frontend_type: "beast"
|
||||
rgw_frontend_port: 5000
|
||||
rgw_frontend_extra_args:
|
||||
- "tcp_nodelay=1"
|
||||
- "max_header_size=65536"
|
||||
|
||||
.. note:: cephadm combines the arguments from the `spec` section and the ones from
|
||||
the `rgw_frontend_extra_args` into a single space-separated arguments list
|
||||
which is used to set the value of `rgw_frontends` configuration parameter.
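
As an illustration of that combination (the exact ordering of the arguments
may vary), the spec above would result in an ``rgw_frontends`` value along the
lines of ``beast port=5000 tcp_nodelay=1 max_header_size=65536``, which can
usually be seen in the monitor configuration database:

.. prompt:: bash #

   ceph config dump | grep rgw_frontends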
|
||||
|
||||
Multisite zones
|
||||
---------------
|
||||
|
@ -1,66 +1,62 @@
|
||||
Troubleshooting
|
||||
===============
|
||||
|
||||
You may wish to investigate why a cephadm command failed
|
||||
or why a certain service no longer runs properly.
|
||||
This section explains how to investigate why a cephadm command failed or why a
|
||||
certain service no longer runs properly.
|
||||
|
||||
Cephadm deploys daemons within containers. This means that
|
||||
troubleshooting those containerized daemons will require
|
||||
a different process than traditional package-install daemons.
|
||||
Cephadm deploys daemons within containers. Troubleshooting containerized
|
||||
daemons requires a different process than does troubleshooting traditional
|
||||
daemons that were installed by means of packages.
|
||||
|
||||
Here are some tools and commands to help you troubleshoot
|
||||
your Ceph environment.
|
||||
Here are some tools and commands to help you troubleshoot your Ceph
|
||||
environment.
|
||||
|
||||
.. _cephadm-pause:
|
||||
|
||||
Pausing or Disabling cephadm
|
||||
----------------------------
|
||||
|
||||
If something goes wrong and cephadm is behaving badly, you can
|
||||
pause most of the Ceph cluster's background activity by running
|
||||
the following command:
|
||||
If something goes wrong and cephadm is behaving badly, pause most of the Ceph
|
||||
cluster's background activity by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch pause
|
||||
|
||||
This stops all changes in the Ceph cluster, but cephadm will
|
||||
still periodically check hosts to refresh its inventory of
|
||||
daemons and devices. You can disable cephadm completely by
|
||||
running the following commands:
|
||||
This stops all changes in the Ceph cluster, but cephadm will still periodically
|
||||
check hosts to refresh its inventory of daemons and devices. Disable cephadm
|
||||
completely by running the following commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch set backend ''
|
||||
ceph mgr module disable cephadm
|
||||
|
||||
These commands disable all of the ``ceph orch ...`` CLI commands.
|
||||
All previously deployed daemon containers continue to exist and
|
||||
will start as they did before you ran these commands.
|
||||
These commands disable all ``ceph orch ...`` CLI commands. All
|
||||
previously deployed daemon containers continue to run and will start just as
|
||||
they were before you ran these commands.
|
||||
|
||||
See :ref:`cephadm-spec-unmanaged` for information on disabling
|
||||
individual services.
|
||||
See :ref:`cephadm-spec-unmanaged` for more on disabling individual services.
|
||||
|
||||
|
||||
Per-service and Per-daemon Events
|
||||
---------------------------------
|
||||
|
||||
In order to facilitate debugging failed daemons,
|
||||
cephadm stores events per service and per daemon.
|
||||
These events often contain information relevant to
|
||||
troubleshooting your Ceph cluster.
|
||||
To make it easier to debug failed daemons, cephadm stores events per service
|
||||
and per daemon. These events often contain information relevant to
|
||||
the troubleshooting of your Ceph cluster.
|
||||
|
||||
Listing Service Events
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To see the events associated with a certain service, run a
|
||||
command of the and following form:
|
||||
To see the events associated with a certain service, run a command of the
|
||||
following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch ls --service_name=<service-name> --format yaml
|
||||
|
||||
This will return something in the following form:
|
||||
This will return information in the following form:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
@ -81,8 +77,8 @@ This will return something in the following form:
|
||||
Listing Daemon Events
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To see the events associated with a certain daemon, run a
|
||||
command of the and following form:
|
||||
To see the events associated with a certain daemon, run a command of the
|
||||
following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -105,32 +101,41 @@ This will return something in the following form:
|
||||
Checking Cephadm Logs
|
||||
---------------------
|
||||
|
||||
To learn how to monitor cephadm logs as they are generated, read :ref:`watching_cephadm_logs`.
|
||||
To learn how to monitor cephadm logs as they are generated, read
|
||||
:ref:`watching_cephadm_logs`.
|
||||
|
||||
If your Ceph cluster has been configured to log events to files, there will be a
|
||||
``ceph.cephadm.log`` file on all monitor hosts (see
|
||||
:ref:`cephadm-logs` for a more complete explanation).
|
||||
If your Ceph cluster has been configured to log events to files, there will be
|
||||
a ``ceph.cephadm.log`` file on all monitor hosts. See :ref:`cephadm-logs` for a
|
||||
more complete explanation.
|
||||
|
||||
Gathering Log Files
|
||||
-------------------
|
||||
|
||||
Use journalctl to gather the log files of all daemons:
|
||||
Use ``journalctl`` to gather the log files of all daemons:
|
||||
|
||||
.. note:: By default cephadm now stores logs in journald. This means
|
||||
that you will no longer find daemon logs in ``/var/log/ceph/``.
|
||||
|
||||
To read the log file of one specific daemon, run::
|
||||
To read the log file of one specific daemon, run a command of the following
|
||||
form:
|
||||
|
||||
cephadm logs --name <name-of-daemon>
|
||||
.. prompt:: bash
|
||||
|
||||
Note: this only works when run on the same host where the daemon is running. To
|
||||
get logs of a daemon running on a different host, give the ``--fsid`` option::
|
||||
cephadm logs --name <name-of-daemon>
|
||||
|
||||
cephadm logs --fsid <fsid> --name <name-of-daemon>
|
||||
.. Note:: This works only when run on the same host that is running the daemon.
|
||||
To get the logs of a daemon that is running on a different host, add the
|
||||
``--fsid`` option to the command, as in the following example:
|
||||
|
||||
where the ``<fsid>`` corresponds to the cluster ID printed by ``ceph status``.
|
||||
.. prompt:: bash
|
||||
|
||||
To fetch all log files of all daemons on a given host, run::
|
||||
cephadm logs --fsid <fsid> --name <name-of-daemon>
|
||||
|
||||
In this example, ``<fsid>`` corresponds to the cluster ID returned by the
|
||||
``ceph status`` command.
|
||||
|
||||
To fetch all log files of all daemons on a given host, run the following
|
||||
for-loop::
|
||||
|
||||
for name in $(cephadm ls | jq -r '.[].name') ; do
|
||||
cephadm logs --fsid <fsid> --name "$name" > $name;
|
||||
@ -139,39 +144,41 @@ To fetch all log files of all daemons on a given host, run::
|
||||
Collecting Systemd Status
|
||||
-------------------------
|
||||
|
||||
To print the state of a systemd unit, run::
|
||||
To print the state of a systemd unit, run a command of the following form:
|
||||
|
||||
systemctl status "ceph-$(cephadm shell ceph fsid)@<service name>.service";
|
||||
.. prompt:: bash
|
||||
|
||||
systemctl status "ceph-$(cephadm shell ceph fsid)@<service name>.service";
|
||||
|
||||
|
||||
To fetch all state of all daemons of a given host, run::
|
||||
To fetch the state of all daemons of a given host, run the following shell
|
||||
script::
|
||||
|
||||
fsid="$(cephadm shell ceph fsid)"
|
||||
for name in $(cephadm ls | jq -r '.[].name') ; do
|
||||
systemctl status "ceph-$fsid@$name.service" > $name;
|
||||
done
|
||||
fsid="$(cephadm shell ceph fsid)"
|
||||
for name in $(cephadm ls | jq -r '.[].name') ; do
|
||||
systemctl status "ceph-$fsid@$name.service" > $name;
|
||||
done
|
||||
|
||||
|
||||
List all Downloaded Container Images
|
||||
------------------------------------
|
||||
|
||||
To list all container images that are downloaded on a host:
|
||||
To list all container images that are downloaded on a host, run the following
|
||||
commands:
|
||||
|
||||
.. note:: ``Image`` might also be called `ImageID`
|
||||
.. prompt:: bash #
|
||||
|
||||
::
|
||||
podman ps -a --format json | jq '.[].Image' "docker.io/library/centos:8" "registry.opensuse.org/opensuse/leap:15.2"
|
||||
|
||||
podman ps -a --format json | jq '.[].Image'
|
||||
"docker.io/library/centos:8"
|
||||
"registry.opensuse.org/opensuse/leap:15.2"
|
||||
.. note:: ``Image`` might also be called ``ImageID``.
|
||||
|
||||
|
||||
Manually Running Containers
|
||||
---------------------------
|
||||
|
||||
Cephadm uses small wrappers when running containers. Refer to
|
||||
``/var/lib/ceph/<cluster-fsid>/<service-name>/unit.run`` for the
|
||||
container execution command.
|
||||
``/var/lib/ceph/<cluster-fsid>/<service-name>/unit.run`` for the container
|
||||
execution command.
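For example, to print the wrapper for a given daemon and see the exact container invocation (the placeholders match the path above):

.. prompt:: bash #

cat /var/lib/ceph/<cluster-fsid>/<service-name>/unit.run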
|
||||
|
||||
.. _cephadm-ssh-errors:
|
||||
|
||||
@ -187,9 +194,10 @@ Error message::
|
||||
Please make sure that the host is reachable and accepts connections using the cephadm SSH key
|
||||
...
|
||||
|
||||
Things Ceph administrators can do:
|
||||
If you receive the above error message, try the following things to
|
||||
troubleshoot the SSH connection between ``cephadm`` and the monitor:
|
||||
|
||||
1. Ensure cephadm has an SSH identity key::
|
||||
1. Ensure that ``cephadm`` has an SSH identity key::
|
||||
|
||||
[root@mon1~]# cephadm shell -- ceph config-key get mgr/cephadm/ssh_identity_key > ~/cephadm_private_key
|
||||
INFO:cephadm:Inferring fsid f8edc08a-7f17-11ea-8707-000c2915dd98
|
||||
@ -202,20 +210,21 @@ Things Ceph administrators can do:
|
||||
|
||||
or::
|
||||
|
||||
[root@mon1 ~]# cat ~/cephadm_private_key | cephadm shell -- ceph cephadm set-ssk-key -i -
|
||||
[root@mon1 ~]# cat ~/cephadm_private_key | cephadm shell -- ceph cephadm set-ssh-key -i -
|
||||
|
||||
2. Ensure that the SSH config is correct::
|
||||
|
||||
[root@mon1 ~]# cephadm shell -- ceph cephadm get-ssh-config > config
|
||||
|
||||
3. Verify that we can connect to the host::
|
||||
3. Verify that it is possible to connect to the host::
|
||||
|
||||
[root@mon1 ~]# ssh -F config -i ~/cephadm_private_key root@mon1
|
||||
|
||||
Verifying that the Public Key is Listed in the authorized_keys file
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
To verify that the public key is in the authorized_keys file, run the following commands::
|
||||
To verify that the public key is in the ``authorized_keys`` file, run the
|
||||
following commands::
|
||||
|
||||
[root@mon1 ~]# cephadm shell -- ceph cephadm get-pub-key > ~/ceph.pub
|
||||
[root@mon1 ~]# grep "`cat ~/ceph.pub`" /root/.ssh/authorized_keys
|
||||
@ -231,27 +240,34 @@ Or this error::
|
||||
|
||||
Must set public_network config option or specify a CIDR network, ceph addrvec, or plain IP
|
||||
|
||||
This means that you must run a command of this form::
|
||||
This means that you must run a command of this form:
|
||||
|
||||
ceph config set mon public_network <mon_network>
|
||||
.. prompt:: bash
|
||||
|
||||
For more detail on operations of this kind, see :ref:`deploy_additional_monitors`
|
||||
ceph config set mon public_network <mon_network>
|
||||
|
||||
For more detail on operations of this kind, see
|
||||
:ref:`deploy_additional_monitors`.
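For example, if the monitors should use the (purely illustrative) ``10.1.2.0/24`` network:

.. prompt:: bash #

ceph config set mon public_network 10.1.2.0/24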
|
||||
|
||||
Accessing the Admin Socket
|
||||
--------------------------
|
||||
|
||||
Each Ceph daemon provides an admin socket that bypasses the
|
||||
MONs (See :ref:`rados-monitoring-using-admin-socket`).
|
||||
Each Ceph daemon provides an admin socket that allows runtime option setting and statistic reading. See
|
||||
:ref:`rados-monitoring-using-admin-socket`.
|
||||
|
||||
To access the admin socket, first enter the daemon container on the host::
|
||||
#. To access the admin socket, enter the daemon container on the host::
|
||||
|
||||
[root@mon1 ~]# cephadm enter --name <daemon-name>
|
||||
[ceph: root@mon1 /]# ceph --admin-daemon /var/run/ceph/ceph-<daemon-name>.asok config show
|
||||
[root@mon1 ~]# cephadm enter --name <daemon-name>
|
||||
|
||||
#. Run a command of the following forms to see the admin socket's configuration and other available actions::
|
||||
|
||||
[ceph: root@mon1 /]# ceph --admin-daemon /var/run/ceph/ceph-<daemon-name>.asok config show
|
||||
[ceph: root@mon1 /]# ceph --admin-daemon /var/run/ceph/ceph-<daemon-name>.asok help
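For example, ``perf dump`` (one of the commands listed by ``help``) prints the daemon's performance counters::

[ceph: root@mon1 /]# ceph --admin-daemon /var/run/ceph/ceph-<daemon-name>.asok perf dump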
|
||||
|
||||
Running Various Ceph Tools
|
||||
--------------------------------
|
||||
|
||||
To run Ceph tools like ``ceph-objectstore-tool`` or
|
||||
To run Ceph tools such as ``ceph-objectstore-tool`` or
|
||||
``ceph-monstore-tool``, invoke the cephadm CLI with
|
||||
``cephadm shell --name <daemon-name>``. For example::
|
||||
|
||||
@ -268,100 +284,232 @@ To run Ceph tools like ``ceph-objectstore-tool`` or
|
||||
election_strategy: 1
|
||||
0: [v2:127.0.0.1:3300/0,v1:127.0.0.1:6789/0] mon.myhostname
|
||||
|
||||
The cephadm shell sets up the environment in a way that is suitable
|
||||
for extended daemon maintenance and running daemons interactively.
|
||||
The cephadm shell sets up the environment in a way that is suitable for
|
||||
extended daemon maintenance and for the interactive running of daemons.
|
||||
|
||||
.. _cephadm-restore-quorum:
|
||||
|
||||
Restoring the Monitor Quorum
|
||||
----------------------------
|
||||
|
||||
If the Ceph monitor daemons (mons) cannot form a quorum, cephadm will not be
|
||||
able to manage the cluster until quorum is restored.
|
||||
If the Ceph Monitor daemons (mons) cannot form a quorum, ``cephadm`` will not
|
||||
be able to manage the cluster until quorum is restored.
|
||||
|
||||
In order to restore the quorum, remove unhealthy monitors
|
||||
from the monmap by following these steps:
|
||||
|
||||
1. Stop all mons. For each mon host::
|
||||
1. Stop all Monitors. Use ``ssh`` to connect to each Monitor's host, and then
|
||||
while connected to the Monitor's host use ``cephadm`` to stop the Monitor
|
||||
daemon:
|
||||
|
||||
ssh {mon-host}
|
||||
cephadm unit --name mon.`hostname` stop
|
||||
.. prompt:: bash
|
||||
|
||||
ssh {mon-host}
|
||||
cephadm unit --name {mon.hostname} stop
|
||||
|
||||
|
||||
2. Identify a surviving monitor and log in to that host::
|
||||
2. Identify a surviving Monitor and log in to its host:
|
||||
|
||||
ssh {mon-host}
|
||||
cephadm enter --name mon.`hostname`
|
||||
.. prompt:: bash
|
||||
|
||||
3. Follow the steps in :ref:`rados-mon-remove-from-unhealthy`
|
||||
ssh {mon-host}
|
||||
cephadm enter --name {mon.hostname}
|
||||
|
||||
3. Follow the steps in :ref:`rados-mon-remove-from-unhealthy`.
|
||||
|
||||
.. _cephadm-manually-deploy-mgr:
|
||||
|
||||
Manually Deploying a Manager Daemon
|
||||
-----------------------------------
|
||||
At least one manager (mgr) daemon is required by cephadm in order to manage the
|
||||
cluster. If the last mgr in a cluster has been removed, follow these steps in
|
||||
order to deploy a manager called (for example)
|
||||
``mgr.hostname.smfvfd`` on a random host of your cluster manually.
|
||||
At least one Manager (``mgr``) daemon is required by cephadm in order to manage
|
||||
the cluster. If the last remaining Manager has been removed from the Ceph
|
||||
cluster, follow these steps in order to deploy a fresh Manager on an arbitrary
|
||||
host in your cluster. In this example, the freshly-deployed Manager daemon is
|
||||
called ``mgr.hostname.smfvfd``.
|
||||
|
||||
Disable the cephadm scheduler, in order to prevent cephadm from removing the new
|
||||
manager. See :ref:`cephadm-enable-cli`::
|
||||
#. Disable the cephadm scheduler, in order to prevent ``cephadm`` from removing
|
||||
the new Manager. See :ref:`cephadm-enable-cli`:
|
||||
|
||||
ceph config-key set mgr/cephadm/pause true
|
||||
.. prompt:: bash #
|
||||
|
||||
Then get or create the auth entry for the new manager::
|
||||
ceph config-key set mgr/cephadm/pause true
|
||||
|
||||
ceph auth get-or-create mgr.hostname.smfvfd mon "profile mgr" osd "allow *" mds "allow *"
|
||||
#. Retrieve or create the "auth entry" for the new Manager:
|
||||
|
||||
Get the ceph.conf::
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config generate-minimal-conf
|
||||
ceph auth get-or-create mgr.hostname.smfvfd mon "profile mgr" osd "allow *" mds "allow *"
|
||||
|
||||
Get the container image::
|
||||
#. Retrieve the Monitor's configuration:
|
||||
|
||||
ceph config get "mgr.hostname.smfvfd" container_image
|
||||
.. prompt:: bash #
|
||||
|
||||
Create a file ``config-json.json`` which contains the information necessary to deploy
|
||||
the daemon:
|
||||
ceph config generate-minimal-conf
|
||||
|
||||
.. code-block:: json
|
||||
#. Retrieve the container image:
|
||||
|
||||
{
|
||||
"config": "# minimal ceph.conf for 8255263a-a97e-4934-822c-00bfe029b28f\n[global]\n\tfsid = 8255263a-a97e-4934-822c-00bfe029b28f\n\tmon_host = [v2:192.168.0.1:40483/0,v1:192.168.0.1:40484/0]\n",
|
||||
"keyring": "[mgr.hostname.smfvfd]\n\tkey = V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4=\n"
|
||||
}
|
||||
.. prompt:: bash #
|
||||
|
||||
Deploy the daemon::
|
||||
ceph config get "mgr.hostname.smfvfd" container_image
|
||||
|
||||
cephadm --image <container-image> deploy --fsid <fsid> --name mgr.hostname.smfvfd --config-json config-json.json
|
||||
#. Create a file called ``config-json.json``, which contains the information
|
||||
necessary to deploy the daemon:
|
||||
|
||||
Analyzing Core Dumps
|
||||
.. code-block:: json
|
||||
|
||||
{
|
||||
"config": "# minimal ceph.conf for 8255263a-a97e-4934-822c-00bfe029b28f\n[global]\n\tfsid = 8255263a-a97e-4934-822c-00bfe029b28f\n\tmon_host = [v2:192.168.0.1:40483/0,v1:192.168.0.1:40484/0]\n",
|
||||
"keyring": "[mgr.hostname.smfvfd]\n\tkey = V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4=\n"
|
||||
}
|
||||
|
||||
#. Deploy the Manager daemon:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm --image <container-image> deploy --fsid <fsid> --name mgr.hostname.smfvfd --config-json config-json.json
|
||||
|
||||
Capturing Core Dumps
|
||||
---------------------
|
||||
|
||||
When a Ceph daemon crashes, cephadm supports analyzing core dumps. To enable core dumps, run
|
||||
A Ceph cluster that uses ``cephadm`` can be configured to capture core dumps.
|
||||
The initial capture and processing of the coredump is performed by
|
||||
`systemd-coredump
|
||||
<https://www.man7.org/linux/man-pages/man8/systemd-coredump.8.html>`_.
|
||||
|
||||
|
||||
To enable coredump handling, run the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ulimit -c unlimited
|
||||
ulimit -c unlimited
|
||||
|
||||
Core dumps will now be written to ``/var/lib/systemd/coredump``.
|
||||
|
||||
.. note::
|
||||
|
||||
Core dumps are not namespaced by the kernel, which means
|
||||
they will be written to ``/var/lib/systemd/coredump`` on
|
||||
the container host.
|
||||
Core dumps are not namespaced by the kernel. This means that core dumps are
|
||||
written to ``/var/lib/systemd/coredump`` on the container host. The ``ulimit
|
||||
-c unlimited`` setting will persist only until the system is rebooted.
|
||||
|
||||
Now, wait for the crash to happen again. To simulate the crash of a daemon, run e.g. ``killall -3 ceph-mon``.
|
||||
Wait for the crash to happen again. To simulate the crash of a daemon, run for
|
||||
example ``killall -3 ceph-mon``.
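Once a crash has been captured, the core files recorded by ``systemd-coredump`` can be listed on the host (a sketch using the standard systemd tooling):

.. prompt:: bash #

coredumpctl list ceph-mon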
|
||||
|
||||
Install debug packages including ``ceph-debuginfo`` by entering the cephadm shelll::
|
||||
|
||||
# cephadm shell --mount /var/lib/systemd/coredump
|
||||
[ceph: root@host1 /]# dnf install ceph-debuginfo gdb zstd
|
||||
[ceph: root@host1 /]# unzstd /mnt/coredump/core.ceph-*.zst
|
||||
[ceph: root@host1 /]# gdb /usr/bin/ceph-mon /mnt/coredump/core.ceph-...
|
||||
(gdb) bt
|
||||
#0 0x00007fa9117383fc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
|
||||
#1 0x00007fa910d7f8f0 in std::condition_variable::wait(std::unique_lock<std::mutex>&) () from /lib64/libstdc++.so.6
|
||||
#2 0x00007fa913d3f48f in AsyncMessenger::wait() () from /usr/lib64/ceph/libceph-common.so.2
|
||||
#3 0x0000563085ca3d7e in main ()
|
||||
Running the Debugger with cephadm
|
||||
----------------------------------
|
||||
|
||||
Running a single debugging session
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Initiate a debugging session by using the ``cephadm shell`` command.
|
||||
From within the shell container we need to install the debugger and debuginfo
|
||||
packages. To debug a core file captured by systemd, run the following:
|
||||
|
||||
|
||||
#. Start the shell session:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm shell --mount /var/lib/systemd/coredump
|
||||
|
||||
#. From within the shell session, run the following commands:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
dnf install ceph-debuginfo gdb zstd
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
unzstd /var/lib/systemd/coredump/core.ceph-*.zst
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
gdb /usr/bin/ceph-mon /var/lib/systemd/coredump/core.ceph-*
|
||||
|
||||
#. Run debugger commands at gdb's prompt:
|
||||
|
||||
.. prompt:: bash (gdb)
|
||||
|
||||
bt
|
||||
|
||||
::
|
||||
|
||||
#0 0x00007fa9117383fc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
|
||||
#1 0x00007fa910d7f8f0 in std::condition_variable::wait(std::unique_lock<std::mutex>&) () from /lib64/libstdc++.so.6
|
||||
#2 0x00007fa913d3f48f in AsyncMessenger::wait() () from /usr/lib64/ceph/libceph-common.so.2
|
||||
#3 0x0000563085ca3d7e in main ()
|
||||
|
||||
|
||||
Running repeated debugging sessions
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
When using ``cephadm shell``, as in the example above, any changes made to the
|
||||
container that is spawned by the shell command are ephemeral. After the shell
|
||||
session exits, the files that were downloaded and installed cease to be
|
||||
available. You can simply re-run the same commands every time ``cephadm shell``
|
||||
is invoked, but to save time and resources you can create a new container image
|
||||
and use it for repeated debugging sessions.
|
||||
|
||||
In the following example, we create a simple file that constructs the
|
||||
container image. The command below uses podman but it is expected to work
|
||||
correctly even if ``podman`` is replaced with ``docker``::
|
||||
|
||||
cat >Containerfile <<EOF
|
||||
ARG BASE_IMG=quay.io/ceph/ceph:v18
|
||||
FROM \${BASE_IMG}
|
||||
# install ceph debuginfo packages, gdb and other potentially useful packages
|
||||
RUN dnf install --enablerepo='*debug*' -y ceph-debuginfo gdb zstd strace python3-debuginfo
|
||||
EOF
|
||||
podman build -t ceph:debugging -f Containerfile .
|
||||
# pass --build-arg=BASE_IMG=<your image> to customize the base image
|
||||
|
||||
The above file creates a new local image named ``ceph:debugging``. This image
|
||||
can be used on the same machine that built it. The image can also be pushed to
|
||||
a container repository or saved and copied to a node that is running other Ceph
|
||||
containers. See the ``podman`` or ``docker`` documentation for more
|
||||
information about the container workflow.
|
||||
|
||||
After the image has been built, it can be used to initiate repeat debugging
|
||||
sessions. By using an image in this way, you avoid the trouble of having to
|
||||
re-install the debug tools and the debuginfo packages every time you need to
|
||||
run a debug session. To debug a core file using this image, in the same way as
|
||||
previously described, run:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm --image ceph:debugging shell --mount /var/lib/systemd/coredump
|
||||
|
||||
|
||||
Debugging live processes
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The gdb debugger can attach to running processes to debug them. This can be
|
||||
achieved with a containerized process by using the debug image and attaching it
|
||||
to the same PID namespace in which the process to be debugged resides.
|
||||
|
||||
This requires running a container command with some custom arguments. We can
|
||||
generate a script that can debug a process in a running container.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm --image ceph:debugging shell --dry-run > /tmp/debug.sh
|
||||
|
||||
This creates a script that includes the container command that ``cephadm``
|
||||
would use to create a shell. Modify the script by removing the ``--init``
|
||||
argument and replacing it with the argument that joins the namespace used for
a running container. For example, assume we want to debug the Manager
and have determined that the Manager is running in a container named
|
||||
``ceph-bc615290-685b-11ee-84a6-525400220000-mgr-ceph0-sluwsk``. In this case,
|
||||
the argument
|
||||
``--pid=container:ceph-bc615290-685b-11ee-84a6-525400220000-mgr-ceph0-sluwsk``
|
||||
should be used.
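One way to make that substitution in the generated script (a sketch; the container name is the hypothetical one from this example):

.. prompt:: bash #

sed -i 's/--init/--pid=container:ceph-bc615290-685b-11ee-84a6-525400220000-mgr-ceph0-sluwsk/' /tmp/debug.sh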
|
||||
|
||||
We can run our debugging container with ``sh /tmp/debug.sh``. Within the shell,
|
||||
we can run commands such as ``ps`` to get the PID of the Manager process. In
|
||||
the following example this is ``2``. While running gdb, we can attach to the
|
||||
running process:
|
||||
|
||||
.. prompt:: bash (gdb)
|
||||
|
||||
attach 2
|
||||
info threads
|
||||
bt
|
||||
|
@ -2,7 +2,7 @@
|
||||
Upgrading Ceph
|
||||
==============
|
||||
|
||||
Cephadm can safely upgrade Ceph from one bugfix release to the next. For
|
||||
Cephadm can safely upgrade Ceph from one point release to the next. For
|
||||
example, you can upgrade from v15.2.0 (the first Octopus release) to the next
|
||||
point release, v15.2.1.
|
||||
|
||||
@ -31,13 +31,13 @@ Before you use cephadm to upgrade Ceph, verify that all hosts are currently onli
|
||||
|
||||
ceph -s
|
||||
|
||||
To upgrade (or downgrade) to a specific release, run the following command:
|
||||
To upgrade to a specific release, run a command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch upgrade start --ceph-version <version>
|
||||
|
||||
For example, to upgrade to v16.2.6, run the following command:
|
||||
For example, to upgrade to v16.2.6, run a command of the following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
@ -106,31 +106,45 @@ doesn't use ``cephadm shell``) to a version compatible with the new version.
|
||||
Potential problems
|
||||
==================
|
||||
|
||||
There are a few health alerts that can arise during the upgrade process.
|
||||
|
||||
Error: ENOENT: Module not found
|
||||
-------------------------------
|
||||
|
||||
The message ``Error ENOENT: Module not found`` appears in response to the command ``ceph orch upgrade status`` if the orchestrator has crashed:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch upgrade status
|
||||
|
||||
::
|
||||
|
||||
Error ENOENT: Module not found
|
||||
|
||||
This is possibly caused by invalid JSON in a mgr config-key. See `Redmine tracker Issue #67329 <https://tracker.ceph.com/issues/67329>`_ and `the discussion on the [ceph-users] mailing list <https://www.spinics.net/lists/ceph-users/msg83667.html>`_.
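If you suspect a malformed value, one way to look for it (a sketch, not an official procedure) is to list the cephadm-related config-keys and inspect any suspicious entry with ``ceph config-key get``:

.. prompt:: bash #

ceph config-key ls | grep cephadm
ceph config-key get <suspicious-key>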
|
||||
|
||||
UPGRADE_NO_STANDBY_MGR
|
||||
----------------------
|
||||
|
||||
This alert (``UPGRADE_NO_STANDBY_MGR``) means that Ceph does not detect an
|
||||
active standby manager daemon. In order to proceed with the upgrade, Ceph
|
||||
requires an active standby manager daemon (which you can think of in this
|
||||
active standby Manager daemon. In order to proceed with the upgrade, Ceph
|
||||
requires an active standby Manager daemon (which you can think of in this
|
||||
context as "a second manager").
|
||||
|
||||
You can ensure that Cephadm is configured to run 2 (or more) managers by
|
||||
You can ensure that Cephadm is configured to run two (or more) Managers by
|
||||
running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch apply mgr 2 # or more
|
||||
|
||||
You can check the status of existing mgr daemons by running the following
|
||||
You can check the status of existing Manager daemons by running the following
|
||||
command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch ps --daemon-type mgr
|
||||
|
||||
If an existing mgr daemon has stopped, you can try to restart it by running the
|
||||
If an existing Manager daemon has stopped, you can try to restart it by running the
|
||||
following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
@ -158,7 +172,7 @@ Using customized container images
|
||||
=================================
|
||||
|
||||
For most users, upgrading requires nothing more complicated than specifying the
|
||||
Ceph version number to upgrade to. In such cases, cephadm locates the specific
|
||||
Ceph version to which to upgrade. In such cases, cephadm locates the specific
|
||||
Ceph container image to use by combining the ``container_image_base``
|
||||
configuration option (default: ``docker.io/ceph/ceph``) with a tag of
|
||||
``vX.Y.Z``.
|
||||
@ -168,7 +182,7 @@ you need. For example, the following command upgrades to a development build:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch upgrade start --image quay.io/ceph-ci/ceph:recent-git-branch-name
|
||||
ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:recent-git-branch-name
|
||||
|
||||
For more information about available container images, see :ref:`containers`.
|
||||
|
||||
|
@ -1,11 +1,13 @@
|
||||
.. _cephfs_add_remote_mds:
|
||||
|
||||
.. note::
|
||||
It is highly recommended to use :doc:`/cephadm/index` or another Ceph
|
||||
orchestrator for setting up the ceph cluster. Use this approach only if you
|
||||
are setting up the ceph cluster manually. If one still intends to use the
|
||||
manual way for deploying MDS daemons, :doc:`/cephadm/services/mds/` can
|
||||
also be used.
|
||||
.. warning:: The material on this page is to be used only for manually setting
|
||||
up a Ceph cluster. If you intend to use an automated tool such as
|
||||
:doc:`/cephadm/index` to set up a Ceph cluster, do not use the
|
||||
instructions on this page.
|
||||
|
||||
.. note:: If you are certain that you know what you are doing and you intend to
|
||||
manually deploy MDS daemons, see :doc:`/cephadm/services/mds/` before
|
||||
proceeding.
|
||||
|
||||
============================
|
||||
Deploying Metadata Servers
|
||||
@ -53,8 +55,7 @@ the MDS server. Even if a single MDS daemon is unable to fully utilize the
|
||||
hardware, it may be desirable later on to start more active MDS daemons on the
|
||||
same node to fully utilize the available cores and memory. Additionally, it may
|
||||
become clear with workloads on the cluster that performance improves with
|
||||
multiple active MDS on the same node rather than over-provisioning a single
|
||||
MDS.
|
||||
multiple active MDS on the same node rather than a single overloaded MDS.
|
||||
|
||||
Finally, be aware that CephFS is a highly-available file system by supporting
|
||||
standby MDS (see also :ref:`mds-standby`) for rapid failover. To get a real
|
||||
|
@ -209,3 +209,70 @@ cache. The limit is configured via:
|
||||
|
||||
It is not recommended to set this value above 5M but it may be helpful with
|
||||
some workloads.
|
||||
|
||||
|
||||
Dealing with "clients failing to respond to cache pressure" messages
|
||||
--------------------------------------------------------------------
|
||||
|
||||
Every second (or every interval set by the ``mds_cache_trim_interval``
|
||||
configuration parameter), the MDS runs the "cache trim" procedure. One of the
|
||||
steps of this procedure is "recall client state". During this step, the MDS
|
||||
checks every client (session) to determine whether it needs to recall caps.
|
||||
If any of the following are true, then the MDS needs to recall caps:
|
||||
|
||||
1. the cache is full (the ``mds_cache_memory_limit`` has been exceeded) and
|
||||
needs some inodes to be released
|
||||
2. the client exceeds ``mds_max_caps_per_client`` (1M by default)
|
||||
3. the client is inactive
|
||||
|
||||
To determine whether a client (a session) is inactive, the session's
|
||||
``cache_liveness`` parameter is checked and compared with the value::
|
||||
|
||||
(num_caps >> mds_session_cache_liveness_magnitude)
|
||||
|
||||
where ``mds_session_cache_liveness_magnitude`` is a config param (``10`` by
|
||||
default). If ``cache_liveness`` is smaller than this calculated value, the
|
||||
session is considered inactive and the MDS sends a "recall caps" request for
|
||||
all cached caps (the actual recall value is ``num_caps -
|
||||
mds_min_caps_per_client(100)``).
|
||||
|
||||
Under certain circumstances, many "recall caps" requests can be sent so quickly
|
||||
that the health warning is generated: "clients failing to respond to cache
|
||||
pressure". If the client does not release the caps fast enough, the MDS repeats
|
||||
the "recall caps" request one second later. This means that the MDS will send
|
||||
"recall caps" again and again. The "total" counter of "recall caps" for the
|
||||
session will grow and grow, and will eventually exceed the "mon warning limit".
|
||||
|
||||
A throttling mechanism, controlled by the ``mds_recall_max_decay_threshold``
|
||||
parameter (126K by default), is available for reducing the rate of "recall
|
||||
caps" counter growth, but sometimes it is not enough to slow the "recall caps"
|
||||
counter's growth rate. If altering the ``mds_recall_max_decay_threshold`` value
|
||||
does not sufficiently reduce the rate of the "recall caps" counter's growth,
|
||||
decrease ``mds_recall_max_caps`` incrementally until the "clients failing to
|
||||
respond to cache pressure" messages no longer appear in the logs.
|
||||
|
||||
Example Scenario
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
Here is an example. A client has 20k caps cached. At some point the MDS decides
that the client is inactive (because the session's ``cache_liveness`` value is
low). The MDS then starts asking the client to release caps down to the
``mds_min_caps_per_client`` value (100 by default). Every second it sends a
recall_caps request asking the client to release ``caps_num -
mds_min_caps_per_client`` caps (but never more than ``mds_recall_max_caps``,
which is 30k by default). Suppose the client releases caps at a rate of only
100 caps per second.

In the first second the MDS sends ``recall_caps = 20k - 100``, in the second
second ``recall_caps = (20k - 100) - 100``, in the third second ``recall_caps =
(20k - 200) - 100``, and so on. Each time it sends recall_caps, the MDS also
updates the session's recall_caps counter, which tracks how many caps were
requested for recall during the last minute. This counter therefore grows
quickly and eventually exceeds ``mds_recall_warning_threshold`` (128K by
default), at which point Ceph reports the "failing to respond to cache
pressure" warning in the cluster status. If ``mds_recall_max_caps`` is then
lowered to 3K, the MDS sends only 3K recall_caps per second, and the maximum
value that the session's recall_caps counter can reach (if the MDS sends 3K
every second for at least one minute) is 60 * 3K = 180K. It is therefore still
possible to exceed ``mds_recall_warning_threshold``, but only if a client fails
to respond for a long time, which is usually not the case.
|
||||
|
@ -24,7 +24,7 @@ This will mount the default ceph filesystem using the drive letter ``x``.
|
||||
If ``ceph.conf`` is placed at the default location, which is
|
||||
``%ProgramData%\ceph\ceph.conf``, then this argument becomes optional.
|
||||
|
||||
The ``-l`` argument also allows using an empty folder as a mountpoint
|
||||
The ``-l`` argument also allows using an empty folder as a mount point
|
||||
instead of a drive letter.
|
||||
|
||||
The uid and gid used for mounting the filesystem default to 0 and may be
|
||||
@ -75,7 +75,7 @@ like so::
|
||||
|
||||
ceph-dokan.exe unmap -l x
|
||||
|
||||
Note that when unmapping Ceph filesystems, the exact same mountpoint argument
|
||||
Note that when unmapping Ceph filesystems, the exact same mount point argument
|
||||
must be used as when the mapping was created.
|
||||
|
||||
Limitations
|
||||
|
@ -47,4 +47,4 @@ client cache.
|
||||
| MDSs | -=-------> | OSDs |
|
||||
+---------------------+ +--------------------+
|
||||
|
||||
.. _Architecture: ../architecture
|
||||
.. _Architecture: ../../architecture
|
||||
|
@ -93,6 +93,15 @@ providing high-availability.
|
||||
.. note:: Deploying a single mirror daemon is recommended. Running multiple
|
||||
daemons is untested.
|
||||
|
||||
The following file types are supported by the mirroring:
|
||||
|
||||
- Regular files (-)
|
||||
- Directory files (d)
|
||||
- Symbolic link file (l)
|
||||
|
||||
Other file types are ignored by the mirroring module, so they will not be
available on a successfully synchronized peer.
|
||||
|
||||
The mirroring module is disabled by default. To enable the mirroring module,
|
||||
run the following command:
|
||||
|
||||
@ -111,7 +120,9 @@ system, run a command of the following form:
|
||||
|
||||
.. note:: "Mirroring module" commands are prefixed with ``fs snapshot mirror``.
|
||||
This distinguishes them from "monitor commands", which are prefixed with ``fs
|
||||
mirror``. Be sure (in this context) to use module commands.
|
||||
mirror``. Enabling mirroring by using monitor commands will result in the mirror daemon
|
||||
entering the "failed" state due to the absence of the `cephfs_mirror` index object.
|
||||
So be sure (in this context) to use module commands.
|
||||
|
||||
To disable mirroring for a given file system, run a command of the following form:
|
||||
|
||||
@ -387,6 +398,44 @@ would mark the directory as failed and retry (less frequently). When the directo
|
||||
to existence, the mirror daemons would unmark the failed state upon successful snapshot
|
||||
synchronization.
|
||||
|
||||
Adding a new snapshot or a new directory manually in the .snap directory of the
|
||||
remote file system will result in a failed status for the corresponding configured directory.
|
||||
In the remote filesystem::
|
||||
|
||||
$ ceph fs subvolume snapshot create cephfs subvol1 snap2 group1
|
||||
or
|
||||
$ mkdir /d0/.snap/snap2
|
||||
|
||||
$ ceph --admin-daemon /var/run/ceph/cephfs-mirror.asok fs mirror peer status cephfs@360 a2dc7784-e7a1-4723-b103-03ee8d8768f8
|
||||
{
|
||||
"/d0": {
|
||||
"state": "failed",
|
||||
"failure_reason": "snapshot 'snap2' has invalid metadata",
|
||||
"last_synced_snap": {
|
||||
"id": 120,
|
||||
"name": "snap1",
|
||||
"sync_duration": 0.079997898999999997,
|
||||
"sync_time_stamp": "274900.558797s"
|
||||
},
|
||||
"snaps_synced": 2,
|
||||
"snaps_deleted": 0,
|
||||
"snaps_renamed": 0
|
||||
},
|
||||
"/f0": {
|
||||
"state": "failed",
|
||||
"snaps_synced": 0,
|
||||
"snaps_deleted": 0,
|
||||
"snaps_renamed": 0
|
||||
}
|
||||
}
|
||||
|
||||
When the snapshot or the directory is removed from the remote filesystem, the mirror daemon will
|
||||
clear the failed state upon successful synchronization of the pending snapshots, if any.
|
||||
|
||||
.. note:: Treat the remote file system as read-only. This is not enforced by
CephFS itself, but with the right MDS caps, users will not be able to snapshot
directories in the remote file system.
|
||||
|
||||
When mirroring is disabled, the respective `fs mirror status` command for the file system
|
||||
will not show up in command help.
|
||||
|
||||
|
@ -63,6 +63,62 @@ By default, `cephfs-top` uses `client.fstop` user to connect to a Ceph cluster::
|
||||
$ ceph auth get-or-create client.fstop mon 'allow r' mds 'allow r' osd 'allow r' mgr 'allow r'
|
||||
$ cephfs-top
|
||||
|
||||
Description of Fields
|
||||
---------------------
|
||||
|
||||
1. chit : Cap hit
|
||||
Percentage of file capability hits over total number of caps
|
||||
|
||||
2. dlease : Dentry lease
|
||||
Percentage of dentry leases handed out over the total dentry lease requests
|
||||
|
||||
3. ofiles : Opened files
|
||||
Number of opened files
|
||||
|
||||
4. oicaps : Pinned caps
|
||||
Number of pinned caps
|
||||
|
||||
5. oinodes : Opened inodes
|
||||
Number of opened inodes
|
||||
|
||||
6. rtio : Total size of read IOs
|
||||
Number of bytes read in input/output operations generated by all processes
|
||||
|
||||
7. wtio : Total size of write IOs
|
||||
Number of bytes written in input/output operations generated by all processes
|
||||
|
||||
8. raio : Average size of read IOs
|
||||
Mean number of bytes read in input/output operations generated by all
processes over the total IO done
|
||||
|
||||
9. waio : Average size of write IOs
|
||||
Mean number of bytes written in input/output operations generated by all
processes over the total IO done
|
||||
|
||||
10. rsp : Read speed
|
||||
Speed of read IOs with respect to the duration since the last refresh of clients
|
||||
|
||||
11. wsp : Write speed
|
||||
Speed of write IOs with respect to the duration since the last refresh of clients
|
||||
|
||||
12. rlatavg : Average read latency
|
||||
Mean value of the read latencies
|
||||
|
||||
13. rlatsd : Standard deviation (variance) for read latency
|
||||
Dispersion of the metric for the read latency relative to its mean
|
||||
|
||||
14. wlatavg : Average write latency
|
||||
Mean value of the write latencies
|
||||
|
||||
15. wlatsd : Standard deviation (variance) for write latency
|
||||
Dispersion of the metric for the write latency relative to its mean
|
||||
|
||||
16. mlatavg : Average metadata latency
|
||||
Mean value of the metadata latencies
|
||||
|
||||
17. mlatsd : Standard deviation (variance) for metadata latency
|
||||
Dispersion of the metric for the metadata latency relative to its mean
|
||||
|
||||
Command-Line Options
|
||||
--------------------
|
||||
|
||||
|
@ -2,42 +2,55 @@
|
||||
CephFS Client Capabilities
|
||||
================================
|
||||
|
||||
Use Ceph authentication capabilities to restrict your file system clients
|
||||
to the lowest possible level of authority needed.
|
||||
Ceph authentication capabilities are used to restrict CephFS clients to
|
||||
the lowest level of authority necessary.
|
||||
|
||||
.. note:: Path restriction and layout modification restriction are new features
|
||||
in the Jewel release of Ceph.
|
||||
.. note:: Path restriction and layout-modification restriction were introduced
|
||||
in the Jewel release of Ceph.
|
||||
|
||||
.. note:: Using Erasure Coded(EC) pools with CephFS is supported only with the
|
||||
BlueStore Backend. They cannot be used as metadata pools and overwrites must
|
||||
be enabled on the data pools.
|
||||
.. note:: Using Erasure Coded (EC) pools with CephFS is supported only with
|
||||
:term:`BlueStore`. Erasure-coded pools cannot be used as metadata pools.
|
||||
Overwrites must be enabled on erasure-coded data pools.
|
||||
|
||||
|
||||
Path restriction
|
||||
================
|
||||
|
||||
By default, clients are not restricted in what paths they are allowed to
|
||||
mount. Further, when clients mount a subdirectory, e.g., ``/home/user``, the
|
||||
MDS does not by default verify that subsequent operations are ‘locked’ within
|
||||
that directory.
|
||||
By default, clients are not restricted in the paths that they are allowed to
|
||||
mount. When clients mount a subdirectory (for example ``/home/user``), the MDS
|
||||
does not by default verify that subsequent operations are "locked" within that
|
||||
directory.
|
||||
|
||||
To restrict clients so that they mount and work only within a certain
|
||||
directory, use path-based MDS authentication capabilities.
|
||||
|
||||
This restriction impacts *only* the filesystem hierarchy, or, in other words,
|
||||
the metadata tree that is managed by the MDS. Clients will still be able to
|
||||
access the underlying file data in RADOS directly. To segregate clients fully,
|
||||
isolate untrusted clients in their own RADOS namespace. You can place a
|
||||
client's filesystem subtree in a particular namespace using :ref:`file
|
||||
layouts<file-layouts>` and then restrict their RADOS access to that namespace
|
||||
using :ref:`OSD capabilities<modify-user-capabilities>`.
|
||||
|
||||
To restrict clients to only mount and work within a certain directory, use
|
||||
path-based MDS authentication capabilities.
|
||||
|
||||
Syntax
|
||||
------
|
||||
|
||||
To grant rw access to the specified directory only, we mention the specified
|
||||
directory while creating key for a client using the following syntax::
|
||||
To grant ``rw`` access to the specified directory only, mention the specified
|
||||
directory while creating key for a client. Use a command of the following form:
|
||||
|
||||
ceph fs authorize <fs_name> client.<client_id> <path-in-cephfs> rw
|
||||
.. prompt:: bash #
|
||||
|
||||
For example, to restrict client ``foo`` to writing only in the ``bar``
|
||||
directory of file system ``cephfs_a``, use ::
|
||||
ceph fs authorize <fs_name> client.<client_id> <path-in-cephfs> rw
|
||||
|
||||
ceph fs authorize cephfs_a client.foo / r /bar rw
|
||||
For example, to restrict a client named ``foo`` so that it can write only in
|
||||
the ``bar`` directory of file system ``cephfs_a``, run the following command:
|
||||
|
||||
results in:
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize cephfs_a client.foo / r /bar rw
|
||||
|
||||
This results in::
|
||||
|
||||
client.foo
|
||||
key: *key*
|
||||
@ -46,59 +59,65 @@ directory of file system ``cephfs_a``, use ::
|
||||
caps: [osd] allow rw tag cephfs data=cephfs_a
|
||||
|
||||
To completely restrict the client to the ``bar`` directory, omit the
|
||||
root directory ::
|
||||
root directory:
|
||||
|
||||
ceph fs authorize cephfs_a client.foo /bar rw
|
||||
.. prompt:: bash #
|
||||
|
||||
Note that if a client's read access is restricted to a path, they will only
|
||||
be able to mount the file system when specifying a readable path in the
|
||||
mount command (see below).
|
||||
ceph fs authorize cephfs_a client.foo /bar rw
|
||||
|
||||
Supplying ``all`` or ``*`` as the file system name will grant access to every
|
||||
file system. Note that it is usually necessary to quote ``*`` to protect it
|
||||
from the shell.
|
||||
If a client's read access is restricted to a path, the client will be able to
|
||||
mount the file system only by specifying a readable path in the mount command
|
||||
(see below).
|
||||
|
||||
See `User Management - Add a User to a Keyring`_. for additional details on
|
||||
user management
|
||||
Supplying ``all`` or ``*`` as the file system name grants access to every file
|
||||
system. It is usually necessary to quote ``*`` to protect it from the
|
||||
shell.
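For example (a sketch of the form; note the quoting of ``*``):

.. prompt:: bash #

ceph fs authorize '*' client.<client_id> / rw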
|
||||
|
||||
To restrict a client to the specified sub-directory only, we mention the
|
||||
specified directory while mounting using the following syntax::
|
||||
See `User Management - Add a User to a Keyring`_ for more on user management.
|
||||
|
||||
ceph-fuse -n client.<client_id> <mount-path> -r *directory_to_be_mounted*
|
||||
To restrict a client to only the specified sub-directory, mention the specified
|
||||
directory while mounting. Use a command of the following form:
|
||||
|
||||
For example, to restrict client ``foo`` to ``mnt/bar`` directory, we will
|
||||
use::
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph-fuse -n client.foo mnt -r /bar
|
||||
ceph-fuse -n client.<client_id> <mount-path> -r *directory_to_be_mounted*
|
||||
|
||||
Free space reporting
|
||||
For example, to restrict client ``foo`` to ``mnt/bar`` directory, use the
|
||||
following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph-fuse -n client.foo mnt -r /bar
|
||||
|
||||
Reporting free space
|
||||
--------------------
|
||||
|
||||
By default, when a client is mounting a sub-directory, the used space (``df``)
|
||||
will be calculated from the quota on that sub-directory, rather than reporting
|
||||
the overall amount of space used on the cluster.
|
||||
|
||||
If you would like the client to report the overall usage of the file system,
|
||||
and not just the quota usage on the sub-directory mounted, then set the
|
||||
following config option on the client::
|
||||
When a client has mounted a sub-directory, the used space (``df``) is
|
||||
calculated from the quota on that sub-directory rather than from the overall
|
||||
amount of space used on the CephFS file system.
|
||||
|
||||
To make the client report the overall usage of the file system and not only the
|
||||
quota usage on the mounted sub-directory, set the following config option on
|
||||
the client::
|
||||
|
||||
client quota df = false
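Assuming the option is managed centrally, it can equivalently be set with ``ceph config`` (a sketch; the option's internal name uses underscores):

.. prompt:: bash #

ceph config set client client_quota_df false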
|
||||
|
||||
If quotas are not enabled, or no quota is set on the sub-directory mounted,
|
||||
then the overall usage of the file system will be reported irrespective of
|
||||
the value of this setting.
|
||||
If quotas are not enabled or if no quota is set on the mounted sub-directory,
|
||||
then the overall usage of the file system will be reported irrespective of the
|
||||
value of this setting.
|
||||
|
||||
.. _cephfs-layout-and-quota-restriction:
|
||||
|
||||
Layout and Quota restriction (the 'p' flag)
|
||||
===========================================
|
||||
|
||||
To set layouts or quotas, clients require the 'p' flag in addition to 'rw'.
|
||||
This restricts all the attributes that are set by special extended attributes
|
||||
with a "ceph." prefix, as well as restricting other means of setting
|
||||
these fields (such as openc operations with layouts).
|
||||
To set layouts or quotas, clients require the ``p`` flag in addition to ``rw``.
|
||||
Using the ``p`` flag with ``rw`` restricts all the attributes that are set by
|
||||
special extended attributes by using a ``ceph.`` prefix, and restricts
|
||||
other means of setting these fields (such as ``openc`` operations with layouts).
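To request the ``p`` flag when creating a client, append it to the permission string passed to ``fs authorize``, for example (a sketch):

.. prompt:: bash #

ceph fs authorize cephfs_a client.0 / rwp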
|
||||
|
||||
For example, in the following snippet client.0 can modify layouts and quotas
|
||||
on the file system cephfs_a, but client.1 cannot::
|
||||
For example, in the following snippet ``client.0`` can modify layouts and
|
||||
quotas on the file system ``cephfs_a``, but ``client.1`` cannot::
|
||||
|
||||
client.0
|
||||
key: AQAz7EVWygILFRAAdIcuJ12opU/JKyfFmxhuaw==
|
||||
@ -116,12 +135,12 @@ on the file system cephfs_a, but client.1 cannot::
|
||||
Snapshot restriction (the 's' flag)
|
||||
===========================================
|
||||
|
||||
To create or delete snapshots, clients require the 's' flag in addition to
|
||||
'rw'. Note that when capability string also contains the 'p' flag, the 's'
|
||||
flag must appear after it (all flags except 'rw' must be specified in
|
||||
To create or delete snapshots, clients require the ``s`` flag in addition to
|
||||
``rw``. Note that when capability string also contains the ``p`` flag, the
|
||||
``s`` flag must appear after it (all flags except ``rw`` must be specified in
|
||||
alphabetical order).
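For example, a capability string that requests ``rw`` together with the ``p`` and ``s`` flags is written ``rwps`` (a sketch):

.. prompt:: bash #

ceph fs authorize cephfs_a client.0 /bar rwps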
|
||||
|
||||
For example, in the following snippet client.0 can create or delete snapshots
|
||||
For example, in the following snippet ``client.0`` can create or delete snapshots
|
||||
in the ``bar`` directory of file system ``cephfs_a``::
|
||||
|
||||
client.0
|
||||
@ -144,9 +163,9 @@ Network restriction
|
||||
caps: [mon] allow r network 10.0.0.0/8
|
||||
caps: [osd] allow rw tag cephfs data=cephfs_a network 10.0.0.0/8
|
||||
|
||||
The optional ``{network/prefix}`` is a standard network name and
|
||||
prefix length in CIDR notation (e.g., ``10.3.0.0/16``). If present,
|
||||
the use of this capability is restricted to clients connecting from
|
||||
The optional ``{network/prefix}`` is a standard network-name-and-prefix length
|
||||
in CIDR notation (for example, ``10.3.0.0/16``). If ``{network/prefix}`` is
|
||||
present, the use of this capability is restricted to clients connecting from
|
||||
this network.
|
||||
|
||||
.. _fs-authorize-multifs:
|
||||
@ -154,98 +173,369 @@ this network.
|
||||
File system Information Restriction
|
||||
===================================
|
||||
|
||||
If desired, the monitor cluster can present a limited view of the file systems
|
||||
available. In this case, the monitor cluster will only inform clients about
|
||||
file systems specified by the administrator. Other file systems will not be
|
||||
reported and commands affecting them will fail as if the file systems do
|
||||
not exist.
|
||||
The monitor cluster can present a limited view of the available file systems.
|
||||
In this case, the monitor cluster informs clients only about file systems
|
||||
specified by the administrator. Other file systems are not reported and
|
||||
commands affecting them fail as though the file systems do not exist.
|
||||
|
||||
Consider following example. The Ceph cluster has 2 FSs::
|
||||
Consider the following example. The Ceph cluster has two file systems:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs ls
|
||||
|
||||
::
|
||||
|
||||
$ ceph fs ls
|
||||
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
|
||||
name: cephfs2, metadata pool: cephfs2_metadata, data pools: [cephfs2_data ]
|
||||
|
||||
But we authorize client ``someuser`` for only one FS::
|
||||
We authorize client ``someuser`` for only one file system:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize cephfs client.someuser / rw
|
||||
|
||||
::
|
||||
|
||||
$ ceph fs authorize cephfs client.someuser / rw
|
||||
[client.someuser]
|
||||
key = AQAmthpf89M+JhAAiHDYQkMiCq3x+J0n9e8REQ==
|
||||
$ cat ceph.client.someuser.keyring
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cat ceph.client.someuser.keyring
|
||||
|
||||
::
|
||||
|
||||
[client.someuser]
|
||||
key = AQAmthpf89M+JhAAiHDYQkMiCq3x+J0n9e8REQ==
|
||||
caps mds = "allow rw fsname=cephfs"
|
||||
caps mon = "allow r fsname=cephfs"
|
||||
caps osd = "allow rw tag cephfs data=cephfs"
|
||||
|
||||
And the client can only see the FS that it has authorization for::
|
||||
The client can see only the file system that it is authorized to see:
|
||||
|
||||
$ ceph fs ls -n client.someuser -k ceph.client.someuser.keyring
|
||||
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
|
||||
.. prompt:: bash #
|
||||
|
||||
Standby MDS daemons will always be displayed. Note that the information about
|
||||
restricted MDS daemons and file systems may become available by other means,
|
||||
such as ``ceph health detail``.
|
||||
ceph fs ls -n client.someuser -k ceph.client.someuser.keyring
|
||||
|
||||
::
|
||||
|
||||
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
|
||||
|
||||
Standby MDS daemons are always displayed. Information about restricted MDS
|
||||
daemons and file systems may become available by other means, such as by
|
||||
running ``ceph health detail``.
|
||||
|
||||
MDS communication restriction
|
||||
=============================
|
||||
|
||||
By default, user applications may communicate with any MDS, whether or not
|
||||
they are allowed to modify data on an associated file system (see
|
||||
`Path restriction` above). Client's communication can be restricted to MDS
|
||||
daemons associated with particular file system(s) by adding MDS caps for that
|
||||
By default, user applications may communicate with any MDS, regardless of
|
||||
whether they are allowed to modify data on an associated file system (see `Path
|
||||
restriction` above). Client communication can be restricted to MDS daemons
|
||||
associated with particular file system(s) by adding MDS caps for that
|
||||
particular file system. Consider the following example where the Ceph cluster
|
||||
has 2 FSs::
|
||||
has two file systems:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs ls
|
||||
|
||||
::
|
||||
|
||||
$ ceph fs ls
|
||||
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
|
||||
name: cephfs2, metadata pool: cephfs2_metadata, data pools: [cephfs2_data ]
|
||||
|
||||
Client ``someuser`` is authorized only for one FS::
|
||||
Client ``someuser`` is authorized for only one file system:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize cephfs client.someuser / rw
|
||||
|
||||
::
|
||||
|
||||
$ ceph fs authorize cephfs client.someuser / rw
|
||||
[client.someuser]
|
||||
key = AQBPSARfg8hCJRAAEegIxjlm7VkHuiuntm6wsA==
|
||||
$ ceph auth get client.someuser > ceph.client.someuser.keyring
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get client.someuser > ceph.client.someuser.keyring
|
||||
|
||||
::
|
||||
|
||||
exported keyring for client.someuser
|
||||
$ cat ceph.client.someuser.keyring
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cat ceph.client.someuser.keyring
|
||||
|
||||
::
|
||||
|
||||
[client.someuser]
|
||||
key = AQBPSARfg8hCJRAAEegIxjlm7VkHuiuntm6wsA==
|
||||
caps mds = "allow rw fsname=cephfs"
|
||||
caps mon = "allow r"
|
||||
caps osd = "allow rw tag cephfs data=cephfs"
|
||||
|
||||
Mounting ``cephfs1`` with ``someuser`` works::
|
||||
Mounting ``cephfs`` on the already-created mount point ``/mnt/cephfs1`` with
|
||||
``someuser`` works:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
sudo ceph-fuse /mnt/cephfs1 -n client.someuser -k ceph.client.someuser.keyring --client-fs=cephfs
|
||||
|
||||
.. note:: If ``/mnt/cephfs1`` does not exist prior to running the above command,
create it by running ``mkdir /mnt/cephfs1``.
|
||||
|
||||
::
|
||||
|
||||
$ sudo ceph-fuse /mnt/cephfs1 -n client.someuser -k ceph.client.someuser.keyring --client-fs=cephfs
|
||||
ceph-fuse[96634]: starting ceph client
|
||||
ceph-fuse[96634]: starting fuse
|
||||
$ mount | grep ceph-fuse
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
mount | grep ceph-fuse
|
||||
|
||||
::
|
||||
|
||||
ceph-fuse on /mnt/cephfs1 type fuse.ceph-fuse (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other)
|
||||
|
||||
But mounting ``cephfs2`` does not::
|
||||
Mounting ``cephfs2`` with ``someuser`` does not work:
|
||||
|
||||
$ sudo ceph-fuse /mnt/cephfs2 -n client.someuser -k ceph.client.someuser.keyring --client-fs=cephfs2
|
||||
ceph-fuse[96599]: starting ceph client
|
||||
ceph-fuse[96599]: ceph mount failed with (1) Operation not permitted
|
||||
.. prompt:: bash #
|
||||
|
||||
sudo ceph-fuse /mnt/cephfs2 -n client.someuser -k ceph.client.someuser.keyring --client-fs=cephfs2
|
||||
|
||||
::
|
||||
|
||||
ceph-fuse[96599]: starting ceph client
|
||||
ceph-fuse[96599]: ceph mount failed with (1) Operation not permitted
|
||||
|
||||
Root squash
|
||||
===========
|
||||
|
||||
The ``root squash`` feature is implemented as a safety measure to prevent
|
||||
scenarios such as accidental ``sudo rm -rf /path``. You can enable
|
||||
``root_squash`` mode in MDS caps to disallow clients with uid=0 or gid=0 to
|
||||
perform write access operations -- e.g., rm, rmdir, rmsnap, mkdir, mksnap.
|
||||
However, the mode allows the read operations of a root client unlike in
|
||||
other file systems.
|
||||
scenarios such as an accidental forced removal of a path (for example, ``sudo
|
||||
rm -rf /path``). Enable ``root_squash`` mode in MDS caps to disallow clients
|
||||
with ``uid=0`` or ``gid=0`` to perform write access operations (for example
|
||||
``rm``, ``rmdir``, ``rmsnap``, ``mkdir``, and ``mksnap``). Unlike other file
systems, this mode still permits read operations by a root client.
|
||||
|
||||
Following is an example of enabling root_squash in a filesystem except within
|
||||
'/volumes' directory tree in the filesystem::
|
||||
Here is an example of enabling ``root_squash`` in a filesystem, except within
|
||||
the ``/volumes`` directory tree in the filesystem:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.test_a / rw root_squash /volumes rw
|
||||
ceph auth get client.test_a
|
||||
|
||||
::
|
||||
|
||||
$ ceph fs authorize a client.test_a / rw root_squash /volumes rw
|
||||
$ ceph auth get client.test_a
|
||||
[client.test_a]
|
||||
key = AQBZcDpfEbEUKxAADk14VflBXt71rL9D966mYA==
|
||||
caps mds = "allow rw fsname=a root_squash, allow rw fsname=a path=/volumes"
|
||||
caps mon = "allow r fsname=a"
|
||||
caps osd = "allow rw tag cephfs data=a"
|
||||
|
||||
Updating Capabilities using ``fs authorize``
|
||||
============================================
|
||||
|
||||
Beginning with the Reef release of Ceph, ``fs authorize`` can be used to add
|
||||
new caps to an existing client (for another CephFS or another path in the same
|
||||
file system).
|
||||
|
||||
The following example demonstrates the behavior that results from running the command ``ceph fs authorize a client.x / rw`` twice.
|
||||
|
||||
#. Create a new client:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.x / rw
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQAOtSVk9WWtIhAAJ3gSpsjwfIQ0gQ6vfSx/0w==
|
||||
|
||||
#. Get the client capabilities:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get client.x
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQAOtSVk9WWtIhAAJ3gSpsjwfIQ0gQ6vfSx/0w==
|
||||
caps mds = "allow rw fsname=a"
|
||||
caps mon = "allow r fsname=a"
|
||||
caps osd = "allow rw tag cephfs data=a"
|
||||
|
||||
#. Previously, running ``fs authorize a client.x / rw`` a second time printed
|
||||
an error message. In the Reef release and in later releases, this command
|
||||
prints a message reporting that the capabilities did not get updated:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.x / rw
|
||||
|
||||
::
|
||||
|
||||
no update for caps of client.x
|
||||
|
||||
Adding New Caps Using ``fs authorize``
|
||||
--------------------------------------
|
||||
|
||||
Add capabilities for another path in same CephFS:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.x /dir1 rw
|
||||
|
||||
::
|
||||
|
||||
updated caps for client.x
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get client.x
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQAOtSVk9WWtIhAAJ3gSpsjwfIQ0gQ6vfSx/0w==
|
||||
caps mds = "allow r fsname=a, allow rw fsname=a path=some/dir"
|
||||
caps mon = "allow r fsname=a"
|
||||
caps osd = "allow rw tag cephfs data=a"
|
||||
|
||||
Add capabilities for another CephFS on the Ceph cluster:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize b client.x / rw
|
||||
|
||||
::
|
||||
|
||||
updated caps for client.x
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get client.x
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQD6tiVk0uJdARAABMaQuLRotxTi3Qdj47FkBA==
|
||||
caps mds = "allow rw fsname=a, allow rw fsname=b"
|
||||
caps mon = "allow r fsname=a, allow r fsname=b"
|
||||
caps osd = "allow rw tag cephfs data=a, allow rw tag cephfs data=b"
|
||||
|
||||
Changing rw permissions in caps
|
||||
-------------------------------
|
||||
|
||||
Capabilities can be modified by running ``fs authorize`` only in the case when
|
||||
read/write permissions must be changed. This is because the command ``fs
|
||||
authorize`` becomes ambiguous. For example, a user runs ``fs authorize cephfs1
|
||||
client.x /dir1 rw`` to create a client and then runs ``fs authorize cephfs1
|
||||
client.x /dir2 rw`` (notice that ``/dir1`` has been changed to ``/dir2``).
|
||||
Running the second command could be interpreted to change ``/dir1`` to
|
||||
``/dir2`` with current capabilities or could be interpreted to authorize the
|
||||
client with a new capability for the path ``/dir2``. As shown previously, the
|
||||
second interpretation is chosen and it is therefore impossible to update a part
|
||||
of the capabilities granted except ``rw`` permissions. The following shows how
|
||||
read/write permissions for ``client.x`` can be changed:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.x / r
|
||||
[client.x]
|
||||
key = AQBBKjBkIFhBDBAA6q5PmDDWaZtYjd+jafeVUQ==
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get client.x
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQBBKjBkIFhBDBAA6q5PmDDWaZtYjd+jafeVUQ==
|
||||
caps mds = "allow r fsname=a"
|
||||
caps mon = "allow r fsname=a"
|
||||
caps osd = "allow r tag cephfs data=a"
|
||||
|
||||
``fs authorize`` never deducts any part of caps
|
||||
-----------------------------------------------
|
||||
Capabilities that have been issued to a client can not be removed by running
|
||||
``fs authorize`` again. For example, if a client capability has ``root_squash``
|
||||
applied on a certain CephFS, running ``fs authorize`` again for the same CephFS
|
||||
but without ``root_squash`` will not lead to any update and the client caps will
|
||||
remain unchanged:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.x / rw root_squash
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQD61CVkcA1QCRAAd0XYqPbHvcc+lpUAuc6Vcw==
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get client.x
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQD61CVkcA1QCRAAd0XYqPbHvcc+lpUAuc6Vcw==
|
||||
caps mds = "allow rw fsname=a root_squash"
|
||||
caps mon = "allow r fsname=a"
|
||||
caps osd = "allow rw tag cephfs data=a"
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.x / rw
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQD61CVkcA1QCRAAd0XYqPbHvcc+lpUAuc6Vcw==
|
||||
no update was performed for caps of client.x. caps of client.x remains unchanged.
|
||||
|
||||
If a client already has a capability for file-system name ``a`` and path
|
||||
``dir1``, running ``fs authorize`` again for FS name ``a`` but path ``dir2``,
|
||||
instead of modifying the capabilities client already holds, a new cap for
|
||||
``dir2`` will be granted:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.x /dir1 rw
|
||||
ceph auth get client.x
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQC1tyVknMt+JxAAp0pVnbZGbSr/nJrmkMNKqA==
|
||||
caps mds = "allow rw fsname=a path=/dir1"
|
||||
caps mon = "allow r fsname=a"
|
||||
caps osd = "allow rw tag cephfs data=a"
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs authorize a client.x /dir2 rw
|
||||
|
||||
::
|
||||
|
||||
updated caps for client.x
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get client.x
|
||||
|
||||
::
|
||||
|
||||
[client.x]
|
||||
key = AQC1tyVknMt+JxAAp0pVnbZGbSr/nJrmkMNKqA==
|
||||
caps mds = "allow rw fsname=a path=dir1, allow rw fsname=a path=dir2"
|
||||
caps mon = "allow r fsname=a"
|
||||
caps osd = "allow rw tag cephfs data=a"
|
||||
|
@ -15,7 +15,7 @@ Advanced: Metadata repair tools
|
||||
file system before attempting to repair it.
|
||||
|
||||
If you do not have access to professional support for your cluster,
|
||||
consult the ceph-users mailing list or the #ceph IRC channel.
|
||||
consult the ceph-users mailing list or the #ceph IRC/Slack channel.
|
||||
|
||||
|
||||
Journal export
|
||||
|
@ -6,6 +6,9 @@ File layouts
|
||||
The layout of a file controls how its contents are mapped to Ceph RADOS objects. You can
|
||||
read and write a file's layout using *virtual extended attributes* or xattrs.
|
||||
|
||||
Clients must use the ``p`` flag when writing a file's layout. See :ref:`Layout
|
||||
and Quota restriction (the 'p' flag) <cephfs-layout-and-quota-restriction>`.
|
||||
|
||||
The name of the layout xattrs depends on whether a file is a regular file or a directory. Regular
|
||||
files' layout xattrs are called ``ceph.file.layout``, whereas directories' layout xattrs are called
|
||||
``ceph.dir.layout``. Where subsequent examples refer to ``ceph.file.layout``, substitute ``dir`` as appropriate
|
||||
@ -20,26 +23,38 @@ Layout fields
|
||||
-------------
|
||||
|
||||
pool
|
||||
String, giving ID or name. String can only have characters in the set [a-zA-Z0-9\_-.]. Which RADOS pool a file's data objects will be stored in.
|
||||
This is a string and contains either an ID or a name. Strings may contain
|
||||
only characters in the set ``[a-zA-Z0-9\_-.]``. This determines the RADOS
|
||||
pool that stores a file's data objects.
|
||||
|
||||
pool_id
|
||||
String of digits. This is the system assigned pool id for the RADOS pool whenever it is created.
|
||||
This is a string of digits. This is the pool ID that was assigned by Ceph
|
||||
at the time of the creation of the RADOS pool.
|
||||
|
||||
pool_name
|
||||
String, given name. This is the user defined name for the RADOS pool whenever user creates it.
|
||||
This is a string. This is the name of the RADOS pool as defined by the user
|
||||
when the pool was created.
|
||||
|
||||
pool_namespace
|
||||
String with only characters in the set [a-zA-Z0-9\_-.]. Within the data pool, which RADOS namespace the objects will
|
||||
be written to. Empty by default (i.e. default namespace).
|
||||
This is a string containing only characters in the set ``[a-zA-Z0-9\_-.]``.
|
||||
This determines which RADOS namespace within the data pool that the objects
|
||||
will be written to.
|
||||
Empty by default (i.e. default namespace).
|
||||
|
||||
stripe_unit
|
||||
Integer in bytes. The size (in bytes) of a block of data used in the RAID 0 distribution of a file. All stripe units for a file have equal size. The last stripe unit is typically incomplete–i.e. it represents the data at the end of the file as well as unused “space” beyond it up to the end of the fixed stripe unit size.
|
||||
This is an integer. The size (in bytes) of a block of data used in the
|
||||
distribution of a file. All stripe units for a file have equal size. The
|
||||
last stripe unit is typically only partly full of data: it holds file data
|
||||
through EOF as well as padding that fills the balance of the fixed stripe
|
||||
unit size.
|
||||
|
||||
stripe_count
|
||||
Integer. The number of consecutive stripe units that constitute a RAID 0 “stripe” of file data.
|
||||
Integer. The number of consecutive stripe units that constitute a RAID 0
|
||||
“stripe” of file data.
|
||||
|
||||
object_size
|
||||
Integer in bytes. File data is chunked into RADOS objects of this size.
|
||||
Integer. The size of the object in bytes. File data is chunked into RADOS
|
||||
objects of this size.
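
As an illustration of how these fields are accessed, the layout xattrs can be
read and written with ``getfattr`` and ``setfattr`` on a mounted file system
(``somefile`` here is a hypothetical file name used only for this sketch):

.. prompt:: bash #

   getfattr -n ceph.file.layout somefile
   setfattr -n ceph.file.layout.stripe_unit -v 1048576 somefile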
|
||||
|
||||
.. tip::
|
||||
|
||||
|
@ -14,17 +14,17 @@ abstractions:
|
||||
|
||||
* FS volumes, an abstraction for CephFS file systems
|
||||
|
||||
* FS subvolumes, an abstraction for independent CephFS directory trees
|
||||
|
||||
* FS subvolume groups, an abstraction for a directory level higher than FS
|
||||
subvolumes. Used to effect policies (e.g., :doc:`/cephfs/file-layouts`)
|
||||
across a set of subvolumes
|
||||
|
||||
Some possible use-cases for the export abstractions:
|
||||
* FS subvolumes, an abstraction for independent CephFS directory trees
|
||||
|
||||
Possible use-cases for the export abstractions:
|
||||
|
||||
* FS subvolumes used as Manila shares or CSI volumes
|
||||
|
||||
* FS subvolume groups used as Manila share groups
|
||||
* FS-subvolume groups used as Manila share groups
|
||||
|
||||
Requirements
|
||||
------------
|
||||
@ -46,9 +46,9 @@ Create a volume by running the following command:
|
||||
|
||||
ceph fs volume create <vol_name> [placement]
|
||||
|
||||
This creates a CephFS file system and its data and metadata pools. It can also
|
||||
deploy MDS daemons for the filesystem using a ceph-mgr orchestrator module (for
|
||||
example Rook). See :doc:`/mgr/orchestrator`.
|
||||
This creates a CephFS file system and its data and metadata pools. This command
|
||||
can also deploy MDS daemons for the filesystem using a Ceph Manager orchestrator
|
||||
module (for example Rook). See :doc:`/mgr/orchestrator`.
|
||||
|
||||
``<vol_name>`` is the volume name (an arbitrary string). ``[placement]`` is an
|
||||
optional string that specifies the :ref:`orchestrator-cli-placement-spec` for
|
||||
@ -62,13 +62,13 @@ To remove a volume, run the following command:
|
||||
|
||||
$ ceph fs volume rm <vol_name> [--yes-i-really-mean-it]
|
||||
|
||||
This removes a file system and its data and metadata pools. It also tries to
|
||||
remove MDS daemons using the enabled ceph-mgr orchestrator module.
|
||||
This command removes a file system and its data and metadata pools. It also
|
||||
tries to remove MDS daemons using the enabled Ceph Manager orchestrator module.
|
||||
|
||||
.. note:: After volume deletion, it is recommended to restart `ceph-mgr`
|
||||
if a new file system is created on the same cluster and subvolume interface
|
||||
is being used. Please see https://tracker.ceph.com/issues/49605#note-5
|
||||
for more details.
|
||||
.. note:: After volume deletion, we recommend restarting `ceph-mgr` if a new
|
||||
file system is created on the same cluster and the subvolume interface is
|
||||
being used. See https://tracker.ceph.com/issues/49605#note-5 for more
|
||||
details.
|
||||
|
||||
List volumes by running the following command:
|
||||
|
||||
@ -80,23 +80,24 @@ Rename a volume by running the following command:
|
||||
|
||||
Renaming a volume can be an expensive operation that requires the following:
|
||||
|
||||
- Renaming the orchestrator-managed MDS service to match the <new_vol_name>.
|
||||
This involves launching a MDS service with ``<new_vol_name>`` and bringing
|
||||
down the MDS service with ``<vol_name>``.
|
||||
- Renaming the file system matching ``<vol_name>`` to ``<new_vol_name>``.
|
||||
- Changing the application tags on the data and metadata pools of the file system
|
||||
to ``<new_vol_name>``.
|
||||
- Renaming the orchestrator-managed MDS service to match the
|
||||
``<new_vol_name>``. This involves launching a MDS service with
|
||||
``<new_vol_name>`` and bringing down the MDS service with ``<vol_name>``.
|
||||
- Renaming the file system from ``<vol_name>`` to ``<new_vol_name>``.
|
||||
- Changing the application tags on the data and metadata pools of the file
|
||||
system to ``<new_vol_name>``.
|
||||
- Renaming the metadata and data pools of the file system.
|
||||
|
||||
The CephX IDs that are authorized for ``<vol_name>`` must be reauthorized for
|
||||
``<new_vol_name>``. Any ongoing operations of the clients using these IDs may
|
||||
be disrupted. Ensure that mirroring is disabled on the volume.
|
||||
``<new_vol_name>``. Any ongoing operations of the clients that are using these
|
||||
IDs may be disrupted. Ensure that mirroring is disabled on the volume.
|
||||
|
||||
To fetch the information of a CephFS volume, run the following command:
|
||||
|
||||
$ ceph fs volume info vol_name [--human_readable]
|
||||
|
||||
The ``--human_readable`` flag shows used and available pool capacities in KB/MB/GB.
|
||||
The ``--human_readable`` flag shows used and available pool capacities in
|
||||
KB/MB/GB.
|
||||
|
||||
The output format is JSON and contains fields as follows:
|
||||
|
||||
@ -144,7 +145,7 @@ Create a subvolume group by running the following command:
|
||||
|
||||
The command succeeds even if the subvolume group already exists.
|
||||
|
||||
When creating a subvolume group you can specify its data pool layout (see
|
||||
When you create a subvolume group, you can specify its data pool layout (see
|
||||
:doc:`/cephfs/file-layouts`), uid, gid, file mode in octal numerals, and
|
||||
size in bytes. The size of the subvolume group is specified by setting
|
||||
a quota on it (see :doc:`/cephfs/quota`). By default, the subvolume group
|
||||
@ -156,11 +157,11 @@ Remove a subvolume group by running a command of the following form:
|
||||
$ ceph fs subvolumegroup rm <vol_name> <group_name> [--force]
|
||||
|
||||
The removal of a subvolume group fails if the subvolume group is not empty or
|
||||
is non-existent. The ``--force`` flag allows the non-existent "subvolume group remove
|
||||
command" to succeed.
|
||||
is non-existent. The ``--force`` flag allows the command to succeed when its
|
||||
argument is a non-existent subvolume group.
|
||||
|
||||
|
||||
Fetch the absolute path of a subvolume group by running a command of the following form:
|
||||
Fetch the absolute path of a subvolume group by running a command of the
|
||||
following form:
|
||||
|
||||
$ ceph fs subvolumegroup getpath <vol_name> <group_name>
|
||||
|
||||
@ -171,15 +172,20 @@ List subvolume groups by running a command of the following form:
|
||||
.. note:: Subvolume group snapshot feature is no longer supported in mainline CephFS (existing group
|
||||
snapshots can still be listed and deleted)
|
||||
|
||||
Fetch the metadata of a subvolume group by running a command of the following form:
|
||||
Fetch the metadata of a subvolume group by running a command of the following
|
||||
form:
|
||||
|
||||
$ ceph fs subvolumegroup info <vol_name> <group_name>
|
||||
|
||||
The output format is JSON and contains fields as follows:
|
||||
|
||||
* ``atime``: access time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ``mtime``: modification time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ``ctime``: change time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ``atime``: access time of the subvolume group path in the format ``YYYY-MM-DD
|
||||
HH:MM:SS``
|
||||
* ``mtime``: time of the most recent modification of the subvolume group path
|
||||
in the format
|
||||
``YYYY-MM-DD HH:MM:SS``
|
||||
* ``ctime``: time of the most recent change of the subvolume group path in the
|
||||
format ``YYYY-MM-DD HH:MM:SS``
|
||||
* ``uid``: uid of the subvolume group path
|
||||
* ``gid``: gid of the subvolume group path
|
||||
* ``mode``: mode of the subvolume group path
|
||||
@ -190,37 +196,39 @@ The output format is JSON and contains fields as follows:
|
||||
* ``created_at``: creation time of the subvolume group in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ``data_pool``: data pool to which the subvolume group belongs
|
||||
|
||||
Check the presence of any subvolume group by running a command of the following form:
|
||||
Check for the presence of a given subvolume group by running a command of the
|
||||
following form:
|
||||
|
||||
$ ceph fs subvolumegroup exist <vol_name>
|
||||
|
||||
The ``exist`` command outputs:
|
||||
|
||||
* "subvolumegroup exists": if any subvolumegroup is present
|
||||
* "no subvolumegroup exists": if no subvolumegroup is present
|
||||
* ``subvolumegroup exists``: if any subvolumegroup is present
|
||||
* ``no subvolumegroup exists``: if no subvolumegroup is present
|
||||
|
||||
.. note:: This command checks for the presence of custom groups and not
|
||||
presence of the default one. To validate the emptiness of the volume, a
|
||||
subvolumegroup existence check alone is not sufficient. Subvolume existence
|
||||
also needs to be checked as there might be subvolumes in the default group.
|
||||
presence of the default one. A subvolumegroup-existence check alone is not
|
||||
sufficient to validate the emptiness of the volume. Subvolume existence must
|
||||
also be checked, as there might be subvolumes in the default group.
|
||||
|
||||
Resize a subvolume group by running a command of the following form:
|
||||
|
||||
$ ceph fs subvolumegroup resize <vol_name> <group_name> <new_size> [--no_shrink]
|
||||
|
||||
The command resizes the subvolume group quota, using the size specified by
|
||||
This command resizes the subvolume group quota, using the size specified by
|
||||
``new_size``. The ``--no_shrink`` flag prevents the subvolume group from
|
||||
shrinking below the current used size.
|
||||
|
||||
The subvolume group may be resized to an infinite size by passing ``inf`` or
|
||||
``infinite`` as the ``new_size``.
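
For illustration, a 10 GiB quota could be set on a hypothetical subvolume group
(``vol_a`` and ``group_1`` are placeholder names, not names used elsewhere in
this document):

.. prompt:: bash #

   ceph fs subvolumegroup resize vol_a group_1 10737418240 --no_shrink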
|
||||
|
||||
Remove a snapshot of a subvolume group by running a command of the following form:
|
||||
Remove a snapshot of a subvolume group by running a command of the following
|
||||
form:
|
||||
|
||||
$ ceph fs subvolumegroup snapshot rm <vol_name> <group_name> <snap_name> [--force]
|
||||
|
||||
Supplying the ``--force`` flag allows the command to succeed when it would otherwise
|
||||
fail due to the nonexistence of the snapshot.
|
||||
Supplying the ``--force`` flag allows the command to succeed when it would
|
||||
otherwise fail due to the nonexistence of the snapshot.
|
||||
|
||||
List snapshots of a subvolume group by running a command of the following form:
|
||||
|
||||
@ -230,181 +238,312 @@ List snapshots of a subvolume group by running a command of the following form:
|
||||
FS Subvolumes
|
||||
-------------
|
||||
|
||||
Create a subvolume using:
|
||||
Creating a subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume create <vol_name> <subvol_name> [--size <size_in_bytes>] [--group_name <subvol_group_name>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>] [--namespace-isolated]
|
||||
Use a command of the following form to create a subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume create <vol_name> <subvol_name> [--size <size_in_bytes>] [--group_name <subvol_group_name>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>] [--namespace-isolated]
|
||||
|
||||
The command succeeds even if the subvolume already exists.
|
||||
|
||||
When creating a subvolume you can specify its subvolume group, data pool layout,
|
||||
uid, gid, file mode in octal numerals, and size in bytes. The size of the subvolume is
|
||||
specified by setting a quota on it (see :doc:`/cephfs/quota`). The subvolume can be
|
||||
created in a separate RADOS namespace by specifying --namespace-isolated option. By
|
||||
default a subvolume is created within the default subvolume group, and with an octal file
|
||||
mode '755', uid of its subvolume group, gid of its subvolume group, data pool layout of
|
||||
its parent directory and no size limit.
|
||||
When creating a subvolume, you can specify its subvolume group, data pool
|
||||
layout, uid, gid, file mode in octal numerals, and size in bytes. The size of
|
||||
the subvolume is specified by setting a quota on it (see :doc:`/cephfs/quota`).
|
||||
The subvolume can be created in a separate RADOS namespace by specifying the
|
||||
``--namespace-isolated`` option. By default, a subvolume is created within the
|
||||
default subvolume group with an octal file mode of ``755``, a uid of its
|
||||
subvolume group, a gid of its subvolume group, a data pool layout of its parent
|
||||
directory, and no size limit.
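
As a short illustration, the following creates a 1 GiB subvolume in a
non-default group and isolates it in its own RADOS namespace (``vol_a``,
``group_1``, and ``subvol_1`` are placeholder names):

.. prompt:: bash #

   ceph fs subvolume create vol_a subvol_1 --size 1073741824 --group_name group_1 --namespace-isolated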
|
||||
|
||||
Remove a subvolume using:
|
||||
Removing a subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name>] [--force] [--retain-snapshots]
|
||||
Use a command of the following form to remove a subvolume:
|
||||
|
||||
The command removes the subvolume and its contents. It does this in two steps.
|
||||
First, it moves the subvolume to a trash folder, and then asynchronously purges
|
||||
its contents.
|
||||
.. prompt:: bash #
|
||||
|
||||
The removal of a subvolume fails if it has snapshots, or is non-existent.
|
||||
'--force' flag allows the non-existent subvolume remove command to succeed.
|
||||
ceph fs subvolume rm <vol_name> <subvol_name> [--group_name <subvol_group_name>] [--force] [--retain-snapshots]
|
||||
|
||||
A subvolume can be removed retaining existing snapshots of the subvolume using the
|
||||
'--retain-snapshots' option. If snapshots are retained, the subvolume is considered
|
||||
empty for all operations not involving the retained snapshots.
|
||||
This command removes the subvolume and its contents. This is done in two steps.
|
||||
First, the subvolume is moved to a trash folder. Second, the contents of that
|
||||
trash folder are purged asynchronously.
|
||||
|
||||
.. note:: Snapshot retained subvolumes can be recreated using 'ceph fs subvolume create'
|
||||
Subvolume removal fails if the subvolume has snapshots or is non-existent. The
|
||||
``--force`` flag allows the "non-existent subvolume remove" command to succeed.
|
||||
|
||||
.. note:: Retained snapshots can be used as a clone source to recreate the subvolume, or clone to a newer subvolume.
|
||||
To remove a subvolume while retaining snapshots of the subvolume, use the
|
||||
``--retain-snapshots`` flag. If snapshots associated with a given subvolume are
|
||||
retained, then the subvolume is considered empty for all operations that do not
|
||||
involve the retained snapshots.
|
||||
|
||||
Resize a subvolume using:
|
||||
.. note:: Snapshot-retained subvolumes can be recreated using ``ceph fs
|
||||
subvolume create``.
|
||||
|
||||
$ ceph fs subvolume resize <vol_name> <subvol_name> <new_size> [--group_name <subvol_group_name>] [--no_shrink]
|
||||
.. note:: Retained snapshots can be used as clone sources for recreating the
|
||||
subvolume or for cloning to a newer subvolume.
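
For example (placeholder names), a subvolume could be removed while keeping its
snapshots, and one retained snapshot could later be used as a clone source as
described under "Cloning Snapshots" below:

.. prompt:: bash #

   ceph fs subvolume rm vol_a subvol_1 --retain-snapshots
   ceph fs subvolume snapshot clone vol_a subvol_1 snap_1 subvol_1_restored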
|
||||
|
||||
The command resizes the subvolume quota using the size specified by ``new_size``.
|
||||
The `--no_shrink`` flag prevents the subvolume from shrinking below the current used size of the subvolume.
|
||||
Resizing a subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The subvolume can be resized to an unlimited (but sparse) logical size by passing ``inf`` or ``infinite`` as `` new_size``.
|
||||
Use a command of the following form to resize a subvolume:
|
||||
|
||||
Authorize cephx auth IDs, the read/read-write access to fs subvolumes:
|
||||
.. prompt:: bash #
|
||||
|
||||
$ ceph fs subvolume authorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>] [--access_level=<access_level>]
|
||||
ceph fs subvolume resize <vol_name> <subvol_name> <new_size> [--group_name <subvol_group_name>] [--no_shrink]
|
||||
|
||||
The ``access_level`` takes ``r`` or ``rw`` as value.
|
||||
This command resizes the subvolume quota, using the size specified by
|
||||
``new_size``. The ``--no_shrink`` flag prevents the subvolume from shrinking
|
||||
below the current "used size" of the subvolume.
|
||||
|
||||
Deauthorize cephx auth IDs, the read/read-write access to fs subvolumes:
|
||||
The subvolume can be resized to an unlimited (but sparse) logical size by
|
||||
passing ``inf`` or ``infinite`` as ``<new_size>``.
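
For illustration (placeholder names), a subvolume's quota could be grown to
2 GiB, or made unlimited, like this:

.. prompt:: bash #

   ceph fs subvolume resize vol_a subvol_1 2147483648 --no_shrink
   ceph fs subvolume resize vol_a subvol_1 inf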
|
||||
|
||||
$ ceph fs subvolume deauthorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]
|
||||
Authorizing CephX auth IDs
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
List cephx auth IDs authorized to access fs subvolume:
|
||||
Use a command of the following form to authorize CephX auth IDs. This provides
|
||||
the read/read-write access to file system subvolumes:
|
||||
|
||||
$ ceph fs subvolume authorized_list <vol_name> <sub_name> [--group_name=<group_name>]
|
||||
.. prompt:: bash #
|
||||
|
||||
Evict fs clients based on auth ID and subvolume mounted:
|
||||
ceph fs subvolume authorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>] [--access_level=<access_level>]
|
||||
|
||||
$ ceph fs subvolume evict <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]
|
||||
The ``<access_level>`` option takes either ``r`` or ``rw`` as a value.
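
For example (placeholder names, with ``alice`` as a hypothetical auth ID), a
client could be granted read-only access to a subvolume like this:

.. prompt:: bash #

   ceph fs subvolume authorize vol_a subvol_1 alice --access_level=r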
|
||||
|
||||
Fetch the absolute path of a subvolume using:
|
||||
De-authorizing CephX auth IDs
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume getpath <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
Use a command of the following form to deauthorize CephX auth IDs. This removes
|
||||
the read/read-write access to file system subvolumes:
|
||||
|
||||
Fetch the information of a subvolume using:
|
||||
.. prompt:: bash #
|
||||
|
||||
$ ceph fs subvolume info <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
ceph fs subvolume deauthorize <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]
|
||||
|
||||
The output format is JSON and contains fields as follows.
|
||||
Listing CephX auth IDs
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* ``atime``: access time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ``mtime``: modification time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ``ctime``: change time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
Use a command of the following form to list CephX auth IDs authorized to access
|
||||
the file system subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume authorized_list <vol_name> <sub_name> [--group_name=<group_name>]
|
||||
|
||||
Evicting File System Clients (Auth ID)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to evict file system clients based on the
|
||||
auth ID and the subvolume mounted:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume evict <vol_name> <sub_name> <auth_id> [--group_name=<group_name>]
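
For example (placeholder names), all clients that mounted ``subvol_1`` using
the auth ID ``alice`` could be evicted like this:

.. prompt:: bash #

   ceph fs subvolume evict vol_a subvol_1 alice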
|
||||
|
||||
Fetching the Absolute Path of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to fetch the absolute path of a subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume getpath <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Fetching a Subvolume's Information
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to fetch a subvolume's information:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume info <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The output format is JSON and contains the following fields.
|
||||
|
||||
* ``atime``: access time of the subvolume path in the format ``YYYY-MM-DD
|
||||
HH:MM:SS``
|
||||
* ``mtime``: modification time of the subvolume path in the format ``YYYY-MM-DD
|
||||
HH:MM:SS``
|
||||
* ``ctime``: change time of the subvolume path in the format ``YYYY-MM-DD
|
||||
HH:MM:SS``
|
||||
* ``uid``: uid of the subvolume path
|
||||
* ``gid``: gid of the subvolume path
|
||||
* ``mode``: mode of the subvolume path
|
||||
* ``mon_addrs``: list of monitor addresses
|
||||
* ``bytes_pcent``: quota used in percentage if quota is set, else displays ``undefined``
|
||||
* ``bytes_quota``: quota size in bytes if quota is set, else displays ``infinite``
|
||||
* ``bytes_pcent``: quota used in percentage if quota is set; else displays
|
||||
``undefined``
|
||||
* ``bytes_quota``: quota size in bytes if quota is set; else displays
|
||||
``infinite``
|
||||
* ``bytes_used``: current used size of the subvolume in bytes
|
||||
* ``created_at``: creation time of the subvolume in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ``created_at``: creation time of the subvolume in the format ``YYYY-MM-DD
|
||||
HH:MM:SS``
|
||||
* ``data_pool``: data pool to which the subvolume belongs
|
||||
* ``path``: absolute path of a subvolume
|
||||
* ``type``: subvolume type indicating whether it's clone or subvolume
|
||||
* ``type``: subvolume type, indicating whether it is ``clone`` or ``subvolume``
|
||||
* ``pool_namespace``: RADOS namespace of the subvolume
|
||||
* ``features``: features supported by the subvolume
|
||||
* ``state``: current state of the subvolume
|
||||
|
||||
If a subvolume has been removed retaining its snapshots, the output contains only fields as follows.
|
||||
If a subvolume has been removed but its snapshots have been retained, the
|
||||
output contains only the following fields.
|
||||
|
||||
* ``type``: subvolume type indicating whether it's clone or subvolume
|
||||
* ``type``: subvolume type indicating whether it is ``clone`` or ``subvolume``
|
||||
* ``features``: features supported by the subvolume
|
||||
* ``state``: current state of the subvolume
|
||||
|
||||
A subvolume's ``features`` are based on the internal version of the subvolume and are
|
||||
a subset of the following:
|
||||
A subvolume's ``features`` are based on the internal version of the subvolume
|
||||
and are a subset of the following:
|
||||
|
||||
* ``snapshot-clone``: supports cloning using a subvolumes snapshot as the source
|
||||
* ``snapshot-autoprotect``: supports automatically protecting snapshots, that are active clone sources, from deletion
|
||||
* ``snapshot-retention``: supports removing subvolume contents, retaining any existing snapshots
|
||||
* ``snapshot-clone``: supports cloning using a subvolume's snapshot as the
|
||||
source
|
||||
* ``snapshot-autoprotect``: supports automatically protecting snapshots from
|
||||
deletion if they are active clone sources
|
||||
* ``snapshot-retention``: supports removing subvolume contents, retaining any
|
||||
existing snapshots
|
||||
|
||||
A subvolume's ``state`` is based on the current state of the subvolume and contains one of the following values.
|
||||
A subvolume's ``state`` is based on the current state of the subvolume and
|
||||
contains one of the following values.
|
||||
|
||||
* ``complete``: subvolume is ready for all operations
|
||||
* ``snapshot-retained``: subvolume is removed but its snapshots are retained
|
||||
|
||||
List subvolumes using:
|
||||
Listing Subvolumes
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume ls <vol_name> [--group_name <subvol_group_name>]
|
||||
Use a command of the following form to list subvolumes:
|
||||
|
||||
.. note:: subvolumes that are removed but have snapshots retained, are also listed.
|
||||
.. prompt:: bash #
|
||||
|
||||
Check the presence of any subvolume using:
|
||||
ceph fs subvolume ls <vol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
$ ceph fs subvolume exist <vol_name> [--group_name <subvol_group_name>]
|
||||
.. note:: Subvolumes that have been removed but have snapshots retained are
   also listed.
|
||||
|
||||
Checking for the Presence of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to check for the presence of a given
|
||||
subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume exist <vol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
These are the possible results of the ``exist`` command:
|
||||
|
||||
* ``subvolume exists``: if any subvolume of given group_name is present
|
||||
* ``no subvolume exists``: if no subvolume of given group_name is present
|
||||
* ``subvolume exists``: if any subvolume of given ``group_name`` is present
|
||||
* ``no subvolume exists``: if no subvolume of given ``group_name`` is present
|
||||
|
||||
Set custom metadata on the subvolume as a key-value pair using:
|
||||
Setting Custom Metadata On a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume metadata set <vol_name> <subvol_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
Use a command of the following form to set custom metadata on the subvolume as
|
||||
a key-value pair:
|
||||
|
||||
.. note:: If the key_name already exists then the old value will get replaced by the new value.
|
||||
.. prompt:: bash #
|
||||
|
||||
.. note:: key_name and value should be a string of ASCII characters (as specified in python's string.printable). key_name is case-insensitive and always stored in lower case.
|
||||
ceph fs subvolume metadata set <vol_name> <subvol_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: Custom metadata on a subvolume is not preserved when snapshotting the subvolume, and hence, is also not preserved when cloning the subvolume snapshot.
|
||||
.. note:: If the key_name already exists then the old value will get replaced
|
||||
by the new value.
|
||||
|
||||
Get custom metadata set on the subvolume using the metadata key:
|
||||
.. note:: ``key_name`` and ``value`` should be a string of ASCII characters (as
|
||||
specified in Python's ``string.printable``). ``key_name`` is
|
||||
case-insensitive and always stored in lower case.
|
||||
|
||||
$ ceph fs subvolume metadata get <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>]
|
||||
.. note:: Custom metadata on a subvolume is not preserved when snapshotting the
|
||||
subvolume, and is therefore also not preserved when cloning the subvolume
|
||||
snapshot.
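
For example (placeholder names and an arbitrary key-value pair), an ownership
tag could be attached to a subvolume like this; it can then be read back with
the ``metadata get`` command described in the next section:

.. prompt:: bash #

   ceph fs subvolume metadata set vol_a subvol_1 owner alice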
|
||||
|
||||
List custom metadata (key-value pairs) set on the subvolume using:
|
||||
Getting The Custom Metadata Set of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume metadata ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
Use a command of the following form to get the custom metadata set on the
|
||||
subvolume using the metadata key:
|
||||
|
||||
Remove custom metadata set on the subvolume using the metadata key:
|
||||
.. prompt:: bash #
|
||||
|
||||
$ ceph fs subvolume metadata rm <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
ceph fs subvolume metadata get <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed that would otherwise
|
||||
fail if the metadata key did not exist.
|
||||
Listing The Custom Metadata Set of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Create a snapshot of a subvolume using:
|
||||
Use a command of the following form to list custom metadata (key-value pairs)
|
||||
set on the subvolume:
|
||||
|
||||
$ ceph fs subvolume snapshot create <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
.. prompt:: bash #
|
||||
|
||||
Remove a snapshot of a subvolume using:
|
||||
ceph fs subvolume metadata ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
$ ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>] [--force]
|
||||
Removing a Custom Metadata Set from a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed that would otherwise
|
||||
fail if the snapshot did not exist.
|
||||
Use a command of the following form to remove custom metadata set on the
|
||||
subvolume using the metadata key:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume metadata rm <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed when it would
|
||||
otherwise fail (if the metadata key did not exist).
|
||||
|
||||
Creating a Snapshot of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to create a snapshot of a subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot create <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
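
For example (placeholder names), a snapshot of a subvolume in a non-default
group could be taken like this:

.. prompt:: bash #

   ceph fs subvolume snapshot create vol_a subvol_1 snap_1 --group_name group_1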
|
||||
|
||||
|
||||
Removing a Snapshot of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to remove a snapshot of a subvolume:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot rm <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed when it would
|
||||
otherwise fail (if the snapshot did not exist).
|
||||
|
||||
.. note:: if the last snapshot within a snapshot retained subvolume is removed, the subvolume is also removed
|
||||
|
||||
List snapshots of a subvolume using:
|
||||
Listing the Snapshots of a Subvolume
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
Use a command of the following form to list the snapshots of a subvolume:
|
||||
|
||||
Fetch the information of a snapshot using:
|
||||
.. prompt:: bash #
|
||||
|
||||
$ ceph fs subvolume snapshot info <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The output format is json and contains fields as follows.
|
||||
Fetching a Snapshot's Information
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* ``created_at``: creation time of the snapshot in the format "YYYY-MM-DD HH:MM:SS:ffffff"
|
||||
Use a command of the following form to fetch a snapshot's information:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot info <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The output format is JSON and contains the following fields.
|
||||
|
||||
* ``created_at``: creation time of the snapshot in the format ``YYYY-MM-DD
|
||||
HH:MM:SS:ffffff``
|
||||
* ``data_pool``: data pool to which the snapshot belongs
|
||||
* ``has_pending_clones``: ``yes`` if snapshot clone is in progress, otherwise ``no``
|
||||
* ``pending_clones``: list of in-progress or pending clones and their target group if any exist, otherwise this field is not shown
|
||||
* ``orphan_clones_count``: count of orphan clones if the snapshot has orphan clones, otherwise this field is not shown
|
||||
* ``has_pending_clones``: ``yes`` if snapshot clone is in progress, otherwise
|
||||
``no``
|
||||
* ``pending_clones``: list of in-progress or pending clones and their target
|
||||
groups if any exist; otherwise this field is not shown
|
||||
* ``orphan_clones_count``: count of orphan clones if the snapshot has orphan
|
||||
clones, otherwise this field is not shown
|
||||
|
||||
Sample output when snapshot clones are in progress or pending::
|
||||
|
||||
@ -437,42 +576,74 @@ Sample output when no snapshot clone is in progress or pending::
|
||||
"has_pending_clones": "no"
|
||||
}
|
||||
|
||||
Set custom key-value metadata on the snapshot by running:
|
||||
Setting Custom Key-Value Pair Metadata on a Snapshot
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume snapshot metadata set <vol_name> <subvol_name> <snap_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
Use a command of the following form to set custom key-value metadata on the
|
||||
snapshot:
|
||||
|
||||
.. note:: If the key_name already exists then the old value will get replaced by the new value.
|
||||
.. prompt:: bash #
|
||||
|
||||
.. note:: The key_name and value should be a strings of ASCII characters (as specified in Python's ``string.printable``). The key_name is case-insensitive and always stored in lowercase.
|
||||
ceph fs subvolume snapshot metadata set <vol_name> <subvol_name> <snap_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: Custom metadata on a snapshot is not preserved when snapshotting the subvolume, and hence is also not preserved when cloning the subvolume snapshot.
|
||||
.. note:: If the ``key_name`` already exists then the old value will get replaced
|
||||
by the new value.
|
||||
|
||||
Get custom metadata set on the snapshot using the metadata key:
|
||||
.. note:: The ``key_name`` and value should be strings of ASCII characters
|
||||
(as specified in Python's ``string.printable``). The ``key_name`` is
|
||||
case-insensitive and always stored in lowercase.
|
||||
|
||||
$ ceph fs subvolume snapshot metadata get <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>]
|
||||
.. note:: Custom metadata on a snapshot is not preserved when snapshotting the
|
||||
subvolume, and is therefore not preserved when cloning the subvolume
|
||||
snapshot.
|
||||
|
||||
List custom metadata (key-value pairs) set on the snapshot using:
|
||||
Getting Custom Metadata That Has Been Set on a Snapshot
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ceph fs subvolume snapshot metadata ls <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
Use a command of the following form to get custom metadata that has been set on
|
||||
the snapshot using the metadata key:
|
||||
|
||||
Remove custom metadata set on the snapshot using the metadata key:
|
||||
.. prompt:: bash #
|
||||
|
||||
$ ceph fs subvolume snapshot metadata rm <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
ceph fs subvolume snapshot metadata get <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed that would otherwise
|
||||
fail if the metadata key did not exist.
|
||||
Listing Custom Metadata that has been Set on a Snapshot
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to list custom metadata (key-value pairs)
|
||||
set on the snapshot:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot metadata ls <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Removing Custom Metadata from a Snapshot
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use a command of the following form to remove custom metadata set on the
|
||||
snapshot using the metadata key:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot metadata rm <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the ``--force`` flag allows the command to succeed when it would otherwise
|
||||
fail (if the metadata key did not exist).
|
||||
|
||||
Cloning Snapshots
|
||||
-----------------
|
||||
|
||||
Subvolumes can be created by cloning subvolume snapshots. Cloning is an asynchronous operation that copies
|
||||
data from a snapshot to a subvolume. Due to this bulk copying, cloning is inefficient for very large
|
||||
data sets.
|
||||
Subvolumes can be created by cloning subvolume snapshots. Cloning is an
|
||||
asynchronous operation that copies data from a snapshot to a subvolume. Because
|
||||
cloning is an operation that involves bulk copying, it is slow for
|
||||
very large data sets.
|
||||
|
||||
.. note:: Removing a snapshot (source subvolume) would fail if there are pending or in progress clone operations.
|
||||
.. note:: Removing a snapshot (source subvolume) fails when there are
|
||||
pending or in-progress clone operations.
|
||||
|
||||
Protecting snapshots prior to cloning was a prerequisite in the Nautilus release, and the commands to protect/unprotect
|
||||
snapshots were introduced for this purpose. This prerequisite, and hence the commands to protect/unprotect, is being
|
||||
Protecting snapshots prior to cloning was a prerequisite in the Nautilus
|
||||
release. Commands that made possible the protection and unprotection of
|
||||
snapshots were introduced for this purpose. This prerequisite is being
|
||||
deprecated and may be removed from a future release.
|
||||
|
||||
The commands being deprecated are:
|
||||
@ -486,29 +657,46 @@ The commands being deprecated are:
|
||||
|
||||
.. note:: Use the ``subvolume info`` command to fetch subvolume metadata regarding supported ``features`` to help decide if protect/unprotect of snapshots is required, based on the availability of the ``snapshot-autoprotect`` feature.
|
||||
|
||||
To initiate a clone operation use:
|
||||
Run a command of the following form to initiate a clone operation:
|
||||
|
||||
$ ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name>
|
||||
.. prompt:: bash #
|
||||
|
||||
If a snapshot (source subvolume) is a part of non-default group, the group name needs to be specified:
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name>
|
||||
|
||||
$ ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --group_name <subvol_group_name>
|
||||
Run a command of the following form when a snapshot (source subvolume) is a
|
||||
part of non-default group. Note that the group name needs to be specified:
|
||||
|
||||
Cloned subvolumes can be a part of a different group than the source snapshot (by default, cloned subvolumes are created in default group). To clone to a particular group use:
|
||||
.. prompt:: bash #
|
||||
|
||||
$ ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --target_group_name <subvol_group_name>
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --group_name <subvol_group_name>
|
||||
|
||||
Similar to specifying a pool layout when creating a subvolume, pool layout can be specified when creating a cloned subvolume. To create a cloned subvolume with a specific pool layout use:
|
||||
Cloned subvolumes can be a part of a different group than the source snapshot
|
||||
(by default, cloned subvolumes are created in the default group). Run a command of
|
||||
the following form to clone to a particular group:
|
||||
|
||||
$ ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --pool_layout <pool_layout>
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --target_group_name <subvol_group_name>
|
||||
|
||||
Pool layout can be specified when creating a cloned subvolume in a way that is
|
||||
similar to specifying a pool layout when creating a subvolume. Run a command of
|
||||
the following form to create a cloned subvolume with a specific pool layout:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot clone <vol_name> <subvol_name> <snap_name> <target_subvol_name> --pool_layout <pool_layout>
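
Putting these steps together, a minimal clone workflow might look like the
following (all names are placeholders; the ``clone status`` command is
described below):

.. prompt:: bash #

   ceph fs subvolume snapshot create vol_a subvol_1 snap_1
   ceph fs subvolume snapshot clone vol_a subvol_1 snap_1 subvol_1_copy
   ceph fs clone status vol_a subvol_1_copy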
|
||||
|
||||
Configure the maximum number of concurrent clones. The default is 4:
|
||||
|
||||
$ ceph config set mgr mgr/volumes/max_concurrent_clones <value>
|
||||
.. prompt:: bash #
|
||||
|
||||
To check the status of a clone operation use:
|
||||
ceph config set mgr mgr/volumes/max_concurrent_clones <value>
|
||||
|
||||
$ ceph fs clone status <vol_name> <clone_name> [--group_name <group_name>]
|
||||
Run a command of the following form to check the status of a clone operation:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs clone status <vol_name> <clone_name> [--group_name <group_name>]
|
||||
|
||||
A clone can be in one of the following states:
|
||||
|
||||
@ -560,11 +748,14 @@ Here is an example of a ``failed`` clone::
|
||||
}
|
||||
}
|
||||
|
||||
(NOTE: since ``subvol1`` is in the default group, the ``source`` object's ``clone status`` does not include the group name)
|
||||
.. note:: Because ``subvol1`` is in the default group, the ``source`` object's
|
||||
``clone status`` does not include the group name.
|
||||
|
||||
.. note:: Cloned subvolumes are accessible only after the clone operation has successfully completed.
|
||||
.. note:: Cloned subvolumes are accessible only after the clone operation has
|
||||
successfully completed.
|
||||
|
||||
After a successful clone operation, ``clone status`` will look like the below::
|
||||
After a successful clone operation, ``clone status`` will look like the
|
||||
following::
|
||||
|
||||
$ ceph fs clone status cephfs clone1
|
||||
{
|
||||
@ -575,35 +766,79 @@ After a successful clone operation, ``clone status`` will look like the below::
|
||||
|
||||
If a clone operation is unsuccessful, the ``state`` value will be ``failed``.
|
||||
|
||||
To retry a failed clone operation, the incomplete clone must be deleted and the clone operation must be issued again.
|
||||
To delete a partial clone use::
|
||||
To retry a failed clone operation, the incomplete clone must be deleted and the
|
||||
clone operation must be issued again.
|
||||
|
||||
$ ceph fs subvolume rm <vol_name> <clone_name> [--group_name <group_name>] --force
|
||||
Run a command of the following form to delete a partial clone:
|
||||
|
||||
.. note:: Cloning synchronizes only directories, regular files and symbolic links. Inode timestamps (access and
|
||||
modification times) are synchronized up to seconds granularity.
|
||||
.. prompt:: bash #
|
||||
|
||||
An ``in-progress`` or a ``pending`` clone operation may be canceled. To cancel a clone operation use the ``clone cancel`` command:
|
||||
ceph fs subvolume rm <vol_name> <clone_name> [--group_name <group_name>] --force
|
||||
|
||||
$ ceph fs clone cancel <vol_name> <clone_name> [--group_name <group_name>]
|
||||
.. note:: Cloning synchronizes only directories, regular files and symbolic
|
||||
links. inode timestamps (access and modification times) are synchronized up
|
||||
to a second's granularity.
|
||||
|
||||
On successful cancellation, the cloned subvolume is moved to the ``canceled`` state::
|
||||
An ``in-progress`` or a ``pending`` clone operation may be canceled. To cancel
|
||||
a clone operation use the ``clone cancel`` command:
|
||||
|
||||
$ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1
|
||||
$ ceph fs clone cancel cephfs clone1
|
||||
$ ceph fs clone status cephfs clone1
|
||||
{
|
||||
"status": {
|
||||
"state": "canceled",
|
||||
"source": {
|
||||
"volume": "cephfs",
|
||||
"subvolume": "subvol1",
|
||||
"snapshot": "snap1"
|
||||
}
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs clone cancel <vol_name> <clone_name> [--group_name <group_name>]
|
||||
|
||||
On successful cancellation, the cloned subvolume is moved to the ``canceled``
|
||||
state:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1
|
||||
ceph fs clone cancel cephfs clone1
|
||||
ceph fs clone status cephfs clone1
|
||||
|
||||
::
|
||||
|
||||
{
|
||||
"status": {
|
||||
"state": "canceled",
|
||||
"source": {
|
||||
"volume": "cephfs",
|
||||
"subvolume": "subvol1",
|
||||
"snapshot": "snap1"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.. note:: The canceled cloned may be deleted by supplying the ``--force`` option to the `fs subvolume rm` command.
|
||||
.. note:: Delete the canceled clone by supplying the ``--force`` option to the
|
||||
``fs subvolume rm`` command.
|
||||
|
||||
Configurables
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
Configure the maximum number of concurrent clone operations. The default is 4:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/volumes/max_concurrent_clones <value>
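
For example, the number of cloner threads could be raised to 8 and the change
verified by reading the option back:

.. prompt:: bash #

   ceph config set mgr mgr/volumes/max_concurrent_clones 8
   ceph config get mgr mgr/volumes/max_concurrent_clones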
|
||||
|
||||
Configure the ``snapshot_clone_no_wait`` option:
|
||||
|
||||
The ``snapshot_clone_no_wait`` config option is used to reject clone-creation
requests when cloner threads (configured with the option above, for example
``max_concurrent_clones``) are not available. It is enabled by default, which
means that the value is set to ``True``. It can be changed by running the
following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/volumes/snapshot_clone_no_wait <bool>
|
||||
|
||||
The current value of ``snapshot_clone_no_wait`` can be fetched by running the
|
||||
following command.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config get mgr mgr/volumes/snapshot_clone_no_wait
|
||||
|
||||
|
||||
.. _subvol-pinning:
|
||||
@ -616,27 +851,51 @@ to policies. This can distribute load across MDS ranks in predictable and
|
||||
stable ways. Review :ref:`cephfs-pinning` and :ref:`cephfs-ephemeral-pinning`
|
||||
for details on how pinning works.
|
||||
|
||||
Pinning is configured by:
|
||||
Run a command of the following form to configure pinning for subvolume groups:
|
||||
|
||||
$ ceph fs subvolumegroup pin <vol_name> <group_name> <pin_type> <pin_setting>
|
||||
|
||||
or for subvolumes:
|
||||
Run a command of the following form to configure pinning for subvolumes:
|
||||
|
||||
$ ceph fs subvolume pin <vol_name> <group_name> <pin_type> <pin_setting>
|
||||
|
||||
Typically you will want to set subvolume group pins. The ``pin_type`` may be
|
||||
one of ``export``, ``distributed``, or ``random``. The ``pin_setting``
|
||||
corresponds to the extended attributed "value" as in the pinning documentation
|
||||
referenced above.
|
||||
Under most circumstances, you will want to set subvolume group pins. The
|
||||
``pin_type`` may be ``export``, ``distributed``, or ``random``. The
|
||||
``pin_setting`` corresponds to the extended attributed "value" as in the
|
||||
pinning documentation referenced above.
|
||||
|
||||
So, for example, setting a distributed pinning strategy on a subvolume group:
|
||||
Here is an example of setting a distributed pinning strategy on a subvolume
|
||||
group:
|
||||
|
||||
$ ceph fs subvolumegroup pin cephfilesystem-a csi distributed 1
|
||||
|
||||
Will enable distributed subtree partitioning policy for the "csi" subvolume
|
||||
group. This will cause every subvolume within the group to be automatically
|
||||
This enables distributed subtree partitioning policy for the "csi" subvolume
|
||||
group. This will cause every subvolume within the group to be automatically
|
||||
pinned to one of the available ranks on the file system.
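
As another illustration, the same example subvolume group could instead be
pinned explicitly to rank 0 with an export pin:

.. prompt:: bash #

   ceph fs subvolumegroup pin cephfilesystem-a csi export 0
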
|
||||
|
||||
|
||||
|
||||
.. _disabling-volumes-plugin:
|
||||
|
||||
Disabling Volumes Plugin
|
||||
------------------------
|
||||
By default the volumes plugin is enabled and set to ``always on``. However, in
certain cases it might be appropriate to disable it. For example, when a CephFS
file system is in a degraded state, volumes plugin commands may accumulate in
the MGR instead of being served, which eventually causes policy throttles to
kick in and makes the MGR unresponsive.

In this event, the volumes plugin can be disabled even though it is an
``always on`` module in the MGR. To do so, run ``ceph mgr module disable
volumes --yes-i-really-mean-it``. Note that this command disables the
operations and removes the commands of the volumes plugin, because it disables
all CephFS services on the Ceph cluster that are accessed through this plugin.

Before resorting to a measure this drastic, it is a good idea to try less
drastic measures and then assess whether the file system experience has
improved as a result. One example of a less drastic measure is to disable the
asynchronous threads that the volumes plugin launches for cloning and purging
trash.
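
For reference, the disable command described above, and the command that is
typically used to re-enable the module once the cluster has recovered, are as
follows (re-enabling uses the standard ``ceph mgr module enable`` interface):

.. prompt:: bash #

   ceph mgr module disable volumes --yes-i-really-mean-it
   ceph mgr module enable volumes
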
.. _manila: https://github.com/openstack/manila
|
||||
.. _CSI: https://github.com/ceph/ceph-csi
|
||||
|
@ -240,3 +240,15 @@ other daemons, please see :ref:`health-checks`.
|
||||
Description
|
||||
All MDS ranks are unavailable resulting in the file system to be completely
|
||||
offline.
|
||||
|
||||
``MDS_CLIENTS_LAGGY``
|
||||
----------------------------
|
||||
Message
|
||||
"Client *ID* is laggy; not evicted because some OSD(s) is/are laggy"
|
||||
|
||||
Description
|
||||
If OSD(s) is laggy (due to certain conditions like network cut-off, etc)
|
||||
then it might make clients laggy(session might get idle or cannot flush
|
||||
dirty data for cap revokes). If ``defer_client_eviction_on_laggy_osds`` is
|
||||
set to true (default true), client eviction will not take place and thus
|
||||
this health warning will be generated.
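If you prefer the older behaviour of evicting laggy clients regardless of OSD
state, the option can be turned off; a sketch (assuming the option can be set
at runtime in your release) is::

    ceph config set mds defer_client_eviction_on_laggy_osds false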
|
||||
|
@ -10,14 +10,13 @@ a state-of-the-art, multi-use, highly available, and performant file store for
|
||||
a variety of applications, including traditional use-cases like shared home
|
||||
directories, HPC scratch space, and distributed workflow shared storage.
|
||||
|
||||
CephFS achieves these goals through the use of some novel architectural
|
||||
choices. Notably, file metadata is stored in a separate RADOS pool from file
|
||||
data and served via a resizable cluster of *Metadata Servers*, or **MDS**,
|
||||
which may scale to support higher throughput metadata workloads. Clients of
|
||||
the file system have direct access to RADOS for reading and writing file data
|
||||
blocks. For this reason, workloads may linearly scale with the size of the
|
||||
underlying RADOS object store; that is, there is no gateway or broker mediating
|
||||
data I/O for clients.
|
||||
CephFS achieves these goals through novel architectural choices. Notably, file
|
||||
metadata is stored in a RADOS pool separate from file data and is served via a
|
||||
resizable cluster of *Metadata Servers*, or **MDS**\es, which scale to support
|
||||
higher-throughput workloads. Clients of the file system have direct access to
|
||||
RADOS for reading and writing file data blocks. This makes it possible for
|
||||
workloads to scale linearly with the size of the underlying RADOS object store.
|
||||
There is no gateway or broker that mediates data I/O for clients.
|
||||
|
||||
Access to data is coordinated through the cluster of MDS which serve as
|
||||
authorities for the state of the distributed metadata cache cooperatively
|
||||
|
@ -6,20 +6,25 @@ Mantle
|
||||
Mantle is for research and development of metadata balancer algorithms,
|
||||
not for use on production CephFS clusters.
|
||||
|
||||
Multiple, active MDSs can migrate directories to balance metadata load. The
|
||||
policies for when, where, and how much to migrate are hard-coded into the
|
||||
metadata balancing module. Mantle is a programmable metadata balancer built
|
||||
into the MDS. The idea is to protect the mechanisms for balancing load
|
||||
(migration, replication, fragmentation) but stub out the balancing policies
|
||||
using Lua. Mantle is based on [1] but the current implementation does *NOT*
|
||||
have the following features from that paper:
|
||||
Mantle is a programmable metadata balancer that is built into the MDS.
|
||||
|
||||
By default (without Mantle), multiple, active MDSs can migrate directories to
|
||||
balance metadata load. The policies for when, where, and how much to migrate
|
||||
are hard-coded into the metadata balancing module.
|
||||
|
||||
Mantle works by protecting the mechanisms for balancing load (migration,
|
||||
replication, fragmentation) while allowing the balancing policies to be written in Lua.
|
||||
Mantle is based on [1] but the current implementation does *NOT* have the
|
||||
following features from that paper:
|
||||
|
||||
1. Balancing API: in the paper, the user fills in when, where, how much, and
|
||||
load calculation policies; currently, Mantle only requires that Lua policies
|
||||
return a table of target loads (e.g., how much load to send to each MDS)
|
||||
2. "How much" hook: in the paper, there was a hook that let the user control
|
||||
the fragment selector policy; currently, Mantle does not have this hook
|
||||
3. Instantaneous CPU utilization as a metric
|
||||
load calculation policies. Currently, Mantle requires only that Lua policies
|
||||
return a table of target loads (for example, how much load to send to each
|
||||
MDS)
|
||||
2. The "how much" hook: in the paper, there was a hook that allowed the user to
|
||||
control the "fragment selector policy". Currently, Mantle does not have this
|
||||
hook.
|
||||
3. "Instantaneous CPU utilization" as a metric.
|
||||
|
||||
[1] Supercomputing '15 Paper:
|
||||
http://sc15.supercomputing.org/schedule/event_detail-evid=pap168.html
|
||||
@ -30,10 +35,11 @@ Quickstart with vstart
|
||||
.. warning::
|
||||
|
||||
Developing balancers with vstart is difficult because running all daemons
|
||||
and clients on one node can overload the system. Let it run for a while, even
|
||||
though you will likely see a bunch of lost heartbeat and laggy MDS warnings.
|
||||
Most of the time this guide will work but sometimes all MDSs lock up and you
|
||||
cannot actually see them spill. It is much better to run this on a cluster.
|
||||
and clients on one node can overload the system. Let the system run for a
|
||||
while, even though there will likely be many lost heartbeat warnings and
|
||||
many laggy MDS warnings. In most cases this guide will work, but sometimes
|
||||
when developing with vstart all MDSs will lock up and you cannot actually
|
||||
see them spill. It is better to run this on a multi-node cluster.
|
||||
|
||||
As a prerequisite, we assume you have installed `mdtest
|
||||
<https://sourceforge.net/projects/mdtest/>`_ or pulled the `Docker image
|
||||
|
@ -1,11 +1,10 @@
|
||||
Mount CephFS: Prerequisites
|
||||
===========================
|
||||
|
||||
You can use CephFS by mounting it to your local filesystem or by using
|
||||
`cephfs-shell`_. Mounting CephFS requires superuser privileges to trim
|
||||
dentries by issuing a remount of itself. CephFS can be mounted
|
||||
`using kernel`_ as well as `using FUSE`_. Both have their own
|
||||
advantages. Read the following section to understand more about both of
|
||||
You can use CephFS by mounting the file system on a machine or by using
|
||||
:ref:`cephfs-shell <cephfs-shell>`. A system mount can be performed using `the
|
||||
kernel driver`_ as well as `the FUSE driver`_. Both have their own advantages
|
||||
and disadvantages. Read the following section to understand more about both of
|
||||
these ways to mount CephFS.
|
||||
|
||||
For Windows CephFS mounts, please check the `ceph-dokan`_ page.
|
||||
@ -69,7 +68,7 @@ Ceph MON resides.
|
||||
individually, please check respective mount documents.
|
||||
|
||||
.. _Client Authentication: ../client-auth
|
||||
.. _cephfs-shell: ../cephfs-shell
|
||||
.. _using kernel: ../mount-using-kernel-driver
|
||||
.. _using FUSE: ../mount-using-fuse
|
||||
.. _cephfs-shell: ../cephfs-shell
|
||||
.. _the kernel driver: ../mount-using-kernel-driver
|
||||
.. _the FUSE driver: ../mount-using-fuse
|
||||
.. _ceph-dokan: ../ceph-dokan
|
||||
|
@ -12,14 +12,19 @@ Prerequisites
|
||||
Go through the prerequisites required by both, kernel as well as FUSE mounts,
|
||||
in `Mount CephFS: Prerequisites`_ page.
|
||||
|
||||
.. note:: Mounting CephFS using FUSE requires superuser privileges to trim dentries
|
||||
by issuing a remount of itself.
|
||||
.. note:: Mounting CephFS using FUSE requires superuser privileges (sudo/root).
|
||||
The libfuse interface does not provide a mechanism to trim cache entries in the
|
||||
kernel so a remount (``mount(2)``) system call is required to force the kernel
|
||||
to drop the cached metadata. ``ceph-fuse`` issues these remount system calls
|
||||
periodically in response to cache pressure in the MDS or due to metadata cache revocations.
|
||||
|
||||
Synopsis
|
||||
========
|
||||
In general, the command to mount CephFS via FUSE looks like this::
|
||||
In general, the command to mount CephFS via FUSE looks like this:
|
||||
|
||||
ceph-fuse {mountpoint} {options}
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph-fuse {mount point} {options}
|
||||
|
||||
Mounting CephFS
|
||||
===============
|
||||
@ -28,7 +33,7 @@ To FUSE-mount the Ceph file system, use the ``ceph-fuse`` command::
|
||||
mkdir /mnt/mycephfs
|
||||
ceph-fuse --id foo /mnt/mycephfs
|
||||
|
||||
Option ``-id`` passes the name of the CephX user whose keyring we intend to
|
||||
Option ``--id`` passes the name of the CephX user whose keyring we intend to
|
||||
use for mounting CephFS. In the above command, it's ``foo``. You can also use
|
||||
``-n`` instead, although ``--id`` is evidently easier::
|
||||
|
||||
|
@ -20,16 +20,18 @@ Complete General Prerequisites
|
||||
Go through the prerequisites required by both, kernel as well as FUSE mounts,
|
||||
in `Mount CephFS: Prerequisites`_ page.
|
||||
|
||||
Is mount helper is present?
|
||||
---------------------------
|
||||
Is mount helper present?
|
||||
------------------------
|
||||
``mount.ceph`` helper is installed by Ceph packages. The helper passes the
|
||||
monitor address(es) and CephX user keyrings automatically saving the Ceph
|
||||
admin the effort to pass these details explicitly while mounting CephFS. In
|
||||
case the helper is not present on the client machine, CephFS can still be
|
||||
mounted using kernel but by passing these details explicitly to the ``mount``
|
||||
command. To check whether it is present on your system, do::
|
||||
monitor address(es) and CephX user keyrings, saving the Ceph admin the effort
|
||||
of passing these details explicitly while mounting CephFS. If the helper is not
|
||||
present on the client machine, CephFS can still be mounted using the kernel
|
||||
driver, but only by passing these details explicitly to the ``mount`` command.
|
||||
To check whether ``mount.ceph`` is present on your system, run the following command:
|
||||
|
||||
stat /sbin/mount.ceph
|
||||
.. prompt:: bash #
|
||||
|
||||
stat /sbin/mount.ceph
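If the helper is absent, a mount that passes these details by hand might look
like the following sketch (the monitor address, user name, and secret file are
placeholders; recent kernels also accept a newer device-string syntax)::

    mount -t ceph 192.168.0.1:6789:/ /mnt/mycephfs -o name=foo,secretfile=/etc/ceph/foo.secret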
|
||||
|
||||
Which Kernel Version?
|
||||
---------------------
|
||||
|
@ -143,6 +143,14 @@ The types of damage that can be reported and repaired by File System Scrub are:
|
||||
|
||||
* BACKTRACE : Inode's backtrace in the data pool is corrupted.
|
||||
|
||||
The MDS damage types listed above can be repaired by using the following command::
|
||||
|
||||
ceph tell mds.<fsname>:0 scrub start /path recursive,repair,force
|
||||
|
||||
If scrub is able to repair the damage, the corresponding entry is automatically
|
||||
removed from the damage table.
|
||||
|
||||
|
||||
Evaluate strays using recursive scrub
|
||||
=====================================
|
||||
|
||||
|
@ -30,14 +30,21 @@ assumed to be keyword arguments too.
|
||||
Snapshot schedules are identified by path, their repeat interval and their start
|
||||
time. The
|
||||
repeat interval defines the time between two subsequent snapshots. It is
|
||||
specified by a number and a period multiplier, one of `h(our)`, `d(ay)` and
|
||||
`w(eek)`. E.g. a repeat interval of `12h` specifies one snapshot every 12
|
||||
hours.
|
||||
specified by a number and a period multiplier, one of `h(our)`, `d(ay)`,
|
||||
`w(eek)`, `M(onth)` and `y(ear)`. E.g. a repeat interval of `12h` specifies one
|
||||
snapshot every 12 hours.
|
||||
The start time is specified as a time string (more details about passing times
|
||||
below). By default
|
||||
the start time is last midnight. So when a snapshot schedule with repeat
|
||||
interval `1h` is added at 13:50
|
||||
with the default start time, the first snapshot will be taken at 14:00.
|
||||
The time zone is assumed to be UTC if none is explicitly included in the string.
|
||||
An explicit time zone will be mapped to UTC at execution.
|
||||
The start time must be in ISO8601 format. Examples below:
|
||||
|
||||
UTC: 2022-08-08T05:30:00 i.e. 5:30 AM UTC, without explicit time zone offset
|
||||
IDT: 2022-08-08T09:00:00+03:00 i.e. 6:00 AM UTC
|
||||
EDT: 2022-08-08T05:30:00-04:00 i.e. 9:30 AM UTC
|
||||
|
||||
Retention specifications are identified by path and the retention spec itself. A
|
||||
retention spec consists of either a number and a time period separated by a
|
||||
@ -45,8 +52,8 @@ space or concatenated pairs of `<number><time period>`.
|
||||
The semantics are that a spec will ensure `<number>` snapshots are kept that are
|
||||
at least `<time period>` apart. For Example `7d` means the user wants to keep 7
|
||||
snapshots that are at least one day (but potentially longer) apart from each other.
|
||||
The following time periods are recognized: `h(our), d(ay), w(eek), m(onth),
|
||||
y(ear)` and `n`. The latter is a special modifier where e.g. `10n` means keep
|
||||
The following time periods are recognized: `h(our)`, `d(ay)`, `w(eek)`, `M(onth)`,
|
||||
`y(ear)` and `n`. The latter is a special modifier where e.g. `10n` means keep
|
||||
the last 10 snapshots regardless of timing.
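Putting the schedule and retention pieces together, a 12-hourly schedule with a
7-day retention spec might be added as follows (the path is a placeholder;
verify the subcommands against your release)::

    $ ceph fs snap-schedule add /some/dir 12h 2022-08-08T05:30:00
    $ ceph fs snap-schedule retention add /some/dir 7d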
|
||||
|
||||
All subcommands take an optional `fs` argument to specify paths in
|
||||
@ -160,6 +167,8 @@ Examples::
|
||||
To ensure a new snapshot can be created, one snapshot less than this will be
|
||||
retained. So by default, a maximum of 99 snapshots will be retained.
|
||||
|
||||
.. note:: The ``--fs`` argument is now required if there is more than one file system.
|
||||
|
||||
Active and inactive schedules
|
||||
-----------------------------
|
||||
Snapshot schedules can be added for a path that doesn't exist yet in the
|
||||
|
@ -407,6 +407,11 @@ its associated key. A less drastic but half-fix is to change the osd cap for
|
||||
your user to just ``caps osd = "allow rw"`` and delete ``tag cephfs
|
||||
data=....``
|
||||
|
||||
Disabling Volumes Plugin
|
||||
========================
|
||||
In certain scenarios, the volumes plugin might need to be disabled to prevent compromise
|
||||
of the rest of the Ceph cluster. For details see: :ref:`disabling-volumes-plugin`
|
||||
|
||||
Reporting Issues
|
||||
================
|
||||
|
||||
|
@ -149,8 +149,7 @@ options. By default, ``log-to-stdout`` is enabled, and the latter disabled.
|
||||
vstart.sh
|
||||
---------
|
||||
|
||||
To facilitate the development of crimson, following options would be handy when
|
||||
using ``vstart.sh``,
|
||||
The following options can be used with ``vstart.sh``.
|
||||
|
||||
``--crimson``
|
||||
start ``crimson-osd`` instead of ``ceph-osd``
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _crimson_dev_doc:
|
||||
|
||||
===============================
|
||||
Crimson developer documentation
|
||||
===============================
|
||||
|
@ -32,8 +32,8 @@ The following chart illustrates the basic Ceph development workflow:
|
||||
|
||||
This page assumes that you are a new contributor with an idea for a bugfix or
|
||||
an enhancement, but you do not know how to proceed. Watch the `Getting Started
|
||||
with Ceph Development <https://www.youtube.com/watch?v=t5UIehZ1oLs>`_ video for
|
||||
a practical summary of this workflow.
|
||||
with Ceph Development <https://www.youtube.com/watch?v=t5UIehZ1oLs>`_ video (1
|
||||
hour 15 minutes) for a practical summary of this workflow.
|
||||
|
||||
Updating the tracker
|
||||
--------------------
|
||||
@ -63,8 +63,8 @@ Ceph Workflow Overview
|
||||
|
||||
Three repositories are involved in the Ceph workflow. They are:
|
||||
|
||||
1. The upstream repository (ceph/ceph)
|
||||
2. Your fork of the upstream repository (your_github_id/ceph)
|
||||
1. The upstream repository (``ceph/ceph``)
|
||||
2. Your fork of the upstream repository (``your_github_id/ceph``)
|
||||
3. Your local working copy of the repository (on your workstation)
|
||||
|
||||
The procedure for making changes to the Ceph repository is as follows:
|
||||
@ -133,14 +133,14 @@ Configuring Your Local Environment
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The commands in this section configure your local git environment so that it
|
||||
generates "Signed-off-by:" tags. These commands also set up your local
|
||||
generates ``Signed-off-by:`` tags. These commands also set up your local
|
||||
environment so that it can stay synchronized with the upstream repository.
|
||||
|
||||
These commands are necessary only during the initial setup of your local
|
||||
working copy. Another way to say that is "These commands are necessary
|
||||
only the first time that you are working with the Ceph repository. They are,
|
||||
however, unavoidable, and if you fail to run them then you will not be able
|
||||
to work on the Ceph repository.".
|
||||
The commands in this section are necessary only during the initial setup of
|
||||
your local working copy. This means that these commands are necessary only the
|
||||
first time that you are working with the Ceph repository. They are, however,
|
||||
unavoidable, and if you fail to run them then you will not be able to work on
|
||||
the Ceph repository.
|
||||
|
||||
1. Configure your local git environment with your name and email address.
|
||||
|
||||
@ -180,12 +180,12 @@ at the moment that you cloned it, but the upstream repo
|
||||
that it was forked from is not frozen in time: the upstream repo is still being
|
||||
updated by other contributors.
|
||||
|
||||
Because upstream main is continually receiving updates from other
|
||||
contributors, your fork will drift farther and farther from the state of the
|
||||
upstream repo when you cloned it.
|
||||
Because upstream main is continually receiving updates from other contributors,
|
||||
over time your fork will drift farther and farther from the state of the
|
||||
upstream repository as it was when you cloned it.
|
||||
|
||||
Keep your fork's ``main`` branch synchronized with upstream main to reduce drift
|
||||
between your fork's main branch and the upstream main branch.
|
||||
Keep your fork's ``main`` branch synchronized with upstream main to reduce
|
||||
drift between your fork's main branch and the upstream main branch.
|
||||
|
||||
Here are the commands for keeping your fork synchronized with the
|
||||
upstream repository:
|
||||
@ -216,15 +216,15 @@ Create a branch for your bugfix:
|
||||
git checkout -b fix_1
|
||||
git push -u origin fix_1
|
||||
|
||||
The first command (git checkout main) makes sure that the bugfix branch
|
||||
The first command (``git checkout main``) makes sure that the bugfix branch
|
||||
"fix_1" is created from the most recent state of the main branch of the
|
||||
upstream repository.
|
||||
|
||||
The second command (git checkout -b fix_1) creates a "bugfix branch" called
|
||||
The second command (``git checkout -b fix_1``) creates a "bugfix branch" called
|
||||
"fix_1" in your local working copy of the repository. The changes that you make
|
||||
in order to fix the bug will be commited to this branch.
|
||||
|
||||
The third command (git push -u origin fix_1) pushes the bugfix branch from
|
||||
The third command (``git push -u origin fix_1``) pushes the bugfix branch from
|
||||
your local working repository to your fork of the upstream repository.
|
||||
|
||||
.. _fixing_bug_locally:
|
||||
@ -243,15 +243,17 @@ Fixing the bug in the local working copy
|
||||
#. **Fixing the bug itself**
|
||||
|
||||
This guide cannot tell you how to fix the bug that you have chosen to fix.
|
||||
This guide assumes that you know what required improvement, and that you
|
||||
know what to do to provide that improvement.
|
||||
This guide assumes that you have identified an area that required
|
||||
improvement, and that you know how to make that improvement.
|
||||
|
||||
It might be that your fix is simple and requires only minimal testing. But
|
||||
that's unlikely. It is more likely that the process of fixing your bug will
|
||||
be iterative and will involve trial, error, skill, and patience.
|
||||
It might be that your fix is simple and that it requires only minimal
|
||||
testing. But that's unlikely unless you're updating only documentation. It
|
||||
is more likely that the process of fixing your bug will require several
|
||||
rounds of testing. The testing process is likely to be iterative and will
|
||||
involve trial, error, skill, and patience.
|
||||
|
||||
For a detailed discussion of the tools available for validating bugfixes,
|
||||
see the chapters on testing.
|
||||
see :ref:`the sections that discuss testing <dev-testing-unit-tests>`.
|
||||
|
||||
Pushing the Fix to Your Fork
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -261,9 +263,9 @@ believe that it works.
|
||||
|
||||
#. Commit the changes to your local working copy.
|
||||
|
||||
Commit the changes to the `fix_1` branch of your local working copy by using
|
||||
the ``--signoff`` option (here represented as the `s` portion of the `-as`
|
||||
flag):
|
||||
Commit the changes to the ``fix_1`` branch of your local working copy by
|
||||
using the ``--signoff`` option (here represented as the ``s`` portion of the
|
||||
``-as`` flag):
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -273,8 +275,8 @@ believe that it works.
|
||||
|
||||
#. Push the changes to your fork:
|
||||
|
||||
Push the changes from the `fix_1` branch of your local working copy to the
|
||||
`fix_1` branch of your fork of the upstream repository:
|
||||
Push the changes from the ``fix_1`` branch of your local working copy to the
|
||||
``fix_1`` branch of your fork of the upstream repository:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -306,7 +308,7 @@ believe that it works.
|
||||
|
||||
origin git@github.com:username/ceph.git (push)
|
||||
|
||||
provide the information that "origin" is the name of your fork of the
|
||||
provide the information that ``origin`` is the name of your fork of the
|
||||
Ceph repository.
|
||||
|
||||
|
||||
@ -333,7 +335,7 @@ the `Git Commit Good Practice`_ article at the `OpenStack Project Wiki`_.
|
||||
.. _`Git Commit Good Practice`: https://wiki.openstack.org/wiki/GitCommitMessages
|
||||
.. _`OpenStack Project Wiki`: https://wiki.openstack.org/wiki/Main_Page
|
||||
|
||||
See also our own `Submitting Patches
|
||||
See also Ceph's own `Submitting Patches
|
||||
<https://github.com/ceph/ceph/blob/main/SubmittingPatches.rst>`_ document.
|
||||
|
||||
After your pull request (PR) has been opened, update the :ref:`issue-tracker`
|
||||
@ -347,24 +349,25 @@ Understanding Automated PR validation
|
||||
|
||||
When you create or update your PR, the Ceph project's `Continuous Integration
|
||||
(CI) <https://en.wikipedia.org/wiki/Continuous_integration>`_ infrastructure
|
||||
automatically tests it. At the time of this writing (May 2022), the automated
|
||||
CI testing included many tests. These five are among them:
|
||||
automatically tests it. Here are just some of the automated tests that are
|
||||
performed on your PR:
|
||||
|
||||
#. a test to check that the commits are properly signed (see :ref:`submitting-patches`):
|
||||
#. a test to check that the commits are properly signed (see
|
||||
:ref:`submitting-patches`):
|
||||
#. a test to check that the documentation builds
|
||||
#. a test to check that the submodules are unmodified
|
||||
#. a test to check that the API is in order
|
||||
#. a :ref:`make check<make-check>` test
|
||||
|
||||
Additional tests may be run depending on which files your PR modifies.
|
||||
Additional tests may be run, depending on which files your PR modifies.
|
||||
|
||||
The :ref:`make check<make-check>` test builds the PR and runs it through a
|
||||
battery of tests. These tests run on servers that are operated by the Ceph
|
||||
Continuous Integration (CI) team. When the tests have completed their run, the
|
||||
result is shown on GitHub in the pull request itself.
|
||||
|
||||
Test your modifications before you open a PR. Refer to the chapters
|
||||
on testing for details.
|
||||
Test your modifications before you open a PR. Refer to :ref:`the sections on
|
||||
testing <dev-testing-unit-tests>` for details.
|
||||
|
||||
Notes on PR make check test
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -398,9 +401,8 @@ determine the cause of your specific failure.
|
||||
Integration tests AKA ceph-qa-suite
|
||||
-----------------------------------
|
||||
|
||||
Since Ceph is complex, it may be necessary to test your fix to
|
||||
see how it behaves on real clusters running on physical or virtual
|
||||
hardware. Tests designed for this purpose live in the `ceph/qa
|
||||
It may be necessary to test your fix on real Ceph clusters that run on physical
|
||||
or virtual hardware. Tests designed for this purpose reside in the `ceph/qa
|
||||
sub-directory`_ and are run via the `teuthology framework`_.
|
||||
|
||||
.. _`ceph/qa sub-directory`: https://github.com/ceph/ceph/tree/main/qa/
|
||||
@ -410,10 +412,16 @@ sub-directory`_ and are run via the `teuthology framework`_.
|
||||
The Ceph community has access to the `Sepia lab
|
||||
<https://wiki.sepia.ceph.com/doku.php>`_ where `integration tests`_ can be run
|
||||
on physical hardware.
|
||||
Other developers may add tags like "needs-qa" to your PR. This allows PRs that
|
||||
need testing to be merged into a single branch and tested all at the same time.
|
||||
Since teuthology suites can take hours (even days in some cases) to run, this
|
||||
can save a lot of time.
|
||||
|
||||
Other contributors might add tags like ``needs-qa`` to your PR. This allows PRs
|
||||
to be merged into a single branch and then efficiently tested together.
|
||||
Teuthology test suites can take hours (and, in some cases, days) to
|
||||
complete, so batching tests reduces contention for resources and saves
|
||||
time.
|
||||
|
||||
If your code change has any effect on upgrades, add the
|
||||
``needs-upgrade-testing`` label. This indicates that an upgrade testing suite
|
||||
should be scheduled.
|
||||
|
||||
To request access to the Sepia lab, start `here
|
||||
<https://wiki.sepia.ceph.com/doku.php?id=vpnaccess>`_.
|
||||
@ -426,10 +434,11 @@ tests`_ chapter.
|
||||
Code review
|
||||
-----------
|
||||
|
||||
Once your bugfix has been thoroughly tested, or even during this process,
|
||||
it will be subjected to code review by other developers. This typically
|
||||
takes the form of comments in the PR itself, but can be supplemented
|
||||
by discussions on :ref:`irc` and the :ref:`mailing-list`.
|
||||
After your bugfix has been thoroughly tested--and sometimes even during the
|
||||
testing--it will be subjected to code review by other developers. This
|
||||
typically takes the form of comments in the PR itself, but can be supplemented
|
||||
by discussions on :ref:`irc`, or on :ref:`Slack <ceph-slack>` or on the
|
||||
:ref:`mailing-list`.
|
||||
|
||||
Amending your PR
|
||||
----------------
|
||||
@ -438,24 +447,24 @@ While your PR is going through testing and `Code Review`_, you can
|
||||
modify it at any time by editing files in your local branch.
|
||||
|
||||
After updates are committed locally (to the ``fix_1`` branch in our
|
||||
example), they need to be pushed to GitHub so they appear in the PR.
|
||||
example), they must be pushed to GitHub in order to appear in the PR.
|
||||
|
||||
Modifying the PR is done by adding commits to the ``fix_1`` branch upon
|
||||
which it is based, often followed by rebasing to modify the branch's git
|
||||
history. See `this tutorial
|
||||
<https://www.atlassian.com/git/tutorials/rewriting-history>`_ for a good
|
||||
introduction to rebasing. When you are done with your modifications, you
|
||||
will need to force push your branch with:
|
||||
Modifying the PR is done by adding commits to the ``fix_1`` branch upon which
|
||||
it is based, often followed by rebasing to modify the branch's git history. See
|
||||
`this tutorial <https://www.atlassian.com/git/tutorials/rewriting-history>`_
|
||||
for an introduction to rebasing. When you are done with your modifications, you
|
||||
will need to force push your branch by running a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git push --force origin fix_1
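A typical squash-and-update sequence before that force push might look like the
following sketch (this assumes an ``upstream`` remote that points at
``ceph/ceph``, as configured earlier; adjust the remote and branch names to
your setup):

.. prompt:: bash $

   git fetch upstream
   git rebase -i upstream/main
   git push --force origin fix_1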
|
||||
|
||||
Why do we take these extra steps instead of simply adding additional commits
|
||||
the PR? It is best practice for a PR to consist of a single commit; this
|
||||
makes for clean history, eases peer review of your changes, and facilitates
|
||||
merges. In rare circumstances it also makes it easier to cleanly revert
|
||||
changes.
|
||||
Why do we take these extra steps instead of simply adding additional commits to
|
||||
the PR? It is best practice for a PR to consist of a single commit; this makes
|
||||
it possible to maintain a clean history, it simplifies peer review of your
|
||||
changes, and it makes merging your PR easier. In the unlikely event that your
|
||||
PR has to be reverted, having a single commit associated with that PR makes the
|
||||
process of reversion easier.
|
||||
|
||||
Merging
|
||||
-------
|
||||
@ -467,7 +476,7 @@ to change the :ref:`issue-tracker` status to "Resolved". Some issues may be
|
||||
flagged for backporting, in which case the status should be changed to
|
||||
"Pending Backport" (see the :ref:`backporting` chapter for details).
|
||||
|
||||
See also :ref:`merging` for more information on merging.
|
||||
See :ref:`merging` for more information on merging.
|
||||
|
||||
Proper Merge Commit Format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -480,24 +489,37 @@ This is the most basic form of a merge commit::
|
||||
|
||||
This consists of two parts:
|
||||
|
||||
#. The title of the commit / PR to be merged.
|
||||
#. The title of the commit to be merged.
|
||||
#. The name and email address of the reviewer. Enclose the reviewer's email
|
||||
address in angle brackets.
|
||||
|
||||
Using a browser extension to auto-fill the merge message
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
If you use a browser to merge GitHub PRs, the easiest way to fill in
|
||||
the merge message is with the `"Ceph GitHub Helper Extension"
|
||||
<https://github.com/tspmelo/ceph-github-helper>`_ (available for `Chrome
|
||||
<https://chrome.google.com/webstore/detail/ceph-github-helper/ikpfebikkeabmdnccbimlomheocpgkmn>`_
|
||||
and `Firefox <https://addons.mozilla.org/en-US/firefox/addon/ceph-github-helper/>`_).
|
||||
|
||||
After enabling this extension, if you go to a GitHub PR page, a vertical helper
|
||||
will be displayed at the top-right corner. If you click on the user silhouette
|
||||
button, the merge message input will be automatically populated.
|
||||
|
||||
Using .githubmap to Find a Reviewer's Email Address
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
If you cannot find the email address of the reviewer on his or her GitHub
|
||||
page, you can look it up in the **.githubmap** file, which can be found in
|
||||
the repository at **/ceph/.githubmap**.
|
||||
If you cannot find the email address of the reviewer on his or her GitHub page,
|
||||
you can look it up in the ``.githubmap`` file, which can be found in the
|
||||
repository at ``/ceph/.githubmap``.
|
||||
|
||||
Using "git log" to find a Reviewer's Email Address
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
If you cannot find a reviewer's email address by using the above methods, you
|
||||
can search the git log for their email address. Reviewers are likely to have
|
||||
committed something before. If they have made previous contributions, the git
|
||||
committed something before. If they have made previous contributions, the git
|
||||
log will probably contain their email address.
|
||||
|
||||
Use the following command
|
||||
Use the following command:
|
||||
|
||||
.. prompt:: bash [branch-under-review]$
|
||||
|
||||
@ -507,9 +529,9 @@ Using ptl-tool to Generate Merge Commits
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Another method of generating merge commits involves using Patrick Donnelly's
|
||||
**ptl-tool** pull commits. This tool can be found at
|
||||
**/ceph/src/script/ptl-tool.py**. Merge commits that have been generated by
|
||||
the **ptl-tool** have the following form::
|
||||
``ptl-tool`` to pull commits. This tool can be found at
|
||||
``/ceph/src/script/ptl-tool.py``. Merge commits that have been generated by the
|
||||
``ptl-tool`` have the following form::
|
||||
|
||||
Merge PR #36257 into main
|
||||
* refs/pull/36257/head:
|
||||
@ -533,7 +555,8 @@ push`` command, you will see the following error message:
|
||||
git push --set-upstream origin {x}
|
||||
|
||||
To set up git to automatically create the upstream branch that corresponds to
|
||||
the branch in your local working copy, run this command from within the
|
||||
the branch in your local working copy (without having to add the option
|
||||
``--set-upstream origin x`` every time), run this command from within the
|
||||
``ceph/`` directory:
|
||||
|
||||
.. prompt:: bash $
|
||||
@ -555,7 +578,7 @@ Deleting a Branch Remotely
|
||||
|
||||
To delete the branch named ``remoteBranchName`` from the remote upstream branch
|
||||
(which is also your fork of ``ceph/ceph``, as described in :ref:`forking`), run
|
||||
a command of this form:
|
||||
a command of the following form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -566,7 +589,8 @@ Searching a File Longitudinally for a String
|
||||
|
||||
To search for the commit that introduced a given string (in this example, that
|
||||
string is ``foo``) into a given file (in this example, that file is
|
||||
``file.rst``), run a command of this form:
|
||||
``file.rst``), use the ``-S <string>`` option. Run a command of the following
|
||||
form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
|
@ -2423,8 +2423,10 @@ also manually invoked: ``tox -e openapi-check``.
|
||||
If that checker failed, it means that the current Pull Request is modifying the
|
||||
Ceph API and therefore:
|
||||
|
||||
#. The versioned OpenAPI specification should be updated explicitly: ``tox -e openapi-fix``.
|
||||
#. The team @ceph/api will be requested for reviews (this is automated via Github CODEOWNERS), in order to asses the impact of changes.
|
||||
#. The versioned OpenAPI specification should be updated explicitly: ``tox -e
|
||||
openapi-fix``.
|
||||
#. The team @ceph/api will be requested for reviews (this is automated via
|
||||
GitHub CODEOWNERS), in order to assess the impact of changes.
|
||||
|
||||
Additionally, Sphinx documentation can be generated from the OpenAPI
|
||||
specification with ``tox -e openapi-doc``.
|
||||
|
@ -7,28 +7,15 @@ to know.
|
||||
Leads
|
||||
-----
|
||||
|
||||
The Ceph project is led by Sage Weil. In addition, each major project
|
||||
component has its own lead. The following table shows all the leads and
|
||||
their nicks on `GitHub`_:
|
||||
The Ceph project was created by Sage Weil and is led by the Ceph Leadership
|
||||
Team (CLT). Each major component of the Ceph project has its own lead. The
|
||||
`team list`_ on the Ceph community website shows all the leads and their nicks
|
||||
on `GitHub`_:
|
||||
|
||||
.. _github: https://github.com/
|
||||
.. _team list: https://ceph.io/en/community/team
|
||||
|
||||
========= ================ =============
|
||||
Scope Lead GitHub nick
|
||||
========= ================ =============
|
||||
Ceph Sage Weil liewegas
|
||||
RADOS Neha Ojha neha-ojha
|
||||
RGW Yehuda Sadeh yehudasa
|
||||
RGW Matt Benjamin mattbenjamin
|
||||
RBD Ilya Dryomov dis
|
||||
CephFS Patrick Donnelly batrick
|
||||
Dashboard Ernesto Puerta epuertat
|
||||
MON Joao Luis jecluis
|
||||
Build/Ops Ken Dreyer ktdreyer
|
||||
Docs Zac Dover zdover23
|
||||
========= ================ =============
|
||||
|
||||
The Ceph-specific acronyms in the table are explained in
|
||||
Ceph-specific acronyms in the table of leads are explained in
|
||||
:doc:`/architecture`.
|
||||
|
||||
History
|
||||
@ -89,6 +76,8 @@ click on `New issue`_.
|
||||
.. _`jump to the Ceph project`: http://tracker.ceph.com/projects/ceph
|
||||
.. _`New issue`: http://tracker.ceph.com/projects/ceph/issues/new
|
||||
|
||||
.. _ceph-slack:
|
||||
|
||||
Slack
|
||||
-----
|
||||
|
||||
@ -298,16 +287,13 @@ See :ref:`kubernetes-dev`
|
||||
Backporting
|
||||
-----------
|
||||
|
||||
All bugfixes should be merged to the ``main`` branch before being
|
||||
backported. To flag a bugfix for backporting, make sure it has a
|
||||
`tracker issue`_ associated with it and set the ``Backport`` field to a
|
||||
comma-separated list of previous releases (e.g. "hammer,jewel") that you think
|
||||
need the backport.
|
||||
The rest (including the actual backporting) will be taken care of by the
|
||||
`Stable Releases and Backports`_ team.
|
||||
All bugfixes should be merged to the ``main`` branch before being backported.
|
||||
To flag a bugfix for backporting, make sure it has a `tracker issue`_
|
||||
associated with it and set the ``Backport`` field to a comma-separated list of
|
||||
previous releases (e.g. "hammer,jewel") that you think need the backport. You
|
||||
are responsible for the backporting of pull requests that you raise.
|
||||
|
||||
.. _`tracker issue`: http://tracker.ceph.com/
|
||||
.. _`Stable Releases and Backports`: http://tracker.ceph.com/projects/ceph-releases/wiki
|
||||
|
||||
Dependabot
|
||||
----------
|
||||
|
@ -19,6 +19,7 @@ Contributing to Ceph: A Guide for Developers
|
||||
Tests: Unit Tests <tests-unit-tests>
|
||||
Tests: Integration Tests (Teuthology) <testing_integration_tests/index>
|
||||
Tests: Running Tests (Locally) <running-tests-locally>
|
||||
Tests: Windows <tests-windows>
|
||||
Ceph Dashboard Developer Documentation (formerly HACKING.rst) <dash-devel>
|
||||
Tracing Developer Documentation <jaegertracing>
|
||||
Cephadm Developer Documentation <../cephadm/index>
|
||||
|
@ -52,12 +52,35 @@ Running your first test
|
||||
The Python tests in Ceph repository can be executed on your local machine
|
||||
using `vstart_runner.py`_. To do that, you'd need `teuthology`_ installed::
|
||||
|
||||
$ git clone https://github.com/ceph/teuthology
|
||||
$ cd teuthology
|
||||
$ ./bootstrap install
|
||||
|
||||
This will create a virtual environment named ``virtualenv`` in root of the
|
||||
teuthology repository and install teuthology in it.
|
||||
|
||||
You can also install teuthology via ``pip`` if you would like to install it
|
||||
in a custom virtual environment, pulling the `teuthology`_ repository directly with
|
||||
``git``::
|
||||
|
||||
$ virtualenv --python=python3 venv
|
||||
$ source venv/bin/activate
|
||||
$ pip install 'setuptools >= 12'
|
||||
$ pip install teuthology[test]@git+https://github.com/ceph/teuthology
|
||||
$ deactivate
|
||||
|
||||
If for some unforeseen reason the above approaches do not work (maybe the bootstrap
|
||||
script doesn't work due to a bug or you can't download teuthology at the
|
||||
moment), teuthology can be installed manually from a copy of the
|
||||
teuthology repository already present on your machine::
|
||||
|
||||
$ cd teuthology
|
||||
$ virtualenv -p python3 venv
|
||||
$ source venv/bin/activate
|
||||
$ pip install -r requirements.txt
|
||||
$ pip install .
|
||||
$ deactivate
|
||||
|
||||
The above steps install teuthology in a virtual environment. Before running
|
||||
a test locally, build Ceph successfully from the source (refer to
|
||||
:doc:`/install/build-ceph`) and do::
|
||||
|
@ -3,11 +3,68 @@
|
||||
Integration Tests using Teuthology Workflow
|
||||
===========================================
|
||||
|
||||
Scheduling Test Run
|
||||
-------------------
|
||||
Infrastructure
|
||||
--------------
|
||||
|
||||
Getting binaries
|
||||
****************
|
||||
Components:
|
||||
|
||||
1. `ceph-ci`_: Clone of the main Ceph repository, used for triggering Jenkins
|
||||
Ceph builds for development.
|
||||
|
||||
2. `Ceph Jenkins`_: Responsible for triggering builds, uploading packages
|
||||
to Chacra, and pushing updates about the build to Shaman.
|
||||
|
||||
3. `Shaman`_: A UI used to check build status. In its backend,
|
||||
it is a REST API to query and store build information.
|
||||
|
||||
4. `Chacra`_: Service where packages are uploaded. The binaries uploaded
|
||||
here can be downloaded and used by anyone.
|
||||
|
||||
5. `Teuthology CLI`_: Developers can use various Teuthology commands to schedule
|
||||
and manage test runs.
|
||||
|
||||
6. Teuthology: This component is responsible for pushing test jobs to
|
||||
the Beanstalk queue and Paddles. It also picks jobs from
|
||||
the queue and runs tests.
|
||||
|
||||
7. Beanstalk queue: A priority queue containing all the queued jobs.
|
||||
Developers typically do not need to interact with it.
|
||||
|
||||
8. Paddles: A backend service that stores all test run information.
|
||||
Developers typically do not need to interact with it.
|
||||
|
||||
9. `Pulpito`_: A UI interface (for information stored in Paddles) that allows
|
||||
developers to see detailed information about their scheduled tests,
|
||||
including status and results.
|
||||
|
||||
10. Testnodes: A cluster of various machines that are used for running tests.
|
||||
Developers usually schedule tests to run on `smithi`_ machines, which are
|
||||
dedicated test nodes for Teuthology integration testing.
|
||||
|
||||
Each Teuthology test *run* contains multiple test *jobs*. Each job runs in an
|
||||
environment isolated from other jobs, on a different collection of test nodes.
|
||||
|
||||
To test a change in Ceph, follow these steps:
|
||||
|
||||
1. Getting binaries - Build Ceph.
|
||||
2. Scheduling Test Run:
|
||||
|
||||
a. About Test Suites.
|
||||
b. Triggering Teuthology Tests.
|
||||
c. Testing QA changes (without re-building binaries).
|
||||
d. Filtering Tests.
|
||||
|
||||
3. Viewing Test Results:
|
||||
|
||||
a. Pulpito Dashboard.
|
||||
b. Teuthology Archives (Reviewing Logs).
|
||||
|
||||
4. Killing tests.
|
||||
5. Re-running tests.
|
||||
|
||||
|
||||
Getting binaries - Build Ceph
|
||||
-----------------------------
|
||||
|
||||
Ceph binaries must be built for your branch before you can use teuthology to run integration tests on them. Follow these steps to build the Ceph binaries:
|
||||
|
||||
@ -41,8 +98,44 @@ Ceph binaries must be built for your branch before you can use teuthology to run
|
||||
.. _the Chacra site: https://shaman.ceph.com/api/search/?status=ready&project=ceph
|
||||
|
||||
|
||||
Triggering Tests
|
||||
****************
|
||||
Naming the ceph-ci branch
|
||||
*************************
|
||||
Prepend your branch with your name before you push it to ceph-ci. For example,
|
||||
a branch named ``feature-x`` should be named ``wip-$yourname-feature-x``, where
|
||||
``$yourname`` is replaced with your name. Identifying your branch with your
|
||||
name makes your branch easily findable on Shaman and Pulpito.
|
||||
|
||||
If you are using one of the stable branches (`quincy`, `pacific`, etc.), include
|
||||
the name of that stable branch in your ceph-ci branch name.
|
||||
For example, a ``feature-x`` PR branch that targets ``quincy`` should be named
|
||||
``wip-$yourname-feature-x-quincy``. *This is not just a convention. This ensures that your branch is built in the correct environment.*
|
||||
|
||||
You can choose to trigger only a CentOS 9.Stream build (excluding other distros such as Ubuntu)
|
||||
by adding "centos9-only" at the end of the ceph-ci branch name. For example,
|
||||
``wip-$yourname-feature-centos9-only``. This results in quicker builds and saves resources
|
||||
when you don't require binaries for other distros.
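As a sketch of pushing a branch to ceph-ci (the remote name ``ceph-ci`` is only
a local convention here; substitute your own name and branch)::

    git remote add ceph-ci git@github.com:ceph/ceph-ci.git
    git push ceph-ci feature-x:wip-$yourname-feature-x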
|
||||
|
||||
Delete the branch from ceph-ci when you no longer need it. If you are
|
||||
logged in to GitHub, all your branches on ceph-ci can be found here:
|
||||
https://github.com/ceph/ceph-ci/branches.
|
||||
|
||||
|
||||
Scheduling Test Run
|
||||
-------------------
|
||||
|
||||
About Test Suites
|
||||
*****************
|
||||
|
||||
Integration tests are organized into “suites”, which are defined in the ``qa/suites``
|
||||
sub-directory of the Ceph repository. These test suites can be run with the ``teuthology-suite``
|
||||
command.
|
||||
|
||||
See `Suites Inventory`_ for a list of available suites of integration tests.
|
||||
|
||||
A more detailed understanding of how these test suites are defined can be found on the `Integration Test Introduction Page`_.
|
||||
|
||||
Triggering Teuthology Tests
|
||||
***************************
|
||||
|
||||
After you have built Ceph binaries for your branch, you can run tests using
|
||||
teuthology. This procedure explains how to run tests using teuthology.
|
||||
@ -54,7 +147,10 @@ teuthology. This procedure explains how to run tests using teuthology.
|
||||
ssh <username>@teuthology.front.sepia.ceph.com
|
||||
|
||||
This requires Sepia lab access. To request access to the Sepia lab, see:
|
||||
https://ceph.github.io/sepia/adding_users/
|
||||
https://ceph.github.io/sepia/adding_users/.
|
||||
|
||||
#. For initial setup, follow the `teuthology installation guide`_ to set up teuthology for
|
||||
your user on the teuthology machine. This will enable you to run teuthology commands.
|
||||
|
||||
#. Run the ``teuthology-suite`` command:
|
||||
|
||||
@ -66,8 +162,7 @@ teuthology. This procedure explains how to run tests using teuthology.
|
||||
-s fs \
|
||||
-p 110 \
|
||||
--filter "cephfs-shell" \
|
||||
-e foo@gmail.com \
|
||||
-R fail
|
||||
-e foo@gmail.com
|
||||
|
||||
The options in the above command are defined here:
|
||||
|
||||
@ -106,10 +201,13 @@ teuthology. This procedure explains how to run tests using teuthology.
|
||||
`Pulpito`_ where the test results can be viewed.
|
||||
|
||||
|
||||
The ``--dry-run`` option allows you to do a dry run of the ``teuthology-suite`` command without
|
||||
actually scheduling teuthology tests. This is helpful for checking how many jobs and which jobs
|
||||
a command will schedule.
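For instance, a dry run of the earlier command (the branch name below is a
placeholder) could look like this:

.. prompt:: bash $

   teuthology-suite --dry-run -m smithi -c wip-yourname-feature-x -s fs -p 110 --filter "cephfs-shell"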
|
||||
|
||||
Other frequently used/useful options are ``-d`` (or ``--distro``),
|
||||
``--distroversion``, ``--filter-out``, ``--timeout``, ``flavor``, ``-rerun``,
|
||||
``-l`` (for limiting number of jobs) , ``-N`` (for how many times the job will
|
||||
``--distro-version``, ``--filter-out``, ``--timeout``, ``flavor``, ``--rerun``,
|
||||
``--limit`` (for limiting number of jobs) , ``-N`` (for how many times the job will
|
||||
run), and ``--subset`` (used to reduce the number of tests that are triggered). Run
|
||||
``teuthology-suite --help`` to read descriptions of these and other options.
|
||||
|
||||
@ -164,15 +262,15 @@ job config printed at the beginning of the teuthology job.
|
||||
for the builds to finish, then triggering tests and waiting for
|
||||
the test results.
|
||||
|
||||
About Suites and Filters
|
||||
************************
|
||||
|
||||
See `Suites Inventory`_ for a list of available suites of integration tests.
|
||||
Each directory under ``qa/suites`` in the Ceph repository is an integration
|
||||
test suite, and arguments appropriate to follow ``-s`` can be found there.
|
||||
Filtering Tests
|
||||
***************
|
||||
|
||||
Test suites include combinations of many YAML files, which can result in a massive
|
||||
number of jobs being scheduled for a suite. Filters can help to reduce the number
|
||||
of jobs or to schedule particular jobs within a suite.
|
||||
|
||||
Keywords for filtering tests can be found in
|
||||
``qa/suites/<suite-name>/<subsuite-name>/tasks`` and can be used as arguments
|
||||
``qa/suites/<suite-name>/<subsuite-name>/tasks`` in the Ceph repository and can be used as arguments
|
||||
for ``--filter``. Each YAML file in that directory can trigger tests; using the
|
||||
name of the file without its filename extension as an argument to the
|
||||
``--filter`` triggers those tests.
|
||||
@ -187,6 +285,8 @@ contents of the file for the ``modules`` attribute. For ``cephfs-shell.yaml``
|
||||
the ``modules`` attribute is ``tasks.cephfs.test_cephfs_shell``. This means
|
||||
that it triggers all tests in ``qa/tasks/cephfs/test_cephfs_shell.py``.
|
||||
|
||||
Read more about how to `Filter Tests by their Description`_.
|
||||
|
||||
Viewing Test Results
|
||||
---------------------
|
||||
|
||||
@ -200,22 +300,35 @@ Teuthology Archives
|
||||
*******************
|
||||
|
||||
After the tests have finished running, the log for the job can be obtained by
|
||||
clicking on the job ID at the Pulpito page associated with your tests. It's
|
||||
clicking on the job ID at the Pulpito run page associated with your tests. It's
|
||||
more convenient to download the log and then view it rather than viewing it in
|
||||
an internet browser since these logs can easily be up to 1 GB in size. It is
|
||||
easier to ssh into the teuthology machine (``teuthology.front.sepia.ceph.com``)
|
||||
and access the following path::
|
||||
an internet browser since these logs can easily be up to 1 GB in size.
|
||||
It is also possible to ssh into a `developer playground machine`_ and access the following path::
|
||||
|
||||
/ceph/teuthology-archive/<test-id>/<job-id>/teuthology.log
|
||||
/teuthology/<run-name>/<job-id>/teuthology.log
|
||||
|
||||
For example: for the above test ID, the path is::
|
||||
|
||||
/ceph/teuthology-archive/teuthology-2019-12-10_05:00:03-smoke-master-testing-basic-smithi/4588482/teuthology.log
|
||||
/teuthology/teuthology-2019-12-10_05:00:03-smoke-master-testing-basic-smithi/4588482/teuthology.log
|
||||
|
||||
This method can be used to view the log more quickly than would be possible through a browser.
|
||||
|
||||
To view ceph logs (cephadm, ceph monitors, ceph-mgr, etc) or system logs,
|
||||
remove ``teuthology.log`` from the job's teuthology log URL in the browser and then navigate
|
||||
to ``remote/<machine>/log/``. System logs can be found at ``remote/<machine>/syslog/``.
|
||||
Similarly, these logs can be found on developer playground machines at
|
||||
``/teuthology/<test-id>/<job-id>/remote/<machine>/``.
|
||||
|
||||
Some other files that are included for debugging purposes:
|
||||
|
||||
* ``unit_test_summary.yaml``: Provides a summary of all unit test failures.
|
||||
Generated (optionally) when the ``unit_test_scan`` configuration option is
|
||||
used in the job's YAML file.
|
||||
|
||||
* ``valgrind.yaml``: Summarizes any Valgrind errors that may occur.
|
||||
|
||||
.. note:: To access archives more conveniently, ``/a/`` has been symbolically
|
||||
linked to ``/ceph/teuthology-archive/``. For instance, to access the previous
|
||||
linked to ``/teuthology/``. For instance, to access the previous
|
||||
example, we can use something like::
|
||||
|
||||
/a/teuthology-2019-12-10_05:00:03-smoke-master-testing-basic-smithi/4588482/teuthology.log
|
||||
@ -230,9 +343,9 @@ Here is the command that terminates jobs:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
teuthology-kill -r teuthology-2019-12-10_05:00:03-smoke-master-testing-basic-smithi
|
||||
teuthology-kill -p -r teuthology-2019-12-10_05:00:03-smoke-master-testing-basic-smithi -m smithi -o scheduled_teuthology@teuthology
|
||||
|
||||
Let's call the argument passed to ``-r`` as test ID. It can be found
|
||||
The argument passed to ``-r`` is the run name. It can be found
|
||||
easily in the link to the Pulpito page for the tests you triggered. For
|
||||
example, for the above test ID, the link is - http://pulpito.front.sepia.ceph.com/teuthology-2019-12-10_05:00:03-smoke-master-testing-basic-smithi/
|
||||
|
||||
@ -257,23 +370,9 @@ can see in the example below:
|
||||
The meaning and function of the other options is covered in the table in the
|
||||
`Triggering Tests`_ section.
|
||||
|
||||
Naming the ceph-ci branch
|
||||
-------------------------
|
||||
Prepend your branch with your name before you push it to ceph-ci. For example,
|
||||
a branch named ``feature-x`` should be named ``wip-$yourname-feature-x``, where
|
||||
``$yourname`` is replaced with your name. Identifying your branch with your
|
||||
name makes your branch easily findable on Shaman and Pulpito.
|
||||
|
||||
If you are using one of the stable branches (for example, nautilis, mimic,
|
||||
etc.), include the name of that stable branch in your ceph-ci branch name.
|
||||
For example, the ``feature-x`` PR branch should be named
|
||||
``wip-feature-x-nautilus``. *This is not just a convention. This ensures that your branch is built in the correct environment.*
|
||||
|
||||
Delete the branch from ceph-ci when you no longer need it. If you are
|
||||
logged in to GitHub, all your branches on ceph-ci can be found here:
|
||||
https://github.com/ceph/ceph-ci/branches.
|
||||
|
||||
.. _ceph-ci: https://github.com/ceph/ceph-ci
|
||||
.. _Ceph Jenkins: https://jenkins.ceph.com/
|
||||
.. _Teuthology CLI: https://docs.ceph.com/projects/teuthology/en/latest/commands/list.html
|
||||
.. _Chacra: https://github.com/ceph/chacra/blob/master/README.rst
|
||||
.. _Pulpito: http://pulpito.front.sepia.ceph.com/
|
||||
.. _Running Your First Test: ../../running-tests-locally/#running-your-first-test
|
||||
@ -281,4 +380,9 @@ https://github.com/ceph/ceph-ci/branches.
|
||||
.. _Suites Inventory: ../tests-integration-testing-teuthology-intro/#suites-inventory
|
||||
.. _Testing Priority: ../tests-integration-testing-teuthology-intro/#testing-priority
|
||||
.. _Triggering Tests: ../tests-integration-testing-teuthology-workflow/#triggering-tests
|
||||
.. _Integration Test Introduction Page: ../tests-integration-testing-teuthology-intro/#how-integration-tests-are-defined
|
||||
.. _tests-sentry-developers-guide: ../tests-sentry-developers-guide/
|
||||
.. _smithi: https://wiki.sepia.ceph.com/doku.php?id=hardware:smithi
|
||||
.. _teuthology installation guide: https://docs.ceph.com/projects/teuthology/en/latest/INSTALL.html#installation-and-setup
|
||||
.. _Filter Tests by their Description: ../tests-integration-testing-teuthology-intro/#filtering-tests-by-their-description
|
||||
.. _developer playground machine: https://wiki.sepia.ceph.com/doku.php?id=devplayground
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _dev-testing-unit-tests:
|
||||
|
||||
Testing - unit tests
|
||||
====================
|
||||
|
||||
|
143
ceph/doc/dev/developer_guide/tests-windows.rst
Normal file
143
ceph/doc/dev/developer_guide/tests-windows.rst
Normal file
@ -0,0 +1,143 @@
|
||||
.. _dev-testing-windows:
|
||||
|
||||
=================
|
||||
Testing - Windows
|
||||
=================
|
||||
|
||||
Since Pacific, the Ceph client tools and libraries can be natively used on
|
||||
Windows. This allows Windows nodes to consume Ceph without additional layers
|
||||
such as iSCSI gateways or SMB shares.
|
||||
|
||||
A significant number of unit tests and integration tests were ported in order
|
||||
to ensure that these components continue to function properly on Windows.
|
||||
|
||||
Windows CI Job
|
||||
==============
|
||||
|
||||
The `Windows CI job`_ performs the following steps for each GitHub pull request:
|
||||
|
||||
* spin up a Linux VM in which to build the server-side (Linux) Ceph binaries
|
||||
and cross-compile the Windows (client) binaries.
|
||||
* recreate the Linux VM and start a Ceph vstart cluster
|
||||
* boot a Windows VM and run the Ceph tests there
|
||||
|
||||
`A small PowerShell framework`_ parallelizes the tests, aggregates the results
|
||||
and isolates or skips certain tests that are known to be flaky.
|
||||
|
||||
The console output can contain compilation errors as well as the names of the
|
||||
tests that failed. To get the console output of the failing tests as well as
|
||||
Ceph and operating system logs, please check the build artifacts from the
|
||||
Jenkins "Status" page.
|
||||
|
||||
.. image:: ../../images/windows_ci_status_page.png
|
||||
:align: center
|
||||
|
||||
The Windows CI artifacts can be downloaded as a zip archive or viewed inside
|
||||
the browser. Click the "artifacts" button to see the contents of the artifacts
|
||||
folder.
|
||||
|
||||
.. image:: ../../images/windows_ci_artifacts.png
|
||||
:align: center
|
||||
|
||||
Artifact contents:
|
||||
|
||||
* ``client/`` - Ceph client-side logs (Windows)
|
||||
* ``eventlog/`` - Windows system logs
|
||||
* ``logs/`` - Ceph logs
|
||||
* ``-windows.conf`` - Ceph configuration file
|
||||
* ``cluster/`` - Ceph server-side logs (Linux)
|
||||
* ``ceph_logs/``
|
||||
* ``journal``
|
||||
* ``test_results/``
|
||||
* ``out/`` - raw and xml test output grouped by the test executable
|
||||
* ``test_results.html`` - aggregated test report (html)
|
||||
* ``test_results.txt`` - aggregated test report (plaintext)
|
||||
|
||||
We're using the `subunit`_ format and associated tools to aggregate the test
|
||||
results, which is especially handy when running a large number of tests in
|
||||
parallel.
|
||||
|
||||
The aggregated test report provides a great overview of the failing tests.
|
||||
Go to the end of the file to see the actual errors::
|
||||
|
||||
{0} unittest_mempool.mempool.bufferlist_reassign [0.000000s] ... ok
|
||||
{0} unittest_mempool.mempool.bufferlist_c_str [0.006000s] ... ok
|
||||
{0} unittest_mempool.mempool.btree_map_test [0.000000s] ... ok
|
||||
{0} ceph_test_dokan.DokanTests.test_mount [9.203000s] ... FAILED
|
||||
|
||||
Captured details:
|
||||
~~~~~~~~~~~~~~~~~
|
||||
b'/home/ubuntu/ceph/src/test/dokan/dokan.cc:136'
|
||||
b'Expected equality of these values:'
|
||||
b' wait_for_mount(mountpoint)'
|
||||
b' Which is: -138'
|
||||
b' 0'
|
||||
b''
|
||||
b'/home/ubuntu/ceph/src/test/dokan/dokan.cc:208'
|
||||
b'Expected equality of these values:'
|
||||
b' ret'
|
||||
b' Which is: "ceph-dokan: exit status: -22"'
|
||||
b' ""'
|
||||
b'Failed unmapping: Y:\\'
|
||||
{0} ceph_test_dokan.DokanTests.test_mount_read_only [9.140000s] ... FAILED
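
To triage a failed run locally it can help to pull only the failing entries out
of ``test_results.txt``. The following is a small stand-alone sketch, not part
of the CI tooling; it only assumes the ``... FAILED`` marker and the report file
name shown above:

.. code-block:: cpp

   // failed_tests.cc -- print only the FAILED entries of the aggregated report.
   // Build with: g++ -std=c++17 -o failed_tests failed_tests.cc
   #include <fstream>
   #include <iostream>
   #include <string>

   int main(int argc, char** argv) {
     // Default to the plaintext report name used by the Windows CI artifacts.
     const std::string path = argc > 1 ? argv[1] : "test_results.txt";
     std::ifstream in(path);
     if (!in) {
       std::cerr << "could not open " << path << std::endl;
       return 1;
     }
     std::string line;
     int failures = 0;
     while (std::getline(in, line)) {
       // Aggregated result lines end in "... ok" or "... FAILED".
       if (line.find("... FAILED") != std::string::npos) {
         std::cout << line << std::endl;
         ++failures;
       }
     }
     std::cout << failures << " failed test(s)" << std::endl;
     return failures == 0 ? 0 : 2;
   }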
|
||||
|
||||
The HTML report conveniently groups the test results by test suite (test binary).
|
||||
For security reasons it isn't rendered by default, but it can be downloaded and
|
||||
viewed locally:
|
||||
|
||||
.. image:: ../../images/windows_ci_html_report.png
|
||||
:align: center
|
||||
|
||||
Timeouts and missing test results are often an indication that a process crashed.
|
||||
Note that the Ceph cluster status is printed on the console before and after
|
||||
performing the tests, which can help identify crashed services.
|
||||
|
||||
You may also want to check the service logs (both client and server side). Be
|
||||
aware that the Windows "Application" event log will contain entries in case
|
||||
of crashed Windows processes.
|
||||
|
||||
Frequently asked questions
|
||||
==========================
|
||||
|
||||
1. Why is the Windows CI job the only one that fails on my PR?
|
||||
|
||||
Ceph integration tests are normally performed through Teuthology on the Ceph
|
||||
Lab infrastructure. These tests are triggered on-demand by the Ceph QA
|
||||
team and do not run automatically for every submitted pull request.
|
||||
|
||||
Since the Windows CI job focuses only on the client-side Ceph components,
|
||||
it can run various integration tests in a timely manner for every pull request
|
||||
on GitHub. **In other words, it runs various librados, librbd and libcephfs
|
||||
tests that other checks such as "make check" do not.**
|
||||
|
||||
For this reason, the Windows CI often catches regressions that are missed by the
|
||||
other checks and would otherwise only come up through Teuthology. More often
|
||||
than not, these regressions are not platform-specific and affect Linux as well.
|
||||
|
||||
In case of Windows CI failures, we strongly suggest checking the test results
|
||||
as described above.
|
||||
|
||||
Be aware that the `Windows build script`_ may use different compilation flags
|
||||
and ``-D`` options passed to CMake. For example, it defaults to ``Release`` mode
|
||||
instead of ``Debug`` mode. At the same time, it uses a different toolchain
|
||||
(``mingw-llvm``) and a separate set of `dependencies`_; make sure to bump the
|
||||
versions if needed.
|
||||
|
||||
2. Why is the Windows CI job mandatory?
|
||||
|
||||
The test job was initially optional; as a result, regressions were introduced
|
||||
very often.
|
||||
|
||||
After a time, Windows support became mature enough to make this CI job mandatory.
|
||||
This significantly reduces the amount of work required to address regressions
|
||||
and assures Ceph users of continued Windows support.
|
||||
|
||||
As mentioned above, another major advantage is that it runs integration tests that
|
||||
quickly catch regressions which often affect Linux builds as well. This spares
|
||||
developers from having to wait for the full Teuthology results.
|
||||
|
||||
.. _Windows CI job: https://github.com/ceph/ceph-build/blob/main/ceph-windows-pull-requests/config/definitions/ceph-windows-pull-requests.yml
|
||||
.. _A small PowerShell framework: https://github.com/ceph/ceph-win32-tests/
|
||||
.. _Windows build script: https://github.com/ceph/ceph/blob/main/win32_build.sh
|
||||
.. _dependencies: https://github.com/ceph/ceph/blob/main/win32_deps_build.sh
|
||||
.. _subunit: https://github.com/testing-cabal/subunit
|
@ -30,36 +30,35 @@ by a programmer by implementing the ``encode`` and ``decode`` methods.
|
||||
|
||||
Principles for format change
|
||||
----------------------------
|
||||
It is not unusual that the format of serialization changes. This
|
||||
process requires careful attention from during both development
|
||||
It is not unusual for the format of serialization to change. This
|
||||
process requires careful attention both during development
|
||||
and review.
|
||||
|
||||
The general rule is that a decoder must understand what had been
|
||||
encoded by an encoder. Most of the problems come from ensuring
|
||||
that compatibility continues between old decoders and new encoders
|
||||
as well as new decoders and old decoders. One should assume
|
||||
that -- if not otherwise derogated -- any mix (old/new) is
|
||||
possible in a cluster. There are 2 main reasons for that:
|
||||
The general rule is that a decoder must understand what has been encoded by an
|
||||
encoder. Most difficulties arise during the process of ensuring the continuity
|
||||
of compatibility of old decoders with new encoders, and ensuring the continuity
|
||||
of compatibility of new decoders with old decoders. One should assume -- if not
|
||||
otherwise specified -- that any mix of old and new is possible in a cluster.
|
||||
There are two primary concerns:
|
||||
|
||||
1. Upgrades. Although there are recommendations related to the order
|
||||
of entity types (mons/osds/clients), it is not mandatory and
|
||||
no assumption should be made about it.
|
||||
2. Huge variability of client versions. It was always the case
|
||||
that kernel (and thus kernel clients) upgrades are decoupled
|
||||
from Ceph upgrades. Moreover, proliferation of containerization
|
||||
bring the variability even to e.g. ``librbd`` -- now user space
|
||||
libraries live on the container own.
|
||||
1. **Upgrades.** Although there are recommendations related to the order of
|
||||
entity types (mons/OSDs/clients), it is not mandatory and no assumption
|
||||
should be made.
|
||||
2. **Huge variability of client versions.** It has always been the case that
|
||||
kernel upgrades (and thus kernel clients) are decoupled from Ceph upgrades.
|
||||
Containerization brings variability even to ``librbd`` -- now user space
|
||||
libraries live in the container itself.
|
||||
|
||||
With this being said, there are few rules limiting the degree
|
||||
of interoperability between dencoders:
|
||||
There are a few rules limiting the degree of interoperability between
|
||||
dencoders:
|
||||
|
||||
* ``n-2`` for dencoding between daemons,
|
||||
* ``n-3`` hard requirement for client-involved scenarios,
|
||||
* ``n-3..`` soft requirements for clinet-involved scenarios. Ideally
|
||||
every client should be able to talk any version of daemons.
|
||||
* ``n-3`` hard requirement for client scenarios,
|
||||
* ``n-3..`` soft requirement for client scenarios. Ideally every client should
|
||||
be able to talk to any version of daemons.
|
||||
|
||||
As the underlying reasons are the same, the rules dencoders
|
||||
follow are virtually the same as for deprecations of our features
|
||||
As the underlying reasons are the same, the rules that dencoders
|
||||
follow are nearly the same as the rules for deprecations of our feature
|
||||
bits. See the ``Notes on deprecation`` in ``src/include/ceph_features.h``.
|
||||
|
||||
Frameworks
|
||||
@ -163,7 +162,7 @@ macro.
|
||||
The append-extendability of our dencoders is a result of the forward
|
||||
compatibility that the ``ENCODE_START`` and ``DECODE_FINISH`` macros bring.
|
||||
|
||||
They are implementing extendibility facilities. An encoder, when filling
|
||||
They implement extensibility facilities. An encoder, when filling
|
||||
the bufferlist, prepends three fields: version of the current format,
|
||||
minimal version of a decoder compatible with it and the total size of
|
||||
all encoded fields.
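
As an illustration of the macros described above, here is a minimal sketch of a
versioned dencoder following the usual pattern from ``src/include/encoding.h``.
The struct and its fields are invented for the example; only the macro usage is
meant to reflect the real API:

.. code-block:: cpp

   // Hypothetical type used only to illustrate ENCODE_START/DECODE_FINISH.
   struct example_info_t {
     uint32_t weight = 0;   // present since version 1
     std::string note;      // field appended in version 2

     void encode(ceph::buffer::list& bl) const {
       using ceph::encode;
       // current struct version 2, decodable by decoders that know version 1+
       ENCODE_START(2, 1, bl);
       encode(weight, bl);
       encode(note, bl);    // appended field; ENCODE_FINISH records the total size
       ENCODE_FINISH(bl);
     }

     void decode(ceph::buffer::list::const_iterator& p) {
       using ceph::decode;
       DECODE_START(2, p);  // exposes the encoder's version as struct_v
       decode(weight, p);
       if (struct_v >= 2) {
         decode(note, p);   // only present in newer encodings
       }
       DECODE_FINISH(p);    // skips trailing fields written by even newer encoders
     }
   };
   WRITE_CLASS_ENCODER(example_info_t)

An old decoder presented with a version-2 encoding stops at its own last known
field and lets ``DECODE_FINISH`` skip the rest, which is what makes the format
append-extensible.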
|
||||
|
@ -2,10 +2,14 @@
|
||||
Ceph Internals
|
||||
================
|
||||
|
||||
.. note:: If you're looking for how to use Ceph as a library from your
|
||||
own software, please see :doc:`/api/index`.
|
||||
.. note:: For information on how to use Ceph as a library (from your own
|
||||
software), see :doc:`/api/index`.
|
||||
|
||||
You can start a development mode Ceph cluster, after compiling the source, with::
|
||||
Starting a Development-mode Ceph Cluster
|
||||
----------------------------------------
|
||||
|
||||
Compile the source and then run the following commands to start a
|
||||
development-mode Ceph cluster::
|
||||
|
||||
cd build
|
||||
OSD=3 MON=3 MGR=3 ../src/vstart.sh -n -x
|
||||
@ -16,12 +20,10 @@ You can start a development mode Ceph cluster, after compiling the source, with:
|
||||
|
||||
The ``dev@ceph.io`` list is for discussion about the development of Ceph,
|
||||
its interoperability with other technology, and the operations of the
|
||||
project itself. Subscribe by sending a message to ``dev-request@ceph.io``
|
||||
with the line::
|
||||
project itself. Subscribe by sending a message to ``dev-join@ceph.io``
|
||||
with the word `subscribe` in the subject.
|
||||
|
||||
subscribe ceph-devel
|
||||
|
||||
in the body of the message.
|
||||
Alternatively you can visit https://lists.ceph.io and register.
|
||||
|
||||
The ceph-devel@vger.kernel.org list is for discussion
|
||||
and patch review for the Linux kernel Ceph client component.
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _dev_mon_elections:
|
||||
|
||||
=================
|
||||
Monitor Elections
|
||||
=================
|
||||
|
@ -218,6 +218,8 @@ we may want to exploit.
|
||||
The dedup-tool needs to be updated to use ``LIST_SNAPS`` to discover
|
||||
clones as part of leak detection.
|
||||
|
||||
.. _osd-make-writeable:
|
||||
|
||||
An important question is how we deal with the fact that many clones
|
||||
will frequently have references to the same backing chunks at the same
|
||||
offset. In particular, ``make_writeable`` will generally create a clone
|
||||
|
@ -23,12 +23,11 @@ The difference between *pool snaps* and *self managed snaps* from the
|
||||
OSD's point of view lies in whether the *SnapContext* comes to the OSD
|
||||
via the client's MOSDOp or via the most recent OSDMap.
|
||||
|
||||
See OSD::make_writeable
|
||||
See :ref:`manifest.rst <osd-make-writeable>` for more information.
|
||||
|
||||
Ondisk Structures
|
||||
-----------------
|
||||
Each object has in the PG collection a *head* object (or *snapdir*, which we
|
||||
will come to shortly) and possibly a set of *clone* objects.
|
||||
For each object, the PG collection contains a *head* object and possibly a set of *clone* objects.
|
||||
Each hobject_t has a snap field. For the *head* (the only writeable version
|
||||
of an object), the snap field is set to CEPH_NOSNAP. For the *clones*, the
|
||||
snap field is set to the *seq* of the *SnapContext* at their creation.
|
||||
@ -47,8 +46,12 @@ The *head* object contains a *SnapSet* encoded in an attribute, which tracks
|
||||
3. Overlapping intervals between clones for tracking space usage
|
||||
4. Clone size
|
||||
|
||||
If the *head* is deleted while there are still clones, a *snapdir* object
|
||||
is created instead to house the *SnapSet*.
|
||||
The *head* can't be deleted while there are still clones. Instead, it is
|
||||
marked as whiteout (``object_info_t::FLAG_WHITEOUT``) in order to house the
|
||||
*SnapSet* contained in it.
|
||||
In that case, the *head* object no longer logically exists.
|
||||
|
||||
See: should_whiteout()
|
||||
|
||||
Additionally, the *object_info_t* on each clone includes a vector of snaps
|
||||
for which clone is defined.
|
||||
@ -126,3 +129,111 @@ up to 8 prefixes need to be checked to determine all hobjects in a particular
|
||||
snap for a particular PG. Upon split, the prefixes to check on the parent
|
||||
are adjusted such that only the objects remaining in the PG will be visible.
|
||||
The children will immediately have the correct mapping.
|
||||
|
||||
clone_overlap
|
||||
-------------
|
||||
Each SnapSet attached to the *head* object contains the overlapping intervals
|
||||
between clone objects for optimizing space.
|
||||
The overlapping intervals are stored within the ``clone_overlap`` map; each element in the
|
||||
map stores the snap ID and the corresponding overlap with the next newest clone.
|
||||
|
||||
See the following example using a 4-byte object:
|
||||
|
||||
+--------+---------+
|
||||
| object | content |
|
||||
+========+=========+
|
||||
| head | [AAAA] |
|
||||
+--------+---------+
|
||||
|
||||
The ``listsnaps`` output is as follows:
|
||||
|
||||
+---------+-------+------+---------+
|
||||
| cloneid | snaps | size | overlap |
|
||||
+=========+=======+======+=========+
|
||||
| head | - | 4 | |
|
||||
+---------+-------+------+---------+
|
||||
|
||||
After taking a snapshot (ID 1) and re-writing the first 2 bytes of the object,
|
||||
the clone created will overlap with the new *head* object in its last 2 bytes.
|
||||
|
||||
+------------+---------+
|
||||
| object | content |
|
||||
+============+=========+
|
||||
| head | [BBAA] |
|
||||
+------------+---------+
|
||||
| clone ID 1 | [AAAA] |
|
||||
+------------+---------+
|
||||
|
||||
+---------+-------+------+---------+
|
||||
| cloneid | snaps | size | overlap |
|
||||
+=========+=======+======+=========+
|
||||
| 1 | 1 | 4 | [2~2] |
|
||||
+---------+-------+------+---------+
|
||||
| head | - | 4 | |
|
||||
+---------+-------+------+---------+
|
||||
|
||||
After taking another snapshot (ID 2) and this time re-writing only the first byte of the object,
|
||||
the clone created (ID 2) will overlap with the new *head* object in its last 3 bytes,
|
||||
while the oldest clone (ID 1) will overlap with the newest clone in its last 2 bytes.
|
||||
|
||||
+------------+---------+
|
||||
| object | content |
|
||||
+============+=========+
|
||||
| head | [CBAA] |
|
||||
+------------+---------+
|
||||
| clone ID 2 | [BBAA] |
|
||||
+------------+---------+
|
||||
| clone ID 1 | [AAAA] |
|
||||
+------------+---------+
|
||||
|
||||
+---------+-------+------+---------+
|
||||
| cloneid | snaps | size | overlap |
|
||||
+=========+=======+======+=========+
|
||||
| 1 | 1 | 4 | [2~2] |
|
||||
+---------+-------+------+---------+
|
||||
| 2 | 2 | 4 | [1~3] |
|
||||
+---------+-------+------+---------+
|
||||
| head | - | 4 | |
|
||||
+---------+-------+------+---------+
|
||||
|
||||
If the *head* object is then completely re-written (all 4 bytes are overwritten),
|
||||
the only remaining overlap will be between the two clones.
|
||||
|
||||
+------------+---------+
|
||||
| object | content |
|
||||
+============+=========+
|
||||
| head | [DDDD] |
|
||||
+------------+---------+
|
||||
| clone ID 2 | [BBAA] |
|
||||
+------------+---------+
|
||||
| clone ID 1 | [AAAA] |
|
||||
+------------+---------+
|
||||
|
||||
+---------+-------+------+---------+
|
||||
| cloneid | snaps | size | overlap |
|
||||
+=========+=======+======+=========+
|
||||
| 1 | 1 | 4 | [2~2] |
|
||||
+---------+-------+------+---------+
|
||||
| 2 | 2 | 4 | |
|
||||
+---------+-------+------+---------+
|
||||
| head | - | 4 | |
|
||||
+---------+-------+------+---------+
|
||||
|
||||
Finally, after the last snap (ID 2) is removed and snaptrim kicks in,
|
||||
no overlapping intervals will remain:
|
||||
|
||||
+------------+---------+
|
||||
| object | content |
|
||||
+============+=========+
|
||||
| head | [DDDD] |
|
||||
+------------+---------+
|
||||
| clone ID 1 | [AAAA] |
|
||||
+------------+---------+
|
||||
|
||||
+---------+-------+------+---------+
|
||||
| cloneid | snaps | size | overlap |
|
||||
+=========+=======+======+=========+
|
||||
| 1 | 1 | 4 | |
|
||||
+---------+-------+------+---------+
|
||||
| head | - | 4 | |
|
||||
+---------+-------+------+---------+
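
The bookkeeping shown in the tables above can be reproduced with a small
self-contained model. This is not Ceph code (in Ceph proper the data lives in
``SnapSet::clone_overlap`` as ``interval_set`` values); it only mimics how a
write to the *head* shrinks the newest clone's overlap:

.. code-block:: cpp

   // Toy model of clone_overlap bookkeeping for the 4-byte example above.
   #include <algorithm>
   #include <cstdint>
   #include <iostream>
   #include <map>
   #include <string>

   using interval_map = std::map<uint64_t, uint64_t>;  // offset -> length

   // Remove [off, off+len) from the intervals in iv.
   static void subtract(interval_map& iv, uint64_t off, uint64_t len) {
     interval_map out;
     const uint64_t end = off + len;
     for (auto [o, l] : iv) {
       const uint64_t e = o + l;
       if (o < off)                    // keep the part before the write
         out[o] = std::min(e, off) - o;
       if (e > end)                    // keep the part after the write
         out[std::max(o, end)] = e - std::max(o, end);
     }
     iv = out;
   }

   static std::string show(const interval_map& iv) {
     std::string s;
     for (auto [o, l] : iv)
       s += "[" + std::to_string(o) + "~" + std::to_string(l) + "]";
     return s.empty() ? "(none)" : s;
   }

   int main() {
     const uint64_t size = 4;                    // 4-byte object
     std::map<int, interval_map> clone_overlap;  // clone id -> overlap with next newest
     int newest = 0;

     auto snapshot = [&](int id) {               // a new clone fully overlaps the head
       clone_overlap[id] = {{0, size}};
       newest = id;
     };
     auto write_head = [&](uint64_t off, uint64_t len) {
       if (newest)                               // only the newest clone's overlap shrinks
         subtract(clone_overlap[newest], off, len);
     };

     snapshot(1); write_head(0, 2);   // head [BBAA] -> clone 1 overlap [2~2]
     snapshot(2); write_head(0, 1);   // head [CBAA] -> clone 2 overlap [1~3]
     write_head(0, 4);                // head [DDDD] -> clone 2 overlap (none)

     for (const auto& [id, iv] : clone_overlap)
       std::cout << "clone " << id << " overlap " << show(iv) << "\n";
   }

Running it prints ``[2~2]`` for clone 1 and no remaining overlap for clone 2,
matching the ``listsnaps`` tables above.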
|
||||
|
@ -6,97 +6,93 @@ Concepts
|
||||
--------
|
||||
|
||||
*Peering*
|
||||
the process of bringing all of the OSDs that store
|
||||
a Placement Group (PG) into agreement about the state
|
||||
of all of the objects (and their metadata) in that PG.
|
||||
Note that agreeing on the state does not mean that
|
||||
they all have the latest contents.
|
||||
the process of bringing all of the OSDs that store a Placement Group (PG)
|
||||
into agreement about the state of all of the objects in that PG and all of
|
||||
the metadata associated with those objects. Two OSDs can agree on the state
|
||||
of the objects in the placement group yet still may not necessarily have the
|
||||
latest contents.
|
||||
|
||||
*Acting set*
|
||||
the ordered list of OSDs who are (or were as of some epoch)
|
||||
responsible for a particular PG.
|
||||
the ordered list of OSDs that are (or were as of some epoch) responsible for
|
||||
a particular PG.
|
||||
|
||||
*Up set*
|
||||
the ordered list of OSDs responsible for a particular PG for
|
||||
a particular epoch according to CRUSH. Normally this
|
||||
is the same as the *acting set*, except when the *acting set* has been
|
||||
explicitly overridden via *PG temp* in the OSDMap.
|
||||
the ordered list of OSDs responsible for a particular PG for a particular
|
||||
epoch, according to CRUSH. This is the same as the *acting set* except when
|
||||
the *acting set* has been explicitly overridden via *PG temp* in the OSDMap.
|
||||
|
||||
*PG temp*
|
||||
a temporary placement group acting set used while backfilling the
|
||||
primary osd. Let say acting is [0,1,2] and we are
|
||||
active+clean. Something happens and acting is now [3,1,2]. osd 3 is
|
||||
empty and can't serve reads although it is the primary. osd.3 will
|
||||
see that and request a *PG temp* of [1,2,3] to the monitors using a
|
||||
MOSDPGTemp message so that osd.1 temporarily becomes the
|
||||
primary. It will select osd.3 as a backfill peer and continue to
|
||||
serve reads and writes while osd.3 is backfilled. When backfilling
|
||||
is complete, *PG temp* is discarded and the acting set changes back
|
||||
to [3,1,2] and osd.3 becomes the primary.
|
||||
a temporary placement group acting set that is used while backfilling the
|
||||
primary OSD. Assume that the acting set is ``[0,1,2]`` and we are
|
||||
``active+clean``. Now assume that something happens and the acting set
|
||||
becomes ``[3,1,2]``. Under these circumstances, OSD ``3`` is empty and can't
|
||||
serve reads even though it is the primary. ``osd.3`` will respond by
|
||||
requesting a *PG temp* of ``[1,2,3]`` to the monitors using a ``MOSDPGTemp``
|
||||
message, and ``osd.1`` will become the primary temporarily. ``osd.1`` will
|
||||
select ``osd.3`` as a backfill peer and will continue to serve reads and
|
||||
writes while ``osd.3`` is backfilled. When backfilling is complete, *PG
|
||||
temp* is discarded. The acting set changes back to ``[3,1,2]`` and ``osd.3``
|
||||
becomes the primary.
|
||||
|
||||
*current interval* or *past interval*
|
||||
a sequence of OSD map epochs during which the *acting set* and *up
|
||||
set* for particular PG do not change
|
||||
a sequence of OSD map epochs during which the *acting set* and the *up
|
||||
set* for a particular PG do not change.
|
||||
|
||||
*primary*
|
||||
the (by convention first) member of the *acting set*,
|
||||
who is responsible for coordination peering, and is
|
||||
the only OSD that will accept client initiated
|
||||
writes to objects in a placement group.
|
||||
the member of the *acting set* that is responsible for coordinating peering.
|
||||
The only OSD that accepts client-initiated writes to the objects in a
|
||||
placement group. By convention, the primary is the first member of the
|
||||
*acting set*.
|
||||
|
||||
*replica*
|
||||
a non-primary OSD in the *acting set* for a placement group
|
||||
(and who has been recognized as such and *activated* by the primary).
|
||||
a non-primary OSD in the *acting set* of a placement group. A replica has
|
||||
been recognized as a non-primary OSD and has been *activated* by the
|
||||
primary.
|
||||
|
||||
*stray*
|
||||
an OSD who is not a member of the current *acting set*, but
|
||||
has not yet been told that it can delete its copies of a
|
||||
particular placement group.
|
||||
an OSD that is not a member of the current *acting set* and has not yet been
|
||||
told to delete its copies of a particular placement group.
|
||||
|
||||
*recovery*
|
||||
ensuring that copies of all of the objects in a PG
|
||||
are on all of the OSDs in the *acting set*. Once
|
||||
*peering* has been performed, the primary can start
|
||||
accepting write operations, and *recovery* can proceed
|
||||
in the background.
|
||||
the process of ensuring that copies of all of the objects in a PG are on all
|
||||
of the OSDs in the *acting set*. After *peering* has been performed, the
|
||||
primary can begin accepting write operations and *recovery* can proceed in
|
||||
the background.
|
||||
|
||||
*PG info* basic metadata about the PG's creation epoch, the version
|
||||
for the most recent write to the PG, *last epoch started*, *last
|
||||
epoch clean*, and the beginning of the *current interval*. Any
|
||||
inter-OSD communication about PGs includes the *PG info*, such that
|
||||
any OSD that knows a PG exists (or once existed) also has a lower
|
||||
bound on *last epoch clean* or *last epoch started*.
|
||||
*PG info*
|
||||
basic metadata about the PG's creation epoch, the version for the most
|
||||
recent write to the PG, the *last epoch started*, the *last epoch clean*,
|
||||
and the beginning of the *current interval*. Any inter-OSD communication
|
||||
about PGs includes the *PG info*, such that any OSD that knows a PG exists
|
||||
(or once existed) and also has a lower bound on *last epoch clean* or *last
|
||||
epoch started*.
|
||||
|
||||
*PG log*
|
||||
a list of recent updates made to objects in a PG.
|
||||
Note that these logs can be truncated after all OSDs
|
||||
in the *acting set* have acknowledged up to a certain
|
||||
point.
|
||||
a list of recent updates made to objects in a PG. These logs can be
|
||||
truncated after all OSDs in the *acting set* have acknowledged the changes.
|
||||
|
||||
*missing set*
|
||||
Each OSD notes update log entries and if they imply updates to
|
||||
the contents of an object, adds that object to a list of needed
|
||||
updates. This list is called the *missing set* for that <OSD,PG>.
|
||||
the set of all objects that have not yet had their contents updated to match
|
||||
the log entries. The missing set is collated by each OSD. Missing sets are
|
||||
tracked on a per-``<OSD,PG>`` basis.
|
||||
|
||||
*Authoritative History*
|
||||
a complete, and fully ordered set of operations that, if
|
||||
performed, would bring an OSD's copy of a Placement Group
|
||||
up to date.
|
||||
a complete and fully-ordered set of operations that bring an OSD's copy of a
|
||||
Placement Group up to date.
|
||||
|
||||
*epoch*
|
||||
a (monotonically increasing) OSD map version number
|
||||
a (monotonically increasing) OSD map version number.
|
||||
|
||||
*last epoch start*
|
||||
the last epoch at which all nodes in the *acting set*
|
||||
for a particular placement group agreed on an
|
||||
*authoritative history*. At this point, *peering* is
|
||||
deemed to have been successful.
|
||||
the last epoch at which all nodes in the *acting set* for a given placement
|
||||
group agreed on an *authoritative history*. At the start of the last epoch,
|
||||
*peering* is deemed to have been successful.
|
||||
|
||||
*up_thru*
|
||||
before a primary can successfully complete the *peering* process,
|
||||
it must inform a monitor that it is alive through the current
|
||||
OSD map epoch by having the monitor set its *up_thru* in the osd
|
||||
map. This helps peering ignore previous *acting sets* for which
|
||||
map. This helps peering ignore previous *acting sets* for which
|
||||
peering never completed after certain sequences of failures, such as
|
||||
the second interval below:
|
||||
|
||||
@ -106,10 +102,9 @@ Concepts
|
||||
- *acting set* = [B] (B restarts, A does not)
|
||||
|
||||
*last epoch clean*
|
||||
the last epoch at which all nodes in the *acting set*
|
||||
for a particular placement group were completely
|
||||
up to date (both PG logs and object contents).
|
||||
At this point, *recovery* is deemed to have been
|
||||
the last epoch at which all nodes in the *acting set* for a given placement
|
||||
group were completely up to date (this includes both the PG's logs and the
|
||||
PG's object contents). At this point, *recovery* is deemed to have been
|
||||
completed.
|
||||
|
||||
Description of the Peering Process
|
||||
@ -256,4 +251,5 @@ The high level process is for the current PG primary to:
|
||||
State Model
|
||||
-----------
|
||||
|
||||
.. graphviz:: peering_graph.generated.dot
|
||||
.. image:: peering_graph.generated.svg
|
||||
|
||||
|
722
ceph/doc/dev/peering_graph.generated.svg
Normal file
722
ceph/doc/dev/peering_graph.generated.svg
Normal file
@ -0,0 +1,722 @@
|
||||
[722-line graphviz-generated SVG omitted: it renders the PeeringMachine state diagram referenced by the ``.. image:: peering_graph.generated.svg`` directive above, with states such as Initial, Reset, Start, Started, Primary, Peering, GetInfo, GetLog, GetMissing, WaitUpThru, WaitActingChange, Down, Incomplete, Active, Activating, Clean, Recovered, Backfilling, NotBackfilling, Recovering, NotRecovering, WaitLocalBackfillReserved, WaitRemoteBackfillReserved, WaitLocalRecoveryReserved, ReplicaActive, Stray, ToDelete and Crashed, connected by their transition events.]
||||
</g>
|
||||
<!-- WaitLocalBackfillReserved->WaitRemoteBackfillReserved -->
|
||||
<g id="edge14" class="edge">
|
||||
<title>WaitLocalBackfillReserved->WaitRemoteBackfillReserved</title>
|
||||
<path fill="none" stroke="#c71585" d="M2070,-300.8C2070,-289.16 2070,-273.55 2070,-260.24"/>
|
||||
<polygon fill="#c71585" stroke="#c71585" points="2073.5,-260.18 2070,-250.18 2066.5,-260.18 2073.5,-260.18"/>
|
||||
<text text-anchor="middle" x="2149" y="-271.8" font-family="Times,serif" font-size="14.00" fill="#c71585">LocalBackfillReserved</text>
|
||||
</g>
|
||||
<!-- NotBackfilling->WaitLocalBackfillReserved -->
|
||||
<g id="edge15" class="edge">
|
||||
<title>NotBackfilling->WaitLocalBackfillReserved</title>
|
||||
<path fill="none" stroke="#000000" d="M1962.04,-74.97C1954.7,-80.25 1947.36,-86.67 1942,-94 1900.52,-150.74 1877.63,-191.11 1916,-250 1932.13,-274.75 1959.53,-290.56 1986.79,-300.63"/>
|
||||
<polygon fill="#000000" stroke="#000000" points="1985.81,-303.99 1996.4,-303.95 1988.1,-297.38 1985.81,-303.99"/>
|
||||
<text text-anchor="middle" x="1953" y="-184.8" font-family="Times,serif" font-size="14.00" fill="#000000">RequestBackfill</text>
|
||||
</g>
|
||||
<!-- NotRecovering -->
|
||||
<g id="node18" class="node">
|
||||
<title>NotRecovering</title>
|
||||
<ellipse fill="none" stroke="black" cx="2553" cy="-319" rx="79.89" ry="18"/>
|
||||
<text text-anchor="middle" x="2553" y="-315.3" font-family="Times,serif" font-size="14.00">NotRecovering</text>
|
||||
</g>
|
||||
<!-- NotRecovering->WaitLocalRecoveryReserved -->
|
||||
<g id="edge10" class="edge">
|
||||
<title>NotRecovering->WaitLocalRecoveryReserved</title>
|
||||
<path fill="none" stroke="#ff0000" d="M2591.96,-334.83C2600.34,-339.92 2608.13,-346.53 2613,-355 2636.25,-395.47 2613.73,-418.06 2588,-457 2554.21,-508.13 2534.64,-514.34 2481,-544 2469.85,-550.17 2457.42,-555.51 2445.17,-560.04"/>
|
||||
<polygon fill="#ff0000" stroke="#ff0000" points="2443.66,-556.86 2435.4,-563.5 2446,-563.46 2443.66,-556.86"/>
|
||||
<text text-anchor="middle" x="2640" y="-445.8" font-family="Times,serif" font-size="14.00" fill="#ff0000">DoRecovery</text>
|
||||
</g>
|
||||
<!-- Recovering -->
|
||||
<g id="node19" class="node">
|
||||
<title>Recovering</title>
|
||||
<ellipse fill="none" stroke="black" cx="2324" cy="-406" rx="63.09" ry="18"/>
|
||||
<text text-anchor="middle" x="2324" y="-402.3" font-family="Times,serif" font-size="14.00">Recovering</text>
|
||||
</g>
|
||||
<!-- Recovering->Recovered -->
|
||||
<g id="edge33" class="edge">
|
||||
<title>Recovering->Recovered</title>
|
||||
<path fill="none" stroke="#ffa500" d="M2322.28,-387.65C2322.19,-376.95 2324.01,-363.66 2332,-355 2354.96,-330.13 2376.06,-352.78 2406,-337 2478.39,-298.84 2478.42,-264.26 2543,-214 2575.1,-189.02 2599.86,-198.89 2619,-163 2631.7,-139.18 2627.88,-107.76 2622.49,-85.71"/>
|
||||
<polygon fill="#ffa500" stroke="#ffa500" points="2625.86,-84.75 2619.88,-76.01 2619.1,-86.57 2625.86,-84.75"/>
|
||||
<text text-anchor="middle" x="2620" y="-228.3" font-family="Times,serif" font-size="14.00" fill="#ffa500">AllReplicasRecovered</text>
|
||||
</g>
|
||||
<!-- Recovering->WaitLocalBackfillReserved -->
|
||||
<g id="edge17" class="edge">
|
||||
<title>Recovering->WaitLocalBackfillReserved</title>
|
||||
<path fill="none" stroke="#ff0000" d="M2277.91,-393.69C2256.04,-387.7 2229.76,-379.59 2207,-370 2194.35,-364.67 2192.6,-360.44 2180,-355 2165.89,-348.91 2150.36,-343.32 2135.53,-338.49"/>
|
||||
<polygon fill="#ff0000" stroke="#ff0000" points="2136.49,-335.13 2125.9,-335.43 2134.37,-341.8 2136.49,-335.13"/>
|
||||
<text text-anchor="middle" x="2263" y="-358.8" font-family="Times,serif" font-size="14.00" fill="#ff0000">RequestBackfill</text>
|
||||
</g>
|
||||
<!-- Recovering->NotRecovering -->
|
||||
<g id="edge66" class="edge">
|
||||
<title>Recovering->NotRecovering</title>
|
||||
<path fill="none" stroke="#ff0000" d="M2328.18,-388.01C2331.82,-376.89 2338.25,-362.98 2349,-355 2351.73,-352.98 2422.37,-341.14 2479.77,-331.78"/>
|
||||
<polygon fill="#ff0000" stroke="#ff0000" points="2480.64,-335.19 2489.95,-330.13 2479.51,-328.28 2480.64,-335.19"/>
|
||||
<text text-anchor="middle" x="2402" y="-358.8" font-family="Times,serif" font-size="14.00" fill="#ff0000">DeferRecovery</text>
|
||||
</g>
|
||||
<!-- Recovering->NotRecovering -->
|
||||
<g id="edge67" class="edge">
|
||||
<title>Recovering->NotRecovering</title>
|
||||
<path fill="none" stroke="#0000ff" d="M2375.04,-395.32C2399.82,-389.63 2429.63,-381.31 2455,-370 2466.15,-365.03 2467.33,-360.93 2478,-355 2487.71,-349.6 2498.4,-344.27 2508.59,-339.46"/>
|
||||
<polygon fill="#0000ff" stroke="#0000ff" points="2510.17,-342.59 2517.76,-335.2 2507.22,-336.24 2510.17,-342.59"/>
|
||||
<text text-anchor="middle" x="2542" y="-358.8" font-family="Times,serif" font-size="14.00" fill="#0000ff">UnfoundRecovery</text>
|
||||
</g>
|
||||
<!-- WaitRemoteRecoveryReserved -->
|
||||
<g id="node20" class="node">
|
||||
<title>WaitRemoteRecoveryReserved</title>
|
||||
<ellipse fill="none" stroke="black" cx="2304" cy="-493" rx="153.27" ry="18"/>
|
||||
<text text-anchor="middle" x="2304" y="-489.3" font-family="Times,serif" font-size="14.00">WaitRemoteRecoveryReserved</text>
|
||||
</g>
|
||||
<!-- WaitRemoteRecoveryReserved->Recovering -->
|
||||
<g id="edge30" class="edge">
|
||||
<title>WaitRemoteRecoveryReserved->Recovering</title>
|
||||
<path fill="none" stroke="#1e90ff" d="M2297.78,-474.96C2295.13,-465.12 2293.38,-452.62 2297,-442 2298.21,-438.45 2299.94,-434.98 2301.95,-431.69"/>
|
||||
<polygon fill="#1e90ff" stroke="#1e90ff" points="2304.85,-433.65 2307.76,-423.46 2299.13,-429.61 2304.85,-433.65"/>
|
||||
<text text-anchor="middle" x="2371" y="-445.8" font-family="Times,serif" font-size="14.00" fill="#1e90ff">AllRemotesReserved</text>
|
||||
</g>
|
||||
<!-- WaitLocalRecoveryReserved->NotRecovering -->
|
||||
<g id="edge65" class="edge">
|
||||
<title>WaitLocalRecoveryReserved->NotRecovering</title>
|
||||
<path fill="none" stroke="#1e90ff" d="M2447.25,-564.08C2456.6,-559.17 2465.01,-552.65 2471,-544 2475.48,-537.52 2477.38,-548.41 2466,-467 2464.46,-455.96 2458.17,-451.5 2464,-442 2501.03,-381.7 2568.97,-430.3 2606,-370 2612.54,-359.35 2606.14,-349.47 2595.81,-341.36"/>
|
||||
<polygon fill="#1e90ff" stroke="#1e90ff" points="2597.51,-338.28 2587.29,-335.51 2593.55,-344.06 2597.51,-338.28"/>
|
||||
<text text-anchor="middle" x="2524" y="-445.8" font-family="Times,serif" font-size="14.00" fill="#1e90ff">RecoveryTooFull</text>
|
||||
</g>
|
||||
<!-- WaitLocalRecoveryReserved->WaitRemoteRecoveryReserved -->
|
||||
<g id="edge31" class="edge">
|
||||
<title>WaitLocalRecoveryReserved->WaitRemoteRecoveryReserved</title>
|
||||
<path fill="none" stroke="#ff0000" d="M2321.68,-563.37C2313.09,-558.53 2305.32,-552.22 2300,-544 2295.66,-537.29 2294.94,-528.97 2295.81,-521.05"/>
|
||||
<polygon fill="#ff0000" stroke="#ff0000" points="2299.29,-521.51 2297.7,-511.04 2292.41,-520.22 2299.29,-521.51"/>
|
||||
<text text-anchor="middle" x="2385.5" y="-532.8" font-family="Times,serif" font-size="14.00" fill="#ff0000">LocalRecoveryReserved</text>
|
||||
</g>
|
||||
<!-- Activating->Recovered -->
|
||||
<g id="edge32" class="edge">
|
||||
<title>Activating->Recovered</title>
|
||||
<path fill="none" stroke="#0000ff" d="M2144.93,-671.5C2112.8,-609.36 1996.9,-380.36 2020,-355 2049.01,-323.16 2174.28,-355.87 2213,-337 2232.78,-327.36 2227.93,-311.99 2247,-301 2302.39,-269.08 2324.7,-282.35 2387,-268 2422.51,-259.82 2440.6,-275.12 2467,-250 2479.03,-238.55 2467,-227.25 2477,-214 2493.95,-191.55 2505.53,-192.93 2531,-181 2553.53,-170.45 2566.46,-180.64 2584,-163 2601.84,-145.06 2595.04,-133.32 2602,-109 2604.1,-101.65 2606.14,-93.62 2607.92,-86.19"/>
|
||||
<polygon fill="#0000ff" stroke="#0000ff" points="2611.39,-86.75 2610.25,-76.22 2604.57,-85.16 2611.39,-86.75"/>
|
||||
<text text-anchor="middle" x="2097" y="-358.8" font-family="Times,serif" font-size="14.00" fill="#0000ff">AllReplicasRecovered</text>
|
||||
</g>
|
||||
<!-- Activating->WaitLocalBackfillReserved -->
|
||||
<g id="edge16" class="edge">
|
||||
<title>Activating->WaitLocalBackfillReserved</title>
|
||||
<path fill="none" stroke="#1e90ff" d="M2116.05,-678.09C2051.03,-661.33 1923,-622.99 1923,-581 1923,-581 1923,-581 1923,-405 1923,-370.66 1953.44,-349.97 1986.62,-337.62"/>
|
||||
<polygon fill="#1e90ff" stroke="#1e90ff" points="1988.19,-340.78 1996.5,-334.2 1985.91,-334.16 1988.19,-340.78"/>
|
||||
<text text-anchor="middle" x="1979" y="-489.3" font-family="Times,serif" font-size="14.00" fill="#1e90ff">RequestBackfill</text>
|
||||
</g>
|
||||
<!-- Activating->WaitLocalRecoveryReserved -->
|
||||
<g id="edge11" class="edge">
|
||||
<title>Activating->WaitLocalRecoveryReserved</title>
|
||||
<path fill="none" stroke="#0000ff" d="M2177.94,-675.25C2215.51,-657.55 2287.18,-623.79 2333.76,-601.84"/>
|
||||
<polygon fill="#0000ff" stroke="#0000ff" points="2335.45,-604.91 2343.01,-597.49 2332.47,-598.58 2335.45,-604.91"/>
|
||||
<text text-anchor="middle" x="2301" y="-640.8" font-family="Times,serif" font-size="14.00" fill="#0000ff">DoRecovery</text>
|
||||
</g>
|
||||
<!-- RepRecovering -->
|
||||
<g id="node23" class="node">
|
||||
<title>RepRecovering</title>
|
||||
<ellipse fill="none" stroke="black" cx="414" cy="-796" rx="81.49" ry="18"/>
|
||||
<text text-anchor="middle" x="414" y="-792.3" font-family="Times,serif" font-size="14.00">RepRecovering</text>
|
||||
</g>
|
||||
<!-- RepNotRecovering -->
|
||||
<g id="node26" class="node">
|
||||
<title>RepNotRecovering</title>
|
||||
<polygon fill="lightgrey" stroke="black" points="722,-706 583.41,-688 722,-670 860.59,-688 722,-706"/>
|
||||
<polyline fill="none" stroke="black" points="595.31,-689.55 595.31,-686.45 "/>
|
||||
<polyline fill="none" stroke="black" points="710.1,-671.55 733.9,-671.55 "/>
|
||||
<polyline fill="none" stroke="black" points="848.69,-686.45 848.69,-689.55 "/>
|
||||
<polyline fill="none" stroke="black" points="733.9,-704.45 710.1,-704.45 "/>
|
||||
<text text-anchor="middle" x="722" y="-684.3" font-family="Times,serif" font-size="14.00">RepNotRecovering</text>
|
||||
</g>
|
||||
<!-- RepRecovering->RepNotRecovering -->
|
||||
<g id="edge20" class="edge">
|
||||
<title>RepRecovering->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#40e0d0" d="M471.08,-783.17C480.39,-781.36 489.96,-779.57 499,-778 553,-768.62 570.09,-780.31 621,-760 625.06,-758.38 664.95,-730 693.37,-709.61"/>
|
||||
<polygon fill="#40e0d0" stroke="#40e0d0" points="695.68,-712.26 701.76,-703.58 691.6,-706.57 695.68,-712.26"/>
|
||||
<text text-anchor="middle" x="694.5" y="-748.8" font-family="Times,serif" font-size="14.00" fill="#40e0d0">RecoveryDone</text>
|
||||
</g>
|
||||
<!-- RepRecovering->RepNotRecovering -->
|
||||
<g id="edge22" class="edge">
|
||||
<title>RepRecovering->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#000000" d="M469.61,-782.84C479.37,-781.01 489.46,-779.3 499,-778 526.38,-774.28 728.18,-780.23 747,-760 759.05,-747.04 751.36,-727.92 741.44,-712.79"/>
|
||||
<polygon fill="#000000" stroke="#000000" points="744.12,-710.53 735.47,-704.42 738.42,-714.59 744.12,-710.53"/>
|
||||
<text text-anchor="middle" x="881" y="-748.8" font-family="Times,serif" font-size="14.00" fill="#000000">RemoteReservationRejectedTooFull</text>
|
||||
</g>
|
||||
<!-- RepRecovering->RepNotRecovering -->
|
||||
<g id="edge26" class="edge">
|
||||
<title>RepRecovering->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#ffa500" d="M407.27,-777.99C404.21,-767.15 402.85,-753.57 411,-745 423.11,-732.26 551.7,-740.1 569,-737 607.45,-730.11 649.71,-716.35 680.06,-705.33"/>
|
||||
<polygon fill="#ffa500" stroke="#ffa500" points="681.3,-708.6 689.48,-701.85 678.88,-702.03 681.3,-708.6"/>
|
||||
<text text-anchor="middle" x="514" y="-748.8" font-family="Times,serif" font-size="14.00" fill="#ffa500">RemoteReservationCanceled</text>
|
||||
</g>
|
||||
<!-- RepWaitBackfillReserved -->
|
||||
<g id="node24" class="node">
|
||||
<title>RepWaitBackfillReserved</title>
|
||||
<ellipse fill="none" stroke="black" cx="757" cy="-580" rx="127.28" ry="18"/>
|
||||
<text text-anchor="middle" x="757" y="-576.3" font-family="Times,serif" font-size="14.00">RepWaitBackfillReserved</text>
|
||||
</g>
|
||||
<!-- RepWaitBackfillReserved->RepRecovering -->
|
||||
<g id="edge64" class="edge">
|
||||
<title>RepWaitBackfillReserved->RepRecovering</title>
|
||||
<path fill="none" stroke="#000000" d="M669.39,-593.13C563.69,-607.97 399.48,-631.77 395,-637 363.58,-673.66 383.91,-733.89 399.98,-768.43"/>
|
||||
<polygon fill="#000000" stroke="#000000" points="397.04,-770.4 404.56,-777.86 403.34,-767.34 397.04,-770.4"/>
|
||||
<text text-anchor="middle" x="474.5" y="-684.3" font-family="Times,serif" font-size="14.00" fill="#000000">RemoteBackfillReserved</text>
|
||||
</g>
|
||||
<!-- RepWaitBackfillReserved->RepNotRecovering -->
|
||||
<g id="edge25" class="edge">
|
||||
<title>RepWaitBackfillReserved->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#0000ff" d="M851.8,-592.13C935.05,-603.88 1039.4,-624.51 1015,-652 1014.89,-652.13 887.51,-667.31 800.76,-677.63"/>
|
||||
<polygon fill="#0000ff" stroke="#0000ff" points="800.18,-674.18 790.66,-678.83 801,-681.13 800.18,-674.18"/>
|
||||
<text text-anchor="middle" x="1146" y="-640.8" font-family="Times,serif" font-size="14.00" fill="#0000ff">RemoteReservationRejectedTooFull</text>
|
||||
</g>
|
||||
<!-- RepWaitBackfillReserved->RepNotRecovering -->
|
||||
<g id="edge29" class="edge">
|
||||
<title>RepWaitBackfillReserved->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#000000" d="M698.09,-596.08C677.31,-604.53 656.12,-617.47 644,-637 634.42,-652.45 648.49,-664 667.21,-672.08"/>
|
||||
<polygon fill="#000000" stroke="#000000" points="666.1,-675.41 676.69,-675.78 668.64,-668.89 666.1,-675.41"/>
|
||||
<text text-anchor="middle" x="747" y="-640.8" font-family="Times,serif" font-size="14.00" fill="#000000">RemoteReservationCanceled</text>
|
||||
</g>
|
||||
<!-- RepWaitRecoveryReserved -->
|
||||
<g id="node25" class="node">
|
||||
<title>RepWaitRecoveryReserved</title>
|
||||
<ellipse fill="none" stroke="black" cx="284" cy="-580" rx="135.68" ry="18"/>
|
||||
<text text-anchor="middle" x="284" y="-576.3" font-family="Times,serif" font-size="14.00">RepWaitRecoveryReserved</text>
|
||||
</g>
|
||||
<!-- RepWaitRecoveryReserved->RepRecovering -->
|
||||
<g id="edge61" class="edge">
|
||||
<title>RepWaitRecoveryReserved->RepRecovering</title>
|
||||
<path fill="none" stroke="#ffa500" d="M252.44,-597.79C227.61,-612.96 194.81,-637.84 180,-670 167.55,-697.05 161.65,-713.55 180,-737 185.3,-743.78 280.47,-765.81 347.79,-780.68"/>
|
||||
<polygon fill="#ffa500" stroke="#ffa500" points="347.4,-784.18 357.92,-782.91 348.9,-777.34 347.4,-784.18"/>
|
||||
<text text-anchor="middle" x="274" y="-684.3" font-family="Times,serif" font-size="14.00" fill="#ffa500">RemoteRecoveryReserved</text>
|
||||
</g>
|
||||
<!-- RepWaitRecoveryReserved->RepNotRecovering -->
|
||||
<g id="edge28" class="edge">
|
||||
<title>RepWaitRecoveryReserved->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#c71585" d="M311.41,-597.79C339.94,-614.42 386.46,-639.2 430,-652 440.1,-654.97 558.3,-668.63 641.18,-677.98"/>
|
||||
<polygon fill="#c71585" stroke="#c71585" points="641.03,-681.49 651.36,-679.13 641.81,-674.53 641.03,-681.49"/>
|
||||
<text text-anchor="middle" x="533" y="-640.8" font-family="Times,serif" font-size="14.00" fill="#c71585">RemoteReservationCanceled</text>
|
||||
</g>
|
||||
<!-- RepNotRecovering->RepWaitBackfillReserved -->
|
||||
<g id="edge62" class="edge">
|
||||
<title>RepNotRecovering->RepWaitBackfillReserved</title>
|
||||
<path fill="none" stroke="#40e0d0" d="M782.09,-677.74C811.03,-671.92 841.33,-663.35 850,-652 865.89,-631.18 842.19,-613.33 814.74,-600.73"/>
|
||||
<polygon fill="#40e0d0" stroke="#40e0d0" points="815.91,-597.42 805.34,-596.66 813.13,-603.84 815.91,-597.42"/>
|
||||
<text text-anchor="middle" x="926" y="-640.8" font-family="Times,serif" font-size="14.00" fill="#40e0d0">RequestBackfillPrio</text>
|
||||
</g>
|
||||
<!-- RepNotRecovering->RepWaitRecoveryReserved -->
|
||||
<g id="edge63" class="edge">
|
||||
<title>RepNotRecovering->RepWaitRecoveryReserved</title>
|
||||
<path fill="none" stroke="#c71585" d="M654.67,-678.68C628.02,-675.61 597.1,-672.31 569,-670 550.42,-668.47 246.82,-665.53 234,-652 220.15,-637.39 234.36,-618.7 251.12,-604.16"/>
|
||||
<polygon fill="#c71585" stroke="#c71585" points="253.37,-606.84 258.91,-597.81 248.95,-601.42 253.37,-606.84"/>
|
||||
<text text-anchor="middle" x="311" y="-640.8" font-family="Times,serif" font-size="14.00" fill="#c71585">RequestRecoveryPrio</text>
|
||||
</g>
|
||||
<!-- RepNotRecovering->RepNotRecovering -->
|
||||
<g id="edge21" class="edge">
|
||||
<title>RepNotRecovering->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#c71585" d="M841.63,-690.48C863.47,-690.17 878.79,-689.34 878.79,-688 878.79,-686.88 868.09,-686.12 851.82,-685.72"/>
|
||||
<polygon fill="#c71585" stroke="#c71585" points="851.7,-682.21 841.63,-685.52 851.56,-689.21 851.7,-682.21"/>
|
||||
<text text-anchor="middle" x="930.29" y="-684.3" font-family="Times,serif" font-size="14.00" fill="#c71585">RecoveryDone</text>
|
||||
</g>
|
||||
<!-- RepNotRecovering->RepNotRecovering -->
|
||||
<g id="edge23" class="edge">
|
||||
<title>RepNotRecovering->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#1e90ff" d="M823.64,-692.82C898.58,-694.11 981.79,-692.5 981.79,-688 981.79,-683.7 905.89,-682.04 833.78,-683.02"/>
|
||||
<polygon fill="#1e90ff" stroke="#1e90ff" points="833.58,-679.52 823.64,-683.18 833.69,-686.52 833.58,-679.52"/>
|
||||
<text text-anchor="middle" x="1109.79" y="-684.3" font-family="Times,serif" font-size="14.00" fill="#1e90ff">RemoteReservationRejectedTooFull</text>
|
||||
</g>
|
||||
<!-- RepNotRecovering->RepNotRecovering -->
|
||||
<g id="edge27" class="edge">
|
||||
<title>RepNotRecovering->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#40e0d0" d="M817.87,-693.59C970.41,-697.94 1237.79,-696.08 1237.79,-688 1237.79,-680.1 982.28,-678.14 828.32,-682.13"/>
|
||||
<polygon fill="#40e0d0" stroke="#40e0d0" points="827.78,-678.64 817.87,-682.41 827.97,-685.64 827.78,-678.64"/>
|
||||
<text text-anchor="middle" x="1340.79" y="-684.3" font-family="Times,serif" font-size="14.00" fill="#40e0d0">RemoteReservationCanceled</text>
|
||||
</g>
|
||||
<!-- WaitDeleteReserved -->
|
||||
<g id="node28" class="node">
|
||||
<title>WaitDeleteReserved</title>
|
||||
<polygon fill="lightgrey" stroke="black" points="1652,-598 1504.03,-580 1652,-562 1799.97,-580 1652,-598"/>
|
||||
<polyline fill="none" stroke="black" points="1515.94,-581.45 1515.94,-578.55 "/>
|
||||
<polyline fill="none" stroke="black" points="1640.09,-563.45 1663.91,-563.45 "/>
|
||||
<polyline fill="none" stroke="black" points="1788.06,-578.55 1788.06,-581.45 "/>
|
||||
<polyline fill="none" stroke="black" points="1663.91,-596.55 1640.09,-596.55 "/>
|
||||
<text text-anchor="middle" x="1652" y="-576.3" font-family="Times,serif" font-size="14.00">WaitDeleteReserved</text>
|
||||
</g>
|
||||
<!-- RepNotRecovering->WaitDeleteReserved -->
|
||||
<g id="edge18" class="edge">
|
||||
<title>RepNotRecovering->WaitDeleteReserved</title>
|
||||
<path fill="none" stroke="#0000ff" d="M1488,-629.63C1490,-629.3 1492,-628.97 1494,-628.63"/>
|
||||
<polygon fill="#0000ff" stroke="#0000ff" points="1486.71,-633.39 1496,-628.3 1485.57,-626.48 1486.71,-633.39"/>
|
||||
<text text-anchor="middle" x="1347" y="-640.8" font-family="Times,serif" font-size="14.00" fill="#0000ff">DeleteStart</text>
|
||||
</g>
|
||||
<!-- Stray->RepNotRecovering -->
|
||||
<g id="edge45" class="edge">
|
||||
<title>Stray->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#ff0000" d="M1507.04,-782.9C1504.29,-781.81 1501.45,-780.77 1497.9,-779.71"/>
|
||||
<polygon fill="#ff0000" stroke="#ff0000" points="1498.55,-776.27 1488,-777.15 1496.8,-783.04 1498.55,-776.27"/>
|
||||
<text text-anchor="middle" x="1312.5" y="-748.8" font-family="Times,serif" font-size="14.00" fill="#ff0000">MInfoRec</text>
|
||||
</g>
|
||||
<!-- Stray->RepNotRecovering -->
|
||||
<g id="edge47" class="edge">
|
||||
<title>Stray->RepNotRecovering</title>
|
||||
<path fill="none" stroke="#ffa500" d="M1506,-783.19C1503.47,-782.16 1500.9,-781.16 1497.68,-779.99"/>
|
||||
<polygon fill="#ffa500" stroke="#ffa500" points="1498.59,-776.61 1488,-776.69 1496.33,-783.23 1498.59,-776.61"/>
|
||||
<text text-anchor="middle" x="1464.5" y="-748.8" font-family="Times,serif" font-size="14.00" fill="#ffa500">MLogRec</text>
|
||||
</g>
|
||||
<!-- Stray->WaitDeleteReserved -->
|
||||
<g id="edge19" class="edge">
|
||||
<title>Stray->WaitDeleteReserved</title>
|
||||
<path fill="none" stroke="#ffa500" d="M1541.22,-778.56C1557.87,-748.87 1593.68,-685.01 1620.1,-637.89"/>
|
||||
<polygon fill="#ffa500" stroke="#ffa500" points="1623.25,-639.43 1625.08,-629 1617.14,-636.01 1623.25,-639.43"/>
|
||||
<text text-anchor="middle" x="1643" y="-684.3" font-family="Times,serif" font-size="14.00" fill="#ffa500">DeleteStart</text>
|
||||
</g>
|
||||
<!-- WaitDeleteReserved->WaitDeleteReserved -->
|
||||
<g id="edge54" class="edge">
|
||||
<title>WaitDeleteReserved->WaitDeleteReserved</title>
|
||||
<path fill="none" stroke="#ffa500" d="M1737.4,-587.64C1779.99,-588.7 1817.98,-586.15 1817.98,-580 1817.98,-574.35 1785.97,-571.74 1747.77,-572.17"/>
|
||||
<polygon fill="#ffa500" stroke="#ffa500" points="1747.34,-568.68 1737.4,-572.36 1747.46,-575.68 1747.34,-568.68"/>
|
||||
<text text-anchor="middle" x="1845.48" y="-576.3" font-family="Times,serif" font-size="14.00" fill="#ffa500">ActMap</text>
|
||||
</g>
|
||||
<!-- Deleting -->
|
||||
<g id="node29" class="node">
|
||||
<title>Deleting</title>
|
||||
<ellipse fill="none" stroke="black" cx="1614" cy="-493" rx="50.09" ry="18"/>
|
||||
<text text-anchor="middle" x="1614" y="-489.3" font-family="Times,serif" font-size="14.00">Deleting</text>
|
||||
</g>
|
||||
<!-- WaitDeleteReserved->Deleting -->
|
||||
<g id="edge34" class="edge">
|
||||
<title>WaitDeleteReserved->Deleting</title>
|
||||
<path fill="none" stroke="#40e0d0" d="M1579.03,-570.87C1563.44,-565.6 1548.75,-557.23 1539,-544 1527.51,-528.42 1544.64,-516.22 1565.41,-507.78"/>
|
||||
<polygon fill="#40e0d0" stroke="#40e0d0" points="1566.64,-511.05 1574.78,-504.27 1564.19,-504.49 1566.64,-511.05"/>
|
||||
<text text-anchor="middle" x="1596" y="-532.8" font-family="Times,serif" font-size="14.00" fill="#40e0d0">DeleteReserved</text>
|
||||
</g>
|
||||
<!-- Deleting->WaitDeleteReserved -->
|
||||
<g id="edge35" class="edge">
|
||||
<title>Deleting->WaitDeleteReserved</title>
|
||||
<path fill="none" stroke="#c71585" d="M1637.31,-509.28C1643.59,-514.69 1649.59,-521.36 1653,-529 1656.16,-536.09 1656.99,-544.32 1656.74,-552.03"/>
|
||||
<polygon fill="#c71585" stroke="#c71585" points="1653.24,-551.8 1655.83,-562.08 1660.22,-552.43 1653.24,-551.8"/>
|
||||
<text text-anchor="middle" x="1721" y="-532.8" font-family="Times,serif" font-size="14.00" fill="#c71585">DeleteInterrupted</text>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 49 KiB |
@ -106,3 +106,6 @@ First stable release
|
||||
|
||||
- [ ] src/ceph_release: change type `stable`
|
||||
- [ ] generate new object corpus for encoding/decoding tests - see :doc:`corpus`
|
||||
- [ ] src/cephadm/cephadmlib/constants.py: update `LATEST_STABLE_RELEASE`
|
||||
- [ ] activate latest release in readthedocs, as described in `the readthedocs
|
||||
documentation <https://docs.readthedocs.io/en/stable/versions.html>`_
|
||||
|
@ -97,15 +97,15 @@ We'll use a stable/regular 15.2.17 release of Octopus as an example throughout t
|
||||
|
||||
4. Use https://docs.ceph.com/en/latest/start/os-recommendations/?highlight=debian#platforms to determine the ``DISTROS`` parameter. For example,
|
||||
|
||||
+-------------------+-------------------------------------------+
|
||||
| Release | Distro Codemap |
|
||||
+===================+===========================================+
|
||||
| octopus (15.X.X) | ``focal bionic centos7 centos8 buster`` |
|
||||
+-------------------+-------------------------------------------+
|
||||
| pacific (16.X.X) | ``focal bionic centos8 buster bullseye`` |
|
||||
+-------------------+-------------------------------------------+
|
||||
| quincy (17.X.X) | ``focal centos8 centos9 bullseye`` |
|
||||
+-------------------+-------------------------------------------+
|
||||
+-------------------+--------------------------------------------------+
|
||||
| Release | Distro Codemap |
|
||||
+===================+==================================================+
|
||||
| octopus (15.X.X) | ``focal bionic centos7 centos8 buster`` |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| pacific (16.X.X) | ``focal bionic buster bullseye`` |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| quincy (17.X.X) | ``jammy focal centos9 bullseye`` |
|
||||
+-------------------+--------------------------------------------------+
|
||||
|
||||
5. Click ``Build``.
|
||||
|
||||
@ -179,11 +179,11 @@ See `the Ceph Tracker wiki page that explains how to write the release notes <ht
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
sign-rpms octopus
|
||||
sign-rpms ceph octopus
|
||||
|
||||
Example::
|
||||
|
||||
$ sign-rpms octopus
|
||||
|
||||
$ sign-rpms ceph octopus
|
||||
Checking packages in: /opt/repos/ceph/octopus-15.2.17/centos/7
|
||||
signing: /opt/repos/ceph/octopus-15.2.17/centos/7/SRPMS/ceph-release-1-1.el7.src.rpm
|
||||
/opt/repos/ceph/octopus-15.2.17/centos/7/SRPMS/ceph-release-1-1.el7.src.rpm:
|
||||
@ -197,7 +197,13 @@ See `the Ceph Tracker wiki page that explains how to write the release notes <ht
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sync-push octopus
|
||||
sync-push ceph octopus
|
||||
|
||||
This leaves the packages in a password-protected prerelease area
|
||||
at https://download.ceph.com/prerelease/ceph. Verify them from there.
|
||||
When done and ready for release, mv the directories to the release
|
||||
directory (that is, "mv <whatever you're promoting> ../..").
|
||||
|
||||
|
||||
5. Build Containers
|
||||
===================
|
||||
|
@ -15,10 +15,12 @@
|
||||
introduced in the Ceph Kraken release. The Luminous release of
|
||||
Ceph promoted BlueStore to the default OSD back end,
|
||||
supplanting FileStore. As of the Reef release, FileStore is no
|
||||
longer available as a storage backend.
|
||||
longer available as a storage back end.
|
||||
|
||||
BlueStore stores objects directly on Ceph block devices without
|
||||
a mounted file system.
|
||||
BlueStore stores objects directly on raw block devices or
|
||||
partitions, and does not interact with mounted file systems.
|
||||
BlueStore uses RocksDB's key/value database to map object names
|
||||
to block locations on disk.
|
||||
|
||||
Bucket
|
||||
In the context of :term:`RGW`, a bucket is a group of objects.
|
||||
@ -40,6 +42,11 @@
|
||||
Ceph is a distributed network storage and file system with
|
||||
distributed metadata management and POSIX semantics.
|
||||
|
||||
`ceph-ansible <https://docs.ceph.com/projects/ceph-ansible/en/latest/index.html>`_
|
||||
A GitHub repository, supported from the Jewel release to the
|
||||
Quincy release, that facilitates the installation of a Ceph
|
||||
cluster.
|
||||
|
||||
Ceph Block Device
|
||||
A software instrument that orchestrates the storage of
|
||||
block-based data in Ceph. Ceph Block Device (also called "RBD",
|
||||
@ -88,6 +95,11 @@
|
||||
object store, RADOS. See :ref:`CephFS Architecture
|
||||
<arch-cephfs>` for more details.
|
||||
|
||||
:ref:`ceph-fuse <man-ceph-fuse>`
|
||||
:ref:`ceph-fuse <man-ceph-fuse>` is a FUSE ("**F**\ilesystem in
|
||||
**USE**\rspace") client for CephFS. ceph-fuse mounts a Ceph FS
|
||||
at a specified mount point.
|
||||
|
||||
Ceph Interim Release
|
||||
See :term:`Releases`.
|
||||
|
||||
@ -211,9 +223,24 @@
|
||||
Ceph cluster. See :ref:`the "Cluster Map" section of the
|
||||
Architecture document<architecture_cluster_map>` for details.
|
||||
|
||||
Crimson
|
||||
A next-generation OSD architecture whose core aim is the
|
||||
reduction of latency costs incurred due to cross-core
|
||||
communications. A re-design of the OSD that reduces lock
|
||||
contention by reducing communication between shards in the data
|
||||
path. Crimson improves upon the performance of classic Ceph
|
||||
OSDs by eliminating reliance on thread pools. See `Crimson:
|
||||
Next-generation Ceph OSD for Multi-core Scalability
|
||||
<https://ceph.io/en/news/blog/2023/crimson-multi-core-scalability/>`_.
|
||||
See the :ref:`Crimson developer
|
||||
documentation<crimson_dev_doc>`.
|
||||
|
||||
CRUSH
|
||||
Controlled Replication Under Scalable Hashing. It is the
|
||||
algorithm Ceph uses to compute object storage locations.
|
||||
**C**\ontrolled **R**\eplication **U**\nder **S**\calable
|
||||
**H**\ashing. The algorithm that Ceph uses to compute object
|
||||
storage locations. See `CRUSH: Controlled, Scalable,
|
||||
Decentralized Placement of Replicated Data
|
||||
<https://ceph.com/assets/pdfs/weil-crush-sc06.pdf>`_.
|
||||
|
||||
CRUSH rule
|
||||
The CRUSH data placement rule that applies to a particular
|
||||
@ -256,30 +283,76 @@
|
||||
Hybrid OSD
|
||||
Refers to an OSD that has both HDD and SSD drives.
|
||||
|
||||
librados
|
||||
An API that can be used to create a custom interface to a Ceph
|
||||
storage cluster. ``librados`` makes it possible to interact
|
||||
with Ceph Monitors and with OSDs. See :ref:`Introduction to
|
||||
librados <librados-intro>`. See :ref:`librados (Python)
|
||||
<librados-python>`.
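A minimal sketch of the Python binding (this example assumes a reachable
cluster, a readable ``/etc/ceph/ceph.conf`` with ``client.admin``
credentials, and an existing pool named ``rbd``; the pool name and paths
are illustrative, not part of this definition)::

    import rados

    # Connect using the defaults from ceph.conf (client.admin keyring).
    cluster = rados.Rados(conffile='/etc/ceph/ceph.conf')
    cluster.connect()
    print(cluster.get_fsid())                # report the cluster FSID

    # Open an I/O context on an existing pool and store a single object.
    ioctx = cluster.open_ioctx('rbd')
    ioctx.write_full('greeting', b'hello from librados')
    print(ioctx.read('greeting'))

    ioctx.close()
    cluster.shutdown()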
|
||||
|
||||
LVM tags
|
||||
Extensible metadata for LVM volumes and groups. It is used to
|
||||
store Ceph-specific information about devices and their
relationship with OSDs.
|
||||
|
||||
:ref:`MDS<cephfs_add_remote_mds>`
|
||||
MDS
|
||||
The Ceph **M**\eta\ **D**\ata **S**\erver daemon. Also referred
|
||||
to as "ceph-mds". The Ceph metadata server daemon must be
|
||||
running in any Ceph cluster that runs the CephFS file system.
|
||||
The MDS stores all filesystem metadata.
|
||||
The MDS stores all filesystem metadata. :term:`Client`\s work
|
||||
together with either a single MDS or a group of MDSes to
|
||||
maintain a distributed metadata cache that is required by
|
||||
CephFS.
|
||||
|
||||
See :ref:`Deploying Metadata Servers<cephfs_add_remote_mds>`.
|
||||
|
||||
See the :ref:`ceph-mds man page<ceph_mds_man>`.
|
||||
|
||||
MGR
|
||||
The Ceph manager software, which collects all the state from
|
||||
the whole cluster in one place.
|
||||
|
||||
MON
|
||||
:ref:`MON<arch_monitor>`
|
||||
The Ceph monitor software.
|
||||
|
||||
Monitor Store
|
||||
The persistent storage that is used by the Monitor. This
|
||||
includes the Monitor's RocksDB and all related files in
|
||||
``/var/lib/ceph``.
|
||||
|
||||
Node
|
||||
See :term:`Ceph Node`.
|
||||
|
||||
Object Storage
|
||||
Object storage is one of three kinds of storage relevant to
|
||||
Ceph. The other two kinds of storage relevant to Ceph are file
|
||||
storage and block storage. Object storage is the category of
|
||||
storage most fundamental to Ceph.
|
||||
|
||||
Object Storage Device
|
||||
See :term:`OSD`.
|
||||
|
||||
OMAP
|
||||
"object map". A key-value store (a database) that is used to
|
||||
reduce the time it takes to read data from and to write to the
|
||||
Ceph cluster. RGW bucket indexes are stored as OMAPs.
|
||||
Erasure-coded pools cannot store RADOS OMAP data structures.
|
||||
|
||||
Run the command ``ceph osd df`` to see your OMAPs.
|
||||
|
||||
See Eleanor Cawthon's 2012 paper `A Distributed Key-Value Store
|
||||
using Ceph
|
||||
<https://ceph.io/assets/pdfs/CawthonKeyValueStore.pdf>`_ (17
|
||||
pages).
|
||||
|
||||
OpenStack Swift
|
||||
In the context of Ceph, OpenStack Swift is one of the two APIs
|
||||
supported by the Ceph Object Store. The other API supported by
|
||||
the Ceph Object Store is S3.
|
||||
|
||||
See `the OpenStack Storage API overview page
|
||||
<https://docs.openstack.org/swift/latest/api/object_api_v1_overview.html>`_.
|
||||
|
||||
OSD
|
||||
Probably :term:`Ceph OSD`, but not necessarily. Sometimes
|
||||
(especially in older correspondence, and especially in
|
||||
@ -291,18 +364,19 @@
|
||||
mid-2010s to insist that "OSD" should refer to "Object Storage
|
||||
Device", so it is important to know which meaning is intended.
|
||||
|
||||
OSD fsid
|
||||
This is a unique identifier used to identify an OSD. It is
|
||||
found in the OSD path in a file called ``osd_fsid``. The
|
||||
term ``fsid`` is used interchangeably with ``uuid``
|
||||
OSD FSID
|
||||
The OSD fsid is a unique identifier that is used to identify an
|
||||
OSD. It is found in the OSD path in a file called ``osd_fsid``.
|
||||
The term ``FSID`` is used interchangeably with ``UUID``.
|
||||
|
||||
OSD id
|
||||
The integer that defines an OSD. It is generated by the
|
||||
monitors during the creation of each OSD.
|
||||
OSD ID
|
||||
The OSD ID is an integer unique to each OSD (each OSD has a unique
OSD ID). Each OSD ID is generated by the monitors during the
creation of its associated OSD.
|
||||
|
||||
OSD uuid
|
||||
This is the unique identifier of an OSD. This term is used
|
||||
interchangeably with ``fsid``
|
||||
OSD UUID
|
||||
The OSD UUID is the unique identifier of an OSD. This term is
|
||||
used interchangeably with ``FSID``.
|
||||
|
||||
Period
|
||||
In the context of :term:`RGW`, a period is the configuration
|
||||
@ -324,7 +398,15 @@
|
||||
placement group, and each placement group belongs to exactly
|
||||
one Ceph pool.
|
||||
|
||||
PLP
|
||||
**P**\ower **L**\oss **P**\rotection. A technology that
|
||||
protects the data of solid-state drives by using capacitors to
|
||||
extend the amount of time available for transferring data from
|
||||
the DRAM cache to the SSD's permanent memory. Consumer-grade
|
||||
SSDs are rarely equipped with PLP.
|
||||
|
||||
:ref:`Pool<rados_pools>`
|
||||
|
||||
A pool is a logical partition used to store objects.
|
||||
|
||||
Pools
|
||||
@ -337,6 +419,18 @@
|
||||
Firefly (v. 0.80). See :ref:`Primary Affinity
|
||||
<rados_ops_primary_affinity>`.
|
||||
|
||||
:ref:`Prometheus <mgr-prometheus>`
|
||||
An open-source monitoring and alerting toolkit. Ceph offers a
|
||||
:ref:`"Prometheus module" <mgr-prometheus>`, which provides a
|
||||
Prometheus exporter that passes performance counters from a
|
||||
collection point in ``ceph-mgr`` to Prometheus.
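A hedged sketch of a Prometheus scrape job for that exporter (the host
name and the module's default port ``9283`` are assumptions; run
``ceph mgr services`` to confirm the actual endpoint)::

    scrape_configs:
      - job_name: 'ceph'
        static_configs:
          - targets: ['mgr-host.example.com:9283']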
|
||||
|
||||
Quorum
|
||||
Quorum is the state that exists when a majority of the
|
||||
:ref:`Monitors<arch_monitor>` in the cluster are ``up``. A
|
||||
minimum of three :ref:`Monitors<arch_monitor>` must exist in
|
||||
the cluster in order for Quorum to be possible.
|
||||
|
||||
RADOS
|
||||
**R**\eliable **A**\utonomic **D**\istributed **O**\bject
|
||||
**S**\tore. RADOS is the object store that provides a scalable
|
||||
@ -399,6 +493,14 @@
|
||||
Amazon S3 RESTful API and the OpenStack Swift API. Also called
|
||||
"RADOS Gateway" and "Ceph Object Gateway".
|
||||
|
||||
S3
|
||||
In the context of Ceph, S3 is one of the two APIs supported by
|
||||
the Ceph Object Store. The other API supported by the Ceph
|
||||
Object Store is OpenStack Swift.
|
||||
|
||||
See `the Amazon S3 overview page
|
||||
<https://aws.amazon.com/s3/>`_.
|
||||
|
||||
scrubs
|
||||
|
||||
The processes by which Ceph ensures data integrity. During the
|
||||
@ -435,6 +537,9 @@
|
||||
which will exit upon completion (it is not intended to
|
||||
daemonize)
|
||||
|
||||
Swift
|
||||
See :term:`OpenStack Swift`.
|
||||
|
||||
Teuthology
|
||||
The collection of software that performs scripted tests on Ceph.
|
||||
|
||||
|
@ -27,10 +27,9 @@ all code contributions go through a collaborative review process (and
|
||||
undergo testing) before being merged. The specifics of this process
|
||||
are dynamic and evolving over time.
|
||||
|
||||
New committers are added to the project (or committers removed from
|
||||
the project) at the discretion of the Ceph Leadership Team (below).
|
||||
The criteria for becoming a contributor include a consistent level of
|
||||
quality and engagement in the project over time.
|
||||
* Dan van der Ster <dan.vanderster@clyso.com>
|
||||
* Josh Durgin <jdurgin@redhat.com>
|
||||
* Neha Ojha <nojha@redhat.com>
|
||||
|
||||
|
||||
.. _clt:
|
||||
@ -55,16 +54,16 @@ Current CLT members are:
|
||||
* Casey Bodley <cbodley@redhat.com>
|
||||
* Dan van der Ster <dan.vanderster@clyso.com>
|
||||
* David Orman <ormandj@1111systems.com>
|
||||
* Ernesto Puerta <epuerta@redhat.com>
|
||||
* Ernesto Puerta <epuertat@redhat.com>
|
||||
* Gregory Farnum <gfarnum@redhat.com>
|
||||
* Haomai Wang <haomai@xsky.com>
|
||||
* Ilya Dryomov <idryomov@redhat.com>
|
||||
* Igor Fedotov <igor.fedotov@croit.io>
|
||||
* Jeff Layton <jlayton@redhat.com>
|
||||
* Josh Durgin <jdurgin@redhat.com>
|
||||
* João Eduardo Luis <joao@suse.de>
|
||||
* João Eduardo Luis <joao@clyso.com>
|
||||
* Ken Dreyer <kdreyer@redhat.com>
|
||||
* Mark Nelson <mnelson@redhat.com>
|
||||
* Mark Nelson <mark.nelson@clyso.com>
|
||||
* Matt Benjamin <mbenjami@redhat.com>
|
||||
* Mike Perez <miperez@redhat.com>
|
||||
* Myoungwon Oh <myoungwon.oh@samsung.com>
|
||||
@ -76,7 +75,7 @@ Current CLT members are:
|
||||
* Xie Xingguo <xie.xingguo@zte.com.cn>
|
||||
* Yehuda Sadeh <yehuda@redhat.com>
|
||||
* Yuri Weinstein <yweinste@redhat.com>
|
||||
* Zac Dover <zac.dover@gmail.com>
|
||||
* Zac Dover <zac.dover@proton.me>
|
||||
|
||||
Component Leads
|
||||
---------------
|
||||
|
BIN
ceph/doc/images/windows_ci_artifacts.png
Normal file
BIN
ceph/doc/images/windows_ci_artifacts.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 31 KiB |
BIN
ceph/doc/images/windows_ci_html_report.png
Normal file
BIN
ceph/doc/images/windows_ci_html_report.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 56 KiB |
BIN
ceph/doc/images/windows_ci_status_page.png
Normal file
BIN
ceph/doc/images/windows_ci_status_page.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 29 KiB |
@ -94,14 +94,14 @@ about Ceph, see our `Architecture`_ section.
|
||||
.. _Ceph Object Store: radosgw
|
||||
.. _Ceph Block Device: rbd
|
||||
.. _Ceph File System: cephfs
|
||||
.. _Getting Started: install
|
||||
.. _Getting Started: start
|
||||
.. _Architecture: architecture
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
:hidden:
|
||||
|
||||
start/intro
|
||||
start/index
|
||||
install/index
|
||||
cephadm/index
|
||||
rados/index
|
||||
|
@ -98,59 +98,7 @@ repository.
|
||||
Updating Submodules
|
||||
-------------------
|
||||
|
||||
#. Determine whether your submodules are out of date:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git status
|
||||
|
||||
A. If your submodules are up to date
|
||||
If your submodules are up to date, the following console output will
|
||||
appear:
|
||||
|
||||
::
|
||||
|
||||
On branch main
|
||||
Your branch is up to date with 'origin/main'.
|
||||
|
||||
nothing to commit, working tree clean
|
||||
|
||||
If you see this console output, then your submodules are up to date.
|
||||
You do not need this procedure.
|
||||
|
||||
|
||||
B. If your submodules are not up to date
|
||||
If your submodules are not up to date, you will see a message that
|
||||
includes a list of "untracked files". The example here shows such a
|
||||
list, which was generated from a real situation in which the
|
||||
submodules were no longer current. Your list of files will not be the
|
||||
same as this list of files, but this list is provided as an example.
|
||||
If in your case any untracked files are listed, then you should
|
||||
continue to the next step of this procedure.
|
||||
|
||||
::
|
||||
|
||||
On branch main
|
||||
Your branch is up to date with 'origin/main'.
|
||||
|
||||
Untracked files:
|
||||
(use "git add <file>..." to include in what will be committed)
|
||||
src/pybind/cephfs/build/
|
||||
src/pybind/cephfs/cephfs.c
|
||||
src/pybind/cephfs/cephfs.egg-info/
|
||||
src/pybind/rados/build/
|
||||
src/pybind/rados/rados.c
|
||||
src/pybind/rados/rados.egg-info/
|
||||
src/pybind/rbd/build/
|
||||
src/pybind/rbd/rbd.c
|
||||
src/pybind/rbd/rbd.egg-info/
|
||||
src/pybind/rgw/build/
|
||||
src/pybind/rgw/rgw.c
|
||||
src/pybind/rgw/rgw.egg-info/
|
||||
|
||||
nothing added to commit but untracked files present (use "git add" to track)
|
||||
|
||||
#. If your submodules are out of date, run the following commands:
|
||||
If your submodules are out of date, run the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
@ -158,24 +106,10 @@ Updating Submodules
|
||||
git clean -fdx
|
||||
git submodule foreach git clean -fdx
|
||||
|
||||
If you still have problems with a submodule directory, use ``rm -rf
|
||||
[directory name]`` to remove the directory. Then run ``git submodule update
|
||||
--init --recursive`` again.
|
||||
If you still have problems with a submodule directory, use ``rm -rf [directory
|
||||
name]`` to remove the directory. Then run ``git submodule update --init
|
||||
--recursive --progress`` again.
|
||||
|
||||
#. Run ``git status`` again:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git status
|
||||
|
||||
Your submodules are up to date if you see the following message:
|
||||
|
||||
::
|
||||
|
||||
On branch main
|
||||
Your branch is up to date with 'origin/main'.
|
||||
|
||||
nothing to commit, working tree clean
|
||||
|
||||
Choose a Branch
|
||||
===============
|
||||
|
@ -251,6 +251,17 @@ openSUSE Tumbleweed
|
||||
The newest major release of Ceph is already available through the normal Tumbleweed repositories.
|
||||
There's no need to add another package repository manually.
|
||||
|
||||
openEuler
|
||||
^^^^^^^^^
|
||||
|
||||
There are two Ceph releases supported in normal openEuler repositories. They are Ceph 12.2.8 in the openEuler-20.03-LTS series and Ceph 16.2.7 in the openEuler-22.03-LTS series. There’s no need to add another package repository manually.
|
||||
You can install Ceph by executing the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo yum -y install ceph
|
||||
|
||||
You can also download packages manually from https://repo.openeuler.org/openEuler-{release}/everything/{arch}/Packages/.
|
||||
|
||||
Ceph Development Packages
|
||||
-------------------------
|
||||
|
@ -4,14 +4,13 @@
|
||||
Installing Ceph
|
||||
===============
|
||||
|
||||
There are multiple ways to install Ceph.
|
||||
There are multiple ways to install Ceph.
|
||||
|
||||
Recommended methods
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
:ref:`Cephadm <cephadm_deploying_new_cluster>` installs and manages a Ceph
|
||||
cluster that uses containers and systemd and is tightly integrated with the CLI
|
||||
and dashboard GUI.
|
||||
:ref:`Cephadm <cephadm_deploying_new_cluster>` is a tool that can be used to
|
||||
install and manage a Ceph cluster.
|
||||
|
||||
* cephadm supports only Octopus and newer releases.
|
||||
* cephadm is fully integrated with the orchestration API and fully supports the
|
||||
@ -59,6 +58,8 @@ tool that can be used to quickly deploy clusters. It is deprecated.
|
||||
|
||||
`github.com/openstack/puppet-ceph <https://github.com/openstack/puppet-ceph>`_ installs Ceph via Puppet.
|
||||
|
||||
`OpenNebula HCI clusters <https://docs.opennebula.io/stable/provision_clusters/hci_clusters/overview.html>`_ deploys Ceph on various cloud platforms.
|
||||
|
||||
Ceph can also be :ref:`installed manually <install-manual>`.
|
||||
|
||||
|
||||
|
@ -506,6 +506,52 @@ In the below instructions, ``{id}`` is an arbitrary name, such as the hostname o
|
||||
|
||||
#. Now you are ready to `create a Ceph file system`_.
|
||||
|
||||
Manually Installing RADOSGW
|
||||
===========================
|
||||
|
||||
For a more involved discussion of the procedure presented here, see `this
|
||||
thread on the ceph-users mailing list
|
||||
<https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/message/LB3YRIKAPOHXYCW7MKLVUJPYWYRQVARU/>`_.
|
||||
|
||||
#. Install ``radosgw`` packages on the nodes that will be the RGW nodes.
|
||||
|
||||
#. From a monitor or from a node with admin privileges, run a command of the
|
||||
following form:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph auth get-or-create client.$(hostname -s) mon 'allow rw' osd 'allow rwx'
|
||||
|
||||
#. On one of the RGW nodes, do the following:
|
||||
|
||||
a. Create a ``ceph-user``-owned directory. For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
install -d -o ceph -g ceph /var/lib/ceph/radosgw/ceph-$(hostname -s)
|
||||
|
||||
b. Enter the directory just created and create a ``keyring`` file:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
touch /var/lib/ceph/radosgw/ceph-$(hostname -s)/keyring
|
||||
|
||||
Use a command similar to this one to put the key from the earlier ``ceph
|
||||
auth get-or-create`` step in the ``keyring`` file. Use your preferred
|
||||
editor:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
$EDITOR /var/lib/ceph/radosgw/ceph-$(hostname -s)/keyring
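The keyring file uses the standard Ceph keyring format. A hedged sketch,
assuming a hypothetical short hostname ``rgw1``; paste the key string
returned by ``ceph auth get-or-create`` in place of the placeholder::

    [client.rgw1]
        key = <key from the ceph auth get-or-create output>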
|
||||
|
||||
c. Repeat these steps on every RGW node.
|
||||
|
||||
#. Start the RADOSGW service by running the following command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
systemctl start ceph-radosgw@$(hostname -s).service
|
||||
|
||||
|
||||
Summary
|
||||
=======
|
||||
|
@ -86,3 +86,4 @@ Further reading
|
||||
.. _Windows troubleshooting: ../windows-troubleshooting
|
||||
.. _General CephFS Prerequisites: ../../cephfs/mount-prerequisites
|
||||
.. _Client Authentication: ../../cephfs/client-auth
|
||||
.. _Windows testing: ../dev/tests-windows
|
||||
|
@ -10,17 +10,17 @@ BASIC ARCHITECTURE AND TERMINOLOGY
|
||||
----------------------------------
|
||||
|
||||
* TRACE: A trace shows the data/execution path through a system.
|
||||
* SPAN: A single unit of a trace, it is a data structure that stores
|
||||
information like operation name, timestamps, ordering in a trace.
|
||||
* JAEGER CLIENT: language-specific implementations of the OpenTracing API.
|
||||
* JAEGER AGENT: a daemon that listens for spans sent over User Datagram Protocol.
|
||||
The agent is meant to be placed on the same host as the instrumented
|
||||
application. (acts like a sidecar listener)
|
||||
* JAEGER COLLECTOR: Jaeger agent sends the spans to this daemon which then
|
||||
stitches the spans together to form a trace(if enabled, also persists a database
|
||||
for these traces)
|
||||
* JAEGER QUERY AND CONSOLE FRONTEND: UI based frontend to checkout the jaeger
|
||||
traces, navigate to http://<jaeger frontend host>:16686
|
||||
* SPAN: A single unit of a trace. A data structure that stores information such
|
||||
as the operation name, timestamps, and the ordering within a trace.
|
||||
* JAEGER CLIENT: Language-specific implementations of the OpenTracing API.
|
||||
* JAEGER AGENT: A daemon that listens for spans sent over User Datagram
|
||||
Protocol. The agent is meant to be placed on the same host as the
|
||||
instrumented application. (The Jaeger agent acts like a sidecar listener.)
|
||||
* JAEGER COLLECTOR: A daemon that receives spans sent by the Jaeger agent. The
|
||||
Jaeger collector then stitches the spans together to form a trace. (A database
|
||||
can be enabled to persist these traces).
|
||||
* JAEGER QUERY AND CONSOLE FRONTEND: The UI-based frontend that presents
|
||||
reports of the jaeger traces. Accessible at http://<jaeger frontend host>:16686.
|
||||
|
||||
|
||||
Read more about Jaeger tracing:
|
||||
|
@ -93,15 +93,17 @@ Commands
|
||||
|
||||
:command:`bluefs-bdev-migrate` --dev-target *new-device* --devs-source *device1* [--devs-source *device2*]
|
||||
|
||||
Moves BlueFS data from source device(s) to the target one, source devices
|
||||
(except the main one) are removed on success. Target device can be both
|
||||
already attached or new device. In the latter case it's added to OSD
|
||||
replacing one of the source devices. Following replacement rules apply
|
||||
(in the order of precedence, stop on the first match):
|
||||
Moves BlueFS data from source device(s) to the target device. Source devices
|
||||
(except the main one) are removed on success. Expands the target storage
|
||||
(updates the size label), making "bluefs-bdev-expand" unnecessary. The
|
||||
target device can be either a new device or a device that is already
|
||||
attached. If the device is a new device, it is added to the OSD replacing
|
||||
one of the source devices. The following replacement rules apply (in the
|
||||
order of precedence, stop on the first match):
|
||||
|
||||
- if source list has DB volume - target device replaces it.
|
||||
- if source list has WAL volume - target device replace it.
|
||||
- if source list has slow volume only - operation isn't permitted, requires explicit allocation via new-db/new-wal command.
|
||||
- if the source list has DB volume - the target device replaces it.
|
||||
- if the source list has WAL volume - the target device replaces it.
|
||||
- if the source list has slow volume only - the operation isn't permitted and requires explicit allocation via a new-DB/new-WAL command.
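A hedged example of migrating an existing DB volume to a new device, run
with the OSD stopped (the OSD path and the target logical volume
``/dev/vg_ceph/new-db`` are illustrative assumptions)::

    ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-0 --devs-source /var/lib/ceph/osd/ceph-0/block.db --dev-target /dev/vg_ceph/new-db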
|
||||
|
||||
:command:`show-label` --dev *device* [...]
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
:orphan:
|
||||
|
||||
.. _man-ceph-fuse:
|
||||
|
||||
=========================================
|
||||
ceph-fuse -- FUSE-based client for ceph
|
||||
=========================================
|
||||
|
@ -1,5 +1,7 @@
|
||||
:orphan:
|
||||
|
||||
.. _ceph_mds_man:
|
||||
|
||||
=========================================
|
||||
ceph-mds -- ceph metadata server daemon
|
||||
=========================================
|
||||
|
@ -60,6 +60,8 @@ Possible -op commands::
|
||||
* meta-list
|
||||
* get-osdmap
|
||||
* set-osdmap
|
||||
* get-superblock
|
||||
* set-superblock
|
||||
* get-inc-osdmap
|
||||
* set-inc-osdmap
|
||||
* mark-complete
|
||||
@ -248,47 +250,58 @@ Procedure
|
||||
Manipulating the Object Map Key
|
||||
-------------------------------
|
||||
|
||||
Use the **ceph-objectstore-tool** utility to change the object map (OMAP) key. You need to provide the data path, the placement group identifier (PG ID), the object, and the key in the OMAP.
|
||||
Note
|
||||
Use the **ceph-objectstore-tool** utility to change the object map (OMAP) key.
|
||||
Provide the data path, the placement group identifier (PG ID), the object, and
|
||||
the key in the OMAP.
|
||||
|
||||
If using FileStore as the OSD backend object store, then add the `--journal-path $PATH_TO_JOURNAL` argument when getting, setting or removing the object map key, where the `$PATH_TO_JOURNAL` variable is the absolute path to the OSD journal; for example `/var/lib/ceph/osd/ceph-0/journal`.
|
||||
|
||||
Prerequisites
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* Having root access to the Ceph OSD node.
|
||||
* Stopping the ceph-osd daemon.
|
||||
|
||||
Procedure
|
||||
Commands
|
||||
^^^^^^^^
|
||||
|
||||
Get the object map key:
|
||||
Run the commands in this section as ``root`` on an OSD node.
|
||||
|
||||
Syntax::
|
||||
* **Getting the object map key**
|
||||
|
||||
Syntax:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
ceph-objectstore-tool --data-path $PATH_TO_OSD --pgid $PG_ID $OBJECT get-omap $KEY > $OBJECT_MAP_FILE_NAME
|
||||
ceph-objectstore-tool --data-path $PATH_TO_OSD --pgid $PG_ID $OBJECT get-omap $KEY > $OBJECT_MAP_FILE_NAME
|
||||
|
||||
Example::
|
||||
|
||||
[root@osd ~]# ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 --pgid 0.1c '{"oid":"zone_info.default","key":"","snapid":-2,"hash":235010478,"max":0,"pool":11,"namespace":""}' get-omap "" > zone_info.default.omap.txt
|
||||
ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 --pgid 0.1c '{"oid":"zone_info.default","key":"","snapid":-2,"hash":235010478,"max":0,"pool":11,"namespace":""}' get-omap "" > zone_info.default.omap.txt
|
||||
|
||||
Set the object map key:
|
||||
* **Setting the object map key**
|
||||
|
||||
Syntax::
|
||||
Syntax:
|
||||
|
||||
ceph-objectstore-tool --data-path $PATH_TO_OSD --pgid $PG_ID $OBJECT set-omap $KEY < $OBJECT_MAP_FILE_NAME
|
||||
.. code-block:: ini
|
||||
|
||||
ceph-objectstore-tool --data-path $PATH_TO_OSD --pgid $PG_ID $OBJECT set-omap $KEY < $OBJECT_MAP_FILE_NAME
|
||||
|
||||
Example::
|
||||
|
||||
[root@osd ~]# ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 --pgid 0.1c '{"oid":"zone_info.default","key":"","snapid":-2,"hash":235010478,"max":0,"pool":11,"namespace":""}' set-omap "" < zone_info.default.omap.txt
|
||||
ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 --pgid 0.1c '{"oid":"zone_info.default","key":"","snapid":-2,"hash":235010478,"max":0,"pool":11,"namespace":""}' set-omap "" < zone_info.default.omap.txt
|
||||
|
||||
Remove the object map key:
|
||||
* **Removing the object map key**
|
||||
|
||||
Syntax::
|
||||
Syntax:
|
||||
|
||||
ceph-objectstore-tool --data-path $PATH_TO_OSD --pgid $PG_ID $OBJECT rm-omap $KEY
|
||||
.. code-block:: ini
|
||||
|
||||
ceph-objectstore-tool --data-path $PATH_TO_OSD --pgid $PG_ID $OBJECT rm-omap $KEY
|
||||
|
||||
Example::
|
||||
|
||||
[root@osd ~]# ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 --pgid 0.1c '{"oid":"zone_info.default","key":"","snapid":-2,"hash":235010478,"max":0,"pool":11,"namespace":""}' rm-omap ""
|
||||
ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 --pgid 0.1c '{"oid":"zone_info.default","key":"","snapid":-2,"hash":235010478,"max":0,"pool":11,"namespace":""}' rm-omap ""
|
||||
|
||||
|
||||
Listing an Object's Attributes
|
||||
@ -414,7 +427,7 @@ Options
|
||||
|
||||
.. option:: --op arg
|
||||
|
||||
Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log]
|
||||
Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-superblock, set-superblock, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log]
|
||||
|
||||
.. option:: --epoch arg
|
||||
|
||||
@ -422,7 +435,7 @@ Options
|
||||
|
||||
.. option:: --file arg
|
||||
|
||||
path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap
|
||||
path of file to export, export-remove, import, get-osdmap, set-osdmap, get-superblock, set-superblock, get-inc-osdmap or set-inc-osdmap
|
||||
|
||||
.. option:: --mon-store-path arg
|
||||
|
||||
|
@ -18,14 +18,16 @@ Synopsis
|
||||
Description
|
||||
===========
|
||||
|
||||
**ceph-osd** is the object storage daemon for the Ceph distributed file
|
||||
system. It is responsible for storing objects on a local file system
|
||||
and providing access to them over the network.
|
||||
**ceph-osd** is the **o**\bject **s**\torage **d**\aemon for the Ceph
|
||||
distributed file system. It manages data on local storage with redundancy and
|
||||
provides access to that data over the network.
|
||||
|
||||
The datapath argument should be a directory on a xfs file system
|
||||
where the object data resides. The journal is optional, and is only
|
||||
useful performance-wise when it resides on a different disk than
|
||||
datapath with low latency (ideally, an NVRAM device).
|
||||
For Filestore-backed clusters, the argument of the ``--osd-data datapath``
|
||||
option (which is ``datapath`` in this example) should be a directory on an XFS
|
||||
file system where the object data resides. The journal is optional. The journal
|
||||
improves performance only when it resides on a different disk than the disk
|
||||
specified by ``datapath``. The storage medium on which the journal is stored
|
||||
should be a low-latency medium (ideally, an SSD device).
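A hedged example invocation for a Filestore OSD with id ``0`` (the paths
are illustrative)::

    ceph-osd -i 0 --osd-data /var/lib/ceph/osd/ceph-0 --osd-journal /var/lib/ceph/osd/ceph-0/journal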
|
||||
|
||||
|
||||
Options
|
||||
|
@ -1314,7 +1314,7 @@ Subcommand ``cache-mode`` specifies the caching mode for cache tier <pool>.
|
||||
|
||||
Usage::
|
||||
|
||||
ceph osd tier cache-mode <poolname> writeback|readproxy|readonly|none
|
||||
ceph osd tier cache-mode <poolname> writeback|proxy|readproxy|readonly|none
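Example, assuming a cache-tier pool named ``hot-storage`` (the pool name is
illustrative)::

    ceph osd tier cache-mode hot-storage writeback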
|
||||
|
||||
Subcommand ``remove`` removes the tier <tierpool> (the second one) from base pool
|
||||
<pool> (the first one).
|
||||
@ -1524,13 +1524,13 @@ Usage::
|
||||
Options
|
||||
=======
|
||||
|
||||
.. option:: -i infile
|
||||
.. option:: -i infile, --in-file=infile
|
||||
|
||||
will specify an input file to be passed along as a payload with the
|
||||
command to the monitor cluster. This is only used for specific
|
||||
monitor commands.
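For example, a hedged sketch that feeds a previously exported and compiled
CRUSH map back to the monitors (the file name is illustrative)::

    ceph osd setcrushmap -i newcrushmap.bin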
|
||||
|
||||
.. option:: -o outfile
|
||||
.. option:: -o outfile, --out-file=outfile
|
||||
|
||||
will write any payload returned by the monitor cluster with its
|
||||
reply to outfile. Only specific monitor commands (e.g. osd getmap)
|
||||
|
@ -1,5 +1,7 @@

:orphan:

.. _cephfs-shell:

===================================================
cephfs-shell -- Shell-like tool talking with CephFS
===================================================

@ -57,7 +59,7 @@ Options

.. code:: bash

    [build]$ python3 -m venv venv && source venv/bin/activate && pip3 install cmd2
    [build]$ source vstart_environment.sh && source venv/bin/activate && python3 ../src/tools/cephfs/cephfs-shell
    [build]$ source vstart_environment.sh && source venv/bin/activate && python3 ../src/tools/cephfs/shell/cephfs-shell

Commands
========
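Once the shell is running, a few of its built-in commands can be exercised interactively, for example (the paths and file names are placeholders, and this is only a sketch of a session, not output copied from the tool)::

    mkdir /dir1                      # create a directory in CephFS
    put ./localfile /dir1/remote     # copy a local file into CephFS
    ls /dir1                         # list the directory contents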
@ -192,12 +192,57 @@ Advanced

:command:`wsync`
  Execute all namespace operations synchronously. This ensures that the
  namespace operation will only complete after receiving a reply from
  the MDS. This is the default.
  the MDS.

:command:`nowsync`
  Allow the client to do namespace operations asynchronously. When this
  option is enabled, a namespace operation may complete before the MDS
  replies, if it has sufficient capabilities to do so.
  replies, if it has sufficient capabilities to do so. This has been the
  default since kernel version 5.16.

:command:`crush_location=x`
  Specify the location of the client in terms of CRUSH hierarchy (since 5.8).
  This is a set of key-value pairs separated from each other by '|', with
  keys separated from values by ':'. Note that '|' may need to be quoted
  or escaped to avoid it being interpreted as a pipe by the shell. The key
  is the bucket type name (e.g. rack, datacenter or region with default
  bucket types) and the value is the bucket name. For example, to indicate
  that the client is local to rack "myrack", data center "mydc" and region
  "myregion"::

    crush_location=rack:myrack|datacenter:mydc|region:myregion

  Each key-value pair stands on its own: "myrack" doesn't need to reside in
  "mydc", which in turn doesn't need to reside in "myregion". The location
  is not a path to the root of the hierarchy but rather a set of nodes that
  are matched independently. "Multipath" locations are supported, so it is
  possible to indicate locality for multiple parallel hierarchies::

    crush_location=rack:myrack1|rack:myrack2|datacenter:mydc

:command:`read_from_replica=<no|balance|localize>`
  - ``no``: Disable replica reads, always pick the primary OSD (since 5.8, default).

  - ``balance``: When a replicated pool receives a read request, pick a random
    OSD from the PG's acting set to serve it (since 5.8).

    This mode is safe for general use only since Octopus (i.e. after "ceph osd
    require-osd-release octopus"). Otherwise it should be limited to read-only
    workloads such as snapshots.

  - ``localize``: When a replicated pool receives a read request, pick the most
    local OSD to serve it (since 5.8). The locality metric is calculated against
    the location of the client given with crush_location; a match with the
    lowest-valued bucket type wins. For example, an OSD in a matching rack
    is closer than an OSD in a matching data center, which in turn is closer
    than an OSD in a matching region.

    This mode is safe for general use only since Octopus (i.e. after "ceph osd
    require-osd-release octopus"). Otherwise it should be limited to read-only
    workloads such as snapshots.

Examples
========
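To tie the advanced options above together, a kernel CephFS mount that prefers nearby replicas might look like this (the mount point, user name, and rack name are placeholders; a multi-key ``crush_location`` would additionally need its '|' separators quoted or escaped)::

    mount -t ceph :/ /mnt/cephfs \
        -o name=fs_user,crush_location=rack:myrack,read_from_replica=localize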
@ -336,7 +336,7 @@ as follows:

  List placement active set.

:command:`policy`
  Display bucket/object policy.
  Display bucket/object policies (e.g. permissions/ACLs etc.).

:command:`log list`
  List log objects.

@ -361,6 +361,9 @@ as follows:

:command:`gc process`
  Manually process garbage.

:command:`lc get`
  Get lifecycle config for a bucket.

:command:`lc list`
  List all bucket lifecycle progress.
@ -334,7 +334,7 @@ Commands

  be specified.

:command:`flatten` *image-spec*
  If image is a clone, copy all shared blocks from the parent snapshot and
  If the image is a clone, copy all shared blocks from the parent snapshot and
  make the child independent of the parent, severing the link between
  parent snap and child. The parent snapshot can be unprotected and
  deleted if it has no further dependent clones.

@ -391,7 +391,7 @@ Commands

  Set metadata key with the value. They will displayed in `image-meta list`.

:command:`import` [--export-format *format (1 or 2)*] [--image-format *format-id*] [--object-size *size-in-B/K/M*] [--stripe-unit *size-in-B/K/M* --stripe-count *num*] [--image-feature *feature-name*]... [--image-shared] *src-path* [*image-spec*]
  Create a new image and imports its data from path (use - for
  Create a new image and import its data from path (use - for
  stdin). The import operation will try to create sparse rbd images
  if possible. For import from stdin, the sparsification unit is
  the data block size of the destination image (object size).

@ -403,14 +403,14 @@ Commands

  of image, but also the snapshots and other properties, such as image_order, features.

:command:`import-diff` *src-path* *image-spec*
  Import an incremental diff of an image and applies it to the current image. If the diff
  Import an incremental diff of an image and apply it to the current image. If the diff
  was generated relative to a start snapshot, we verify that snapshot already exists before
  continuing. If there was an end snapshot we verify it does not already exist before
  applying the changes, and create the snapshot when we are done.
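An illustrative incremental backup flow built on these commands (pool, image, snapshot, and file names are placeholders, and the destination image is assumed to already carry ``snap1``)::

    rbd snap create mypool/myimage@snap1
    rbd snap create mypool/myimage@snap2
    rbd export-diff --from-snap snap1 mypool/myimage@snap2 ./myimage.snap1-snap2.diff
    rbd import-diff ./myimage.snap1-snap2.diff backuppool/myimage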
:command:`info` *image-spec* | *snap-spec*
  Will dump information (such as size and object size) about a specific rbd image.
  If image is a clone, information about its parent is also displayed.
  If the image is a clone, information about its parent is also displayed.
  If a snapshot is specified, whether it is protected is shown as well.

:command:`journal client disconnect` *journal-spec*

@ -473,7 +473,7 @@ Commands

  the destination image are lost.

:command:`migration commit` *image-spec*
  Commit image migration. This step is run after a successful migration
  Commit image migration. This step is run after successful migration
  prepare and migration execute steps and removes the source image data.

:command:`migration execute` *image-spec*
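Put together, a live migration of an image typically proceeds in three steps (pool and image names are placeholders)::

    rbd migration prepare mypool/myimage newpool/myimage
    rbd migration execute newpool/myimage
    rbd migration commit newpool/myimage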
@ -500,14 +500,12 @@ Commands

:command:`mirror image disable` [--force] *image-spec*
  Disable RBD mirroring for an image. If the mirroring is
  configured in ``image`` mode for the image's pool, then it
  can be explicitly disabled mirroring for each image within
  the pool.
  must be disabled for each image individually.

:command:`mirror image enable` *image-spec* *mode*
  Enable RBD mirroring for an image. If the mirroring is
  configured in ``image`` mode for the image's pool, then it
  can be explicitly enabled mirroring for each image within
  the pool.
  must be enabled for each image individually.

  The mirror image mode can either be ``journal`` (default) or
  ``snapshot``. The ``journal`` mode requires the RBD journaling
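For example (pool and image names are placeholders), enabling snapshot-based mirroring on one image and later disabling it::

    rbd mirror image enable mypool/myimage snapshot
    rbd mirror image disable mypool/myimage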
@ -523,28 +521,28 @@ Commands

  Show RBD mirroring status for an image.

:command:`mirror pool demote` [*pool-name*]
  Demote all primary images within a pool to non-primary.
  Every mirroring enabled image will demoted in the pool.
  Demote all primary images within a pool or namespace to non-primary.
  Every mirror-enabled image in the pool or namespace will be demoted.

:command:`mirror pool disable` [*pool-name*]
  Disable RBD mirroring by default within a pool. When mirroring
  is disabled on a pool in this way, mirroring will also be
  disabled on any images (within the pool) for which mirroring
  was enabled explicitly.
  Disable RBD mirroring within a pool or namespace. When mirroring
  is disabled on a pool or namespace in this way, mirroring will also be
  disabled on all images (within the pool or namespace) for which mirroring
  was enabled, whether by default or explicitly.

:command:`mirror pool enable` [*pool-name*] *mode*
  Enable RBD mirroring by default within a pool.
  Enable RBD mirroring within a pool or namespace.
  The mirroring mode can either be ``pool`` or ``image``.
  If configured in ``pool`` mode, all images in the pool
  If configured in ``pool`` mode, all images in the pool or namespace
  with the journaling feature enabled are mirrored.
  If configured in ``image`` mode, mirroring needs to be
  explicitly enabled (by ``mirror image enable`` command)
  on each image.
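For instance (``mypool`` is a placeholder), enabling per-image mirroring on a pool and checking the result::

    rbd mirror pool enable mypool image
    rbd mirror pool info mypool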
:command:`mirror pool info` [*pool-name*]
  Show information about the pool mirroring configuration.
  It includes mirroring mode, peer UUID, remote cluster name,
  and remote client name.
  Show information about the pool or namespace mirroring configuration.
  For a pool, it includes mirroring mode, peer UUID, remote cluster name,
  and remote client name. For a namespace, it includes only mirroring mode.

:command:`mirror pool peer add` [*pool-name*] *remote-cluster-spec*
  Add a mirroring peer to a pool.

@ -552,7 +550,7 @@ Commands

  The default for *remote client name* is "client.admin".

  This requires mirroring mode is enabled.
  This requires mirroring to be enabled on the pool.
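A hedged example of the *remote-cluster-spec* form (the local pool name, remote client name, and remote cluster name are placeholders)::

    rbd mirror pool peer add mypool client.rbd-mirror-peer@site-b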
:command:`mirror pool peer remove` [*pool-name*] *uuid*
  Remove a mirroring peer from a pool. The peer uuid is available

@ -564,13 +562,13 @@ Commands

  is corresponding to remote client name or remote cluster name.

:command:`mirror pool promote` [--force] [*pool-name*]
  Promote all non-primary images within a pool to primary.
  Every mirroring enabled image will promoted in the pool.
  Promote all non-primary images within a pool or namespace to primary.
  Every mirror-enabled image in the pool or namespace will be promoted.

:command:`mirror pool status` [--verbose] [*pool-name*]
  Show status for all mirrored images in the pool.
  With --verbose, also show additionally output status
  details for every mirroring image in the pool.
  Show status for all mirrored images in the pool or namespace.
  With ``--verbose``, show additional output status
  details for every mirror-enabled image in the pool or namespace.

:command:`mirror snapshot schedule add` [-p | --pool *pool*] [--namespace *namespace*] [--image *image*] *interval* [*start-time*]
  Add mirror snapshot schedule.
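As an illustration (pool and image names are placeholders), adding a three-hour schedule for one image and then listing the configured schedules::

    rbd mirror snapshot schedule add --pool mypool --image myimage 3h
    rbd mirror snapshot schedule ls --pool mypool --recursive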
@ -604,7 +602,7 @@ Commands

  specified to rebuild an invalid object map for a snapshot.

:command:`pool init` [*pool-name*] [--force]
  Initialize pool for use by RBD. Newly created pools must initialized
  Initialize pool for use by RBD. Newly created pools must be initialized
  prior to use.
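For example (``rbdpool`` is a placeholder), creating a pool and initializing it for RBD use::

    ceph osd pool create rbdpool
    rbd pool init rbdpool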
:command:`resize` (-s | --size *size-in-M/G/T*) [--allow-shrink] *image-spec*

@ -616,7 +614,7 @@ Commands

  snapshots, this fails and nothing is deleted.

:command:`snap create` *snap-spec*
  Create a new snapshot. Requires the snapshot name parameter specified.
  Create a new snapshot. Requires the snapshot name parameter to be specified.

:command:`snap limit clear` *image-spec*
  Remove any previously set limit on the number of snapshots allowed on

@ -626,7 +624,7 @@ Commands

  Set a limit for the number of snapshots allowed on an image.

:command:`snap ls` *image-spec*
  Dump the list of snapshots inside a specific image.
  Dump the list of snapshots of a specific image.

:command:`snap protect` *snap-spec*
  Protect a snapshot from deletion, so that clones can be made of it
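A brief sketch combining the snapshot commands above (pool, image, and snapshot names, as well as the limit value, are placeholders)::

    rbd snap create mypool/myimage@snap1            # take a snapshot
    rbd snap limit set --limit 10 mypool/myimage    # cap the number of snapshots
    rbd snap ls mypool/myimage                      # list existing snapshots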
@ -669,9 +667,11 @@ Commands

:command:`trash ls` [*pool-name*]
  List all entries from trash.

:command:`trash mv` *image-spec*
:command:`trash mv` [--expires-at <expires-at>] *image-spec*
  Move an image to the trash. Images, even ones actively in-use by
  clones, can be moved to the trash and deleted at a later time.
  clones, can be moved to the trash and deleted at a later time. Use
  ``--expires-at`` to set the expiration time of an image after which
  it's allowed to be removed.

:command:`trash purge` [*pool-name*]
  Remove all expired images from trash.

@ -679,10 +679,10 @@ Commands

:command:`trash restore` *image-id*
  Restore an image from trash.

:command:`trash rm` *image-id*
  Delete an image from trash. If image deferment time has not expired
  you can not removed it unless use force. But an actively in-use by clones
  or has snapshots can not be removed.
:command:`trash rm` [--force] *image-id*
  Delete an image from trash. If the image deferment time has not expired
  it can be removed using ``--force``. An image that is actively in-use by clones
  or has snapshots cannot be removed.

:command:`trash purge schedule add` [-p | --pool *pool*] [--namespace *namespace*] *interval* [*start-time*]
  Add trash purge schedule.
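A hedged walk-through of the trash workflow (the pool and image names, expiry date, and schedule interval are placeholders)::

    rbd trash mv --expires-at "2025-01-01" mypool/myimage   # defer deletion until the given date
    rbd trash ls mypool                                     # note the image id printed here
    rbd trash purge schedule add --pool mypool 12h          # purge expired entries twice a day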
@ -6,33 +6,29 @@ Ceph Dashboard

Overview
--------

The Ceph Dashboard is a built-in web-based Ceph management and monitoring
application through which you can inspect and administer various aspects
and resources within the cluster. It is implemented as a :ref:`ceph-manager-daemon` module.
The Ceph Dashboard is a web-based Ceph management-and-monitoring tool that can
be used to inspect and administer resources in the cluster. It is implemented
as a :ref:`ceph-manager-daemon` module.

The original Ceph Dashboard that was shipped with Ceph Luminous started
out as a simple read-only view into run-time information and performance
data of Ceph clusters. It used a very simple architecture to achieve the
original goal. However, there was growing demand for richer web-based
management capabilities, to make it easier to administer Ceph for users that
prefer a WebUI over the CLI.
The original Ceph Dashboard shipped with Ceph Luminous and was a simple
read-only view into the run-time information and performance data of Ceph
clusters. It had a simple architecture. However, demand grew for richer,
web-based management capabilities for users who prefer a WebUI over the CLI.

The new :term:`Ceph Dashboard` module adds web-based monitoring and
administration to the Ceph Manager. The architecture and functionality of this new
module are derived from
and inspired by the `openATTIC Ceph management and monitoring tool
<https://openattic.org/>`_. Development is actively driven by the
openATTIC team at `SUSE <https://www.suse.com/>`_, with support from
companies including `Red Hat <https://redhat.com/>`_ and members of the Ceph
community.
The :term:`Ceph Dashboard` module adds web-based monitoring and administration
to the Ceph Manager. The architecture and functionality of this new module are
derived from the `openATTIC Ceph management and monitoring tool
<https://openattic.org/>`_. Development was originally driven by the openATTIC
team at `SUSE <https://www.suse.com/>`_, with support from members of the Ceph
community and from companies including `Red Hat <https://redhat.com/>`_.

The dashboard module's backend code uses the CherryPy framework and implements
a custom REST API. The WebUI implementation is based on
Angular/TypeScript and includes both functionality from the original dashboard
and new features originally developed for the standalone version
of openATTIC. The Ceph Dashboard module is implemented as an
application that provides a graphical representation of information and statistics
through a web server hosted by ``ceph-mgr``.
The dashboard module's backend code uses the CherryPy framework, and implements
a custom REST API. The WebUI implementation is based on Angular/TypeScript and
includes both functionality from the original dashboard and new features
originally developed for the standalone version of openATTIC. The Ceph
Dashboard module is implemented as an application that provides a graphical
representation of information and statistics through a web server hosted by
``ceph-mgr``.

Feature Overview
^^^^^^^^^^^^^^^^
@ -1243,19 +1239,37 @@ code of standby dashboards. To do so you need to run the command:

Resolve IP address to hostname before redirect
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The redirect from a standby to the active dashboard is done via the IP
address. This is done because resolving IP addresses to hostnames can be error
prone in containerized environments. It is also the reason why the option is
Redirection from a standby dashboard to the active dashboard is done via the
manager's IP address, not via the manager's hostname. In virtualized
environments, IP-address-based redirection reduces the incidence of error as
compared to hostname-based resolution. Because of the increased risk of error
due to hostname-based resolution, the option for hostname resolution is
disabled by default.

However, in some situations it might be helpful to redirect via the hostname.
For example if the configured TLS certificate matches only the hostnames. To
activate the redirection via the hostname run the following command::
For example, if the configured TLS certificate matches only the hostnames and
not the IP addresses of those hosts, hostname redirection would be preferable.

    $ ceph config set mgr mgr/dashboard/redirect_resolve_ip_addr True

To activate redirection from standby dashboards to active dashboards via the
manager's hostname, run the following command:

You can disable it again by::

.. prompt:: bash $

    $ ceph config set mgr mgr/dashboard/redirect_resolve_ip_addr False

   ceph config set mgr mgr/dashboard/redirect_resolve_ip_addr True

Disable hostname redirection by running the following command:

.. prompt:: bash #

   ceph config set mgr mgr/dashboard/redirect_resolve_ip_addr False

.. warning::

   If you attempt to activate redirection by using the command above and you
   get the error message ``EINVAL: unrecognized config option
   'mgr/dashboard/redirect_resolve_ip_addr'``, then you might be running a
   release of Ceph prior to version 17.2.6. This feature was introduced in
   17.2.6, in this commit: https://github.com/ceph/ceph/pull/48219.

HAProxy example configuration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@ -1427,9 +1441,9 @@ commands:

    /var/log/ceph/$cluster-$name.log

#. Ensure the SSL/TSL support is configured properly:
#. Ensure the SSL/TLS support is configured properly:

   * Check if the SSL/TSL support is enabled:
   * Check if the SSL/TLS support is enabled:

   .. prompt:: bash $
@ -538,6 +538,9 @@ If the NFS service is running on a non-standard port number:

.. note:: Only NFS v4.0+ is supported.

.. note:: As of this writing (01 Jan 2024), no version of Microsoft Windows
   supports mounting an NFS v4.x export natively.
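For reference, a hedged example of mounting such an export from a Linux client (the gateway host name, export pseudo-path, and mount point are placeholders)::

    mount -t nfs -o nfsvers=4.1,proto=tcp ganesha.example.com:/cephfs /mnt/cephfs-nfs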
Troubleshooting
===============
@ -206,16 +206,6 @@ Example to turn up the sync interval to 10 minutes:

    ceph config set mgr mgr/prometheus/rbd_stats_pools_refresh_interval 600

Ceph daemon performance counters metrics
-----------------------------------------

With the introduction of ``ceph-exporter`` daemon, the prometheus module will no longer export Ceph daemon
perf counters as prometheus metrics by default. However, one may re-enable exporting these metrics by setting
the module option ``exclude_perf_counters`` to ``false``:

.. prompt:: bash $

   ceph config set mgr mgr/prometheus/exclude_perf_counters false

Statistic names and labels
==========================
@ -77,6 +77,19 @@ If the port is not configured, *restful* will bind to port ``8003``.

If the address is not configured, the *restful* will bind to ``::``,
which corresponds to all available IPv4 and IPv6 addresses.

Configuring max_request
---------------------------

The maximum request size can be configured via a central configuration
option::

    ceph config set mgr mgr/restful/$name/max_requests $NUM

where ``$name`` is the ID of the ceph-mgr daemon (usually the hostname).
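For instance, on a manager whose ID is ``mgr-node1`` (a placeholder hostname), the option could be set to a concrete value following the template above::

    ceph config set mgr mgr/restful/mgr-node1/max_requests 1000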
.. mgr_module:: restful
.. confval:: max_requests

.. _creating-an-api-user:

Creating an API User