import ceph pacific 16.2.11
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
ceph/.github/CODEOWNERS
@@ -49,3 +49,88 @@ COPYING* @ceph/doc-writers
/doc/ @ceph/doc-writers
README* @ceph/doc-writers
*.rst @ceph/doc-writers

# core
/doc/man/8/ceph-authtool.rst @ceph/core
/doc/man/8/ceph-conf.rst @ceph/core
/doc/man/8/ceph-create-keys.rst @ceph/core
/doc/man/8/ceph-kvstore-tool.rst @ceph/core
/doc/man/8/ceph-mon.rst @ceph/core
/doc/man/8/ceph-objectstore-tool.rst @ceph/core
/doc/man/8/ceph-osd.rst @ceph/core
/doc/man/8/ceph.rst @ceph/core
/doc/man/8/crushtool.rst @ceph/core
/doc/man/8/monmaptool.rst @ceph/core
/doc/man/8/rados.rst @ceph/core
/doc/rados @ceph/core
/qa/standalone @ceph/core
/qa/suites/rados @ceph/core
/qa/workunits/erasure-code @ceph/core
/qa/workunits/mgr @ceph/core
/qa/workunits/mon @ceph/core
/qa/workunits/objectstore @ceph/core
/qa/workunits/rados @ceph/core
/src/ceph.in @ceph/core
/src/ceph_osd.cc @ceph/core
/src/ceph_mon.cc @ceph/core
/src/blk @ceph/core
/src/crush @ceph/core
/src/erasure-code @ceph/core
/src/kv @ceph/core
/src/librados @ceph/core
/src/mgr @ceph/core
/src/mon @ceph/core
/src/msg @ceph/core
/src/os @ceph/core
/src/osd @ceph/core
/src/tools/rados @ceph/core
/src/test/osd @ceph/core

# rbd
/doc/dev/rbd* @ceph/rbd
/doc/man/8/ceph-rbdnamer.rst @ceph/rbd
/doc/man/8/rbd* @ceph/rbd
/doc/rbd @ceph/rbd
/doc/start/quick-rbd.rst @ceph/rbd
/qa/rbd @ceph/rbd
/qa/run_xfstests* @ceph/rbd
/qa/suites/krbd @ceph/rbd
/qa/suites/rbd @ceph/rbd
/qa/tasks/ceph_iscsi_client.py @ceph/rbd
/qa/tasks/metadata.yaml @ceph/rbd
/qa/tasks/qemu.py @ceph/rbd
/qa/tasks/rbd* @ceph/rbd
/qa/tasks/userdata* @ceph/rbd
/qa/workunits/cls/test_cls_journal.sh @ceph/rbd
/qa/workunits/cls/test_cls_lock.sh @ceph/rbd
/qa/workunits/cls/test_cls_rbd.sh @ceph/rbd
/qa/workunits/rbd @ceph/rbd
/src/ceph-rbdnamer @ceph/rbd
/src/cls/journal @ceph/rbd
/src/cls/lock @ceph/rbd
/src/cls/rbd @ceph/rbd
/src/common/options/rbd* @ceph/rbd
/src/etc-rbdmap @ceph/rbd
/src/include/krbd.h @ceph/rbd
/src/include/rbd* @ceph/rbd
/src/journal @ceph/rbd
/src/krbd.cc @ceph/rbd
/src/librbd @ceph/rbd
/src/ocf @ceph/rbd
/src/pybind/mgr/rbd_support @ceph/rbd
/src/pybind/rbd @ceph/rbd
/src/rbd* @ceph/rbd
/src/test/cli/rbd @ceph/rbd
/src/test/cli-integration/rbd @ceph/rbd
/src/test/cls_journal @ceph/rbd
/src/test/cls_lock @ceph/rbd
/src/test/cls_rbd @ceph/rbd
/src/test/journal @ceph/rbd
/src/test/librbd @ceph/rbd
/src/test/pybind/test_rbd.py @ceph/rbd
/src/test/rbd* @ceph/rbd
/src/test/run-rbd* @ceph/rbd
/src/test/test_rbd* @ceph/rbd
/src/tools/rbd* @ceph/rbd
/systemd/rbdmap.service.in @ceph/rbd
/udev/50-rbd.rules @ceph/rbd

ceph/.github/workflows/pr-triage.yml
@@ -12,13 +12,13 @@ jobs:
with:
sync-labels: ''
repo-token: "${{ secrets.GITHUB_TOKEN }}"
- name: Assign to Dashboard project
uses: srggrs/assign-one-project-github-action@65a8ddab497df42ef268001e67bbf976f8fd39e1
if: contains(github.event.pull_request.labels.*.name, 'dashboard')
with:
project: https://github.com/ceph/ceph/projects/6
- name: Assign milestone based on target brach name
uses: iyu/actions-milestone@dbf7e5348844c9ddc6b803a5721b85fa70fe3bb9
with:
configuration-path: .github/milestone.yml
repo-token: "${{ secrets.GITHUB_TOKEN }}"
- name: Assign to Dashboard project
uses: srggrs/assign-one-project-github-action@65a8ddab497df42ef268001e67bbf976f8fd39e1
if: contains(github.event.pull_request.labels.*.name, 'dashboard')
with:
project: https://github.com/ceph/ceph/projects/6

@@ -5,9 +5,13 @@
version: 2
formats: []
build:
image: latest
os: ubuntu-22.04
tools:
python: "3.8"
apt_packages:
- ditaa
- graphviz
python:
version: 3
install:
- requirements: admin/doc-requirements.txt
- requirements: admin/doc-read-the-docs.txt

@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10.2)
# remove cmake/modules/FindPython* once 3.12 is required

project(ceph
VERSION 16.2.10
VERSION 16.2.11
LANGUAGES CXX C ASM)

foreach(policy
@@ -36,7 +36,15 @@ if(WIN32)
# the targeted Windows version. The availability of certain functions and
# structures will depend on it.
set(WIN32_WINNT "0x0A00" CACHE STRING "Targeted Windows version.")
add_definitions(-D_WIN32_WINNT=${WIN32_WINNT})
# In order to avoid known winpthread issues, we're using the boost
# shared mutex implementation.
# https://github.com/msys2/MINGW-packages/issues/3319
add_definitions(
-D_WIN32_WINNT=${WIN32_WINNT}
-DBOOST_THREAD_PROVIDES_GENERIC_SHARED_MUTEX_ON_WIN
-DBOOST_THREAD_V2_SHARED_MUTEX
)
set(Boost_THREADAPI "win32")
endif()

if(MINGW)

@@ -32,9 +32,33 @@
in certain recovery scenarios, e.g., monitor database lost and rebuilt, and
the restored file system is expected to have the same ID as before.

>=16.2.11
--------

* Cephfs: The 'AT_NO_ATTR_SYNC' macro is deprecated, please use the standard
'AT_STATX_DONT_SYNC' macro. The 'AT_NO_ATTR_SYNC' macro will be removed in
the future.
* Trimming of PGLog dups is now controlled by the size instead of the version.
This fixes the PGLog inflation issue that was happening when the on-line
(in OSD) trimming got jammed after a PG split operation. Also, a new off-line
mechanism has been added: `ceph-objectstore-tool` got `trim-pg-log-dups` op
that targets situations where OSD is unable to boot due to those inflated dups.
If that is the case, in OSD logs the "You can be hit by THE DUPS BUG" warning
will be visible.
Relevant tracker: https://tracker.ceph.com/issues/53729
* RBD: `rbd device unmap` command gained `--namespace` option. Support for
namespaces was added to RBD in Nautilus 14.2.0 and it has been possible to
map and unmap images in namespaces using the `image-spec` syntax since then
but the corresponding option available in most other commands was missing.
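  As an illustrative sketch only (the pool, namespace and image names below are
  placeholders, not part of the upstream note), the added option permits, e.g.:

    rbd device map rbd/ns1/img1                               # image-spec syntax, possible since Nautilus
    rbd device unmap --pool rbd --namespace ns1 --image img1  # option form added in 16.2.11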

>=16.2.8
--------

* RGW: The behavior for Multipart Upload was modified so that only
CompleteMultipartUpload notification is sent at the end of the multipart upload.
The POST notification at the beginning of the upload, and PUT notifications that
were sent on each part are not sent anymore.

* MON/MGR: Pools can now be created with `--bulk` flag. Any pools created with `bulk`
will use a profile of the `pg_autoscaler` that provides more performance from the start.
However, any pools created without the `--bulk` flag will remain using it's old behavior
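  As an illustrative sketch (the pool name is a placeholder), the flag is passed
  at pool creation time and can be inspected afterwards:

    ceph osd pool create mypool --bulk   # autoscaler starts with the performance-oriented profile
    ceph osd pool get mypool bulk        # check the flag on an existing pool
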
@@ -29,7 +29,11 @@
%else
%bcond_without tcmalloc
%endif
%if 0%{?rhel} >= 9
%bcond_without system_pmdk
%else
%bcond_with system_pmdk
%endif
%if 0%{?fedora} || 0%{?rhel}
%bcond_without selinux
%ifarch x86_64 ppc64le
@@ -120,11 +124,18 @@
# disable dwz which compresses the debuginfo
%global _find_debuginfo_dwz_opts %{nil}

%if 0%{with seastar}
# disable -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1, as gcc-toolset-{9,10}-annobin
# do not provide gcc-annobin.so anymore, despite that they provide annobin.so. but
# redhat-rpm-config still passes -fplugin=gcc-annobin to the compiler.
%undefine _annotated_build
%endif

#################################################################################
# main package definition
#################################################################################
Name:		ceph
Version:	16.2.10
Version:	16.2.11
Release:	0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch:		2
@@ -140,7 +151,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-
Group:		System/Filesystems
%endif
URL:		http://ceph.com/
Source0:	%{?_remote_tarball_prefix}ceph-16.2.10.tar.bz2
Source0:	%{?_remote_tarball_prefix}ceph-16.2.11.tar.bz2
%if 0%{?suse_version}
# _insert_obs_source_lines_here
ExclusiveArch:	x86_64 aarch64 ppc64le s390x
@@ -229,7 +240,6 @@ BuildRequires: %{luarocks_package_name}
BuildRequires:	jq
BuildRequires:	libuuid-devel
BuildRequires:	python%{python3_pkgversion}-bcrypt
BuildRequires:	python%{python3_pkgversion}-nose
BuildRequires:	python%{python3_pkgversion}-pecan
BuildRequires:	python%{python3_pkgversion}-requests
BuildRequires:	python%{python3_pkgversion}-dateutil
@@ -304,6 +314,7 @@ BuildRequires: rdma-core-devel
BuildRequires:	liblz4-devel >= 1.7
# for prometheus-alerts
BuildRequires:	golang-github-prometheus-prometheus
BuildRequires:	jsonnet
%endif
%if 0%{?fedora} || 0%{?rhel}
Requires:	systemd
@@ -345,6 +356,7 @@ BuildRequires: python%{python3_pkgversion}-pyOpenSSL
%endif
%if 0%{?suse_version}
BuildRequires:	golang-github-prometheus-prometheus
BuildRequires:	jsonnet
BuildRequires:	libxmlsec1-1
BuildRequires:	libxmlsec1-nss1
BuildRequires:	libxmlsec1-openssl1
@@ -548,6 +560,7 @@ Group: System/Filesystems
Requires:	ceph-mgr = %{_epoch_prefix}%{version}-%{release}
Requires:	ceph-grafana-dashboards = %{_epoch_prefix}%{version}-%{release}
Requires:	ceph-prometheus-alerts = %{_epoch_prefix}%{version}-%{release}
Requires:	python%{python3_pkgversion}-setuptools
%if 0%{?fedora} || 0%{?rhel}
Requires:	python%{python3_pkgversion}-cherrypy
Requires:	python%{python3_pkgversion}-jwt
@@ -597,6 +610,7 @@ Requires: python%{python3_pkgversion}-pecan
Requires:	python%{python3_pkgversion}-pyOpenSSL
Requires:	python%{python3_pkgversion}-requests
Requires:	python%{python3_pkgversion}-dateutil
Requires:	python%{python3_pkgversion}-setuptools
%if 0%{?fedora} || 0%{?rhel} >= 8
Requires:	python%{python3_pkgversion}-cherrypy
Requires:	python%{python3_pkgversion}-pyyaml
@@ -1194,12 +1208,14 @@ This package provides Ceph default alerts for Prometheus.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-16.2.10
%autosetup -p1 -n ceph-16.2.11

%build
# LTO can be enabled as soon as the following GCC bug is fixed:
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48200
# Disable lto on systems that do not support symver attribute
# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48200 for details
%if ( 0%{?rhel} && 0%{?rhel} < 9 ) || ( 0%{?suse_version} && 0%{?suse_version} <= 1500 )
%define _lto_cflags %{nil}
%endif

%if 0%{with seastar} && 0%{?rhel}
. /opt/rh/gcc-toolset-9/enable
@@ -1433,6 +1449,9 @@ install -m 644 -D monitoring/ceph-mixin/prometheus_alerts.yml %{buildroot}/etc/p

%clean
rm -rf %{buildroot}
# built binaries are no longer necessary at this point,
# but are consuming ~17GB of disk in the build environment
rm -rf build

#################################################################################
# files and systemd scriptlets
@@ -1528,8 +1547,7 @@ exit 0

%if ! 0%{?suse_version}
%postun -n cephadm
userdel -r cephadm || true
exit 0
[ $1 -ne 0 ] || userdel cephadm || :
%endif

%files -n cephadm
@@ -1566,6 +1584,8 @@ exit 0
%{_bindir}/rbd-replay-prep
%endif
%{_bindir}/ceph-post-file
%dir %{_libdir}/ceph/denc
%{_libdir}/ceph/denc/denc-mod-*.so
%{_tmpfilesdir}/ceph-common.conf
%{_mandir}/man8/ceph-authtool.8*
%{_mandir}/man8/ceph-conf.8*

@@ -29,7 +29,11 @@
%else
%bcond_without tcmalloc
%endif
%if 0%{?rhel} >= 9
%bcond_without system_pmdk
%else
%bcond_with system_pmdk
%endif
%if 0%{?fedora} || 0%{?rhel}
%bcond_without selinux
%ifarch x86_64 ppc64le
@@ -120,6 +124,13 @@
# disable dwz which compresses the debuginfo
%global _find_debuginfo_dwz_opts %{nil}

%if 0%{with seastar}
# disable -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1, as gcc-toolset-{9,10}-annobin
# do not provide gcc-annobin.so anymore, despite that they provide annobin.so. but
# redhat-rpm-config still passes -fplugin=gcc-annobin to the compiler.
%undefine _annotated_build
%endif

#################################################################################
# main package definition
#################################################################################
@@ -229,7 +240,6 @@ BuildRequires: %{luarocks_package_name}
BuildRequires:	jq
BuildRequires:	libuuid-devel
BuildRequires:	python%{python3_pkgversion}-bcrypt
BuildRequires:	python%{python3_pkgversion}-nose
BuildRequires:	python%{python3_pkgversion}-pecan
BuildRequires:	python%{python3_pkgversion}-requests
BuildRequires:	python%{python3_pkgversion}-dateutil
@@ -304,6 +314,7 @@ BuildRequires: rdma-core-devel
BuildRequires:	liblz4-devel >= 1.7
# for prometheus-alerts
BuildRequires:	golang-github-prometheus-prometheus
BuildRequires:	jsonnet
%endif
%if 0%{?fedora} || 0%{?rhel}
Requires:	systemd
@@ -345,6 +356,7 @@ BuildRequires: python%{python3_pkgversion}-pyOpenSSL
%endif
%if 0%{?suse_version}
BuildRequires:	golang-github-prometheus-prometheus
BuildRequires:	jsonnet
BuildRequires:	libxmlsec1-1
BuildRequires:	libxmlsec1-nss1
BuildRequires:	libxmlsec1-openssl1
@@ -548,6 +560,7 @@ Group: System/Filesystems
Requires:	ceph-mgr = %{_epoch_prefix}%{version}-%{release}
Requires:	ceph-grafana-dashboards = %{_epoch_prefix}%{version}-%{release}
Requires:	ceph-prometheus-alerts = %{_epoch_prefix}%{version}-%{release}
Requires:	python%{python3_pkgversion}-setuptools
%if 0%{?fedora} || 0%{?rhel}
Requires:	python%{python3_pkgversion}-cherrypy
Requires:	python%{python3_pkgversion}-jwt
@@ -597,6 +610,7 @@ Requires: python%{python3_pkgversion}-pecan
Requires:	python%{python3_pkgversion}-pyOpenSSL
Requires:	python%{python3_pkgversion}-requests
Requires:	python%{python3_pkgversion}-dateutil
Requires:	python%{python3_pkgversion}-setuptools
%if 0%{?fedora} || 0%{?rhel} >= 8
Requires:	python%{python3_pkgversion}-cherrypy
Requires:	python%{python3_pkgversion}-pyyaml
@@ -1197,9 +1211,11 @@ This package provides Ceph default alerts for Prometheus.
%autosetup -p1 -n @TARBALL_BASENAME@

%build
# LTO can be enabled as soon as the following GCC bug is fixed:
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48200
# Disable lto on systems that do not support symver attribute
# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48200 for details
%if ( 0%{?rhel} && 0%{?rhel} < 9 ) || ( 0%{?suse_version} && 0%{?suse_version} <= 1500 )
%define _lto_cflags %{nil}
%endif

%if 0%{with seastar} && 0%{?rhel}
. /opt/rh/gcc-toolset-9/enable
@@ -1433,6 +1449,9 @@ install -m 644 -D monitoring/ceph-mixin/prometheus_alerts.yml %{buildroot}/etc/p

%clean
rm -rf %{buildroot}
# built binaries are no longer necessary at this point,
# but are consuming ~17GB of disk in the build environment
rm -rf build

#################################################################################
# files and systemd scriptlets
@@ -1528,8 +1547,7 @@ exit 0

%if ! 0%{?suse_version}
%postun -n cephadm
userdel -r cephadm || true
exit 0
[ $1 -ne 0 ] || userdel cephadm || :
%endif

%files -n cephadm
@@ -1566,6 +1584,8 @@ exit 0
%{_bindir}/rbd-replay-prep
%endif
%{_bindir}/ceph-post-file
%dir %{_libdir}/ceph/denc
%{_libdir}/ceph/denc/denc-mod-*.so
%{_tmpfilesdir}/ceph-common.conf
%{_mandir}/man8/ceph-authtool.8*
%{_mandir}/man8/ceph-conf.8*

@@ -1,7 +1,13 @@
ceph (16.2.10-1focal) focal; urgency=medium
ceph (16.2.11-1focal) focal; urgency=medium


 -- Jenkins Build Slave User <jenkins-build@adami06.front.sepia.ceph.com>  Thu, 21 Jul 2022 17:38:01 +0000
 -- Jenkins Build Slave User <jenkins-build@braggi16.front.sepia.ceph.com>  Tue, 24 Jan 2023 21:28:06 +0000

ceph (16.2.11-1) stable; urgency=medium

  * New upstream release

 -- Ceph Release Team <ceph-maintainers@ceph.io>  Tue, 24 Jan 2023 20:43:11 +0000

ceph (16.2.10-1) stable; urgency=medium

@@ -11,6 +11,9 @@
# Boost_USE_MULTITHREADED : boolean (default: OFF)
# BOOST_J: integer (defanult 1)

# CMAKE_CURRENT_FUNCTION_LIST_DIR is introduced by cmake 3.17, but ubuntu comes with 3.16
set(_build_boost_list_dir "${CMAKE_CURRENT_LIST_DIR}")

function(check_boost_version source_dir expected_version)
set(version_hpp "${source_dir}/boost/version.hpp")
if(NOT EXISTS ${version_hpp})
@@ -70,7 +73,7 @@ function(do_build_boost version)
if(c MATCHES "^python([0-9])\$")
set(with_python_version "${CMAKE_MATCH_1}")
list(APPEND boost_with_libs "python")
elseif(c MATCHES "^python([0-9])\\.?([0-9])\$")
elseif(c MATCHES "^python([0-9])\\.?([0-9]+)\$")
set(with_python_version "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}")
list(APPEND boost_with_libs "python")
else()
@@ -167,10 +170,12 @@ function(do_build_boost version)
URL_HASH SHA256=${boost_sha256}
DOWNLOAD_NO_PROGRESS 1)
endif()
find_program(PATCH_EXECUTABLE patch)
# build all components in a single shot
include(ExternalProject)
ExternalProject_Add(Boost
${source_dir}
PATCH_COMMAND ${PATCH_EXECUTABLE} -p3 -i ${_build_boost_list_dir}/boost-python-use-public-api-for-filename.patch
CONFIGURE_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${configure_command}
BUILD_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${build_command}
BUILD_IN_SOURCE 1

@@ -9,14 +9,15 @@ function(build_fio)
include(FindMake)
find_make("MAKE_EXECUTABLE" "make_cmd")

set(source_dir ${CMAKE_BINARY_DIR}/src/fio)
file(MAKE_DIRECTORY ${source_dir})
ExternalProject_Add(fio_ext
DOWNLOAD_DIR ${CMAKE_BINARY_DIR}/src/
UPDATE_COMMAND "" # this disables rebuild on each run
GIT_REPOSITORY "https://github.com/axboe/fio.git"
GIT_REPOSITORY "https://github.com/ceph/fio.git"
GIT_CONFIG advice.detachedHead=false
GIT_SHALLOW 1
GIT_TAG "fio-3.15"
SOURCE_DIR ${CMAKE_BINARY_DIR}/src/fio
GIT_TAG "fio-3.27-cxx"
SOURCE_DIR ${source_dir}
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND <SOURCE_DIR>/configure
BUILD_COMMAND ${make_cmd} fio EXTFLAGS=-Wno-format-truncation ${FIO_EXTLIBS}
@@ -25,5 +26,6 @@ function(build_fio)
add_library(fio INTERFACE IMPORTED)
add_dependencies(fio fio_ext)
set_target_properties(fio PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_BINARY_DIR}/src/fio)
INTERFACE_INCLUDE_DIRECTORIES ${source_dir}
INTERFACE_COMPILE_OPTIONS "-include;${source_dir}/config-host.h;$<$<COMPILE_LANGUAGE:C>:-std=gnu99>$<$<COMPILE_LANGUAGE:CXX>:-std=gnu++17>")
endfunction()

@@ -21,6 +21,7 @@ function(build_pmem)
set(PMDK_LIB_DIR "nondebug")
endif()

set(pmdk_cflags "-Wno-error -fno-lto")
include(ExternalProject)
ExternalProject_Add(pmdk_ext
${source_dir_args}
@@ -29,7 +30,7 @@ function(build_pmem)
# build system tests statically linking to librbd (which uses
# libpmemobj) will not link (because we don't build the ndctl
# static library here).
BUILD_COMMAND ${make_cmd} CC=${CMAKE_C_COMPILER} NDCTL_ENABLE=n BUILD_EXAMPLES=n BUILD_BENCHMARKS=n DOC=n
BUILD_COMMAND ${make_cmd} CC=${CMAKE_C_COMPILER} "EXTRA_CFLAGS=${pmdk_cflags}" NDCTL_ENABLE=n BUILD_EXAMPLES=n BUILD_BENCHMARKS=n DOC=n
BUILD_IN_SOURCE 1
BUILD_BYPRODUCTS "<SOURCE_DIR>/src/${PMDK_LIB_DIR}/libpmem.a" "<SOURCE_DIR>/src/${PMDK_LIB_DIR}/libpmemobj.a"
INSTALL_COMMAND "")

@@ -144,6 +144,34 @@ else(NOT CMAKE_CROSSCOMPILING)
message(STATUS "Assuming unaligned access is supported")
endif(NOT CMAKE_CROSSCOMPILING)

set(version_script_source "v1 { }; v2 { } v1;")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/version_script.txt "${version_script_source}")
cmake_push_check_state(RESET)
set(CMAKE_REQUIRED_FLAGS "-Werror -Wl,--version-script=${CMAKE_CURRENT_BINARY_DIR}/version_script.txt")
check_c_source_compiles("
__attribute__((__symver__ (\"func@v1\"))) void func_v1() {};
__attribute__((__symver__ (\"func@v2\"))) void func_v2() {};

int main() {}"
HAVE_ATTR_SYMVER)
if(NOT HAVE_ATTR_SYMVER)
if(CMAKE_CXX_FLAGS MATCHES "-flto" AND NOT CMAKE_CXX_FLAGS MATCHES "-flto-partition=none")
# https://tracker.ceph.com/issues/40060
message(FATAL_ERROR "please pass -flto-partition=none as part of CXXFLAGS")
endif()
endif()
set(CMAKE_REQUIRED_FLAGS -Wl,--version-script=${CMAKE_CURRENT_BINARY_DIR}/version_script.txt)
check_c_source_compiles("
void func_v1() {}
__asm__(\".symver func_v1, func@v1\");
void func_v2() {}
__asm__(\".symver func_v2, func@v2\");

int main() {}"
HAVE_ASM_SYMVER)
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/version_script.txt)
cmake_pop_check_state()

# should use LINK_OPTIONS instead of LINK_LIBRARIES, if we can use cmake v3.14+
try_compile(HAVE_LINK_VERSION_SCRIPT
${CMAKE_CURRENT_BINARY_DIR}

@@ -65,14 +65,13 @@ function(distutils_add_cython_module target name src)
# This little bit of magic wipes out __Pyx_check_single_interpreter()
# Note: this is reproduced in distutils_install_cython_module
list(APPEND cflags -D'void0=dead_function\(void\)')
list(APPEND cflags -D'__Pyx_check_single_interpreter\(ARG\)=ARG \#\# 0')
list(APPEND cflags -D'__Pyx_check_single_interpreter\(ARG\)=ARG\#\#0')
set(PY_CC ${compiler_launcher} ${CMAKE_C_COMPILER} ${c_compiler_arg1} ${cflags})
set(PY_CXX ${compiler_launcher} ${CMAKE_CXX_COMPILER} ${cxx_compiler_arg1})
set(PY_LDSHARED ${link_launcher} ${CMAKE_C_COMPILER} ${c_compiler_arg1} "-shared")

set(suffix_var "EXT_SUFFIX")
execute_process(COMMAND "${Python3_EXECUTABLE}" -c
"from distutils import sysconfig; print(sysconfig.get_config_var('${suffix_var}'))"
"import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))"
RESULT_VARIABLE result
OUTPUT_VARIABLE ext_suffix
ERROR_VARIABLE error
@@ -113,7 +112,7 @@ function(distutils_install_cython_module name)
set(ENV{LDSHARED} \"${PY_LDSHARED}\")
set(ENV{CPPFLAGS} \"-iquote${CMAKE_SOURCE_DIR}/src/include
-D'void0=dead_function\(void\)' \
-D'__Pyx_check_single_interpreter\(ARG\)=ARG \#\# 0'\")
-D'__Pyx_check_single_interpreter\(ARG\)=ARG\#\#0'\")
set(ENV{LDFLAGS} \"-L${CMAKE_LIBRARY_OUTPUT_DIRECTORY}\")
set(ENV{CYTHON_BUILD_DIR} \"${CMAKE_CURRENT_BINARY_DIR}\")
set(ENV{CEPH_LIBDIR} \"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}\")

@@ -0,0 +1,38 @@
From d9f06052e28873037db7f98629bce72182a42410 Mon Sep 17 00:00:00 2001
From: Pat Riehecky <riehecky@fnal.gov>
Date: Mon, 29 Jun 2020 10:51:58 -0500
Subject: [PATCH] Convert Python 3.1+ to use public C API for filenames
---
 src/exec.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/exec.cpp b/src/exec.cpp
index 171c6f4189..b2eabe59f6 100644
--- a/src/boost/libs/python/src/exec.cpp
+++ b/src/boost/libs/python/src/exec.cpp
@@ -104,14 +104,22 @@ object BOOST_PYTHON_DECL exec_file(char const *filename, object global, object l
   if (local.is_none()) local = global;
   // should be 'char const *' but older python versions don't use 'const' yet.
   char *f = const_cast<char *>(filename);
-  // Let python open the file to avoid potential binary incompatibilities.
-#if PY_VERSION_HEX >= 0x03040000
-  FILE *fs = _Py_fopen(f, "r");
+#if PY_VERSION_HEX >= 0x03010000
+  // Let python manage any UTF bits to avoid potential incompatibilities.
+  PyObject *fo = Py_BuildValue("s", f);
+  PyObject *fb = Py_None;
+  PyUnicode_FSConverter(fo, &fb);
+  f = PyBytes_AsString(fb);
+  FILE *fs = fopen(f, "r");
+  Py_DECREF(fo);
+  Py_DECREF(fb);
 #elif PY_VERSION_HEX >= 0x03000000
+  // Let python open the file to avoid potential binary incompatibilities.
   PyObject *fo = Py_BuildValue("s", f);
-  FILE *fs = _Py_fopen(fo, "r");
+  FILE *fs = _Py_fopen(fo, "r"); // Private CPython API
   Py_DECREF(fo);
 #else
+  // Let python open the file to avoid potential binary incompatibilities.
   PyObject *pyfile = PyFile_FromString(f, const_cast<char*>("r"));
   if (!pyfile) throw std::invalid_argument(std::string(f) + " : no such file");
   python::handle<> file(pyfile);
@@ -23,6 +23,7 @@ usr/bin/rbd-replay*
usr/bin/ceph-post-file
usr/sbin/mount.ceph sbin
usr/lib/ceph/compressor/*
usr/lib/ceph/denc/*
usr/lib/ceph/crypto/* [amd64]
usr/share/man/man8/ceph-authtool.8
usr/share/man/man8/ceph-conf.8

@@ -24,6 +24,7 @@ Build-Depends: automake,
g++ (>= 7),
javahelper,
jq <pkg.ceph.check>,
jsonnet <pkg.ceph.check>,
junit4,
libaio-dev,
libbabeltrace-ctf-dev,
@@ -37,7 +38,7 @@ Build-Depends: automake,
libcurl4-openssl-dev,
# Jaeger libevent-dev,
libexpat1-dev,
# Make-Check libffi-dev [!amd64],
libffi-dev [!amd64] <pkg.ceph.check>,
libfuse-dev,
libgoogle-perftools-dev [i386 amd64 arm64],
# Crimson libgnutls28-dev,
@@ -68,44 +69,44 @@ Build-Depends: automake,
librabbitmq-dev,
librdkafka-dev,
luarocks,
# Make-Check libxmlsec1,
# Make-Check libxmlsec1-nss,
# Make-Check libxmlsec1-openssl,
# Make-Check libxmlsec1-dev,
libxmlsec1 <pkg.ceph.check>,
libxmlsec1-nss <pkg.ceph.check>,
libxmlsec1-openssl <pkg.ceph.check>,
libxmlsec1-dev <pkg.ceph.check>,
# Crimson libyaml-cpp-dev,
# Jaeger nlohmann-json-dev | nlohmann-json3-dev,
parted,
patch,
pkg-config,
# Make-Check prometheus,
prometheus <pkg.ceph.check>,
# Crimson protobuf-compiler,
python3-all-dev,
python3-cherrypy3,
# Make-Check python3-jwt,
# Make-Check python3-nose,
# Make-Check python3-pecan,
# Make-Check python3-bcrypt,
# Make-Check tox,
# Make-Check python3-coverage,
# Make-Check python3-dateutil,
# Make-Check python3-openssl,
# Make-Check python3-prettytable,
# Make-Check python3-requests,
# Make-Check python3-scipy,
python3-jwt <pkg.ceph.check>,
python3-pecan <pkg.ceph.check>,
python3-bcrypt <pkg.ceph.check>,
tox <pkg.ceph.check>,
python3-coverage <pkg.ceph.check>,
python3-dateutil <pkg.ceph.check>,
python3-pkg-resources <pkg.ceph.check>,
python3-openssl <pkg.ceph.check>,
python3-prettytable <pkg.ceph.check>,
python3-requests <pkg.ceph.check>,
python3-scipy <pkg.ceph.check>,
python3-setuptools,
python3-sphinx,
# Make-Check python3-werkzeug,
python3-werkzeug <pkg.ceph.check>,
python3-setuptools,
python3-venv,
# Crimson ragel,
# Make-Check socat,
socat <pkg.ceph.check>,
# Crimson systemtap-sdt-dev,
# Make-Check uuid-dev,
uuid-dev <pkg.ceph.check>,
uuid-runtime,
valgrind,
xfslibs-dev,
# Make-Check xfsprogs,
# Make-Check xmlstarlet,
xfsprogs <pkg.ceph.check>,
xmlstarlet <pkg.ceph.check>,
nasm [amd64],
zlib1g-dev,
# Jaeger Built-Using: libyaml-cpp-dev (>= 0.6),
@@ -234,6 +235,8 @@ Depends: ceph-base (= ${binary:Version}),
python3-pecan,
python3-requests,
python3-werkzeug,
libsqlite3-mod-ceph (= ${binary:Version}),
librados2 (= ${binary:Version}),
${misc:Depends},
${python:Depends},
${shlibs:Depends},
@@ -258,6 +261,7 @@ Depends: ceph-mgr (= ${binary:Version}),
python3-cherrypy3,
python3-jwt,
python3-bcrypt,
python3-pkg-resources,
python3-werkzeug,
python3-routes,
${misc:Depends},
@@ -735,7 +739,8 @@ Description: RADOS distributed object store client C++ library (development file
Package: libsqlite3-mod-ceph
Architecture: any
Section: libs
Depends: ${misc:Depends},
Depends: librados2 (= ${binary:Version}),
${misc:Depends},
${shlibs:Depends},
Description: SQLite3 VFS for Ceph
A SQLite3 VFS for storing and manipulating databases stored on Ceph's RADOS
@@ -1249,3 +1254,4 @@ Description: prometheus alerts for the ceph dashboard
.
This package contains alerts used for prometheus to interact with the
Ceph Dashboard.

ceph/debian/python3-ceph-argparse.install
Normal file → Executable file
@@ -1,2 +1,4 @@
usr/lib/python3*/dist-packages/ceph_argparse.py
usr/lib/python3*/dist-packages/ceph_daemon.py
#! /usr/bin/dh-exec

usr/lib/python3*/*-packages/ceph_argparse.py /usr/lib/python3/dist-packages/
usr/lib/python3*/*-packages/ceph_daemon.py /usr/lib/python3/dist-packages/

ceph/debian/python3-cephfs.install
Normal file → Executable file
@@ -1,3 +1,5 @@
usr/lib/python3*/dist-packages/ceph_volume_client.py
#! /usr/bin/dh-exec

usr/lib/python3*/*-packages/ceph_volume_client.py /usr/lib/python3/dist-packages/
usr/lib/python3*/dist-packages/cephfs-*.egg-info
usr/lib/python3*/dist-packages/cephfs.cpython*.so

@@ -36,6 +36,13 @@ if [ -r /etc/os-release ]; then
ARGS+=" -DWITH_RADOSGW_AMQP_ENDPOINT=OFF"
ARGS+=" -DWITH_RADOSGW_KAFKA_ENDPOINT=OFF"
;;
ubuntu)
MAJOR_VER=$(echo "$VERSION_ID" | sed -e 's/\..*$//')
if [ "$MAJOR_VER" -ge "22" ] ; then
PYBUILD="3.10"
fi
;;

esac
elif [ "$(uname)" == FreeBSD ] ; then
PYBUILD="3"

ceph/doc/_static/css/custom.css
@@ -1,3 +1,23 @@
dt {
scroll-margin-top: 3em;
}

h2 {
scroll-margin-top: 4em;
}

h3 {
scroll-margin-top: 4em;
}

section {
scroll-margin-top: 4em;
}

span {
scroll-margin-top: 2em;
}

ul.simple > li > ul > li:last-child {
margin-block-end : 1em;
}

@@ -13,6 +13,7 @@ replicate and redistribute data dynamically.

.. image:: images/stack.png

.. _arch-ceph-storage-cluster:

The Ceph Storage Cluster
========================
@@ -59,7 +60,7 @@ service interfaces built on top of ``librados``.
Storing Data
------------

The Ceph Storage Cluster receives data from :term:`Ceph Clients`--whether it
The Ceph Storage Cluster receives data from :term:`Ceph Client`\s--whether it
comes through a :term:`Ceph Block Device`, :term:`Ceph Object Storage`, the
:term:`Ceph File System` or a custom implementation you create using
``librados``-- which is stored as RADOS objects. Each object is stored on an
@@ -80,7 +81,7 @@ stored in a monolithic database-like fashion.
Ceph OSD Daemons store data as objects in a flat namespace (e.g., no
hierarchy of directories). An object has an identifier, binary data, and
metadata consisting of a set of name/value pairs. The semantics are completely
up to :term:`Ceph Clients`. For example, CephFS uses metadata to store file
up to :term:`Ceph Client`\s. For example, CephFS uses metadata to store file
attributes such as the file owner, created date, last modified date, and so
forth.

@@ -135,6 +136,8 @@ Placement of Replicated Data`_.

.. index:: architecture; cluster map

.. _architecture_cluster_map:

Cluster Map
~~~~~~~~~~~

@@ -581,7 +584,7 @@ objects.
Peering and Sets
~~~~~~~~~~~~~~~~

In previous sections, we noted that Ceph OSD Daemons check each others
In previous sections, we noted that Ceph OSD Daemons check each other's
heartbeats and report back to the Ceph Monitor. Another thing Ceph OSD daemons
do is called 'peering', which is the process of bringing all of the OSDs that
store a Placement Group (PG) into agreement about the state of all of the
@@ -1619,13 +1622,13 @@ instance for high availability.


.. _RADOS - A Scalable, Reliable Storage Service for Petabyte-scale Storage Clusters: https://ceph.com/wp-content/uploads/2016/08/weil-rados-pdsw07.pdf
.. _RADOS - A Scalable, Reliable Storage Service for Petabyte-scale Storage Clusters: https://ceph.io/assets/pdfs/weil-rados-pdsw07.pdf
.. _Paxos: https://en.wikipedia.org/wiki/Paxos_(computer_science)
.. _Monitor Config Reference: ../rados/configuration/mon-config-ref
.. _Monitoring OSDs and PGs: ../rados/operations/monitoring-osd-pg
.. _Heartbeats: ../rados/configuration/mon-osd-interaction
.. _Monitoring OSDs: ../rados/operations/monitoring-osd-pg/#monitoring-osds
.. _CRUSH - Controlled, Scalable, Decentralized Placement of Replicated Data: https://ceph.com/wp-content/uploads/2016/08/weil-crush-sc06.pdf
.. _CRUSH - Controlled, Scalable, Decentralized Placement of Replicated Data: https://ceph.io/assets/pdfs/weil-crush-sc06.pdf
.. _Data Scrubbing: ../rados/configuration/osd-config-ref#scrubbing
.. _Report Peering Failure: ../rados/configuration/mon-osd-interaction#osds-report-peering-failure
.. _Troubleshooting Peering Failure: ../rados/troubleshooting/troubleshooting-pg#placement-group-down-peering-failure

@@ -2,25 +2,22 @@

``prepare``
===========
This subcommand allows a :term:`filestore` or :term:`bluestore` setup. It is
recommended to pre-provision a logical volume before using it with
``ceph-volume lvm``.
Before you run ``ceph-volume lvm prepare``, we recommend that you provision a
logical volume. Then you can run ``prepare`` on that logical volume.

Logical volumes are not altered except for adding extra metadata.
``prepare`` adds metadata to logical volumes but does not alter them in any
other way.

.. note:: This is part of a two step process to deploy an OSD. If looking for
a single-call way, please see :ref:`ceph-volume-lvm-create`
.. note:: This is part of a two-step process to deploy an OSD. If you prefer
to deploy an OSD by using only one command, see :ref:`ceph-volume-lvm-create`.

To help identify volumes, the process of preparing a volume (or volumes) to
work with Ceph, the tool will assign a few pieces of metadata information using
:term:`LVM tags`.

:term:`LVM tags` makes volumes easy to discover later, and help identify them as
part of a Ceph system, and what role they have (journal, filestore, bluestore,
etc...)

Although :term:`bluestore` is the default, the back end can be specified with:
``prepare`` uses :term:`LVM tags` to assign several pieces of metadata to a
logical volume. Volumes tagged in this way are easier to identify and easier to
use with Ceph. :term:`LVM tags` identify logical volumes by the role that they
play in the Ceph cluster (for example: BlueStore data or BlueStore WAL+DB).

:term:`BlueStore<bluestore>` is the default backend. Ceph permits changing
the backend, which can be done by using the following flags and arguments:

* :ref:`--filestore <ceph-volume-lvm-prepare_filestore>`
* :ref:`--bluestore <ceph-volume-lvm-prepare_bluestore>`
@@ -29,50 +26,58 @@ Although :term:`bluestore` is the default, the back end can be specified with:

``bluestore``
-------------
The :term:`bluestore` objectstore is the default for new OSDs. It offers a bit
more flexibility for devices compared to :term:`filestore`.
Bluestore supports the following configurations:
:term:`Bluestore<bluestore>` is the default backend for new OSDs. It
offers more flexibility for devices than :term:`filestore` does. Bluestore
supports the following configurations:

* A block device, a block.wal, and a block.db device
* A block device and a block.wal device
* A block device and a block.db device
* A single block device
* a block device, a block.wal device, and a block.db device
* a block device and a block.wal device
* a block device and a block.db device
* a single block device

The bluestore subcommand accepts physical block devices, partitions on
physical block devices or logical volumes as arguments for the various device parameters
If a physical device is provided, a logical volume will be created. A volume group will
either be created or reused it its name begins with ``ceph``.
This allows a simpler approach at using LVM but at the cost of flexibility:
there are no options or configurations to change how the LV is created.
The ``bluestore`` subcommand accepts physical block devices, partitions on physical
block devices, or logical volumes as arguments for the various device
parameters. If a physical block device is provided, a logical volume will be
created. If the provided volume group's name begins with `ceph`, it will be
created if it does not yet exist and it will be clobbered and reused if it
already exists. This allows for a simpler approach to using LVM but at the
cost of flexibility: no option or configuration can be used to change how the
logical volume is created.

The ``block`` is specified with the ``--data`` flag, and in its simplest use
case it looks like::
case it looks like:

.. prompt:: bash #

ceph-volume lvm prepare --bluestore --data vg/lv

A raw device can be specified in the same way::
A raw device can be specified in the same way:

.. prompt:: bash #

ceph-volume lvm prepare --bluestore --data /path/to/device

For enabling :ref:`encryption <ceph-volume-lvm-encryption>`, the ``--dmcrypt`` flag is required::
For enabling :ref:`encryption <ceph-volume-lvm-encryption>`, the ``--dmcrypt`` flag is required:

.. prompt:: bash #

ceph-volume lvm prepare --bluestore --dmcrypt --data vg/lv

If a ``block.db`` or a ``block.wal`` is needed (they are optional for
bluestore) they can be specified with ``--block.db`` and ``--block.wal``
accordingly. These can be a physical device, a partition or
a logical volume.
If a ``block.db`` device or a ``block.wal`` device is needed, it can be
specified with ``--block.db`` or ``--block.wal``. These can be physical
devices, partitions, or logical volumes. ``block.db`` and ``block.wal`` are
optional for bluestore.

For both ``block.db`` and ``block.wal`` partitions aren't made logical volumes
because they can be used as-is.
For both ``block.db`` and ``block.wal``, partitions can be used as-is, and
therefore are not made into logical volumes.

While creating the OSD directory, the process will use a ``tmpfs`` mount to
place all the files needed for the OSD. These files are initially created by
``ceph-osd --mkfs`` and are fully ephemeral.
While creating the OSD directory, the process uses a ``tmpfs`` mount to hold
the files needed for the OSD. These files are created by ``ceph-osd --mkfs``
and are ephemeral.

A symlink is always created for the ``block`` device, and optionally for
``block.db`` and ``block.wal``. For a cluster with a default name, and an OSD
id of 0, the directory could look like::
A symlink is created for the ``block`` device, and is optional for ``block.db``
and ``block.wal``. For a cluster with a default name and an OSD ID of 0, the
directory looks like this::

# ls -l /var/lib/ceph/osd/ceph-0
lrwxrwxrwx. 1 ceph ceph 93 Oct 20 13:05 block -> /dev/ceph-be2b6fbd-bcf2-4c51-b35d-a35a162a02f0/osd-block-25cf0a05-2bc6-44ef-9137-79d65bd7ad62
@@ -85,11 +90,11 @@ id of 0, the directory could look like::
-rw-------. 1 ceph ceph 10 Oct 20 13:05 type
-rw-------. 1 ceph ceph 2 Oct 20 13:05 whoami

In the above case, a device was used for ``block`` so ``ceph-volume`` create
a volume group and a logical volume using the following convention:
In the above case, a device was used for ``block``, so ``ceph-volume`` created
a volume group and a logical volume using the following conventions:

* volume group name: ``ceph-{cluster fsid}`` or if the vg exists already
``ceph-{random uuid}``
* volume group name: ``ceph-{cluster fsid}`` (or if the volume group already
exists: ``ceph-{random uuid}``)

* logical volume name: ``osd-block-{osd_fsid}``

@@ -98,78 +103,100 @@ a volume group and a logical volume using the following convention:

``filestore``
-------------
This is the OSD backend that allows preparation of logical volumes for
a :term:`filestore` objectstore OSD.
``Filestore<filestore>`` is the OSD backend that prepares logical volumes for a
:term:`filestore`-backed object-store OSD.

It can use a logical volume for the OSD data and a physical device, a partition
or logical volume for the journal. A physical device will have a logical volume
created on it. A volume group will either be created or reused it its name begins
with ``ceph``. No special preparation is needed for these volumes other than
following the minimum size requirements for data and journal.

The CLI call looks like this of a basic standalone filestore OSD::
``Filestore<filestore>`` uses a logical volume to store OSD data and it uses
physical devices, partitions, or logical volumes to store the journal. If a
physical device is used to create a filestore backend, a logical volume will be
created on that physical device. If the provided volume group's name begins
with `ceph`, it will be created if it does not yet exist and it will be
clobbered and reused if it already exists. No special preparation is needed for
these volumes, but be sure to meet the minimum size requirements for OSD data and
for the journal.

ceph-volume lvm prepare --filestore --data <data block device>
Use the following command to create a basic filestore OSD:

To deploy file store with an external journal::
.. prompt:: bash #

ceph-volume lvm prepare --filestore --data <data block device> --journal <journal block device>
ceph-volume lvm prepare --filestore --data <data block device>

For enabling :ref:`encryption <ceph-volume-lvm-encryption>`, the ``--dmcrypt`` flag is required::
Use this command to deploy filestore with an external journal:

ceph-volume lvm prepare --filestore --dmcrypt --data <data block device> --journal <journal block device>
.. prompt:: bash #

Both the journal and data block device can take three forms:
ceph-volume lvm prepare --filestore --data <data block device> --journal <journal block device>

Use this command to enable :ref:`encryption <ceph-volume-lvm-encryption>`, and note that the ``--dmcrypt`` flag is required:

.. prompt:: bash #

ceph-volume lvm prepare --filestore --dmcrypt --data <data block device> --journal <journal block device>

The data block device and the journal can each take one of three forms:

* a physical block device
* a partition on a physical block device
* a logical volume

When using logical volumes the value *must* be of the format
``volume_group/logical_volume``. Since logical volume names
are not enforced for uniqueness, this prevents accidentally
choosing the wrong volume.
If you use a logical volume to deploy filestore, the value that you pass in the
command *must* be of the format ``volume_group/logical_volume_name``. Since logical
volume names are not enforced for uniqueness, using this format is an important
safeguard against accidentally choosing the wrong volume (and clobbering its data).

When using a partition, it *must* contain a ``PARTUUID``, that can be
discovered by ``blkid``. THis ensure it can later be identified correctly
regardless of the device name (or path).
If you use a partition to deploy filestore, the partition *must* contain a
``PARTUUID`` that can be discovered by ``blkid``. This ensures that the
partition can be identified correctly regardless of the device's name (or path).

For example: passing a logical volume for data and a partition ``/dev/sdc1`` for
the journal::
For example, to use a logical volume for OSD data and a partition
(``/dev/sdc1``) for the journal, run a command of this form:

ceph-volume lvm prepare --filestore --data volume_group/lv_name --journal /dev/sdc1
.. prompt:: bash #

Passing a bare device for data and a logical volume ias the journal::
ceph-volume lvm prepare --filestore --data volume_group/logical_volume_name --journal /dev/sdc1

ceph-volume lvm prepare --filestore --data /dev/sdc --journal volume_group/journal_lv
Or, to use a bare device for data and a logical volume for the journal:

A generated uuid is used to ask the cluster for a new OSD. These two pieces are
crucial for identifying an OSD and will later be used throughout the
:ref:`ceph-volume-lvm-activate` process.
.. prompt:: bash #

ceph-volume lvm prepare --filestore --data /dev/sdc --journal volume_group/journal_lv

A generated UUID is used when asking the cluster for a new OSD. These two
pieces of information (the OSD ID and the OSD UUID) are necessary for
identifying a given OSD and will later be used throughout the
:ref:`activation<ceph-volume-lvm-activate>` process.

The OSD data directory is created using the following convention::

/var/lib/ceph/osd/<cluster name>-<osd id>

At this point the data volume is mounted at this location, and the journal
volume is linked::
To link the journal volume to the mounted data volume, use this command:

ln -s /path/to/journal /var/lib/ceph/osd/<cluster_name>-<osd-id>/journal
.. prompt:: bash #

The monmap is fetched using the bootstrap key from the OSD::
ln -s /path/to/journal /var/lib/ceph/osd/<cluster_name>-<osd-id>/journal

/usr/bin/ceph --cluster ceph --name client.bootstrap-osd
--keyring /var/lib/ceph/bootstrap-osd/ceph.keyring
mon getmap -o /var/lib/ceph/osd/<cluster name>-<osd id>/activate.monmap
To fetch the monmap by using the bootstrap key from the OSD, use this command:

``ceph-osd`` will be called to populate the OSD directory, that is already
mounted, re-using all the pieces of information from the initial steps::
.. prompt:: bash #

/usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring
/var/lib/ceph/bootstrap-osd/ceph.keyring mon getmap -o
/var/lib/ceph/osd/<cluster name>-<osd id>/activate.monmap

To populate the OSD directory (which has already been mounted), use this ``ceph-osd`` command:
.. prompt:: bash #

ceph-osd --cluster ceph --mkfs --mkkey -i <osd id> \ --monmap
/var/lib/ceph/osd/<cluster name>-<osd id>/activate.monmap --osd-data \
/var/lib/ceph/osd/<cluster name>-<osd id> --osd-journal
/var/lib/ceph/osd/<cluster name>-<osd id>/journal \ --osd-uuid <osd uuid>
--keyring /var/lib/ceph/osd/<cluster name>-<osd id>/keyring \ --setuser ceph
--setgroup ceph

All of the information from the previous steps is used in the above command.

ceph-osd --cluster ceph --mkfs --mkkey -i <osd id> \
--monmap /var/lib/ceph/osd/<cluster name>-<osd id>/activate.monmap --osd-data \
/var/lib/ceph/osd/<cluster name>-<osd id> --osd-journal /var/lib/ceph/osd/<cluster name>-<osd id>/journal \
--osd-uuid <osd uuid> --keyring /var/lib/ceph/osd/<cluster name>-<osd id>/keyring \
--setuser ceph --setgroup ceph


.. _ceph-volume-lvm-partitions:

@@ -113,15 +113,15 @@ Adoption process
ssh-copy-id -f -i ~/ceph.pub root@<host>

.. note::
It is also possible to import an existing ssh key. See
:ref:`ssh errors <cephadm-ssh-errors>` in the troubleshooting
It is also possible to import an existing SSH key. See
:ref:`SSH errors <cephadm-ssh-errors>` in the troubleshooting
document for instructions that describe how to import existing
ssh keys.
SSH keys.

.. note::
It is also possible to have cephadm use a non-root user to ssh
It is also possible to have cephadm use a non-root user to SSH
into cluster hosts. This user needs to have passwordless sudo access.
Use ``ceph cephadm set-user <user>`` and copy the ssh key to that user.
Use ``ceph cephadm set-user <user>`` and copy the SSH key to that user.
See :ref:`cephadm-ssh-user`

#. Tell cephadm which hosts to manage:

@@ -8,11 +8,10 @@ Compatibility and Stability
Compatibility with Podman Versions
----------------------------------

Podman and Ceph have different end-of-life strategies that
might make it challenging to find compatible Podman and Ceph
versions
Podman and Ceph have different end-of-life strategies. This means that care
must be taken in finding a version of Podman that is compatible with Ceph.

Those versions are expected to work:
These versions are expected to work:


+-----------+---------------------------------------+
@@ -28,7 +27,13 @@ Those versions are expected to work:
+-----------+-------+-------+-------+-------+-------+

.. warning::
Only podman versions that are 2.0.0 and higher work with Ceph Pacific, with the exception of podman version 2.2.1, which does not work with Ceph Pacific. kubic stable is known to work with Ceph Pacific, but it must be run with a newer kernel.

To use Podman with Ceph Pacific, you must use **a version of Podman that
is 2.0.0 or higher**. However, **Podman version 2.2.1 does not work with
Ceph Pacific**.

"Kubic stable" is known to work with Ceph Pacific, but it must be run
with a newer kernel.


.. _cephadm-stability:
@@ -36,19 +41,18 @@ Those versions are expected to work:
Stability
---------

Cephadm is actively in development. Please be aware that some
functionality is still rough around the edges. Especially the
following components are working with cephadm, but the
documentation is not as complete as we would like, and there may be some
changes in the near future:
Cephadm is under development. Some functionality is incomplete. Be aware
that some of the components of Ceph may not work perfectly with cephadm.
These include:

- RGW

Cephadm support for the following features is still under development and may see breaking
changes in future releases:
Cephadm support remains under development for the following features:

- Ingress
- Cephadm exporter daemon
- cephfs-mirror

In case you encounter issues, see also :ref:`cephadm-pause`.
If a cephadm command fails or a service stops running properly, see
:ref:`cephadm-pause` for instructions on how to pause the Ceph cluster's
background activity and how to disable cephadm.

@@ -4,17 +4,26 @@
Host Management
===============

To list hosts associated with the cluster:
Listing Hosts
=============

Run a command of this form to list hosts associated with the cluster:

.. prompt:: bash #

ceph orch host ls [--format yaml] [--host-pattern <name>] [--label <label>] [--host-status <status>]
ceph orch host ls [--format yaml] [--host-pattern <name>] [--label <label>] [--host-status <status>]

where the optional arguments "host-pattern", "label" and "host-status" are used for filtering.
"host-pattern" is a regex that will match against hostnames and will only return matching hosts
"label" will only return hosts with the given label
"host-status" will only return hosts with the given status (currently "offline" or "maintenance")
Any combination of these filtering flags is valid. You may filter against name, label and/or status simultaneously
In commands of this form, the arguments "host-pattern", "label" and
"host-status" are optional and are used for filtering.

- "host-pattern" is a regex that matches against hostnames and returns only
matching hosts.
- "label" returns only hosts with the specified label.
- "host-status" returns only hosts with the specified status (currently
"offline" or "maintenance").
- Any combination of these filtering flags is valid. It is possible to filter
against name, label and status simultaneously, or to filter against any
proper subset of name, label and status.

.. _cephadm-adding-hosts:

@@ -30,7 +39,7 @@ To add each new host to the cluster, perform two steps:

.. prompt:: bash #

ssh-copy-id -f -i /etc/ceph/ceph.pub root@*<new-host>*
ssh-copy-id -f -i /etc/ceph/ceph.pub root@*<new-host>*

For example:

@@ -43,7 +52,7 @@ To add each new host to the cluster, perform two steps:

.. prompt:: bash #

ceph orch host add *<newhost>* [*<ip>*] [*<label1> ...*]
ceph orch host add *<newhost>* [*<ip>*] [*<label1> ...*]

For example:

@@ -63,54 +72,60 @@ To add each new host to the cluster, perform two steps:

.. prompt:: bash #

ceph orch host add host4 10.10.0.104 --labels _admin
ceph orch host add host4 10.10.0.104 --labels _admin

.. _cephadm-removing-hosts:

Removing Hosts
==============

A host can safely be removed from a the cluster once all daemons are removed from it.
A host can safely be removed from the cluster after all daemons are removed
from it.

To drain all daemons from a host do the following:
To drain all daemons from a host, run a command of the following form:

.. prompt:: bash #

ceph orch host drain *<host>*
ceph orch host drain *<host>*

The '_no_schedule' label will be applied to the host. See :ref:`cephadm-special-host-labels`
The ``_no_schedule`` label will be applied to the host. See
:ref:`cephadm-special-host-labels`.

All osds on the host will be scheduled to be removed. You can check osd removal progress with the following:
All OSDs on the host will be scheduled to be removed. You can check the progress of the OSD removal operation with the following command:

.. prompt:: bash #

ceph orch osd rm status
ceph orch osd rm status

see :ref:`cephadm-osd-removal` for more details about osd removal
See :ref:`cephadm-osd-removal` for more details about OSD removal.

You can check if there are no deamons left on the host with the following:
Use the following command to determine whether any daemons are still on the
host:

.. prompt:: bash #

ceph orch ps <host>
ceph orch ps <host>

Once all daemons are removed you can remove the host with the following:
After all daemons have been removed from the host, remove the host from the
cluster by running the following command:

.. prompt:: bash #

ceph orch host rm <host>
ceph orch host rm <host>

Offline host removal
--------------------

If a host is offline and can not be recovered it can still be removed from the cluster with the following:
Even if a host is offline and can not be recovered, it can be removed from the
cluster by running a command of the following form:

.. prompt:: bash #

ceph orch host rm <host> --offline --force
ceph orch host rm <host> --offline --force

This can potentially cause data loss as osds will be forcefully purged from the cluster by calling ``osd purge-actual`` for each osd.
Service specs that still contain this host should be manually updated.
.. warning:: This can potentially cause data loss. This command forcefully
purges OSDs from the cluster by calling ``osd purge-actual`` for each OSD.
Any service specs that still contain this host should be manually updated.

.. _orchestrator-host-labels:

@@ -122,18 +137,24 @@ are free form and have no particular meaning by itself and each host
can have multiple labels. They can be used to specify placement
|
||||
of daemons. See :ref:`orch-placement-by-labels`
|
||||
|
||||
Labels can be added when adding a host with the ``--labels`` flag::
|
||||
Labels can be added when adding a host with the ``--labels`` flag:
|
||||
|
||||
ceph orch host add my_hostname --labels=my_label1
|
||||
ceph orch host add my_hostname --labels=my_label1,my_label2
|
||||
.. prompt:: bash #
|
||||
|
||||
To add a label a existing host, run::
|
||||
ceph orch host add my_hostname --labels=my_label1
|
||||
ceph orch host add my_hostname --labels=my_label1,my_label2
|
||||
|
||||
ceph orch host label add my_hostname my_label
|
||||
To add a label to an existing host, run:
|
||||
|
||||
To remove a label, run::
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host label rm my_hostname my_label
|
||||
ceph orch host label add my_hostname my_label
|
||||
|
||||
To remove a label, run:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host label rm my_hostname my_label
|
||||
|
||||
|
||||
.. _cephadm-special-host-labels:
|
||||
@ -166,15 +187,39 @@ The following host labels have a special meaning to cephadm. All start with ``_
|
||||
Maintenance Mode
|
||||
================
|
||||
|
||||
Place a host in and out of maintenance mode (stops all Ceph daemons on host)::
|
||||
Place a host in and out of maintenance mode (stops all Ceph daemons on host):
|
||||
|
||||
ceph orch host maintenance enter <hostname> [--force]
|
||||
ceph orch host maintenance exit <hostname>
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host maintenance enter <hostname> [--force]
|
||||
ceph orch host maintenance exit <hostname>
|
||||
|
||||
The ``--force`` flag, when entering maintenance, allows the user to bypass warnings (but not alerts).
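For example, to place a host with the placeholder name ``host3`` into
maintenance mode while bypassing warnings, and to later bring it back out:

.. prompt:: bash #

    ceph orch host maintenance enter host3 --force
    ceph orch host maintenance exit host3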
|
||||
|
||||
See also :ref:`cephadm-fqdn`
|
||||
|
||||
Rescanning Host Devices
|
||||
=======================
|
||||
|
||||
Some servers and external enclosures may not register device removal or insertion with the
|
||||
kernel. In these scenarios, you'll need to perform a host rescan. A rescan is typically
|
||||
non-disruptive, and can be performed with the following CLI command:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch host rescan <hostname> [--with-summary]
|
||||
|
||||
The ``with-summary`` flag provides a breakdown of the number of HBAs found and scanned, together
|
||||
with any that failed:
|
||||
|
||||
.. prompt:: bash [ceph:root@rh9-ceph1/]#
|
||||
|
||||
ceph orch host rescan rh9-ceph1 --with-summary
|
||||
|
||||
::
|
||||
|
||||
Ok. 2 adapters detected: 2 rescanned, 0 skipped, 0 failed (0.32s)
|
||||
|
||||
Creating many hosts at once
|
||||
===========================
|
||||
|
||||
@ -241,26 +286,36 @@ connect to remote hosts. When the cluster is bootstrapped, this SSH
|
||||
key is generated automatically and no additional configuration
|
||||
is necessary.
|
||||
|
||||
A *new* SSH key can be generated with::
|
||||
A *new* SSH key can be generated with:
|
||||
|
||||
ceph cephadm generate-key
|
||||
.. prompt:: bash #
|
||||
|
||||
The public portion of the SSH key can be retrieved with::
|
||||
ceph cephadm generate-key
|
||||
|
||||
ceph cephadm get-pub-key
|
||||
The public portion of the SSH key can be retrieved with:
|
||||
|
||||
The currently stored SSH key can be deleted with::
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph cephadm clear-key
|
||||
ceph cephadm get-pub-key
|
||||
|
||||
You can make use of an existing key by directly importing it with::
|
||||
The currently stored SSH key can be deleted with:
|
||||
|
||||
ceph config-key set mgr/cephadm/ssh_identity_key -i <key>
|
||||
ceph config-key set mgr/cephadm/ssh_identity_pub -i <pub>
|
||||
.. prompt:: bash #
|
||||
|
||||
You will then need to restart the mgr daemon to reload the configuration with::
|
||||
ceph cephadm clear-key
|
||||
|
||||
ceph mgr fail
|
||||
You can make use of an existing key by directly importing it with:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config-key set mgr/cephadm/ssh_identity_key -i <key>
|
||||
ceph config-key set mgr/cephadm/ssh_identity_pub -i <pub>
|
||||
|
||||
You will then need to restart the mgr daemon to reload the configuration with:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph mgr fail
|
||||
|
||||
.. _cephadm-ssh-user:
|
||||
|
||||
@ -272,11 +327,13 @@ that has enough privileges to download container images, start containers
|
||||
and execute commands without prompting for a password. If you do not want
|
||||
to use the "root" user (default option in cephadm), you must provide
|
||||
cephadm the name of the user that is going to be used to perform all the
|
||||
cephadm operations. Use the command::
|
||||
cephadm operations. Use the command:
|
||||
|
||||
ceph cephadm set-user <user>
|
||||
.. prompt:: bash #
|
||||
|
||||
Prior to running this the cluster ssh key needs to be added to this users
|
||||
ceph cephadm set-user <user>
|
||||
|
||||
Prior to running this, the cluster SSH key needs to be added to this user's
|
||||
authorized_keys file and non-root users must have passwordless sudo access.
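For example, to have cephadm perform its operations as a hypothetical non-root
user named ``cephadm-user``:

.. prompt:: bash #

    ceph cephadm set-user cephadm-user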
|
||||
|
||||
|
||||
@ -295,17 +352,23 @@ something like this::
|
||||
There are two ways to customize this configuration for your environment:
|
||||
|
||||
#. Import a customized configuration file that will be stored
|
||||
by the monitor with::
|
||||
by the monitor with:
|
||||
|
||||
ceph cephadm set-ssh-config -i <ssh_config_file>
|
||||
.. prompt:: bash #
|
||||
|
||||
To remove a customized SSH config and revert back to the default behavior::
|
||||
ceph cephadm set-ssh-config -i <ssh_config_file>
|
||||
|
||||
ceph cephadm clear-ssh-config
|
||||
To remove a customized SSH config and revert back to the default behavior:
|
||||
|
||||
#. You can configure a file location for the SSH configuration file with::
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/ssh_config_file <path>
|
||||
ceph cephadm clear-ssh-config
|
||||
|
||||
#. You can configure a file location for the SSH configuration file with:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set mgr mgr/cephadm/ssh_config_file <path>
|
||||
|
||||
We do *not recommend* this approach. The path name must be
|
||||
visible to *any* mgr daemon, and cephadm runs all daemons as
|
||||
@ -370,4 +433,4 @@ requires the bare host name when adding a host to the cluster:
|
||||
|
||||
..
|
||||
TODO: This chapter needs to provide way for users to configure
|
||||
Grafana in the dashboard, as this is right no very hard to do.
|
||||
Grafana in the dashboard, as this is right now very hard to do.
|
||||
|
@ -4,21 +4,36 @@
|
||||
Cephadm
|
||||
=======
|
||||
|
||||
``cephadm`` deploys and manages a Ceph cluster. It does this by connecting the
|
||||
manager daemon to hosts via SSH. The manager daemon is able to add, remove, and
|
||||
update Ceph containers. ``cephadm`` does not rely on external configuration
|
||||
tools such as Ansible, Rook, and Salt.
|
||||
``cephadm`` is a utility that is used to manage a Ceph cluster.
|
||||
|
||||
``cephadm`` manages the full lifecycle of a Ceph cluster. This lifecycle
|
||||
starts with the bootstrapping process, when ``cephadm`` creates a tiny
|
||||
Ceph cluster on a single node. This cluster consists of one monitor and
|
||||
one manager. ``cephadm`` then uses the orchestration interface ("day 2"
|
||||
commands) to expand the cluster, adding all hosts and provisioning all
|
||||
Ceph daemons and services. Management of this lifecycle can be performed
|
||||
either via the Ceph command-line interface (CLI) or via the dashboard (GUI).
|
||||
Here is a list of some of the things that ``cephadm`` can do:
|
||||
|
||||
``cephadm`` is new in Ceph release v15.2.0 (Octopus) and does not support older
|
||||
versions of Ceph.
|
||||
- ``cephadm`` can add a Ceph container to the cluster.
|
||||
- ``cephadm`` can remove a Ceph container from the cluster.
|
||||
- ``cephadm`` can update Ceph containers.
|
||||
|
||||
``cephadm`` does not rely on external configuration tools like Ansible, Rook,
|
||||
or Salt. However, those external configuration tools can be used to automate
|
||||
operations not performed by cephadm itself. To learn more about these external
|
||||
configuration tools, visit their pages:
|
||||
|
||||
* https://github.com/ceph/cephadm-ansible
|
||||
* https://rook.io/docs/rook/v1.10/Getting-Started/intro/
|
||||
* https://github.com/ceph/ceph-salt
|
||||
|
||||
``cephadm`` manages the full lifecycle of a Ceph cluster. This lifecycle starts
|
||||
with the bootstrapping process, when ``cephadm`` creates a tiny Ceph cluster on
|
||||
a single node. This cluster consists of one monitor and one manager.
|
||||
``cephadm`` then uses the orchestration interface to expand the cluster, adding
|
||||
hosts and provisioning Ceph daemons and services. Management of this lifecycle
|
||||
can be performed either via the Ceph command-line interface (CLI) or via the
|
||||
dashboard (GUI).
|
||||
|
||||
To use ``cephadm`` to get started with Ceph, follow the instructions in
|
||||
:ref:`cephadm_deploying_new_cluster`.
|
||||
|
||||
``cephadm`` was introduced in Ceph release v15.2.0 (Octopus) and does not
|
||||
support older versions of Ceph.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _cephadm_deploying_new_cluster:
|
||||
|
||||
============================
|
||||
Deploying a new Ceph cluster
|
||||
============================
|
||||
@ -8,7 +10,6 @@ then deploying the needed services.
|
||||
|
||||
.. highlight:: console
|
||||
|
||||
|
||||
.. _cephadm-host-requirements:
|
||||
|
||||
Requirements
|
||||
@ -35,17 +36,12 @@ Ceph.
|
||||
Install cephadm
|
||||
===============
|
||||
|
||||
The ``cephadm`` command can
|
||||
|
||||
#. bootstrap a new cluster
|
||||
#. launch a containerized shell with a working Ceph CLI
|
||||
#. aid in debugging containerized Ceph daemons
|
||||
|
||||
There are two ways to install ``cephadm``:
|
||||
|
||||
#. a :ref:`curl-based installation<cephadm_install_curl>` method
|
||||
#. :ref:`distribution-specific installation methods<cephadm_install_distros>`
|
||||
|
||||
|
||||
.. _cephadm_install_curl:
|
||||
|
||||
curl-based installation
|
||||
@ -214,8 +210,8 @@ available options.
|
||||
EOF
|
||||
$ ./cephadm bootstrap --config initial-ceph.conf ...
|
||||
|
||||
* The ``--ssh-user *<user>*`` option makes it possible to choose which ssh
|
||||
user cephadm will use to connect to hosts. The associated ssh key will be
|
||||
* The ``--ssh-user *<user>*`` option makes it possible to choose which SSH
|
||||
user cephadm will use to connect to hosts. The associated SSH key will be
|
||||
added to ``/home/*<user>*/.ssh/authorized_keys``. The user that you
|
||||
designate with this option must have passwordless sudo access.
|
||||
|
||||
@ -366,38 +362,78 @@ Different deployment scenarios
|
||||
Single host
|
||||
-----------
|
||||
|
||||
To configure a Ceph cluster to run on a single host, use the ``--single-host-defaults`` flag when bootstrapping. For use cases of this, see :ref:`one-node-cluster`.
|
||||
To configure a Ceph cluster to run on a single host, use the
|
||||
``--single-host-defaults`` flag when bootstrapping. For use cases of this, see
|
||||
:ref:`one-node-cluster`.
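As a rough sketch, a single-host bootstrap could look like the following; the
monitor IP address is a placeholder for your own:

.. prompt:: bash #

    cephadm bootstrap --mon-ip 192.168.0.10 --single-host-defaults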
|
||||
|
||||
The ``--single-host-defaults`` flag sets the following configuration options::
|
||||
|
||||
global/osd_crush_choose_leaf_type = 0
|
||||
global/osd_crush_chooseleaf_type = 0
|
||||
global/osd_pool_default_size = 2
|
||||
mgr/mgr_standby_modules = False
|
||||
|
||||
For more information on these options, see :ref:`one-node-cluster` and ``mgr_standby_modules`` in :ref:`mgr-administrator-guide`.
|
||||
|
||||
For more information on these options, see :ref:`one-node-cluster` and
|
||||
``mgr_standby_modules`` in :ref:`mgr-administrator-guide`.
|
||||
|
||||
.. _cephadm-airgap:
|
||||
|
||||
Deployment in an isolated environment
|
||||
-------------------------------------
|
||||
|
||||
You can install Cephadm in an isolated environment by using a custom container registry. You can either configure Podman or Docker to use an insecure registry, or make the registry secure. Ensure your container image is inside the registry and that you have access to all hosts you wish to add to the cluster.
|
||||
You might need to install cephadm in an environment that is not connected
|
||||
directly to the internet (such an environment is also called an "isolated
|
||||
environment"). This can be done if a custom container registry is used. Either
|
||||
of two kinds of custom container registry can be used in this scenario: (1) a
|
||||
Podman-based or Docker-based insecure registry, or (2) a secure registry.
|
||||
|
||||
Run a local container registry:
|
||||
The practice of installing software on systems that are not connected directly
|
||||
to the internet is called "airgapping" and registries that are not connected
|
||||
directly to the internet are referred to as "airgapped".
|
||||
|
||||
.. prompt:: bash #
|
||||
Make sure that your container image is inside the registry. Make sure that you
|
||||
have access to all hosts that you plan to add to the cluster.
|
||||
|
||||
podman run --privileged -d --name registry -p 5000:5000 -v /var/lib/registry:/var/lib/registry --restart=always registry:2
|
||||
#. Run a local container registry:
|
||||
|
||||
If you are using an insecure registry, configure Podman or Docker with the hostname and port where the registry is running.
|
||||
.. prompt:: bash #
|
||||
|
||||
.. note:: For every host which accesses the local insecure registry, you will need to repeat this step on the host.
|
||||
podman run --privileged -d --name registry -p 5000:5000 -v /var/lib/registry:/var/lib/registry --restart=always registry:2
|
||||
|
||||
Next, push your container image to your local registry.
|
||||
#. If you are using an insecure registry, configure Podman or Docker with the
|
||||
hostname and port where the registry is running.
|
||||
|
||||
Then run bootstrap using the ``--image`` flag with your container image. For example:
|
||||
.. note:: You must repeat this step for every host that accesses the local
|
||||
insecure registry.
|
||||
|
||||
.. prompt:: bash #
|
||||
#. Push your container image to your local registry. Here are some acceptable
|
||||
kinds of container images:
|
||||
|
||||
cephadm --image *<hostname>*:5000/ceph/ceph bootstrap --mon-ip *<mon-ip>*
|
||||
* Ceph container image. See :ref:`containers`.
|
||||
* Prometheus container image
|
||||
* Node exporter container image
|
||||
* Grafana container image
|
||||
* Alertmanager container image
|
||||
|
||||
#. Create a temporary configuration file to store the names of the monitoring
|
||||
images. (See :ref:`cephadm_monitoring-images`):
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
cat <<EOF > initial-ceph.conf
|
||||
|
||||
::
|
||||
|
||||
[mgr]
|
||||
mgr/cephadm/container_image_prometheus *<hostname>*:5000/prometheus
|
||||
mgr/cephadm/container_image_node_exporter *<hostname>*:5000/node_exporter
|
||||
mgr/cephadm/container_image_grafana *<hostname>*:5000/grafana
|
||||
mgr/cephadm/container_image_alertmanager *<hostname>*:5000/alertmanger
|
||||
|
||||
#. Run bootstrap using the ``--image`` flag and pass the name of your
|
||||
container image as the argument of the image flag. For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
cephadm --image *<hostname>*:5000/ceph/ceph bootstrap --mon-ip *<mon-ip>*
|
||||
|
||||
.. _cluster network: ../rados/configuration/network-config-ref#cluster-network
|
||||
|
@ -86,7 +86,20 @@ Service Specification
|
||||
=====================
|
||||
|
||||
A *Service Specification* is a data structure that is used to specify the
|
||||
deployment of services. Here is an example of a service specification in YAML:
|
||||
deployment of services. In addition to parameters such as `placement` or
|
||||
`networks`, the user can set initial values of service configuration parameters
|
||||
by means of the `config` section. For each param/value configuration pair,
|
||||
cephadm calls the following command to set its value:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph config set <service-name> <param> <value>
|
||||
|
||||
cephadm raises health warnings in case invalid configuration parameters are
|
||||
found in the spec (`CEPHADM_INVALID_CONFIG_OPTION`) or if any error occurs while
|
||||
trying to apply the new configuration option(s) (`CEPHADM_FAILED_SET_OPTION`).
|
||||
|
||||
Here is an example of a service specification in YAML:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
@ -97,6 +110,10 @@ deployment of services. Here is an example of a service specification in YAML:
|
||||
- host1
|
||||
- host2
|
||||
- host3
|
||||
config:
|
||||
param_1: val_1
|
||||
...
|
||||
param_N: val_N
|
||||
unmanaged: false
|
||||
networks:
|
||||
- 192.169.142.0/24
|
||||
@ -414,7 +431,7 @@ Cephadm supports the deployment of multiple daemons on the same host:
|
||||
service_type: rgw
|
||||
placement:
|
||||
label: rgw
|
||||
count-per-host: 2
|
||||
count_per_host: 2
|
||||
|
||||
The main reason for deploying multiple daemons per host is an additional
|
||||
performance benefit for running multiple RGW and MDS daemons on the same host.
|
||||
@ -501,10 +518,32 @@ a spec like
|
||||
- host2
|
||||
- host3
|
||||
extra_container_args:
|
||||
- "--cpus=2"
|
||||
- "--cpus=2"
|
||||
|
||||
which would cause each mon daemon to be deployed with `--cpus=2`.
|
||||
|
||||
Mounting Files with Extra Container Arguments
|
||||
---------------------------------------------
|
||||
|
||||
A common use case for extra container arguments is to mount additional
|
||||
files within the container. However, some intuitive formats for doing
|
||||
so can cause deployment to fail (see https://tracker.ceph.com/issues/57338).
|
||||
The recommended syntax for mounting a file with extra container arguments is:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
extra_container_args:
|
||||
- "-v"
|
||||
- "/absolute/file/path/on/host:/absolute/file/path/in/container"
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
extra_container_args:
|
||||
- "-v"
|
||||
- "/opt/ceph_cert/host.cert:/etc/grafana/certs/cert_file:ro"
|
||||
|
||||
.. _orch-rm:
|
||||
|
||||
Removing a Service
|
||||
|
@ -103,6 +103,8 @@ example spec file:
|
||||
spec:
|
||||
port: 4200
|
||||
|
||||
.. _cephadm_monitoring-images:
|
||||
|
||||
Using custom images
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@ -161,6 +163,8 @@ For example, if you had changed the prometheus image
|
||||
|
||||
ceph config rm mgr mgr/cephadm/container_image_prometheus
|
||||
|
||||
See also :ref:`cephadm-airgap`.
|
||||
|
||||
.. _cephadm-overwrite-jinja2-templates:
|
||||
|
||||
Using custom configuration files
|
||||
@ -195,6 +199,7 @@ set``:
|
||||
- ``services/grafana/ceph-dashboard.yml``
|
||||
- ``services/grafana/grafana.ini``
|
||||
- ``services/prometheus/prometheus.yml``
|
||||
- ``services/prometheus/alerting/custom_alerts.yml``
|
||||
|
||||
You can look up the file templates that are currently used by cephadm in
|
||||
``src/pybind/mgr/cephadm/templates``:
|
||||
@ -240,6 +245,15 @@ Example
|
||||
# reconfig the prometheus service
|
||||
ceph orch reconfig prometheus
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# set additional custom alerting rules for Prometheus
|
||||
ceph config-key set mgr/cephadm/services/prometheus/alerting/custom_alerts.yml \
|
||||
-i $PWD/custom_alerts.yml
|
||||
|
||||
# Note that custom alerting rules are not parsed by Jinja and hence escaping
|
||||
# will not be an issue.
|
||||
|
||||
Deploying monitoring without cephadm
|
||||
------------------------------------
|
||||
|
||||
@ -282,6 +296,32 @@ Due to performance reasons, monitoring of RBD images is disabled by default. For
|
||||
:ref:`prometheus-rbd-io-statistics`. If disabled, the overview and details dashboards will stay empty in Grafana
|
||||
and the metrics will not be visible in Prometheus.
|
||||
|
||||
Setting up Prometheus
|
||||
-----------------------
|
||||
|
||||
Setting Prometheus Retention Time
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Cephadm provides the option to set the Prometheus TSDB retention time using
|
||||
a ``retention_time`` field in the Prometheus service spec. The value defaults
|
||||
to 15 days (15d). If you would like a different value, such as 1 year (1y), you
|
||||
can apply a service spec similar to:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
service_type: prometheus
|
||||
placement:
|
||||
count: 1
|
||||
spec:
|
||||
retention_time: "1y"
|
||||
|
||||
.. note::
|
||||
|
||||
If you already had Prometheus daemon(s) deployed before and are updating an
|
||||
existent spec as opposed to doing a fresh Prometheus deployment, you must also
|
||||
tell cephadm to redeploy the Prometheus daemon(s) to put this change into effect.
|
||||
This can be done with a ``ceph orch redeploy prometheus`` command.
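For reference, the redeploy mentioned in the note above is issued as follows:

.. prompt:: bash #

    ceph orch redeploy prometheus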
|
||||
|
||||
Setting up Grafana
|
||||
------------------
|
||||
|
||||
|
@ -138,6 +138,12 @@ There are a few ways to create new OSDs:
|
||||
|
||||
ceph orch daemon add osd host1:/dev/sdb
|
||||
|
||||
Advanced OSD creation from specific devices on a specific host:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch daemon add osd host1:data_devices=/dev/sda,/dev/sdb,db_devices=/dev/sdc,osds_per_device=2
|
||||
|
||||
* You can use :ref:`drivegroups` to categorize device(s) based on their
|
||||
properties. This might be useful in forming a clearer picture of which
|
||||
devices are available to consume. Properties include device type (SSD or
|
||||
@ -239,6 +245,18 @@ Expected output::
|
||||
|
||||
OSDs that are not safe to destroy will be rejected.
|
||||
|
||||
.. note::
|
||||
After removing OSDs, if the drives the OSDs were deployed on once again
|
||||
become available, cephadm may automatically try to deploy more OSDs
|
||||
on these drives if they match an existing drivegroup spec. If you deployed
|
||||
the OSDs you are removing with a spec and don't want any new OSDs deployed on
|
||||
the drives after removal, it's best to modify the drivegroup spec before removal.
|
||||
Either set ``unmanaged: true`` to stop it from picking up new drives at all,
|
||||
or modify it in some way so that it no longer matches the drives used for the
|
||||
OSDs you wish to remove. Then re-apply the spec. For more info on drivegroup
|
||||
specs see :ref:`drivegroups`. For more info on the declarative nature of
|
||||
cephadm in reference to deploying OSDs, see :ref:`cephadm-osd-declarative`. A sketch of the ``unmanaged`` approach follows this note.
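A minimal sketch of the ``unmanaged`` approach, assuming a purely hypothetical
OSD spec named ``my_osds`` that previously matched the freed drives:

.. code-block:: yaml

    service_type: osd
    service_id: my_osds        # hypothetical spec name
    unmanaged: true            # cephadm will no longer create OSDs from this spec
    placement:
      host_pattern: '*'
    data_devices:
      all: true

Re-applying the modified spec (for example with ``ceph orch apply -i my_osds.yaml``)
puts the change into effect before the drives become available again.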
|
||||
|
||||
Monitoring OSD State
|
||||
--------------------
|
||||
|
||||
@ -283,6 +301,7 @@ Expected output::
|
||||
|
||||
This resets the initial state of the OSD and takes it off the removal queue.
|
||||
|
||||
.. _cephadm-replacing-an-osd:
|
||||
|
||||
Replacing an OSD
|
||||
----------------
|
||||
|
@ -65,14 +65,14 @@ example spec file:
|
||||
.. code-block:: yaml
|
||||
|
||||
service_type: rgw
|
||||
service_name: foo
|
||||
service_id: foo
|
||||
placement:
|
||||
label: rgw
|
||||
count-per-host: 2
|
||||
count_per_host: 2
|
||||
networks:
|
||||
- 192.169.142.0/24
|
||||
spec:
|
||||
port: 8000
|
||||
rgw_frontend_port: 8080
|
||||
|
||||
|
||||
Multisite zones
|
||||
@ -224,6 +224,33 @@ It is a yaml format file with the following properties:
|
||||
...
|
||||
-----END PRIVATE KEY-----
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
service_type: ingress
|
||||
service_id: rgw.something # adjust to match your existing RGW service
|
||||
placement:
|
||||
hosts:
|
||||
- host1
|
||||
- host2
|
||||
- host3
|
||||
spec:
|
||||
backend_service: rgw.something # adjust to match your existing RGW service
|
||||
virtual_ips_list:
|
||||
- <string>/<string> # ex: 192.168.20.1/24
|
||||
- <string>/<string> # ex: 192.168.20.2/24
|
||||
- <string>/<string> # ex: 192.168.20.3/24
|
||||
frontend_port: <integer> # ex: 8080
|
||||
monitor_port: <integer> # ex: 1967, used by haproxy for load balancer status
|
||||
virtual_interface_networks: [ ... ] # optional: list of CIDR networks
|
||||
ssl_cert: | # optional: SSL certificate and key
|
||||
-----BEGIN CERTIFICATE-----
|
||||
...
|
||||
-----END CERTIFICATE-----
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
...
|
||||
-----END PRIVATE KEY-----
|
||||
|
||||
|
||||
where the properties of this service specification are:
|
||||
|
||||
* ``service_type``
|
||||
@ -237,6 +264,10 @@ where the properties of this service specification are:
|
||||
to match the nodes where RGW is deployed.
|
||||
* ``virtual_ip``
|
||||
The virtual IP (and network) in CIDR format where the ingress service will be available.
|
||||
* ``virtual_ips_list``
|
||||
A list of virtual IP addresses in CIDR format where the ingress service will be available.
|
||||
Each virtual IP address will be primary on one node running the ingress service. The number
|
||||
of virtual IP addresses must be less than or equal to the number of ingress nodes.
|
||||
* ``virtual_interface_networks``
|
||||
A list of networks to identify which ethernet interface to use for the virtual IP.
|
||||
* ``frontend_port``
|
||||
|
@ -179,7 +179,7 @@ container execution command.
|
||||
|
||||
.. _cephadm-ssh-errors:
|
||||
|
||||
ssh errors
|
||||
SSH errors
|
||||
----------
|
||||
|
||||
Error message::
|
||||
@ -208,7 +208,7 @@ Things users can do:
|
||||
|
||||
[root@mon1 ~]# cat ~/cephadm_private_key | cephadm shell -- ceph cephadm set-ssh-key -i -
|
||||
|
||||
2. Ensure that the ssh config is correct::
|
||||
2. Ensure that the SSH config is correct::
|
||||
|
||||
[root@mon1 ~]# cephadm shell -- ceph cephadm get-ssh-config > config
|
||||
|
||||
|
@ -96,6 +96,12 @@ You can stop the upgrade process at any time by running the following command:
|
||||
|
||||
ceph orch upgrade stop
|
||||
|
||||
Post upgrade actions
|
||||
====================
|
||||
|
||||
If the new version is based on ``cephadm``, then once the upgrade is complete the user
|
||||
must update the ``cephadm`` package (or the ``ceph-common`` package, if the user
|
||||
does not use ``cephadm shell``) to a version compatible with the new version.
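The exact packaging command depends on your distribution and configured
repositories; on an RPM-based system it might look roughly like this:

.. prompt:: bash #

    dnf update cephadm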
|
||||
|
||||
Potential problems
|
||||
==================
|
||||
@ -165,3 +171,100 @@ you need. For example, the following command upgrades to a development build:
|
||||
ceph orch upgrade start --image quay.io/ceph-ci/ceph:recent-git-branch-name
|
||||
|
||||
For more information about available container images, see :ref:`containers`.
|
||||
|
||||
Staggered Upgrade
|
||||
=================
|
||||
|
||||
Some users may prefer to upgrade components in phases rather than all at once.
|
||||
The upgrade command, starting in 16.2.11 and 17.2.1, allows parameters
|
||||
to limit which daemons are upgraded by a single upgrade command. The options
|
||||
include ``daemon_types``, ``services``, ``hosts`` and ``limit``. ``daemon_types``
|
||||
takes a comma-separated list of daemon types and will only upgrade daemons of those
|
||||
types. ``services`` is mutually exclusive with ``daemon_types``, only takes services
|
||||
of one type at a time (e.g. can't provide an OSD and RGW service at the same time), and
|
||||
will only upgrade daemons belonging to those services. ``hosts`` can be combined
|
||||
with ``daemon_types`` or ``services`` or provided on its own. The ``hosts`` parameter
|
||||
follows the same format as the command line options for :ref:`orchestrator-cli-placement-spec`.
|
||||
``limit`` takes an integer > 0 and provides a numerical limit on the number of
|
||||
daemons cephadm will upgrade. ``limit`` can be combined with any of the other
|
||||
parameters. For example, if you specify to upgrade daemons of type osd on host
|
||||
Host1 with ``limit`` set to 3, cephadm will upgrade (up to) 3 osd daemons on
|
||||
Host1.
|
||||
|
||||
Example: specifying daemon types and hosts:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch upgrade start --image <image-name> --daemon-types mgr,mon --hosts host1,host2
|
||||
|
||||
Example: specifying services and using limit:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch upgrade start --image <image-name> --services rgw.example1,rgw.example2 --limit 2
|
||||
|
||||
.. note::
|
||||
|
||||
Cephadm strictly enforces an order to the upgrade of daemons that is still present
|
||||
in staggered upgrade scenarios. The current upgrade ordering is
|
||||
``mgr -> mon -> crash -> osd -> mds -> rgw -> rbd-mirror -> cephfs-mirror -> iscsi -> nfs``.
|
||||
If you specify parameters that would upgrade daemons out of order, the upgrade
|
||||
command will block and note which daemons will be missed if you proceed.
|
||||
|
||||
.. note::
|
||||
|
||||
Upgrade commands with limiting parameters will validate the options before beginning the
|
||||
upgrade, which may require pulling the new container image. Do not be surprised
|
||||
if the upgrade start command takes a while to return when limiting parameters are provided.
|
||||
|
||||
.. note::
|
||||
|
||||
In staggered upgrade scenarios (when a limiting parameter is provided) monitoring
|
||||
stack daemons including Prometheus and node-exporter are refreshed after the Manager
|
||||
daemons have been upgraded. Do not be surprised if Manager upgrades thus take longer
|
||||
than expected. Note that the versions of monitoring stack daemons may not change between
|
||||
Ceph releases, in which case they are only redeployed.
|
||||
|
||||
Upgrading to a version that supports staggered upgrade from one that doesn't
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
When upgrading from a version that already supports staggered upgrades, the process
|
||||
simply requires providing the necessary arguments. However, if you wish to upgrade
|
||||
to a version that supports staggered upgrade from one that does not, there is a
|
||||
workaround. It requires first manually upgrading the Manager daemons and then passing
|
||||
the limiting parameters as usual.
|
||||
|
||||
.. warning::
|
||||
Make sure you have multiple running mgr daemons before attempting this procedure.
|
||||
|
||||
To start with, determine which Manager is your active one and which are standby. This
|
||||
can be done in a variety of ways such as looking at the ``ceph -s`` output. Then,
|
||||
manually upgrade each standby mgr daemon with:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch daemon redeploy mgr.example1.abcdef --image <new-image-name>
|
||||
|
||||
.. note::
|
||||
|
||||
If you are on a very early version of cephadm (early Octopus) the ``orch daemon redeploy``
|
||||
command may not have the ``--image`` flag. In that case, you must manually set the
|
||||
Manager container image with ``ceph config set mgr container_image <new-image-name>`` and then
|
||||
redeploy the Manager with ``ceph orch daemon redeploy mgr.example1.abcdef``.
|
||||
|
||||
At this point, perform a Manager failover so that the active Manager is one
|
||||
running the new version.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph mgr fail
|
||||
|
||||
Verify the active Manager is now one running the new version. To complete the Manager
|
||||
upgrade, run:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph orch upgrade start --image <new-image-name> --daemon-types mgr
|
||||
|
||||
You should now have all your Manager daemons on the new version and be able to
|
||||
specify the limiting parameters for the rest of the upgrade.
|
||||
|
@ -1,3 +1,12 @@
|
||||
.. _cephfs_add_remote_mds:
|
||||
|
||||
.. note::
|
||||
It is highly recommended to use :doc:`/cephadm/index` or another Ceph
|
||||
orchestrator for setting up the Ceph cluster. Use this approach only if you
|
||||
are setting up the Ceph cluster manually. If you still intend to use the
|
||||
manual way for deploying MDS daemons, :doc:`/cephadm/services/mds/` can
|
||||
also be used.
|
||||
|
||||
============================
|
||||
Deploying Metadata Servers
|
||||
============================
|
||||
@ -62,7 +71,7 @@ means limiting its cache size.
|
||||
Adding an MDS
|
||||
=============
|
||||
|
||||
#. Create an mds data point ``/var/lib/ceph/mds/ceph-${id}``. The daemon only uses this directory to store its keyring.
|
||||
#. Create an mds directory ``/var/lib/ceph/mds/ceph-${id}``. The daemon only uses this directory to store its keyring.
|
||||
|
||||
#. Create the authentication key, if you use CephX: ::
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _ceph-dokan:
|
||||
|
||||
=======================
|
||||
Mount CephFS on Windows
|
||||
=======================
|
||||
|
@ -64,7 +64,7 @@ Copy a file/directory to Ceph File System from Local File System.
|
||||
|
||||
Usage :
|
||||
|
||||
put [options] <source_path> [target_path]
|
||||
put [options] <source_path> <target_path>
|
||||
|
||||
* source_path - local file/directory path to be copied to cephfs.
|
||||
* if `.` copies all the file/directories in the local working directory.
|
||||
@ -84,7 +84,7 @@ Copy a file from Ceph File System to Local File System.
|
||||
|
||||
Usage :
|
||||
|
||||
get [options] <source_path> [target_path]
|
||||
get [options] <source_path> <target_path>
|
||||
|
||||
* source_path - remote file/directory path which is to be copied to local file system.
|
||||
* if `.` copies all the file/directories in the remote working directory.
|
||||
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 27 KiB |
@ -1,3 +1,5 @@
|
||||
.. _cephfs-top:
|
||||
|
||||
==================
|
||||
CephFS Top Utility
|
||||
==================
|
||||
@ -7,7 +9,7 @@ in realtime. `cephfs-top` is a curses based python script which makes use of `st
|
||||
plugin in Ceph Manager to fetch (and display) metrics.
|
||||
|
||||
Manager Plugin
|
||||
--------------
|
||||
==============
|
||||
|
||||
Ceph Filesystem clients periodically forward various metrics to Ceph Metadata Servers (MDS)
|
||||
which are in turn forwarded to Ceph Manager by MDS rank zero. Each active MDS forwards its
|
||||
@ -27,9 +29,8 @@ metrics are for a particular MDS rank (e.g., number of subtrees handled by an MD
|
||||
Once enabled, Ceph Filesystem metrics can be fetched via::
|
||||
|
||||
$ ceph fs perf stats
|
||||
{"version": 1, "global_counters": ["cap_hit", "read_latency", "write_latency", "metadata_latency", "dentry_lease"], "counters": [], "client_metadata": {"client.614146": {"IP": "10.1.1.100", "hostname" : "ceph-host1", "root": "/", "mount_point": "/mnt/cephfs", "valid_metrics": ["cap_hit", "read_latency", "write_latency", "metadata_latency", "dentry_lease"]}}, "global_metrics": {"client.614146": [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]}, "metrics": {"delayed_ranks": [], "mds.0": {"client.614146": []}}}
|
||||
|
||||
Details of the JSON command output are as follows:
|
||||
The output format is JSON and contains fields as follows:
|
||||
|
||||
- `version`: Version of stats output
|
||||
- `global_counters`: List of global performance metrics
|
||||
@ -52,7 +53,7 @@ To fetch metrics only for a subset of active MDSs (e.g., MDS rank 1 and 2)::
|
||||
$ ceph fs perf stats --mds_rank=1,2
|
||||
|
||||
`cephfs-top`
|
||||
------------
|
||||
============
|
||||
|
||||
The `cephfs-top` utility relies on the `stats` plugin to fetch performance metrics and display them in
|
||||
a `top(1)`-like format. `cephfs-top` is available as part of the `cephfs-top` package.
|
||||
@ -62,6 +63,9 @@ By default, `cephfs-top` uses `client.fstop` user to connect to a Ceph cluster::
|
||||
$ ceph auth get-or-create client.fstop mon 'allow r' mds 'allow r' osd 'allow r' mgr 'allow r'
|
||||
$ cephfs-top
|
||||
|
||||
Command-Line Options
|
||||
--------------------
|
||||
|
||||
To use a non-default user (other than `client.fstop`) use::
|
||||
|
||||
$ cephfs-top --id <name>
|
||||
@ -76,8 +80,18 @@ By default, `cephfs-top` connects to cluster name `ceph`. To use a non-default c
|
||||
|
||||
The interval should be greater than or equal to 0.5 seconds. Fractional seconds are honoured.
|
||||
|
||||
Sample screenshot running `cephfs-top` with 2 clients:
|
||||
Interactive Commands
|
||||
--------------------
|
||||
|
||||
1. m : Filesystem selection
|
||||
Displays a menu of filesystems for selection.
|
||||
|
||||
2. q : Quit
|
||||
Exit the utility if you are at the home screen (All Filesystem Info),
|
||||
otherwise escape back to the home screen.
|
||||
|
||||
The metrics display can be scrolled using the Arrow Keys, PgUp/PgDn, Home/End and mouse.
|
||||
|
||||
Sample screenshot running `cephfs-top` with 2 filesystems:
|
||||
|
||||
.. image:: cephfs-top.png
|
||||
|
||||
.. note:: As of now, `cephfs-top` does not reliably work with multiple Ceph Filesystems.
|
||||
|
@ -80,18 +80,63 @@ List volumes using::
|
||||
|
||||
$ ceph fs volume ls
|
||||
|
||||
Fetch the information of a CephFS volume using::
|
||||
|
||||
$ ceph fs volume info vol_name [--human_readable]
|
||||
|
||||
The ``--human_readable`` flag shows used and available pool capacities in KB/MB/GB.
|
||||
|
||||
The output format is JSON and contains fields as follows:
|
||||
|
||||
* pools: Attributes of data and metadata pools
|
||||
* avail: The amount of free space available in bytes
|
||||
* used: The amount of storage consumed in bytes
|
||||
* name: Name of the pool
|
||||
* mon_addrs: List of monitor addresses
|
||||
* used_size: Current used size of the CephFS volume in bytes
|
||||
* pending_subvolume_deletions: Number of subvolumes pending deletion
|
||||
|
||||
Sample output of volume info command::
|
||||
|
||||
$ ceph fs volume info vol_name
|
||||
{
|
||||
"mon_addrs": [
|
||||
"192.168.1.7:40977"
|
||||
],
|
||||
"pending_subvolume_deletions": 0,
|
||||
"pools": {
|
||||
"data": [
|
||||
{
|
||||
"avail": 106288709632,
|
||||
"name": "cephfs.vol_name.data",
|
||||
"used": 4096
|
||||
}
|
||||
],
|
||||
"metadata": [
|
||||
{
|
||||
"avail": 106288709632,
|
||||
"name": "cephfs.vol_name.meta",
|
||||
"used": 155648
|
||||
}
|
||||
]
|
||||
},
|
||||
"used_size": 0
|
||||
}
|
||||
|
||||
FS Subvolume groups
|
||||
-------------------
|
||||
|
||||
Create a subvolume group using::
|
||||
|
||||
$ ceph fs subvolumegroup create <vol_name> <group_name> [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>]
|
||||
$ ceph fs subvolumegroup create <vol_name> <group_name> [--size <size_in_bytes>] [--pool_layout <data_pool_name>] [--uid <uid>] [--gid <gid>] [--mode <octal_mode>]
|
||||
|
||||
The command succeeds even if the subvolume group already exists.
|
||||
|
||||
When creating a subvolume group you can specify its data pool layout (see
|
||||
:doc:`/cephfs/file-layouts`), uid, gid, and file mode in octal numerals. By default, the
|
||||
subvolume group is created with an octal file mode '755', uid '0', gid '0' and data pool
|
||||
:doc:`/cephfs/file-layouts`), uid, gid, file mode in octal numerals and
|
||||
size in bytes. The size of the subvolume group is specified by setting
|
||||
a quota on it (see :doc:`/cephfs/quota`). By default, the subvolume group
|
||||
is created with an octal file mode '755', uid '0', gid '0' and the data pool
|
||||
layout of its parent directory.
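For example, a group with a 10 GiB quota and a custom mode could be created
like this (the volume and group names are placeholders)::

    $ ceph fs subvolumegroup create cephfs group_a --size 10737418240 --mode 777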
|
||||
|
||||
|
||||
@ -114,6 +159,47 @@ List subvolume groups using::
|
||||
.. note:: Subvolume group snapshot feature is no longer supported in mainline CephFS (existing group
|
||||
snapshots can still be listed and deleted)
|
||||
|
||||
Fetch the metadata of a subvolume group using::
|
||||
|
||||
$ ceph fs subvolumegroup info <vol_name> <group_name>
|
||||
|
||||
The output format is JSON and contains fields as follows.
|
||||
|
||||
* atime: access time of subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* mtime: modification time of subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* ctime: change time of subvolume group path in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* uid: uid of subvolume group path
|
||||
* gid: gid of subvolume group path
|
||||
* mode: mode of subvolume group path
|
||||
* mon_addrs: list of monitor addresses
|
||||
* bytes_pcent: quota used in percentage if quota is set, else displays "undefined"
|
||||
* bytes_quota: quota size in bytes if quota is set, else displays "infinite"
|
||||
* bytes_used: current used size of the subvolume group in bytes
|
||||
* created_at: time of creation of subvolume group in the format "YYYY-MM-DD HH:MM:SS"
|
||||
* data_pool: data pool the subvolume group belongs to
|
||||
|
||||
Check the presence of any subvolume group using::
|
||||
|
||||
$ ceph fs subvolumegroup exist <vol_name>
|
||||
|
||||
The strings returned by the 'exist' command:
|
||||
|
||||
* "subvolumegroup exists": if any subvolumegroup is present
|
||||
* "no subvolumegroup exists": if no subvolumegroup is present
|
||||
|
||||
.. note:: This command checks for the presence of custom groups and not the default one. To validate the emptiness of the volume, a subvolumegroup existence check alone is not sufficient. Subvolume existence also needs to be checked, as there might be subvolumes in the default group.
|
||||
|
||||
Resize a subvolume group using::
|
||||
|
||||
$ ceph fs subvolumegroup resize <vol_name> <group_name> <new_size> [--no_shrink]
|
||||
|
||||
The command resizes the subvolume group quota using the size specified by 'new_size'.
|
||||
The '--no_shrink' flag prevents the subvolume group from shrinking below the current used
|
||||
size of the subvolume group.
|
||||
|
||||
The subvolume group can be resized to an unlimited size by passing 'inf' or 'infinite'
|
||||
as the new_size.
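For example, to grow the quota of a placeholder group to 20 GiB, or to remove
its size limit entirely::

    $ ceph fs subvolumegroup resize cephfs group_a 21474836480 --no_shrink
    $ ceph fs subvolumegroup resize cephfs group_a infinite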
|
||||
|
||||
Remove a snapshot of a subvolume group using::
|
||||
|
||||
$ ceph fs subvolumegroup snapshot rm <vol_name> <group_name> <snap_name> [--force]
|
||||
@ -195,7 +281,7 @@ Fetch the absolute path of a subvolume using::
|
||||
|
||||
$ ceph fs subvolume getpath <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Fetch the metadata of a subvolume using::
|
||||
Fetch the information of a subvolume using::
|
||||
|
||||
$ ceph fs subvolume info <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
@ -243,6 +329,40 @@ List subvolumes using::
|
||||
|
||||
.. note:: subvolumes that are removed but have snapshots retained, are also listed.
|
||||
|
||||
Check the presence of any subvolume using::
|
||||
|
||||
$ ceph fs subvolume exist <vol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The strings returned by the 'exist' command:
|
||||
|
||||
* "subvolume exists": if any subvolume of given group_name is present
|
||||
* "no subvolume exists": if no subvolume of given group_name is present
|
||||
|
||||
Set custom metadata on the subvolume as a key-value pair using::
|
||||
|
||||
$ ceph fs subvolume metadata set <vol_name> <subvol_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: If the key_name already exists then the old value will get replaced by the new value.
|
||||
|
||||
.. note:: key_name and value should be a string of ASCII characters (as specified in python's string.printable). key_name is case-insensitive and always stored in lower case.
|
||||
|
||||
.. note:: Custom metadata on a subvolume is not preserved when snapshotting the subvolume, and hence, is also not preserved when cloning the subvolume snapshot.
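For example, setting and then reading back a purely illustrative key-value pair
on a placeholder subvolume::

    $ ceph fs subvolume metadata set cephfs subvol_1 department finance
    $ ceph fs subvolume metadata get cephfs subvol_1 department
    finance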
|
||||
|
||||
Get custom metadata set on the subvolume using the metadata key::
|
||||
|
||||
$ ceph fs subvolume metadata get <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>]
|
||||
|
||||
List custom metadata (key-value pairs) set on the subvolume using::
|
||||
|
||||
$ ceph fs subvolume metadata ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove custom metadata set on the subvolume using the metadata key::
|
||||
|
||||
$ ceph fs subvolume metadata rm <vol_name> <subvol_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the '--force' flag allows the command to succeed when it would otherwise
|
||||
fail because the metadata key does not exist.
|
||||
|
||||
Create a snapshot of a subvolume using::
|
||||
|
||||
$ ceph fs subvolume snapshot create <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
@ -261,16 +381,73 @@ List snapshots of a subvolume using::
|
||||
|
||||
$ ceph fs subvolume snapshot ls <vol_name> <subvol_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Fetch the metadata of a snapshot using::
|
||||
Fetch the information of a snapshot using::
|
||||
|
||||
$ ceph fs subvolume snapshot info <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
The output format is json and contains fields as follows.
|
||||
The output format is JSON and contains fields as follows.
|
||||
|
||||
* created_at: time of creation of snapshot in the format "YYYY-MM-DD HH:MM:SS:ffffff"
|
||||
* data_pool: data pool the snapshot belongs to
|
||||
* has_pending_clones: "yes" if snapshot clone is in progress otherwise "no"
|
||||
* size: snapshot size in bytes
|
||||
* pending_clones: list of in-progress or pending clones and their target group, if any exist; otherwise this field is not shown
|
||||
* orphan_clones_count: count of orphan clones, if the snapshot has orphan clones; otherwise this field is not shown
|
||||
|
||||
Sample output when snapshot clones are in progress or pending state::
|
||||
|
||||
$ ceph fs subvolume snapshot info cephfs subvol snap
|
||||
{
|
||||
"created_at": "2022-06-14 13:54:58.618769",
|
||||
"data_pool": "cephfs.cephfs.data",
|
||||
"has_pending_clones": "yes",
|
||||
"pending_clones": [
|
||||
{
|
||||
"name": "clone_1",
|
||||
"target_group": "target_subvol_group"
|
||||
},
|
||||
{
|
||||
"name": "clone_2"
|
||||
},
|
||||
{
|
||||
"name": "clone_3",
|
||||
"target_group": "target_subvol_group"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Sample output when no snapshot clone is in progress or pending state::
|
||||
|
||||
$ ceph fs subvolume snapshot info cephfs subvol snap
|
||||
{
|
||||
"created_at": "2022-06-14 13:54:58.618769",
|
||||
"data_pool": "cephfs.cephfs.data",
|
||||
"has_pending_clones": "no"
|
||||
}
|
||||
|
||||
Set custom metadata on the snapshot as a key-value pair using::
|
||||
|
||||
$ ceph fs subvolume snapshot metadata set <vol_name> <subvol_name> <snap_name> <key_name> <value> [--group_name <subvol_group_name>]
|
||||
|
||||
.. note:: If the key_name already exists then the old value will get replaced by the new value.
|
||||
|
||||
.. note:: The key_name and value should be a string of ASCII characters (as specified in python's string.printable). The key_name is case-insensitive and always stored in lower case.
|
||||
|
||||
.. note:: Custom metadata on a snapshot is not preserved when snapshotting the subvolume, and hence, is also not preserved when cloning the subvolume snapshot.
|
||||
|
||||
Get custom metadata set on the snapshot using the metadata key::
|
||||
|
||||
$ ceph fs subvolume snapshot metadata get <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>]
|
||||
|
||||
List custom metadata (key-value pairs) set on the snapshot using::
|
||||
|
||||
$ ceph fs subvolume snapshot metadata ls <vol_name> <subvol_name> <snap_name> [--group_name <subvol_group_name>]
|
||||
|
||||
Remove custom metadata set on the snapshot using the metadata key::
|
||||
|
||||
$ ceph fs subvolume snapshot metadata rm <vol_name> <subvol_name> <snap_name> <key_name> [--group_name <subvol_group_name>] [--force]
|
||||
|
||||
Using the '--force' flag allows the command to succeed when it would otherwise
|
||||
fail because the metadata key does not exist.
|
||||
|
||||
Cloning Snapshots
|
||||
-----------------
|
||||
@ -323,8 +500,14 @@ A clone can be in one of the following states:
|
||||
#. `in-progress` : Clone operation is in progress
|
||||
#. `complete` : Clone operation has successfully finished
|
||||
#. `failed` : Clone operation has failed
|
||||
#. `canceled` : Clone operation is cancelled by user
|
||||
|
||||
Sample output from an `in-progress` clone operation::
|
||||
The reason for a clone failure is shown as follows:
|
||||
|
||||
#. `errno` : error number
|
||||
#. `error_msg` : failure error string
|
||||
|
||||
Sample output of an `in-progress` clone operation::
|
||||
|
||||
$ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1
|
||||
$ ceph fs clone status cephfs clone1
|
||||
@ -339,6 +522,28 @@ Sample output from an `in-progress` clone operation::
|
||||
}
|
||||
}
|
||||
|
||||
.. note:: The `failure` section will be shown only if the clone is in a failed or cancelled state.
|
||||
|
||||
Sample output of a `failed` clone operation::
|
||||
|
||||
$ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1
|
||||
$ ceph fs clone status cephfs clone1
|
||||
{
|
||||
"status": {
|
||||
"state": "failed",
|
||||
"source": {
|
||||
"volume": "cephfs",
|
||||
"subvolume": "subvol1",
|
||||
"snapshot": "snap1"
|
||||
"size": "104857600"
|
||||
},
|
||||
"failure": {
|
||||
"errno": "122",
|
||||
"errstr": "Disk quota exceeded"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(NOTE: Because `subvol1` is in the default group, the `source` section in `clone status` does not include the group name.)
|
||||
|
||||
.. note:: Cloned subvolumes are accessible only after the clone operation has successfully completed.
|
||||
|
@ -57,6 +57,7 @@ D0 -> N3 [color=red,penwidth=2.0];
|
||||
// terminal (but not "in")
|
||||
node [shape=polygon,sides=6,color=black,peripheries=1];
|
||||
D1 [label="down:damaged"]
|
||||
S2 -> D1 [color=black,penwidth=2.0];
|
||||
N3 -> D1 [color=black,penwidth=2.0];
|
||||
N4 -> D1 [color=black,penwidth=2.0];
|
||||
N5 -> D1 [color=black,penwidth=2.0];
|
||||
@ -69,5 +70,6 @@ D1 -> D0 [color=red,penwidth=2.0]
|
||||
node [shape=polygon,sides=6,color=purple,peripheries=1];
|
||||
D3 [label="down:stopped"]
|
||||
S3 -> D3 [color=purple,penwidth=2.0];
|
||||
N6 -> D3 [color=purple,penwidth=2.0];
|
||||
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ POSIX semantics for various reasons:
|
||||
writes are not coherently propagated to other clients' caches. That
|
||||
is, if a page is cached on host A, and then updated on host B, host
|
||||
A's page is not coherently invalidated. (Shared writable mmap
|
||||
appears to be quite rare--we have yet to here any complaints about this
|
||||
appears to be quite rare--we have yet to hear any complaints about this
|
||||
behavior, and implementing cache coherency properly is complex.)
|
||||
- CephFS clients present a hidden ``.snap`` directory that is used to
|
||||
access, create, delete, and rename snapshots. Although the virtual
|
||||
@ -62,17 +62,15 @@ as client data may not even be flushed to the server until the file is
|
||||
closed (and more generally writes will be significantly more
|
||||
time-shifted than CephFS, leading to less predictable results).
|
||||
|
||||
However, all of there are very close to POSIX, and most of the time
|
||||
applications don't notice too much. Many other storage systems (e.g.,
|
||||
HDFS) claim to be "POSIX-like" but diverge significantly from the
|
||||
standard by dropping support for things like in-place file
|
||||
modifications, truncate, or directory renames.
|
||||
|
||||
Regardless, these are all similar enough to POSIX, and applications still work
|
||||
most of the time. Many other storage systems (e.g., HDFS) claim to be
|
||||
"POSIX-like" but diverge significantly from the standard by dropping support
|
||||
for things like in-place file modifications, truncate, or directory renames.
|
||||
|
||||
Bottom line
|
||||
-----------
|
||||
|
||||
CephFS relaxes more than local Linux kernel file systems (e.g., writes
|
||||
CephFS relaxes more than local Linux kernel file systems (for example, writes
|
||||
spanning object boundaries may be torn). It relaxes strictly less
|
||||
than NFS when it comes to multiclient consistency, and generally less
|
||||
than NFS when it comes to write atomicity.
|
||||
|
@ -41,6 +41,17 @@ Limitations
|
||||
the directory the client is restricted to (e.g., ``/home/user``)
|
||||
or something nested beneath it.
|
||||
|
||||
Kernel clients need access to the parent of the directory inode on
|
||||
which quotas are configured in order to enforce them. If quota is
|
||||
configured on a directory path (e.g., ``/home/volumes/group``), the
|
||||
kclient needs to have access to the parent (e.g., ``/home/volumes``).
|
||||
|
||||
An example command to create such a user is given below::
|
||||
|
||||
$ ceph auth get-or-create client.guest mds 'allow r path=/home/volumes, allow rw path=/home/volumes/group' mgr 'allow rw' osd 'allow rw tag cephfs metadata=*' mon 'allow r'
|
||||
|
||||
See also: https://tracker.ceph.com/issues/55090
|
||||
|
||||
#. *Snapshot file data which has since been deleted or changed does not count
|
||||
towards the quota.* See also: http://tracker.ceph.com/issues/24284
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _snap-schedule:
|
||||
|
||||
==========================
|
||||
Snapshot Scheduling Module
|
||||
==========================
|
||||
@ -47,10 +49,9 @@ The following time periods are recognized: `h(our), d(ay), w(eek), m(onth),
|
||||
y(ear)` and `n`. The latter is a special modifier where e.g. `10n` means keep
|
||||
the last 10 snapshots regardless of timing,
|
||||
|
||||
All subcommands take optional `fs` and `subvol` arguments to specify paths in
|
||||
All subcommands take an optional `fs` argument to specify paths in
|
||||
multi-fs setups and :doc:`/cephfs/fs-volumes` managed setups. If not
|
||||
passed `fs` defaults to the first file system listed in the fs_map, `subvolume`
|
||||
defaults to nothing.
|
||||
passed `fs` defaults to the first file system listed in the fs_map.
|
||||
When using :doc:`/cephfs/fs-volumes` the argument `fs` is equivalent to a
|
||||
`volume`.
|
||||
|
||||
@ -64,16 +65,21 @@ When no subcommand is supplied a synopsis is printed::
|
||||
|
||||
#> ceph fs snap-schedule
|
||||
no valid command found; 8 closest matches:
|
||||
fs snap-schedule status [<path>] [<subvol>] [<fs>] [<format>]
|
||||
fs snap-schedule list <path> [<subvol>] [--recursive] [<fs>] [<format>]
|
||||
fs snap-schedule add <path> <snap_schedule> [<start>] [<fs>] [<subvol>]
|
||||
fs snap-schedule remove <path> [<repeat>] [<start>] [<subvol>] [<fs>]
|
||||
fs snap-schedule retention add <path> <retention_spec_or_period> [<retention_count>] [<fs>] [<subvol>]
|
||||
fs snap-schedule retention remove <path> <retention_spec_or_period> [<retention_count>] [<fs>] [<subvol>]
|
||||
fs snap-schedule activate <path> [<repeat>] [<start>] [<subvol>] [<fs>]
|
||||
fs snap-schedule deactivate <path> [<repeat>] [<start>] [<subvol>] [<fs>]
|
||||
fs snap-schedule status [<path>] [<fs>] [<format>]
|
||||
fs snap-schedule list <path> [--recursive] [<fs>] [<format>]
|
||||
fs snap-schedule add <path> <snap_schedule> [<start>] [<fs>]
|
||||
fs snap-schedule remove <path> [<repeat>] [<start>] [<fs>]
|
||||
fs snap-schedule retention add <path> <retention_spec_or_period> [<retention_count>] [<fs>]
|
||||
fs snap-schedule retention remove <path> <retention_spec_or_period> [<retention_count>] [<fs>]
|
||||
fs snap-schedule activate <path> [<repeat>] [<start>] [<fs>]
|
||||
fs snap-schedule deactivate <path> [<repeat>] [<start>] [<fs>]
|
||||
Error EINVAL: invalid command
|
||||
|
||||
Note:
|
||||
^^^^^
|
||||
A `subvolume` argument is no longer accepted by the commands.
|
||||
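A minimal sketch of adding a schedule, assuming an illustrative path and
period (on multi-fs setups, the file system can additionally be supplied via
the trailing ``fs`` argument shown in the synopsis above):

.. prompt:: bash $

   ceph fs snap-schedule add / 1h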
|
||||
|
||||
Inspect snapshot schedules
|
||||
--------------------------
|
||||
|
||||
|
@ -109,6 +109,9 @@ extensions = [
|
||||
|
||||
ditaa = shutil.which("ditaa")
|
||||
if ditaa is not None:
|
||||
# in case we don't have binfmt_misc enabled or jar is not registered
|
||||
ditaa_args = ['-jar', ditaa]
|
||||
ditaa = 'java'
|
||||
extensions += ['sphinxcontrib.ditaa']
|
||||
else:
|
||||
extensions += ['plantweb.directive']
|
||||
|
@ -32,8 +32,8 @@ The ceph orch command will be extended to support maintenance.
|
||||
|
||||
.. code-block::
|
||||
|
||||
ceph orch host enter-maintenance <host> [ --check ]
|
||||
ceph orch host exit-maintenance <host>
|
||||
ceph orch host maintenance enter <host> [ --force ]
|
||||
ceph orch host maintenance exit <host>
|
||||
|
||||
.. note:: In addition, the host's status should be updated to reflect whether it
|
||||
is in maintenance or not.
|
||||
|
285
ceph/doc/dev/continuous-integration.rst
Normal file
@ -0,0 +1,285 @@
|
||||
Continuous Integration Architecture
|
||||
===================================
|
||||
|
||||
In Ceph, we rely on multiple CI pipelines in our development. Most of these pipelines
|
||||
are centered around Jenkins, and their configurations are generated using `Jenkins Job Builder`_.
|
||||
|
||||
.. _Jenkins Job Builder: https://docs.openstack.org/infra/jenkins-job-builder/
|
||||
|
||||
Let's take the ``make check`` performed by Jenkins as an example.
|
||||
|
||||
ceph-pull-requests
|
||||
------------------
|
||||
|
||||
``ceph-pull-requests`` is a Jenkins job that is triggered by a GitHub pull
|
||||
request or a trigger phrase like::
|
||||
|
||||
jenkins test make check
|
||||
|
||||
There are multiple parties involved in this jenkins job:
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
rankdir="LR";
|
||||
github [
|
||||
label="<git> git_repo | <webhooks> webhooks | <api> api";
|
||||
shape=record;
|
||||
href="https://github.com/ceph/ceph";
|
||||
];
|
||||
subgraph cluster_lab {
|
||||
label="Sepia Lab";
|
||||
href="https://wiki.sepia.ceph.com/doku.php";
|
||||
shape=circle;
|
||||
apt_mirror [
|
||||
href="http://apt-mirror.front.sepia.ceph.com";
|
||||
];
|
||||
shaman [
|
||||
href="https://shaman.ceph.com";
|
||||
];
|
||||
chacra [
|
||||
peripheries=3;
|
||||
href="https://chacra.ceph.com";
|
||||
];
|
||||
subgraph cluster_jenkins {
|
||||
label="jenkins";
|
||||
href="https://jenkins.ceph.com";
|
||||
jenkins_controller [ label = "controller" ];
|
||||
jenkins_agents [ label = "agents", peripheries=3 ];
|
||||
};
|
||||
};
|
||||
{
|
||||
rank=same;
|
||||
package_repos [ peripheries=3 ];
|
||||
pypi;
|
||||
npm;
|
||||
}
|
||||
github:webhooks -> jenkins_controller [ label = "notify", color = "crimson" ];
|
||||
jenkins_controller -> jenkins_agents [ label = "schedule jobs" ];
|
||||
jenkins_agents -> github:git [ label = "git pull" ];
|
||||
jenkins_agents -> shaman [ label = "query for chacra repo URL" ];
|
||||
jenkins_agents -> chacra [ label = "pull build dependencies" ];
|
||||
jenkins_agents -> package_repos [ label = "pull build dependencies" ];
|
||||
jenkins_agents -> pypi [ label = "pull Python packages" ];
|
||||
jenkins_agents -> npm [ label = "pull JavaScript packages" ];
|
||||
jenkins_agents -> apt_mirror [ label = "pull build dependencies" ];
|
||||
jenkins_agents -> github:api [ label = "update", color = "crimson" ];
|
||||
}
|
||||
|
||||
Where
|
||||
|
||||
Sepia Lab
|
||||
`Sepia Lab`_ is a test lab used by the Ceph project. This lab offers
|
||||
the storage and computing resources required by our CI infra.
|
||||
|
||||
Jenkins agents
|
||||
are a set of machines which perform the CI jobs. In this case, they
|
||||
|
||||
#. pull the git repo from GitHub
|
||||
#. rebase the pull request against the latest master
|
||||
#. set necessary environment variables
|
||||
#. run ``run-make-check.sh``
|
||||
|
||||
Chacra
|
||||
is a server offering a RESTful API that allows clients to store and
|
||||
retrieve binary packages. It also creates the repo for uploaded
|
||||
packages automatically. Once a certain repo is created on chacra, the
|
||||
configured shaman server is updated as well, and we can then query shaman
|
||||
for the corresponding repo address. Chacra hosts not only Ceph packages
|
||||
but also quite a few other packages, such as various build dependencies.
|
||||
|
||||
Shaman
|
||||
is a server offering a RESTful API that allows clients to query the
|
||||
information of repos hosted by chacra nodes. Shaman is also known
|
||||
for its `Web UI`_. Note, however, that shaman does not build the
|
||||
packages; it only offers information about the builds.
|
||||
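As a rough illustration of "querying shaman for the corresponding repo
address", a plain HTTP request against shaman's search API can be used. The
endpoint and query parameters below are an assumption made for illustration
and may differ from the current API:

.. prompt:: bash $

   curl -s 'https://shaman.ceph.com/api/search/?project=libboost&distros=ubuntu/focal&flavor=default&status=ready'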
|
||||
As the following shows, `chacra`_ manages multiple projects whose metadata
|
||||
are stored in a database. These metadata are exposed via Shaman as a web
|
||||
service. `chacractl`_ is a utility to interact with the `chacra`_ service.
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph {
|
||||
libboost [
|
||||
shape=cylinder;
|
||||
];
|
||||
libzbd [
|
||||
shape=cylinder;
|
||||
];
|
||||
other_repos [
|
||||
label="...";
|
||||
shape=cylinder;
|
||||
];
|
||||
postgresql [
|
||||
shape=cylinder;
|
||||
style=filled;
|
||||
]
|
||||
shaman -> postgresql;
|
||||
chacra -> postgresql;
|
||||
chacractl -> chacra;
|
||||
chacra -> libboost;
|
||||
chacra -> libzbd;
|
||||
chacra -> other_repos;
|
||||
}
|
||||
|
||||
.. _Sepia Lab: https://wiki.sepia.ceph.com/doku.php
|
||||
.. _Web UI: https://shaman.ceph.com
|
||||
|
||||
build dependencies
|
||||
------------------
|
||||
|
||||
Just like lots of other software projects, Ceph has both build-time and
|
||||
run-time dependencies. Most of the time, we are inclined to use the packages
|
||||
prebuilt by the distro. But there are cases where
|
||||
|
||||
- the necessary dependencies are either missing in the distro, or
|
||||
- their versions are too old, or
|
||||
- they are packaged without some important feature enabled, or
|
||||
- we want to ensure that the version of a certain runtime dependency is
|
||||
identical to the one we tested in our lab.
|
||||
|
||||
No matter what the reason is, we either need to build them from source, or
|
||||
to package them as binary packages instead of using the ones shipped by the
|
||||
distro. Quite a few build-time dependencies are included as git submodules,
|
||||
but in order to avoid rebuilding these dependencies repeatedly, we pre-built
|
||||
some of them and uploaded them to our own repos. So, when performing
|
||||
``make check``, the build hosts in our CI just pull them from our internal
|
||||
repos hosting these packages instead of building them.
|
||||
|
||||
So far, the following packages have been prebuilt for Ubuntu Focal and uploaded to
|
||||
`chacra`_:
|
||||
|
||||
libboost
|
||||
packages `boost`_. The packages' names are changed from ``libboost-*`` to
|
||||
``ceph-libboost-*``, and they are instead installed into ``/opt/ceph``, so
|
||||
they don't interfere with the official ``libboost`` packages shipped by
|
||||
the distro. Its build scripts are hosted at https://github.com/ceph/ceph-boost.
|
||||
See https://github.com/ceph/ceph-boost/commit/2a8ae02932b2a1fd6a68072da8ca0df2b99b805c
|
||||
for an example of how to bump the version number. The commands used to
|
||||
build 1.79 on a vanilla Ubuntu Focal OS are below.
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo apt install debhelper dctrl-tools chrpath libbz2-dev libicu-dev bison \
|
||||
flex docbook-to-man help2man xsltproc doxygen dh-python python3-all-dev graphviz
|
||||
wget http://download.ceph.com/qa/boost_1_79_0.tar.bz2
|
||||
git clone https://github.com/ceph/ceph-boost
|
||||
tar xjf boost_1_79_0.tar.bz2
|
||||
cp -ra ceph-boost/debian boost_1_79_0/
|
||||
pushd boost_1_79_0
|
||||
export DEB_BUILD_OPTIONS='parallel=6 nodoc'
|
||||
dpkg-buildpackage -us -uc -b
|
||||
popd
|
||||
BOOST_SHA=$(git ls-remote https://github.com/ceph/ceph-boost main | awk '{ print $1 }')
|
||||
ls *.deb | chacractl binary create \
|
||||
libboost/master/$BOOST_SHA/ubuntu/focal/amd64/flavors/default
|
||||
|
||||
libzbd
|
||||
packages `libzbd`_. The upstream libzbd already includes debian packaging.
|
||||
|
||||
libpmem
|
||||
packages `pmdk`_. Please note that ``ndctl`` is one of the build dependencies of
|
||||
pmdk. For an updated debian packaging, please see
|
||||
https://github.com/ceph/ceph-ndctl .
|
||||
|
||||
.. note::
|
||||
|
||||
Please ensure that the package version and the release number of the
|
||||
packaging are properly updated when updating/upgrading the packaging;
|
||||
otherwise it would be difficult to tell which version of the package
|
||||
is installed. We check the package version before trying to upgrade
|
||||
it in ``install-deps.sh``.
|
||||
|
||||
.. _boost: https://www.boost.org
|
||||
.. _libzbd: https://github.com/westerndigitalcorporation/libzbd
|
||||
.. _pmdk: https://github.com/pmem/pmdk
|
||||
|
||||
But in addition to these libraries, ``ceph-mgr-dashboard``'s frontend uses lots of
|
||||
JavaScript packages. Quite a few of them are not packaged by distros. Not to
|
||||
mention the trouble of testing different combinations of versions of these
|
||||
packages. So we decided to include these JavaScript packages in our dist tarball
|
||||
using ``make-dist``.
|
||||
|
||||
Also, because our downstream might not want to use the prepackaged binaries when
|
||||
redistributing the precompiled Ceph packages, we need to include these
|
||||
libraries in our dist tarball. They are
|
||||
|
||||
- boost
|
||||
- liburing
|
||||
- pmdk
|
||||
|
||||
``make-dist`` is a script used by our CI pipeline to create the dist tarball so that the
|
||||
tarball can be used to build the Ceph packages in a clean room environment. When
|
||||
we need to upgrade these third party libraries, we should
|
||||
|
||||
- update the CMake script
|
||||
- rebuild the prebuilt packages and
|
||||
- update this script to reflect the change.
|
||||
|
||||
Uploading Dependencies
|
||||
----------------------
|
||||
|
||||
To ensure that prebuilt packages are available to the Jenkins agents, we need to
|
||||
upload them to either ``apt-mirror.front.sepia.ceph.com`` or `chacra`_. To upload
|
||||
packages to the former would require the help of our lab administrator, so if we
|
||||
want to maintain the package repositories on a regular basis, a better choice would be
|
||||
to manage them using `chacractl`_. `chacra`_ represents package repositories using
|
||||
a resource hierarchy, like::
|
||||
|
||||
<project>/<branch>/<ref>/<distro>/<distro-version>/<arch>
|
||||
|
||||
In which:
|
||||
|
||||
project
|
||||
in general, it is used for denoting a set of related packages. For instance,
|
||||
``libboost``.
|
||||
|
||||
branch
|
||||
the branch of the project. This mirrors the concept of a branch in a Git repo.
|
||||
|
||||
ref
|
||||
a unique id of a given version of a set of packages. This id is used to reference
|
||||
the set of packages under ``<project>/<branch>``. It is a good practice to
|
||||
version the packaging recipes, like the ``debian`` directory for building DEB
|
||||
packages and the ``spec`` for building RPM packages, and use the SHA1 of the
|
||||
packaging recipe for the ``ref``. But you could also use a random string for
|
||||
``ref``, like the tag name of the built source tree.
|
||||
|
||||
distro
|
||||
the distro name for which the packages are built. Currently, the following distros are
|
||||
supported:
|
||||
|
||||
- centos
|
||||
- debian
|
||||
- fedora
|
||||
- rhel
|
||||
- ubuntu
|
||||
|
||||
distro-version
|
||||
the version of the distro. For instance, if a package is built on ubuntu focal,
|
||||
the ``distro-version`` should be ``20.04``.
|
||||
|
||||
arch
|
||||
the architecture of the packages. It could be:
|
||||
|
||||
- arm64
|
||||
- amd64
|
||||
- noarch
|
||||
|
||||
So, for example, we can upload the prebuilt boost packages to chacra like
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ls *.deb | chacractl binary create \
|
||||
libboost/master/099c0fd56b4a54457e288a2eff8fffdc0d416f7a/ubuntu/focal/amd64/flavors/default
|
||||
|
||||
.. _chacra: https://github.com/ceph/chacra
|
||||
.. _chacractl: https://github.com/ceph/chacractl
|
||||
|
||||
Update ``install-deps.sh``
|
||||
--------------------------
|
||||
|
||||
We also need to update ``install-deps.sh`` to point the build script at the new
|
||||
repo. Please refer to the `script <https://github.com/ceph/ceph/blob/master/install-deps.sh>`_,
|
||||
for more details.
|
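As a purely hypothetical sketch of the kind of change involved (the variable
name is made up; the real script's structure may differ), pointing the script
at a newly uploaded chacra ``<ref>`` could look like this:

.. code-block:: bash

   # illustrative only: bump the chacra ref from which the prebuilt boost
   # packages are pulled, matching the <ref> used when uploading
   boost_sha1=099c0fd56b4a54457e288a2eff8fffdc0d416f7a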
@ -1,151 +0,0 @@
|
||||
===============
|
||||
Deduplication
|
||||
===============
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
Applying data deduplication on an existing software stack is not easy
|
||||
due to additional metadata management and original data processing
|
||||
procedure.
|
||||
|
||||
In a typical deduplication system, the input source as a data
|
||||
object is split into multiple chunks by a chunking algorithm.
|
||||
The deduplication system then compares each chunk with
|
||||
the existing data chunks, stored in the storage previously.
|
||||
To this end, a fingerprint index that stores the hash value
|
||||
of each chunk is employed by the deduplication system
|
||||
in order to easily find the existing chunks by comparing
|
||||
hash value rather than searching all contents that reside in
|
||||
the underlying storage.
|
||||
|
||||
There are many challenges in order to implement deduplication on top
|
||||
of Ceph. Among them, two issues are essential for deduplication.
|
||||
First is managing scalability of fingerprint index; Second is
|
||||
it is complex to ensure compatibility between newly applied
|
||||
deduplication metadata and existing metadata.
|
||||
|
||||
Key Idea
|
||||
========
|
||||
1. Content hashing (Double hashing): Each client can find an object data
|
||||
for an object ID using CRUSH. With CRUSH, a client knows object's location
|
||||
in Base tier.
|
||||
By hashing object's content at Base tier, a new OID (chunk ID) is generated.
|
||||
Chunk tier stores in the new OID that has a partial content of original object.
|
||||
|
||||
Client 1 -> OID=1 -> HASH(1's content)=K -> OID=K ->
|
||||
CRUSH(K) -> chunk's location
|
||||
|
||||
|
||||
2. Self-contained object: The external metadata design
|
||||
makes difficult for integration with storage feature support
|
||||
since existing storage features cannot recognize the
|
||||
additional external data structures. If we can design data
|
||||
deduplication system without any external component, the
|
||||
original storage features can be reused.
|
||||
|
||||
More details in https://ieeexplore.ieee.org/document/8416369
|
||||
|
||||
Design
|
||||
======
|
||||
|
||||
.. ditaa::
|
||||
|
||||
+-------------+
|
||||
| Ceph Client |
|
||||
+------+------+
|
||||
^
|
||||
Tiering is |
|
||||
Transparent | Metadata
|
||||
to Ceph | +---------------+
|
||||
Client Ops | | |
|
||||
| +----->+ Base Pool |
|
||||
| | | |
|
||||
| | +-----+---+-----+
|
||||
| | | ^
|
||||
v v | | Dedup metadata in Base Pool
|
||||
+------+----+--+ | | (Dedup metadata contains chunk offsets
|
||||
| Objecter | | | and fingerprints)
|
||||
+-----------+--+ | |
|
||||
^ | | Data in Chunk Pool
|
||||
| v |
|
||||
| +-----+---+-----+
|
||||
| | |
|
||||
+----->| Chunk Pool |
|
||||
| |
|
||||
+---------------+
|
||||
Data
|
||||
|
||||
|
||||
Pool-based object management:
|
||||
We define two pools.
|
||||
The metadata pool stores metadata objects and the chunk pool stores
|
||||
chunk objects. Since these two pools are divided based on
|
||||
the purpose and usage, each pool can be managed more
|
||||
efficiently according to its different characteristics. Base
|
||||
pool and the chunk pool can separately select a redundancy
|
||||
scheme between replication and erasure coding depending on
|
||||
its usage and each pool can be placed in a different storage
|
||||
location depending on the required performance.
|
||||
|
||||
Regarding how to use, please see ``osd_internals/manifest.rst``
|
||||
|
||||
Usage Patterns
|
||||
==============
|
||||
|
||||
The different Ceph interface layers present potentially different opportunities
|
||||
and costs for deduplication and tiering in general.
|
||||
|
||||
RadosGW
|
||||
-------
|
||||
|
||||
S3 big data workloads seem like a good opportunity for deduplication. These
|
||||
objects tend to be write once, read mostly objects which don't see partial
|
||||
overwrites. As such, it makes sense to fingerprint and dedup up front.
|
||||
|
||||
Unlike cephfs and rbd, radosgw has a system for storing
|
||||
explicit metadata in the head object of a logical s3 object for
|
||||
locating the remaining pieces. As such, radosgw could use the
|
||||
refcounting machinery (``osd_internals/refcount.rst``) directly without
|
||||
needing direct support from rados for manifests.
|
||||
|
||||
RBD/Cephfs
|
||||
----------
|
||||
|
||||
RBD and CephFS both use deterministic naming schemes to partition
|
||||
block devices/file data over rados objects. As such, the redirection
|
||||
metadata would need to be included as part of rados, presumably
|
||||
transparently.
|
||||
|
||||
Moreover, unlike radosgw, rbd/cephfs rados objects can see overwrites.
|
||||
For those objects, we don't really want to perform dedup, and we don't
|
||||
want to pay a write latency penalty in the hot path to do so anyway.
|
||||
As such, performing tiering and dedup on cold objects in the background
|
||||
is likely to be preferred.
|
||||
|
||||
One important wrinkle, however, is that both rbd and cephfs workloads
|
||||
often feature usage of snapshots. This means that the rados manifest
|
||||
support needs robust support for snapshots.
|
||||
|
||||
RADOS Machinery
|
||||
===============
|
||||
|
||||
For more information on rados redirect/chunk/dedup support, see ``osd_internals/manifest.rst``.
|
||||
For more information on rados refcount support, see ``osd_internals/refcount.rst``.
|
||||
|
||||
Status and Future Work
|
||||
======================
|
||||
|
||||
At the moment, there exists some preliminary support for manifest
|
||||
objects within the OSD as well as a dedup tool.
|
||||
|
||||
RadosGW data warehouse workloads probably represent the largest
|
||||
opportunity for this feature, so the first priority is probably to add
|
||||
direct support for fingerprinting and redirects into the refcount pool
|
||||
to radosgw.
|
||||
|
||||
Aside from radosgw, completing work on manifest object support in the
|
||||
OSD particularly as it relates to snapshots would be the next step for
|
||||
rbd and cephfs workloads.
|
||||
|
@ -2,11 +2,12 @@
|
||||
CephFS delayed deletion
|
||||
=========================
|
||||
|
||||
When you delete a file, the data is not immediately removed. Each
|
||||
object in the file needs to be removed independently, and sending
|
||||
``size_of_file / stripe_size * replication_count`` messages would slow
|
||||
the client down too much, and use a too much of the clients
|
||||
bandwidth. Additionally, snapshots may mean some objects should not be
|
||||
deleted.
|
||||
The deletion of a file does not immediately remove its data. Each of the file's
|
||||
underlying objects must be removed independently. If these objects were removed
|
||||
immediately, the client would have to send ``size_of_file / stripe_size *
|
||||
replication_count`` messages. This would consume significant bandwidth and would
|
||||
slow the client unacceptably. If snapshots exist, their existence can prevent
|
||||
the deletion of objects associated with them.
|
||||
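For example, with illustrative numbers, removing a 4 GiB file striped into
4 MiB objects at a replication count of 3 would naively require
(4 GiB / 4 MiB) * 3 = 3072 messages.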
|
||||
Instead, the file is marked as deleted on the MDS, and deleted lazily.
|
||||
In these cases, such files are (1) marked as deleted on the MDS and (2) deleted
|
||||
lazily.
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _basic workflow dev guide:
|
||||
|
||||
Basic Workflow
|
||||
==============
|
||||
|
||||
@ -8,7 +10,7 @@ The following chart illustrates the basic Ceph development workflow:
|
||||
Upstream Code Your Local Environment
|
||||
|
||||
/----------\ git clone /-------------\
|
||||
| Ceph | -------------------------> | ceph/master |
|
||||
| Ceph | -------------------------> | ceph/main |
|
||||
\----------/ \-------------/
|
||||
^ |
|
||||
| | git branch fix_1
|
||||
@ -29,50 +31,79 @@ The following chart illustrates the basic Ceph development workflow:
|
||||
\--------------/
|
||||
|
||||
This page assumes that you are a new contributor with an idea for a bugfix or
|
||||
enhancement, but do not know how to proceed. Watch the `Getting Started with
|
||||
Ceph Development <https://www.youtube.com/watch?v=t5UIehZ1oLs>`_ video for a
|
||||
practical summary of this workflow.
|
||||
an enhancement, but you do not know how to proceed. Watch the `Getting Started
|
||||
with Ceph Development <https://www.youtube.com/watch?v=t5UIehZ1oLs>`_ video for
|
||||
a practical summary of this workflow.
|
||||
|
||||
Updating the tracker
|
||||
--------------------
|
||||
|
||||
Before you start, you should know the :ref:`issue-tracker` (Redmine) number
|
||||
of the bug you intend to fix. If there is no tracker issue, now is the time to
|
||||
create one for code changes. Straightforward documentation cleanup does
|
||||
not necessarily require a corresponding tracker issue. However, an issue
|
||||
(ticket) should be created if one is adding new documentation chapters or
|
||||
files, or for other substantial changes.
|
||||
Find the :ref:`issue-tracker` (Redmine) number of the bug you intend to fix. If
|
||||
no tracker issue exists, create one. There is only one case in which you do not
|
||||
have to create a Redmine tracker issue: the case of minor documentation changes.
|
||||
|
||||
The tracker ticket serves to explain the issue (bug) to your fellow Ceph
|
||||
developers and keep them informed as you make progress toward resolution. To
|
||||
this end, please provide a descriptive title and write appropriate information
|
||||
and details into the description. When composing the ticket's title, consider "If I
|
||||
want to search for this ticket two years from now, what keywords will I search
|
||||
for?"
|
||||
Simple documentation cleanup does not require a corresponding tracker issue.
|
||||
Major documentation changes do require a tracker issue. Major documentation
|
||||
changes include adding new documentation chapters or files, and making
|
||||
substantial changes to the structure or content of the documentation.
|
||||
|
||||
If you have sufficient tracker permissions, assign the bug to yourself by
|
||||
setting the ``Assignee`` field. If your tracker permissions have not been
|
||||
elevated, simply add a comment with a short message like "I am working on this
|
||||
issue".
|
||||
A (Redmine) tracker ticket explains the issue (bug) to other Ceph developers to
|
||||
keep them informed as the bug nears resolution. Provide a useful, clear title
|
||||
and include detailed information in the description. When composing the title
|
||||
of the ticket, ask yourself "If I need to search for this ticket two years from
|
||||
now, which keywords am I likely to search for?" Then include those keywords in
|
||||
the title.
|
||||
|
||||
Forking and Cloning the Ceph Repository
|
||||
---------------------------------------
|
||||
If your tracker permissions are elevated, assign the bug to yourself by setting
|
||||
the ``Assignee`` field. If your tracker permissions have not been elevated,
|
||||
just add a comment with a short message that says "I am working on this issue".
|
||||
|
||||
This section, and the ones that follow, correspond to nodes in the above chart.
|
||||
Ceph Workflow Overview
|
||||
----------------------
|
||||
|
||||
The upstream code is found at https://github.com/ceph/ceph.git, which is known
|
||||
as the "upstream repo", or simply "upstream". As the chart shows, we will make
|
||||
a local copy of this repository, modify it, test our modifications, then submit
|
||||
the modifications for review and merging.
|
||||
Three repositories are involved in the Ceph workflow. They are:
|
||||
|
||||
A local copy of the upstream code is made by
|
||||
1. The upstream repository (ceph/ceph)
|
||||
2. Your fork of the upstream repository (your_github_id/ceph)
|
||||
3. Your local working copy of the repository (on your workstation)
|
||||
|
||||
1. Forking the upstream repo on GitHub, and
|
||||
2. Cloning your fork to make a local working copy
|
||||
The procedure for making changes to the Ceph repository is as follows:
|
||||
|
||||
#. Configure your local environment
|
||||
|
||||
Forking The Ceph Repository
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
#. :ref:`Create a fork<forking>` of the "upstream Ceph"
|
||||
repository.
|
||||
|
||||
#. :ref:`Clone the fork<cloning>` to your local filesystem.
|
||||
|
||||
#. Fix the bug
|
||||
|
||||
#. :ref:`Synchronize local main with upstream main<synchronizing>`.
|
||||
|
||||
#. :ref:`Create a bugfix branch<bugfix_branch>` in your local working copy.
|
||||
|
||||
#. :ref:`Make alterations to the local working copy of the repository in your
|
||||
local filesystem<fixing_bug_locally>`.
|
||||
|
||||
#. :ref:`Push the changes in your local working copy to your fork<push_changes>`.
|
||||
|
||||
#. Create a Pull Request to push the change upstream
|
||||
|
||||
#. Create a Pull Request that asks for your changes to be added into the
|
||||
"upstream Ceph" repository.
|
||||
|
||||
Preparing Your Local Working Copy of the Ceph Repository
|
||||
--------------------------------------------------------
|
||||
|
||||
The procedures in this section, "Preparing Your Local Working Copy of the Ceph
|
||||
Repository", must be followed only when you are first setting up your local
|
||||
environment. If this is your first time working with the Ceph project, then
|
||||
these commands are necessary and are the first commands that you should run.
|
||||
|
||||
.. _forking:
|
||||
|
||||
Creating a Fork of the Ceph Repository
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
See the `GitHub documentation
|
||||
<https://help.github.com/articles/fork-a-repo/#platform-linux>`_ for
|
||||
@ -80,148 +111,234 @@ detailed instructions on forking. In short, if your GitHub username is
|
||||
"mygithubaccount", your fork of the upstream repo will appear at
|
||||
``https://github.com/mygithubaccount/ceph``.
|
||||
|
||||
.. _cloning:
|
||||
|
||||
Cloning Your Fork
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
Once you have created your fork, clone it by running:
|
||||
After you have created your fork, clone it by running the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git clone https://github.com/mygithubaccount/ceph
|
||||
|
||||
You must fork the Ceph repository before you clone it. Without forking, you cannot
|
||||
open a `GitHub pull request
|
||||
You must fork the Ceph repository before you clone it. If you fail to fork,
|
||||
you cannot open a `GitHub pull request
|
||||
<https://docs.github.com/en/free-pro-team@latest/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request>`_.
|
||||
|
||||
For more information on using GitHub, refer to `GitHub Help
|
||||
<https://help.github.com/>`_.
|
||||
|
||||
Configuring Your Local Environment
|
||||
----------------------------------
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In the local environment created in the previous step, you now have a copy of
|
||||
the ``master`` branch in ``remotes/origin/master``. This fork
|
||||
(https://github.com/mygithubaccount/ceph.git) is frozen in time and the
|
||||
upstream repo (https://github.com/ceph/ceph.git, typically abbreviated to
|
||||
``ceph/ceph.git``) is updated frequently by other contributors. This means that
|
||||
you must sync your fork periodically. Failure to synchronize your fork may
|
||||
result in your commits and pull requests failing to merge because they refer to
|
||||
file contents that have changed since you last synchronized your fork.
|
||||
The commands in this section configure your local git environment so that it
|
||||
generates "Signed-off-by:" tags. These commands also set up your local
|
||||
environment so that it can stay synchronized with the upstream repository.
|
||||
|
||||
Configure your local git environment with your name and email address.
|
||||
These commands are necessary only during the initial setup of your local
|
||||
working copy; in other words, they are needed only the first time that you
|
||||
work with the Ceph repository. They are, however, unavoidable: if you fail to
|
||||
run them, you will not be able to work on the Ceph repository.
|
||||
|
||||
.. prompt:: bash $
|
||||
1. Configure your local git environment with your name and email address.
|
||||
|
||||
git config user.name "FIRST_NAME LAST_NAME"
|
||||
git config user.email "MY_NAME@example.com"
|
||||
.. note::
|
||||
These commands will work only from within the ``ceph/`` directory
|
||||
that was created when you cloned your fork.
|
||||
|
||||
Add the upstream repo as a "remote" and fetch it:
|
||||
.. prompt:: bash $
|
||||
|
||||
.. prompt:: bash $
|
||||
git config user.name "FIRST_NAME LAST_NAME"
|
||||
git config user.email "MY_NAME@example.com"
|
||||
|
||||
git remote add ceph https://github.com/ceph/ceph.git
|
||||
git fetch ceph
|
||||
2. Add the upstream repo as a "remote" and fetch it:
|
||||
|
||||
Fetching is a process that downloads all objects (commits, branches) that have
|
||||
been added since the last sync. These commands download all the branches from
|
||||
``ceph/ceph.git`` to the local git repo as ``remotes/ceph/$BRANCH_NAME`` and
|
||||
can be referenced as ``ceph/$BRANCH_NAME`` in local git commands.
|
||||
.. prompt:: bash $
|
||||
|
||||
git remote add ceph https://github.com/ceph/ceph.git
|
||||
git fetch ceph
|
||||
|
||||
Resetting Local Master to Upstream Master
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
These commands fetch all the branches and commits from ``ceph/ceph.git`` to
|
||||
the local git repo as ``remotes/ceph/$BRANCH_NAME`` and can be referenced as
|
||||
``ceph/$BRANCH_NAME`` in local git commands.
|
||||
|
||||
Your local ``master`` branch can be reset to the upstream Ceph ``master``
|
||||
branch by running the following commands:
|
||||
Fixing the Bug
|
||||
--------------
|
||||
|
||||
.. _synchronizing:
|
||||
|
||||
Synchronizing Local Main with Upstream Main
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In your local working copy, there is a copy of the ``main`` branch in
|
||||
``remotes/origin/main``. This is called "local main". This copy of the
|
||||
main branch (https://github.com/your_github_id/ceph.git) is "frozen in time"
|
||||
at the moment that you cloned it, but the upstream repo
|
||||
(https://github.com/ceph/ceph.git, typically abbreviated to ``ceph/ceph.git``)
|
||||
that it was forked from is not frozen in time: the upstream repo is still being
|
||||
updated by other contributors.
|
||||
|
||||
Because upstream main is continually receiving updates from other
|
||||
contributors, your fork will drift farther and farther from the state of the
|
||||
upstream repo when you cloned it.
|
||||
|
||||
Keep your fork's ``main`` branch synchronized with upstream main to reduce drift
|
||||
between your fork's main branch and the upstream main branch.
|
||||
|
||||
Here are the commands for keeping your fork synchronized with the
|
||||
upstream repository:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git fetch ceph
|
||||
git checkout master
|
||||
git reset --hard ceph/master
|
||||
git push -u origin master
|
||||
git checkout main
|
||||
git reset --hard ceph/main
|
||||
git push -u origin main
|
||||
|
||||
This procedure should be followed often, in order to keep your local ``master``
|
||||
in sync with upstream ``master``.
|
||||
Follow this procedure often to keep your local ``main`` in sync with upstream
|
||||
``main``.
|
||||
|
||||
If the command ``git status`` returns a line that reads "Untracked files", see
|
||||
:ref:`the procedure on updating submodules <update-submodules>`.
|
||||
|
||||
.. _bugfix_branch:
|
||||
|
||||
Creating a Bugfix branch
|
||||
------------------------
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Create a branch for your bugfix:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git checkout master
|
||||
git checkout main
|
||||
git checkout -b fix_1
|
||||
git push -u origin fix_1
|
||||
|
||||
This creates a local branch called ``fix_1`` in our GitHub fork. At this point,
|
||||
the ``fix_1`` branch is identical to the ``master`` branch, but not for long!
|
||||
You are now ready to modify the code. Be careful to always run `git checkout
|
||||
master` first, otherwise you may find commits from an unrelated branch mixed
|
||||
with your new work.
|
||||
The first command (git checkout main) makes sure that the bugfix branch
|
||||
"fix_1" is created from the most recent state of the main branch of the
|
||||
upstream repository.
|
||||
|
||||
Fixing the bug locally
|
||||
----------------------
|
||||
The second command (git checkout -b fix_1) creates a "bugfix branch" called
|
||||
"fix_1" in your local working copy of the repository. The changes that you make
|
||||
in order to fix the bug will be committed to this branch.
|
||||
|
||||
In the `Ceph issue tracker <https://tracker.ceph.com>`_, change the status of
|
||||
the tracker issue to "In progress". This communicates to other Ceph
|
||||
contributors that you have begun working on a fix, which helps to avoid
|
||||
duplication of effort. If you don't have permission to change that field, your
|
||||
previous comment that you are working on the issue is sufficient.
|
||||
The third command (git push -u origin fix_1) pushes the bugfix branch from
|
||||
your local working repository to your fork of the upstream repository.
|
||||
|
||||
Your fix may be very simple and require only minimal testing. But that's not
|
||||
likely. It is more likely that the process of fixing your bug will be iterative
|
||||
and will involve trial and error, as well as skill. An explanation of how to
|
||||
fix bugs is beyond the scope of this document. Instead, we focus on the
|
||||
mechanics of the process in the context of the Ceph project.
|
||||
.. _fixing_bug_locally:
|
||||
|
||||
For a detailed discussion of the tools available for validating bugfixes,
|
||||
see the chapters on testing.
|
||||
Fixing the bug in the local working copy
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
For now, let us assume that you have finished work on the bugfix, that you have
|
||||
tested the bugfix , and that you believe that it works. Commit the changes to
|
||||
your local branch using the ``--signoff`` option (here represented as the `s`
|
||||
portion of the `-as` flag):
|
||||
#. **Updating the tracker**
|
||||
|
||||
.. prompt:: bash $
|
||||
In the `Ceph issue tracker <https://tracker.ceph.com>`_, change the status
|
||||
of the tracker issue to "In progress". This communicates to other Ceph
|
||||
contributors that you have begun working on a fix, which helps to avoid
|
||||
duplication of effort. If you don't have permission to change that field,
|
||||
just comment that you are working on the issue.
|
||||
|
||||
git commit -as
|
||||
#. **Fixing the bug itself**
|
||||
|
||||
Push the changes to your fork:
|
||||
This guide cannot tell you how to fix the bug that you have chosen to fix.
|
||||
This guide assumes that you know what required improvement, and that you
|
||||
know what to do to provide that improvement.
|
||||
|
||||
.. prompt:: bash $
|
||||
It might be that your fix is simple and requires only minimal testing. But
|
||||
that's unlikely. It is more likely that the process of fixing your bug will
|
||||
be iterative and will involve trial, error, skill, and patience.
|
||||
|
||||
For a detailed discussion of the tools available for validating bugfixes,
|
||||
see the chapters on testing.
|
||||
|
||||
Pushing the Fix to Your Fork
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You have finished work on the bugfix. You have tested the bugfix, and you
|
||||
believe that it works.
|
||||
|
||||
#. Commit the changes to your local working copy.
|
||||
|
||||
Commit the changes to the `fix_1` branch of your local working copy by using
|
||||
the ``--signoff`` option (here represented as the `s` portion of the `-as`
|
||||
flag):
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git commit -as
|
||||
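The ``--signoff`` option appends a trailer of the following form to the
commit message, built from the name and email configured earlier::

       Signed-off-by: FIRST_NAME LAST_NAME <MY_NAME@example.com>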
|
||||
.. _push_changes:
|
||||
|
||||
#. Push the changes to your fork:
|
||||
|
||||
Push the changes from the `fix_1` branch of your local working copy to the
|
||||
`fix_1` branch of your fork of the upstream repository:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git push origin fix_1
|
||||
|
||||
.. note::
|
||||
|
||||
In the command ``git push origin fix_1``, ``origin`` is the name of your
|
||||
fork of the upstream Ceph repository, and can be thought of as a nickname
|
||||
for ``git@github.com:username/ceph.git``, where ``username`` is your
|
||||
GitHub username.
|
||||
|
||||
It is possible that ``origin`` is not the name of your fork. Discover the
|
||||
name of your fork by running ``git remote -v``, as shown here:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ git remote -v
|
||||
ceph https://github.com/ceph/ceph.git (fetch)
|
||||
ceph https://github.com/ceph/ceph.git (push)
|
||||
origin git@github.com:username/ceph.git (fetch)
|
||||
origin git@github.com:username/ceph.git (push)
|
||||
|
||||
The line::
|
||||
|
||||
origin git@github.com:username/ceph.git (fetch)
|
||||
|
||||
and the line::
|
||||
|
||||
origin git@github.com:username/ceph.git (push)
|
||||
|
||||
provide the information that "origin" is the name of your fork of the
|
||||
Ceph repository.
|
||||
|
||||
git push origin fix_1
|
||||
|
||||
Opening a GitHub pull request
|
||||
-----------------------------
|
||||
|
||||
The next step is to open a GitHub pull request (PR). This makes your bugfix
|
||||
visible to the community of Ceph contributors. They will review it and may
|
||||
perform additional testing and / or request changes.
|
||||
After you have pushed the bugfix to your fork, open a GitHub pull request
|
||||
(PR). This makes your bugfix visible to the community of Ceph contributors.
|
||||
They will review it. They may perform additional testing on your bugfix, and
|
||||
they might request changes to the bugfix.
|
||||
|
||||
This is the point where you "go public" with your modifications. Be prepared
|
||||
to receive suggestions and constructive criticism in the form of comments
|
||||
within the PR. Don't worry! The Ceph project is a friendly place!
|
||||
Be prepared to receive suggestions and constructive criticism in the form of
|
||||
comments within the PR.
|
||||
|
||||
If you are uncertain how to create and manage pull requests, you may read
|
||||
`this GitHub pull request tutorial`_.
|
||||
If you don't know how to create and manage pull requests, read `this GitHub
|
||||
pull request tutorial`_.
|
||||
|
||||
.. _`this GitHub pull request tutorial`:
|
||||
https://help.github.com/articles/using-pull-requests/
|
||||
|
||||
For ideas on what constitutes a "good" pull request, see
|
||||
To learn what constitutes a "good" pull request, see
|
||||
the `Git Commit Good Practice`_ article at the `OpenStack Project Wiki`_.
|
||||
|
||||
.. _`Git Commit Good Practice`: https://wiki.openstack.org/wiki/GitCommitMessages
|
||||
.. _`OpenStack Project Wiki`: https://wiki.openstack.org/wiki/Main_Page
|
||||
|
||||
and our own `Submitting Patches <https://github.com/ceph/ceph/blob/master/SubmittingPatches.rst>`_ document.
|
||||
See also our own `Submitting Patches
|
||||
<https://github.com/ceph/ceph/blob/main/SubmittingPatches.rst>`_ document.
|
||||
|
||||
Once your pull request (PR) is opened, update the :ref:`issue-tracker` by
|
||||
adding a comment directing other contributors to your PR. The comment can be
|
||||
as simple as::
|
||||
After your pull request (PR) has been opened, update the :ref:`issue-tracker`
|
||||
by adding a comment directing other contributors to your PR. The comment can be
|
||||
as simple as this::
|
||||
|
||||
*PR*: https://github.com/ceph/ceph/pull/$NUMBER_OF_YOUR_PULL_REQUEST
|
||||
|
||||
@ -230,24 +347,24 @@ Understanding Automated PR validation
|
||||
|
||||
When you create or update your PR, the Ceph project's `Continuous Integration
|
||||
(CI) <https://en.wikipedia.org/wiki/Continuous_integration>`_ infrastructure
|
||||
automatically tests it. At the time of this writing (September 2020), the
|
||||
automated CI testing included five tests:
|
||||
automatically tests it. At the time of this writing (May 2022), the automated
|
||||
CI testing included many tests. These five are among them:
|
||||
|
||||
#. a test to check that the commits are properly signed (see :ref:`submitting-patches`):
|
||||
#. a test to check that the documentation builds
|
||||
#. a test to check that the submodules are unmodified
|
||||
#. a test to check that the API is in order
|
||||
#. a :ref:`make check<make-check>` test
|
||||
|
||||
Additional tests may be performed depending on which files your PR modifies.
|
||||
#. a :ref:`make check<make-check>` test
|
||||
|
||||
The :ref:`make check<make-check>` test builds the PR and runs it through a battery of
|
||||
tests. These tests run on servers operated by the Ceph Continuous
|
||||
Integration (CI) team. When the tests complete, the result will be shown
|
||||
on GitHub in the pull request itself.
|
||||
Additional tests may be run depending on which files your PR modifies.
|
||||
|
||||
You should test your modifications before you open a PR.
|
||||
Refer to the chapters on testing for details.
|
||||
The :ref:`make check<make-check>` test builds the PR and runs it through a
|
||||
battery of tests. These tests run on servers that are operated by the Ceph
|
||||
Continuous Integration (CI) team. When the tests have completed their run, the
|
||||
result is shown on GitHub in the pull request itself.
|
||||
|
||||
Test your modifications before you open a PR. Refer to the chapters
|
||||
on testing for details.
|
||||
|
||||
Notes on PR make check test
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -255,26 +372,28 @@ Notes on PR make check test
|
||||
The GitHub :ref:`make check<make-check>` test is driven by a Jenkins instance.
|
||||
|
||||
Jenkins merges your PR branch into the latest version of the base branch before
|
||||
starting tests. This means that you don't have to rebase the PR to pick up any fixes.
|
||||
it starts any tests. This means that you don't have to rebase the PR in order
|
||||
to pick up any fixes.
|
||||
|
||||
You can trigger PR tests at any time by adding a comment to the PR - the
|
||||
comment should contain the string "test this please". Since a human subscribed
|
||||
to the PR might interpret that as a request for him or her to test the PR, we
|
||||
recommend that you address Jenkins directly. For example, write "jenkins retest
|
||||
this please". For efficiency a single re-test can also be requested with
|
||||
e.g. "jenkins test signed". For reference, a list of these requests is
|
||||
automatically added to the end of each new PR's description.
|
||||
comment should contain the string "test this please". Since a human who is
|
||||
subscribed to the PR might interpret that as a request for him or her to test
|
||||
the PR, you must address Jenkins directly. For example, write "jenkins retest
|
||||
this please". If you need to run only one of the tests, you can request it with
|
||||
a command like "jenkins test signed". A list of these requests is automatically
|
||||
added to the end of each new PR's description, so check there to find the
|
||||
single test you need.
|
||||
|
||||
If there is a build failure and you aren't sure what caused it, check the
|
||||
:ref:`make check<make-check>` log. To access it, click on the "details" (next
|
||||
to the :ref:`make check<make-check>` test in the PR) link to enter the Jenkins web
|
||||
GUI. Then click on "Console Output" (on the left).
|
||||
:ref:`make check<make-check>` log. To access the make check log, click the
|
||||
"details" (next to the :ref:`make check<make-check>` test in the PR) link to
|
||||
enter the Jenkins web GUI. Then click "Console Output" (on the left).
|
||||
|
||||
Jenkins is configured to search logs for strings known to have been associated
|
||||
with :ref:`make check<make-check>` failures in the past. However, there is no
|
||||
guarantee that these known strings are associated with any given
|
||||
:ref:`make check<make-check>` failure. You'll have to read through the log to determine the
|
||||
cause of your specific failure.
|
||||
Jenkins is configured to search logs for strings that are known to have been
|
||||
associated with :ref:`make check<make-check>` failures in the past. However,
|
||||
there is no guarantee that these known strings are associated with any given
|
||||
:ref:`make check<make-check>` failure. You'll have to read through the log to
|
||||
determine the cause of your specific failure.
|
||||
|
||||
Integration tests AKA ceph-qa-suite
|
||||
-----------------------------------
|
||||
@ -284,7 +403,7 @@ see how it behaves on real clusters running on physical or virtual
|
||||
hardware. Tests designed for this purpose live in the `ceph/qa
|
||||
sub-directory`_ and are run via the `teuthology framework`_.
|
||||
|
||||
.. _`ceph/qa sub-directory`: https://github.com/ceph/ceph/tree/master/qa/
|
||||
.. _`ceph/qa sub-directory`: https://github.com/ceph/ceph/tree/main/qa/
|
||||
.. _`teuthology repository`: https://github.com/ceph/teuthology
|
||||
.. _`teuthology framework`: https://github.com/ceph/teuthology
|
||||
|
||||
@ -329,7 +448,7 @@ will need to force push your branch with:
|
||||
git push --force origin fix_1
|
||||
|
||||
Why do we take these extra steps instead of simply adding additional commits
|
||||
the the PR? It is best practice for a PR to consist of a single commit; this
|
||||
the PR? It is best practice for a PR to consist of a single commit; this
|
||||
makes for clean history, eases peer review of your changes, and facilitates
|
||||
merges. In rare circumstances it also makes it easier to cleanly revert
|
||||
changes.
|
||||
@ -388,7 +507,7 @@ Another method of generating merge commits involves using Patrick Donnelly's
|
||||
**/ceph/src/script/ptl-tool.py**. Merge commits that have been generated by
|
||||
the **ptl-tool** have the following form::
|
||||
|
||||
Merge PR #36257 into master
|
||||
Merge PR #36257 into main
|
||||
* refs/pull/36257/head:
|
||||
client: move client_lock to _unmount()
|
||||
client: add timer_lock support
|
||||
|
@ -139,8 +139,46 @@ using `Internet Relay Chat`_.
|
||||
|
||||
.. _`Internet Relay Chat`: http://www.irchelp.org/
|
||||
|
||||
See ``https://ceph.com/irc/`` for how to set up your IRC
|
||||
client and a list of channels.
|
||||
The Ceph community gathers in the #ceph channel of the Open and Free Technology
|
||||
Community (OFTC) IRC network.
|
||||
|
||||
Created in 1988, Internet Relay Chat (IRC) is a relay-based, real-time chat
|
||||
protocol. It is mainly designed for group (many-to-many) communication in
|
||||
discussion forums called channels, but also allows one-to-one communication via
|
||||
private message. On IRC you can talk to many other members using Ceph, on
|
||||
topics ranging from idle chit-chat to support questions. Though a channel might
|
||||
have many people in it at any one time, they might not always be at their
|
||||
keyboard; so if no-one responds, just wait around and someone will hopefully
|
||||
answer soon enough.
|
||||
|
||||
Registration
|
||||
^^^^^^^^^^^^
|
||||
|
||||
If you intend to use the IRC service on a continued basis, you are advised to
|
||||
register an account. Registering gives you a unique IRC identity and allows you
|
||||
to access channels where unregistered users have been locked out for technical
|
||||
reasons.
|
||||
|
||||
See `the official OFTC (Open and Free Technology Community) documentation's
|
||||
registration instructions
|
||||
<https://www.oftc.net/Services/#register-your-account>`_ to learn how to
|
||||
register your IRC account.
|
||||
|
||||
Channels
|
||||
^^^^^^^^
|
||||
|
||||
To connect to the OFTC IRC network, download an IRC client and configure it to
|
||||
connect to ``irc.oftc.net``. Then join one or more of the channels. Discussions
|
||||
inside #ceph are logged and archives are available online.
|
||||
|
||||
Here are the real-time discussion channels for the Ceph community:
|
||||
|
||||
- #ceph
|
||||
- #ceph-devel
|
||||
- #cephfs
|
||||
- #ceph-dashboard
|
||||
- #ceph-orchestrators
|
||||
- #sepia
|
||||
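For example, with a typical IRC client such as irssi or WeeChat (the exact
commands vary between clients), connecting and joining might look like this::

    /connect irc.oftc.net
    /join #ceph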
|
||||
.. _submitting-patches:
|
||||
|
||||
@ -152,7 +190,7 @@ file `CONTRIBUTING.rst`_ in the top-level directory of the source-code
|
||||
tree. There may be some overlap between this guide and that file.
|
||||
|
||||
.. _`CONTRIBUTING.rst`:
|
||||
https://github.com/ceph/ceph/blob/master/CONTRIBUTING.rst
|
||||
https://github.com/ceph/ceph/blob/main/CONTRIBUTING.rst
|
||||
|
||||
All newcomers are encouraged to read that file carefully.
|
||||
|
||||
@ -252,7 +290,7 @@ See :ref:`kubernetes-dev`
|
||||
Backporting
|
||||
-----------
|
||||
|
||||
All bugfixes should be merged to the ``master`` branch before being
|
||||
All bugfixes should be merged to the ``main`` branch before being
|
||||
backported. To flag a bugfix for backporting, make sure it has a
|
||||
`tracker issue`_ associated with it and set the ``Backport`` field to a
|
||||
comma-separated list of previous releases (e.g. "hammer,jewel") that you think
|
||||
@ -263,6 +301,36 @@ The rest (including the actual backporting) will be taken care of by the
|
||||
.. _`tracker issue`: http://tracker.ceph.com/
|
||||
.. _`Stable Releases and Backports`: http://tracker.ceph.com/projects/ceph-releases/wiki
|
||||
|
||||
Dependabot
|
||||
----------
|
||||
|
||||
Dependabot is a GitHub bot that scans the dependencies in the repositories for
|
||||
security vulnerabilities (CVEs). If a fix is available for a discovered CVE,
|
||||
Dependabot creates a pull request to update the dependency.
|
||||
|
||||
Dependabot also indicates the compatibility score of the upgrade. This score is
|
||||
based on the number of CI failures that occur in other GitHub repositories
|
||||
where the fix was applied.
|
||||
|
||||
With some configuration, Dependabot can perform non-security updates (for
|
||||
example, it can upgrade to the latest minor version or patch version).
|
||||
|
||||
Dependabot supports `several languages and package managers
|
||||
<https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/about-dependabot-version-updates#supported-repositories-and-ecosystems>`_.
|
||||
As of July 2022, the Ceph project receives alerts only from pip (based on the
|
||||
`requirements.txt` files) and npm (`package*.json`). It is possible to extend
|
||||
these alerts to git submodules, Golang, and Java. As of July 2022, there is no
|
||||
support for C++ package managers such as vcpkg and conan, or for C++20 modules.
|
||||
|
||||
Many of the dependencies discovered by Dependabot are best updated
|
||||
outside the Ceph GitHub repository (distribution packages, for example,
|
||||
will be a better place to update some of the dependencies). Nonetheless, the
|
||||
list of new and existing vulnerabilities generated by Dependabot will be
|
||||
useful.
|
||||
|
||||
`Here is an example of a Dependabot pull request.
|
||||
<https://github.com/ceph/ceph/pull/46998>`_
|
||||
|
||||
Guidance for use of cluster log
|
||||
-------------------------------
|
||||
|
||||
|
@ -18,10 +18,10 @@ What ?
|
||||
Where ?
|
||||
^^^^^^^
|
||||
|
||||
Features are merged to the *master* branch. Bug fixes should be merged to the
|
||||
Features are merged to the *main* branch. Bug fixes should be merged to the
|
||||
corresponding named branch (e.g. *nautilus* for 14.0.z, *pacific* for 16.0.z,
|
||||
etc.). However, this is not mandatory - bug fixes and documentation
|
||||
enhancements can be merged to the *master* branch as well, since the *master*
|
||||
enhancements can be merged to the *main* branch as well, since the *main*
|
||||
branch is itself occasionally merged to the named branch during the development
|
||||
releases phase. In either case, if a bug fix is important it can also be
|
||||
flagged for backport to one or more previous stable releases.
|
||||
@ -32,16 +32,16 @@ When ?
|
||||
After each stable release, candidate branches for previous releases enter
|
||||
phase 2 (see below). For example: the *jewel* named branch was created when
|
||||
the *infernalis* release candidates entered phase 2. From this point on,
|
||||
*master* was no longer associated with *infernalis*. After he named branch of
|
||||
the next stable release is created, *master* will be occasionally merged into
|
||||
*main* was no longer associated with *infernalis*. After the named branch of
|
||||
the next stable release is created, *main* will be occasionally merged into
|
||||
it.
|
||||
|
||||
Branch merges
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* The latest stable release branch is merged periodically into master.
|
||||
* The master branch is merged periodically into the branch of the stable release.
|
||||
* The master is merged into the stable release branch
|
||||
* The latest stable release branch is merged periodically into main.
|
||||
* The main branch is merged periodically into the branch of the stable release.
|
||||
* The main branch is merged into the stable release branch
|
||||
immediately after each development (x.0.z) release.
|
||||
|
||||
Stable release candidates (i.e. x.1.z) phase 1
|
||||
@ -56,12 +56,12 @@ Where ?
|
||||
^^^^^^^
|
||||
|
||||
The stable release branch (e.g. *jewel* for 10.0.z, *luminous*
|
||||
for 12.0.z, etc.) or *master*. Bug fixes should be merged to the named
|
||||
for 12.0.z, etc.) or *main*. Bug fixes should be merged to the named
|
||||
branch corresponding to the stable release candidate (e.g. *jewel* for
|
||||
10.1.z) or to *master*. During this phase, all commits to *master* will be
|
||||
10.1.z) or to *main*. During this phase, all commits to *main* will be
|
||||
merged to the named branch, and vice versa. In other words, it makes
|
||||
no difference whether a commit is merged to the named branch or to
|
||||
*master* - it will make it into the next release candidate either way.
|
||||
*main* - it will make it into the next release candidate either way.
|
||||
|
||||
When ?
|
||||
^^^^^^
|
||||
@ -72,9 +72,9 @@ x.1.0 tag is set in the release branch.
|
||||
Branch merges
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* The stable release branch is merged periodically into *master*.
|
||||
* The *master* branch is merged periodically into the stable release branch.
|
||||
* The *master* branch is merged into the stable release branch
|
||||
* The stable release branch is merged periodically into *main*.
|
||||
* The *main* branch is merged periodically into the stable release branch.
|
||||
* The *main* branch is merged into the stable release branch
|
||||
immediately after each x.1.z release candidate.
|
||||
|
||||
Stable release candidates (i.e. x.1.z) phase 2
|
||||
@ -90,7 +90,7 @@ Where ?
|
||||
|
||||
The stable release branch (e.g. *mimic* for 13.0.z, *octopus* for 15.0.z
|
||||
,etc.). During this phase, all commits to the named branch will be merged into
|
||||
*master*. Cherry-picking to the named branch during release candidate phase 2
|
||||
*main*. Cherry-picking to the named branch during release candidate phase 2
|
||||
is performed manually since the official backporting process begins only when
|
||||
the release is pronounced "stable".
|
||||
|
||||
@ -102,7 +102,7 @@ After Sage Weil announces that it is time for phase 2 to happen.
|
||||
Branch merges
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* The stable release branch is occasionally merged into master.
|
||||
* The stable release branch is occasionally merged into main.
|
||||
|
||||
Stable releases (i.e. x.2.z)
|
||||
----------------------------
|
||||
@ -112,8 +112,8 @@ What ?
|
||||
|
||||
* Bug fixes
|
||||
* Features are sometimes accepted
|
||||
* Commits should be cherry-picked from *master* when possible
|
||||
* Commits that are not cherry-picked from *master* must pertain to a bug unique to
|
||||
* Commits should be cherry-picked from *main* when possible
|
||||
* Commits that are not cherry-picked from *main* must pertain to a bug unique to
|
||||
the stable release
|
||||
* See also the `backport HOWTO`_ document
|
||||
|
||||
|
@ -166,5 +166,12 @@ Unit test caveats
|
||||
explicitly linked against something else. This enables tools such as
|
||||
**valgrind** to be used in the tests.
|
||||
|
||||
#. Google Test unit testing library hides the client output from the shell.
   In order to debug the client after setting the desired debug level
   (e.g. ``ceph config set client debug_rbd 20``), the debug log file can
   be found at ``build/out/client.admin.<pid>.log``.
   This can also be handy when examining failed teuthology unit test
   jobs; the job's debug level can be set in the relevant yaml file.
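
   A minimal sketch of this local workflow, assuming a ``vstart.sh`` cluster
   started from the ``build`` directory and using ``ceph_test_librbd`` purely
   as an illustrative test binary (adjust names and paths to your setup):

   .. prompt:: bash $

      ./bin/ceph config set client debug_rbd 20
      ./bin/ceph_test_librbd
      less $(ls -t out/client.admin.*.log | head -1)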
|
||||
|
||||
.. _make check:
|
||||
.. _teuthology framework: https://github.com/ceph/teuthology
|
||||
|
@ -86,6 +86,10 @@ separate file, like this::

   .. graphviz:: myfile.dot

See the `Dot User's Manual <https://www.graphviz.org/pdf/dotguide.pdf>`_ by
Emden R. Gansner, Eleftherios Koutsofios, and Stephen North for examples of
digraphs. This is especially useful if this is your first time encountering
GraphViz.
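
If Graphviz is installed locally, a quick way to preview such a file before
committing it is to render it from the command line (the output file name
here is just an example)::

   dot -Tsvg myfile.dot -o myfile.svg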
|
||||
|
||||
Ditaa
|
||||
-----
|
||||
|
@ -4,11 +4,7 @@
|
||||
|
||||
.. graphviz::
|
||||
|
||||
/*
|
||||
* Rough outline of object store module dependencies
|
||||
*/
|
||||
|
||||
digraph object_store {
|
||||
digraph object_store {
|
||||
size="7,7";
|
||||
node [color=lightblue2, style=filled, fontname="Serif"];
|
||||
|
||||
@ -68,3 +64,4 @@
|
||||
|
||||
|
||||
.. todo:: write more here
|
||||
|
||||
|
@ -6,47 +6,52 @@ Glossary
|
||||
--------
|
||||
|
||||
*chunk*
|
||||
when the encoding function is called, it returns chunks of the same
|
||||
size. Data chunks which can be concatenated to reconstruct the original
|
||||
object and coding chunks which can be used to rebuild a lost chunk.
|
||||
When the encoding function is called, it returns chunks of the same
|
||||
size as each other. There are two kinds of chunks: (1) *data
|
||||
chunks*, which can be concatenated to reconstruct the original
|
||||
object, and (2) *coding chunks*, which can be used to rebuild a
|
||||
lost chunk.
|
||||
|
||||
*chunk rank*
|
||||
the index of a chunk when returned by the encoding function. The
|
||||
rank of the first chunk is 0, the rank of the second chunk is 1
|
||||
etc.
|
||||
|
||||
*stripe*
|
||||
when an object is too large to be encoded with a single call,
|
||||
each set of chunks created by a call to the encoding function is
|
||||
called a stripe.
|
||||
|
||||
*shard|strip*
|
||||
an ordered sequence of chunks of the same rank from the same
|
||||
object. For a given placement group, each OSD contains shards of
|
||||
the same rank. When dealing with objects that are encoded with a
|
||||
single operation, *chunk* is sometime used instead of *shard*
|
||||
because the shard is made of a single chunk. The *chunks* in a
|
||||
*shard* are ordered according to the rank of the stripe they belong
|
||||
to.
|
||||
The index of a chunk, as determined by the encoding function. The
|
||||
rank of the first chunk is 0, the rank of the second chunk is 1,
|
||||
and so on.
|
||||
|
||||
*K*
|
||||
the number of data *chunks*, i.e. the number of *chunks* in which the
|
||||
original object is divided. For instance if *K* = 2 a 10KB object
|
||||
will be divided into *K* objects of 5KB each.
|
||||
The number of data chunks into which an object is divided. For
|
||||
example, if *K* = 2, then a 10KB object is divided into two objects
|
||||
of 5KB each.
|
||||
|
||||
*M*
|
||||
the number of coding *chunks*, i.e. the number of additional *chunks*
|
||||
computed by the encoding functions. If there are 2 coding *chunks*,
|
||||
it means 2 OSDs can be out without losing data.
|
||||
The number of coding chunks computed by the encoding function. *M*
|
||||
is equal to the number of OSDs that can be missing from the cluster
|
||||
without the cluster suffering data loss. For example, if there are
|
||||
two coding chunks, then two OSDs can be missing without data loss.
|
||||
|
||||
*N*
|
||||
the number of data *chunks* plus the number of coding *chunks*,
|
||||
i.e. *K+M*.
|
||||
The number of data chunks plus the number of coding chunks: that
|
||||
is, *K* + *M*.
|
||||
|
||||
*rate*
|
||||
the proportion of the *chunks* that contains useful information, i.e. *K/N*.
|
||||
For instance, for *K* = 9 and *M* = 3 (i.e. *K+M* = *N* = 12) the rate is
|
||||
*K* = 9 / *N* = 12 = 0.75, i.e. 75% of the chunks contain useful information.
|
||||
The proportion of the total chunks containing useful information:
|
||||
that is, *K* divided by *N*. For example, suppose that *K* = 9 and
|
||||
*M* = 3. This would mean that *N* = 12 (because *K* + *M* = 9 + 3).
|
||||
Therefore, the *rate* (*K* / *N*) would be 9 / 12 = 0.75. In other
|
||||
words, 75% of the chunks would contain useful information.
|
||||
|
||||
*shard* (also called *strip*)
|
||||
An ordered sequence of chunks of the same rank from the same object. For a
|
||||
given placement group, each OSD contains shards of the same rank. In the
|
||||
special case in which an object is encoded with only one call to the
|
||||
encoding function, the term *chunk* may be used instead of *shard* because
|
||||
the shard is made of a single chunk. The chunks in a shard are ordered
|
||||
according to the rank of the stripe (see *stripe* below) they belong to.
|
||||
|
||||
|
||||
*stripe*
|
||||
If an object is so large that encoding it requires more than one
|
||||
call to the encoding function, each of these calls creates a set of
|
||||
chunks called a *stripe*.
|
||||
|
||||
The definitions are illustrated as follows (PG stands for placement group):
|
||||
::
|
||||
@ -71,8 +76,8 @@ The definitions are illustrated as follows (PG stands for placement group):
|
||||
| ... | | ... |
|
||||
+-------------------------+ +-------------------------+
|
||||
|
||||
Table of content
|
||||
----------------
|
||||
Table of contents
|
||||
-----------------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
@ -2,172 +2,224 @@
|
||||
Ceph Release Process
|
||||
======================
|
||||
|
||||
1. Build environment
|
||||
====================
|
||||
Prerequisites
|
||||
=============
|
||||
|
||||
There are multiple build environments, debian based packages are built via pbuilder for multiple distributions. The build hosts are listed in the ``deb_hosts`` file, and the list of distributions are in ``deb_dist``. All distributions are build on each of the build hosts. Currently there is 1 64 bit and 1 32 bit build host.
|
||||
Signing Machine
|
||||
---------------
|
||||
The signing machine is a virtual machine in the `Sepia lab
|
||||
<https://wiki.sepia.ceph.com/doku.php?id=start>`_. SSH access to the signing
|
||||
machine is limited to the usual Infrastructure Admins along with a few other
|
||||
component leads (e.g., nfs-ganesha, ceph-iscsi).
|
||||
|
||||
The RPM based packages are built natively, so one distribution per build host. The list of hosts is found in ``rpm_hosts``.
|
||||
The ``ubuntu`` user on the machine has some `build scripts <https://github.com/ceph/ceph-build/tree/main/scripts>`_ that help with pulling, pushing, and signing packages.
|
||||
|
||||
Prior to building, it's necessary to update the pbuilder seed tarballs::
|
||||
The GPG signing key permanently lives on a `Nitrokey Pro <https://shop.nitrokey.com/shop/product/nkpr2-nitrokey-pro-2-3>`_ and is passed through to the VM via RHV. This helps to ensure that the key cannot be exported or leave the datacenter in any way.
|
||||
|
||||
./update_all_pbuilders.sh
|
||||
New Major Releases
|
||||
------------------
|
||||
For each new major (alphabetical) release, you must create one ``ceph-release`` RPM for each RPM repo (e.g., one for el8 and one for el9). `chacra <https://github.com/ceph/chacra>`_ is a python service we use to store DEB and RPM repos. The chacra repos are configured to include this ceph-release RPM, but it must be built separately. You must make sure that chacra is properly configured to include this RPM for each particular release.
|
||||
|
||||
2. Setup keyring for signing packages
|
||||
=====================================
|
||||
1. Update chacra so it is aware of the new Ceph release. See `this PR <https://github.com/ceph/chacra/pull/219>`_ for an example.
|
||||
2. Redeploy chacra (e.g., ``ansible-playbook chacra.ceph.com.yml``)
|
||||
3. Run https://jenkins.ceph.com/view/all/job/ceph-release-rpm/
|
||||
|
||||
::
|
||||
Summarized build process
|
||||
========================
|
||||
|
||||
export GNUPGHOME=<path to keyring dir>
|
||||
1. QE finishes testing and finds a stopping point. That commit is pushed to the ``$release-release`` branch in ceph.git (e.g., ``quincy-release``). This allows work to continue in the working ``$release`` branch without having to freeze it during the release process.
|
||||
2. The Ceph Council approves and notifies the "Build Lead".
|
||||
3. The "Build Lead" starts the `Jenkins multijob <https://jenkins.ceph.com/view/all/job/ceph>`_, which triggers all builds.
|
||||
4. Packages are pushed to chacra.ceph.com.
|
||||
5. Packages are pulled from chacra.ceph.com to the Signer VM.
|
||||
6. Packages are signed.
|
||||
7. Packages are pushed to download.ceph.com.
|
||||
8. Release containers are built and pushed to quay.io.
|
||||
|
||||
# verify it's accessible
|
||||
gpg --list-keys
|
||||
Hotfix Release Process Deviation
|
||||
--------------------------------
|
||||
|
||||
The release key should be present::
|
||||
A hotfix release has a couple of differences.
|
||||
|
||||
pub 4096R/17ED316D 2012-05-20
|
||||
uid Ceph Release Key <sage@newdream.net>
|
||||
1. Check out the most recent tag. For example, if we're releasing a hotfix on top of 17.2.3, ``git checkout -f -B quincy-release origin/v17.2.3``
|
||||
2. ``git cherry-pick -x`` the necessary hotfix commits
|
||||
3. ``git push -f origin quincy-release``
|
||||
4. Notify the "Build Lead" to start the build.
|
||||
5. The "Build Lead" should set ``RELEASE_TYPE=HOTFIX`` instead of ``STABLE``.
|
||||
|
||||
Security Release Process Deviation
|
||||
----------------------------------
|
||||
|
||||
3. Set up build area
|
||||
====================
|
||||
A security/CVE release is similar to a hotfix release with two differences:
|
||||
|
||||
Clone the ceph and ceph-build source trees::
|
||||
1. The fix should be pushed to the `ceph-private <https://github.com/ceph/ceph-private>`_ repo instead of ceph.git (requires GitHub Admin Role).
|
||||
2. The tags (e.g., v17.2.4) must be manually pushed to ceph.git by the "Build Lead."
|
||||
|
||||
git clone http://github.com/ceph/ceph.git
|
||||
git clone http://github.com/ceph/ceph-build.git
|
||||
1. Check out the most recent tag. For example, if we're releasing a security fix on top of 17.2.3, ``git checkout -f -B quincy-release origin/v17.2.3``
|
||||
2. ``git cherry-pick -x`` the necessary security fix commits
|
||||
3. ``git remote add security git@github.com:ceph/ceph-private.git``
|
||||
4. ``git push -f security quincy-release``
|
||||
5. Notify the "Build Lead" to start the build.
|
||||
6. The "Build Lead" should set ``RELEASE_TYPE=SECURITY`` instead of ``STABLE``.
|
||||
7. Finally, the `ceph-tag <https://github.com/ceph/ceph-build/blob/main/ansible/roles/ceph-release/tasks/push.yml>`_ steps need to be manually run by the "Build Lead" as close to the Announcement time as possible::
|
||||
|
||||
In the ceph source directory, checkout next branch (for point releases use the {codename} branch)::
|
||||
# Example using quincy pretending 17.2.4 is the security release version
|
||||
# Add the ceph-releases repo (also requires GitHub Admin Role). The `ceph-setup <https://jenkins.ceph.com/job/ceph-setup>`_ job will have already created and pushed the tag to ceph-releases.git.
|
||||
git remote add releases git@github.com:ceph/ceph-releases.git
|
||||
git fetch --all
|
||||
# Check out the version commit
|
||||
git checkout -f -B quincy-release releases/quincy-release
|
||||
git push -f origin quincy-release
|
||||
git push origin v17.2.4
|
||||
# Now create a Pull Request of quincy-release targeting quincy to merge the version commit and security fixes back into the quincy branch
|
||||
|
||||
git checkout next
|
||||
1. Preparing the release branch
|
||||
===============================
|
||||
|
||||
Checkout the submodules::
|
||||
Once QE has determined a stopping point in the working (e.g., ``quincy``) branch, that commit should be pushed to the corresponding ``quincy-release`` branch.
|
||||
|
||||
git submodule update --force --init --recursive
|
||||
Notify the "Build Lead" that the release branch is ready.
|
||||
|
||||
4. Update Build version numbers
|
||||
================================
|
||||
|
||||
Substitute the ceph release number where indicated below by the string ``0.xx``.
|
||||
|
||||
Edit configure.ac and update the version number. Example diff::
|
||||
|
||||
-AC_INIT([ceph], [0.54], [ceph-devel@vger.kernel.org])
|
||||
+AC_INIT([ceph], [0.55], [ceph-devel@vger.kernel.org])
|
||||
|
||||
Update the version number in the debian change log::
|
||||
|
||||
DEBEMAIL user@host dch -v 0.xx-1
|
||||
|
||||
Commit the changes::
|
||||
|
||||
git commit -a
|
||||
|
||||
Tag the release::
|
||||
|
||||
../ceph-build/tag-release v0.xx
|
||||
|
||||
|
||||
5. Create Makefiles
|
||||
===================
|
||||
|
||||
The actual configure options used to build packages are in the
|
||||
``ceph.spec.in`` and ``debian/rules`` files. At this point we just
|
||||
need to create a Makefile.::
|
||||
|
||||
./do_autogen.sh
|
||||
|
||||
|
||||
6. Run the release scripts
|
||||
==========================
|
||||
|
||||
This creates tarballs and copies them, with other needed files to
|
||||
the build hosts listed in deb_hosts and rpm_hosts, runs a local build
|
||||
script, then rsyncs the results back to the specified release directory.::
|
||||
|
||||
../ceph-build/do_release.sh /tmp/release
|
||||
|
||||
|
||||
7. Create RPM Repo
|
||||
==================
|
||||
|
||||
Copy the rpms to the destination repo::
|
||||
|
||||
mkdir /tmp/rpm-repo
|
||||
../ceph-build/push_to_rpm_repo.sh /tmp/release /tmp/rpm-repo 0.xx
|
||||
|
||||
Next add any additional rpms to the repo that are needed such as leveldb.
|
||||
See RPM Backports section
|
||||
|
||||
Finally, sign the rpms and build the repo indexes::
|
||||
|
||||
../ceph-build/sign_and_index_rpm_repo.sh /tmp/release /tmp/rpm-repo 0.xx
|
||||
|
||||
|
||||
8. Create Debian repo
|
||||
2. Starting the build
|
||||
=====================
|
||||
|
||||
The key-id used below is the id of the ceph release key from step 2::
|
||||
We'll use a stable/regular 15.2.17 release of Octopus as an example throughout this document.
|
||||
|
||||
mkdir /tmp/debian-repo
|
||||
../ceph-build/gen_reprepro_conf.sh /tmp/debian-repo key-id
|
||||
../ceph-build/push_to_deb_repo.sh /tmp/release /tmp/debian-repo 0.xx main
|
||||
1. Browse to https://jenkins.ceph.com/view/all/job/ceph/build?delay=0sec
|
||||
2. Log in with GitHub OAuth
|
||||
3. Set the parameters as necessary::
|
||||
|
||||
BRANCH=octopus
|
||||
TAG=checked
|
||||
VERSION=15.2.17
|
||||
RELEASE_TYPE=STABLE
|
||||
ARCHS=x86_64 arm64
|
||||
|
||||
Next add any addition debian packages that are needed such as leveldb.
|
||||
See the Debian Backports section below.
|
||||
4. Use https://docs.ceph.com/en/latest/start/os-recommendations/?highlight=debian#platforms to determine the ``DISTROS`` parameter. For example,
|
||||
|
||||
Debian packages are signed when added to the repo, so no further action is
|
||||
needed.
|
||||
+-------------------+-------------------------------------------+
|
||||
| Release | Distro Codemap |
|
||||
+===================+===========================================+
|
||||
| octopus (15.X.X) | ``focal bionic centos7 centos8 buster`` |
|
||||
+-------------------+-------------------------------------------+
|
||||
| pacific (16.X.X) | ``focal bionic centos8 buster bullseye`` |
|
||||
+-------------------+-------------------------------------------+
|
||||
| quincy (17.X.X) | ``focal centos8 centos9 bullseye`` |
|
||||
+-------------------+-------------------------------------------+
|
||||
|
||||
5. Click ``Build``.
|
||||
|
||||
9. Push repos to ceph.org
|
||||
==========================
|
||||
3. Release Notes
|
||||
================
|
||||
|
||||
For a development release::
|
||||
Packages take hours to build. Use those hours to create the Release Notes and Announcements:
|
||||
|
||||
rcp ceph-0.xx.tar.bz2 ceph-0.xx.tar.gz \
|
||||
ceph_site@ceph.com:ceph.com/downloads/.
|
||||
rsync -av /tmp/rpm-repo/0.xx/ ceph_site@ceph.com:ceph.com/rpm-testing
|
||||
rsync -av /tmp/debian-repo/ ceph_site@ceph.com:ceph.com/debian-testing
|
||||
1. ceph.git Release Notes (e.g., `v15.2.17's ceph.git (docs.ceph.com) PR <https://github.com/ceph/ceph/pull/47198>`_)
|
||||
2. ceph.io Release Notes (e.g., `v15.2.17's ceph.io.git (www.ceph.io) PR <https://github.com/ceph/ceph.io/pull/427>`_)
|
||||
3. E-mail announcement
|
||||
|
||||
For a stable release, replace {CODENAME} with the release codename (e.g., ``argonaut`` or ``bobtail``)::
|
||||
See `the Ceph Tracker wiki page that explains how to write the release notes <https://tracker.ceph.com/projects/ceph-releases/wiki/HOWTO_write_the_release_notes>`_.
|
||||
|
||||
rcp ceph-0.xx.tar.bz2 \
|
||||
ceph_site@ceph.com:ceph.com/downloads/ceph-0.xx.tar.bz2
|
||||
rcp ceph-0.xx.tar.gz \
|
||||
ceph_site@ceph.com:ceph.com/downloads/ceph-0.xx.tar.gz
|
||||
rsync -av /tmp/rpm-repo/0.xx/ ceph_site@ceph.com:ceph.com/rpm-{CODENAME}
|
||||
rsync -auv /tmp/debian-repo/ ceph_site@ceph.com:ceph.com/debian-{CODENAME}
|
||||
4. Signing and Publishing the Build
|
||||
===================================
|
||||
|
||||
10. Update Git
|
||||
==============
|
||||
#. Obtain the sha1 of the version commit from the `build job <https://jenkins.ceph.com/view/all/job/ceph>`_ or the ``sha1`` file created by the `ceph-setup <https://jenkins.ceph.com/job/ceph-setup/>`_ job.
|
||||
|
||||
Point release
|
||||
-------------
|
||||
#. Download the packages from chacra.ceph.com to the signing virtual machine. These packages get downloaded to ``/opt/repos`` where the `Sepia Lab Long Running (Ceph) Cluster <https://wiki.sepia.ceph.com/doku.php?id=services:longrunningcluster>`_ is mounted.
|
||||
|
||||
For point releases just push the version number update to the
|
||||
branch and the new tag::
|
||||
.. prompt:: bash $
|
||||
|
||||
git push origin {codename}
|
||||
git push origin v0.xx
|
||||
ssh ubuntu@signer.front.sepia.ceph.com
|
||||
sync-pull ceph [pacific|quincy|etc] <sha1>
|
||||
|
||||
Example::
|
||||
|
||||
$ sync-pull ceph octopus 8a82819d84cf884bd39c17e3236e0632ac146dc4
|
||||
sync for: ceph octopus
|
||||
********************************************
|
||||
Found the most packages (332) in ubuntu/bionic.
|
||||
No JSON object could be decoded
|
||||
No JSON object could be decoded
|
||||
ubuntu@chacra.ceph.com:/opt/repos/ceph/octopus/8a82819d84cf884bd39c17e3236e0632ac146dc4/ubuntu/bionic/flavors/default/* /opt/repos/ceph/octopus-15.2.17/debian/jessie/
|
||||
--------------------------------------------
|
||||
receiving incremental file list
|
||||
db/
|
||||
db/checksums.db
|
||||
180.22K 100% 2.23MB/s 0:00:00 (xfr#1, to-chk=463/467)
|
||||
db/contents.cache.db
|
||||
507.90K 100% 1.95MB/s 0:00:00 (xfr#2, to-chk=462/467)
|
||||
db/packages.db
|
||||
|
||||
etc...
|
||||
|
||||
Development and Stable releases
|
||||
-------------------------------
|
||||
#. Sign the DEBs:
|
||||
|
||||
For a development release, update tags for ``ceph.git``::
|
||||
.. prompt:: bash
|
||||
|
||||
git push origin v0.xx
|
||||
git push origin HEAD:last
|
||||
git checkout master
|
||||
git merge next
|
||||
git push origin master
|
||||
git push origin HEAD:next
|
||||
merfi gpg /opt/repos/ceph/octopus-15.2.17/debian
|
||||
|
||||
Similarly, for a development release, for both ``teuthology.git`` and ``ceph-qa-suite.git``::
|
||||
Example::
|
||||
|
||||
git checkout master
|
||||
git reset --hard origin/master
|
||||
git branch -f last origin/next
|
||||
git push -f origin last
|
||||
git push -f origin master:next
|
||||
$ merfi gpg /opt/repos/ceph/octopus-15.2.17/debian
|
||||
--> Starting path collection, looking for files to sign
|
||||
--> 18 matching paths found
|
||||
--> will sign with the following commands:
|
||||
--> gpg --batch --yes --armor --detach-sig --output Release.gpg Release
|
||||
--> gpg --batch --yes --clearsign --output InRelease Release
|
||||
--> signing: /opt/repos/ceph/octopus-15.2.17/debian/jessie/dists/bionic/Release
|
||||
--> Running command: gpg --batch --yes --armor --detach-sig --output Release.gpg Release
|
||||
--> Running command: gpg --batch --yes --clearsign --output InRelease Release
|
||||
--> signing: /opt/repos/ceph/octopus-15.2.17/debian/jessie/dists/focal/Release
|
||||
--> Running command: gpg --batch --yes --armor --detach-sig --output Release.gpg Release
|
||||
--> Running command: gpg --batch --yes --clearsign --output InRelease Release
|
||||
|
||||
etc...
|
||||
|
||||
#. Sign the RPMs:
|
||||
|
||||
.. prompt:: bash
|
||||
|
||||
sign-rpms octopus
|
||||
|
||||
Example::
|
||||
|
||||
$ sign-rpms octopus
|
||||
Checking packages in: /opt/repos/ceph/octopus-15.2.17/centos/7
|
||||
signing: /opt/repos/ceph/octopus-15.2.17/centos/7/SRPMS/ceph-release-1-1.el7.src.rpm
|
||||
/opt/repos/ceph/octopus-15.2.17/centos/7/SRPMS/ceph-release-1-1.el7.src.rpm:
|
||||
signing: /opt/repos/ceph/octopus-15.2.17/centos/7/SRPMS/ceph-15.2.17-0.el7.src.rpm
|
||||
/opt/repos/ceph/octopus-15.2.17/centos/7/SRPMS/ceph-15.2.17-0.el7.src.rpm:
|
||||
signing: /opt/repos/ceph/octopus-15.2.17/centos/7/noarch/ceph-mgr-modules-core-15.2.17-0.el7.noarch.rpm
|
||||
|
||||
etc...
|
||||
|
||||
5. Publish the packages to download.ceph.com:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sync-push octopus
|
||||
|
||||
5. Build Containers
|
||||
===================
|
||||
|
||||
Start the following two jobs:
|
||||
|
||||
#. https://2.jenkins.ceph.com/job/ceph-container-build-ceph-base-push-imgs/
|
||||
#. https://2.jenkins.ceph.com/job/ceph-container-build-ceph-base-push-imgs-arm64/
|
||||
|
||||
6. Announce the Release
|
||||
=======================
|
||||
|
||||
Version Commit PR
|
||||
-----------------
|
||||
|
||||
The `ceph-tag Jenkins job <https://jenkins.ceph.com/job/ceph-tag>`_ creates a Pull Request in ceph.git that targets the release branch.
|
||||
|
||||
If this was a regular release (not a hotfix release or a security release), the only commit in that Pull Request should be the version commit. For example, see `v15.2.17's version commit PR <https://github.com/ceph/ceph/pull/47520>`_.
|
||||
|
||||
Request a review and then merge the Pull Request.
|
||||
|
||||
Announcing
|
||||
----------
|
||||
|
||||
Publish the Release Notes on ceph.io before announcing the release by email, because the e-mail announcement references the ceph.io blog post.
|
||||
|
@ -2,198 +2,355 @@
|
||||
Ceph Glossary
|
||||
===============
|
||||
|
||||
Ceph is growing rapidly. As firms deploy Ceph, the technical terms such as
|
||||
"RADOS", "RBD," "RGW" and so forth require corresponding marketing terms
|
||||
that explain what each component does. The terms in this glossary are
|
||||
intended to complement the existing technical terminology.
|
||||
|
||||
Sometimes more than one term applies to a definition. Generally, the first
|
||||
term reflects a term consistent with Ceph's marketing, and secondary terms
|
||||
reflect either technical terms or legacy ways of referring to Ceph systems.
|
||||
|
||||
|
||||
.. glossary::
|
||||
|
||||
Ceph Project
|
||||
The aggregate term for the people, software, mission and infrastructure
|
||||
of Ceph.
|
||||
|
||||
cephx
|
||||
The Ceph authentication protocol. Cephx operates like Kerberos, but it
|
||||
has no single point of failure.
|
||||
:ref:`BlueStore<rados_config_storage_devices_bluestore>`
|
||||
OSD BlueStore is a storage back end used by OSD daemons, and
|
||||
was designed specifically for use with Ceph. BlueStore was
|
||||
introduced in the Ceph Kraken release. In the Ceph Luminous
|
||||
release, BlueStore became Ceph's default storage back end,
|
||||
supplanting FileStore. Unlike :term:`filestore`, BlueStore
|
||||
stores objects directly on Ceph block devices without any file
|
||||
system interface. Since Luminous (12.2), BlueStore has been
|
||||
Ceph's default and recommended storage back end.
|
||||
|
||||
Ceph
|
||||
Ceph Platform
|
||||
All Ceph software, which includes any piece of code hosted at
|
||||
`https://github.com/ceph`_.
|
||||
Ceph is a distributed network storage and file system with
|
||||
distributed metadata management and POSIX semantics.
|
||||
|
||||
Ceph Block Device
|
||||
A software instrument that orchestrates the storage of
|
||||
block-based data in Ceph. Ceph Block Device (also called "RBD",
|
||||
or "RADOS block device") splits block-based application data
|
||||
into "chunks". RADOS stores these chunks as objects. Ceph Block
|
||||
Device orchestrates the storage of those objects across the
|
||||
storage cluster. See also :term:`RBD`.
|
||||
|
||||
Ceph Block Storage
|
||||
One of the three kinds of storage supported by Ceph (the other
|
||||
two are object storage and file storage). Ceph Block Storage is
|
||||
the block storage "product", which refers to block-storage
|
||||
related services and capabilities when used in conjunction with
|
||||
the collection of (1) ``librbd`` (a python module that provides
|
||||
file-like access to :term:`RBD` images), (2) a hypervisor such
|
||||
as QEMU or Xen, and (3) a hypervisor abstraction layer such as
|
||||
``libvirt``.
|
||||
|
||||
Ceph Client
|
||||
Any of the Ceph components that can access a Ceph Storage
|
||||
Cluster. This includes the Ceph Object Gateway, the Ceph Block
|
||||
Device, the Ceph File System, and their corresponding
|
||||
libraries. It also includes kernel modules, and FUSEs
|
||||
(Filesystems in USERspace).
|
||||
|
||||
Ceph Client Libraries
|
||||
The collection of libraries that can be used to interact with
|
||||
components of the Ceph Cluster.
|
||||
|
||||
Ceph Cluster Map
|
||||
See :term:`Cluster Map`
|
||||
|
||||
Ceph Dashboard
|
||||
:ref:`The Ceph Dashboard<mgr-dashboard>` is a built-in
|
||||
web-based Ceph management and monitoring application through
|
||||
which you can inspect and administer various resources within
|
||||
the cluster. It is implemented as a :ref:`ceph-manager-daemon`
|
||||
module.
|
||||
|
||||
Ceph File System
|
||||
See :term:`CephFS`
|
||||
|
||||
:ref:`CephFS<ceph-file-system>`
|
||||
The **Ceph F**\ile **S**\ystem, or CephFS, is a
|
||||
POSIX-compliant file system built on top of Ceph’s distributed
|
||||
object store, RADOS. See :ref:`CephFS Architecture
|
||||
<arch-cephfs>` for more details.
|
||||
|
||||
Ceph Interim Release
|
||||
See :term:`Releases`.
|
||||
|
||||
Ceph Kernel Modules
|
||||
The collection of kernel modules that can be used to interact
|
||||
with the Ceph Cluster (for example: ``ceph.ko``, ``rbd.ko``).
|
||||
|
||||
:ref:`Ceph Manager<ceph-manager-daemon>`
|
||||
The Ceph manager daemon (ceph-mgr) is a daemon that runs
|
||||
alongside monitor daemons to provide monitoring and interfacing
|
||||
to external monitoring and management systems. Since the
|
||||
Luminous release (12.x), no Ceph cluster functions properly
|
||||
unless it contains a running ceph-mgr daemon.
|
||||
|
||||
Ceph Manager Dashboard
|
||||
See :term:`Ceph Dashboard`.
|
||||
|
||||
Ceph Metadata Server
|
||||
See :term:`MDS`.
|
||||
|
||||
Ceph Monitor
|
||||
A daemon that maintains a map of the state of the cluster. This
|
||||
"cluster state" includes the monitor map, the manager map, the
|
||||
OSD map, and the CRUSH map. A Ceph cluster must contain a
|
||||
minimum of three running monitors in order to be both redundant
|
||||
and highly-available. Ceph monitors and the nodes on which they
|
||||
run are often referred to as "mon"s. See :ref:`Monitor Config
|
||||
Reference <monitor-config-reference>`.
|
||||
|
||||
Ceph Node
|
||||
A Ceph node is a unit of the Ceph Cluster that communicates with
|
||||
other nodes in the Ceph Cluster in order to replicate and
|
||||
redistribute data. All of the nodes together are called the
|
||||
:term:`Ceph Storage Cluster`. Ceph nodes include :term:`OSD`\s,
|
||||
:term:`Ceph Monitor`\s, :term:`Ceph Manager`\s, and
|
||||
:term:`MDS`\es. The term "node" is usually equivalent to "host"
|
||||
in the Ceph documentation. If you have a running Ceph Cluster,
|
||||
you can list all of the nodes in it by running the command
|
||||
``ceph node ls all``.
|
||||
|
||||
:ref:`Ceph Object Gateway<object-gateway>`
|
||||
An object storage interface built on top of librados. Ceph
|
||||
Object Gateway provides a RESTful gateway between applications
|
||||
and Ceph storage clusters.
|
||||
|
||||
Ceph Object Storage
|
||||
See :term:`Ceph Object Store`.
|
||||
|
||||
Ceph Object Store
|
||||
A Ceph Object Store consists of a :term:`Ceph Storage Cluster`
|
||||
and a :term:`Ceph Object Gateway` (RGW).
|
||||
|
||||
:ref:`Ceph OSD<rados_configuration_storage-devices_ceph_osd>`
|
||||
Ceph **O**\bject **S**\torage **D**\aemon. The Ceph OSD
|
||||
software, which interacts with logical disks (:term:`OSD`).
|
||||
Around 2013, there was an attempt by "research and industry"
|
||||
(Sage's own words) to insist on using the term "OSD" to mean
|
||||
only "Object Storage Device", but the Ceph community has always
|
||||
persisted in using the term to mean "Object Storage Daemon" and
|
||||
no less an authority than Sage Weil himself confirms in
|
||||
November of 2022 that "Daemon is more accurate for how Ceph is
|
||||
built" (private correspondence between Zac Dover and Sage Weil,
|
||||
07 Nov 2022).
|
||||
|
||||
Ceph OSD Daemon
|
||||
See :term:`Ceph OSD`.
|
||||
|
||||
Ceph OSD Daemons
|
||||
See :term:`Ceph OSD`.
|
||||
|
||||
Ceph Platform
|
||||
All Ceph software, which includes any piece of code hosted at
|
||||
`https://github.com/ceph`_.
|
||||
|
||||
Ceph Point Release
|
||||
See :term:`Releases`.
|
||||
|
||||
Ceph Project
|
||||
The aggregate term for the people, software, mission and
|
||||
infrastructure of Ceph.
|
||||
|
||||
Ceph Release
|
||||
See :term:`Releases`.
|
||||
|
||||
Ceph Release Candidate
|
||||
See :term:`Releases`.
|
||||
|
||||
Ceph Stable Release
|
||||
See :term:`Releases`.
|
||||
|
||||
Ceph System
|
||||
Ceph Stack
|
||||
A collection of two or more components of Ceph.
|
||||
|
||||
Ceph Node
|
||||
Node
|
||||
Host
|
||||
Any single machine or server in a Ceph System.
|
||||
:ref:`Ceph Storage Cluster<arch-ceph-storage-cluster>`
|
||||
The collection of :term:`Ceph Monitor`\s, :term:`Ceph
|
||||
Manager`\s, :term:`Ceph Metadata Server`\s, and :term:`OSD`\s
|
||||
that work together to store and replicate data for use by
|
||||
applications, Ceph Users, and :term:`Ceph Client`\s. Ceph
|
||||
Storage Clusters receive data from :term:`Ceph Client`\s.
|
||||
|
||||
Ceph Storage Cluster
|
||||
Ceph Object Store
|
||||
RADOS
|
||||
RADOS Cluster
|
||||
Reliable Autonomic Distributed Object Store
|
||||
The core set of storage software which stores the user's data (MON+OSD).
|
||||
|
||||
Ceph Cluster Map
|
||||
Cluster Map
|
||||
The set of maps comprising the monitor map, OSD map, PG map, MDS map and
|
||||
CRUSH map. See `Cluster Map`_ for details.
|
||||
|
||||
Ceph Object Storage
|
||||
The object storage "product", service or capabilities, which consists
|
||||
essentially of a Ceph Storage Cluster and a Ceph Object Gateway.
|
||||
|
||||
Ceph Object Gateway
|
||||
RADOS Gateway
|
||||
RGW
|
||||
The S3/Swift gateway component of Ceph.
|
||||
|
||||
Ceph Block Device
|
||||
RBD
|
||||
The block storage component of Ceph.
|
||||
|
||||
Ceph Block Storage
|
||||
The block storage "product," service or capabilities when used in
|
||||
conjunction with ``librbd``, a hypervisor such as QEMU or Xen, and a
|
||||
hypervisor abstraction layer such as ``libvirt``.
|
||||
|
||||
Ceph File System
|
||||
CephFS
|
||||
Ceph FS
|
||||
The POSIX filesystem components of Ceph. Refer
|
||||
:ref:`CephFS Architecture <arch-cephfs>` and :ref:`ceph-file-system` for
|
||||
more details.
|
||||
cephx
|
||||
The Ceph authentication protocol. Cephx operates like Kerberos,
|
||||
but it has no single point of failure.
|
||||
|
||||
Cloud Platforms
|
||||
Cloud Stacks
|
||||
Third party cloud provisioning platforms such as OpenStack, CloudStack,
|
||||
OpenNebula, ProxMox, etc.
|
||||
Third party cloud provisioning platforms such as OpenStack,
|
||||
CloudStack, OpenNebula, and Proxmox VE.
|
||||
|
||||
Object Storage Device
|
||||
OSD
|
||||
A physical or logical storage unit (*e.g.*, LUN).
|
||||
Sometimes, Ceph users use the
|
||||
term "OSD" to refer to :term:`Ceph OSD Daemon`, though the
|
||||
proper term is "Ceph OSD".
|
||||
Cluster Map
|
||||
The set of maps consisting of the monitor map, OSD map, PG map,
|
||||
MDS map, and CRUSH map, which together report the state of the
|
||||
Ceph cluster. See :ref:`the "Cluster Map" section of the
|
||||
Architecture document<architecture_cluster_map>` for details.
|
||||
|
||||
Ceph OSD Daemon
|
||||
Ceph OSD Daemons
|
||||
Ceph OSD
|
||||
The Ceph OSD software, which interacts with a logical
|
||||
disk (:term:`OSD`). Sometimes, Ceph users use the
|
||||
term "OSD" to refer to "Ceph OSD Daemon", though the
|
||||
proper term is "Ceph OSD".
|
||||
CRUSH
|
||||
Controlled Replication Under Scalable Hashing. It is the
|
||||
algorithm Ceph uses to compute object storage locations.
|
||||
|
||||
OSD id
|
||||
The integer that defines an OSD. It is generated by the monitors as part
|
||||
of the creation of a new OSD.
|
||||
CRUSH rule
|
||||
The CRUSH data placement rule that applies to a particular pool or
set of pools.
|
||||
|
||||
OSD fsid
|
||||
This is a unique identifier used to further improve the uniqueness of an
|
||||
OSD and it is found in the OSD path in a file called ``osd_fsid``. This
|
||||
``fsid`` term is used interchangeably with ``uuid``
|
||||
DAS
|
||||
**D**\irect-\ **A**\ttached **S**\torage. Storage that is
|
||||
attached directly to the computer accessing it, without passing
|
||||
through a network. Contrast with NAS and SAN.
|
||||
|
||||
OSD uuid
|
||||
Just like the OSD fsid, this is the OSD unique identifier and is used
|
||||
interchangeably with ``fsid``
|
||||
:ref:`Dashboard<mgr-dashboard>`
|
||||
A built-in web-based Ceph management and monitoring application
|
||||
to administer various aspects and objects of the cluster. The
|
||||
dashboard is implemented as a Ceph Manager module. See
|
||||
:ref:`mgr-dashboard` for more details.
|
||||
|
||||
bluestore
|
||||
OSD BlueStore is a new back end for OSD daemons (kraken and newer
|
||||
versions). Unlike :term:`filestore` it stores objects directly on the
|
||||
Ceph block devices without any file system interface.
|
||||
Dashboard Module
|
||||
Another name for :term:`Dashboard`.
|
||||
|
||||
Dashboard Plugin
|
||||
filestore
|
||||
A back end for OSD daemons, where a Journal is needed and files are
|
||||
written to the filesystem.
|
||||
A back end for OSD daemons, where a Journal is needed and files
|
||||
are written to the filesystem.
|
||||
|
||||
FQDN
|
||||
**F**\ully **Q**\ualified **D**\omain **N**\ame. A domain name
|
||||
that is applied to a node in a network and that specifies the
|
||||
node's exact location in the tree hierarchy of the DNS.
|
||||
|
||||
In the context of Ceph cluster administration, FQDNs are often
|
||||
applied to hosts. In this documentation, the term "FQDN" is
|
||||
used mostly to distinguish between FQDNs and relatively simpler
|
||||
hostnames, which do not specify the exact location of the host
|
||||
in the tree hierarchy of the DNS but merely name the host.
|
||||
|
||||
Host
|
||||
Any single machine or server in a Ceph Cluster. See :term:`Ceph
|
||||
Node`.
|
||||
|
||||
LVM tags
|
||||
Extensible metadata for LVM volumes and groups. It is used to
|
||||
store Ceph-specific information about devices and their
relationship with OSDs.
|
||||
|
||||
:ref:`MDS<cephfs_add_remote_mds>`
|
||||
The Ceph **M**\eta\ **D**\ata **S**\erver daemon. Also referred
|
||||
to as "ceph-mds". The Ceph metadata server daemon must be
|
||||
running in any Ceph cluster that runs the CephFS file system.
|
||||
The MDS stores all filesystem metadata.
|
||||
|
||||
MGR
|
||||
The Ceph manager software, which collects all the state from
|
||||
the whole cluster in one place.
|
||||
|
||||
Ceph Monitor
|
||||
MON
|
||||
The Ceph monitor software.
|
||||
|
||||
Ceph Manager
|
||||
MGR
|
||||
The Ceph manager software, which collects all the state from the whole
|
||||
cluster in one place.
|
||||
Node
|
||||
See :term:`Ceph Node`.
|
||||
|
||||
Ceph Manager Dashboard
|
||||
Ceph Dashboard
|
||||
Dashboard Module
|
||||
Dashboard Plugin
|
||||
Dashboard
|
||||
A built-in web-based Ceph management and monitoring application to
|
||||
administer various aspects and objects of the cluster. The dashboard is
|
||||
implemented as a Ceph Manager module. See :ref:`mgr-dashboard` for more
|
||||
details.
|
||||
Object Storage Device
|
||||
See :term:`OSD`.
|
||||
|
||||
Ceph Metadata Server
|
||||
MDS
|
||||
The Ceph metadata software.
|
||||
OSD
|
||||
Probably :term:`Ceph OSD`, but not necessarily. Sometimes
|
||||
(especially in older correspondence, and especially in
|
||||
documentation that is not written specifically for Ceph), "OSD"
|
||||
means "**O**\bject **S**\torage **D**\evice", which refers to a
|
||||
physical or logical storage unit (for example: LUN). The Ceph
|
||||
community has always used the term "OSD" to refer to
|
||||
:term:`Ceph OSD Daemon` despite an industry push in the
|
||||
mid-2010s to insist that "OSD" should refer to "Object Storage
|
||||
Device", so it is important to know which meaning is intended.
|
||||
|
||||
Ceph Clients
|
||||
Ceph Client
|
||||
The collection of Ceph components which can access a Ceph Storage
|
||||
Cluster. These include the Ceph Object Gateway, the Ceph Block Device,
|
||||
the Ceph File System, and their corresponding libraries, kernel modules,
|
||||
and FUSEs.
|
||||
OSD fsid
|
||||
This is a unique identifier used to identify an OSD. It is
|
||||
found in the OSD path in a file called ``osd_fsid``. The
|
||||
term ``fsid`` is used interchangeably with ``uuid``
|
||||
|
||||
Ceph Kernel Modules
|
||||
The collection of kernel modules which can be used to interact with the
|
||||
Ceph System (e.g., ``ceph.ko``, ``rbd.ko``).
|
||||
OSD id
|
||||
The integer that defines an OSD. It is generated by the
|
||||
monitors during the creation of each OSD.
|
||||
|
||||
Ceph Client Libraries
|
||||
The collection of libraries that can be used to interact with components
|
||||
of the Ceph System.
|
||||
OSD uuid
|
||||
This is the unique identifier of an OSD. This term is used
|
||||
interchangeably with ``fsid``
|
||||
|
||||
Ceph Release
|
||||
Any distinct numbered version of Ceph.
|
||||
:ref:`Pool<rados_pools>`
|
||||
A pool is a logical partition used to store objects.
|
||||
|
||||
Ceph Point Release
|
||||
Any ad-hoc release that includes only bug or security fixes.
|
||||
Pools
|
||||
See :term:`pool`.
|
||||
|
||||
Ceph Interim Release
|
||||
Versions of Ceph that have not yet been put through quality assurance
|
||||
testing, but may contain new features.
|
||||
RADOS
|
||||
**R**\eliable **A**\utonomic **D**\istributed **O**\bject
|
||||
**S**\tore. RADOS is the object store that provides a scalable
|
||||
service for variably-sized objects. The RADOS object store is
|
||||
the core component of a Ceph cluster. `This blog post from
|
||||
2009
|
||||
<https://ceph.io/en/news/blog/2009/the-rados-distributed-object-store/>`_
|
||||
provides a beginner's introduction to RADOS. Readers interested
|
||||
in a deeper understanding of RADOS are directed to `RADOS: A
|
||||
Scalable, Reliable Storage Service for Petabyte-scale Storage
|
||||
Clusters <https://ceph.io/assets/pdfs/weil-rados-pdsw07.pdf>`_.
|
||||
|
||||
Ceph Release Candidate
|
||||
A major version of Ceph that has undergone initial quality assurance
|
||||
testing and is ready for beta testers.
|
||||
RADOS Cluster
|
||||
A proper subset of the Ceph Cluster consisting of
|
||||
:term:`OSD`\s, :term:`Ceph Monitor`\s, and :term:`Ceph
|
||||
Manager`\s.
|
||||
|
||||
RADOS Gateway
|
||||
See :term:`RGW`.
|
||||
|
||||
Ceph Stable Release
|
||||
A major version of Ceph where all features from the preceding interim
|
||||
releases have been put through quality assurance testing successfully.
|
||||
RBD
|
||||
The block storage component of Ceph. Also called "RADOS Block
|
||||
Device" or :term:`Ceph Block Device`.
|
||||
|
||||
Releases
|
||||
|
||||
Ceph Interim Release
|
||||
A version of Ceph that has not yet been put through
|
||||
quality assurance testing. May contain new features.
|
||||
|
||||
Ceph Point Release
|
||||
Any ad hoc release that includes only bug fixes and
|
||||
security fixes.
|
||||
|
||||
Ceph Release
|
||||
Any distinct numbered version of Ceph.
|
||||
|
||||
Ceph Release Candidate
|
||||
A major version of Ceph that has undergone initial
|
||||
quality assurance testing and is ready for beta
|
||||
testers.
|
||||
|
||||
Ceph Stable Release
|
||||
A major version of Ceph where all features from the
|
||||
preceding interim releases have been put through
|
||||
quality assurance testing successfully.
|
||||
|
||||
Reliable Autonomic Distributed Object Store
|
||||
The core set of storage software which stores the user's data
|
||||
(MON+OSD). See also :term:`RADOS`.
|
||||
|
||||
:ref:`RGW<object-gateway>`
|
||||
**R**\ADOS **G**\ate **W**\ay.
|
||||
|
||||
The component of Ceph that provides a gateway to both the
|
||||
Amazon S3 RESTful API and the OpenStack Swift API. Also called
|
||||
"RADOS Gateway" and "Ceph Object Gateway".
|
||||
|
||||
secrets
|
||||
Secrets are credentials used to perform digital authentication
|
||||
whenever privileged users must access systems that require
|
||||
authentication. Secrets can be passwords, API keys, tokens, SSH
|
||||
keys, private certificates, or encryption keys.
|
||||
|
||||
SDS
|
||||
Software-defined storage.
|
||||
|
||||
systemd oneshot
|
||||
A systemd ``type`` where a command is defined in ``ExecStart``
|
||||
which will exit upon completion (it is not intended to
|
||||
daemonize)
|
||||
|
||||
Ceph Test Framework
|
||||
Teuthology
|
||||
The collection of software that performs scripted tests on Ceph.
|
||||
|
||||
CRUSH
|
||||
Controlled Replication Under Scalable Hashing. It is the algorithm
|
||||
Ceph uses to compute object storage locations.
|
||||
|
||||
CRUSH rule
|
||||
The CRUSH data placement rule that applies to a particular pool(s).
|
||||
|
||||
Pool
|
||||
Pools
|
||||
Pools are logical partitions for storing objects.
|
||||
|
||||
systemd oneshot
|
||||
A systemd ``type`` where a command is defined in ``ExecStart`` which will
|
||||
exit upon completion (it is not intended to daemonize)
|
||||
|
||||
LVM tags
|
||||
Extensible metadata for LVM volumes and groups. It is used to store
|
||||
Ceph-specific information about devices and its relationship with
|
||||
OSDs.
|
||||
|
||||
.. _https://github.com/ceph: https://github.com/ceph
|
||||
.. _Cluster Map: ../architecture#cluster-map
|
||||
.. _Cluster Map: ../architecture#cluster-map
|
||||
|
BIN
ceph/doc/images/keycloak-adduser.png
Normal file
After Width: | Height: | Size: 78 KiB |
BIN
ceph/doc/images/keycloak-userclientmapper.png
Normal file
After Width: | Height: | Size: 124 KiB |
BIN
ceph/doc/images/keycloak-usercredentials.png
Normal file
After Width: | Height: | Size: 95 KiB |
BIN
ceph/doc/images/keycloak-userdetails.png
Normal file
After Width: | Height: | Size: 87 KiB |
BIN
ceph/doc/images/keycloak-usertags.png
Normal file
After Width: | Height: | Size: 91 KiB |
@ -5,10 +5,18 @@
|
||||
Ceph uniquely delivers **object, block, and file storage in one unified
|
||||
system**.
|
||||
|
||||
.. warning::
|
||||
|
||||
:ref:`If this is your first time using Ceph, read the "Basic Workflow"
|
||||
page in the Ceph Developer Guide to learn how to contribute to the
|
||||
Ceph project. (Click anywhere in this paragraph to read the "Basic
|
||||
Workflow" page of the Ceph Developer Guide.) <basic workflow dev guide>`.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<style type="text/css">div.body h3{margin:5px 0px 0px 0px;}</style>
|
||||
<table cellpadding="10"><colgroup><col width="33%"><col width="33%"><col width="33%"></colgroup><tbody valign="top"><tr><td><h3>Ceph Object Store</h3>
|
||||
<style type="text/css">div.body h3{margin:5px 0px 0px 0px;}</style>
|
||||
<table cellpadding="10"><colgroup><col width="33%"><col width="33%">
|
||||
<col width="33%"></colgroup><tbody valign="top"><tr><td><h3>Ceph Object Store</h3>
|
||||
|
||||
- RESTful Interface
|
||||
- S3- and Swift-compliant APIs
|
||||
@ -107,6 +115,7 @@ about Ceph, see our `Architecture`_ section.
|
||||
governance
|
||||
foundation
|
||||
ceph-volume/index
|
||||
releases/general
|
||||
releases/index
|
||||
Ceph Releases (general) <https://docs.ceph.com/en/latest/releases/general/>
|
||||
Ceph Releases (index) <https://docs.ceph.com/en/latest/releases/>
|
||||
security/index
|
||||
Glossary <glossary>
|
||||
|
@ -2,33 +2,37 @@
|
||||
Cloning the Ceph Source Code Repository
|
||||
=========================================
|
||||
|
||||
You may clone a Ceph branch of the Ceph source code by going to `github Ceph
|
||||
Repository`_, selecting a branch (``master`` by default), and clicking the
|
||||
**Download ZIP** button.
|
||||
To clone a Ceph branch of the Ceph source code, go to `github Ceph
|
||||
Repository`_, select a branch (``main`` by default), and click the **Download
|
||||
ZIP** button.
|
||||
|
||||
.. _github Ceph Repository: https://github.com/ceph/ceph
|
||||
|
||||
To clone the entire git repository, :ref:`install <install-git>` and configure
|
||||
``git``.
|
||||
|
||||
To clone the entire git repository, install and configure ``git``.
|
||||
|
||||
.. _install-git:
|
||||
|
||||
Install Git
|
||||
===========
|
||||
|
||||
To install ``git`` on Debian/Ubuntu, execute::
|
||||
To install ``git`` on Debian/Ubuntu, run the following command:
|
||||
|
||||
sudo apt-get install git
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo apt-get install git
|
||||
|
||||
|
||||
To install ``git`` on CentOS/RHEL, execute::
|
||||
To install ``git`` on CentOS/RHEL, run the following command:
|
||||
|
||||
sudo yum install git
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo yum install git
|
||||
|
||||
|
||||
You must also have a ``github`` account. If you do not have a
|
||||
``github`` account, go to `github.com`_ and register.
|
||||
Follow the directions for setting up git at
|
||||
`Set Up Git`_.
|
||||
You must have a ``github`` account. If you do not have a ``github``
|
||||
account, go to `github.com`_ and register. Follow the directions for setting
|
||||
up git at `Set Up Git`_.
|
||||
|
||||
.. _github.com: https://github.com
|
||||
.. _Set Up Git: https://help.github.com/linux-set-up-git
|
||||
@ -37,26 +41,31 @@ Follow the directions for setting up git at
|
||||
Add SSH Keys (Optional)
|
||||
=======================
|
||||
|
||||
If you intend to commit code to Ceph or to clone using SSH
|
||||
To commit code to Ceph or to clone the respository by using SSH
|
||||
(``git@github.com:ceph/ceph.git``), you must generate SSH keys for github.
|
||||
|
||||
.. tip:: If you only intend to clone the repository, you may
|
||||
.. tip:: If you want only to clone the repository, you can
|
||||
use ``git clone --recursive https://github.com/ceph/ceph.git``
|
||||
without generating SSH keys.
|
||||
|
||||
To generate SSH keys for ``github``, execute::
|
||||
To generate SSH keys for ``github``, run the following command:
|
||||
|
||||
ssh-keygen
|
||||
.. prompt:: bash $
|
||||
|
||||
Get the key to add to your ``github`` account (the following example
|
||||
assumes you used the default file path)::
|
||||
ssh-keygen
|
||||
|
||||
cat .ssh/id_rsa.pub
|
||||
To print the SSH key that you just generated and that you will add to your
|
||||
``github`` account, use the ``cat`` command. (The following example assumes you
|
||||
used the default file path.):
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
cat .ssh/id_rsa.pub
|
||||
|
||||
Copy the public key.
|
||||
|
||||
Go to your ``github`` account, click on "Account Settings" (i.e., the
|
||||
'tools' icon); then, click "SSH Keys" on the left side navbar.
|
||||
Go to your ``github`` account, click "Account Settings" (represented by the
|
||||
'tools' icon), and click "SSH Keys" on the left side navbar.
|
||||
|
||||
Click "Add SSH key" in the "SSH Keys" list, enter a name for the key, paste the
|
||||
key you generated, and press the "Add key" button.
|
||||
@ -65,37 +74,122 @@ key you generated, and press the "Add key" button.
|
||||
Clone the Source
|
||||
================
|
||||
|
||||
To clone the Ceph source code repository, execute::
|
||||
To clone the Ceph source code repository, run the following command:
|
||||
|
||||
git clone --recursive https://github.com/ceph/ceph.git
|
||||
.. prompt:: bash $
|
||||
|
||||
Once ``git clone`` executes, you should have a full copy of the Ceph
|
||||
git clone --recursive https://github.com/ceph/ceph.git
|
||||
|
||||
After ``git clone`` has run, you should have a full copy of the Ceph
|
||||
repository.
|
||||
|
||||
.. tip:: Make sure you maintain the latest copies of the submodules
|
||||
included in the repository. Running ``git status`` will tell you if
|
||||
the submodules are out of date.
|
||||
.. tip:: Make sure you maintain the latest copies of the submodules included in
|
||||
the repository. Running ``git status`` will tell you whether the submodules
|
||||
are out of date. See :ref:`update-submodules` for more information.
|
||||
|
||||
::
|
||||
|
||||
cd ceph
|
||||
git status
|
||||
.. prompt:: bash $
|
||||
|
||||
If your submodules are out of date, run::
|
||||
cd ceph
|
||||
git status
|
||||
|
||||
git submodule update --force --init --recursive
|
||||
.. _update-submodules:
|
||||
|
||||
Updating Submodules
|
||||
-------------------
|
||||
|
||||
#. Determine whether your submodules are out of date:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git status
|
||||
|
||||
A. If your submodules are up to date
|
||||
If your submodules are up to date, the following console output will
|
||||
appear:
|
||||
|
||||
::
|
||||
|
||||
On branch main
|
||||
Your branch is up to date with 'origin/main'.
|
||||
|
||||
nothing to commit, working tree clean
|
||||
|
||||
If you see this console output, then your submodules are up to date.
|
||||
You do not need this procedure.
|
||||
|
||||
|
||||
B. If your submodules are not up to date
|
||||
If your submodules are not up to date, you will see a message that
|
||||
includes a list of "untracked files". The example here shows such a
|
||||
list, which was generated from a real situation in which the
|
||||
submodules were no longer current. Your list of files will not be the
|
||||
same as this list of files, but this list is provided as an example.
|
||||
If in your case any untracked files are listed, then you should
|
||||
continue to the next step of this procedure.
|
||||
|
||||
::
|
||||
|
||||
On branch main
|
||||
Your branch is up to date with 'origin/main'.
|
||||
|
||||
Untracked files:
|
||||
(use "git add <file>..." to include in what will be committed)
|
||||
src/pybind/cephfs/build/
|
||||
src/pybind/cephfs/cephfs.c
|
||||
src/pybind/cephfs/cephfs.egg-info/
|
||||
src/pybind/rados/build/
|
||||
src/pybind/rados/rados.c
|
||||
src/pybind/rados/rados.egg-info/
|
||||
src/pybind/rbd/build/
|
||||
src/pybind/rbd/rbd.c
|
||||
src/pybind/rbd/rbd.egg-info/
|
||||
src/pybind/rgw/build/
|
||||
src/pybind/rgw/rgw.c
|
||||
src/pybind/rgw/rgw.egg-info/
|
||||
|
||||
nothing added to commit but untracked files present (use "git add" to track)
|
||||
|
||||
#. If your submodules are out of date, run the following commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git submodule update --force --init --recursive
|
||||
git clean -fdx
|
||||
git submodule foreach git clean -fdx
|
||||
|
||||
If you still have problems with a submodule directory, use ``rm -rf
|
||||
[directory name]`` to remove the directory. Then run ``git submodule update
|
||||
--init --recursive`` again.
|
||||
|
||||
#. Run ``git status`` again:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
git status
|
||||
|
||||
Your submodules are up to date if you see the following message:
|
||||
|
||||
::
|
||||
|
||||
On branch main
|
||||
Your branch is up to date with 'origin/main'.
|
||||
|
||||
nothing to commit, working tree clean
|
||||
|
||||
Choose a Branch
|
||||
===============
|
||||
|
||||
Once you clone the source code and submodules, your Ceph repository
|
||||
will be on the ``master`` branch by default, which is the unstable
|
||||
will be on the ``main`` branch by default, which is the unstable
|
||||
development branch. You may choose other branches too.
|
||||
|
||||
- ``master``: The unstable development branch.
|
||||
- ``stable``: The bugfix branch.
|
||||
- ``main``: The unstable development branch.
|
||||
- ``stable-release-name``: The name of the stable, `Active Releases`_. e.g. ``Pacific``
|
||||
- ``next``: The release candidate branch.
|
||||
|
||||
::
|
||||
|
||||
git checkout master
|
||||
git checkout main
|
||||
|
||||
.. _Active Releases: https://docs.ceph.com/en/latest/releases/#active-releases
|
||||
|
@ -19,7 +19,7 @@ Ceph Container Images
|
||||
Official Releases
|
||||
-----------------
|
||||
|
||||
Ceph Container images are available from both Quay and Docker Hub::
|
||||
Ceph Container images are available from Quay:
|
||||
|
||||
https://quay.io/repository/ceph/ceph
|
||||
https://hub.docker.com/r/ceph
|
||||
|
@ -120,7 +120,7 @@ For RPMs::
|
||||
|
||||
https://download.ceph.com/rpm-{version}
|
||||
|
||||
The major releases of Ceph are summarized at: :ref:`ceph-releases-general`
|
||||
The major releases of Ceph are summarized at: `Releases`_
|
||||
|
||||
.. tip:: For non-US users: There might be a mirror close to you where
|
||||
to download Ceph from. For more information see: `Ceph Mirrors`_.
|
||||
@ -396,6 +396,7 @@ line to get the short codename.
|
||||
|
||||
|
||||
|
||||
.. _Releases: https://docs.ceph.com/en/latest/releases/
|
||||
.. _the testing Debian repository: https://download.ceph.com/debian-testing/dists
|
||||
.. _the shaman page: https://shaman.ceph.com
|
||||
.. _Ceph Mirrors: ../mirrors
|
||||
|
@ -1,5 +1,7 @@
|
||||
:orphan:
|
||||
|
||||
.. _ceph_osd-daemon:
|
||||
|
||||
========================================
|
||||
ceph-osd -- ceph object storage daemon
|
||||
========================================
|
||||
|
@ -16,15 +16,10 @@ Synopsis
|
||||
Description
|
||||
===========
|
||||
|
||||
**ceph-rbdnamer** prints the pool and image name for the given RBD devices
|
||||
to stdout. It is used by `udev` (using a rule like the one below) to
|
||||
set up a device symlink.
|
||||
|
||||
|
||||
::
|
||||
|
||||
KERNEL=="rbd[0-9]*", PROGRAM="/usr/bin/ceph-rbdnamer %n", SYMLINK+="rbd/%c{1}/%c{2}"
|
||||
|
||||
**ceph-rbdnamer** prints the pool, namespace, image and snapshot names
|
||||
for a given RBD device to stdout. It is used by the `udev` device manager
|
||||
to set up RBD device symlinks. The appropriate `udev` rules are
|
||||
provided in a file named `50-rbd.rules`.
|
||||
|
||||
Availability
|
||||
============
|
||||
|
@ -43,18 +43,6 @@ Descriptions of fields
|
||||
|
||||
cap hit rate
|
||||
|
||||
.. describe:: rlat
|
||||
|
||||
read latency
|
||||
|
||||
.. describe:: wlat
|
||||
|
||||
write latency
|
||||
|
||||
.. describe:: mlat
|
||||
|
||||
metadata latency
|
||||
|
||||
.. describe:: dlease
|
||||
|
||||
dentry lease rate
|
||||
@ -95,6 +83,29 @@ Descriptions of fields
|
||||
|
||||
speed of write IOs compared with the last refresh
|
||||
|
||||
.. describe:: rlatavg
|
||||
|
||||
average read latency
|
||||
|
||||
.. describe:: rlatsd
|
||||
|
||||
standard deviation (variance) for read latency
|
||||
|
||||
.. describe:: wlatavg
|
||||
|
||||
average write latency
|
||||
|
||||
.. describe:: wlatsd
|
||||
|
||||
standard deviation (variance) for write latency
|
||||
|
||||
.. describe:: mlatavg
|
||||
|
||||
average metadata latency
|
||||
|
||||
.. describe:: mlatsd
|
||||
|
||||
standard deviation (variance) for metadata latency
|
||||
|
||||
Availability
|
||||
============
|
||||
|
@ -108,6 +108,16 @@ pools; it only runs simulations by mapping values in the range
|
||||
shows that value **24** is mapped to devices **[11,6]** by rule
|
||||
**1**.
|
||||
|
||||
One of the following is required when using the ``--show-mappings`` option:
|
||||
|
||||
(a) ``--num-rep``
|
||||
(b) both ``--min-rep`` and ``--max-rep``
|
||||
|
||||
``--num-rep`` stands for "number of replicas"; it indicates the number of
|
||||
replicas in a pool, and is used to specify an exact number of replicas (for
|
||||
example ``--num-rep 5``). ``--min-rep`` and ``--max-rep`` are used together
|
||||
to specify a range of replicas (for example, ``--min-rep 1 --max-rep 10``).
|
||||
|
||||
.. option:: --show-bad-mappings
|
||||
|
||||
Displays which value failed to be mapped to the required number of
|
||||
|
@ -825,7 +825,8 @@ Per mapping (block device) `rbd device map` options:
|
||||
* alloc_size - Minimum allocation unit of the underlying OSD object store
|
||||
backend (since 5.1, default is 64K bytes). This is used to round off and
|
||||
drop discards that are too small. For bluestore, the recommended setting is
|
||||
bluestore_min_alloc_size (typically 64K for hard disk drives and 16K for
|
||||
bluestore_min_alloc_size (currently set to 4K for all types of drives,
|
||||
previously used to be set to 64K for hard disk drives and 16K for
|
||||
solid-state drives). For filestore with filestore_punch_hole = false, the
|
||||
recommended setting is image object size (typically 4M).
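
As an illustrative sketch only (the pool and image names are assumptions, and
the 64K value should be matched to your OSDs' allocation unit), the option is
passed at map time like this:

.. prompt:: bash $

   rbd device map -o alloc_size=65536 rbd/myimage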
|
||||
|
||||
|
@ -4,13 +4,30 @@ Debug
|
||||
^^^^^
|
||||
|
||||
This plugin allows you to customize the behaviour of the dashboard according to the
|
||||
debug mode. It can be enabled, disabled or checked with the following command::
|
||||
debug mode. It can be enabled, disabled or checked with the following command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph dashboard debug status
|
||||
|
||||
::
|
||||
|
||||
$ ceph dashboard debug status
|
||||
Debug: 'disabled'
|
||||
$ ceph dashboard debug enable
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph dashboard debug enable
|
||||
|
||||
::
|
||||
|
||||
Debug: 'enabled'
|
||||
$ ceph dashboard debug disable
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph dashboard debug disable
|
||||
|
||||
::
|
||||
|
||||
Debug: 'disabled'
|
||||
|
||||
By default, it's disabled. This is the recommended setting for production
|
||||
|
@ -25,9 +25,14 @@ The list of features that can be enabled/disabled is:
|
||||
|
||||
By default all features come enabled.
|
||||
|
||||
To retrieve a list of features and their current statuses::
|
||||
To retrieve a list of features and their current statuses:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph dashboard feature status
|
||||
|
||||
::
|
||||
|
||||
$ ceph dashboard feature status
|
||||
Feature 'cephfs': 'enabled'
|
||||
Feature 'iscsi': 'enabled'
|
||||
Feature 'mirroring': 'enabled'
|
||||
@ -35,9 +40,14 @@ To retrieve a list of features and their current statuses::
|
||||
Feature 'rgw': 'enabled'
|
||||
Feature 'nfs': 'enabled'
|
||||
|
||||
To enable or disable the status of a single or multiple features::
|
||||
To enable or disable the status of a single or multiple features:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph dashboard feature disable iscsi mirroring
|
||||
|
||||
::
|
||||
|
||||
$ ceph dashboard feature disable iscsi mirroring
|
||||
Feature 'iscsi': disabled
|
||||
Feature 'mirroring': disabled
|
||||
|
||||
|
@ -12,17 +12,23 @@ syntax to specify the expiration time: `Ns|m|h|d|w` for seconds, minutes,
|
||||
hours, days and weeks. If the MOTD should expire after 2 hours, use `2h`
|
||||
or `5w` for 5 weeks. Use `0` to configure a MOTD that does not expire.
|
||||
|
||||
To configure a MOTD, run the following command::
|
||||
To configure a MOTD, run the following command:
|
||||
|
||||
$ ceph dashboard motd set <severity:info|warning|danger> <expires> <message>
|
||||
.. prompt:: bash $
|
||||
|
||||
To show the configured MOTD::
|
||||
ceph dashboard motd set <severity:info|warning|danger> <expires> <message>
|
||||
|
||||
$ ceph dashboard motd get
|
||||
To show the configured MOTD:
|
||||
|
||||
To clear the configured MOTD run::
|
||||
.. prompt:: bash $
|
||||
|
||||
$ ceph dashboard motd clear
|
||||
ceph dashboard motd get
|
||||
|
||||
To clear the configured MOTD run:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph dashboard motd clear
|
||||
|
||||
A MOTD with a `info` or `warning` severity can be closed by the user. The
|
||||
`info` MOTD is not displayed anymore until the local storage cookies are
|
||||
|
@ -5,14 +5,18 @@
|
||||
Orchestrator CLI
|
||||
================
|
||||
|
||||
This module provides a command line interface (CLI) to orchestrator
|
||||
modules (``ceph-mgr`` modules which interface with external orchestration services).
|
||||
This module provides a command line interface (CLI) for orchestrator modules.
|
||||
Orchestrator modules are ``ceph-mgr`` plugins that interface with external
|
||||
orchestration services.
|
||||
|
||||
As the orchestrator CLI unifies multiple external orchestrators, a common nomenclature
|
||||
for the orchestrator module is needed.
|
||||
Definition of Terms
|
||||
===================
|
||||
|
||||
The orchestrator CLI unifies multiple external orchestrators, so we need a
|
||||
common nomenclature for the orchestrator module:
|
||||
|
||||
+--------------------------------------+---------------------------------------+
|
||||
| *host* | hostname (not DNS name) of the |
|
||||
| *host* | hostname (not the DNS name) of the |
|
||||
| | physical host. Not the podname, |
|
||||
| | container name, or hostname inside |
|
||||
| | the container. |
|
||||
@ -20,7 +24,7 @@ for the orchestrator module is needed.
|
||||
| *service type* | The type of the service. e.g., nfs, |
|
||||
| | mds, osd, mon, rgw, mgr, iscsi |
|
||||
+--------------------------------------+---------------------------------------+
|
||||
| *service* | A logical service, Typically |
|
||||
| *service* | A logical service. Typically |
|
||||
| | comprised of multiple service |
|
||||
| | instances on multiple hosts for HA |
|
||||
| | |
|
||||
@ -34,29 +38,28 @@ for the orchestrator module is needed.
|
||||
| | like LIO or knfsd or whatever) |
|
||||
| | |
|
||||
| | This identifier should |
|
||||
| | uniquely identify the instance |
|
||||
| | uniquely identify the instance. |
|
||||
+--------------------------------------+---------------------------------------+
|
||||
|
||||
The relation between the names is the following:
|
||||
|
||||
* A *service* has a specific *service type*
|
||||
* A *daemon* is a physical instance of a *service type*
|
||||
Here is how the names relate:
|
||||
|
||||
* A *service* has a specific *service type*.
|
||||
* A *daemon* is a physical instance of a *service type*.
|
||||
|
||||
.. note::
|
||||
|
||||
Orchestrator modules may only implement a subset of the commands listed below.
|
||||
Also, the implementation of the commands may differ between modules.
|
||||
Orchestrator modules might implement only a subset of the commands listed
|
||||
below. The implementation of the commands may differ between modules.
|
||||
|
||||
Status
|
||||
======
|
||||
|
||||
::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch status [--detail]
|
||||
ceph orch status [--detail]
|
||||
|
||||
Show current orchestrator mode and high-level status (whether the orchestrator
|
||||
plugin is available and operational)
|
||||
This command shows the current orchestrator mode and its high-level status
|
||||
(whether the orchestrator plugin is available and operational).
|
||||
|
||||
|
||||
..
|
||||
@ -92,15 +95,20 @@ plugin is available and operational)
|
||||
Stateless services (MDS/RGW/NFS/rbd-mirror/iSCSI)
|
||||
=================================================
|
||||
|
||||
(Please note: The orchestrator will not configure the services. Please look into the corresponding
|
||||
documentation for service configuration details.)
|
||||
.. note::
|
||||
|
||||
The ``name`` parameter is an identifier of the group of instances:
|
||||
The orchestrator will not configure the services. See the relevant
|
||||
documentation for details about how to configure particular services.
|
||||
|
||||
* a CephFS file system for a group of MDS daemons,
|
||||
* a zone name for a group of RGWs
|
||||
The ``name`` parameter identifies the kind of the group of instances. The
|
||||
following short list explains the meaning of the ``name`` parameter:
|
||||
|
||||
Creating/growing/shrinking/removing services::
|
||||
* A CephFS file system identifies a group of MDS daemons.
|
||||
* A zone name identifies a group of RGWs.
|
||||
|
||||
Creating/growing/shrinking/removing services:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch apply mds <fs_name> [--placement=<placement>] [--dry-run]
|
||||
ceph orch apply rgw <name> [--realm=<realm>] [--zone=<zone>] [--port=<port>] [--ssl] [--placement=<placement>] [--dry-run]
|
||||
@ -111,33 +119,73 @@ where ``placement`` is a :ref:`orchestrator-cli-placement-spec`.
|
||||
|
||||
e.g., ``ceph orch apply mds myfs --placement="3 host1 host2 host3"``
|
||||
|
||||
Service Commands::
|
||||
Service Commands:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch <start|stop|restart|redeploy|reconfig> <service_name>
|
||||
|
||||
.. note:: These commands apply only to cephadm containerized daemons.
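
For example, to restart the MDS service created for ``myfs`` above (assuming
its service name is ``mds.myfs``):

.. prompt:: bash $

   ceph orch restart mds.myfs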
|
||||
|
||||
Options
|
||||
=======
|
||||
|
||||
.. option:: start
|
||||
|
||||
Start the daemon on the corresponding host.
|
||||
|
||||
.. option:: stop
|
||||
|
||||
Stop the daemon on the corresponding host.
|
||||
|
||||
.. option:: restart
|
||||
|
||||
Restart the daemon on the corresponding host.
|
||||
|
||||
.. option:: redeploy
|
||||
|
||||
Redeploy the ceph daemon on the corresponding host. This will recreate the daemon directory
|
||||
structure under ``/var/lib/ceph/<fsid>/<daemon-name>`` (if it doesn't exist), refresh its
|
||||
configuration files, regenerate its unit files, and restart the systemd daemon.
|
||||
|
||||
.. option:: reconfig
|
||||
|
||||
Reconfigure the daemon on the corresponding host. This will refresh configuration files then restart the daemon.
|
||||
|
||||
.. note:: This command assumes that the daemon directory ``/var/lib/ceph/<fsid>/<daemon-name>`` already exists.
|
||||
|
||||
|
||||
Configuring the Orchestrator CLI
|
||||
================================
|
||||
|
||||
To enable the orchestrator, select the orchestrator module to use
|
||||
with the ``set backend`` command::
|
||||
Enable the orchestrator by using the ``set backend`` command to select the orchestrator module that will be used:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch set backend <module>
|
||||
|
||||
For example, to enable the Rook orchestrator module and use it with the CLI::
|
||||
Example - Configuring the Orchestrator CLI
|
||||
------------------------------------------
|
||||
|
||||
For example, to enable the Rook orchestrator module and use it with the CLI:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mgr module enable rook
|
||||
ceph orch set backend rook
|
||||
|
||||
Check the backend is properly configured::
|
||||
Confirm that the backend is properly configured:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch status
|
||||
|
||||
Disable the Orchestrator
|
||||
------------------------
|
||||
|
||||
To disable the orchestrator, use the empty string ``""``::
|
||||
To disable the orchestrator, use the empty string ``""``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch set backend ""
|
||||
ceph mgr module disable rook
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
The :term:`Ceph Storage Cluster` has a messaging layer protocol that enables
|
||||
clients to interact with a :term:`Ceph Monitor` and a :term:`Ceph OSD Daemon`.
|
||||
``librados`` provides this functionality to :term:`Ceph Clients` in the form of
|
||||
``librados`` provides this functionality to :term:`Ceph Client`\s in the form of
|
||||
a library. All Ceph Clients either use ``librados`` or the same functionality
|
||||
encapsulated in ``librados`` to interact with the object store. For example,
|
||||
``librbd`` and ``libcephfs`` leverage this functionality. You may use
|
||||
|
@ -43,19 +43,25 @@ Getting librados for C/C++
|
||||
--------------------------
|
||||
|
||||
To install ``librados`` development support files for C/C++ on Debian/Ubuntu
|
||||
distributions, execute the following::
|
||||
distributions, execute the following:
|
||||
|
||||
sudo apt-get install librados-dev
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo apt-get install librados-dev
|
||||
|
||||
To install ``librados`` development support files for C/C++ on RHEL/CentOS
|
||||
distributions, execute the following::
|
||||
distributions, execute the following:
|
||||
|
||||
sudo yum install librados2-devel
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo yum install librados2-devel
|
||||
|
||||
Once you install ``librados`` for developers, you can find the required
|
||||
headers for C/C++ under ``/usr/include/rados``. ::
|
||||
headers for C/C++ under ``/usr/include/rados``:
|
||||
|
||||
ls /usr/include/rados
|
||||
.. prompt:: bash $
|
||||
|
||||
ls /usr/include/rados
|
||||
|
||||
|
||||
Getting librados for Python
|
||||
@ -68,14 +74,25 @@ and the ``librados2-devel`` package for RHEL/CentOS will install the
|
||||
directly too.
|
||||
|
||||
To install ``librados`` development support files for Python on Debian/Ubuntu
|
||||
distributions, execute the following::
|
||||
distributions, execute the following:
|
||||
|
||||
sudo apt-get install python-rados
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo apt-get install python3-rados
|
||||
|
||||
To install ``librados`` development support files for Python on RHEL/CentOS
|
||||
distributions, execute the following::
|
||||
distributions, execute the following:
|
||||
|
||||
sudo yum install python-rados
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo yum install python-rados
|
||||
|
||||
To install ``librados`` development support files for Python on SLE/openSUSE
|
||||
distributions, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo zypper install python3-rados
|
||||
|
||||
You can find the module under ``/usr/share/pyshared`` on Debian systems,
|
||||
or under ``/usr/lib/python*/site-packages`` on CentOS/RHEL systems.
|
||||
@ -86,37 +103,49 @@ Getting librados for Java
|
||||
|
||||
To install ``librados`` for Java, you need to execute the following procedure:
|
||||
|
||||
#. Install ``jna.jar``. For Debian/Ubuntu, execute::
|
||||
#. Install ``jna.jar``. For Debian/Ubuntu, execute:
|
||||
|
||||
sudo apt-get install libjna-java
|
||||
.. prompt:: bash $
|
||||
|
||||
For CentOS/RHEL, execute::
|
||||
sudo apt-get install libjna-java
|
||||
|
||||
sudo yum install jna
|
||||
For CentOS/RHEL, execute:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo yum install jna
|
||||
|
||||
The JAR files are located in ``/usr/share/java``.
|
||||
|
||||
#. Clone the ``rados-java`` repository::
|
||||
#. Clone the ``rados-java`` repository:
|
||||
|
||||
git clone --recursive https://github.com/ceph/rados-java.git
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Build the ``rados-java`` repository::
|
||||
git clone --recursive https://github.com/ceph/rados-java.git
|
||||
|
||||
cd rados-java
|
||||
ant
|
||||
#. Build the ``rados-java`` repository:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
cd rados-java
|
||||
ant
|
||||
|
||||
The JAR file is located under ``rados-java/target``.
|
||||
|
||||
#. Copy the JAR for RADOS to a common location (e.g., ``/usr/share/java``) and
|
||||
ensure that it and the JNA JAR are in your JVM's classpath. For example::
|
||||
ensure that it and the JNA JAR are in your JVM's classpath. For example:
|
||||
|
||||
sudo cp target/rados-0.1.3.jar /usr/share/java/rados-0.1.3.jar
|
||||
sudo ln -s /usr/share/java/jna-3.2.7.jar /usr/lib/jvm/default-java/jre/lib/ext/jna-3.2.7.jar
|
||||
sudo ln -s /usr/share/java/rados-0.1.3.jar /usr/lib/jvm/default-java/jre/lib/ext/rados-0.1.3.jar
|
||||
.. prompt:: bash $
|
||||
|
||||
To build the documentation, execute the following::
|
||||
sudo cp target/rados-0.1.3.jar /usr/share/java/rados-0.1.3.jar
|
||||
sudo ln -s /usr/share/java/jna-3.2.7.jar /usr/lib/jvm/default-java/jre/lib/ext/jna-3.2.7.jar
|
||||
sudo ln -s /usr/share/java/rados-0.1.3.jar /usr/lib/jvm/default-java/jre/lib/ext/rados-0.1.3.jar
|
||||
|
||||
ant docs
|
||||
To build the documentation, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ant docs
|
||||
|
||||
|
||||
Getting librados for PHP
|
||||
@ -124,29 +153,37 @@ Getting librados for PHP
|
||||
|
||||
To install the ``librados`` extension for PHP, you need to execute the following procedure:
|
||||
|
||||
#. Install php-dev. For Debian/Ubuntu, execute::
|
||||
#. Install php-dev. For Debian/Ubuntu, execute:
|
||||
|
||||
sudo apt-get install php5-dev build-essential
|
||||
.. prompt:: bash $
|
||||
|
||||
For CentOS/RHEL, execute::
|
||||
sudo apt-get install php5-dev build-essential
|
||||
|
||||
sudo yum install php-devel
|
||||
For CentOS/RHEL, execute:
|
||||
|
||||
#. Clone the ``phprados`` repository::
|
||||
.. prompt:: bash $
|
||||
|
||||
git clone https://github.com/ceph/phprados.git
|
||||
sudo yum install php-devel
|
||||
|
||||
#. Build ``phprados``::
|
||||
#. Clone the ``phprados`` repository:
|
||||
|
||||
cd phprados
|
||||
phpize
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Enable ``phprados`` in php.ini by adding::
|
||||
git clone https://github.com/ceph/phprados.git
|
||||
|
||||
extension=rados.so
|
||||
#. Build ``phprados``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
cd phprados
|
||||
phpize
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
|
||||
#. Enable ``phprados`` by adding the following line to ``php.ini``::
|
||||
|
||||
extension=rados.so
|
||||
|
||||
|
||||
Step 2: Configuring a Cluster Handle
|
||||
@ -321,9 +358,11 @@ it and connecting to the cluster might look something like this:
|
||||
|
||||
}
|
||||
|
||||
Compile your client and link to ``librados`` using ``-lrados``. For example::
|
||||
Compile your client and link to ``librados`` using ``-lrados``. For example:
|
||||
|
||||
gcc ceph-client.c -lrados -o ceph-client
|
||||
.. prompt:: bash $
|
||||
|
||||
gcc ceph-client.c -lrados -o ceph-client
|
||||
|
||||
|
||||
C++ Example
|
||||
@ -399,10 +438,12 @@ you to initialize a ``librados::Rados`` cluster handle object:
|
||||
|
||||
|
||||
Compile the source; then, link ``librados`` using ``-lrados``.
|
||||
For example::
|
||||
For example:
|
||||
|
||||
g++ -g -c ceph-client.cc -o ceph-client.o
|
||||
g++ -g ceph-client.o -lrados -o ceph-client
|
||||
.. prompt:: bash $
|
||||
|
||||
g++ -g -c ceph-client.cc -o ceph-client.o
|
||||
g++ -g ceph-client.o -lrados -o ceph-client
|
||||
|
||||
|
||||
|
||||
@ -436,9 +477,11 @@ into exceptions.
|
||||
print "Connected to the cluster."
|
||||
|
||||
|
||||
Execute the example to verify that it connects to your cluster. ::
|
||||
Execute the example to verify that it connects to your cluster:
|
||||
|
||||
python ceph-client.py
|
||||
.. prompt:: bash $
|
||||
|
||||
python ceph-client.py
|
||||
|
||||
|
||||
Java Example
|
||||
@ -478,10 +521,12 @@ binding converts C++-based errors into exceptions.
|
||||
|
||||
Compile the source; then, run it. If you have copied the JAR to
|
||||
``/usr/share/java`` and sym linked from your ``ext`` directory, you won't need
|
||||
to specify the classpath. For example::
|
||||
to specify the classpath. For example:
|
||||
|
||||
javac CephClient.java
|
||||
java CephClient
|
||||
.. prompt:: bash $
|
||||
|
||||
javac CephClient.java
|
||||
java CephClient
|
||||
|
||||
|
||||
PHP Example
|
||||
@ -502,9 +547,11 @@ With the RADOS extension enabled in PHP you can start creating a new cluster han
|
||||
}
|
||||
|
||||
|
||||
Save this as rados.php and run the code::
|
||||
Save this as rados.php and run the code:
|
||||
|
||||
php rados.php
|
||||
.. prompt:: bash $
|
||||
|
||||
php rados.php
|
||||
|
||||
|
||||
Step 3: Creating an I/O Context
|
||||
|
@ -68,7 +68,7 @@ Your Python client also requires a client keyring. For this example, we use the
|
||||
``client.admin`` key by default. If you would like to specify the keyring when
|
||||
creating the cluster handle, you may use the ``conf`` argument. Alternatively,
|
||||
you may specify the keyring path in your Ceph configuration file. For example,
|
||||
you may add something like the following line to you Ceph configuration file::
|
||||
you may add something like the following line to your Ceph configuration file::
|
||||
|
||||
keyring = /path/to/ceph.client.admin.keyring
|
||||
|
||||
|
@ -195,9 +195,11 @@ specify a ``keyring`` entry in your Ceph configuration file.
|
||||
We recommend copying the Ceph Storage Cluster's keyring file to nodes where you
|
||||
will run administrative commands, because it contains the ``client.admin`` key.
|
||||
|
||||
To perform this step manually, execute the following::
|
||||
To perform this step manually, execute the following:
|
||||
|
||||
sudo scp {user}@{ceph-cluster-host}:/etc/ceph/ceph.client.admin.keyring /etc/ceph/ceph.client.admin.keyring
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo scp {user}@{ceph-cluster-host}:/etc/ceph/ceph.client.admin.keyring /etc/ceph/ceph.client.admin.keyring
|
||||
|
||||
.. tip:: Ensure the ``ceph.keyring`` file has appropriate permissions set
|
||||
(e.g., ``chmod 644``) on your client machine.
|
||||
|
@ -42,13 +42,17 @@ it will fit). This means that if a DB device is specified but an explicit
|
||||
WAL device is not, the WAL will be implicitly colocated with the DB on the faster
|
||||
device.
|
||||
|
||||
A single-device (colocated) BlueStore OSD can be provisioned with::
|
||||
A single-device (colocated) BlueStore OSD can be provisioned with:
|
||||
|
||||
ceph-volume lvm prepare --bluestore --data <device>
|
||||
.. prompt:: bash $
|
||||
|
||||
To specify a WAL device and/or DB device, ::
|
||||
ceph-volume lvm prepare --bluestore --data <device>
|
||||
|
||||
ceph-volume lvm prepare --bluestore --data <device> --block.wal <wal-device> --block.db <db-device>
|
||||
To specify a WAL device and/or DB device:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm prepare --bluestore --data <device> --block.wal <wal-device> --block.db <db-device>
|
||||
|
||||
.. note:: ``--data`` can be a Logical Volume using *vg/lv* notation. Other
|
||||
devices can be existing logical volumes or GPT partitions.
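
As an illustrative sketch (the device paths below are assumptions for this
example), data might be placed on a rotational drive with the DB on an NVMe
partition:

.. prompt:: bash $

   ceph-volume lvm prepare --bluestore --data /dev/sdb --block.db /dev/nvme0n1p1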
|
||||
@ -64,17 +68,21 @@ the deployment strategy:
|
||||
**block (data) only**
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
If all devices are the same type, for example all rotational drives, and
|
||||
there are no fast devices to use for metadata, it makes sense to specifiy the
|
||||
there are no fast devices to use for metadata, it makes sense to specify the
|
||||
block device only and to not separate ``block.db`` or ``block.wal``. The
|
||||
:ref:`ceph-volume-lvm` command for a single ``/dev/sda`` device looks like::
|
||||
:ref:`ceph-volume-lvm` command for a single ``/dev/sda`` device looks like:
|
||||
|
||||
ceph-volume lvm create --bluestore --data /dev/sda
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm create --bluestore --data /dev/sda
|
||||
|
||||
If logical volumes have already been created for each device, (a single LV
|
||||
using 100% of the device), then the :ref:`ceph-volume-lvm` call for an LV named
|
||||
``ceph-vg/block-lv`` would look like::
|
||||
``ceph-vg/block-lv`` would look like:
|
||||
|
||||
ceph-volume lvm create --bluestore --data ceph-vg/block-lv
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm create --bluestore --data ceph-vg/block-lv
|
||||
|
||||
.. _bluestore-mixed-device-config:
|
||||
|
||||
@ -88,35 +96,43 @@ You must create these volume groups and logical volumes manually as
|
||||
the ``ceph-volume`` tool is currently not able to do so automatically.
|
||||
|
||||
For the below example, let us assume four rotational drives (``sda``, ``sdb``, ``sdc``, and ``sdd``)
|
||||
and one (fast) solid state drive (``sdx``). First create the volume groups::
|
||||
and one (fast) solid state drive (``sdx``). First create the volume groups:
|
||||
|
||||
$ vgcreate ceph-block-0 /dev/sda
|
||||
$ vgcreate ceph-block-1 /dev/sdb
|
||||
$ vgcreate ceph-block-2 /dev/sdc
|
||||
$ vgcreate ceph-block-3 /dev/sdd
|
||||
.. prompt:: bash $
|
||||
|
||||
Now create the logical volumes for ``block``::
|
||||
vgcreate ceph-block-0 /dev/sda
|
||||
vgcreate ceph-block-1 /dev/sdb
|
||||
vgcreate ceph-block-2 /dev/sdc
|
||||
vgcreate ceph-block-3 /dev/sdd
|
||||
|
||||
$ lvcreate -l 100%FREE -n block-0 ceph-block-0
|
||||
$ lvcreate -l 100%FREE -n block-1 ceph-block-1
|
||||
$ lvcreate -l 100%FREE -n block-2 ceph-block-2
|
||||
$ lvcreate -l 100%FREE -n block-3 ceph-block-3
|
||||
Now create the logical volumes for ``block``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
lvcreate -l 100%FREE -n block-0 ceph-block-0
|
||||
lvcreate -l 100%FREE -n block-1 ceph-block-1
|
||||
lvcreate -l 100%FREE -n block-2 ceph-block-2
|
||||
lvcreate -l 100%FREE -n block-3 ceph-block-3
|
||||
|
||||
We are creating 4 OSDs for the four slow spinning devices, so assuming a 200GB
|
||||
SSD in ``/dev/sdx`` we will create 4 logical volumes, each of 50GB::
|
||||
SSD in ``/dev/sdx`` we will create 4 logical volumes, each of 50GB:
|
||||
|
||||
$ vgcreate ceph-db-0 /dev/sdx
|
||||
$ lvcreate -L 50GB -n db-0 ceph-db-0
|
||||
$ lvcreate -L 50GB -n db-1 ceph-db-0
|
||||
$ lvcreate -L 50GB -n db-2 ceph-db-0
|
||||
$ lvcreate -L 50GB -n db-3 ceph-db-0
|
||||
.. prompt:: bash $
|
||||
|
||||
Finally, create the 4 OSDs with ``ceph-volume``::
|
||||
vgcreate ceph-db-0 /dev/sdx
|
||||
lvcreate -L 50GB -n db-0 ceph-db-0
|
||||
lvcreate -L 50GB -n db-1 ceph-db-0
|
||||
lvcreate -L 50GB -n db-2 ceph-db-0
|
||||
lvcreate -L 50GB -n db-3 ceph-db-0
|
||||
|
||||
$ ceph-volume lvm create --bluestore --data ceph-block-0/block-0 --block.db ceph-db-0/db-0
|
||||
$ ceph-volume lvm create --bluestore --data ceph-block-1/block-1 --block.db ceph-db-0/db-1
|
||||
$ ceph-volume lvm create --bluestore --data ceph-block-2/block-2 --block.db ceph-db-0/db-2
|
||||
$ ceph-volume lvm create --bluestore --data ceph-block-3/block-3 --block.db ceph-db-0/db-3
|
||||
Finally, create the 4 OSDs with ``ceph-volume``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm create --bluestore --data ceph-block-0/block-0 --block.db ceph-db-0/db-0
|
||||
ceph-volume lvm create --bluestore --data ceph-block-1/block-1 --block.db ceph-db-0/db-1
|
||||
ceph-volume lvm create --bluestore --data ceph-block-2/block-2 --block.db ceph-db-0/db-2
|
||||
ceph-volume lvm create --bluestore --data ceph-block-3/block-3 --block.db ceph-db-0/db-3
|
||||
|
||||
These operations should end up creating four OSDs, with ``block`` on the slower
|
||||
rotational drives with a 50 GB logical volume (DB) for each on the solid state
|
||||
@ -139,7 +155,7 @@ In older releases, internal level sizes mean that the DB can fully utilize only
|
||||
specific partition / LV sizes that correspond to sums of L0, L0+L1, L1+L2,
|
||||
etc. sizes, which with default settings means roughly 3 GB, 30 GB, 300 GB, and
|
||||
so forth. Most deployments will not substantially benefit from sizing to
|
||||
accomodate L3 and higher, though DB compaction can be facilitated by doubling
|
||||
accommodate L3 and higher, though DB compaction can be facilitated by doubling
|
||||
these figures to 6GB, 60GB, and 600GB.
|
||||
|
||||
Improvements in releases beginning with Nautilus 14.2.12 and Octopus 15.2.6
|
||||
@ -167,93 +183,6 @@ of priorities. If priority information is not available, the
|
||||
``bluestore_cache_meta_ratio`` and ``bluestore_cache_kv_ratio`` options are
|
||||
used as fallbacks.
|
||||
|
||||
``bluestore_cache_autotune``
|
||||
|
||||
:Description: Automatically tune the space ratios assigned to various BlueStore
|
||||
caches while respecting minimum values.
|
||||
:Type: Boolean
|
||||
:Required: Yes
|
||||
:Default: ``True``
|
||||
|
||||
``osd_memory_target``
|
||||
|
||||
:Description: When TCMalloc is available and cache autotuning is enabled, try to
|
||||
keep this many bytes mapped in memory. Note: This may not exactly
|
||||
match the RSS memory usage of the process. While the total amount
|
||||
of heap memory mapped by the process should usually be close
|
||||
to this target, there is no guarantee that the kernel will actually
|
||||
reclaim memory that has been unmapped. During initial development,
|
||||
it was found that some kernels result in the OSD's RSS memory
|
||||
exceeding the mapped memory by up to 20%. It is hypothesised
|
||||
however, that the kernel generally may be more aggressive about
|
||||
reclaiming unmapped memory when there is a high amount of memory
|
||||
pressure. Your mileage may vary.
|
||||
:Type: Unsigned Integer
|
||||
:Required: Yes
|
||||
:Default: ``4294967296``
|
||||
|
||||
``bluestore_cache_autotune_chunk_size``
|
||||
|
||||
:Description: The chunk size in bytes to allocate to caches when cache autotune
|
||||
is enabled. When the autotuner assigns memory to various caches,
|
||||
it will allocate memory in chunks. This is done to avoid
|
||||
evictions when there are minor fluctuations in the heap size or
|
||||
autotuned cache ratios.
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: ``33554432``
|
||||
|
||||
``bluestore_cache_autotune_interval``
|
||||
|
||||
:Description: The number of seconds to wait between rebalances when cache autotune
|
||||
is enabled. This setting changes how quickly the allocation ratios of
|
||||
various caches are recomputed. Note: Setting this interval too small
|
||||
can result in high CPU usage and lower performance.
|
||||
:Type: Float
|
||||
:Required: No
|
||||
:Default: ``5``
|
||||
|
||||
``osd_memory_base``
|
||||
|
||||
:Description: When TCMalloc and cache autotuning are enabled, estimate the minimum
|
||||
amount of memory in bytes the OSD will need. This is used to help
|
||||
the autotuner estimate the expected aggregate memory consumption of
|
||||
the caches.
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: ``805306368``
|
||||
|
||||
``osd_memory_expected_fragmentation``
|
||||
|
||||
:Description: When TCMalloc and cache autotuning is enabled, estimate the
|
||||
percentage of memory fragmentation. This is used to help the
|
||||
autotuner estimate the expected aggregate memory consumption
|
||||
of the caches.
|
||||
:Type: Float
|
||||
:Required: No
|
||||
:Default: ``0.15``
|
||||
|
||||
``osd_memory_cache_min``
|
||||
|
||||
:Description: When TCMalloc and cache autotuning are enabled, set the minimum
|
||||
amount of memory used for caches. Note: Setting this value too
|
||||
low can result in significant cache thrashing.
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: ``134217728``
|
||||
|
||||
``osd_memory_cache_resize_interval``
|
||||
|
||||
:Description: When TCMalloc and cache autotuning are enabled, wait this many
|
||||
seconds between resizing caches. This setting changes the total
|
||||
amount of memory available for BlueStore to use for caching. Note
|
||||
that setting this interval too small can result in memory allocator
|
||||
thrashing and lower performance.
|
||||
:Type: Float
|
||||
:Required: No
|
||||
:Default: ``1``
|
||||
|
||||
|
||||
Manual Cache Sizing
|
||||
===================
|
||||
|
||||
@ -286,53 +215,6 @@ device) as well as the meta and kv ratios.
|
||||
The data fraction can be calculated by
|
||||
``<effective_cache_size> * (1 - bluestore_cache_meta_ratio - bluestore_cache_kv_ratio)``
|
||||
|
||||
``bluestore_cache_size``
|
||||
|
||||
:Description: The amount of memory BlueStore will use for its cache. If zero,
|
||||
``bluestore_cache_size_hdd`` or ``bluestore_cache_size_ssd`` will
|
||||
be used instead.
|
||||
:Type: Unsigned Integer
|
||||
:Required: Yes
|
||||
:Default: ``0``
|
||||
|
||||
``bluestore_cache_size_hdd``
|
||||
|
||||
:Description: The default amount of memory BlueStore will use for its cache when
|
||||
backed by an HDD.
|
||||
:Type: Unsigned Integer
|
||||
:Required: Yes
|
||||
:Default: ``1 * 1024 * 1024 * 1024`` (1 GB)
|
||||
|
||||
``bluestore_cache_size_ssd``
|
||||
|
||||
:Description: The default amount of memory BlueStore will use for its cache when
|
||||
backed by an SSD.
|
||||
:Type: Unsigned Integer
|
||||
:Required: Yes
|
||||
:Default: ``3 * 1024 * 1024 * 1024`` (3 GB)
|
||||
|
||||
``bluestore_cache_meta_ratio``
|
||||
|
||||
:Description: The ratio of cache devoted to metadata.
|
||||
:Type: Floating point
|
||||
:Required: Yes
|
||||
:Default: ``.4``
|
||||
|
||||
``bluestore_cache_kv_ratio``
|
||||
|
||||
:Description: The ratio of cache devoted to key/value data (RocksDB).
|
||||
:Type: Floating point
|
||||
:Required: Yes
|
||||
:Default: ``.4``
|
||||
|
||||
``bluestore_cache_kv_max``
|
||||
|
||||
:Description: The maximum amount of cache devoted to key/value data (RocksDB).
|
||||
:Type: Unsigned Integer
|
||||
:Required: Yes
|
||||
:Default: ``512 * 1024*1024`` (512 MB)
|
||||
|
||||
|
||||
Checksums
|
||||
=========
|
||||
|
||||
@ -358,18 +240,11 @@ The smaller checksum values can be used by selecting `crc32c_16` or
|
||||
`crc32c_8` as the checksum algorithm.
|
||||
|
||||
The *checksum algorithm* can be set either via a per-pool
|
||||
``csum_type`` property or the global config option. For example, ::
|
||||
``csum_type`` property or the global config option. For example:
|
||||
|
||||
ceph osd pool set <pool-name> csum_type <algorithm>
|
||||
|
||||
``bluestore_csum_type``
|
||||
|
||||
:Description: The default checksum algorithm to use.
|
||||
:Type: String
|
||||
:Required: Yes
|
||||
:Valid Settings: ``none``, ``crc32c``, ``crc32c_16``, ``crc32c_8``, ``xxhash32``, ``xxhash64``
|
||||
:Default: ``crc32c``
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set <pool-name> csum_type <algorithm>
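
For example, to switch a hypothetical pool named ``mypool`` to the smaller
16-bit checksum:

.. prompt:: bash $

   ceph osd pool set mypool csum_type crc32c_16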
|
||||
|
||||
Inline Compression
|
||||
==================
|
||||
@ -401,107 +276,47 @@ must be 70% of the size of the original (or smaller).
|
||||
The *compression mode*, *compression algorithm*, *compression required
|
||||
ratio*, *min blob size*, and *max blob size* can be set either via a
|
||||
per-pool property or a global config option. Pool properties can be
|
||||
set with::
|
||||
set with:
|
||||
|
||||
ceph osd pool set <pool-name> compression_algorithm <algorithm>
|
||||
ceph osd pool set <pool-name> compression_mode <mode>
|
||||
ceph osd pool set <pool-name> compression_required_ratio <ratio>
|
||||
ceph osd pool set <pool-name> compression_min_blob_size <size>
|
||||
ceph osd pool set <pool-name> compression_max_blob_size <size>
|
||||
.. prompt:: bash $
|
||||
|
||||
``bluestore_compression_algorithm``
|
||||
ceph osd pool set <pool-name> compression_algorithm <algorithm>
|
||||
ceph osd pool set <pool-name> compression_mode <mode>
|
||||
ceph osd pool set <pool-name> compression_required_ratio <ratio>
|
||||
ceph osd pool set <pool-name> compression_min_blob_size <size>
|
||||
ceph osd pool set <pool-name> compression_max_blob_size <size>
|
||||
|
||||
:Description: The default compressor to use (if any) if the per-pool property
|
||||
``compression_algorithm`` is not set. Note that ``zstd`` is *not*
|
||||
recommended for BlueStore due to high CPU overhead when
|
||||
compressing small amounts of data.
|
||||
:Type: String
|
||||
:Required: No
|
||||
:Valid Settings: ``lz4``, ``snappy``, ``zlib``, ``zstd``
|
||||
:Default: ``snappy``
|
||||
.. _bluestore-rocksdb-sharding:
|
||||
|
||||
``bluestore_compression_mode``
|
||||
RocksDB Sharding
|
||||
================
|
||||
|
||||
:Description: The default policy for using compression if the per-pool property
|
||||
``compression_mode`` is not set. ``none`` means never use
|
||||
compression. ``passive`` means use compression when
|
||||
:c:func:`clients hint <rados_set_alloc_hint>` that data is
|
||||
compressible. ``aggressive`` means use compression unless
|
||||
clients hint that data is not compressible. ``force`` means use
|
||||
compression under all circumstances even if the clients hint that
|
||||
the data is not compressible.
|
||||
:Type: String
|
||||
:Required: No
|
||||
:Valid Settings: ``none``, ``passive``, ``aggressive``, ``force``
|
||||
:Default: ``none``
|
||||
Internally BlueStore uses multiple types of key-value data,
|
||||
stored in RocksDB. Each data type in BlueStore is assigned a
|
||||
unique prefix. Until Pacific all key-value data was stored in
|
||||
a single RocksDB column family: 'default'. Since Pacific,
|
||||
BlueStore can divide this data into multiple RocksDB column
|
||||
families. When keys have similar access frequency, modification
|
||||
frequency and lifetime, BlueStore benefits from better caching
|
||||
and more precise compaction. This improves performance, and also
|
||||
requires less disk space during compaction, since each column
|
||||
family is smaller and can be compacted independently of the others.
|
||||
|
||||
``bluestore_compression_required_ratio``
|
||||
OSDs deployed in Pacific or later use RocksDB sharding by default.
|
||||
If Ceph is upgraded to Pacific from a previous version, sharding is off.
|
||||
|
||||
:Description: The ratio of the size of the data chunk after
|
||||
compression relative to the original size must be at
|
||||
least this small in order to store the compressed
|
||||
version.
|
||||
To enable sharding and apply the Pacific defaults, stop an OSD and run the following command:
|
||||
|
||||
:Type: Floating point
|
||||
:Required: No
|
||||
:Default: .875
|
||||
.. prompt:: bash #
|
||||
|
||||
``bluestore_compression_min_blob_size``
|
||||
ceph-bluestore-tool \
|
||||
--path <data path> \
|
||||
--sharding="m(3) p(3,0-12) O(3,0-13)=block_cache={type=binned_lru} L P" \
|
||||
reshard
|
||||
|
||||
:Description: Chunks smaller than this are never compressed.
|
||||
The per-pool property ``compression_min_blob_size`` overrides
|
||||
this setting.
|
||||
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: 0
|
||||
|
||||
``bluestore_compression_min_blob_size_hdd``
|
||||
|
||||
:Description: Default value of ``bluestore compression min blob size``
|
||||
for rotational media.
|
||||
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: 128K
|
||||
|
||||
``bluestore_compression_min_blob_size_ssd``
|
||||
|
||||
:Description: Default value of ``bluestore compression min blob size``
|
||||
for non-rotational (solid state) media.
|
||||
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: 8K
|
||||
|
||||
``bluestore_compression_max_blob_size``
|
||||
|
||||
:Description: Chunks larger than this value are broken into smaller blobs of at most
|
||||
``bluestore_compression_max_blob_size`` bytes before being compressed.
|
||||
The per-pool property ``compression_max_blob_size`` overrides
|
||||
this setting.
|
||||
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: 0
|
||||
|
||||
``bluestore_compression_max_blob_size_hdd``
|
||||
|
||||
:Description: Default value of ``bluestore compression max blob size``
|
||||
for rotational media.
|
||||
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: 512K
|
||||
|
||||
``bluestore_compression_max_blob_size_ssd``
|
||||
|
||||
:Description: Default value of ``bluestore compression max blob size``
|
||||
for non-rotational (SSD, NVMe) media.
|
||||
|
||||
:Type: Unsigned Integer
|
||||
:Required: No
|
||||
:Default: 64K
|
||||
Throttling
|
||||
==========
|
||||
|
||||
SPDK Usage
|
||||
==================
|
||||
@ -512,29 +327,38 @@ Refer to `SPDK document`__ for more details.
|
||||
.. __: http://www.spdk.io/doc/getting_started.html#getting_started_examples
|
||||
|
||||
SPDK offers a script to configure the device automatically. Users can run the
|
||||
script as root::
|
||||
script as root:
|
||||
|
||||
$ sudo src/spdk/scripts/setup.sh
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo src/spdk/scripts/setup.sh
|
||||
|
||||
You will need to specify the subject NVMe device's device selector with
|
||||
the "spdk:" prefix for ``bluestore_block_path``.
|
||||
|
||||
For example, you can find the device selector of an Intel PCIe SSD with::
|
||||
For example, you can find the device selector of an Intel PCIe SSD with:
|
||||
|
||||
$ lspci -mm -n -D -d 8086:0953
|
||||
.. prompt:: bash $
|
||||
|
||||
lspci -mm -n -D -d 8086:0953
|
||||
|
||||
The device selector always has the form of ``DDDD:BB:DD.FF`` or ``DDDD.BB.DD.FF``.
|
||||
|
||||
and then set::
|
||||
|
||||
bluestore_block_path = spdk:0000:01:00.0
|
||||
bluestore_block_path = "spdk:trtype:PCIe traddr:0000:01:00.0"
|
||||
|
||||
Where ``0000:01:00.0`` is the device selector found in the output of ``lspci``
|
||||
command above.
|
||||
|
||||
You may also specify a remote NVMeoF target over the TCP transport as in the
|
||||
following example::
|
||||
|
||||
bluestore_block_path = "spdk:trtype:TCP traddr:10.67.110.197 trsvcid:4420 subnqn:nqn.2019-02.io.spdk:cnode1"
|
||||
|
||||
To run multiple SPDK instances per node, you must specify the
|
||||
amount of dpdk memory in MB that each instance will use, to make sure each
|
||||
instance uses its own dpdk memory
|
||||
instance uses its own DPDK memory.
|
||||
|
||||
In most cases, a single device can be used for data, DB, and WAL. We describe
|
||||
this strategy as *colocating* these components. Be sure to enter the below
|
||||
@ -547,3 +371,112 @@ settings to ensure that all IOs are issued through SPDK.::
|
||||
|
||||
Otherwise, the current implementation will populate the SPDK map files with
|
||||
kernel file system symbols and will use the kernel driver to issue DB/WAL IO.
|
||||
|
||||
Minimum Allocation Size
|
||||
========================
|
||||
|
||||
There is a configured minimum amount of storage that BlueStore will allocate on
|
||||
an OSD. In practice, this is the least amount of capacity that a RADOS object
|
||||
can consume. The value of `bluestore_min_alloc_size` is derived from the
|
||||
value of `bluestore_min_alloc_size_hdd` or `bluestore_min_alloc_size_ssd`
|
||||
depending on the OSD's ``rotational`` attribute. This means that when an OSD
|
||||
is created on an HDD, BlueStore will be initialized with the current value
|
||||
of `bluestore_min_alloc_size_hdd`, and SSD OSDs (including NVMe devices)
|
||||
with the value of `bluestore_min_alloc_size_ssd`.
|
||||
|
||||
Through the Mimic release, the default values were 64KB and 16KB for rotational
|
||||
(HDD) and non-rotational (SSD) media respectively. Octopus changed the default
|
||||
for SSD (non-rotational) media to 4KB, and Pacific changed the default for HDD
|
||||
(rotational) media to 4KB as well.
|
||||
|
||||
These changes were driven by space amplification experienced by Ceph RADOS
|
||||
Gateway (RGW) deployments that host large numbers of small files
|
||||
(S3/Swift objects).
|
||||
|
||||
For example, when an RGW client stores a 1KB S3 object, it is written to a
|
||||
single RADOS object. With the default `min_alloc_size` value, 4KB of
|
||||
underlying drive space is allocated. This means that roughly
|
||||
(4KB - 1KB) == 3KB is allocated but never used, which corresponds to 300%
|
||||
overhead or 25% efficiency. Similarly, a 5KB user object will be stored
|
||||
as one 4KB and one 1KB RADOS object, again stranding 4KB of device capacity,
|
||||
though in this case the overhead is a much smaller percentage. Think of this
|
||||
in terms of the remainder from a modulus operation. The overhead *percentage*
|
||||
thus decreases rapidly as user object size increases.
|
||||
|
||||
An easily missed additional subtlety is that this
|
||||
takes place for *each* replica. So when using the default three copies of
|
||||
data (3R), a 1KB S3 object actually consumes roughly 9KB of storage device
|
||||
capacity. If erasure coding (EC) is used instead of replication, the
|
||||
amplification may be even higher: for a ``k=4,m=2`` pool, our 1KB S3 object
|
||||
will allocate (6 * 4KB) = 24KB of device capacity.
|
||||
|
||||
When an RGW bucket pool contains many relatively large user objects, the effect
|
||||
of this phenomenon is often negligible, but should be considered for deployments
|
||||
that expect a significant fraction of relatively small objects.
|
||||
|
||||
The 4KB default value aligns well with conventional HDD and SSD devices. Some
|
||||
new coarse-IU (Indirection Unit) QLC SSDs however perform and wear best
|
||||
when `bluestore_min_alloc_size_ssd`
|
||||
is set at OSD creation to match the device's IU: 8KB, 16KB, or even 64KB.
|
||||
These novel storage drives allow one to achieve read performance competitive
|
||||
with conventional TLC SSDs and write performance faster than HDDs, with
|
||||
high density and lower cost than TLC SSDs.
|
||||
|
||||
Note that when creating OSDs on these devices, one must carefully apply the
|
||||
non-default value only to appropriate devices, and not to conventional SSD and
|
||||
HDD devices. This may be done through careful ordering of OSD creation, custom
|
||||
OSD device classes, and especially by the use of central configuration *masks*.
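
One hedged sketch: if such devices were assigned a custom device class
(``qlc`` is a made-up class name here), a configuration mask could target only
those OSDs before they are created (the value is illustrative):

.. prompt:: bash $

   ceph config set osd/class:qlc bluestore_min_alloc_size_ssd 16384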
|
||||
|
||||
Quincy and later releases add
|
||||
the `bluestore_use_optimal_io_size_for_min_alloc_size`
|
||||
option that enables automatic discovery of the appropriate value as each OSD is
|
||||
created. Note that the use of ``bcache``, ``OpenCAS``, ``dmcrypt``,
|
||||
``ATA over Ethernet``, `iSCSI`, or other device layering / abstraction
|
||||
technologies may confound the determination of appropriate values. OSDs
|
||||
deployed on top of VMware storage have also been reported to
sometimes present a ``rotational`` attribute that does not match the underlying
|
||||
hardware.
|
||||
|
||||
We suggest inspecting such OSDs at startup via logs and admin sockets to ensure that
|
||||
behavior is appropriate. Note that this also may not work as desired with
|
||||
older kernels. You can check for this by examining the presence and value
|
||||
of ``/sys/block/<drive>/queue/optimal_io_size``.
|
||||
|
||||
You may also inspect a given OSD:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
ceph osd metadata osd.1701 | grep rotational
|
||||
|
||||
This space amplification may manifest as an unusually high ratio of raw to
|
||||
stored data reported by ``ceph df``. ``ceph osd df`` may also report
|
||||
anomalously high ``%USE`` / ``VAR`` values when
|
||||
compared to other, ostensibly identical OSDs. A pool using OSDs with
|
||||
mismatched ``min_alloc_size`` values may experience unexpected balancer
|
||||
behavior as well.
|
||||
|
||||
Note that this BlueStore attribute takes effect *only* at OSD creation; if
|
||||
changed later, a given OSD's behavior will not change unless / until it is
|
||||
destroyed and redeployed with the appropriate option value(s). Upgrading
|
||||
to a later Ceph release will *not* change the value used by OSDs deployed
|
||||
under older releases or with other settings.
|
||||
|
||||
DSA (Data Streaming Accelerator Usage)
|
||||
======================================
|
||||
|
||||
If you want to use the DML library to drive a DSA device for offloading
read/write operations on persistent memory in BlueStore, you need to install
the `DML`_ and `idxd-config`_ libraries on a machine with an SPR (Sapphire Rapids) CPU.
|
||||
|
||||
.. _DML: https://github.com/intel/DML
|
||||
.. _idxd-config: https://github.com/intel/idxd-config
|
||||
|
||||
After installing the DML software, configure the shared work queues (WQs)
using the ``accel-config`` tool, as in the following example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
accel-config config-wq --group-id=1 --mode=shared --wq-size=16 --threshold=15 --type=user --name="MyApp1" --priority=10 --block-on-fault=1 dsa0/wq0.1
|
||||
accel-config config-engine dsa0/engine0.1 --group-id=1
|
||||
accel-config enable-device dsa0
|
||||
accel-config enable-wq dsa0/wq0.1
|
||||
|
@ -484,17 +484,26 @@ The following CLI commands are used to configure the cluster:
|
||||
Help
|
||||
====
|
||||
|
||||
You can get help for a particular option with::
|
||||
You can get help for a particular option with:
|
||||
|
||||
ceph config help <option>
|
||||
.. prompt:: bash $
|
||||
|
||||
Note that this will use the configuration schema that is compiled into the running monitors. If you have a mixed-version cluster (e.g., during an upgrade), you might also want to query the option schema from a specific running daemon::
|
||||
ceph config help <option>
|
||||
|
||||
ceph daemon <name> config help [option]
|
||||
Note that this will use the configuration schema that is compiled into the running monitors. If you have a mixed-version cluster (e.g., during an upgrade), you might also want to query the option schema from a specific running daemon:
|
||||
|
||||
For example,::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph daemon <name> config help [option]
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config help log_file
|
||||
|
||||
::
|
||||
|
||||
$ ceph config help log_file
|
||||
log_file - path to log file
|
||||
(std::string, basic)
|
||||
Default (non-daemon):
|
||||
@ -502,9 +511,14 @@ For example,::
|
||||
Can update at runtime: false
|
||||
See also: [log_to_stderr,err_to_stderr,log_to_syslog,err_to_syslog]
|
||||
|
||||
or::
|
||||
or:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config help log_file -f json-pretty
|
||||
|
||||
::
|
||||
|
||||
$ ceph config help log_file -f json-pretty
|
||||
{
|
||||
"name": "log_file",
|
||||
"type": "std::string",
|
||||
@ -541,9 +555,11 @@ increasing/decreasing logging output, enabling/disabling debug
|
||||
settings, and even for runtime optimization.
|
||||
|
||||
Generally speaking, configuration options can be updated in the usual
|
||||
way via the ``ceph config set`` command. For example, do enable the debug log level on a specific OSD,::
|
||||
way via the ``ceph config set`` command. For example, to enable the debug log level on a specific OSD:
|
||||
|
||||
ceph config set osd.123 debug_ms 20
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set osd.123 debug_ms 20
|
||||
|
||||
Note that if the same option is also customized in a local
|
||||
configuration file, the monitor setting will be ignored (it has a
|
||||
@ -559,28 +575,38 @@ the daemon or process restarts.
|
||||
|
||||
Override values can be set in two ways:
|
||||
|
||||
#. From any host, we can send a message to a daemon over the network with::
|
||||
#. From any host, we can send a message to a daemon over the network with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph tell <name> config set <option> <value>
|
||||
ceph tell <name> config set <option> <value>
|
||||
|
||||
For example,::
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph tell osd.123 config set debug_osd 20
|
||||
ceph tell osd.123 config set debug_osd 20
|
||||
|
||||
The `tell` command can also accept a wildcard for the daemon
|
||||
identifier. For example, to adjust the debug level on all OSD
|
||||
daemons,::
|
||||
daemons:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph tell osd.* config set debug_osd 20
|
||||
ceph tell osd.* config set debug_osd 20
|
||||
|
||||
#. From the host the process is running on, we can connect directly to
|
||||
the process via a socket in ``/var/run/ceph`` with::
|
||||
the process via a socket in ``/var/run/ceph`` with:
|
||||
|
||||
ceph daemon <name> config set <option> <value>
|
||||
.. prompt:: bash $
|
||||
|
||||
For example,::
|
||||
ceph daemon <name> config set <option> <value>
|
||||
|
||||
ceph daemon osd.4 config set debug_osd 20
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph daemon osd.4 config set debug_osd 20
|
||||
|
||||
Note that in the ``ceph config show`` command output these temporary
|
||||
values will be shown with a source of ``override``.
|
||||
@ -589,29 +615,41 @@ values will be shown with a source of ``override``.
|
||||
Viewing runtime settings
|
||||
========================
|
||||
|
||||
You can see the current options set for a running daemon with the ``ceph config show`` command. For example,::
|
||||
You can see the current options set for a running daemon with the ``ceph config show`` command. For example:
|
||||
|
||||
ceph config show osd.0
|
||||
.. prompt:: bash $
|
||||
|
||||
will show you the (non-default) options for that daemon. You can also look at a specific option with::
|
||||
ceph config show osd.0
|
||||
|
||||
ceph config show osd.0 debug_osd
|
||||
will show you the (non-default) options for that daemon. You can also look at a specific option with:
|
||||
|
||||
or view all options (even those with default values) with::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config show-with-defaults osd.0
|
||||
ceph config show osd.0 debug_osd
|
||||
|
||||
You can also observe settings for a running daemon by connecting to it from the local host via the admin socket. For example,::
|
||||
or view all options (even those with default values) with:
|
||||
|
||||
ceph daemon osd.0 config show
|
||||
.. prompt:: bash $
|
||||
|
||||
will dump all current settings,::
|
||||
ceph config show-with-defaults osd.0
|
||||
|
||||
ceph daemon osd.0 config diff
|
||||
You can also observe settings for a running daemon by connecting to it from the local host via the admin socket. For example:
|
||||
|
||||
will show only non-default settings (as well as where the value came from: a config file, the monitor, an override, etc.), and::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph daemon osd.0 config get debug_osd
|
||||
ceph daemon osd.0 config show
|
||||
|
||||
will dump all current settings:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph daemon osd.0 config diff
|
||||
|
||||
will show only non-default settings (as well as where the value came from: a config file, the monitor, an override, etc.), and:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph daemon osd.0 config get debug_osd
|
||||
|
||||
will report the value of a single option.
|
||||
|
||||
|
@ -2,12 +2,13 @@
|
||||
Configuration
|
||||
===============
|
||||
|
||||
Each Ceph process, daemon, or utility draws its configuration from
|
||||
several sources on startup, include a local configuration, the
|
||||
monitors, the command line, or environment variables. Configuration
|
||||
options may be set globally such that they apply to all daemons, to
|
||||
all daemons or services of a particular type, or only to a specific
|
||||
daemon, process, or client.
|
||||
Each Ceph process, daemon, or utility draws its configuration from several
|
||||
sources on startup. Such sources can include (1) a local configuration, (2) the
|
||||
monitors, (3) the command line, and (4) environment variables.
|
||||
|
||||
Configuration options can be set globally so that they apply (1) to all
|
||||
daemons, (2) to all daemons or services of a particular type, or (3) to only a
|
||||
specific daemon, process, or client.
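
As a minimal sketch of these three scopes (the option ``osd_max_backfills``
and the daemon id ``osd.7`` are only examples), the same option can be set
cluster-wide, for all OSDs, or for a single OSD via the centralized
configuration database:

.. prompt:: bash $

   ceph config set global osd_max_backfills 1
   ceph config set osd osd_max_backfills 2
   ceph config set osd.7 osd_max_backfills 3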
|
||||
|
||||
.. raw:: html
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _monitor-config-reference:
|
||||
|
||||
==========================
|
||||
Monitor Config Reference
|
||||
==========================
|
||||
|
@ -163,16 +163,21 @@ By default, ``ms_bind_msgr2`` is true starting with Nautilus 14.2.z.
|
||||
However, until the monitors start using v2, only limited services will
|
||||
start advertising v2 addresses.
|
||||
|
||||
For most users, the monitors are binding to the default legacy port ``6789`` for the v1 protocol. When this is the case, enabling v2 is as simple as::
|
||||
For most users, the monitors are binding to the default legacy port ``6789``
|
||||
for the v1 protocol. When this is the case, enabling v2 is as simple as:
|
||||
|
||||
ceph mon enable-msgr2
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon enable-msgr2
|
||||
|
||||
If the monitors are bound to non-standard ports, you will need to
|
||||
specify an additional port for v2 explicitly. For example, if your
|
||||
monitor ``mon.a`` binds to ``1.2.3.4:1111``, and you want to add v2 on
|
||||
port ``1112``,::
|
||||
port ``1112``:
|
||||
|
||||
ceph mon set-addrs a [v2:1.2.3.4:1112,v1:1.2.3.4:1111]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon set-addrs a [v2:1.2.3.4:1112,v1:1.2.3.4:1111]
|
||||
|
||||
Once the monitors bind to v2, each daemon will start advertising a v2
|
||||
address when it is next restarted.
|
||||
|
@ -60,7 +60,9 @@ By default, daemons `bind`_ to ports within the ``6800:7300`` range. You may
|
||||
configure this range at your discretion. Before configuring your IP tables,
|
||||
check the default ``iptables`` configuration.
|
||||
|
||||
sudo iptables -L
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo iptables -L
|
||||
|
||||
Some Linux distributions include rules that reject all inbound requests
|
||||
except SSH from all network interfaces. For example::
|
||||
@ -80,7 +82,9 @@ default. Additionally, Ceph Monitors always operate on the public
|
||||
network. When you add the rule using the example below, make sure you
|
||||
replace ``{iface}`` with the public network interface (e.g., ``eth0``,
|
||||
``eth1``, etc.), ``{ip-address}`` with the IP address of the public
|
||||
network and ``{netmask}`` with the netmask for the public network. ::
|
||||
network and ``{netmask}`` with the netmask for the public network:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo iptables -A INPUT -i {iface} -p tcp -s {ip-address}/{netmask} --dport 6789 -j ACCEPT
|
||||
|
||||
@ -98,9 +102,11 @@ you replace ``{iface}`` with the public network interface (e.g., ``eth0``,
|
||||
``eth1``, etc.), ``{ip-address}`` with the IP address of the public network
|
||||
and ``{netmask}`` with the netmask of the public network.
|
||||
|
||||
For example::
|
||||
For example:
|
||||
|
||||
sudo iptables -A INPUT -i {iface} -m multiport -p tcp -s {ip-address}/{netmask} --dports 6800:7300 -j ACCEPT
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo iptables -A INPUT -i {iface} -m multiport -p tcp -s {ip-address}/{netmask} --dports 6800:7300 -j ACCEPT
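
A filled-in version of the above might look like the following; the interface
name, address, and netmask are placeholders for a hypothetical public network:

.. prompt:: bash $

   sudo iptables -A INPUT -i eth0 -m multiport -p tcp -s 192.168.1.0/255.255.255.0 --dports 6800:7300 -j ACCEPT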
|
||||
|
||||
|
||||
OSD IP Tables
|
||||
@ -139,9 +145,11 @@ the public network and other Ceph OSD Daemons will connect using the cluster
|
||||
network. When you add the rule using the example below, make sure you replace
|
||||
``{iface}`` with the network interface (e.g., ``eth0``, ``eth1``, etc.),
|
||||
``{ip-address}`` with the IP address and ``{netmask}`` with the netmask of the
|
||||
public or cluster network. For example::
|
||||
public or cluster network. For example:
|
||||
|
||||
sudo iptables -A INPUT -i {iface} -m multiport -p tcp -s {ip-address}/{netmask} --dports 6800:7300 -j ACCEPT
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo iptables -A INPUT -i {iface} -m multiport -p tcp -s {ip-address}/{netmask} --dports 6800:7300 -j ACCEPT
|
||||
|
||||
.. tip:: If you run Ceph Metadata Servers on the same Ceph Node as the
|
||||
Ceph OSD Daemons, you can consolidate the public network configuration step.
|
||||
|
@ -4,37 +4,50 @@
|
||||
|
||||
There are several Ceph daemons in a storage cluster:
|
||||
|
||||
* **Ceph OSDs** (or Object Storage Daemons) are where most of the
|
||||
data is stored in Ceph. Generally speaking, each OSD is backed by
|
||||
a single storage device, like a traditional hard disk (HDD) or
|
||||
solid state disk (SSD). OSDs can also be backed by a combination
|
||||
of devices, like a HDD for most data and an SSD (or partition of an
|
||||
SSD) for some metadata. The number of OSDs in a cluster is
|
||||
generally a function of how much data will be stored, how big each
|
||||
storage device will be, and the level and type of redundancy
|
||||
(replication or erasure coding).
|
||||
* **Ceph Monitor** daemons manage critical cluster state like cluster
|
||||
membership and authentication information. For smaller clusters a
|
||||
few gigabytes is all that is needed, although for larger clusters
|
||||
the monitor database can reach tens or possibly hundreds of
|
||||
gigabytes.
|
||||
.. _rados_configuration_storage-devices_ceph_osd:
|
||||
|
||||
* **Ceph OSDs** (Object Storage Daemons) store most of the data
|
||||
in Ceph. Usually each OSD is backed by a single storage device.
|
||||
This can be a traditional hard disk (HDD) or a solid state disk
|
||||
(SSD). OSDs can also be backed by a combination of devices: for
|
||||
example, a HDD for most data and an SSD (or partition of an
|
||||
SSD) for some metadata. The number of OSDs in a cluster is
|
||||
usually a function of the amount of data to be stored, the size
|
||||
of each storage device, and the level and type of redundancy
|
||||
specified (replication or erasure coding).
|
||||
* **Ceph Monitor** daemons manage critical cluster state. This
|
||||
includes cluster membership and authentication information.
|
||||
Small clusters require only a few gigabytes of storage to hold
|
||||
the monitor database. In large clusters, however, the monitor
|
||||
database can reach sizes of tens of gigabytes to hundreds of
|
||||
gigabytes.
|
||||
* **Ceph Manager** daemons run alongside monitor daemons, providing
|
||||
additional monitoring and providing interfaces to external
|
||||
monitoring and management systems.
|
||||
|
||||
|
||||
OSD Backends
|
||||
============
|
||||
OSD Back Ends
|
||||
=============
|
||||
|
||||
There are two ways that OSDs can manage the data they store. Starting
|
||||
with the Luminous 12.2.z release, the new default (and recommended) backend is
|
||||
*BlueStore*. Prior to Luminous, the default (and only option) was
|
||||
*Filestore*.
|
||||
There are two ways that OSDs manage the data they store. As of the Luminous
|
||||
12.2.z release, the default (and recommended) back end is *BlueStore*. Prior
|
||||
to the Luminous release, the default (and only) back end was *Filestore*.
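
To check which back end the OSDs of an existing cluster are using, the OSD
metadata can be summarized (this command is also used later in this document):

.. prompt:: bash $

   ceph osd count-metadata osd_objectstore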
|
||||
|
||||
.. _rados_config_storage_devices_bluestore:
|
||||
|
||||
BlueStore
|
||||
---------
|
||||
|
||||
BlueStore is a special-purpose storage back end designed specifically for
managing data on disk for Ceph OSD workloads. BlueStore's design is based on
a decade of experience of supporting and managing Filestore OSDs. Key
BlueStore features include:
|
||||
|
||||
* Direct management of storage devices. BlueStore consumes raw block
|
||||
devices or partitions. This avoids any intervening layers of
|
||||
|
@ -95,7 +95,9 @@ without the ``mon.`` prefix (i.e., ``{mon-id}`` should be the ``a``
|
||||
on ``mon.a``).
|
||||
|
||||
#. Create the default directory on the machine that will host your
|
||||
new monitor. ::
|
||||
new monitor:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ssh {new-mon-host}
|
||||
sudo mkdir /var/lib/ceph/mon/ceph-{mon-id}
|
||||
@ -103,36 +105,46 @@ on ``mon.a``).
|
||||
#. Create a temporary directory ``{tmp}`` to keep the files needed during
|
||||
this process. This directory should be different from the monitor's default
|
||||
directory created in the previous step, and can be removed after all the
|
||||
steps are executed. ::
|
||||
steps are executed:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
mkdir {tmp}
|
||||
|
||||
#. Retrieve the keyring for your monitors, where ``{tmp}`` is the path to
|
||||
the retrieved keyring, and ``{key-filename}`` is the name of the file
|
||||
containing the retrieved monitor key. ::
|
||||
containing the retrieved monitor key:
|
||||
|
||||
ceph auth get mon. -o {tmp}/{key-filename}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph auth get mon. -o {tmp}/{key-filename}
|
||||
|
||||
#. Retrieve the monitor map, where ``{tmp}`` is the path to
|
||||
the retrieved monitor map, and ``{map-filename}`` is the name of the file
|
||||
containing the retrieved monitor map. ::
|
||||
containing the retrieved monitor map:
|
||||
|
||||
ceph mon getmap -o {tmp}/{map-filename}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon getmap -o {tmp}/{map-filename}
|
||||
|
||||
#. Prepare the monitor's data directory created in the first step. You must
|
||||
specify the path to the monitor map so that you can retrieve the
|
||||
information about a quorum of monitors and their ``fsid``. You must also
|
||||
specify a path to the monitor keyring::
|
||||
specify a path to the monitor keyring:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo ceph-mon -i {mon-id} --mkfs --monmap {tmp}/{map-filename} --keyring {tmp}/{key-filename}
|
||||
sudo ceph-mon -i {mon-id} --mkfs --monmap {tmp}/{map-filename} --keyring {tmp}/{key-filename}
|
||||
|
||||
|
||||
#. Start the new monitor and it will automatically join the cluster.
|
||||
The daemon needs to know which address to bind to, via either the
|
||||
``--public-addr {ip}`` or ``--public-network {network}`` argument.
|
||||
For example::
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-mon -i {mon-id} --public-addr {ip:port}
|
||||
ceph-mon -i {mon-id} --public-addr {ip:port}
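
For instance, with an illustrative monitor id of ``d`` and a public address of
``10.0.0.4``, the invocation might be:

.. prompt:: bash $

   ceph-mon -i d --public-addr 10.0.0.4:6789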
|
||||
|
||||
.. _removing-monitors:
|
||||
|
||||
@ -154,13 +166,17 @@ procedure results in only two monitor daemons, you may add or remove another
|
||||
monitor until you have a number of ``ceph-mon`` daemons that can achieve a
|
||||
quorum.
|
||||
|
||||
#. Stop the monitor. ::
|
||||
#. Stop the monitor:
|
||||
|
||||
service ceph -a stop mon.{mon-id}
|
||||
.. prompt:: bash $
|
||||
|
||||
service ceph -a stop mon.{mon-id}
|
||||
|
||||
#. Remove the monitor from the cluster. ::
|
||||
#. Remove the monitor from the cluster:
|
||||
|
||||
ceph mon remove {mon-id}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon remove {mon-id}
|
||||
|
||||
#. Remove the monitor entry from ``ceph.conf``.
|
||||
|
||||
@ -174,38 +190,61 @@ cluster, for example a cluster where the monitors cannot form a
|
||||
quorum.
|
||||
|
||||
|
||||
#. Stop all ``ceph-mon`` daemons on all monitor hosts. ::
|
||||
#. Stop all ``ceph-mon`` daemons on all monitor hosts:
|
||||
|
||||
ssh {mon-host}
|
||||
systemctl stop ceph-mon.target
|
||||
# and repeat for all mons
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Identify a surviving monitor and log in to that host. ::
|
||||
ssh {mon-host}
|
||||
systemctl stop ceph-mon.target
|
||||
|
||||
ssh {mon-host}
|
||||
Repeat for all monitor hosts.
|
||||
|
||||
#. Extract a copy of the monmap file. ::
|
||||
#. Identify a surviving monitor and log in to that host:
|
||||
|
||||
ceph-mon -i {mon-id} --extract-monmap {map-path}
|
||||
# in most cases, that's
|
||||
ceph-mon -i `hostname` --extract-monmap /tmp/monmap
|
||||
.. prompt:: bash $
|
||||
|
||||
ssh {mon-host}
|
||||
|
||||
#. Extract a copy of the monmap file:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-mon -i {mon-id} --extract-monmap {map-path}
|
||||
|
||||
In most cases, this command will be:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-mon -i `hostname` --extract-monmap /tmp/monmap
|
||||
|
||||
#. Remove the non-surviving or problematic monitors. For example, if
|
||||
you have three monitors, ``mon.a``, ``mon.b``, and ``mon.c``, where
|
||||
only ``mon.a`` will survive, follow the example below::
|
||||
only ``mon.a`` will survive, follow the example below:
|
||||
|
||||
monmaptool {map-path} --rm {mon-id}
|
||||
# for example,
|
||||
monmaptool /tmp/monmap --rm b
|
||||
monmaptool /tmp/monmap --rm c
|
||||
.. prompt:: bash $
|
||||
|
||||
monmaptool {map-path} --rm {mon-id}
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
monmaptool /tmp/monmap --rm b
|
||||
monmaptool /tmp/monmap --rm c
|
||||
|
||||
#. Inject the surviving map with the removed monitors into the
|
||||
surviving monitor(s). For example, to inject a map into monitor
|
||||
``mon.a``, follow the example below::
|
||||
``mon.a``, follow the example below:
|
||||
|
||||
ceph-mon -i {mon-id} --inject-monmap {map-path}
|
||||
# for example,
|
||||
ceph-mon -i a --inject-monmap /tmp/monmap
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-mon -i {mon-id} --inject-monmap {map-path}
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-mon -i a --inject-monmap /tmp/monmap
|
||||
|
||||
#. Start only the surviving monitors.
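
As a sketch, restarting only the surviving monitor might look like this; the
host placeholder follows the earlier steps and is illustrative:

.. prompt:: bash $

   ssh {mon-host}
   systemctl start ceph-mon.target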
|
||||
|
||||
@ -316,14 +355,20 @@ networks are unable to communicate. Use the following procedure:
|
||||
|
||||
#. Retrieve the monitor map, where ``{tmp}`` is the path to
|
||||
the retrieved monitor map, and ``{filename}`` is the name of the file
|
||||
containing the retrieved monitor map. ::
|
||||
containing the retrieved monitor map:
|
||||
|
||||
ceph mon getmap -o {tmp}/{filename}
|
||||
.. prompt:: bash $
|
||||
|
||||
#. The following example demonstrates the contents of the monmap. ::
|
||||
ceph mon getmap -o {tmp}/{filename}
|
||||
|
||||
#. The following example demonstrates the contents of the monmap:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
monmaptool --print {tmp}/{filename}
|
||||
|
||||
::
|
||||
|
||||
$ monmaptool --print {tmp}/{filename}
|
||||
|
||||
monmaptool: monmap file {tmp}/{filename}
|
||||
epoch 1
|
||||
fsid 224e376d-c5fe-4504-96bb-ea6332a19e61
|
||||
@ -333,27 +378,41 @@ networks are unable to communicate. Use the following procedure:
|
||||
1: 10.0.0.2:6789/0 mon.b
|
||||
2: 10.0.0.3:6789/0 mon.c
|
||||
|
||||
#. Remove the existing monitors. ::
|
||||
#. Remove the existing monitors:
|
||||
|
||||
$ monmaptool --rm a --rm b --rm c {tmp}/{filename}
|
||||
.. prompt:: bash $
|
||||
|
||||
monmaptool --rm a --rm b --rm c {tmp}/{filename}
|
||||
|
||||
|
||||
::
|
||||
|
||||
monmaptool: monmap file {tmp}/{filename}
|
||||
monmaptool: removing a
|
||||
monmaptool: removing b
|
||||
monmaptool: removing c
|
||||
monmaptool: writing epoch 1 to {tmp}/{filename} (0 monitors)
|
||||
|
||||
#. Add the new monitor locations. ::
|
||||
#. Add the new monitor locations:
|
||||
|
||||
$ monmaptool --add a 10.1.0.1:6789 --add b 10.1.0.2:6789 --add c 10.1.0.3:6789 {tmp}/{filename}
|
||||
.. prompt:: bash $
|
||||
|
||||
monmaptool --add a 10.1.0.1:6789 --add b 10.1.0.2:6789 --add c 10.1.0.3:6789 {tmp}/{filename}
|
||||
|
||||
|
||||
::
|
||||
|
||||
monmaptool: monmap file {tmp}/{filename}
|
||||
monmaptool: writing epoch 1 to {tmp}/{filename} (3 monitors)
|
||||
monmaptool: monmap file {tmp}/{filename}
|
||||
monmaptool: writing epoch 1 to {tmp}/{filename} (3 monitors)
|
||||
|
||||
#. Check new contents. ::
|
||||
#. Check new contents:
|
||||
|
||||
$ monmaptool --print {tmp}/{filename}
|
||||
.. prompt:: bash $
|
||||
|
||||
monmaptool --print {tmp}/{filename}
|
||||
|
||||
::
|
||||
|
||||
monmaptool: monmap file {tmp}/{filename}
|
||||
epoch 1
|
||||
fsid 224e376d-c5fe-4504-96bb-ea6332a19e61
|
||||
@ -370,9 +429,11 @@ monitors, and inject the modified monmap into each new monitor.
|
||||
#. First, make sure to stop all your monitors. Injection must be done while
|
||||
the daemon is not running.
|
||||
|
||||
#. Inject the monmap. ::
|
||||
#. Inject the monmap:
|
||||
|
||||
ceph-mon -i {mon-id} --inject-monmap {tmp}/{filename}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-mon -i {mon-id} --inject-monmap {tmp}/{filename}
|
||||
|
||||
#. Restart the monitors.
|
||||
|
||||
|
@ -71,9 +71,11 @@ weight).
|
||||
|
||||
#. Create the OSD. If no UUID is given, it will be set automatically when the
|
||||
OSD starts up. The following command will output the OSD number, which you
|
||||
will need for subsequent steps. ::
|
||||
will need for subsequent steps:
|
||||
|
||||
ceph osd create [{uuid} [{id}]]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd create [{uuid} [{id}]]
|
||||
|
||||
If the optional parameter {id} is given it will be used as the OSD id.
|
||||
Note, in this case the command may fail if the number is already in use.
|
||||
@ -84,33 +86,38 @@ weight).
|
||||
clusters are large. If {id} is not specified, the smallest available is
|
||||
used.
|
||||
|
||||
#. Create the default directory on your new OSD. ::
|
||||
#. Create the default directory on your new OSD:
|
||||
|
||||
ssh {new-osd-host}
|
||||
sudo mkdir /var/lib/ceph/osd/ceph-{osd-number}
|
||||
.. prompt:: bash $
|
||||
|
||||
ssh {new-osd-host}
|
||||
sudo mkdir /var/lib/ceph/osd/ceph-{osd-number}
|
||||
|
||||
#. If the OSD is for a drive other than the OS drive, prepare it
|
||||
for use with Ceph, and mount it to the directory you just created::
|
||||
for use with Ceph, and mount it to the directory you just created:
|
||||
|
||||
ssh {new-osd-host}
|
||||
sudo mkfs -t {fstype} /dev/{drive}
|
||||
sudo mount -o user_xattr /dev/{hdd} /var/lib/ceph/osd/ceph-{osd-number}
|
||||
.. prompt:: bash $
|
||||
|
||||
ssh {new-osd-host}
|
||||
sudo mkfs -t {fstype} /dev/{drive}
|
||||
sudo mount -o user_xattr /dev/{hdd} /var/lib/ceph/osd/ceph-{osd-number}
|
||||
|
||||
#. Initialize the OSD data directory. ::
|
||||
#. Initialize the OSD data directory:
|
||||
|
||||
ssh {new-osd-host}
|
||||
ceph-osd -i {osd-num} --mkfs --mkkey
|
||||
.. prompt:: bash $
|
||||
|
||||
ssh {new-osd-host}
|
||||
ceph-osd -i {osd-num} --mkfs --mkkey
|
||||
|
||||
The directory must be empty before you can run ``ceph-osd``.
|
||||
|
||||
#. Register the OSD authentication key. The value of ``ceph`` for
|
||||
``ceph-{osd-num}`` in the path is the ``$cluster-$id``. If your
|
||||
cluster name differs from ``ceph``, use your cluster name instead.::
|
||||
cluster name differs from ``ceph``, use your cluster name instead:
|
||||
|
||||
ceph auth add osd.{osd-num} osd 'allow *' mon 'allow rwx' -i /var/lib/ceph/osd/ceph-{osd-num}/keyring
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph auth add osd.{osd-num} osd 'allow *' mon 'allow rwx' -i /var/lib/ceph/osd/ceph-{osd-num}/keyring
|
||||
|
||||
#. Add the OSD to the CRUSH map so that the OSD can begin receiving data. The
|
||||
``ceph osd crush add`` command allows you to add OSDs to the CRUSH hierarchy
|
||||
@ -120,9 +127,11 @@ weight).
|
||||
you specify only the root bucket, the command will attach the OSD directly
|
||||
to the root, but CRUSH rules expect OSDs to be inside of hosts.
|
||||
|
||||
Execute the following::
|
||||
Execute the following:
|
||||
|
||||
ceph osd crush add {id-or-name} {weight} [{bucket-type}={bucket-name} ...]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush add {id-or-name} {weight} [{bucket-type}={bucket-name} ...]
|
||||
|
||||
You may also decompile the CRUSH map, add the OSD to the device list, add the
|
||||
host as a bucket (if it's not already in the CRUSH map), add the device as an
|
||||
@ -135,36 +144,51 @@ weight).
|
||||
Replacing an OSD
|
||||
----------------
|
||||
|
||||
.. note:: If the instructions in this section do not work for you, try the
|
||||
instructions in the cephadm documentation: :ref:`cephadm-replacing-an-osd`.
|
||||
|
||||
When a disk fails, or when an administrator wants to reprovision OSDs with a new
back end (for instance, to switch from FileStore to BlueStore), an OSD needs to
be replaced. Unlike `Removing the OSD`_, the replaced OSD's id and CRUSH map entry
must be kept intact after the OSD is destroyed for replacement.
|
||||
|
||||
#. Make sure it is safe to destroy the OSD::
|
||||
#. Make sure it is safe to destroy the OSD:
|
||||
|
||||
while ! ceph osd safe-to-destroy osd.{id} ; do sleep 10 ; done
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Destroy the OSD first::
|
||||
while ! ceph osd safe-to-destroy osd.{id} ; do sleep 10 ; done
|
||||
|
||||
ceph osd destroy {id} --yes-i-really-mean-it
|
||||
#. Destroy the OSD first:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd destroy {id} --yes-i-really-mean-it
|
||||
|
||||
#. Zap a disk for the new OSD, if the disk was used before for other purposes.
|
||||
It's not necessary for a new disk::
|
||||
It's not necessary for a new disk:
|
||||
|
||||
ceph-volume lvm zap /dev/sdX
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Prepare the disk for replacement by using the previously destroyed OSD id::
|
||||
ceph-volume lvm zap /dev/sdX
|
||||
|
||||
ceph-volume lvm prepare --osd-id {id} --data /dev/sdX
|
||||
#. Prepare the disk for replacement by using the previously destroyed OSD id:
|
||||
|
||||
#. And activate the OSD::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm activate {id} {fsid}
|
||||
ceph-volume lvm prepare --osd-id {id} --data /dev/sdX
|
||||
|
||||
#. And activate the OSD:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm activate {id} {fsid}
|
||||
|
||||
Alternatively, instead of preparing and activating, the device can be recreated
|
||||
in one call, like::
|
||||
in one call, like:
|
||||
|
||||
ceph-volume lvm create --osd-id {id} --data /dev/sdX
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm create --osd-id {id} --data /dev/sdX
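
As a concrete illustration of the whole replacement, with a hypothetical OSD id
of ``12`` and device ``/dev/sdc``:

.. prompt:: bash $

   while ! ceph osd safe-to-destroy osd.12 ; do sleep 10 ; done
   ceph osd destroy 12 --yes-i-really-mean-it
   ceph-volume lvm create --osd-id 12 --data /dev/sdc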
|
||||
|
||||
|
||||
Starting the OSD
|
||||
@ -174,15 +198,11 @@ After you add an OSD to Ceph, the OSD is in your configuration. However,
|
||||
it is not yet running. The OSD is ``down`` and ``in``. You must start
|
||||
your new OSD before it can begin receiving data. You may use
|
||||
``service ceph`` from your admin host or start the OSD from its host
|
||||
machine.
|
||||
machine:
|
||||
|
||||
For Ubuntu Trusty use Upstart. ::
|
||||
.. prompt:: bash $
|
||||
|
||||
sudo start ceph-osd id={osd-num}
|
||||
|
||||
For all other distros use systemd. ::
|
||||
|
||||
sudo systemctl start ceph-osd@{osd-num}
|
||||
sudo systemctl start ceph-osd@{osd-num}
|
||||
|
||||
|
||||
Once you start your OSD, it is ``up`` and ``in``.
|
||||
@ -193,15 +213,16 @@ Observe the Data Migration
|
||||
|
||||
Once you have added your new OSD to the CRUSH map, Ceph will begin rebalancing
|
||||
the server by migrating placement groups to your new OSD. You can observe this
|
||||
process with the `ceph`_ tool. ::
|
||||
process with the `ceph`_ tool:
|
||||
|
||||
ceph -w
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph -w
|
||||
|
||||
You should see the placement group states change from ``active+clean`` to
|
||||
``active, some degraded objects``, and finally ``active+clean`` when migration
|
||||
completes. (Control-c to exit.)
|
||||
|
||||
|
||||
.. _Add/Move an OSD: ../crush-map#addosd
|
||||
.. _ceph: ../monitoring
|
||||
|
||||
@ -228,9 +249,11 @@ Take the OSD out of the Cluster
|
||||
|
||||
Before you remove an OSD, it is usually ``up`` and ``in``. You need to take it
|
||||
out of the cluster so that Ceph can begin rebalancing and copying its data to
|
||||
other OSDs. ::
|
||||
other OSDs:
|
||||
|
||||
ceph osd out {osd-num}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd out {osd-num}
|
||||
|
||||
|
||||
Observe the Data Migration
|
||||
@ -238,9 +261,11 @@ Observe the Data Migration
|
||||
|
||||
Once you have taken your OSD ``out`` of the cluster, Ceph will begin
|
||||
rebalancing the cluster by migrating placement groups out of the OSD you
|
||||
removed. You can observe this process with the `ceph`_ tool. ::
|
||||
removed. You can observe this process with the `ceph`_ tool:
|
||||
|
||||
ceph -w
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph -w
|
||||
|
||||
You should see the placement group states change from ``active+clean`` to
|
||||
``active, some degraded objects``, and finally ``active+clean`` when migration
|
||||
@ -252,12 +277,16 @@ completes. (Control-c to exit.)
|
||||
``active+remapped`` state. If this is the case, you should mark
|
||||
the OSD ``in`` with:
|
||||
|
||||
``ceph osd in {osd-num}``
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd in {osd-num}
|
||||
|
||||
to come back to the initial state and then, instead of marking ``out``
|
||||
the OSD, set its weight to 0 with:
|
||||
|
||||
``ceph osd crush reweight osd.{osd-num} 0``
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush reweight osd.{osd-num} 0
|
||||
|
||||
After that, you can observe the data migration which should come to its
|
||||
end. The difference between marking ``out`` the OSD and reweighting it
|
||||
@ -273,10 +302,12 @@ Stopping the OSD
|
||||
|
||||
After you take an OSD out of the cluster, it may still be running.
|
||||
That is, the OSD may be ``up`` and ``out``. You must stop
|
||||
your OSD before you remove it from the configuration. ::
|
||||
your OSD before you remove it from the configuration:
|
||||
|
||||
ssh {osd-host}
|
||||
sudo systemctl stop ceph-osd@{osd-num}
|
||||
.. prompt:: bash $
|
||||
|
||||
ssh {osd-host}
|
||||
sudo systemctl stop ceph-osd@{osd-num}
|
||||
|
||||
Once you stop your OSD, it is ``down``.
|
||||
|
||||
@ -292,50 +323,64 @@ OSD for each drive by repeating this procedure.
|
||||
#. Let the cluster forget the OSD first. This step removes the OSD from the CRUSH
map, removes its authentication key, and removes it from the OSD map as
well. Note that the :ref:`purge subcommand <ceph-admin-osd>` was introduced in Luminous; for older
|
||||
versions, please see below ::
|
||||
versions, please see below:
|
||||
|
||||
ceph osd purge {id} --yes-i-really-mean-it
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd purge {id} --yes-i-really-mean-it
|
||||
|
||||
#. Navigate to the host where you keep the master copy of the cluster's
|
||||
``ceph.conf`` file. ::
|
||||
``ceph.conf`` file:
|
||||
|
||||
ssh {admin-host}
|
||||
cd /etc/ceph
|
||||
vim ceph.conf
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Remove the OSD entry from your ``ceph.conf`` file (if it exists). ::
|
||||
ssh {admin-host}
|
||||
cd /etc/ceph
|
||||
vim ceph.conf
|
||||
|
||||
#. Remove the OSD entry from your ``ceph.conf`` file (if it exists)::
|
||||
|
||||
[osd.1]
|
||||
host = {hostname}
|
||||
|
||||
#. From the host where you keep the master copy of the cluster's ``ceph.conf`` file,
|
||||
copy the updated ``ceph.conf`` file to the ``/etc/ceph`` directory of other
|
||||
hosts in your cluster.
|
||||
#. From the host where you keep the master copy of the cluster's ``ceph.conf``
|
||||
file, copy the updated ``ceph.conf`` file to the ``/etc/ceph`` directory of
|
||||
other hosts in your cluster.
|
||||
|
||||
If your Ceph cluster is older than Luminous, instead of using ``ceph osd purge``,
|
||||
you need to perform this step manually:
|
||||
If your Ceph cluster is older than Luminous, instead of using ``ceph osd
|
||||
purge``, you need to perform this step manually:
|
||||
|
||||
|
||||
#. Remove the OSD from the CRUSH map so that it no longer receives data. You may
|
||||
also decompile the CRUSH map, remove the OSD from the device list, remove the
|
||||
device as an item in the host bucket or remove the host bucket (if it's in the
|
||||
CRUSH map and you intend to remove the host), recompile the map and set it.
|
||||
See `Remove an OSD`_ for details. ::
|
||||
See `Remove an OSD`_ for details:
|
||||
|
||||
ceph osd crush remove {name}
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Remove the OSD authentication key. ::
|
||||
ceph osd crush remove {name}
|
||||
|
||||
ceph auth del osd.{osd-num}
|
||||
#. Remove the OSD authentication key:
|
||||
|
||||
The value of ``ceph`` for ``ceph-{osd-num}`` in the path is the ``$cluster-$id``.
|
||||
If your cluster name differs from ``ceph``, use your cluster name instead.
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Remove the OSD. ::
|
||||
ceph auth del osd.{osd-num}
|
||||
|
||||
ceph osd rm {osd-num}
|
||||
#for example
|
||||
ceph osd rm 1
|
||||
The value of ``ceph`` for ``ceph-{osd-num}`` in the path is the
|
||||
``$cluster-$id``. If your cluster name differs from ``ceph``, use your
|
||||
cluster name instead.
|
||||
|
||||
#. Remove the OSD:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd rm {osd-num}
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd rm 1
|
||||
|
||||
.. _Remove an OSD: ../crush-map#removeosd
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
.. _balancer:
|
||||
|
||||
Balancer
|
||||
@ -11,9 +10,11 @@ supervised fashion.
|
||||
Status
|
||||
------
|
||||
|
||||
The current status of the balancer can be checked at any time with::
|
||||
The current status of the balancer can be checked at any time with:
|
||||
|
||||
ceph balancer status
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer status
|
||||
|
||||
|
||||
Automatic balancing
|
||||
@ -21,9 +22,11 @@ Automatic balancing
|
||||
|
||||
The automatic balancing feature is enabled by default in ``upmap``
|
||||
mode. Please refer to :ref:`upmap` for more details. The balancer can be
|
||||
turned off with::
|
||||
turned off with:
|
||||
|
||||
ceph balancer off
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer off
|
||||
|
||||
The balancer mode can be changed to ``crush-compat`` mode, which is
|
||||
backward compatible with older clients, and will make small changes to
|
||||
@ -40,37 +43,51 @@ healed itself).
|
||||
When the cluster is healthy, the balancer will throttle its changes
|
||||
such that the percentage of PGs that are misplaced (i.e., that need to
|
||||
be moved) is below a threshold of (by default) 5%. The
|
||||
``target_max_misplaced_ratio`` threshold can be adjusted with::
|
||||
``target_max_misplaced_ratio`` threshold can be adjusted with:
|
||||
|
||||
ceph config set mgr target_max_misplaced_ratio .07 # 7%
|
||||
.. prompt:: bash $
|
||||
|
||||
Set the number of seconds to sleep in between runs of the automatic balancer::
|
||||
ceph config set mgr target_max_misplaced_ratio .07 # 7%
|
||||
|
||||
ceph config set mgr mgr/balancer/sleep_interval 60
|
||||
Set the number of seconds to sleep in between runs of the automatic balancer:
|
||||
|
||||
Set the time of day to begin automatic balancing in HHMM format::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/begin_time 0000
|
||||
ceph config set mgr mgr/balancer/sleep_interval 60
|
||||
|
||||
Set the time of day to finish automatic balancing in HHMM format::
|
||||
Set the time of day to begin automatic balancing in HHMM format:
|
||||
|
||||
ceph config set mgr mgr/balancer/end_time 2400
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/begin_time 0000
|
||||
|
||||
Set the time of day to finish automatic balancing in HHMM format:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/end_time 2359
|
||||
|
||||
Restrict automatic balancing to this day of the week or later.
|
||||
Uses the same conventions as crontab, 0 or 7 is Sunday, 1 is Monday, and so on::
|
||||
Uses the same conventions as crontab, 0 is Sunday, 1 is Monday, and so on:
|
||||
|
||||
ceph config set mgr mgr/balancer/begin_weekday 0
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/begin_weekday 0
|
||||
|
||||
Restrict automatic balancing to this day of the week or earlier.
|
||||
Uses the same conventions as crontab, 0 or 7 is Sunday, 1 is Monday, and so on::
|
||||
Uses the same conventions as crontab, 0 is Sunday, 1 is Monday, and so on:
|
||||
|
||||
ceph config set mgr mgr/balancer/end_weekday 7
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/end_weekday 6
|
||||
|
||||
Pool IDs to which the automatic balancing will be limited.
|
||||
The default for this is an empty string, meaning all pools will be balanced.
|
||||
The numeric pool IDs can be gotten with the :command:`ceph osd pool ls detail` command::
|
||||
The numeric pool IDs can be gotten with the :command:`ceph osd pool ls detail` command:
|
||||
|
||||
ceph config set mgr mgr/balancer/pool_ids 1,2,3
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/balancer/pool_ids 1,2,3
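
The numeric IDs themselves come from the pool listing; pool names and IDs will
of course differ per cluster:

.. prompt:: bash $

   ceph osd pool ls detail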
|
||||
|
||||
|
||||
Modes
|
||||
@ -112,9 +129,11 @@ There are currently two supported balancer modes:
|
||||
|
||||
Note that using upmap requires that all clients be Luminous or newer.
|
||||
|
||||
The default mode is ``upmap``. The mode can be adjusted with::
|
||||
The default mode is ``upmap``. The mode can be adjusted with:
|
||||
|
||||
ceph balancer mode crush-compat
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer mode crush-compat
|
||||
|
||||
Supervised optimization
|
||||
-----------------------
|
||||
@ -125,43 +144,63 @@ The balancer operation is broken into a few distinct phases:
|
||||
#. evaluating the quality of the data distribution, either for the current PG distribution, or the PG distribution that would result after executing a *plan*
|
||||
#. executing the *plan*
|
||||
|
||||
To evaluate and score the current distribution::
|
||||
To evaluate and score the current distribution:
|
||||
|
||||
ceph balancer eval
|
||||
.. prompt:: bash $
|
||||
|
||||
You can also evaluate the distribution for a single pool with::
|
||||
ceph balancer eval
|
||||
|
||||
ceph balancer eval <pool-name>
|
||||
You can also evaluate the distribution for a single pool with:
|
||||
|
||||
Greater detail for the evaluation can be seen with::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer eval-verbose ...
|
||||
ceph balancer eval <pool-name>
|
||||
|
||||
Greater detail for the evaluation can be seen with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer eval-verbose ...
|
||||
|
||||
The balancer can generate a plan, using the currently configured mode, with::
|
||||
The balancer can generate a plan, using the currently configured mode, with:
|
||||
|
||||
ceph balancer optimize <plan-name>
|
||||
.. prompt:: bash $
|
||||
|
||||
The name is provided by the user and can be any useful identifying string. The contents of a plan can be seen with::
|
||||
ceph balancer optimize <plan-name>
|
||||
|
||||
ceph balancer show <plan-name>
|
||||
The name is provided by the user and can be any useful identifying string. The contents of a plan can be seen with:
|
||||
|
||||
All plans can be shown with::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer ls
|
||||
ceph balancer show <plan-name>
|
||||
|
||||
Old plans can be discarded with::
|
||||
All plans can be shown with:
|
||||
|
||||
ceph balancer rm <plan-name>
|
||||
.. prompt:: bash $
|
||||
|
||||
Currently recorded plans are shown as part of the status command::
|
||||
ceph balancer ls
|
||||
|
||||
ceph balancer status
|
||||
Old plans can be discarded with:
|
||||
|
||||
The quality of the distribution that would result after executing a plan can be calculated with::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer eval <plan-name>
|
||||
ceph balancer rm <plan-name>
|
||||
|
||||
Assuming the plan is expected to improve the distribution (i.e., it has a lower score than the current cluster state), the user can execute that plan with::
|
||||
Currently recorded plans are shown as part of the status command:
|
||||
|
||||
ceph balancer execute <plan-name>
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer status
|
||||
|
||||
The quality of the distribution that would result after executing a plan can be calculated with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer eval <plan-name>
|
||||
|
||||
Assuming the plan is expected to improve the distribution (i.e., it has a lower score than the current cluster state), the user can execute that plan with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph balancer execute <plan-name>
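
Putting the phases together, a supervised optimization pass might look like the
following sketch, where ``myplan`` is an arbitrary plan name:

.. prompt:: bash $

   ceph balancer optimize myplan
   ceph balancer show myplan
   ceph balancer eval myplan
   ceph balancer execute myplan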
|
||||
|
||||
|
@ -41,50 +41,70 @@ more data migration than should be necessary, so it is not optimal.
|
||||
ID=<osd-id-number>
|
||||
DEVICE=<disk-device>
|
||||
|
||||
You can tell whether a given OSD is FileStore or BlueStore with::
|
||||
You can tell whether a given OSD is FileStore or BlueStore with:
|
||||
|
||||
ceph osd metadata $ID | grep osd_objectstore
|
||||
.. prompt:: bash $
|
||||
|
||||
You can get a current count of filestore vs bluestore with::
|
||||
ceph osd metadata $ID | grep osd_objectstore
|
||||
|
||||
ceph osd count-metadata osd_objectstore
|
||||
You can get a current count of filestore vs bluestore with:
|
||||
|
||||
#. Mark the filestore OSD out::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd out $ID
|
||||
ceph osd count-metadata osd_objectstore
|
||||
|
||||
#. Wait for the data to migrate off the OSD in question::
|
||||
#. Mark the filestore OSD out:
|
||||
|
||||
while ! ceph osd safe-to-destroy $ID ; do sleep 60 ; done
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Stop the OSD::
|
||||
ceph osd out $ID
|
||||
|
||||
systemctl kill ceph-osd@$ID
|
||||
#. Wait for the data to migrate off the OSD in question:
|
||||
|
||||
#. Make note of which device this OSD is using::
|
||||
.. prompt:: bash $
|
||||
|
||||
mount | grep /var/lib/ceph/osd/ceph-$ID
|
||||
while ! ceph osd safe-to-destroy $ID ; do sleep 60 ; done
|
||||
|
||||
#. Unmount the OSD::
|
||||
#. Stop the OSD:
|
||||
|
||||
umount /var/lib/ceph/osd/ceph-$ID
|
||||
.. prompt:: bash $
|
||||
|
||||
systemctl kill ceph-osd@$ID
|
||||
|
||||
#. Make note of which device this OSD is using:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
mount | grep /var/lib/ceph/osd/ceph-$ID
|
||||
|
||||
#. Unmount the OSD:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
umount /var/lib/ceph/osd/ceph-$ID
|
||||
|
||||
#. Destroy the OSD data. Be *EXTREMELY CAREFUL* as this will destroy
|
||||
the contents of the device; be certain the data on the device is
|
||||
not needed (i.e., that the cluster is healthy) before proceeding. ::
|
||||
not needed (i.e., that the cluster is healthy) before proceeding:
|
||||
|
||||
ceph-volume lvm zap $DEVICE
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm zap $DEVICE
|
||||
|
||||
#. Tell the cluster the OSD has been destroyed (and a new OSD can be
|
||||
reprovisioned with the same ID)::
|
||||
reprovisioned with the same ID):
|
||||
|
||||
ceph osd destroy $ID --yes-i-really-mean-it
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd destroy $ID --yes-i-really-mean-it
|
||||
|
||||
#. Reprovision a BlueStore OSD in its place with the same OSD ID.
|
||||
This requires you to identify which device to wipe, based on what you saw
|
||||
mounted above. BE CAREFUL! ::
|
||||
mounted above. BE CAREFUL:
|
||||
|
||||
ceph-volume lvm create --bluestore --data $DEVICE --osd-id $ID
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm create --bluestore --data $DEVICE --osd-id $ID
|
||||
|
||||
#. Repeat.
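
Taken together, one conversion pass over a single OSD might look like the
following sketch. The id ``7`` and device ``/dev/sdb`` are illustrative; use
the values noted in the steps above:

.. prompt:: bash $

   ID=7
   DEVICE=/dev/sdb
   ceph osd out $ID
   while ! ceph osd safe-to-destroy $ID ; do sleep 60 ; done
   systemctl kill ceph-osd@$ID
   umount /var/lib/ceph/osd/ceph-$ID
   ceph-volume lvm zap $DEVICE
   ceph osd destroy $ID --yes-i-really-mean-it
   ceph-volume lvm create --bluestore --data $DEVICE --osd-id $ID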
|
||||
|
||||
@ -127,9 +147,11 @@ doesn't strictly matter). ::
|
||||
|
||||
NEWHOST=<empty-host-name>
|
||||
|
||||
Add the host to the CRUSH hierarchy, but do not attach it to the root::
|
||||
Add the host to the CRUSH hierarchy, but do not attach it to the root:
|
||||
|
||||
ceph osd crush add-bucket $NEWHOST host
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush add-bucket $NEWHOST host
|
||||
|
||||
Make sure the ceph packages are installed.
|
||||
|
||||
@ -142,14 +164,22 @@ space on that host so that all of its data can be migrated off,
|
||||
then you can instead do::
|
||||
|
||||
OLDHOST=<existing-cluster-host-to-offload>
|
||||
ceph osd crush unlink $OLDHOST default
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush unlink $OLDHOST default
|
||||
|
||||
where "default" is the immediate ancestor in the CRUSH map. (For
|
||||
smaller clusters with unmodified configurations this will normally
|
||||
be "default", but it might also be a rack name.) You should now
|
||||
see the host at the top of the OSD tree output with no parent::
|
||||
see the host at the top of the OSD tree output with no parent:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
bin/ceph osd tree
|
||||
|
||||
::
|
||||
|
||||
$ bin/ceph osd tree
|
||||
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
|
||||
-5 0 host oldhost
|
||||
10 ssd 1.00000 osd.10 up 1.00000 1.00000
|
||||
@ -172,13 +202,17 @@ Migration process
|
||||
If you're using a new host, start at step #1. For an existing host,
|
||||
jump to step #5 below.
|
||||
|
||||
#. Provision new BlueStore OSDs for all devices::
|
||||
#. Provision new BlueStore OSDs for all devices:
|
||||
|
||||
ceph-volume lvm create --bluestore --data /dev/$DEVICE
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Verify OSDs join the cluster with::
|
||||
ceph-volume lvm create --bluestore --data /dev/$DEVICE
|
||||
|
||||
ceph osd tree
|
||||
#. Verify OSDs join the cluster with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tree
|
||||
|
||||
You should see the new host ``$NEWHOST`` with all of the OSDs beneath
|
||||
it, but the host should *not* be nested beneath any other node in
|
||||
@ -198,13 +232,17 @@ jump to step #5 below.
|
||||
2 ssd 1.00000 osd.2 up 1.00000 1.00000
|
||||
...
|
||||
|
||||
#. Identify the first target host to convert ::
|
||||
#. Identify the first target host to convert:
|
||||
|
||||
OLDHOST=<existing-cluster-host-to-convert>
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Swap the new host into the old host's position in the cluster::
|
||||
OLDHOST=<existing-cluster-host-to-convert>
|
||||
|
||||
ceph osd crush swap-bucket $NEWHOST $OLDHOST
|
||||
#. Swap the new host into the old host's position in the cluster:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush swap-bucket $NEWHOST $OLDHOST
|
||||
|
||||
At this point all data on ``$OLDHOST`` will start migrating to OSDs
|
||||
on ``$NEWHOST``. If there is a difference in the total capacity of
|
||||
@ -212,26 +250,34 @@ jump to step #5 below.
|
||||
other nodes in the cluster, but as long as the hosts are similarly
|
||||
sized this will be a relatively small amount of data.
|
||||
|
||||
#. Wait for data migration to complete::
|
||||
#. Wait for data migration to complete:
|
||||
|
||||
while ! ceph osd safe-to-destroy $(ceph osd ls-tree $OLDHOST); do sleep 60 ; done
|
||||
.. prompt:: bash $
|
||||
|
||||
#. Stop all old OSDs on the now-empty ``$OLDHOST``::
|
||||
while ! ceph osd safe-to-destroy $(ceph osd ls-tree $OLDHOST); do sleep 60 ; done
|
||||
|
||||
ssh $OLDHOST
|
||||
#. Stop all old OSDs on the now-empty ``$OLDHOST``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ssh $OLDHOST
|
||||
systemctl kill ceph-osd.target
|
||||
umount /var/lib/ceph/osd/ceph-*
|
||||
|
||||
#. Destroy and purge the old OSDs::
|
||||
#. Destroy and purge the old OSDs:
|
||||
|
||||
for osd in `ceph osd ls-tree $OLDHOST`; do
|
||||
.. prompt:: bash $
|
||||
|
||||
for osd in `ceph osd ls-tree $OLDHOST`; do
|
||||
ceph osd purge $osd --yes-i-really-mean-it
|
||||
done
|
||||
|
||||
#. Wipe the old OSD devices. This requires you to identify which
|
||||
devices are to be wiped manually (BE CAREFUL!). For each device,::
|
||||
devices are to be wiped manually (BE CAREFUL!). For each device:
|
||||
|
||||
ceph-volume lvm zap $DEVICE
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph-volume lvm zap $DEVICE
|
||||
|
||||
#. Use the now-empty host as the new host, and repeat::
|
||||
|
||||
|
@ -45,16 +45,22 @@ and the backing storage tier automatically. However, admins have the ability to
|
||||
configure how this migration takes place by setting the ``cache-mode``. There are
|
||||
two main scenarios:
|
||||
|
||||
- **writeback** mode: When admins configure tiers with ``writeback`` mode, Ceph
|
||||
clients write data to the cache tier and receive an ACK from the cache tier.
|
||||
In time, the data written to the cache tier migrates to the storage tier
|
||||
and gets flushed from the cache tier. Conceptually, the cache tier is
|
||||
overlaid "in front" of the backing storage tier. When a Ceph client needs
|
||||
data that resides in the storage tier, the cache tiering agent migrates the
|
||||
data to the cache tier on read, then it is sent to the Ceph client.
|
||||
Thereafter, the Ceph client can perform I/O using the cache tier, until the
|
||||
data becomes inactive. This is ideal for mutable data (e.g., photo/video
|
||||
editing, transactional data, etc.).
|
||||
- **writeback** mode: If the base tier and the cache tier are configured in
|
||||
``writeback`` mode, Ceph clients receive an ACK from the base tier every time
|
||||
they write data to it. Then the cache tiering agent determines whether
|
||||
``osd_tier_default_cache_min_write_recency_for_promote`` has been set. If it
|
||||
has been set and the data has been written more than a specified number of
|
||||
times per interval, the data is promoted to the cache tier.
|
||||
|
||||
When Ceph clients need access to data stored in the base tier, the cache
|
||||
tiering agent reads the data from the base tier and returns it to the client.
|
||||
While data is being read from the base tier, the cache tiering agent consults
|
||||
the value of ``osd_tier_default_cache_min_read_recency_for_promote`` and
|
||||
decides whether to promote that data from the base tier to the cache tier.
|
||||
When data has been promoted from the base tier to the cache tier, the Ceph
|
||||
client is able to perform I/O operations on it using the cache tier. This is
|
||||
well-suited for mutable data (for example, photo/video editing, transactional
|
||||
data).
|
||||
|
||||
- **readproxy** mode: This mode will use any objects that already
|
||||
exist in the cache tier, but if an object is not present in the
|
||||
@ -199,62 +205,82 @@ Creating a Cache Tier
|
||||
=====================
|
||||
|
||||
Setting up a cache tier involves associating a backing storage pool with
|
||||
a cache pool ::
|
||||
a cache pool:
|
||||
|
||||
ceph osd tier add {storagepool} {cachepool}
|
||||
.. prompt:: bash $
|
||||
|
||||
For example ::
|
||||
ceph osd tier add {storagepool} {cachepool}
|
||||
|
||||
ceph osd tier add cold-storage hot-storage
|
||||
For example:
|
||||
|
||||
To set the cache mode, execute the following::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier cache-mode {cachepool} {cache-mode}
|
||||
ceph osd tier add cold-storage hot-storage
|
||||
|
||||
For example::
|
||||
To set the cache mode, execute the following:
|
||||
|
||||
ceph osd tier cache-mode hot-storage writeback
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier cache-mode {cachepool} {cache-mode}
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier cache-mode hot-storage writeback
|
||||
|
||||
The cache tiers overlay the backing storage tier, so they require one
|
||||
additional step: you must direct all client traffic from the storage pool to
|
||||
the cache pool. To direct client traffic directly to the cache pool, execute
|
||||
the following::
|
||||
the following:
|
||||
|
||||
ceph osd tier set-overlay {storagepool} {cachepool}
|
||||
.. prompt:: bash $
|
||||
|
||||
For example::
|
||||
ceph osd tier set-overlay {storagepool} {cachepool}
|
||||
|
||||
ceph osd tier set-overlay cold-storage hot-storage
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier set-overlay cold-storage hot-storage
|
||||
|
||||
|
||||
Configuring a Cache Tier
|
||||
========================
|
||||
|
||||
Cache tiers have several configuration options. You may set
|
||||
cache tier configuration options with the following usage::
|
||||
cache tier configuration options with the following usage:
|
||||
|
||||
ceph osd pool set {cachepool} {key} {value}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set {cachepool} {key} {value}
|
||||
|
||||
See `Pools - Set Pool Values`_ for details.
|
||||
|
||||
|
||||
Target Size and Type
|
||||
--------------------
|
||||
|
||||
Ceph's production cache tiers use a `Bloom Filter`_ for the ``hit_set_type``::
|
||||
Ceph's production cache tiers use a `Bloom Filter`_ for the ``hit_set_type``:
|
||||
|
||||
ceph osd pool set {cachepool} hit_set_type bloom
|
||||
.. prompt:: bash $
|
||||
|
||||
For example::
|
||||
ceph osd pool set {cachepool} hit_set_type bloom
|
||||
|
||||
ceph osd pool set hot-storage hit_set_type bloom
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set hot-storage hit_set_type bloom
|
||||
|
||||
The ``hit_set_count`` and ``hit_set_period`` define how many such HitSets to
|
||||
store, and how much time each HitSet should cover. ::
|
||||
store, and how much time each HitSet should cover:
|
||||
|
||||
ceph osd pool set {cachepool} hit_set_count 12
|
||||
ceph osd pool set {cachepool} hit_set_period 14400
|
||||
ceph osd pool set {cachepool} target_max_bytes 1000000000000
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set {cachepool} hit_set_count 12
|
||||
ceph osd pool set {cachepool} hit_set_period 14400
|
||||
ceph osd pool set {cachepool} target_max_bytes 1000000000000
|
||||
|
||||
.. note:: A larger ``hit_set_count`` results in more RAM consumed by
|
||||
the ``ceph-osd`` process.
|
||||
@ -273,10 +299,12 @@ number of archive HitSets are checked. The object is promoted if the object is
|
||||
found in any of the most recent ``min_read_recency_for_promote`` HitSets.
|
||||
|
||||
A similar parameter can be set for the write operation, which is
|
||||
``min_write_recency_for_promote``. ::
|
||||
``min_write_recency_for_promote``:
|
||||
|
||||
ceph osd pool set {cachepool} min_read_recency_for_promote 2
|
||||
ceph osd pool set {cachepool} min_write_recency_for_promote 2
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set {cachepool} min_read_recency_for_promote 2
|
||||
ceph osd pool set {cachepool} min_write_recency_for_promote 2
|
||||
|
||||
.. note:: The longer the period and the higher the
|
||||
``min_read_recency_for_promote`` and
|
||||
@ -303,22 +331,29 @@ Absolute Sizing
|
||||
|
||||
The cache tiering agent can flush or evict objects based upon the total number
|
||||
of bytes or the total number of objects. To specify a maximum number of bytes,
|
||||
execute the following::
|
||||
execute the following:
|
||||
|
||||
ceph osd pool set {cachepool} target_max_bytes {#bytes}
|
||||
.. prompt:: bash $
|
||||
|
||||
For example, to flush or evict at 1 TB, execute the following::
|
||||
ceph osd pool set {cachepool} target_max_bytes {#bytes}
|
||||
|
||||
ceph osd pool set hot-storage target_max_bytes 1099511627776
|
||||
For example, to flush or evict at 1 TB, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
To specify the maximum number of objects, execute the following::
|
||||
ceph osd pool set hot-storage target_max_bytes 1099511627776
|
||||
|
||||
ceph osd pool set {cachepool} target_max_objects {#objects}
|
||||
To specify the maximum number of objects, execute the following:
|
||||
|
||||
For example, to flush or evict at 1M objects, execute the following::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set hot-storage target_max_objects 1000000
|
||||
ceph osd pool set {cachepool} target_max_objects {#objects}
|
||||
|
||||
For example, to flush or evict at 1M objects, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set hot-storage target_max_objects 1000000
|
||||
|
||||
.. note:: Ceph is not able to determine the size of a cache pool automatically, so
|
||||
the configuration on the absolute size is required here, otherwise the
|
||||
@ -335,59 +370,79 @@ The cache tiering agent can flush or evict objects relative to the size of the
|
||||
cache pool (specified by ``target_max_bytes`` / ``target_max_objects`` in
|
||||
`Absolute sizing`_). When the cache pool consists of a certain percentage of
|
||||
modified (or dirty) objects, the cache tiering agent will flush them to the
|
||||
storage pool. To set the ``cache_target_dirty_ratio``, execute the following::
|
||||
storage pool. To set the ``cache_target_dirty_ratio``, execute the following:
|
||||
|
||||
ceph osd pool set {cachepool} cache_target_dirty_ratio {0.0..1.0}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set {cachepool} cache_target_dirty_ratio {0.0..1.0}
|
||||
|
||||
For example, setting the value to ``0.4`` will begin flushing modified
|
||||
(dirty) objects when they reach 40% of the cache pool's capacity::
|
||||
(dirty) objects when they reach 40% of the cache pool's capacity:
|
||||
|
||||
ceph osd pool set hot-storage cache_target_dirty_ratio 0.4
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set hot-storage cache_target_dirty_ratio 0.4
|
||||
|
||||
When dirty objects reach a certain percentage of the cache pool's capacity, the cache tiering agent flushes dirty
|
||||
objects with a higher speed. To set the ``cache_target_dirty_high_ratio``::
|
||||
objects with a higher speed. To set the ``cache_target_dirty_high_ratio``:
|
||||
|
||||
ceph osd pool set {cachepool} cache_target_dirty_high_ratio {0.0..1.0}
|
||||
.. prompt:: bash $
|
||||
|
||||
For example, setting the value to ``0.6`` will begin aggressively flush dirty objects
|
||||
when they reach 60% of the cache pool's capacity. obviously, we'd better set the value
|
||||
between dirty_ratio and full_ratio::
|
||||
ceph osd pool set {cachepool} cache_target_dirty_high_ratio {0.0..1.0}
|
||||
|
||||
ceph osd pool set hot-storage cache_target_dirty_high_ratio 0.6
|
||||
For example, setting the value to ``0.6`` will begin aggressively flushing dirty
objects when they reach 60% of the cache pool's capacity. This value should
generally be set between the dirty ratio and the full ratio:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set hot-storage cache_target_dirty_high_ratio 0.6
|
||||
|
||||
When the cache pool reaches a certain percentage of its capacity, the cache
|
||||
tiering agent will evict objects to maintain free capacity. To set the
|
||||
``cache_target_full_ratio``, execute the following::
|
||||
``cache_target_full_ratio``, execute the following:
|
||||
|
||||
ceph osd pool set {cachepool} cache_target_full_ratio {0.0..1.0}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set {cachepool} cache_target_full_ratio {0.0..1.0}
|
||||
|
||||
For example, setting the value to ``0.8`` will begin flushing unmodified
|
||||
(clean) objects when they reach 80% of the cache pool's capacity::
|
||||
(clean) objects when they reach 80% of the cache pool's capacity:
|
||||
|
||||
ceph osd pool set hot-storage cache_target_full_ratio 0.8
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set hot-storage cache_target_full_ratio 0.8
|
||||
|
||||
|
||||
Cache Age
|
||||
---------
|
||||
|
||||
You can specify the minimum age of an object before the cache tiering agent
|
||||
flushes a recently modified (or dirty) object to the backing storage pool::
|
||||
flushes a recently modified (or dirty) object to the backing storage pool:
|
||||
|
||||
ceph osd pool set {cachepool} cache_min_flush_age {#seconds}
|
||||
.. prompt:: bash $
|
||||
|
||||
For example, to flush modified (or dirty) objects after 10 minutes, execute
|
||||
the following::
|
||||
ceph osd pool set {cachepool} cache_min_flush_age {#seconds}
|
||||
|
||||
ceph osd pool set hot-storage cache_min_flush_age 600
|
||||
For example, to flush modified (or dirty) objects after 10 minutes, execute the
|
||||
following:
|
||||
|
||||
You can specify the minimum age of an object before it will be evicted from
|
||||
the cache tier::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool {cache-tier} cache_min_evict_age {#seconds}
|
||||
ceph osd pool set hot-storage cache_min_flush_age 600
|
||||
|
||||
For example, to evict objects after 30 minutes, execute the following::
|
||||
You can specify the minimum age of an object before it will be evicted from the
|
||||
cache tier:
|
||||
|
||||
ceph osd pool set hot-storage cache_min_evict_age 1800
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool {cache-tier} cache_min_evict_age {#seconds}
|
||||
|
||||
For example, to evict objects after 30 minutes, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set hot-storage cache_min_evict_age 1800
|
||||
|
||||
|
||||
Removing a Cache Tier
|
||||
@ -403,22 +458,29 @@ Removing a Read-Only Cache
|
||||
Since a read-only cache does not have modified data, you can disable
|
||||
and remove it without losing any recent changes to objects in the cache.
|
||||
|
||||
#. Change the cache-mode to ``none`` to disable it. ::
|
||||
#. Change the cache-mode to ``none`` to disable it:
|
||||
|
||||
ceph osd tier cache-mode {cachepool} none
|
||||
.. prompt:: bash $
|
||||
|
||||
For example::
|
||||
ceph osd tier cache-mode {cachepool} none
|
||||
|
||||
ceph osd tier cache-mode hot-storage none
|
||||
For example:
|
||||
|
||||
#. Remove the cache pool from the backing pool. ::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier remove {storagepool} {cachepool}
|
||||
ceph osd tier cache-mode hot-storage none
|
||||
|
||||
For example::
|
||||
#. Remove the cache pool from the backing pool:
|
||||
|
||||
ceph osd tier remove cold-storage hot-storage
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier remove {storagepool} {cachepool}
|
||||
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier remove cold-storage hot-storage
|
||||
|
||||
|
||||
Removing a Writeback Cache
|
||||
@ -430,41 +492,57 @@ disable and remove it.
|
||||
|
||||
|
||||
#. Change the cache mode to ``proxy`` so that new and modified objects will
|
||||
flush to the backing storage pool. ::
|
||||
flush to the backing storage pool:
|
||||
|
||||
ceph osd tier cache-mode {cachepool} proxy
|
||||
.. prompt:: bash $
|
||||
|
||||
For example::
|
||||
ceph osd tier cache-mode {cachepool} proxy
|
||||
|
||||
ceph osd tier cache-mode hot-storage proxy
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier cache-mode hot-storage proxy
|
||||
|
||||
|
||||
#. Ensure that the cache pool has been flushed. This may take a few minutes::
|
||||
#. Ensure that the cache pool has been flushed. This may take a few minutes:
|
||||
|
||||
rados -p {cachepool} ls
|
||||
.. prompt:: bash $
|
||||
|
||||
rados -p {cachepool} ls
|
||||
|
||||
If the cache pool still has objects, you can flush them manually.
|
||||
For example::
|
||||
For example:
|
||||
|
||||
rados -p {cachepool} cache-flush-evict-all
|
||||
.. prompt:: bash $
|
||||
|
||||
rados -p {cachepool} cache-flush-evict-all
|
||||
|
||||
|
||||
#. Remove the overlay so that clients will not direct traffic to the cache. ::
|
||||
#. Remove the overlay so that clients will not direct traffic to the cache:
|
||||
|
||||
ceph osd tier remove-overlay {storagetier}
|
||||
.. prompt:: bash $
|
||||
|
||||
For example::
|
||||
ceph osd tier remove-overlay {storagetier}
|
||||
|
||||
ceph osd tier remove-overlay cold-storage
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier remove-overlay cold-storage
|
||||
|
||||
|
||||
#. Finally, remove the cache tier pool from the backing storage pool. ::
|
||||
#. Finally, remove the cache tier pool from the backing storage pool:
|
||||
|
||||
ceph osd tier remove {storagepool} {cachepool}
|
||||
.. prompt:: bash $
|
||||
|
||||
For example::
|
||||
ceph osd tier remove {storagepool} {cachepool}
|
||||
|
||||
ceph osd tier remove cold-storage hot-storage
|
||||
For example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tier remove cold-storage hot-storage
|
||||
|
||||
|
||||
.. _Create a Pool: ../pools#create-a-pool
|
||||
|
@ -30,18 +30,24 @@ This mode lets you mark monitors as disallowed, in which case they will
|
||||
participate in the quorum and serve clients, but cannot be elected leader. You
|
||||
may wish to use this if you have some monitors which are known to be far away
|
||||
from clients.
|
||||
You can disallow a leader by running ::
|
||||
You can disallow a leader by running:
|
||||
|
||||
$ ceph mon add disallowed_leader {name}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon add disallowed_leader {name}
|
||||
|
||||
You can remove a monitor from the disallowed list, and allow it to become
|
||||
a leader again, by running ::
|
||||
a leader again, by running:
|
||||
|
||||
$ ceph mon rm disallowed_leader {name}
|
||||
.. prompt:: bash $
|
||||
|
||||
The list of disallowed_leaders is included when you run ::
|
||||
ceph mon rm disallowed_leader {name}
|
||||
|
||||
$ ceph mon dump
|
||||
The list of disallowed_leaders is included when you run:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon dump
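
As a quick end-to-end sketch, you can disallow a monitor, confirm the change in
the monmap dump, and allow it again (``c`` is a hypothetical monitor name, and
the exact formatting of the ``disallowed_leaders`` line may vary by release):

.. prompt:: bash $

   ceph mon add disallowed_leader c
   ceph mon dump | grep disallowed_leaders
   ceph mon rm disallowed_leader c
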
|
||||
|
||||
The connectivity Mode
|
||||
=====================
|
||||
@ -58,9 +64,11 @@ Examining connectivity scores
|
||||
=============================
|
||||
The monitors maintain connection scores even if they aren't in
|
||||
the connectivity election mode. You can examine the scores a monitor
|
||||
has by running ::
|
||||
has by running:
|
||||
|
||||
ceph daemon mon.{name} connection scores dump
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph daemon mon.{name} connection scores dump
|
||||
|
||||
Scores for individual connections range from 0-1 inclusive, and also
|
||||
include whether the connection is considered alive or dead (determined by
|
||||
@ -68,9 +76,11 @@ whether it returned its latest ping within the timeout).
|
||||
|
||||
While this would be an unexpected occurrence, if for some reason you experience
|
||||
problems and troubleshooting makes you think your scores have become invalid,
|
||||
you can forget history and reset them by running ::
|
||||
you can forget history and reset them by running:
|
||||
|
||||
ceph daemon mon.{name} connection scores reset
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph daemon mon.{name} connection scores reset
|
||||
|
||||
While resetting scores has low risk (monitors will still quickly determine
|
||||
if a connection is alive or dead, and trend back to the previous scores if they
|
||||
|
@ -8,38 +8,50 @@
|
||||
Monitor Commands
|
||||
================
|
||||
|
||||
Monitor commands are issued using the ``ceph`` utility::
|
||||
Monitor commands are issued using the ``ceph`` utility:
|
||||
|
||||
ceph [-m monhost] {command}
|
||||
.. prompt:: bash $
|
||||
|
||||
The command is usually (though not always) of the form::
|
||||
ceph [-m monhost] {command}
|
||||
|
||||
ceph {subsystem} {command}
|
||||
The command is usually (though not always) of the form:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph {subsystem} {command}
|
||||
|
||||
|
||||
System Commands
|
||||
===============
|
||||
|
||||
Execute the following to display the current cluster status. ::
|
||||
Execute the following to display the current cluster status:
|
||||
|
||||
ceph -s
|
||||
ceph status
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph -s
|
||||
ceph status
|
||||
|
||||
Execute the following to display a running summary of cluster status
|
||||
and major events. ::
|
||||
and major events:
|
||||
|
||||
ceph -w
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph -w
|
||||
|
||||
Execute the following to show the monitor quorum, including which monitors are
|
||||
participating and which one is the leader. ::
|
||||
participating and which one is the leader:
|
||||
|
||||
ceph mon stat
|
||||
ceph quorum_status
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon stat
|
||||
ceph quorum_status
|
||||
|
||||
Execute the following to query the status of a single monitor, including whether
|
||||
or not it is in the quorum. ::
|
||||
or not it is in the quorum:
|
||||
|
||||
ceph tell mon.[id] mon_status
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph tell mon.[id] mon_status
|
||||
|
||||
where the value of ``[id]`` can be determined, e.g., from ``ceph -s``.
|
||||
|
||||
@ -47,21 +59,27 @@ where the value of ``[id]`` can be determined, e.g., from ``ceph -s``.
|
||||
Authentication Subsystem
|
||||
========================
|
||||
|
||||
To add a keyring for an OSD, execute the following::
|
||||
To add a keyring for an OSD, execute the following:
|
||||
|
||||
ceph auth add {osd} {--in-file|-i} {path-to-osd-keyring}
|
||||
.. prompt:: bash $
|
||||
|
||||
To list the cluster's keys and their capabilities, execute the following::
|
||||
ceph auth add {osd} {--in-file|-i} {path-to-osd-keyring}
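
For example, to add the keyring for ``osd.0`` (the path shown is the usual
location for a package-based install and may differ on your system):

.. prompt:: bash $

   ceph auth add osd.0 -i /var/lib/ceph/osd/ceph-0/keyring
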
|
||||
|
||||
ceph auth ls
|
||||
To list the cluster's keys and their capabilities, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph auth ls
|
||||
|
||||
|
||||
Placement Group Subsystem
|
||||
=========================
|
||||
|
||||
To display the statistics for all placement groups (PGs), execute the following::
|
||||
To display the statistics for all placement groups (PGs), execute the following:
|
||||
|
||||
ceph pg dump [--format {format}]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph pg dump [--format {format}]
|
||||
|
||||
The valid formats are ``plain`` (default), ``json``, ``json-pretty``, ``xml``, and ``xml-pretty``.
|
||||
When implementing monitoring and other tools, it is best to use ``json`` format.
|
||||
@ -70,9 +88,11 @@ less variable from release to release. The ``jq`` utility can be invaluable whe
|
||||
data from JSON output.
|
||||
|
||||
To display the statistics for all placement groups stuck in a specified state,
|
||||
execute the following::
|
||||
execute the following:
|
||||
|
||||
ceph pg dump_stuck inactive|unclean|stale|undersized|degraded [--format {format}] [-t|--threshold {seconds}]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph pg dump_stuck inactive|unclean|stale|undersized|degraded [--format {format}] [-t|--threshold {seconds}]
|
||||
|
||||
|
||||
``--format`` may be ``plain`` (default), ``json``, ``json-pretty``, ``xml``, or ``xml-pretty``.
|
||||
@ -90,9 +110,11 @@ reported to the monitor cluster in a while (configured by
|
||||
``mon_osd_report_timeout``).
|
||||
|
||||
Delete "lost" objects or revert them to their prior state, either a previous version
|
||||
or delete them if they were just created. ::
|
||||
or delete them if they were just created:
|
||||
|
||||
ceph pg {pgid} mark_unfound_lost revert|delete
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph pg {pgid} mark_unfound_lost revert|delete
|
||||
|
||||
|
||||
.. _osd-subsystem:
|
||||
@ -100,105 +122,149 @@ or delete them if they were just created. ::
|
||||
OSD Subsystem
|
||||
=============
|
||||
|
||||
Query OSD subsystem status. ::
|
||||
Query OSD subsystem status:
|
||||
|
||||
ceph osd stat
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd stat
|
||||
|
||||
Write a copy of the most recent OSD map to a file. See
|
||||
:ref:`osdmaptool <osdmaptool>`. ::
|
||||
:ref:`osdmaptool <osdmaptool>`:
|
||||
|
||||
ceph osd getmap -o file
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd getmap -o file
|
||||
|
||||
Write a copy of the crush map from the most recent OSD map to
|
||||
file. ::
|
||||
a file:
|
||||
|
||||
ceph osd getcrushmap -o file
|
||||
.. prompt:: bash $
|
||||
|
||||
The foregoing is functionally equivalent to ::
|
||||
ceph osd getcrushmap -o file
|
||||
|
||||
ceph osd getmap -o /tmp/osdmap
|
||||
osdmaptool /tmp/osdmap --export-crush file
|
||||
The foregoing is functionally equivalent to:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd getmap -o /tmp/osdmap
|
||||
osdmaptool /tmp/osdmap --export-crush file
|
||||
|
||||
Dump the OSD map. Valid formats for ``-f`` are ``plain``, ``json``, ``json-pretty``,
|
||||
``xml``, and ``xml-pretty``. If no ``--format`` option is given, the OSD map is
|
||||
dumped as plain text. As above, JSON format is best for tools, scripting, and other automation. ::
|
||||
dumped as plain text. As above, JSON format is best for tools, scripting, and other automation:
|
||||
|
||||
ceph osd dump [--format {format}]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd dump [--format {format}]
|
||||
|
||||
Dump the OSD map as a tree with one line per OSD containing weight
|
||||
and state. ::
|
||||
and state:
|
||||
|
||||
ceph osd tree [--format {format}]
|
||||
.. prompt:: bash $
|
||||
|
||||
Find out where a specific object is or would be stored in the system::
|
||||
ceph osd tree [--format {format}]
|
||||
|
||||
ceph osd map <pool-name> <object-name>
|
||||
Find out where a specific object is or would be stored in the system:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd map <pool-name> <object-name>
|
||||
|
||||
Add or move a new item (OSD) with the given id/name/weight at the specified
|
||||
location. ::
|
||||
location:
|
||||
|
||||
ceph osd crush set {id} {weight} [{loc1} [{loc2} ...]]
|
||||
.. prompt:: bash $
|
||||
|
||||
Remove an existing item (OSD) from the CRUSH map. ::
|
||||
ceph osd crush set {id} {weight} [{loc1} [{loc2} ...]]
|
||||
|
||||
ceph osd crush remove {name}
|
||||
Remove an existing item (OSD) from the CRUSH map:
|
||||
|
||||
Remove an existing bucket from the CRUSH map. ::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush remove {bucket-name}
|
||||
ceph osd crush remove {name}
|
||||
|
||||
Move an existing bucket from one position in the hierarchy to another. ::
|
||||
Remove an existing bucket from the CRUSH map:
|
||||
|
||||
ceph osd crush move {id} {loc1} [{loc2} ...]
|
||||
.. prompt:: bash $
|
||||
|
||||
Set the weight of the item given by ``{name}`` to ``{weight}``. ::
|
||||
ceph osd crush remove {bucket-name}
|
||||
|
||||
ceph osd crush reweight {name} {weight}
|
||||
Move an existing bucket from one position in the hierarchy to another:
|
||||
|
||||
Mark an OSD as ``lost``. This may result in permanent data loss. Use with caution. ::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd lost {id} [--yes-i-really-mean-it]
|
||||
ceph osd crush move {id} {loc1} [{loc2} ...]
|
||||
|
||||
Set the weight of the item given by ``{name}`` to ``{weight}``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush reweight {name} {weight}
|
||||
|
||||
Mark an OSD as ``lost``. This may result in permanent data loss. Use with caution:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd lost {id} [--yes-i-really-mean-it]
|
||||
|
||||
Create a new OSD. If no UUID is given, it will be set automatically when the OSD
|
||||
starts up. ::
|
||||
starts up:
|
||||
|
||||
ceph osd create [{uuid}]
|
||||
.. prompt:: bash $
|
||||
|
||||
Remove the given OSD(s). ::
|
||||
ceph osd create [{uuid}]
|
||||
|
||||
ceph osd rm [{id}...]
|
||||
Remove the given OSD(s):
|
||||
|
||||
Query the current ``max_osd`` parameter in the OSD map. ::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd getmaxosd
|
||||
ceph osd rm [{id}...]
|
||||
|
||||
Import the given crush map. ::
|
||||
Query the current ``max_osd`` parameter in the OSD map:
|
||||
|
||||
ceph osd setcrushmap -i file
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd getmaxosd
|
||||
|
||||
Import the given crush map:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd setcrushmap -i file
|
||||
|
||||
Set the ``max_osd`` parameter in the OSD map. This defaults to 10000 now so
|
||||
most admins will never need to adjust this. ::
|
||||
most admins will never need to adjust this:
|
||||
|
||||
ceph osd setmaxosd
|
||||
.. prompt:: bash $
|
||||
|
||||
Mark OSD ``{osd-num}`` down. ::
|
||||
ceph osd setmaxosd
|
||||
|
||||
ceph osd down {osd-num}
|
||||
Mark OSD ``{osd-num}`` down:
|
||||
|
||||
Mark OSD ``{osd-num}`` out of the distribution (i.e. allocated no data). ::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd out {osd-num}
|
||||
ceph osd down {osd-num}
|
||||
|
||||
Mark ``{osd-num}`` in the distribution (i.e. allocated data). ::
|
||||
Mark OSD ``{osd-num}`` out of the distribution (i.e. allocated no data):
|
||||
|
||||
ceph osd in {osd-num}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd out {osd-num}
|
||||
|
||||
Mark ``{osd-num}`` in the distribution (i.e. allocated data):
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd in {osd-num}
|
||||
|
||||
Set or clear the pause flags in the OSD map. If set, no IO requests
|
||||
will be sent to any OSD. Clearing the flags via unpause results in
|
||||
resending pending requests. ::
|
||||
resending pending requests:
|
||||
|
||||
ceph osd pause
|
||||
ceph osd unpause
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pause
|
||||
ceph osd unpause
|
||||
|
||||
Set the override weight (reweight) of ``{osd-num}`` to ``{weight}``. Two OSDs with the
|
||||
same weight will receive roughly the same number of I/O requests and
|
||||
@ -209,9 +275,11 @@ otherwise live on this drive. It does not change weights assigned
|
||||
to the buckets above the OSD in the crush map, and is a corrective
|
||||
measure in case the normal CRUSH distribution is not working out quite
|
||||
right. For instance, if one of your OSDs is at 90% and the others are
|
||||
at 50%, you could reduce this weight to compensate. ::
|
||||
at 50%, you could reduce this weight to compensate:
|
||||
|
||||
ceph osd reweight {osd-num} {weight}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd reweight {osd-num} {weight}
|
||||
|
||||
Balance OSD fullness by reducing the override weight of OSDs which are
|
||||
overly utilized. Note that these override aka ``reweight`` values
|
||||
@ -219,9 +287,11 @@ default to 1.00000 and are relative only to each other; they not absolute.
|
||||
It is crucial to distinguish them from CRUSH weights, which reflect the
|
||||
absolute capacity of a bucket in TiB. By default this command adjusts
|
||||
override weight on OSDs which have + or - 20% of the average utilization,
|
||||
but if you include a ``threshold`` that percentage will be used instead. ::
|
||||
but if you include a ``threshold`` that percentage will be used instead. :
|
||||
|
||||
ceph osd reweight-by-utilization [threshold [max_change [max_osds]]] [--no-increasing]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd reweight-by-utilization [threshold [max_change [max_osds]]] [--no-increasing]
|
||||
|
||||
To limit the step by which any OSD's reweight will be changed, specify
|
||||
``max_change``, which defaults to 0.05. To limit the number of OSDs that will
|
||||
@ -230,9 +300,11 @@ parameters can speed leveling of OSD utilization, at the potential cost of
|
||||
greater impact on client operations due to more data moving at once.
|
||||
|
||||
To determine which and how many PGs and OSDs will be affected by a given invocation
|
||||
you can test before executing. ::
|
||||
you can test before executing:
|
||||
|
||||
ceph osd test-reweight-by-utilization [threshold [max_change max_osds]] [--no-increasing]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd test-reweight-by-utilization [threshold [max_change max_osds]] [--no-increasing]
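
For example, to preview and then apply an adjustment using a threshold of 115%,
a maximum per-OSD change of 0.05, and at most 10 OSDs touched (all values here
are illustrative):

.. prompt:: bash $

   ceph osd test-reweight-by-utilization 115 0.05 10
   ceph osd reweight-by-utilization 115 0.05 10
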
|
||||
|
||||
Adding ``--no-increasing`` to either command prevents increasing any
|
||||
override weights that are currently < 1.00000. This can be useful when
|
||||
@ -243,33 +315,46 @@ Deployments utilizing Nautilus (or later revisions of Luminous and Mimic)
|
||||
that have no pre-Luminous clients may instead wish to enable the
|
||||
``balancer`` module for ``ceph-mgr``.
|
||||
|
||||
Add/remove an IP address to/from the blocklist. When adding an address,
|
||||
Add/remove an IP address or CIDR range to/from the blocklist.
|
||||
When adding to the blocklist,
|
||||
you can specify how long it should be blocklisted in seconds; otherwise,
|
||||
it will default to 1 hour. A blocklisted address is prevented from
|
||||
connecting to any OSD. Blocklisting is most often used to prevent a
|
||||
lagging metadata server from making bad changes to data on the OSDs.
|
||||
connecting to any OSD. If you blocklist an IP or range containing an OSD, be aware
|
||||
that OSD will also be prevented from performing operations on its peers where it
|
||||
acts as a client. (This includes tiering and copy-from functionality.)
|
||||
|
||||
If you want to blocklist a range (in CIDR format), you may do so by
|
||||
including the ``range`` keyword.
|
||||
|
||||
These commands are mostly only useful for failure testing, as
|
||||
blocklists are normally maintained automatically and shouldn't need
|
||||
manual intervention. ::
|
||||
manual intervention:
|
||||
|
||||
ceph osd blocklist add ADDRESS[:source_port] [TIME]
|
||||
ceph osd blocklist rm ADDRESS[:source_port]
|
||||
.. prompt:: bash $
|
||||
|
||||
Creates/deletes a snapshot of a pool. ::
|
||||
ceph osd blocklist ["range"] add ADDRESS[:source_port][/netmask_bits] [TIME]
|
||||
ceph osd blocklist ["range"] rm ADDRESS[:source_port][/netmask_bits]
|
||||
|
||||
ceph osd pool mksnap {pool-name} {snap-name}
|
||||
ceph osd pool rmsnap {pool-name} {snap-name}
|
||||
Creates/deletes a snapshot of a pool:
|
||||
|
||||
Creates/deletes/renames a storage pool. ::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool create {pool-name} [pg_num [pgp_num]]
|
||||
ceph osd pool delete {pool-name} [{pool-name} --yes-i-really-really-mean-it]
|
||||
ceph osd pool rename {old-name} {new-name}
|
||||
ceph osd pool mksnap {pool-name} {snap-name}
|
||||
ceph osd pool rmsnap {pool-name} {snap-name}
|
||||
|
||||
Changes a pool setting. ::
|
||||
Creates/deletes/renames a storage pool:
|
||||
|
||||
ceph osd pool set {pool-name} {field} {value}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool create {pool-name} [pg_num [pgp_num]]
|
||||
ceph osd pool delete {pool-name} [{pool-name} --yes-i-really-really-mean-it]
|
||||
ceph osd pool rename {old-name} {new-name}
|
||||
|
||||
Changes a pool setting:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set {pool-name} {field} {value}
|
||||
|
||||
Valid fields are:
|
||||
|
||||
@ -278,9 +363,11 @@ Valid fields are:
|
||||
* ``pgp_num``: Effective number when calculating pg placement.
|
||||
* ``crush_rule``: rule number for mapping placement.
|
||||
|
||||
Get the value of a pool setting. ::
|
||||
Get the value of a pool setting:
|
||||
|
||||
ceph osd pool get {pool-name} {field}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool get {pool-name} {field}
|
||||
|
||||
Valid fields are:
|
||||
|
||||
@ -288,49 +375,67 @@ Valid fields are:
|
||||
* ``pgp_num``: Effective number of placement groups when calculating placement.
|
||||
|
||||
|
||||
Sends a scrub command to OSD ``{osd-num}``. To send the command to all OSDs, use ``*``. ::
|
||||
Sends a scrub command to OSD ``{osd-num}``. To send the command to all OSDs, use ``*``:
|
||||
|
||||
ceph osd scrub {osd-num}
|
||||
.. prompt:: bash $
|
||||
|
||||
Sends a repair command to OSD.N. To send the command to all OSDs, use ``*``. ::
|
||||
ceph osd scrub {osd-num}
|
||||
|
||||
ceph osd repair N
|
||||
Sends a repair command to OSD.N. To send the command to all OSDs, use ``*``:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd repair N
|
||||
|
||||
Runs a simple throughput benchmark against OSD.N, writing ``TOTAL_DATA_BYTES``
|
||||
in write requests of ``BYTES_PER_WRITE`` each. By default, the test
|
||||
writes 1 GB in total in 4-MB increments.
|
||||
The benchmark is non-destructive and will not overwrite existing live
|
||||
OSD data, but might temporarily affect the performance of clients
|
||||
concurrently accessing the OSD. ::
|
||||
concurrently accessing the OSD:
|
||||
|
||||
ceph tell osd.N bench [TOTAL_DATA_BYTES] [BYTES_PER_WRITE]
|
||||
.. prompt:: bash $
|
||||
|
||||
To clear an OSD's caches between benchmark runs, use the 'cache drop' command ::
|
||||
ceph tell osd.N bench [TOTAL_DATA_BYTES] [BYTES_PER_WRITE]
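
For example, to run a smaller test that writes 100 MiB in 4 MiB writes against
``osd.0`` (104857600 = 100 * 1024 * 1024 bytes and 4194304 = 4 * 1024 * 1024
bytes; the OSD id is illustrative):

.. prompt:: bash $

   ceph tell osd.0 bench 104857600 4194304
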
|
||||
|
||||
ceph tell osd.N cache drop
|
||||
To clear an OSD's caches between benchmark runs, use the 'cache drop' command:
|
||||
|
||||
To get the cache statistics of an OSD, use the 'cache status' command ::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph tell osd.N cache status
|
||||
ceph tell osd.N cache drop
|
||||
|
||||
To get the cache statistics of an OSD, use the 'cache status' command:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph tell osd.N cache status
|
||||
|
||||
MDS Subsystem
|
||||
=============
|
||||
|
||||
Change configuration parameters on a running mds. ::
|
||||
Change configuration parameters on a running mds:
|
||||
|
||||
ceph tell mds.{mds-id} config set {setting} {value}
|
||||
.. prompt:: bash $
|
||||
|
||||
Example::
|
||||
ceph tell mds.{mds-id} config set {setting} {value}
|
||||
|
||||
ceph tell mds.0 config set debug_ms 1
|
||||
Example:
|
||||
|
||||
Enables debug messages. ::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mds stat
|
||||
ceph tell mds.0 config set debug_ms 1
|
||||
|
||||
Displays the status of all metadata servers. ::
|
||||
Enables debug messages.
|
||||
|
||||
ceph mds fail 0
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mds stat
|
||||
|
||||
Displays the status of all metadata servers.
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mds fail 0
|
||||
|
||||
Marks the active MDS as failed, triggering failover to a standby if present.
|
||||
|
||||
@ -340,18 +445,24 @@ Marks the active MDS as failed, triggering failover to a standby if present.
|
||||
Mon Subsystem
|
||||
=============
|
||||
|
||||
Show monitor stats::
|
||||
Show monitor stats:
|
||||
|
||||
ceph mon stat
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon stat
|
||||
|
||||
::
|
||||
|
||||
e2: 3 mons at {a=127.0.0.1:40000/0,b=127.0.0.1:40001/0,c=127.0.0.1:40002/0}, election epoch 6, quorum 0,1,2 a,b,c
|
||||
|
||||
|
||||
The ``quorum`` list at the end lists monitor nodes that are part of the current quorum.
|
||||
|
||||
This is also available more directly::
|
||||
This is also available more directly:
|
||||
|
||||
ceph quorum_status -f json-pretty
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph quorum_status -f json-pretty
|
||||
|
||||
.. code-block:: javascript
|
||||
|
||||
@ -405,9 +516,11 @@ This is also available more directly::
|
||||
|
||||
The above will block until a quorum is reached.
|
||||
|
||||
For a status of just a single monitor::
|
||||
For a status of just a single monitor:
|
||||
|
||||
ceph tell mon.[name] mon_status
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph tell mon.[name] mon_status
|
||||
|
||||
where the value of ``[name]`` can be taken from ``ceph quorum_status``. Sample
|
||||
output::
|
||||
@ -469,10 +582,14 @@ output::
|
||||
}
|
||||
}
|
||||
|
||||
A dump of the monitor state::
|
||||
A dump of the monitor state:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph mon dump
|
||||
|
||||
::
|
||||
|
||||
dumped monmap epoch 2
|
||||
epoch 2
|
||||
fsid ba807e74-b64f-4b72-b43f-597dfe60ddbc
|
||||
|
@ -35,7 +35,9 @@ Pool Values`_.
|
||||
Get a CRUSH Map
|
||||
---------------
|
||||
|
||||
To get the CRUSH map for your cluster, execute the following::
|
||||
To get the CRUSH map for your cluster, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd getcrushmap -o {compiled-crushmap-filename}
|
||||
|
||||
@ -48,7 +50,9 @@ edit it.
|
||||
Decompile a CRUSH Map
|
||||
---------------------
|
||||
|
||||
To decompile a CRUSH map, execute the following::
|
||||
To decompile a CRUSH map, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
crushtool -d {compiled-crushmap-filename} -o {decompiled-crushmap-filename}
|
||||
|
||||
@ -57,7 +61,9 @@ To decompile a CRUSH map, execute the following::
|
||||
Recompile a CRUSH Map
|
||||
---------------------
|
||||
|
||||
To compile a CRUSH map, execute the following::
|
||||
To compile a CRUSH map, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
crushtool -c {decompiled-crushmap-filename} -o {compiled-crushmap-filename}
|
||||
|
||||
@ -66,7 +72,9 @@ To compile a CRUSH map, execute the following::
|
||||
Set the CRUSH Map
|
||||
-----------------
|
||||
|
||||
To set the CRUSH map for your cluster, execute the following::
|
||||
To set the CRUSH map for your cluster, execute the following:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd setcrushmap -i {compiled-crushmap-filename}
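
Putting the preceding steps together, a typical edit cycle looks something like
this (the file names are arbitrary):

.. prompt:: bash $

   ceph osd getcrushmap -o crushmap.bin
   crushtool -d crushmap.bin -o crushmap.txt
   # edit crushmap.txt, then recompile and inject it
   crushtool -c crushmap.txt -o crushmap-new.bin
   ceph osd setcrushmap -i crushmap-new.bin
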
|
||||
|
||||
@ -118,14 +126,22 @@ Devices may also have a *device class* associated with them (e.g.,
|
||||
``hdd`` or ``ssd``), allowing them to be conveniently targeted by a
|
||||
crush rule.
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
devices
|
||||
|
||||
::
|
||||
|
||||
# devices
|
||||
device {num} {osd.name} [class {class}]
|
||||
|
||||
For example::
|
||||
For example:
|
||||
|
||||
.. prompt:: bash #
|
||||
|
||||
devices
|
||||
|
||||
::
|
||||
|
||||
# devices
|
||||
device 0 osd.0 class ssd
|
||||
device 1 osd.1 class hdd
|
||||
device 2 osd.2
|
||||
@ -136,10 +152,6 @@ is normally a single storage device, a pair of devices (for example,
|
||||
one for data and one for a journal or metadata), or in some cases a
|
||||
small RAID device.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
CRUSH Map Bucket Types
|
||||
----------------------
|
||||
|
||||
@ -157,9 +169,9 @@ media.
|
||||
To add a bucket type to the CRUSH map, create a new line under your list of
|
||||
bucket types. Enter ``type`` followed by a unique numeric ID and a bucket name.
|
||||
By convention, there is one leaf bucket and it is ``type 0``; however, you may
|
||||
give it any name you like (e.g., osd, disk, drive, storage, etc.)::
|
||||
give it any name you like (e.g., osd, disk, drive, storage)::
|
||||
|
||||
#types
|
||||
# types
|
||||
type {num} {bucket-name}
|
||||
|
||||
For example::
|
||||
@ -199,8 +211,8 @@ distribution units, pods, rows, rooms, and data centers. With the exception of
|
||||
the leaf nodes representing OSDs, the rest of the hierarchy is arbitrary, and
|
||||
you may define it according to your own needs.
|
||||
|
||||
We recommend adapting your CRUSH map to your firms's hardware naming conventions
|
||||
and using instances names that reflect the physical hardware. Your naming
|
||||
We recommend adapting your CRUSH map to your firm's hardware naming conventions
|
||||
and using instance names that reflect the physical hardware. Your naming
|
||||
practice can make it easier to administer the cluster and troubleshoot
|
||||
problems when an OSD and/or other hardware malfunctions and the administrator
|
||||
needs access to physical hardware.
|
||||
@ -655,29 +667,38 @@ There are three types of transformations possible:
|
||||
single bucket. For example, in the previous example, we want the
|
||||
``ssd`` bucket to be mapped to the ``default`` bucket.
|
||||
|
||||
The final command to convert the map comprised of the above fragments would be something like::
|
||||
The final command to convert the map comprising the above fragments would be something like:
|
||||
|
||||
$ ceph osd getcrushmap -o original
|
||||
$ crushtool -i original --reclassify \
|
||||
--set-subtree-class default hdd \
|
||||
--reclassify-root default hdd \
|
||||
--reclassify-bucket %-ssd ssd default \
|
||||
--reclassify-bucket ssd ssd default \
|
||||
-o adjusted
|
||||
.. prompt:: bash $
|
||||
|
||||
In order to ensure that the conversion is correct, there is a ``--compare`` command that will test a large sample of inputs to the CRUSH map and ensure that the same result comes back out. These inputs are controlled by the same options that apply to the ``--test`` command. For the above example,::
|
||||
ceph osd getcrushmap -o original
|
||||
crushtool -i original --reclassify \
|
||||
--set-subtree-class default hdd \
|
||||
--reclassify-root default hdd \
|
||||
--reclassify-bucket %-ssd ssd default \
|
||||
--reclassify-bucket ssd ssd default \
|
||||
-o adjusted
|
||||
|
||||
To ensure that the conversion is correct, there is a ``--compare`` command that will test a large sample of inputs against the CRUSH map and check that the same result is output. These inputs are controlled by the same options that apply to the ``--test`` command. For the above example:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
crushtool -i original --compare adjusted
|
||||
|
||||
::
|
||||
|
||||
$ crushtool -i original --compare adjusted
|
||||
rule 0 had 0/10240 mismatched mappings (0)
|
||||
rule 1 had 0/10240 mismatched mappings (0)
|
||||
maps appear equivalent
|
||||
|
||||
If there were difference, you'd see what ratio of inputs are remapped
|
||||
in the parentheses.
|
||||
If there were differences, the ratio of remapped inputs would be reported in
|
||||
the parentheses.
|
||||
|
||||
If you are satisfied with the adjusted map, you can apply it to the cluster with something like::
|
||||
When you are satisfied with the adjusted map, apply it to the cluster with a command of the form:
|
||||
|
||||
ceph osd setcrushmap -i adjusted
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd setcrushmap -i adjusted
|
||||
|
||||
Tuning CRUSH, the hard way
|
||||
--------------------------
|
||||
@ -686,7 +707,9 @@ If you can ensure that all clients are running recent code, you can
|
||||
adjust the tunables by extracting the CRUSH map, modifying the values,
|
||||
and reinjecting it into the cluster.
|
||||
|
||||
* Extract the latest CRUSH map::
|
||||
* Extract the latest CRUSH map:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd getcrushmap -o /tmp/crush
|
||||
|
||||
@ -694,19 +717,25 @@ and reinjecting it into the cluster.
|
||||
for both large and small clusters we tested with. You will need to
|
||||
additionally specify the ``--enable-unsafe-tunables`` argument to
|
||||
``crushtool`` for this to work. Please use this option with
|
||||
extreme care.::
|
||||
extreme care:
|
||||
|
||||
crushtool -i /tmp/crush --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 -o /tmp/crush.new
|
||||
.. prompt:: bash $
|
||||
|
||||
* Reinject modified map::
|
||||
crushtool -i /tmp/crush --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 -o /tmp/crush.new
|
||||
|
||||
ceph osd setcrushmap -i /tmp/crush.new
|
||||
* Reinject modified map:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd setcrushmap -i /tmp/crush.new
|
||||
|
||||
Legacy values
|
||||
-------------
|
||||
|
||||
For reference, the legacy values for the CRUSH tunables can be set
|
||||
with::
|
||||
with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
crushtool -i /tmp/crush --set-choose-local-tries 2 --set-choose-local-fallback-tries 5 --set-choose-total-tries 19 --set-chooseleaf-descend-once 0 --set-chooseleaf-vary-r 0 -o /tmp/crush.legacy
|
||||
|
||||
@ -715,4 +744,4 @@ Further, as noted above, be careful running old versions of the
|
||||
``ceph-osd`` daemon after reverting to legacy values as the feature
|
||||
bit is not perfectly enforced.
|
||||
|
||||
.. _CRUSH - Controlled, Scalable, Decentralized Placement of Replicated Data: https://ceph.com/wp-content/uploads/2016/08/weil-crush-sc06.pdf
|
||||
.. _CRUSH - Controlled, Scalable, Decentralized Placement of Replicated Data: https://ceph.io/assets/pdfs/weil-crush-sc06.pdf
|
||||
|
@ -184,9 +184,11 @@ will be the total of all devices contained beneath it. Normally
|
||||
weights are in units of terabytes (TB).
|
||||
|
||||
You can get a simple view the of CRUSH hierarchy for your cluster,
|
||||
including weights, with::
|
||||
including weights, with:
|
||||
|
||||
ceph osd tree
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tree
|
||||
|
||||
Rules
|
||||
-----
|
||||
@ -208,13 +210,17 @@ erasure coded), the *failure domain*, and optionally a *device class*.
|
||||
In rare cases rules must be written by hand by manually editing the
|
||||
CRUSH map.
|
||||
|
||||
You can see what rules are defined for your cluster with::
|
||||
You can see what rules are defined for your cluster with:
|
||||
|
||||
ceph osd crush rule ls
|
||||
.. prompt:: bash $
|
||||
|
||||
You can view the contents of the rules with::
|
||||
ceph osd crush rule ls
|
||||
|
||||
ceph osd crush rule dump
|
||||
You can view the contents of the rules with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush rule dump
|
||||
|
||||
Device classes
|
||||
--------------
|
||||
@ -224,34 +230,44 @@ default, OSDs automatically set their class at startup to
|
||||
`hdd`, `ssd`, or `nvme` based on the type of device they are backed
|
||||
by.
|
||||
|
||||
The device class for one or more OSDs can be explicitly set with::
|
||||
The device class for one or more OSDs can be explicitly set with:
|
||||
|
||||
ceph osd crush set-device-class <class> <osd-name> [...]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush set-device-class <class> <osd-name> [...]
|
||||
|
||||
Once a device class is set, it cannot be changed to another class
|
||||
until the old class is unset with::
|
||||
until the old class is unset with:
|
||||
|
||||
ceph osd crush rm-device-class <osd-name> [...]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush rm-device-class <osd-name> [...]
|
||||
|
||||
This allows administrators to set device classes without the class
|
||||
being changed on OSD restart or by some other script.
|
||||
|
||||
A placement rule that targets a specific device class can be created with::
|
||||
A placement rule that targets a specific device class can be created with:
|
||||
|
||||
ceph osd crush rule create-replicated <rule-name> <root> <failure-domain> <class>
|
||||
.. prompt:: bash $
|
||||
|
||||
A pool can then be changed to use the new rule with::
|
||||
ceph osd crush rule create-replicated <rule-name> <root> <failure-domain> <class>
|
||||
|
||||
ceph osd pool set <pool-name> crush_rule <rule-name>
|
||||
A pool can then be changed to use the new rule with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd pool set <pool-name> crush_rule <rule-name>
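
For example, to direct a hypothetical pool ``fastpool`` onto SSDs only, with
``host`` as the failure domain (the rule and pool names are illustrative):

.. prompt:: bash $

   ceph osd crush rule create-replicated fast-ssd default host ssd
   ceph osd pool set fastpool crush_rule fast-ssd
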
|
||||
|
||||
Device classes are implemented by creating a "shadow" CRUSH hierarchy
|
||||
for each device class in use that contains only devices of that class.
|
||||
CRUSH rules can then distribute data over the shadow hierarchy.
|
||||
This approach is fully backward compatible with
|
||||
old Ceph clients. You can view the CRUSH hierarchy with shadow items
|
||||
with::
|
||||
with:
|
||||
|
||||
ceph osd crush tree --show-shadow
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush tree --show-shadow
|
||||
|
||||
For older clusters created before Luminous that relied on manually
|
||||
crafted CRUSH maps to maintain per-device-type hierarchies, there is a
|
||||
@ -295,9 +311,11 @@ There are two types of weight sets supported:
|
||||
|
||||
When weight sets are in use, the weights associated with each node in
|
||||
the hierarchy is visible as a separate column (labeled either
|
||||
``(compat)`` or the pool name) from the command::
|
||||
``(compat)`` or the pool name) from the command:
|
||||
|
||||
ceph osd tree
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd tree
|
||||
|
||||
When both *compat* and *per-pool* weight sets are in use, data
|
||||
placement for a particular pool will use its own per-pool weight set
|
||||
@ -320,9 +338,11 @@ Add/Move an OSD
|
||||
.. note: OSDs are normally automatically added to the CRUSH map when
|
||||
the OSD is created. This command is rarely needed.
|
||||
|
||||
To add or move an OSD in the CRUSH map of a running cluster::
|
||||
To add or move an OSD in the CRUSH map of a running cluster:
|
||||
|
||||
ceph osd crush set {name} {weight} root={root} [{bucket-type}={bucket-name} ...]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush set {name} {weight} root={root} [{bucket-type}={bucket-name} ...]
|
||||
|
||||
Where:
|
||||
|
||||
@ -359,9 +379,11 @@ Where:
|
||||
|
||||
|
||||
The following example adds ``osd.0`` to the hierarchy, or moves the
|
||||
OSD from a previous location. ::
|
||||
OSD from a previous location:
|
||||
|
||||
ceph osd crush set osd.0 1.0 root=default datacenter=dc1 room=room1 row=foo rack=bar host=foo-bar-1
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush set osd.0 1.0 root=default datacenter=dc1 room=room1 row=foo rack=bar host=foo-bar-1
|
||||
|
||||
|
||||
Adjust OSD weight
|
||||
@ -372,9 +394,11 @@ Adjust OSD weight
|
||||
is rarely needed.
|
||||
|
||||
To adjust an OSD's CRUSH weight in the CRUSH map of a running cluster, execute
|
||||
the following::
|
||||
the following:
|
||||
|
||||
ceph osd crush reweight {name} {weight}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush reweight {name} {weight}
|
||||
|
||||
Where:
|
||||
|
||||
@ -403,9 +427,11 @@ Remove an OSD
|
||||
``ceph osd purge`` command. This command is rarely needed.
|
||||
|
||||
To remove an OSD from the CRUSH map of a running cluster, execute the
|
||||
following::
|
||||
following:
|
||||
|
||||
ceph osd crush remove {name}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush remove {name}
|
||||
|
||||
Where:
|
||||
|
||||
@ -431,9 +457,11 @@ Add a Bucket
|
||||
``default`` or other root as described below.
|
||||
|
||||
To add a bucket in the CRUSH map of a running cluster, execute the
|
||||
``ceph osd crush add-bucket`` command::
|
||||
``ceph osd crush add-bucket`` command:
|
||||
|
||||
ceph osd crush add-bucket {bucket-name} {bucket-type}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush add-bucket {bucket-name} {bucket-type}
|
||||
|
||||
Where:
|
||||
|
||||
@ -453,17 +481,21 @@ Where:
|
||||
:Example: ``rack``
|
||||
|
||||
|
||||
The following example adds the ``rack12`` bucket to the hierarchy::
|
||||
The following example adds the ``rack12`` bucket to the hierarchy:
|
||||
|
||||
ceph osd crush add-bucket rack12 rack
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush add-bucket rack12 rack
|
||||
|
||||
Move a Bucket
|
||||
-------------
|
||||
|
||||
To move a bucket to a different location or position in the CRUSH map
|
||||
hierarchy, execute the following::
|
||||
hierarchy, execute the following:
|
||||
|
||||
ceph osd crush move {bucket-name} {bucket-type}={bucket-name}, [...]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush move {bucket-name} {bucket-type}={bucket-name}, [...]
|
||||
|
||||
Where:
|
||||
|
||||
@ -484,9 +516,11 @@ Where:
|
||||
Remove a Bucket
|
||||
---------------
|
||||
|
||||
To remove a bucket from the CRUSH hierarchy, execute the following::
|
||||
To remove a bucket from the CRUSH hierarchy, execute the following:
|
||||
|
||||
ceph osd crush remove {bucket-name}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush remove {bucket-name}
|
||||
|
||||
.. note:: A bucket must be empty before removing it from the CRUSH hierarchy.
|
||||
|
||||
@ -499,9 +533,11 @@ Where:
|
||||
:Required: Yes
|
||||
:Example: ``rack12``
|
||||
|
||||
The following example removes the ``rack12`` bucket from the hierarchy::
|
||||
The following example removes the ``rack12`` bucket from the hierarchy:
|
||||
|
||||
ceph osd crush remove rack12
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush remove rack12
|
||||
|
||||
Creating a compat weight set
|
||||
----------------------------
|
||||
@ -509,24 +545,32 @@ Creating a compat weight set
|
||||
.. note: This step is normally done automatically by the ``balancer``
|
||||
module when enabled.
|
||||
|
||||
To create a *compat* weight set::
|
||||
To create a *compat* weight set:
|
||||
|
||||
ceph osd crush weight-set create-compat
|
||||
.. prompt:: bash $
|
||||
|
||||
Weights for the compat weight set can be adjusted with::
|
||||
ceph osd crush weight-set create-compat
|
||||
|
||||
ceph osd crush weight-set reweight-compat {name} {weight}
|
||||
Weights for the compat weight set can be adjusted with:
|
||||
|
||||
The compat weight set can be destroyed with::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush weight-set rm-compat
|
||||
ceph osd crush weight-set reweight-compat {name} {weight}
|
||||
|
||||
The compat weight set can be destroyed with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush weight-set rm-compat
|
||||
|
||||
Creating per-pool weight sets
|
||||
-----------------------------
|
||||
|
||||
To create a weight set for a specific pool,::
|
||||
To create a weight set for a specific pool:
|
||||
|
||||
ceph osd crush weight-set create {pool-name} {mode}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush weight-set create {pool-name} {mode}
|
||||
|
||||
.. note:: Per-pool weight sets require that all servers and daemons
|
||||
run Luminous v12.2.z or later.
|
||||
@ -553,17 +597,23 @@ Where:
|
||||
:Required: Yes
|
||||
:Example: ``flat``
|
||||
|
||||
To adjust the weight of an item in a weight set::
|
||||
To adjust the weight of an item in a weight set:
|
||||
|
||||
ceph osd crush weight-set reweight {pool-name} {item-name} {weight [...]}
|
||||
.. prompt:: bash $
|
||||
|
||||
To list existing weight sets,::
|
||||
ceph osd crush weight-set reweight {pool-name} {item-name} {weight [...]}
|
||||
|
||||
ceph osd crush weight-set ls
|
||||
To list existing weight sets:
|
||||
|
||||
To remove a weight set,::
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush weight-set rm {pool-name}
|
||||
ceph osd crush weight-set ls
|
||||
|
||||
To remove a weight set:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush weight-set rm {pool-name}
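
As a short sketch with a hypothetical pool called ``mypool`` (``flat`` is one
valid mode, and the weight value is illustrative):

.. prompt:: bash $

   ceph osd crush weight-set create mypool flat
   ceph osd crush weight-set reweight mypool osd.0 0.75
   ceph osd crush weight-set ls
   ceph osd crush weight-set rm mypool
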
|
||||
|
||||
Creating a rule for a replicated pool
|
||||
-------------------------------------
|
||||
@ -588,9 +638,11 @@ classify themselves as either ``hdd`` or ``ssd``, depending on the
|
||||
underlying type of device being used. These classes can also be
|
||||
customized.
|
||||
|
||||
To create a replicated rule,::
|
||||
To create a replicated rule:
|
||||
|
||||
ceph osd crush rule create-replicated {name} {root} {failure-domain-type} [{class}]
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush rule create-replicated {name} {root} {failure-domain-type} [{class}]
|
||||
|
||||
Where:
|
||||
|
||||
@ -635,13 +687,17 @@ you must include this information in the *erasure code profile*. A CRUSH
|
||||
rule will then be created from that either explicitly or automatically when
|
||||
the profile is used to create a pool.
|
||||
|
||||
The erasure code profiles can be listed with::
|
||||
The erasure code profiles can be listed with:
|
||||
|
||||
ceph osd erasure-code-profile ls
|
||||
.. prompt:: bash $
|
||||
|
||||
An existing profile can be viewed with::
|
||||
ceph osd erasure-code-profile ls
|
||||
|
||||
ceph osd erasure-code-profile get {profile-name}
|
||||
An existing profile can be viewed with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd erasure-code-profile get {profile-name}
|
||||
|
||||
Normally profiles should never be modified; instead, a new profile
|
||||
should be created and used when creating a new pool or creating a new
|
||||
@ -659,9 +715,11 @@ The erasure code profile properties of interest are:
|
||||
* **crush-device-class**: the device class on which to place data [default: none, meaning all devices are used].
|
||||
* **k** and **m** (and, for the ``lrc`` plugin, **l**): these determine the number of erasure code shards, affecting the resulting CRUSH rule.
|
||||
|
||||
Once a profile is defined, you can create a CRUSH rule with::
|
||||
Once a profile is defined, you can create a CRUSH rule with:
|
||||
|
||||
ceph osd crush rule create-erasure {name} {profile-name}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush rule create-erasure {name} {profile-name}
|
||||
|
||||
.. note: When creating a new pool, it is not actually necessary to
|
||||
explicitly create the rule. If the erasure code profile alone is
|
||||
@ -671,9 +729,11 @@ Once a profile is defined, you can create a CRUSH rule with::
|
||||
Deleting rules
|
||||
--------------
|
||||
|
||||
Rules that are not in use by pools can be deleted with::
|
||||
Rules that are not in use by pools can be deleted with:
|
||||
|
||||
ceph osd crush rule rm {rule-name}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush rule rm {rule-name}
|
||||
|
||||
|
||||
.. _crush-map-tunables:
|
||||
@ -882,14 +942,18 @@ To make this warning go away, you have two options:
|
||||
result in some data movement (possibly as much as 10%). This is the
|
||||
preferred route, but should be taken with care on a production cluster
|
||||
where the data movement may affect performance. You can enable optimal
|
||||
tunables with::
|
||||
tunables with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush tunables optimal
|
||||
|
||||
If things go poorly (e.g., too much load) and not very much
|
||||
progress has been made, or there is a client compatibility problem
|
||||
(old kernel CephFS or RBD clients, or pre-Bobtail ``librados``
|
||||
clients), you can switch back with::
|
||||
clients), you can switch back with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush tunables legacy
|
||||
|
||||
@ -899,7 +963,9 @@ To make this warning go away, you have two options:
|
||||
mon warn on legacy crush tunables = false
|
||||
|
||||
For the change to take effect, you will need to restart the monitors, or
|
||||
apply the option to running monitors with::
|
||||
apply the option to running monitors with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph tell mon.\* config set mon_warn_on_legacy_crush_tunables false
|
||||
|
||||
@ -936,7 +1002,7 @@ sets known as *profiles*. As of the Octopus release these are:
|
||||
* ``firefly``: the values supported by the firefly release
|
||||
* ``hammer``: the values supported by the hammer release
|
||||
* ``jewel``: the values supported by the jewel release
|
||||
* ``optimal``: the best (ie optimal) values of the current version of Ceph
|
||||
* ``optimal``: the best (i.e. optimal) values of the current version of Ceph
|
||||
* ``default``: the default values of a new cluster installed from
|
||||
scratch. These values, which depend on the current version of Ceph,
|
||||
are hardcoded and are generally a mix of optimal and legacy values.
|
||||
@ -944,17 +1010,18 @@ sets known as *profiles*. As of the Octopus release these are:
|
||||
LTS release, or the most recent release for which we generally expect
|
||||
most users to have up-to-date clients for.
|
||||
|
||||
You can apply a profile to a running cluster with the command::
|
||||
You can apply a profile to a running cluster with the command:
|
||||
|
||||
ceph osd crush tunables {PROFILE}
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph osd crush tunables {PROFILE}
|
||||
|
||||
Note that this may result in data movement, potentially quite a bit. Study
|
||||
release notes and documentation carefully before changing the profile on a
|
||||
running cluster, and consider throttling recovery/backfill parameters to
|
||||
limit the impact of a bolus of backfill.
|
||||
|
||||
|
||||
.. _CRUSH - Controlled, Scalable, Decentralized Placement of Replicated Data: https://ceph.com/wp-content/uploads/2016/08/weil-crush-sc06.pdf
|
||||
.. _CRUSH - Controlled, Scalable, Decentralized Placement of Replicated Data: https://ceph.io/assets/pdfs/weil-crush-sc06.pdf
|
||||
|
||||
|
||||
Primary Affinity
|
||||
@ -987,19 +1054,20 @@ interface bandwidth and CPU cycles more evenly.
|
||||
By default, all ceph OSDs have primary affinity of ``1``, which indicates that
|
||||
any OSD may act as a primary with equal probability.
|
||||
|
||||
You can reduce a Ceph OSD's primary affinity so that CRUSH is less likely to choose
|
||||
the OSD as primary in a PG's acting set.::
|
||||
You can reduce a Ceph OSD's primary affinity so that CRUSH is less likely to
|
||||
choose the OSD as primary in a PG's acting set:
|
||||
|
||||
ceph osd primary-affinity <osd-id> <weight>
|
||||
.. prompt:: bash $
|
||||
|
||||
You may set an OSD's primary affinity to a real number in the range
|
||||
``[0-1]``, where ``0`` indicates that the OSD may **NOT** be used as a primary
|
||||
and ``1`` indicates that an OSD may be used as a primary. When the weight is
|
||||
between these extremes, it is less likely that
|
||||
CRUSH will select that OSD as a primary. The process for
|
||||
selecting the lead OSD is more nuanced than a simple probability based on
|
||||
relative affinity values, but measurable results can be achieved even with
|
||||
first-order approximations of desirable values.
|
||||
ceph osd primary-affinity <osd-id> <weight>
|
||||
|
||||
You may set an OSD's primary affinity to a real number in the range ``[0-1]``,
|
||||
where ``0`` indicates that the OSD may **NOT** be used as a primary and ``1``
|
||||
indicates that an OSD may be used as a primary. When the weight is between
|
||||
these extremes, it is less likely that CRUSH will select that OSD as a primary.
|
||||
The process for selecting the lead OSD is more nuanced than a simple
|
||||
probability based on relative affinity values, but measurable results can be
|
||||
achieved even with first-order approximations of desirable values.
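
For example, to reduce the likelihood that a hypothetical ``osd.4`` is chosen
as primary, and later to restore the default:

.. prompt:: bash $

   ceph osd primary-affinity osd.4 0.5
   ceph osd primary-affinity osd.4 1.0
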
|
||||
|
||||
Custom CRUSH Rules
|
||||
------------------
|
||||
@ -1052,7 +1120,6 @@ must not contain the same servers::
|
||||
}
|
||||
|
||||
|
||||
|
||||
Note also that on failure of an SSD, requests to a PG will be served temporarily
|
||||
from a (slower) HDD OSD until the PG's data has been replicated onto the replacement
|
||||
primary SSD OSD.
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
.. _devices:
|
||||
|
||||
Device Management
|
||||
@ -11,19 +10,25 @@ provide tools to predict and/or automatically respond to hardware failure.
|
||||
Device tracking
|
||||
---------------
|
||||
|
||||
You can query which storage devices are in use with::
|
||||
You can query which storage devices are in use with:
|
||||
|
||||
ceph device ls
|
||||
.. prompt:: bash $
|
||||
|
||||
You can also list devices by daemon or by host::
|
||||
ceph device ls
|
||||
|
||||
ceph device ls-by-daemon <daemon>
|
||||
ceph device ls-by-host <host>
|
||||
You can also list devices by daemon or by host:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device ls-by-daemon <daemon>
|
||||
ceph device ls-by-host <host>
|
||||
|
||||
For any individual device, you can query information about its
|
||||
location and how it is being consumed with::
|
||||
location and how it is being consumed with:
|
||||
|
||||
ceph device info <devid>
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device info <devid>
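
For example, to list the devices consumed by a particular OSD and then inspect
one of them (the daemon and device IDs here are illustrative; real device IDs
follow the vendor/model/serial pattern shown by ``ceph device ls``):

.. prompt:: bash $

   ceph device ls-by-daemon osd.0
   ceph device info INTEL_SSDSC2KB480G8_PHYF123456AB480BGN
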
|
||||
|
||||
Identifying physical devices
|
||||
----------------------------
|
||||
@ -34,18 +39,22 @@ failed disks easy and less error-prone. Use the following command::
|
||||
device light on|off <devid> [ident|fault] [--force]
|
||||
|
||||
The ``<devid>`` parameter is the device identification. You can obtain this
|
||||
information using the following command::
|
||||
information using the following command:
|
||||
|
||||
ceph device ls
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device ls
|
||||
|
||||
The ``[ident|fault]`` parameter is used to set the kind of light to blink.
|
||||
By default, the `identification` light is used.
|
||||
|
||||
.. note::
|
||||
This command needs the Cephadm or the Rook `orchestrator <https://docs.ceph.com/docs/master/mgr/orchestrator/#orchestrator-cli-module>`_ module enabled.
|
||||
The orchestrator module enabled is shown by executing the following command::
|
||||
The enabled orchestrator module is shown by executing the following command:
|
||||
|
||||
ceph orch status
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph orch status
|
||||
|
||||
The command behind the scene to blink the drive LEDs is `lsmcli`. If you need
|
||||
to customize this command you can configure this via a Jinja2 template::
|
||||
@ -77,40 +86,54 @@ or unrecoverable read errors. Other device types like SAS and NVMe
|
||||
implement a similar set of metrics (via slightly different standards).
|
||||
All of these can be collected by Ceph via the ``smartctl`` tool.
|
||||
|
||||
You can enable or disable health monitoring with::
|
||||
You can enable or disable health monitoring with:
|
||||
|
||||
ceph device monitoring on
|
||||
.. prompt:: bash $
|
||||
|
||||
or::
|
||||
ceph device monitoring on
|
||||
|
||||
ceph device monitoring off
|
||||
or:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device monitoring off
|
||||
|
||||
|
||||
Scraping
|
||||
--------
|
||||
|
||||
If monitoring is enabled, metrics will automatically be scraped at regular intervals. That interval can be configured with::
|
||||
If monitoring is enabled, metrics will automatically be scraped at regular intervals. That interval can be configured with:
|
||||
|
||||
ceph config set mgr mgr/devicehealth/scrape_frequency <seconds>
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph config set mgr mgr/devicehealth/scrape_frequency <seconds>
|
||||
|
||||
The default is to scrape once every 24 hours.
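
For example, to scrape twice a day instead (43200 = 12 * 60 * 60 seconds):

.. prompt:: bash $

   ceph config set mgr mgr/devicehealth/scrape_frequency 43200
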
|
||||
|
||||
You can manually trigger a scrape of all devices with::
|
||||
You can manually trigger a scrape of all devices with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device scrape-health-metrics
|
||||
ceph device scrape-health-metrics
|
||||
|
||||
A single device can be scraped with::
|
||||
A single device can be scraped with:
|
||||
|
||||
ceph device scrape-health-metrics <device-id>
|
||||
.. prompt:: bash $
|
||||
|
||||
Or a single daemon's devices can be scraped with::
|
||||
ceph device scrape-health-metrics <device-id>
|
||||
|
||||
ceph device scrape-daemon-health-metrics <who>
|
||||
Or a single daemon's devices can be scraped with:
|
||||
|
||||
.. prompt:: bash $
|
||||
|
||||
ceph device scrape-daemon-health-metrics <who>
|
||||
|
||||
 The stored health metrics for a device can be retrieved (optionally
-for a specific timestamp) with::
+for a specific timestamp) with:

-  ceph device get-health-metrics <devid> [sample-timestamp]
+.. prompt:: bash $
+
+   ceph device get-health-metrics <devid> [sample-timestamp]

 Failure prediction
 ------------------
@ -121,29 +144,39 @@ health metrics it collects. There are three modes:

 * *none*: disable device failure prediction.
 * *local*: use a pre-trained prediction model from the ceph-mgr daemon

-The prediction mode can be configured with::
+The prediction mode can be configured with:

-  ceph config set global device_failure_prediction_mode <mode>
+.. prompt:: bash $
+
+   ceph config set global device_failure_prediction_mode <mode>

 Prediction normally runs in the background on a periodic basis, so it
 may take some time before life expectancy values are populated. You
-can see the life expectancy of all devices in output from::
+can see the life expectancy of all devices in output from:

-  ceph device ls
+.. prompt:: bash $
+
+   ceph device ls

-You can also query the metadata for a specific device with::
+You can also query the metadata for a specific device with:

-  ceph device info <devid>
+.. prompt:: bash $
+
+   ceph device info <devid>

-You can explicitly force prediction of a device's life expectancy with::
+You can explicitly force prediction of a device's life expectancy with:

-  ceph device predict-life-expectancy <devid>
+.. prompt:: bash $
+
+   ceph device predict-life-expectancy <devid>

 If you are not using Ceph's internal device failure prediction but
 have some external source of information about device failures, you
-can inform Ceph of a device's life expectancy with::
+can inform Ceph of a device's life expectancy with:

-  ceph device set-life-expectancy <devid> <from> [<to>]
+.. prompt:: bash $
+
+   ceph device set-life-expectancy <devid> <from> [<to>]
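A hypothetical end-to-end sketch of these commands (the device id and dates are made up, and the ``YYYY-MM-DD`` date format is an assumption): switch to the local prediction model, force a prediction for one device, and record an externally supplied life expectancy:

.. prompt:: bash $

   ceph config set global device_failure_prediction_mode local
   ceph device predict-life-expectancy WDC_WD40EFRX_WD-WCC4E0123456
   ceph device set-life-expectancy WDC_WD40EFRX_WD-WCC4E0123456 2021-06-01 2021-09-01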
 Life expectancies are expressed as a time interval so that
 uncertainty can be expressed in the form of a wide interval. The
@ -156,9 +189,11 @@ The ``mgr/devicehealth/warn_threshold`` controls how soon an expected
 device failure must be before we generate a health warning.

 The stored life expectancy of all devices can be checked, and any
-appropriate health alerts generated, with::
+appropriate health alerts generated, with:

-  ceph device check-health
+.. prompt:: bash $
+
+   ceph device check-health
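For example, assuming ``mgr/devicehealth/warn_threshold`` is expressed in seconds, tightening the warning window to four weeks and then re-checking immediately might look like:

.. prompt:: bash $

   ceph config set mgr mgr/devicehealth/warn_threshold 2419200
   ceph device check-health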
 Automatic Mitigation
 --------------------
@ -38,30 +38,35 @@ to achieve recovery from an OSD failure.

 Erasure-code profile examples
 =============================

-An example configuration that can be used to observe reduced bandwidth usage::
+An example configuration that can be used to observe reduced bandwidth usage:

-  $ ceph osd erasure-code-profile set CLAYprofile \
-       plugin=clay \
-       k=4 m=2 d=5 \
-       crush-failure-domain=host
-  $ ceph osd pool create claypool erasure CLAYprofile
+.. prompt:: bash $
+
+   ceph osd erasure-code-profile set CLAYprofile \
+      plugin=clay \
+      k=4 m=2 d=5 \
+      crush-failure-domain=host
+   ceph osd pool create claypool erasure CLAYprofile
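After creating the profile and pool as above, the resulting settings can be inspected; these query commands are not introduced by the documentation change itself, just a suggested sanity check:

.. prompt:: bash $

   ceph osd erasure-code-profile ls
   ceph osd erasure-code-profile get CLAYprofile
   ceph osd pool get claypool erasure_code_profile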
 Creating a clay profile
 =======================

-To create a new clay code profile::
+To create a new clay code profile:

-  ceph osd erasure-code-profile set {name} \
-     plugin=clay \
-     k={data-chunks} \
-     m={coding-chunks} \
-     [d={helper-chunks}] \
-     [scalar_mds={plugin-name}] \
-     [technique={technique-name}] \
-     [crush-failure-domain={bucket-type}] \
-     [directory={directory}] \
-     [--force]
+.. prompt:: bash $
+
+   ceph osd erasure-code-profile set {name} \
+      plugin=clay \
+      k={data-chunks} \
+      m={coding-chunks} \
+      [d={helper-chunks}] \
+      [scalar_mds={plugin-name}] \
+      [technique={technique-name}] \
+      [crush-failure-domain={bucket-type}] \
+      [crush-device-class={device-class}] \
+      [directory={directory}] \
+      [--force]

 Where:
@ -9,18 +9,20 @@ library.

 Create an isa profile
 =====================

-To create a new *isa* erasure code profile::
+To create a new *isa* erasure code profile:

-  ceph osd erasure-code-profile set {name} \
-     plugin=isa \
-     technique={reed_sol_van|cauchy} \
-     [k={data-chunks}] \
-     [m={coding-chunks}] \
-     [crush-root={root}] \
-     [crush-failure-domain={bucket-type}] \
-     [crush-device-class={device-class}] \
-     [directory={directory}] \
-     [--force]
+.. prompt:: bash $
+
+   ceph osd erasure-code-profile set {name} \
+      plugin=isa \
+      technique={reed_sol_van|cauchy} \
+      [k={data-chunks}] \
+      [m={coding-chunks}] \
+      [crush-root={root}] \
+      [crush-failure-domain={bucket-type}] \
+      [crush-device-class={device-class}] \
+      [directory={directory}] \
+      [--force]
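As a hypothetical instantiation of this template (the profile and pool names are made up; the placeholders are described under ``Where:`` below):

.. prompt:: bash $

   ceph osd erasure-code-profile set isaprofile \
      plugin=isa \
      technique=reed_sol_van \
      k=4 m=2 \
      crush-failure-domain=host
   ceph osd pool create isapool erasure isaprofile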
 Where:
@ -13,18 +13,20 @@ understanding of the parameters.

 Create a jerasure profile
 =========================

-To create a new *jerasure* erasure code profile::
+To create a new *jerasure* erasure code profile:

-  ceph osd erasure-code-profile set {name} \
-     plugin=jerasure \
-     k={data-chunks} \
-     m={coding-chunks} \
-     technique={reed_sol_van|reed_sol_r6_op|cauchy_orig|cauchy_good|liberation|blaum_roth|liber8tion} \
-     [crush-root={root}] \
-     [crush-failure-domain={bucket-type}] \
-     [crush-device-class={device-class}] \
-     [directory={directory}] \
-     [--force]
+.. prompt:: bash $
+
+   ceph osd erasure-code-profile set {name} \
+      plugin=jerasure \
+      k={data-chunks} \
+      m={coding-chunks} \
+      technique={reed_sol_van|reed_sol_r6_op|cauchy_orig|cauchy_good|liberation|blaum_roth|liber8tion} \
+      [crush-root={root}] \
+      [crush-failure-domain={bucket-type}] \
+      [crush-device-class={device-class}] \
+      [directory={directory}] \
+      [--force]
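And a hypothetical instantiation of the *jerasure* template (names and values are illustrative only; the placeholders are explained under ``Where:`` below):

.. prompt:: bash $

   ceph osd erasure-code-profile set jerasureprofile \
      plugin=jerasure \
      k=4 m=2 \
      technique=reed_sol_van \
      crush-failure-domain=host
   ceph osd pool create jerasurepool erasure jerasureprofile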
 Where: