import preliminary last stable release 14.2.22

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Thomas Lamprecht 2021-07-05 19:42:40 +02:00
parent 886a8c9442
commit d500a7f9ff
934 changed files with 181812 additions and 24420 deletions

ceph/.gitmodules vendored
View File

@ -58,3 +58,6 @@
[submodule "src/c-ares"]
path = src/c-ares
url = https://github.com/ceph/c-ares.git
[submodule "src/spawn"]
path = src/spawn
url = https://github.com/ceph/spawn.git

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.5.1)
project(ceph CXX C ASM)
set(VERSION 14.2.20)
set(VERSION 14.2.22)
if(POLICY CMP0028)
cmake_policy(SET CMP0028 NEW)
@ -123,6 +123,7 @@ cmake_pop_check_state()
CHECK_FUNCTION_EXISTS(eventfd HAVE_EVENTFD)
CHECK_FUNCTION_EXISTS(getprogname HAVE_GETPROGNAME)
CHECK_FUNCTION_EXISTS(gettid HAVE_GETTID)
CHECK_INCLUDE_FILES("linux/types.h" HAVE_LINUX_TYPES_H)
CHECK_INCLUDE_FILES("linux/version.h" HAVE_LINUX_VERSION_H)

View File

@ -1,6 +1,13 @@
14.2.17
14.2.19
-------
* $pid expansion in config paths like `admin_socket` will now properly expand
to the daemon pid for commands like `ceph-mds` or `ceph-osd`. Previously only
`ceph-fuse`/`rbd-nbd` expanded `$pid` with the actual daemon pid.
* OSD: the option ``osd_fast_shutdown_notify_mon`` has been introduced to allow
the OSD to notify the monitor it is shutting down even if ``osd_fast_shutdown``
is enabled. This helps with the monitor logs on larger clusters, which may otherwise
get many 'osd.X reported immediately failed by osd.Y' messages that confuse tools.
14.2.18
-------
* This release fixes issues loading the dashboard and volumes manager
modules in some environments.
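As a quick illustration of the two 14.2.19 items above, a minimal shell sketch (the socket path and the use of the config database here are assumptions for illustration, not recommended values):

# hypothetical ceph.conf snippet: $pid now expands to the daemon pid for
# ceph-osd/ceph-mds too, not only for ceph-fuse/rbd-nbd
cat >> /etc/ceph/ceph.conf <<'EOF'
[osd]
admin_socket = /var/run/ceph/$cluster-$name.$pid.asok
EOF
# keep osd_fast_shutdown, but still let OSDs notify the monitor on shutdown to
# avoid noisy 'osd.X reported immediately failed by osd.Y' log entries
ceph config set osd osd_fast_shutdown_notify_mon true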

View File

@ -1,7 +1,7 @@
# Contributor: John Coyle <dx9err@gmail.com>
# Maintainer: John Coyle <dx9err@gmail.com>
pkgname=ceph
pkgver=14.2.20
pkgver=14.2.22
pkgrel=0
pkgdesc="Ceph is a distributed object store and file system"
pkgusers="ceph"
@ -64,7 +64,7 @@ makedepends="
xmlstarlet
yasm
"
source="ceph-14.2.20.tar.bz2"
source="ceph-14.2.22.tar.bz2"
subpackages="
$pkgname-base
$pkgname-common
@ -117,7 +117,7 @@ _sysconfdir=/etc
_udevrulesdir=/etc/udev/rules.d
_python_sitelib=/usr/lib/python2.7/site-packages
builddir=$srcdir/ceph-14.2.20
builddir=$srcdir/ceph-14.2.22
build() {
export CEPH_BUILD_VIRTUALENV=$builddir

View File

@ -23,7 +23,7 @@
#################################################################################
%bcond_with make_check
%bcond_without ceph_test_package
%ifarch s390 s390x
%ifarch s390
%bcond_with tcmalloc
%else
%bcond_without tcmalloc
@ -109,7 +109,7 @@
# main package definition
#################################################################################
Name: ceph
Version: 14.2.20
Version: 14.2.22
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
@ -125,7 +125,7 @@ License: LGPL-2.1 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and
Group: System/Filesystems
%endif
URL: http://ceph.com/
Source0: %{?_remote_tarball_prefix}ceph-14.2.20.tar.bz2
Source0: %{?_remote_tarball_prefix}ceph-14.2.22.tar.bz2
%if 0%{?suse_version}
# _insert_obs_source_lines_here
ExclusiveArch: x86_64 aarch64 ppc64le s390x
@ -293,6 +293,7 @@ BuildRequires: pyOpenSSL%{_python_buildid}
%else
BuildRequires: python%{_python_buildid}-pyOpenSSL
%endif
BuildRequires: golang-github-prometheus
BuildRequires: libtool-ltdl-devel
BuildRequires: python%{_python_buildid}-cherrypy
BuildRequires: python%{_python_buildid}-jwt
@ -306,6 +307,7 @@ BuildRequires: xmlsec1-openssl
BuildRequires: xmlsec1-openssl-devel
%endif
%if 0%{?suse_version}
BuildRequires: golang-github-prometheus-prometheus
BuildRequires: libxmlsec1-1
BuildRequires: libxmlsec1-nss1
BuildRequires: libxmlsec1-openssl1
@ -659,6 +661,9 @@ Requires: librgw2 = %{_epoch_prefix}%{version}-%{release}
%if 0%{?rhel} || 0%{?fedora}
Requires: mailcap
%endif
%if 0%{?weak_deps}
Recommends: gawk
%endif
%description radosgw
RADOS is a distributed object store used by the Ceph distributed
storage system. This package provides a REST gateway to the
@ -1142,7 +1147,7 @@ This package provides Cephs default alerts for Prometheus.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-14.2.20
%autosetup -p1 -n ceph-14.2.22
%build
# LTO can be enabled as soon as the following GCC bug is fixed:
@ -1870,6 +1875,8 @@ fi
%{_bindir}/radosgw-token
%{_bindir}/radosgw-es
%{_bindir}/radosgw-object-expirer
%{_bindir}/rgw-gap-list
%{_bindir}/rgw-gap-list-comparator
%{_bindir}/rgw-orphan-list
%{_mandir}/man8/radosgw.8*
%dir %{_localstatedir}/lib/ceph/radosgw

View File

@ -23,7 +23,7 @@
#################################################################################
%bcond_with make_check
%bcond_without ceph_test_package
%ifarch s390 s390x
%ifarch s390
%bcond_with tcmalloc
%else
%bcond_without tcmalloc
@ -293,6 +293,7 @@ BuildRequires: pyOpenSSL%{_python_buildid}
%else
BuildRequires: python%{_python_buildid}-pyOpenSSL
%endif
BuildRequires: golang-github-prometheus
BuildRequires: libtool-ltdl-devel
BuildRequires: python%{_python_buildid}-cherrypy
BuildRequires: python%{_python_buildid}-jwt
@ -306,6 +307,7 @@ BuildRequires: xmlsec1-openssl
BuildRequires: xmlsec1-openssl-devel
%endif
%if 0%{?suse_version}
BuildRequires: golang-github-prometheus-prometheus
BuildRequires: libxmlsec1-1
BuildRequires: libxmlsec1-nss1
BuildRequires: libxmlsec1-openssl1
@ -659,6 +661,9 @@ Requires: librgw2 = %{_epoch_prefix}%{version}-%{release}
%if 0%{?rhel} || 0%{?fedora}
Requires: mailcap
%endif
%if 0%{?weak_deps}
Recommends: gawk
%endif
%description radosgw
RADOS is a distributed object store used by the Ceph distributed
storage system. This package provides a REST gateway to the
@ -1870,6 +1875,8 @@ fi
%{_bindir}/radosgw-token
%{_bindir}/radosgw-es
%{_bindir}/radosgw-object-expirer
%{_bindir}/rgw-gap-list
%{_bindir}/rgw-gap-list-comparator
%{_bindir}/rgw-orphan-list
%{_mandir}/man8/radosgw.8*
%dir %{_localstatedir}/lib/ceph/radosgw

View File

@ -1,7 +1,19 @@
ceph (14.2.20-1xenial) xenial; urgency=medium
ceph (14.2.22-1xenial) xenial; urgency=medium
-- Jenkins Build Slave User <jenkins-build@confusa08.front.sepia.ceph.com> Mon, 19 Apr 2021 10:22:46 -0400
-- Jenkins Build Slave User <jenkins-build@braggi10.front.sepia.ceph.com> Tue, 29 Jun 2021 22:18:42 +0000
ceph (14.2.22-1) stable; urgency=medium
* New upstream release
-- Ceph Release Team <ceph-maintainers@ceph.com> Tue, 29 Jun 2021 22:09:07 +0000
ceph (14.2.21-1) stable; urgency=medium
* New upstream release
-- Ceph Release Team <ceph-maintainers@ceph.com> Thu, 13 May 2021 17:23:05 +0000
ceph (14.2.20-1) stable; urgency=medium

View File

@ -230,6 +230,7 @@ macro(build_boost version)
INTERFACE_LINK_LIBRARIES "${dependencies}")
unset(dependencies)
endif()
set(Boost_${c}_FOUND "TRUE")
endforeach()
# for header-only libraries

View File

@ -0,0 +1,22 @@
# libzstd - build it statically
function(build_Zstd)
set(ZSTD_C_FLAGS "-fPIC -Wno-unused-variable -O3")
include(ExternalProject)
ExternalProject_Add(zstd_ext
SOURCE_DIR ${CMAKE_SOURCE_DIR}/src/zstd/build/cmake
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_C_FLAGS=${ZSTD_C_FLAGS}
-DCMAKE_AR=${CMAKE_AR}
-DCMAKE_POSITION_INDEPENDENT_CODE=${ENABLE_SHARED}
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/libzstd
BUILD_COMMAND ${CMAKE_COMMAND} --build <BINARY_DIR> --target libzstd_static
BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/libzstd/lib/libzstd.a"
INSTALL_COMMAND "")
add_library(Zstd::Zstd STATIC IMPORTED)
set_target_properties(Zstd::Zstd PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/src/zstd/lib"
IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/libzstd/lib/libzstd.a")
add_dependencies(Zstd::Zstd zstd_ext)
endfunction()

View File

@ -0,0 +1,51 @@
# Try to find libzstd
#
# Once done, this will define
#
# Zstd_FOUND
# Zstd_INCLUDE_DIRS
# Zstd_LIBRARIES
# Zstd_VERSION_STRING
# Zstd_VERSION_MAJOR
# Zstd_VERSION_MINOR
# Zstd_VERSION_RELEASE
find_path(Zstd_INCLUDE_DIR
NAMES zstd.h
HINTS ${Zstd_ROOT_DIR}/include)
if(Zstd_INCLUDE_DIR AND EXISTS "${Zstd_INCLUDE_DIR}/zstd.h")
foreach(ver "MAJOR" "MINOR" "RELEASE")
file(STRINGS "${Zstd_INCLUDE_DIR}/zstd.h" Zstd_VER_${ver}_LINE
REGEX "^#define[ \t]+ZSTD_VERSION_${ver}[ \t]+[0-9]+$")
string(REGEX REPLACE "^#define[ \t]+ZSTD_VERSION_${ver}[ \t]+([0-9]+)$"
"\\1" Zstd_VERSION_${ver} "${Zstd_VER_${ver}_LINE}")
unset(${Zstd_VER_${ver}_LINE})
endforeach()
set(Zstd_VERSION_STRING
"${Zstd_VERSION_MAJOR}.${Zstd_VERSION_MINOR}.${Zstd_VERSION_RELEASE}")
endif()
find_library(Zstd_LIBRARY
NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}zstd.${CMAKE_STATIC_LIBRARY_SUFFIX}" zstd
HINTS ${Zstd_ROOT_DIR}/lib)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Zstd
REQUIRED_VARS Zstd_LIBRARY Zstd_INCLUDE_DIR
VERSION_VAR Zstd_VERSION_STRING)
mark_as_advanced(
Zstd_LIBRARY
Zstd_INCLUDE_DIR)
if(Zstd_FOUND AND NOT (TARGET Zstd::Zstd))
set(Zstd_INCLUDE_DIRS ${Zstd_INCLUDE_DIR})
set(Zstd_LIBRARIES ${Zstd_LIBRARY})
add_library (Zstd::Zstd UNKNOWN IMPORTED)
set_target_properties(Zstd::Zstd PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES ${Zstd_INCLUDE_DIR}
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION ${Zstd_LIBRARY}
VERSION "${Zstd_VERSION_STRING}")
endif()

View File

@ -78,7 +78,9 @@ case "$1" in
fi
if ! dpkg-statoverride --list /var/log/ceph >/dev/null
then
chown -R $SERVER_USER:$SERVER_GROUP /var/log/ceph
# take care not to touch cephadm log subdirs
chown $SERVER_USER:$SERVER_GROUP /var/log/ceph
chown $SERVER_USER:$SERVER_GROUP /var/log/ceph/*.log* || true
# members of group ceph can log here, but cannot remove
# others' files. non-members cannot read any logs.
chmod u=rwx,g=rwxs,o=t /var/log/ceph

View File

@ -51,30 +51,32 @@ Build-Depends: cmake (>= 3.5),
libxml2-dev,
librabbitmq-dev,
librdkafka-dev,
# Make-Check libxmlsec1
# Make-Check libxmlsec1-nss
# Make-Check libxmlsec1-openssl
# Make-Check libxmlsec1-dev
# Make-Check libzstd-dev,
# Make-Check libxmlsec1,
# Make-Check libxmlsec1-nss,
# Make-Check libxmlsec1-openssl,
# Make-Check libxmlsec1-dev,
lsb-release,
parted,
patch,
pkg-config,
python (>= 2.7),
python-all-dev,
python-cherrypy3,
# Make-Check python-jwt,
# Make-Check python-nose,
# Make-Check python-pecan,
# Make-Check python-bcrypt,
# Make-Check python-six,
# Make-Check python-tox,
# Make-Check python-coverage,
# Make-Check python-openssl,
# Make-Check python-prettytable,
# Make-Check python-requests,
python-cherrypy3 | python3-cherrypy3,
# Make-Check prometheus,
# Make-Check python3-jwt,
# Make-Check python3-nose,
# Make-Check python3-pecan,
# Make-Check python3-bcrypt,
# Make-Check python3-six,
# Make-Check tox,
# Make-Check python3-coverage,
# Make-Check python3-openssl,
# Make-Check python3-prettytable,
# Make-Check python3-requests,
python-setuptools,
python-sphinx,
# Make-Check python-werkzeug,
python3-sphinx,
# Make-Check python3-werkzeug,
python3-all-dev,
python3-setuptools,
# Make-Check socat,
@ -827,6 +829,7 @@ Depends: ceph-common (= ${binary:Version}),
mime-support,
${misc:Depends},
${shlibs:Depends},
Suggests: gawk,
Recommends: ntp | time-daemon,
Description: REST gateway for RADOS distributed object store
RADOS is a distributed object store used by the Ceph distributed

View File

@ -4,6 +4,8 @@ usr/bin/radosgw
usr/bin/radosgw-es
usr/bin/radosgw-object-expirer
usr/bin/radosgw-token
usr/bin/rgw-gap-list
usr/bin/rgw-gap-list-comparator
usr/bin/rgw-orphan-list
usr/share/man/man8/ceph-diff-sorted.8
usr/share/man/man8/radosgw.8

View File

@ -6,7 +6,7 @@ if test -e build; then
exit 1
fi
PYBUILD="2"
PYBUILD="3"
if [ -r /etc/os-release ]; then
source /etc/os-release
case "$ID" in

View File

@ -15,7 +15,7 @@ Synopsis
| **ceph-volume** **inventory**
| **ceph-volume** **lvm** [ *trigger* | *create* | *activate* | *prepare*
| *zap* | *list* | *batch*]
| *zap* | *list* | *batch* | *new-wal* | *new-db* | *migrate* ]
| **ceph-volume** **simple** [ *trigger* | *scan* | *activate* ]
@ -243,6 +243,71 @@ Positional arguments:
``/path/to/sda1`` or ``/path/to/sda`` for regular devices.
**new-wal**
Attaches the given logical volume to OSD as a WAL. Logical volume
name format is vg/lv. Fails if the OSD already has a WAL attached.
Usage::
ceph-volume lvm new-wal --osd-id OSD_ID --osd-fsid OSD_FSID --target TARGET_LV
Optional arguments:
* [-h, --help] show the help message and exit
Required arguments:
* --osd-id OSD_ID OSD id to attach new WAL to
* --osd-fsid OSD_FSID OSD fsid to attach new WAL to
* --target TARGET_LV logical volume name to attach as WAL
**new-db**
Attaches the given logical volume to OSD as a DB. Logical volume
name format is vg/lv. Fails if the OSD already has a DB attached.
Usage::
ceph-volume lvm new-db --osd-id OSD_ID --osd-fsid OSD_FSID --target <target lv>
Optional arguments:
* [-h, --help] show the help message and exit
Required arguments:
* --osd-id OSD_ID OSD id to attach new DB to
* --osd-fsid OSD_FSID OSD fsid to attach new DB to
* --target TARGET_LV logical volume name to attach as DB
**migrate**
Moves BlueFS data from the source volume(s) to the target one; the source volumes
(except the main, i.e. data or block, one) are removed on success. Only LVM
volumes are permitted as the target, either one that is already attached or a new
one. In the latter case it is attached to the OSD, replacing one of the source
devices. The following replacement rules apply (in order of precedence, stopping
at the first match):
- if the source list has a DB volume, the target device replaces it.
- if the source list has a WAL volume, the target device replaces it.
- if the source list has only the slow volume, the operation is not permitted and
  requires explicit allocation via the new-db/new-wal commands.
Usage::
ceph-volume lvm migrate --osd-id OSD_ID --osd-fsid OSD_FSID --target TARGET_LV --from {data|db|wal} [{data|db|wal} ...]
Optional arguments:
* [-h, --help] show the help message and exit
Required arguments:
* --osd-id OSD_ID OSD id to perform migration at
* --osd-fsid OSD_FSID OSD fsid to perform migration at
* --target TARGET_LV logical volume to move data to
* --from TYPE_LIST list of source device type names, e.g. --from db wal
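A hedged end-to-end sketch of the new-db and migrate commands documented above; the OSD id, fsid and vg/lv names are placeholders:

# attach a fresh logical volume as the DB of osd.1
ceph-volume lvm new-db --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db
# then move the BlueFS data currently on the slow (data) device onto it
ceph-volume lvm migrate --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db --from data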
simple
------

View File

@ -904,11 +904,16 @@ data should remain readable and writeable, although data redundancy
may be reduced as some PGs may end up in a degraded (but active)
state. It will return a success code if it is okay to stop the
OSD(s), or an error code and informative message if it is not or if no
conclusion can be drawn at the current time.
conclusion can be drawn at the current time. When ``--max <num>`` is
provided, up to <num> OSD IDs will be returned (including the provided
OSDs) that can all be stopped simultaneously. This allows larger sets
of stoppable OSDs to be generated easily by providing a single
starting OSD and a max. Additional OSDs are drawn from adjacent locations
in the CRUSH hierarchy.
Usage::
ceph osd ok-to-stop <id> [<ids>...]
ceph osd ok-to-stop <id> [<ids>...] [--max <num>]
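For example (ids illustrative), asking whether osd.3 plus up to nine further OSDs drawn from adjacent CRUSH locations can be stopped together:

ceph osd ok-to-stop 3 --max 10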
Subcommand ``pause`` pauses osd.

View File

@ -791,6 +791,14 @@ Trimming requires that the placement groups are ``active + clean``.
:Default: 500
``paxos service trim max multiplier``
:Description: The factor by which paxos service trim max will be multiplied
to get a new upper bound when trim sizes are high (0 disables it)
:Type: Integer
:Default: ``20``
``mon max log epochs``
:Description: The maximum number of log epochs to trim during a single proposal
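Assuming the usual underscore form of these option names, they could be tuned at runtime roughly as follows (values illustrative):

# factor applied to paxos_service_trim_max when trim sizes are high; 0 disables it
ceph config set mon paxos_service_trim_max_multiplier 20
ceph config set mon mon_max_log_epochs 500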

View File

@ -88,7 +88,7 @@ Similarly, two options control whether IPv4 and IPv6 addresses are used:
* ``ms_bind_ipv6`` [default: false] controls whether a daemon binds
to an IPv6 address
.. note: The ability to bind to multiple ports has paved the way for
.. note:: The ability to bind to multiple ports has paved the way for
dual-stack IPv4 and IPv6 support. That said, dual-stack support is
not yet tested as of Nautilus v14.2.0 and likely needs some
additional code changes to work correctly.

View File

@ -201,6 +201,27 @@ following option to the ``[global]`` section of your Ceph configuration file.
We prefer that the cluster network is **NOT** reachable from the public network
or the Internet for added security.
IPv4/IPv6 Dual Stack Mode
-------------------------
If you want to run in an IPv4/IPv6 dual stack mode and want to define your public and/or
cluster networks, then you need to specify both your IPv4 and IPv6 networks for each:
.. code-block:: ini
[global]
# ... elided configuration
public network = {IPv4 public-network/netmask}, {IPv6 public-network/netmask}
This is so Ceph can find a valid IP address for both address families.
If you want an IPv4-only or an IPv6-only stack, make sure you set the `ms bind`
options accordingly.
.. note::
Binding to IPv4 is enabled by default, so if you just add the option to bind to IPv6
you'll actually put yourself into dual stack mode. If you want just IPv6, then disable IPv4 and
enable IPv6. See `Bind`_ below.
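A minimal sketch of the bind options for the modes described above (the networks themselves are elided; set them as in the ini snippet earlier in this section):

# dual stack: IPv4 binding stays on by default, additionally bind IPv6
ceph config set global ms_bind_ipv6 true
# IPv6 only: also turn the IPv4 default off
ceph config set global ms_bind_ipv4 false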
Ceph Daemons
============
@ -336,11 +357,16 @@ addresses.
:Default: ``7300``
:Required: No.
``ms bind ipv4``
:Description: Enables Ceph daemons to bind to IPv4 addresses.
:Type: Boolean
:Default: ``true``
:Required: No
``ms bind ipv6``
:Description: Enables Ceph daemons to bind to IPv6 addresses. Currently the
messenger *either* uses IPv4 or IPv6, but it cannot do both.
:Description: Enables Ceph daemons to bind to IPv6 addresses.
:Type: Boolean
:Default: ``false``
:Required: No

View File

@ -147,13 +147,21 @@ function install_pkg_on_ubuntu {
}
function install_boost_on_ubuntu {
local codename=$1
if dpkg -s ceph-libboost1.67-dev &> /dev/null; then
$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove 'ceph-libboost.*1.67.*'
$SUDO rm /etc/apt/sources.list.d/ceph-libboost1.67.list
fi
local project=libboost
local ver=1.72
local installed_ver=$(apt -qq list --installed ceph-libboost*-dev 2>/dev/null |
grep -e 'libboost[0-9].[0-9]\+-dev' |
cut -d' ' -f2 |
cut -d'.' -f1,2)
if test -n "$installed_ver"; then
if echo "$installed_ver" | grep -q "^$ver"; then
return
else
$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove "ceph-libboost.*${installed_ver}.*"
$SUDO rm -f /etc/apt/sources.list.d/ceph-libboost${installed_ver}.list
fi
fi
local codename=$1
local project=libboost
local sha1=1d7c7a00cc3f37e340bae0360191a757b44ec80c
install_pkg_on_ubuntu \
$project \

View File

@ -1,7 +1,21 @@
#!/bin/sh -e
SCRIPTNAME="$(basename "${0}")"
BASEDIR="$(readlink -f "$(dirname "${0}")")"
if [ ! -d .git ]; then
echo "no .git present. run this from the base dir of the git checkout."
echo "$SCRIPTNAME: Full path to the script: $BASEDIR/$SCRIPTNAME"
echo "$SCRIPTNAME: No .git present. Run this from the base dir of the git checkout."
exit 1
fi
# Running the script from a directory containing a colon anywhere in the path
# will expose us to the dreaded "[BUG] npm run [command] failed if the directory
# path contains colon" bug https://github.com/npm/cli/issues/633
# (see https://tracker.ceph.com/issues/39556 for details)
if [[ "$BASEDIR" == *:* ]] ; then
echo "$SCRIPTNAME: Full path to the script: $BASEDIR/$SCRIPTNAME"
echo "$SCRIPTNAME: The path to the script contains a colon. Their presence has been known to break the script."
exit 1
fi
@ -67,7 +81,7 @@ build_dashboard_frontend() {
$CURR_DIR/src/tools/setup-virtualenv.sh $TEMP_DIR
$TEMP_DIR/bin/pip install nodeenv
$TEMP_DIR/bin/nodeenv -p --node=10.13.0
$TEMP_DIR/bin/nodeenv --verbose -p --node=10.13.0
cd src/pybind/mgr/dashboard/frontend
DEFAULT_LANG=`jq -r .config.locale package.json`

View File

@ -37,7 +37,7 @@
"gnetId": null,
"graphTooltip": 0,
"id": null,
"iteration": 1557386759572,
"iteration": 1615564911000,
"links": [],
"panels": [
{
@ -182,7 +182,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts).*\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts).*\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts).*\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts).*\"}[1m]))\n) * 100",
"expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{mode}}",
@ -283,14 +283,14 @@
"steppedLine": false,
"targets": [
{
"expr": "(node_memory_MemTotal{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]].*\"})- (\n (node_memory_MemFree{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n (node_memory_Cached{instance=~\"[[ceph_hosts]].*\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n (node_memory_Buffers{instance=~\"[[ceph_hosts]].*\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]].*\"}) +\n (node_memory_Slab{instance=~\"[[ceph_hosts]].*\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]].*\"})\n )\n \n",
"expr": "(node_memory_MemTotal{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"})\n )\n \n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "used",
"refId": "D"
},
{
"expr": "node_memory_MemFree{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]].*\"} ",
"expr": "node_memory_MemFree{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} ",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -298,7 +298,7 @@
"refId": "A"
},
{
"expr": "(node_memory_Cached{instance=~\"[[ceph_hosts]].*\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]].*\"}) + \n(node_memory_Buffers{instance=~\"[[ceph_hosts]].*\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]].*\"}) +\n(node_memory_Slab{instance=~\"[[ceph_hosts]].*\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]].*\"}) \n",
"expr": "(node_memory_Cached{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}) \n",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -306,7 +306,7 @@
"refId": "C"
},
{
"expr": "node_memory_MemTotal{instance=~\"[[ceph_hosts]].*\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]].*\"} ",
"expr": "node_memory_MemTotal{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"} ",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -401,7 +401,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m])\n)",
"expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.rx",
@ -410,7 +410,7 @@
"textEditor": true
},
{
"expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts).*\",device!=\"lo\"}[1m])\n)",
"expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.tx",
@ -501,7 +501,7 @@
"steppedLine": false,
"targets": [
{
"expr": "irate(node_network_receive_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])",
"expr": "irate(node_network_receive_drop{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
@ -509,7 +509,7 @@
"refId": "A"
},
{
"expr": "irate(node_network_transmit_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])",
"expr": "irate(node_network_transmit_drop{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.tx",
@ -621,7 +621,7 @@
"tableColumn": "",
"targets": [
{
"expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts).*\"})",
"expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})",
"format": "time_series",
"intervalFactor": 2,
"refId": "A",
@ -685,7 +685,7 @@
"steppedLine": false,
"targets": [
{
"expr": "irate(node_network_receive_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])",
"expr": "irate(node_network_receive_errs{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
@ -693,7 +693,7 @@
"refId": "A"
},
{
"expr": "irate(node_network_transmit_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])",
"expr": "irate(node_network_transmit_errs{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"[[ceph_hosts]]([\\\\.:].*)?\"}[1m])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.tx",
@ -798,7 +798,7 @@
"steppedLine": false,
"targets": [
{
"expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts).*\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
"expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) writes",
@ -807,7 +807,7 @@
"textEditor": true
},
{
"expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts).*\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
"expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device, ceph_daemon) group_left\n label_replace(\n label_replace(\n ceph_disk_occupation,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -899,14 +899,14 @@
"steppedLine": false,
"targets": [
{
"expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) write",
"refId": "B"
},
{
"expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) read",
@ -992,7 +992,7 @@
"steppedLine": false,
"targets": [
{
"expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance,device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
@ -1083,7 +1083,7 @@
"steppedLine": false,
"targets": [
{
"expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts).*\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts).*\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device, ceph_daemon) group_left label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
"format": "time_series",
"hide": false,
"intervalFactor": 1,

View File

@ -131,7 +131,6 @@
"#d44a3a"
],
"datasource": "$datasource",
"decimals": 0,
"description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster",
"decimals": 2,
"format": "percentunit",
@ -215,7 +214,6 @@
"#d44a3a"
],
"datasource": "$datasource",
"decimals": 0,
"description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)",
"decimals": 2,
"format": "percentunit",
@ -433,7 +431,7 @@
"tableColumn": "",
"targets": [
{
"expr" : "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)",
"expr" : "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device, ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,

View File

@ -0,0 +1,440 @@
{
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "5.0.0"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"iteration": 1534386107523,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 0
},
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Replication (throughput) from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"unit": "bytes",
"format": "Bps",
"decimals": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 7.4,
"x": 8.3,
"y": 0
},
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Replication (objects) from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"decimals": null,
"label": "Objects/s",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 0
},
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum[30s]) * 1000)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Polling Request Latency from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"unit": "s",
"format": "ms",
"decimals": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 7
},
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors[30s]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Unsuccessful Object Replications from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"decimals": null,
"label": "Count/s",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"refresh": "15s",
"schemaVersion": 16,
"style": "dark",
"tags": [
"overview"
],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": null,
"multi": false,
"name": "rgw_servers",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"tags": [],
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"15s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RGW Sync Overview",
"uid": "rgw-sync-overview",
"version": 2
}

View File

@ -0,0 +1,769 @@
rule_files:
- ceph_default_alerts.yml
evaluation_interval: 5m
tests:
# health error
- interval: 5m
input_series:
- series: 'ceph_health_status{instance="ceph:9283",job="ceph"}'
values: '2 2 2 2 2 2 2'
promql_expr_test:
- expr: ceph_health_status == 2
eval_time: 5m
exp_samples:
- labels: 'ceph_health_status{instance="ceph:9283",job="ceph"}'
value: 2
alert_rule_test:
- eval_time: 1m
alertname: health error
- eval_time: 6m
alertname: health error
exp_alerts:
- exp_labels:
instance: ceph:9283
job: ceph
type: ceph_default
severity: critical
exp_annotations:
description: >
Ceph in HEALTH_ERROR state for more than 5 minutes.
Please check "ceph health detail" for more information.
# health warning
- interval: 5m
input_series:
- series: 'ceph_health_status{instance="ceph:9283",job="ceph"}'
values: '1 1 1 1 1 1 1 1 1 1'
promql_expr_test:
- expr: ceph_health_status == 1
eval_time: 15m
exp_samples:
- labels: 'ceph_health_status{instance="ceph:9283",job="ceph"}'
value: 1
alert_rule_test:
- eval_time: 10m
alertname: health warn
- eval_time: 20m
alertname: health warn
exp_alerts:
- exp_labels:
instance: ceph:9283
job: ceph
type: ceph_default
severity: warning
exp_annotations:
description: >
Ceph has been in HEALTH_WARN for more than 15 minutes.
Please check "ceph health detail" for more information.
# low monitor quorum count
- interval: 1m
input_series:
- series: 'ceph_mon_quorum_status{ceph_daemon="mon.a",instance="ceph:9283",
job="ceph"}'
values: '1 1 1 1 1'
- series: 'ceph_mon_quorum_status{ceph_daemon="mon.b",instance="ceph:9283",
job="ceph"}'
values: '1 1 1 1 1'
- series: 'ceph_mon_quorum_status{ceph_daemon="mon.c",instance="ceph:9283",
job="ceph"}'
values: '0 0 0 0 0'
- series: 'ceph_mon_metadata{ceph_daemon="mon.a",ceph_version="ceph version
17.0.0-189-g3558fd72 (3558fd7291855971aa6481a2ade468ad61fbb346) pacific
(dev)",hostname="ceph",instance="ceph:9283",job="ceph",
public_addr="172.20.0.2",rank="0"}'
values: '1 1 1 1 1'
- series: 'ceph_mon_metadata{ceph_daemon="mon.b",ceph_version="ceph version
17.0.0-189-g3558fd72 (3558fd7291855971aa6481a2ade468ad61fbb346) pacific
(dev)",hostname="ceph",instance="ceph:9283",job="ceph",
public_addr="172.20.0.2",rank="1"}'
values: '1 1 1 1 1'
- series: 'ceph_mon_metadata{ceph_daemon="mon.c",ceph_version="ceph version
17.0.0-189-g3558fd72 (3558fd7291855971aa6481a2ade468ad61fbb346) pacific
(dev)",hostname="ceph",instance="ceph:9283",job="ceph",
public_addr="172.20.0.2",rank="2"}'
values: '1 1 1 1 1'
promql_expr_test:
- expr: sum(ceph_mon_quorum_status) < 3
eval_time: 1m
exp_samples:
- labels: '{}'
value: 2
alert_rule_test:
- eval_time: 1m
alertname: low monitor quorum count
exp_alerts:
- exp_labels:
type: ceph_default
severity: critical
exp_annotations:
description: |
Monitor count in quorum is below three.
Only 2 of 3 monitors are active.
The following monitors are down:
- mon.c on ceph
# 10% OSDs down
- interval: 1m
input_series:
- series: 'ceph_osd_up{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"}'
values: '1 1 1 1 1'
- series: 'ceph_osd_up{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"}'
values: '0 0 0 0 0'
- series: 'ceph_osd_up{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"}'
values: '1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.1",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.2",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1'
promql_expr_test:
- expr: count(ceph_osd_up == 0) / count(ceph_osd_up) * 100 >= 10
eval_time: 1m
exp_samples:
- labels: '{}'
value: 3.333333333333333E+01
alert_rule_test:
- eval_time: 1m
alertname: 10% OSDs down
exp_alerts:
- exp_labels:
type: ceph_default
severity: critical
exp_annotations:
description: |
33.33% or 1 of 3 OSDs are down (≥ 10%).
The following OSDs are down:
- osd.1 on ceph
# OSD down
- interval: 1m
input_series:
- series: 'ceph_osd_up{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_osd_up{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'ceph_osd_up{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.1",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.2",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
promql_expr_test:
- expr: count(ceph_osd_up == 0) > 0
eval_time: 1m
exp_samples:
- labels: '{}'
value: 1
alert_rule_test:
- eval_time: 15m
alertname: OSD down
exp_alerts:
- exp_labels:
type: ceph_default
severity: warning
exp_annotations:
description: |
1 OSD down for more than 15 minutes.
1 of 3 OSDs are down.
The following OSD is down:
- osd.1 on ceph
# OSDs near full
- interval: 1m
input_series:
- series: 'ceph_osd_stat_bytes_used{ceph_daemon="osd.0",instance="ceph:9283"
,job="ceph"}'
values: '1076310016 1076310016 1076310016 1076310016 1076310016
1076310016'
- series: 'ceph_osd_stat_bytes_used{ceph_daemon="osd.1",instance="ceph:9283"
,job="ceph"}'
values: '1076310016 1076310016 1076310016 1076310016 1076310016
1076310016'
- series: 'ceph_osd_stat_bytes_used{ceph_daemon="osd.2",instance="ceph:9283"
,job="ceph"}'
values: '1076310016 1076310016 1076310016 1076310016 1076310016
100856561909.76'
- series: 'ceph_osd_stat_bytes{ceph_daemon="osd.0",instance="ceph:9283"
,job="ceph"}'
values: '108447916032 108447916032 108447916032 108447916032 108447916032
108447916032'
- series: 'ceph_osd_stat_bytes{ceph_daemon="osd.1",instance="ceph:9283"
,job="ceph"}'
values: '108447916032 108447916032 108447916032 108447916032 108447916032
108447916032'
- series: 'ceph_osd_stat_bytes{ceph_daemon="osd.2",instance="ceph:9283"
,job="ceph"}'
values: '108447916032 108447916032 108447916032 108447916032 108447916032
108447916032'
- series: 'ceph_osd_up{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_up{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_up{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.1",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.2",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
promql_expr_test:
- expr: |
(
((ceph_osd_stat_bytes_used / ceph_osd_stat_bytes) and on(ceph_daemon)
ceph_osd_up == 1) * on(ceph_daemon) group_left(hostname)
ceph_osd_metadata
) * 100 > 90
eval_time: 5m
exp_samples:
- labels: '{ceph_daemon="osd.2",hostname="ceph",instance="ceph:9283",
job="ceph"}'
value: 9.3E+01
alert_rule_test:
- eval_time: 10m
alertname: OSDs near full
exp_alerts:
- exp_labels:
ceph_daemon: osd.2
hostname: ceph
instance: ceph:9283
job: ceph
type: ceph_default
severity: critical
exp_annotations:
description: >
OSD osd.2 on ceph is dangerously full: 93%
# flapping OSD
- interval: 1s
input_series:
- series: 'ceph_osd_up{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"}'
values: '1+1x100'
- series: 'ceph_osd_up{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"}'
values: '1+0x100'
- series: 'ceph_osd_up{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"}'
values: '1+0x100'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.1",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.2",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
promql_expr_test:
- expr: |
(
rate(ceph_osd_up[5m])
* on(ceph_daemon) group_left(hostname) ceph_osd_metadata
) * 60 > 1
eval_time: 1m
exp_samples:
- labels: '{ceph_daemon="osd.0", hostname="ceph", instance="ceph:9283",
job="ceph"}'
value: 1.2200000000000001E+01
alert_rule_test:
- eval_time: 5m
alertname: flapping OSD
exp_alerts:
- exp_labels:
ceph_daemon: osd.0
hostname: ceph
instance: ceph:9283
job: ceph
severity: warning
type: ceph_default
exp_annotations:
description: >
OSD osd.0 on ceph was
marked down and back up at 20.1 times once a
minute for 5 minutes.
# high pg count deviation
- interval: 1m
input_series:
- series: 'ceph_osd_numpg{ceph_daemon="osd.0",instance="ceph:9283",
job="ceph"}'
values: '100 100 100 100 100 160'
- series: 'ceph_osd_numpg{ceph_daemon="osd.1",instance="ceph:9283",
job="ceph"}'
values: '100 100 100 100 100 320'
- series: 'ceph_osd_numpg{ceph_daemon="osd.2",instance="ceph:9283",
job="ceph"}'
values: '100 100 100 100 100 160'
- series: 'ceph_osd_numpg{ceph_daemon="osd.3",instance="ceph:9283",
job="ceph"}'
values: '100 100 100 100 100 160'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.1",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.2",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
- series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.3",
ceph_version="ceph version 17.0.0-189-g3558fd72
(3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
public_addr="172.20.0.2"}'
values: '1 1 1 1 1 1'
promql_expr_test:
- expr: |
abs(
(
(ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0)
by (job)
) / on (job) group_left avg(ceph_osd_numpg > 0) by (job)
) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
eval_time: 5m
exp_samples:
- labels: '{ceph_daemon="osd.1", hostname="ceph", instance="ceph:9283",
job="ceph"}'
value: 6E-01
alert_rule_test:
- eval_time: 10m
alertname: high pg count deviation
exp_alerts:
- exp_labels:
ceph_daemon: osd.1
hostname: ceph
instance: ceph:9283
job: ceph
severity: warning
type: ceph_default
exp_annotations:
description: >
OSD osd.1 on ceph deviates
by more than 30% from average PG count.
# pgs inactive
- interval: 1m
input_series:
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name="device_health_metrics",pool_id="1"}'
values: '1 1 1 1 1 1 1 1'
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name="device_health_metrics",pool_id="2"}'
values: '1 1 1 1 1 1 1 1'
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name="device_health_metrics",pool_id="3"}'
values: '1 1 1 1 1 1 1 1'
- series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="1"}'
values: '1 1 1 1 1 1 1 1'
- series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="2"}'
values: '32 32 32 32 32 32 32 32'
- series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="3"}'
values: '33 32 32 32 32 33 33 32'
- series: 'ceph_pg_active{instance="ceph:9283",job="ceph",pool_id="1"}'
values: '1 1 1 1 1 1 1 1 1'
- series: 'ceph_pg_active{instance="ceph:9283",job="ceph",pool_id="2"}'
values: '32 32 32 32 32 32 32 32'
- series: 'ceph_pg_active{instance="ceph:9283",job="ceph",pool_id="3"}'
values: '32 32 32 32 32 32 32 32'
promql_expr_test:
- expr: ceph_pool_metadata * on(pool_id,instance) group_left()
(ceph_pg_total - ceph_pg_active) > 0
eval_time: 5m
exp_samples:
- labels: '{instance="ceph:9283", job="ceph",
name="device_health_metrics",
pool_id="3"}'
value: 1
alert_rule_test:
- eval_time: 5m
alertname: pgs inactive
exp_alerts:
- exp_labels:
instance: ceph:9283
job: ceph
name: device_health_metrics
pool_id: 3
severity: critical
type: ceph_default
exp_annotations:
description: >
1 PGs have been inactive for more than 5 minutes in pool
device_health_metrics.
Inactive placement groups aren't able to serve read/write
requests.
#pgs unclean
- interval: 1m
input_series:
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name="device_health_metrics",pool_id="1"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name="device_health_metrics",pool_id="2"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name="device_health_metrics",pool_id="3"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="1"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="2"}'
values: '32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32
32 32 32'
- series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="3"}'
values: '33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33
33 33'
- series: 'ceph_pg_clean{instance="ceph:9283",job="ceph",pool_id="1"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'ceph_pg_clean{instance="ceph:9283",job="ceph",pool_id="2"}'
values: '32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32
32 32'
- series: 'ceph_pg_clean{instance="ceph:9283",job="ceph",pool_id="3"}'
values: '32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32
32 32'
promql_expr_test:
- expr: ceph_pool_metadata * on(pool_id,instance) group_left()
(ceph_pg_total - ceph_pg_clean) > 0
eval_time: 15m
exp_samples:
- labels: '{instance="ceph:9283", job="ceph",
name="device_health_metrics", pool_id="3"}'
value: 1
alert_rule_test:
- eval_time: 16m
alertname: pgs unclean
exp_alerts:
- exp_labels:
instance: ceph:9283
job: ceph
name: device_health_metrics
pool_id: 3
severity: warning
type: ceph_default
exp_annotations:
description: >
1 PGs haven't been clean for more than 15 minutes in pool
device_health_metrics.
Unclean PGs haven't been able to completely recover from a
previous failure.
# root volume full
- interval: 1m
input_series:
- series: 'node_filesystem_avail_bytes{device="/dev/mapper/fedora_localhost
--live-home",fstype="ext4",instance="node-exporter",job="node-exporter",
mountpoint="/"}'
values: '35336400896 35336400896 35336400896 35336400896 35336400896
3525385519.104 3533640089'
- series: 'node_filesystem_size_bytes{device="/dev/mapper/fedora_localhost
--live-home",fstype="ext4",instance="node-exporter",job="node-exporter",
mountpoint="/"}'
values: '73445531648 73445531648 73445531648 73445531648 73445531648
73445531648 73445531648'
promql_expr_test:
- expr: node_filesystem_avail_bytes{mountpoint="/"} /
node_filesystem_size_bytes{mountpoint="/"} * 100 < 5
eval_time: 5m
exp_samples:
- labels: '{device="/dev/mapper/fedora_localhost --live-home",
fstype="ext4", instance="node-exporter", job="node-exporter",
mountpoint="/"}'
value: 4.8E+00
alert_rule_test:
- eval_time: 10m
alertname: root volume full
exp_alerts:
- exp_labels:
device: /dev/mapper/fedora_localhost --live-home
fstype: ext4
instance: node-exporter
job: node-exporter
mountpoint: /
severity: critical
type: ceph_default
exp_annotations:
description: >
Root volume (OSD and MON store) is dangerously full: 4.811% free.
# network packets dropped
- interval: 1s
input_series:
- series: 'node_network_receive_drop_total{device="eth0",
instance="node-exporter",job="node-exporter"}'
values: '1+1x500'
- series: 'node_network_transmit_drop_total{device="eth0",
instance="node-exporter",job="node-exporter"}'
values: '1+1x500'
promql_expr_test:
- expr: |
(
increase(node_network_receive_drop_total{device!="lo"}[1m]) +
increase(node_network_transmit_drop_total{device!="lo"}[1m])
) / (
increase(node_network_receive_packets_total{device!="lo"}[1m]) +
increase(node_network_transmit_packets_total{device!="lo"}[1m])
) >= 0.0001 or (
increase(node_network_receive_drop_total{device!="lo"}[1m]) +
increase(node_network_transmit_drop_total{device!="lo"}[1m])
) >= 10
eval_time: 5m
exp_samples:
- labels: '{device="eth0", instance="node-exporter",
job="node-exporter"}'
value: 1.2E+02
alert_rule_test:
- eval_time: 5m
alertname: network packets dropped
exp_alerts:
- exp_labels:
device: eth0
instance: node-exporter
job: node-exporter
severity: warning
type: ceph_default
exp_annotations:
description: >
Node node-exporter experiences packet drop > 0.01% or >
10 packets/s on interface eth0.
# network packets errors
- interval: 1s
input_series:
- series: 'node_network_receive_errs_total{device="eth0",
instance="node-exporter",job="node-exporter"}'
values: '1+1x500'
- series: 'node_network_transmit_errs_total{device="eth0",
instance="node-exporter",job="node-exporter"}'
values: '1+1x500'
promql_expr_test:
- expr: |
(
increase(node_network_receive_errs_total{device!="lo"}[1m]) +
increase(node_network_transmit_errs_total{device!="lo"}[1m])
) / (
increase(node_network_receive_packets_total{device!="lo"}[1m]) +
increase(node_network_transmit_packets_total{device!="lo"}[1m])
) >= 0.0001 or (
increase(node_network_receive_errs_total{device!="lo"}[1m]) +
increase(node_network_transmit_errs_total{device!="lo"}[1m])
) >= 10
eval_time: 5m
exp_samples:
- labels: '{device="eth0", instance="node-exporter",
job="node-exporter"}'
value: 1.2E+02
alert_rule_test:
- eval_time: 5m
alertname: network packet errors
exp_alerts:
- exp_labels:
device: eth0
instance: node-exporter
job: node-exporter
severity: warning
type: ceph_default
exp_annotations:
description: >
Node node-exporter experiences packet errors > 0.01% or > 10
packets/s on interface eth0.
# MTU Mismatch
- interval: 1m
input_series:
- series: 'node_network_mtu_bytes{device="eth0",instance="node-exporter",
job="node-exporter"}'
values: '1500 1500 1500 1500 1500'
- series: 'node_network_mtu_bytes{device="eth1",instance="node-exporter",
job="node-exporter"}'
values: '1500 1500 1500 1500 1500'
- series: 'node_network_mtu_bytes{device="eth2",instance="node-exporter",
job="node-exporter"}'
values: '1500 1500 1500 1500 1500'
- series: 'node_network_mtu_bytes{device="eth3",instance="node-exporter",
job="node-exporter"}'
values: '1500 1500 1500 1500 1500'
- series: 'node_network_mtu_bytes{device="eth4",instance="node-exporter",
job="node-exporter"}'
values: '9000 9000 9000 9000 9000'
promql_expr_test:
- expr: node_network_mtu_bytes{device!="lo"} != on() group_left()
(quantile(0.5, node_network_mtu_bytes{device!="lo"}))
eval_time: 1m
exp_samples:
- labels: '{__name__="node_network_mtu_bytes", device="eth4",
instance="node-exporter", job="node-exporter"}'
value: 9000
alert_rule_test:
- eval_time: 1m
alertname: MTU Mismatch
exp_alerts:
- exp_labels:
device: eth4
instance: node-exporter
job: node-exporter
oid: 1.3.6.1.4.1.50495.15.1.2.8.5
severity: warning
type: ceph_default
exp_annotations:
description: >
Node node-exporter has a different MTU size (9000)
than the median value on device eth4.
# pool full
- interval: 1m
input_series:
- series: 'ceph_pool_stored{instance="ceph:9283",job="ceph",pool_id="1"}'
values: '0 0 0 0 0 0 0 0 0'
- series: 'ceph_pool_stored{instance="ceph:9283",job="ceph",pool_id="2"}'
values: '1850 1850 1850 1850 1850 1850 1850'
- series: 'ceph_pool_stored{instance="ceph:9283",job="ceph",pool_id="3"}'
values: '900 900 23524 23524 23524 23524 23524 23524
23524'
- series: 'ceph_pool_max_avail{instance="ceph:9283",job="ceph",pool_id="1"}'
values: '106287063040 106287063040 106287063040 106287063040 106287063040
106287063040 106287063040'
- series: 'ceph_pool_max_avail{instance="ceph:9283",job="ceph",pool_id="2"}'
values: '106287063040 106287063040 106287063040 106287063040 106287063040
106287063040 106287063040'
- series: 'ceph_pool_max_avail{instance="ceph:9283",job="ceph",pool_id="3"}'
values: '37.5 37.5 37.5 37.5 37.5 37.5 37.5'
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name="device_health_metrics",pool_id="1"}'
values: '1 1 1 1 1 1 1 1 1'
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name=".rgw.root",pool_id="2"}'
values: '1 1 1 1 1 1 1 1 1'
- series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
name="default.rgw.log",pool_id="3"}'
values: '1 1 1 1 1 1 1 1 1'
promql_expr_test:
- expr: |
ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)
* on(pool_id) group_right ceph_pool_metadata * 100 > 90
eval_time: 1m
exp_samples:
- labels: '{instance="ceph:9283", job="ceph", name="default.rgw.log",
pool_id="3"}'
value: 9.6E+01
alert_rule_test:
- eval_time: 2m
alertname: pool full
exp_alerts:
- exp_labels:
instance: ceph:9283
job: ceph
name: default.rgw.log
pool_id: 3
severity: critical
type: ceph_default
exp_annotations:
description: Pool default.rgw.log at 96% capacity.
# slow OSD ops
- interval: 1m
input_series:
- series: 'ceph_healthcheck_slow_ops{instance="ceph:9283",job="ceph"}'
values: '1+0x120'
promql_expr_test:
- expr: ceph_healthcheck_slow_ops > 0
eval_time: 1m
exp_samples:
- labels: '{__name__="ceph_healthcheck_slow_ops", instance="ceph:9283",
job="ceph"}'
value: 1
alert_rule_test:
- eval_time: 20m
alertname: Slow OSD Ops
exp_alerts:
- exp_labels:
instance: ceph:9283
job: ceph
severity: warning
type: ceph_default
exp_annotations:
description: >
1 OSD requests are taking too long to process
(osd_op_complaint_time exceeded)
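These unit tests are written for Prometheus' promtool, which replays the input_series and checks both the PromQL samples and the resulting alerts. A minimal sketch of running them locally, assuming the alert rules and the tests above are saved as prometheus_alerts.yml and test_alerts.yml (both file names are placeholders, not taken from this diff):

# Validate the alert rule syntax first, then replay the unit tests above.
# File names are assumptions; adjust them to wherever the files live.
promtool check rules prometheus_alerts.yml
promtool test rules test_alerts.yml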
View File
@ -6,6 +6,7 @@ overrides:
conf:
osd:
filestore flush min: 0
osd heartbeat grace: 60
tasks:
- check-counter:
counters:
View File
@ -146,9 +146,7 @@ R/O, unpartitioned:
$ blockdev --setrw $DEV
.*BLKROSET: Permission denied (re)
[1]
$ sudo blockdev --setrw $DEV
.*BLKROSET: Read-only file system (re)
[1]
$ sudo blockdev --setrw $DEV # succeeds but effectively ignored
$ blockdev --getro $DEV
1
$ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none
@ -182,15 +180,11 @@ R/O, partitioned:
$ blockdev --setrw ${DEV}p1
.*BLKROSET: Permission denied (re)
[1]
$ sudo blockdev --setrw ${DEV}p1
.*BLKROSET: Read-only file system (re)
[1]
$ sudo blockdev --setrw ${DEV}p1 # succeeds but effectively ignored
$ blockdev --setrw ${DEV}p2
.*BLKROSET: Permission denied (re)
[1]
$ sudo blockdev --setrw ${DEV}p2
.*BLKROSET: Read-only file system (re)
[1]
$ sudo blockdev --setrw ${DEV}p2 # succeeds but effectively ignored
$ blockdev --getro ${DEV}p1
1
$ blockdev --getro ${DEV}p2
@ -227,9 +221,7 @@ Unpartitioned:
$ blockdev --setrw $DEV
.*BLKROSET: Permission denied (re)
[1]
$ sudo blockdev --setrw $DEV
.*BLKROSET: Read-only file system (re)
[1]
$ sudo blockdev --setrw $DEV # succeeds but effectively ignored
$ blockdev --getro $DEV
1
$ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none
@ -263,15 +255,11 @@ Partitioned:
$ blockdev --setrw ${DEV}p1
.*BLKROSET: Permission denied (re)
[1]
$ sudo blockdev --setrw ${DEV}p1
.*BLKROSET: Read-only file system (re)
[1]
$ sudo blockdev --setrw ${DEV}p1 # succeeds but effectively ignored
$ blockdev --setrw ${DEV}p2
.*BLKROSET: Permission denied (re)
[1]
$ sudo blockdev --setrw ${DEV}p2
.*BLKROSET: Read-only file system (re)
[1]
$ sudo blockdev --setrw ${DEV}p2 # succeeds but effectively ignored
$ blockdev --getro ${DEV}p1
1
$ blockdev --getro ${DEV}p2
View File
@ -57,6 +57,34 @@ function get_osds_up() {
echo $osds
}
function TEST_reweight_vs_classes() {
local dir=$1
# CrushWrapper::update_item (and ceph osd crush set) must rebuild the shadow
# tree too. https://tracker.ceph.com/issues/48065
run_mon $dir a || return 1
run_osd $dir 0 || return 1
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
ceph osd crush set-device-class ssd osd.0 || return 1
ceph osd crush class ls-osd ssd | grep 0 || return 1
ceph osd crush set-device-class ssd osd.1 || return 1
ceph osd crush class ls-osd ssd | grep 1 || return 1
ceph osd crush reweight osd.0 1
h=`hostname -s`
ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 65536
ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 65536
ceph osd crush set 0 2 host=$h
ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 131072
ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 131072
}
function TEST_classes() {
local dir=$1
View File
@ -264,6 +264,8 @@ function TEST_0_osd() {
ceph osd ok-to-stop 3 || return 1
! ceph osd ok-to-stop 0 1 || return 1
! ceph osd ok-to-stop 2 3 || return 1
ceph osd ok-to-stop 0 --max 2 | grep '[0]' || return 1
ceph osd ok-to-stop 1 --max 2 | grep '[1]' || return 1
# with min_size 2 we can stop 1 osds
ceph osd pool set ec min_size 2 || return 1
@ -274,6 +276,11 @@ function TEST_0_osd() {
! ceph osd ok-to-stop 0 1 2 || return 1
! ceph osd ok-to-stop 1 2 3 || return 1
ceph osd ok-to-stop 0 --max 2 | grep '[0,1]' || return 1
ceph osd ok-to-stop 0 --max 20 | grep '[0,1]' || return 1
ceph osd ok-to-stop 2 --max 2 | grep '[2,3]' || return 1
ceph osd ok-to-stop 2 --max 20 | grep '[2,3]' || return 1
# we should get the same result with one of the osds already down
kill_daemons $dir TERM osd.0 || return 1
ceph osd down 0 || return 1
View File
@ -2,7 +2,10 @@ overrides:
ceph:
conf:
mgr:
debug client: 10
debug mgr: 20
debug ms: 1
debug finisher: 20
debug client: 20
log-whitelist:
- OSD full dropping all updates
- OSD near full
View File
@ -7,4 +7,6 @@ tasks:
mon.a:
- ceph fs dump --format=json-pretty
- ceph fs set cephfs min_compat_client mimic
- sleep:
duration: 5
- fs.clients_evicted:
View File
@ -7,6 +7,8 @@ tasks:
mon.a:
- ceph fs dump --format=json-pretty
- ceph fs set cephfs min_compat_client mimic
- sleep:
duration: 5
- fs.clients_evicted:
clients:
client.0: False
View File
@ -3,6 +3,9 @@ overrides:
log-whitelist:
- SLOW_OPS
- slow request
conf:
osd:
osd heartbeat grace: 60
tasks:
- workunit:
clients:
View File
@ -6,6 +6,7 @@ overrides:
conf:
osd:
filestore flush min: 0
osd heartbeat grace: 60
tasks:
- workunit:
clients:
View File
@ -0,0 +1,4 @@
openstack:
- volumes: # attached to each instance
count: 4
size: 30 # GB
View File
@ -0,0 +1,21 @@
meta:
- desc: |
Install and run ceph on one node,
with a separate client 1.
Upgrade client 1 to nautilus
Run tests against old cluster
roles:
- - mon.a
- mon.b
- mon.c
- osd.0
- osd.1
- osd.2
- client.0
- mgr.x
- - client.1
overrides:
ceph:
log-whitelist:
- failed to encode map
fs: xfs
View File
@ -0,0 +1,11 @@
tasks:
- install:
branch: nautilus
exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev','python34-cephfs','python34-rados']
- print: "**** done install nautilus"
- install.upgrade:
exclude_packages: ['ceph-test', 'ceph-test-dbg','libcephfs1', 'python-ceph']
client.1:
- print: "**** done install.upgrade to -x on client.1"
- ceph:
- print: "**** done ceph task"
View File
@ -0,0 +1,6 @@
overrides:
ceph:
conf:
client:
rbd default features: 61
View File
@ -0,0 +1,6 @@
overrides:
ceph:
conf:
client:
rbd default features: 1
View File
@ -0,0 +1,34 @@
tasks:
- parallel:
- workunit:
branch: nautilus
clients:
client.0:
- rbd/notify_master.sh
env:
RBD_FEATURES: "61"
- workunit:
branch: pacific
clients:
client.1:
- rbd/notify_slave.sh
env:
RBD_FEATURES: "61"
RBD_DISABLE_UPDATE_FEATURES: "1"
- print: "**** done rbd: old librbd -> new librbd"
- parallel:
- workunit:
branch: nautilus
clients:
client.0:
- rbd/notify_slave.sh
env:
RBD_FEATURES: "61"
- workunit:
branch: pacific
clients:
client.1:
- rbd/notify_master.sh
env:
RBD_FEATURES: "61"
- print: "**** done rbd: new librbd -> old librbd"
View File
@ -0,0 +1 @@
../../../../../../distros/all/ubuntu_18.04.yaml
View File
@ -1,9 +1,9 @@
meta:
- desc: |
Run ceph on two nodes, using one of them as a client,
with a separate client-only node.
with a separate client-only node.
Use xfs beneath the osds.
install ceph/nautilus v14.2.2 point version
install ceph/nautilus v14.2.20 point version
run workload and upgrade-sequence in parallel
(every point release should be tested)
run workload and upgrade-sequence in parallel
@ -32,8 +32,8 @@ overrides:
- cache pools at or near target size
- filesystem is degraded
- OBJECT_MISPLACED
### ref: https://tracker.ceph.com/issues/40251
#removed see ^ - failed to encode map
### ref: https://tracker.ceph.com/issues/40251
#removed see ^ - failed to encode map
fs: xfs
@ -46,7 +46,7 @@ overrides:
osd:
osd map max advance: 1000
osd_class_default_list: "*"
osd_class_load_list: "*"
osd_class_load_list: "*"
client:
rgw_crypt_require_ssl: false
rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo=
@ -70,19 +70,19 @@ openstack:
size: 30 # GB
tasks:
# v14.2.0 removed per http://tracker.ceph.com/issues/40251
- print: "**** done nautilus v14.2.2 about to install"
- print: "**** done nautilus v14.2.20 about to install"
- install:
tag: v14.2.2
tag: v14.2.20
# line below can be removed its from jewel test
#exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev', 'librgw2']
- print: "**** done v14.2.2 install"
- print: "**** done v14.2.20 install"
- ceph:
fs: xfs
add_osds_to_crush: true
- print: "**** done ceph xfs"
- sequential:
- workload
- print: "**** done workload v14.2.2"
- print: "**** done workload v14.2.20"
# v14.2.1 removed per http://tracker.ceph.com/issues/40251
View File
@ -1,11 +1,11 @@
meta:
- desc: install ceph/nautilus v14.2.2
- desc: install ceph/nautilus v14.2.20
tasks:
- install:
tag: v14.2.2
tag: v14.2.20
exclude_packages: ['librados3']
extra_packages: ['librados2']
- print: "**** done install nautilus v14.2.2"
- print: "**** done install nautilus v14.2.20"
- ceph:
- exec:
osd.0:
View File
@ -3,7 +3,7 @@ meta:
librbd python api tests
tasks:
- workunit:
tag: v14.2.10
tag: v14.2.20
clients:
client.0:
- rbd/test_librbd_python.sh
View File
@ -3,6 +3,7 @@ ceph manager -- Thrasher and CephManager objects
"""
from functools import wraps
import contextlib
import errno
import random
import signal
import time
@ -2560,13 +2561,22 @@ class CephManager:
Loop until quorum size is reached.
"""
self.log('waiting for quorum size %d' % size)
start = time.time()
while not len(self.get_mon_quorum()) == size:
if timeout is not None:
assert time.time() - start < timeout, \
('failed to reach quorum size %d '
'before timeout expired' % size)
time.sleep(3)
sleep = 3
with safe_while(sleep=sleep,
tries=timeout // sleep,
action=f'wait for quorum size {size}') as proceed:
while proceed():
try:
if len(self.get_mon_quorum()) == size:
break
except CommandFailedError as e:
# could fail instead of being blocked if the rotating key of the
# connected monitor is not updated yet after they form the
# quorum
if e.exitstatus == errno.EACCES:
pass
else:
raise
self.log("quorum is size %d" % size)
def get_mon_health(self, debug=False):
View File
@ -176,6 +176,9 @@ class CephFSTestCase(CephTestCase):
for m in self.mounts:
m.teardown()
# To prevent failover messages during Unwind of ceph task
self.mds_cluster.delete_all_filesystems()
for i, m in enumerate(self.mounts):
m.client_id = self._original_client_ids[i]
View File
@ -6,6 +6,7 @@ import logging
import errno
import time
from teuthology.exceptions import CommandFailedError
from teuthology.contextutil import safe_while
import os
from tasks.cephfs.cephfs_test_case import CephFSTestCase
@ -30,22 +31,46 @@ class TestScrubControls(CephFSTestCase):
self.assertEqual(res['return_code'], expected)
def _get_scrub_status(self):
return self.fs.rank_tell(["scrub", "status"])
def _check_task_status(self, expected_status):
task_status = self.fs.get_task_status("scrub status")
active = self.fs.get_active_names()
log.debug("current active={0}".format(active))
self.assertTrue(task_status[active[0]].startswith(expected_status))
def _check_task_status(self, expected_status, timo=120):
""" check scrub status for current active mds in ceph status """
with safe_while(sleep=1, tries=timo, action='wait for task status') as proceed:
while proceed():
active = self.fs.get_active_names()
log.debug("current active={0}".format(active))
task_status = self.fs.get_task_status("scrub status")
try:
if task_status[active[0]].startswith(expected_status):
return True
except KeyError:
pass
def _check_task_status_na(self, timo=120):
""" check absence of scrub status in ceph status """
with safe_while(sleep=1, tries=timo, action='wait for task status') as proceed:
while proceed():
active = self.fs.get_active_names()
log.debug("current active={0}".format(active))
task_status = self.fs.get_task_status("scrub status")
if not active[0] in task_status:
return True
def create_scrub_data(self, test_dir):
for i in range(32):
dirname = "dir.{0}".format(i)
dirpath = os.path.join(test_dir, dirname)
self.mount_a.run_shell_payload(f"""
set -e
mkdir -p {dirpath}
for ((i = 0; i < 32; i++)); do
dd if=/dev/urandom of={dirpath}/filename.$i bs=1M conv=fdatasync count=1
done
""")
def test_scrub_abort(self):
test_dir = "scrub_control_test_path"
abs_test_path = "/{0}".format(test_dir)
log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
client_path = os.path.join(self.mount_a.mountpoint, test_dir)
log.info("client_path: {0}".format(client_path))
log.info("Cloning repo into place")
TestScrubChecks.clone_repo(self.mount_a, client_path)
self.create_scrub_data(test_dir)
out_json = self.fs.rank_tell(["scrub", "start", abs_test_path, "recursive"])
self.assertNotEqual(out_json, None)
@ -56,8 +81,8 @@ class TestScrubControls(CephFSTestCase):
self.assertTrue("no active" in out_json['status'])
# sleep enough to fetch updated task status
time.sleep(10)
self._check_task_status("idle")
checked = self._check_task_status_na()
self.assertTrue(checked)
def test_scrub_pause_and_resume(self):
test_dir = "scrub_control_test_path"
@ -67,8 +92,7 @@ class TestScrubControls(CephFSTestCase):
client_path = os.path.join(self.mount_a.mountpoint, test_dir)
log.info("client_path: {0}".format(client_path))
log.info("Cloning repo into place")
_ = TestScrubChecks.clone_repo(self.mount_a, client_path)
self.create_scrub_data(test_dir)
out_json = self.fs.rank_tell(["scrub", "start", abs_test_path, "recursive"])
self.assertNotEqual(out_json, None)
@ -78,25 +102,22 @@ class TestScrubControls(CephFSTestCase):
out_json = self._get_scrub_status()
self.assertTrue("PAUSED" in out_json['status'])
# sleep enough to fetch updated task status
time.sleep(10)
self._check_task_status("paused")
checked = self._check_task_status("paused")
self.assertTrue(checked)
# resume and verify
self._resume_scrub(0)
out_json = self._get_scrub_status()
self.assertFalse("PAUSED" in out_json['status'])
checked = self._check_task_status_na()
self.assertTrue(checked)
def test_scrub_pause_and_resume_with_abort(self):
test_dir = "scrub_control_test_path"
abs_test_path = "/{0}".format(test_dir)
log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
client_path = os.path.join(self.mount_a.mountpoint, test_dir)
log.info("client_path: {0}".format(client_path))
log.info("Cloning repo into place")
_ = TestScrubChecks.clone_repo(self.mount_a, client_path)
self.create_scrub_data(test_dir)
out_json = self.fs.rank_tell(["scrub", "start", abs_test_path, "recursive"])
self.assertNotEqual(out_json, None)
@ -106,9 +127,8 @@ class TestScrubControls(CephFSTestCase):
out_json = self._get_scrub_status()
self.assertTrue("PAUSED" in out_json['status'])
# sleep enough to fetch updated task status
time.sleep(10)
self._check_task_status("paused")
checked = self._check_task_status("paused")
self.assertTrue(checked)
# abort and verify
self._abort_scrub(0)
@ -116,26 +136,37 @@ class TestScrubControls(CephFSTestCase):
self.assertTrue("PAUSED" in out_json['status'])
self.assertTrue("0 inodes" in out_json['status'])
# sleep enough to fetch updated task status
time.sleep(10)
self._check_task_status("paused")
# scrub status should still be paused...
checked = self._check_task_status("paused")
self.assertTrue(checked)
# resume and verify
self._resume_scrub(0)
out_json = self._get_scrub_status()
self.assertTrue("no active" in out_json['status'])
# sleep enough to fetch updated task status
time.sleep(10)
self._check_task_status("idle")
checked = self._check_task_status_na()
self.assertTrue(checked)
def test_scrub_task_status_on_mds_failover(self):
# sleep enough to fetch updated task status
time.sleep(10)
(original_active, ) = self.fs.get_active_names()
original_standbys = self.mds_cluster.get_standby_daemons()
self._check_task_status("idle")
test_dir = "scrub_control_test_path"
abs_test_path = "/{0}".format(test_dir)
self.create_scrub_data(test_dir)
out_json = self.fs.rank_tell(["scrub", "start", abs_test_path, "recursive"])
self.assertNotEqual(out_json, None)
# pause and verify
self._pause_scrub(0)
out_json = self._get_scrub_status()
self.assertTrue("PAUSED" in out_json['status'])
checked = self._check_task_status("paused")
self.assertTrue(checked)
# Kill the rank 0
self.fs.mds_stop(original_active)
@ -150,12 +181,7 @@ class TestScrubControls(CephFSTestCase):
original_standbys))
self.wait_until_true(promoted, timeout=grace*2)
mgr_beacon_grace = float(self.fs.get_config("mgr_service_beacon_grace", service_type="mon"))
def status_check():
task_status = self.fs.get_task_status("scrub status")
return original_active not in task_status
self.wait_until_true(status_check, timeout=mgr_beacon_grace*2)
self._check_task_status_na()
class TestScrubChecks(CephFSTestCase):
"""
View File
@ -562,6 +562,9 @@ vc.disconnect()
self.mount_a.run_shell(["touch", os.path.join(mount_path, "noperms")])
self.mount_a.run_shell(["chmod", "0000", os.path.join(mount_path, "noperms")])
# A folder with non-ascii characters
self.mount_a.run_shell(["mkdir", os.path.join(mount_path, u"f\u00F6n")])
self._volume_client_python(self.mount_b, dedent("""
vp = VolumePath("{group_id}", u"{volume_id}")
vc.delete_volume(vp)
View File
@ -3649,6 +3649,48 @@ class TestVolumes(CephFSTestCase):
# verify trash dir is clean
self._wait_for_trash_empty()
def test_subvolume_snapshot_clone_retain_suid_guid(self):
subvolume = self._generate_random_subvolume_name()
snapshot = self._generate_random_snapshot_name()
clone = self._generate_random_clone_name()
# create subvolume
self._fs_cmd("subvolume", "create", self.volname, subvolume)
# Create a file with suid, guid bits set along with executable bit.
args = ["subvolume", "getpath", self.volname, subvolume]
args = tuple(args)
subvolpath = self._fs_cmd(*args)
self.assertNotEqual(subvolpath, None)
subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline
file_path = subvolpath
file_path = os.path.join(subvolpath, "test_suid_file")
self.mount_a.run_shell(["touch", file_path])
self.mount_a.run_shell(["chmod", "u+sx,g+sx", file_path])
# snapshot subvolume
self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
# schedule a clone
self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
# check clone status
self._wait_for_clone_to_complete(clone)
# verify clone
self._verify_clone(subvolume, snapshot, clone)
# remove snapshot
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
# remove subvolumes
self._fs_cmd("subvolume", "rm", self.volname, subvolume)
self._fs_cmd("subvolume", "rm", self.volname, clone)
# verify trash dir is clean
self._wait_for_trash_empty()
def test_subvolume_snapshot_reconf_max_concurrent_clones(self):
"""
Validate 'max_concurrent_clones' config option
View File
@ -107,6 +107,16 @@ class RgwApiCredentialsTest(RgwTestCase):
data['message'])
class RgwSiteTest(RgwTestCase):
AUTH_ROLES = ['rgw-manager']
def test_get_realms(self):
data = self._get('/api/rgw/site?query=realms')
self.assertStatus(200)
self.assertSchema(data, JList(str))
class RgwBucketTest(RgwTestCase):
AUTH_ROLES = ['rgw-manager']
View File
@ -166,8 +166,7 @@ class TestProgress(MgrTestCase):
# Wait for a progress event to pop up
self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
timeout=self.EVENT_CREATION_PERIOD*2,
period=1)
timeout=self.EVENT_CREATION_PERIOD*2)
ev = self._get_osd_in_out_events('out')[0]
log.info(json.dumps(ev, indent=1))
self.assertIn("Rebalancing after osd.0 marked out", ev['message'])
@ -182,13 +181,12 @@ class TestProgress(MgrTestCase):
# First Event should complete promptly
self.wait_until_true(lambda: self._is_complete(initial_event['id']),
timeout=self.EVENT_CREATION_PERIOD)
timeout=self.RECOVERY_PERIOD)
try:
# Wait for progress event marked in to pop up
self.wait_until_equal(lambda: self._osd_in_out_events_count('in'), 1,
timeout=self.EVENT_CREATION_PERIOD*2,
period=1)
timeout=self.EVENT_CREATION_PERIOD*2)
except RuntimeError as ex:
if not "Timed out after" in str(ex):
raise ex
@ -261,7 +259,7 @@ class TestProgress(MgrTestCase):
# Event should complete promptly
self.wait_until_true(lambda: self._is_complete(ev['id']),
timeout=self.EVENT_CREATION_PERIOD)
timeout=self.RECOVERY_PERIOD)
self.assertTrue(self._is_quiet())
def test_osd_came_back(self):
@ -274,10 +272,11 @@ class TestProgress(MgrTestCase):
ev1 = self._simulate_failure()
ev2 = self._simulate_back_in([0], ev1)
# Wait for progress event to ultimately complete
self.wait_until_true(lambda: self._is_complete(ev2['id']),
timeout=self.RECOVERY_PERIOD)
if ev2 is not None:
# Wait for progress event to ultimately complete
self.wait_until_true(lambda: self._is_complete(ev2['id']),
timeout=self.RECOVERY_PERIOD)
self.assertTrue(self._is_quiet())
@ -364,8 +363,8 @@ class TestProgress(MgrTestCase):
'osd', 'out', '0')
# Wait for a progress event to pop up
self.wait_until_equal(lambda: len(self._all_events()), 1,
timeout=self.EVENT_CREATION_PERIOD*2)
self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
timeout=self.RECOVERY_PERIOD)
ev = self._all_events()[0]
View File
@ -14,6 +14,8 @@
#!/usr/bin/env bash
# mount a NFS share for storing logs
sed -i 's/archive.ubuntu.com/old-releases.ubuntu.com/' /etc/apt/sources.list
sed -i 's/security.ubuntu.com/old-releases.ubuntu.com/' /etc/apt/sources.list
apt-get update
apt-get -y install nfs-common
mkdir /mnt/log
View File
@ -298,6 +298,7 @@ class LocalRemote(object):
stderr=subprocess.PIPE,
stdin=subprocess.PIPE,
cwd=cwd,
env=env,
shell=True)
else:
# Sanity check that we've got a list of strings
@ -917,7 +918,9 @@ class LocalContext(object):
self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id)
def __del__(self):
shutil.rmtree(self.teuthology_config['test_path'])
path = self.teuthology_config['test_path']
if path is not None:
shutil.rmtree(path)
def exec_test():
# Parse arguments
View File
@ -485,21 +485,148 @@ test_purge() {
echo "testing trash purge..."
remove_images
rbd trash ls | wc -l | grep 0
rbd trash purge
rbd create $RBD_CREATE_ARGS --size 256 testimg1
rbd create $RBD_CREATE_ARGS --size 256 testimg2
rbd trash mv testimg1
rbd trash mv testimg2
rbd trash ls | wc -l | grep 2
rbd trash purge
rbd trash ls | wc -l | grep 0
rbd create $RBD_CREATE_ARGS foo -s 1
rbd create $RBD_CREATE_ARGS bar -s 1
rbd create $RBD_CREATE_ARGS --size 256 testimg1
rbd create $RBD_CREATE_ARGS --size 256 testimg2
rbd trash mv testimg1 --expires-at "1 hour"
rbd trash mv testimg2 --expires-at "3 hours"
rbd trash ls | wc -l | grep 2
rbd trash purge
rbd trash ls | wc -l | grep 2
rbd trash purge --expired-before "now + 2 hours"
rbd trash ls | wc -l | grep 1
rbd trash ls | grep testimg2
rbd trash purge --expired-before "now + 4 hours"
rbd trash ls | wc -l | grep 0
rbd trash mv foo --expires-at "10 sec"
rbd trash mv bar --expires-at "30 sec"
rbd create $RBD_CREATE_ARGS --size 256 testimg1
rbd snap create testimg1@snap # pin testimg1
rbd create $RBD_CREATE_ARGS --size 256 testimg2
rbd create $RBD_CREATE_ARGS --size 256 testimg3
rbd trash mv testimg1
rbd trash mv testimg2
rbd trash mv testimg3
rbd trash ls | wc -l | grep 3
rbd trash purge 2>&1 | grep 'some expired images could not be removed'
rbd trash ls | wc -l | grep 1
rbd trash ls | grep testimg1
ID=$(rbd trash ls | awk '{ print $1 }')
rbd snap purge --image-id $ID
rbd trash purge
rbd trash ls | wc -l | grep 0
rbd trash purge --expired-before "now + 10 sec"
rbd trash ls | grep -v foo | wc -l | grep 1
rbd trash ls | grep bar
rbd create $RBD_CREATE_ARGS --size 256 testimg1
rbd create $RBD_CREATE_ARGS --size 256 testimg2
rbd snap create testimg2@snap # pin testimg2
rbd create $RBD_CREATE_ARGS --size 256 testimg3
rbd trash mv testimg1
rbd trash mv testimg2
rbd trash mv testimg3
rbd trash ls | wc -l | grep 3
rbd trash purge 2>&1 | grep 'some expired images could not be removed'
rbd trash ls | wc -l | grep 1
rbd trash ls | grep testimg2
ID=$(rbd trash ls | awk '{ print $1 }')
rbd snap purge --image-id $ID
rbd trash purge
rbd trash ls | wc -l | grep 0
LAST_IMG=$(rbd trash ls | grep bar | awk '{print $1;}')
rbd trash rm $LAST_IMG --force --no-progress | grep -v '.' | wc -l | grep 0
rbd create $RBD_CREATE_ARGS --size 256 testimg1
rbd create $RBD_CREATE_ARGS --size 256 testimg2
rbd create $RBD_CREATE_ARGS --size 256 testimg3
rbd snap create testimg3@snap # pin testimg3
rbd trash mv testimg1
rbd trash mv testimg2
rbd trash mv testimg3
rbd trash ls | wc -l | grep 3
rbd trash purge 2>&1 | grep 'some expired images could not be removed'
rbd trash ls | wc -l | grep 1
rbd trash ls | grep testimg3
ID=$(rbd trash ls | awk '{ print $1 }')
rbd snap purge --image-id $ID
rbd trash purge
rbd trash ls | wc -l | grep 0
# test purging a clone with a chain of parents
rbd create $RBD_CREATE_ARGS --size 256 testimg1
rbd snap create testimg1@snap
rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2
rbd snap rm testimg1@snap
rbd create $RBD_CREATE_ARGS --size 256 testimg3
rbd snap create testimg2@snap
rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4
rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5
rbd snap rm testimg2@snap
rbd snap create testimg4@snap
rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6
rbd snap rm testimg4@snap
rbd trash mv testimg1
rbd trash mv testimg2
rbd trash mv testimg3
rbd trash mv testimg4
rbd trash ls | wc -l | grep 4
rbd trash purge 2>&1 | grep 'some expired images could not be removed'
rbd trash ls | wc -l | grep 3
rbd trash ls | grep testimg1
rbd trash ls | grep testimg2
rbd trash ls | grep testimg4
rbd trash mv testimg6
rbd trash ls | wc -l | grep 4
rbd trash purge 2>&1 | grep 'some expired images could not be removed'
rbd trash ls | wc -l | grep 2
rbd trash ls | grep testimg1
rbd trash ls | grep testimg2
rbd trash mv testimg5
rbd trash ls | wc -l | grep 3
rbd trash purge
rbd trash ls | wc -l | grep 0
rbd create $RBD_CREATE_ARGS --size 256 testimg1
rbd snap create testimg1@snap
rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2
rbd snap rm testimg1@snap
rbd create $RBD_CREATE_ARGS --size 256 testimg3
rbd snap create testimg3@snap # pin testimg3
rbd snap create testimg2@snap
rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4
rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5
rbd snap rm testimg2@snap
rbd snap create testimg4@snap
rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6
rbd snap rm testimg4@snap
rbd trash mv testimg1
rbd trash mv testimg2
rbd trash mv testimg3
rbd trash mv testimg4
rbd trash ls | wc -l | grep 4
rbd trash purge 2>&1 | grep 'some expired images could not be removed'
rbd trash ls | wc -l | grep 4
rbd trash mv testimg6
rbd trash ls | wc -l | grep 5
rbd trash purge 2>&1 | grep 'some expired images could not be removed'
rbd trash ls | wc -l | grep 3
rbd trash ls | grep testimg1
rbd trash ls | grep testimg2
rbd trash ls | grep testimg3
rbd trash mv testimg5
rbd trash ls | wc -l | grep 4
rbd trash purge 2>&1 | grep 'some expired images could not be removed'
rbd trash ls | wc -l | grep 1
rbd trash ls | grep testimg3
ID=$(rbd trash ls | awk '{ print $1 }')
rbd snap purge --image-id $ID
rbd trash purge
rbd trash ls | wc -l | grep 0
}
test_deep_copy_clone() {
View File
@ -1,12 +1,13 @@
#!/usr/bin/env bash
set -ex
# set -x
set -e
# if defined, debug messages will be displayed and prepended with the string
# debug="DEBUG"
huge_size=2222 # in megabytes
big_size=6 # in megabytes
huge_size=5100 # in megabytes
big_size=7 # in megabytes
huge_obj=/tmp/huge_obj.temp.$$
big_obj=/tmp/big_obj.temp.$$
@ -160,7 +161,6 @@ mys3uploadkill() {
exit 1
fi
set -v
local_file="$1"
remote_bkt="$2"
remote_obj="$3"
@ -229,8 +229,16 @@ mys3cmd ls s3://multipart-bkt
bkt="incomplete-mp-bkt-1"
mys3cmd mb s3://$bkt
mys3uploadkill $huge_obj $bkt incomplete-mp-obj-1 $fifo 20
mys3uploadkill $huge_obj $bkt incomplete-mp-obj-2 $fifo 100
mys3uploadkill $huge_obj $bkt incomplete-mp-obj-c $fifo 20
# generate an incomplete multipart with more than 1,000 parts
mys3uploadkill $huge_obj $bkt incomplete-mp-obj-b $fifo 1005
# generate more than 1000 incomplete multipart uploads
for c in $(seq 1005) ;do
mys3uploadkill $huge_obj $bkt incomplete-mp-obj-c-$c $fifo 3
done
####################################
# resharded bucket
View File
@ -22,6 +22,10 @@ source src/script/run-make.sh
set -e
function in_jenkins() {
test -n "$JENKINS_HOME"
}
function run() {
# to prevent OSD EMFILE death on tests, make sure ulimit >= 1024
$DRY_RUN ulimit -n $(ulimit -Hn)
@ -35,9 +39,16 @@ function run() {
$DRY_RUN sudo /sbin/sysctl -q -w fs.aio-max-nr=$((65536 * 16))
CHECK_MAKEOPTS=${CHECK_MAKEOPTS:-$DEFAULT_MAKEOPTS}
if ! $DRY_RUN ctest $CHECK_MAKEOPTS --output-on-failure; then
rm -fr ${TMPDIR:-/tmp}/ceph-asok.*
return 1
if in_jenkins; then
if ! ctest $CHECK_MAKEOPTS --no-compress-output --output-on-failure -T Test; then
# do not return failure, as the jenkins publisher will take care of this
rm -fr ${TMPDIR:-/tmp}/ceph-asok.*
fi
else
if ! $DRY_RUN ctest $CHECK_MAKEOPTS --output-on-failure; then
rm -fr ${TMPDIR:-/tmp}/ceph-asok.*
return 1
fi
fi
}
View File
@ -1,2 +1,2 @@
36274af6eb7f2a5055f2d53ad448f2694e9046a0
v14.2.20
ca74598065096e6fcbd8433c8779a2be0c889351
v14.2.22
View File
@ -401,7 +401,7 @@ target_link_libraries(ceph-common ${ceph_common_deps})
# appease dpkg-shlibdeps
set_target_properties(ceph-common PROPERTIES
SOVERSION 0
INSTALL_RPATH "")
SKIP_RPATH TRUE)
if(NOT APPLE AND NOT FREEBSD)
# Apple uses Mach-O, not ELF. so this option does not apply to APPLE.
#
@ -692,6 +692,12 @@ if(WITH_RBD)
add_subdirectory(rbd_replay)
endif(WITH_RBD)
if(WITH_BOOST_CONTEXT)
set(SPAWN_BUILD_TESTS OFF CACHE INTERNAL "disable building of spawn unit tests")
set(SPAWN_INSTALL OFF CACHE INTERNAL "disable installation of spawn headers")
add_subdirectory(spawn)
endif()
# RadosGW
if(WITH_KVS)
add_subdirectory(key_value_store)
View File
@ -1134,3 +1134,15 @@ def get_device_lvs(device, name_prefix=''):
lvs = _output_parser(stdout, LV_FIELDS)
return [Volume(**lv) for lv in lvs if lv['lv_name'] and
lv['lv_name'].startswith(name_prefix)]
def get_lv_by_fullname(full_name):
"""
returns LV by the specified LV's full name (formatted as vg_name/lv_name)
"""
try:
vg_name, lv_name = full_name.split('/')
res_lv = get_first_lv(filters={'lv_name': lv_name,
'vg_name': vg_name})
except ValueError:
res_lv = None
return res_lv
View File
@ -106,7 +106,7 @@ def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, ar
requested_slots = fast_slots_per_device
requested_size = getattr(args, '{}_size'.format(type_), 0)
if requested_size == 0:
if not requested_size or requested_size == 0:
# no size argument was specified, check ceph.conf
get_size_fct = getattr(prepare, 'get_{}_size'.format(type_))
requested_size = get_size_fct(lv_format=False)
@ -126,6 +126,7 @@ def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, ar
if requested_size:
if requested_size <= abs_size:
abs_size = requested_size
relative_size = int(abs_size) / dev_size
else:
mlogger.error(
'{} was requested for {}, but only {} can be fulfilled'.format(
View File
@ -9,6 +9,7 @@ from . import trigger
from . import listing
from . import zap
from . import batch
from . import migrate
class LVM(object):
@ -30,6 +31,9 @@ class LVM(object):
'trigger': trigger.Trigger,
'list': listing.List,
'zap': zap.Zap,
'migrate': migrate.Migrate,
'new-wal': migrate.NewWAL,
'new-db': migrate.NewDB,
}
def __init__(self, argv):
View File
@ -0,0 +1,674 @@
from __future__ import print_function
import argparse
import logging
import os
from textwrap import dedent
from ceph_volume.util import system, disk, merge_dict
from ceph_volume.util.device import Device
from ceph_volume import decorators, terminal, process
from ceph_volume.api import lvm as api
from ceph_volume.systemd import systemctl
logger = logging.getLogger(__name__)
mlogger = terminal.MultiLogger(__name__)
def get_cluster_name(osd_id, osd_fsid):
"""
From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the
system that match those tag values, then return cluster_name for the first
one.
"""
lv_tags = {}
lv_tags['ceph.osd_id'] = osd_id
lv_tags['ceph.osd_fsid'] = osd_fsid
lvs = api.get_lvs(tags=lv_tags)
if not lvs:
mlogger.error(
'Unable to find any LV for source OSD: id:{} fsid:{}'.format(
osd_id, osd_fsid) )
raise SystemExit('Unexpected error, terminating')
return next(iter(lvs)).tags["ceph.cluster_name"]
def get_osd_path(osd_id, osd_fsid):
return '/var/lib/ceph/osd/{}-{}'.format(
get_cluster_name(osd_id, osd_fsid), osd_id)
def find_associated_devices(osd_id, osd_fsid):
"""
From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the
system that match those tag values, further detect if any partitions are
part of the OSD, and then return the set of LVs and partitions (if any).
"""
lv_tags = {}
lv_tags['ceph.osd_id'] = osd_id
lv_tags['ceph.osd_fsid'] = osd_fsid
lvs = api.get_lvs(tags=lv_tags)
if not lvs:
mlogger.error(
'Unable to find any LV for source OSD: id:{} fsid:{}'.format(
osd_id, osd_fsid) )
raise SystemExit('Unexpected error, terminating')
devices = set(ensure_associated_lvs(lvs, lv_tags))
return [(Device(path), type) for path, type in devices if path]
def ensure_associated_lvs(lvs, lv_tags):
"""
Go through each LV and ensure if backing devices (journal, wal, block)
are LVs or partitions, so that they can be accurately reported.
"""
# look for many LVs for each backing type, because it is possible to
# receive a filtering for osd.1, and have multiple failed deployments
# leaving many journals with osd.1 - usually, only a single LV will be
# returned
block_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'block'}))
db_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'db'}))
wal_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'wal'}))
backing_devices = [(block_lvs, 'block'), (db_lvs, 'db'),
(wal_lvs, 'wal')]
verified_devices = []
for lv in lvs:
# go through each lv and append it, otherwise query `blkid` to find
# a physical device. Do this for each type (journal,db,wal) regardless
# if they have been processed in the previous LV, so that bad devices
# with the same ID can be caught
for ceph_lvs, type in backing_devices:
if ceph_lvs:
verified_devices.extend([(l.lv_path, type) for l in ceph_lvs])
continue
# must be a disk partition, by querying blkid by the uuid we are
# ensuring that the device path is always correct
try:
device_uuid = lv.tags['ceph.{}_uuid'.format(type)]
except KeyError:
# Bluestore will not have ceph.journal_uuid, and Filestore
# will not have ceph.db_uuid
continue
osd_device = disk.get_device_from_partuuid(device_uuid)
if not osd_device:
# if the osd_device is not found by the partuuid, then it is
# not possible to ensure this device exists anymore, so skip it
continue
verified_devices.append((osd_device, type))
return verified_devices
class VolumeTagTracker(object):
def __init__(self, devices, target_lv):
self.target_lv = target_lv
self.data_device = self.db_device = self.wal_device = None
for device, type in devices:
if type == 'block':
self.data_device = device
elif type == 'db':
self.db_device = device
elif type == 'wal':
self.wal_device = device
if not self.data_device:
mlogger.error('Data device not found')
raise SystemExit(
"Unexpected error, terminating")
if not self.data_device.is_lv:
mlogger.error('Data device isn\'t LVM')
raise SystemExit(
"Unexpected error, terminating")
self.old_target_tags = self.target_lv.tags.copy()
self.old_data_tags = (
self.data_device.lv_api.tags.copy()
if self.data_device.is_lv else None)
self.old_db_tags = (
self.db_device.lv_api.tags.copy()
if self.db_device and self.db_device.is_lv else None)
self.old_wal_tags = (
self.wal_device.lv_api.tags.copy()
if self.wal_device and self.wal_device.is_lv else None)
def update_tags_when_lv_create(self, create_type):
tags = {}
if not self.data_device.is_lv:
mlogger.warning(
'Data device is not LVM, wouldn\'t update LVM tags')
else:
tags["ceph.{}_uuid".format(create_type)] = self.target_lv.lv_uuid
tags["ceph.{}_device".format(create_type)] = self.target_lv.lv_path
self.data_device.lv_api.set_tags(tags)
tags = self.data_device.lv_api.tags.copy()
tags["ceph.type"] = create_type
self.target_lv.set_tags(tags)
aux_dev = None
if create_type == "db" and self.wal_device:
aux_dev = self.wal_device
elif create_type == "wal" and self.db_device:
aux_dev = self.db_device
else:
return
if not aux_dev.is_lv:
mlogger.warning(
'{} device is not LVM, wouldn\'t update LVM tags'.format(
create_type.upper()))
else:
tags = {}
tags["ceph.{}_uuid".format(create_type)] = self.target_lv.lv_uuid
tags["ceph.{}_device".format(create_type)] = self.target_lv.lv_path
aux_dev.lv_api.set_tags(tags)
def remove_lvs(self, source_devices, target_type):
remaining_devices = [self.data_device, self.db_device, self.wal_device]
outdated_tags = []
for device, type in source_devices:
if type == "block" or type == target_type:
continue
remaining_devices.remove(device)
if device.is_lv:
outdated_tags.append("ceph.{}_uuid".format(type))
outdated_tags.append("ceph.{}_device".format(type))
device.lv_api.clear_tags()
if len(outdated_tags) > 0:
for d in remaining_devices:
if d and d.is_lv:
d.lv_api.clear_tags(outdated_tags)
def replace_lvs(self, source_devices, target_type):
remaining_devices = [self.data_device]
if self.db_device:
remaining_devices.append(self.db_device)
if self.wal_device:
remaining_devices.append(self.wal_device)
outdated_tags = []
for device, type in source_devices:
if type == "block":
continue
remaining_devices.remove(device)
if device.is_lv:
outdated_tags.append("ceph.{}_uuid".format(type))
outdated_tags.append("ceph.{}_device".format(type))
device.lv_api.clear_tags()
new_tags = {}
new_tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid
new_tags["ceph.{}_device".format(target_type)] = self.target_lv.lv_path
for d in remaining_devices:
if d and d.is_lv:
if len(outdated_tags) > 0:
d.lv_api.clear_tags(outdated_tags)
d.lv_api.set_tags(new_tags)
if not self.data_device.is_lv:
mlogger.warning(
'Data device is not LVM, wouldn\'t properly update target LVM tags')
else:
tags = self.data_device.lv_api.tags.copy()
tags["ceph.type"] = target_type
tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid
tags["ceph.{}_device".format(target_type)] = self.target_lv.lv_path
self.target_lv.set_tags(tags)
def undo(self):
mlogger.info(
'Undoing lv tag set')
if self.data_device:
if self.old_data_tags:
self.data_device.lv_api.set_tags(self.old_data_tags)
else:
self.data_device.lv_api.clear_tags()
if self.db_device:
if self.old_db_tags:
self.db_device.lv_api.set_tags(self.old_db_tags)
else:
self.db_device.lv_api.clear_tags()
if self.wal_device:
if self.old_wal_tags:
self.wal_device.lv_api.set_tags(self.old_wal_tags)
else:
self.wal_device.lv_api.clear_tags()
if self.old_target_tags:
self.target_lv.set_tags(self.old_target_tags)
else:
self.target_lv.clear_tags()
class Migrate(object):
help = 'Migrate BlueFS data from source device(s) to another LVM device'
def __init__(self, argv):
self.argv = argv
self.osd_id = None
def get_source_devices(self, devices, target_type=""):
ret = []
for device, type in devices:
if type == target_type:
continue
if type == 'block':
if 'data' not in self.args.from_:
continue
elif type == 'db':
if 'db' not in self.args.from_:
continue
elif type == 'wal':
if 'wal' not in self.args.from_:
continue
ret.append([device, type])
if ret == []:
mlogger.error('Source device list is empty')
raise SystemExit(
'Unable to migrate to : {}'.format(self.args.target))
return ret
# ceph-bluestore-tool uses the following replacement rules
# (in the order of precedence, stop on the first match)
# if source list has DB volume - target device replaces it.
# if source list has WAL volume - target device replaces it.
# if source list has slow volume only - operation isn't permitted,
# requires explicit allocation via new-db/new-wal command.
def get_target_type_by_source(self, devices):
ret = None
for device, type in devices:
if type == 'db':
return 'db'
elif type == 'wal':
ret = 'wal'
return ret
def get_filename_by_type(self, type):
filename = 'block'
if type == 'db' or type == 'wal':
filename += '.' + type
return filename
def get_source_args(self, osd_path, devices):
ret = []
for device, type in devices:
ret = ret + ["--devs-source", os.path.join(
osd_path, self.get_filename_by_type(type))]
return ret
@decorators.needs_root
def migrate_to_new(self, osd_id, osd_fsid, devices, target_lv):
source_devices = self.get_source_devices(devices)
target_type = self.get_target_type_by_source(source_devices)
if not target_type:
mlogger.error(
"Unable to determine new volume type,"
" please use new-db or new-wal command before.")
raise SystemExit(
"Unable to migrate to : {}".format(self.args.target))
target_path = target_lv.lv_path
try:
tag_tracker = VolumeTagTracker(devices, target_lv)
# we need to update lvm tags for all the remaining volumes
# and clear for ones which to be removed
# ceph-bluestore-tool removes source volume(s) other than block one
# and attaches target one after successful migration
tag_tracker.replace_lvs(source_devices, target_type)
osd_path = get_osd_path(osd_id, osd_fsid)
source_args = self.get_source_args(osd_path, source_devices)
mlogger.info("Migrate to new, Source: {} Target: {}".format(
source_args, target_path))
stdout, stderr, exit_code = process.call([
'ceph-bluestore-tool',
'--path',
osd_path,
'--dev-target',
target_path,
'--command',
'bluefs-bdev-migrate'] +
source_args)
if exit_code != 0:
mlogger.error(
'Failed to migrate device, error code:{}'.format(exit_code))
raise SystemExit(
'Failed to migrate to : {}'.format(self.args.target))
else:
system.chown(os.path.join(osd_path, "block.{}".format(
target_type)))
terminal.success('Migration successful.')
except:
tag_tracker.undo()
raise
return
@decorators.needs_root
def migrate_to_existing(self, osd_id, osd_fsid, devices, target_lv):
target_type = target_lv.tags["ceph.type"]
if target_type == "wal":
mlogger.error("Migrate to WAL is not supported")
raise SystemExit(
"Unable to migrate to : {}".format(self.args.target))
target_filename = self.get_filename_by_type(target_type)
if (target_filename == ""):
mlogger.error(
"Target Logical Volume doesn't have proper volume type "
"(ceph.type LVM tag): {}".format(target_type))
raise SystemExit(
"Unable to migrate to : {}".format(self.args.target))
osd_path = get_osd_path(osd_id, osd_fsid)
source_devices = self.get_source_devices(devices, target_type)
target_path = os.path.join(osd_path, target_filename)
tag_tracker = VolumeTagTracker(devices, target_lv)
try:
# ceph-bluestore-tool removes source volume(s) other than
# block and target ones after successful migration
tag_tracker.remove_lvs(source_devices, target_type)
source_args = self.get_source_args(osd_path, source_devices)
mlogger.info("Migrate to existing, Source: {} Target: {}".format(
source_args, target_path))
stdout, stderr, exit_code = process.call([
'ceph-bluestore-tool',
'--path',
osd_path,
'--dev-target',
target_path,
'--command',
'bluefs-bdev-migrate'] +
source_args)
if exit_code != 0:
mlogger.error(
'Failed to migrate device, error code:{}'.format(exit_code))
raise SystemExit(
'Failed to migrate to : {}'.format(self.args.target))
else:
terminal.success('Migration successful.')
except:
tag_tracker.undo()
raise
return
@decorators.needs_root
def migrate_osd(self):
if self.args.osd_id:
osd_is_running = systemctl.osd_is_active(self.args.osd_id)
if osd_is_running:
mlogger.error('OSD is running, stop it with: '
'systemctl stop ceph-osd@{}'.format(
self.args.osd_id))
raise SystemExit(
'Unable to migrate devices associated with OSD ID: {}'
.format(self.args.osd_id))
target_lv = api.get_lv_by_fullname(self.args.target)
if not target_lv:
mlogger.error(
'Target path "{}" is not a Logical Volume'.format(
self.args.target))
raise SystemExit(
'Unable to migrate to : {}'.format(self.args.target))
devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid)
if (not target_lv.used_by_ceph):
self.migrate_to_new(self.args.osd_id, self.args.osd_fsid,
devices,
target_lv)
else:
if (target_lv.tags['ceph.osd_id'] != self.args.osd_id or
target_lv.tags['ceph.osd_fsid'] != self.args.osd_fsid):
mlogger.error(
'Target Logical Volume isn\'t used by the specified OSD: '
'{} FSID: {}'.format(self.args.osd_id,
self.args.osd_fsid))
raise SystemExit(
'Unable to migrate to : {}'.format(self.args.target))
self.migrate_to_existing(self.args.osd_id, self.args.osd_fsid,
devices,
target_lv)
def parse_argv(self):
sub_command_help = dedent("""
Moves BlueFS data from the source volume(s) to the target one; source
volumes (except the main, i.e. data or block, one) are removed on
success. Only LVM volumes are permitted as the target, either one that
is already attached or a new logical volume. In the latter case it is
attached to the OSD, replacing one of the source devices. The following
replacement rules apply (in the order of precedence, stop on the first
match):
* if source list has DB volume - target device replaces it.
* if source list has WAL volume - target device replaces it.
* if source list has slow volume only - operation is not permitted,
requires explicit allocation via new-db/new-wal command.
Example calls for supported scenarios:
Moves BlueFS data from main device to LV already attached as DB:
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/db
Moves BlueFS data from shared main device to LV which will be attached
as a new DB:
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data --target vgname/new_db
Moves BlueFS data from DB device to new LV, DB is replaced:
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db --target vgname/new_db
Moves BlueFS data from main and DB devices to new LV, DB is replaced:
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db --target vgname/new_db
Moves BlueFS data from main, DB and WAL devices to new LV, WAL is
removed and DB is replaced:
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from data db wal --target vgname/new_db
Moves BlueFS data from main, DB and WAL devices to main device, WAL
and DB are removed:
ceph-volume lvm migrate --osd-id 1 --osd-fsid <uuid> --from db wal --target vgname/data
""")
parser = argparse.ArgumentParser(
prog='ceph-volume lvm migrate',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=sub_command_help,
)
parser.add_argument(
'--osd-id',
required=True,
help='Specify an OSD ID to detect associated devices for migration',
)
parser.add_argument(
'--osd-fsid',
required=True,
help='Specify an OSD FSID to detect associated devices for migration',
)
parser.add_argument(
'--target',
required=True,
help='Specify target Logical Volume (LV) to migrate data to',
)
parser.add_argument(
'--from',
nargs='*',
dest='from_',
required=True,
choices=['data', 'db', 'wal'],
help='Copy BlueFS data from DB device',
)
if len(self.argv) == 0:
print(sub_command_help)
return
self.args = parser.parse_args(self.argv)
def main(self):
self.parse_argv()
self.migrate_osd()
class NewVolume(object):
def __init__(self, create_type, argv):
self.create_type = create_type
self.argv = argv
def make_parser(self, prog, sub_command_help):
parser = argparse.ArgumentParser(
prog=prog,
formatter_class=argparse.RawDescriptionHelpFormatter,
description=sub_command_help,
)
parser.add_argument(
'--osd-id',
required=True,
help='Specify an OSD ID to attach new volume to',
)
parser.add_argument(
'--osd-fsid',
required=True,
help='Specify an OSD FSID to attach new volume to',
)
parser.add_argument(
'--target',
required=True,
help='Specify target Logical Volume (LV) to attach',
)
return parser
@decorators.needs_root
def make_new_volume(self, osd_id, osd_fsid, devices, target_lv):
osd_path = get_osd_path(osd_id, osd_fsid)
mlogger.info(
'Making new volume at {} for OSD: {} ({})'.format(
target_lv.lv_path, osd_id, osd_path))
tag_tracker = VolumeTagTracker(devices, target_lv)
try:
tag_tracker.update_tags_when_lv_create(self.create_type)
stdout, stderr, exit_code = process.call([
'ceph-bluestore-tool',
'--path',
osd_path,
'--dev-target',
target_lv.lv_path,
'--command',
'bluefs-bdev-new-{}'.format(self.create_type)
])
if exit_code != 0:
mlogger.error(
'failed to attach new volume, error code:{}'.format(
exit_code))
raise SystemExit(
"Failed to attach new volume: {}".format(
self.args.target))
else:
system.chown(os.path.join(osd_path, "block.{}".format(
self.create_type)))
terminal.success('New volume attached.')
except:
tag_tracker.undo()
raise
return
@decorators.needs_root
def new_volume(self):
if self.args.osd_id:
osd_is_running = systemctl.osd_is_active(self.args.osd_id)
if osd_is_running:
mlogger.error('OSD is running, stop it with:'
' systemctl stop ceph-osd@{}'.format(self.args.osd_id))
raise SystemExit(
'Unable to attach new volume for OSD: {}'.format(
self.args.osd_id))
target_lv = api.get_lv_by_fullname(self.args.target)
if not target_lv:
mlogger.error(
'Target path {} is not a Logical Volume'.format(
self.args.target))
raise SystemExit(
'Unable to attach new volume : {}'.format(self.args.target))
if target_lv.used_by_ceph:
mlogger.error(
'Target Logical Volume is already used by ceph: {}'.format(
self.args.target))
raise SystemExit(
'Unable to attach new volume : {}'.format(self.args.target))
else:
devices = find_associated_devices(self.args.osd_id,
self.args.osd_fsid)
self.make_new_volume(
self.args.osd_id,
self.args.osd_fsid,
devices,
target_lv)
class NewWAL(NewVolume):
help = 'Allocate new WAL volume for OSD at specified Logical Volume'
def __init__(self, argv):
super(NewWAL, self).__init__("wal", argv)
def main(self):
sub_command_help = dedent("""
Attaches the given logical volume to the given OSD as a WAL volume.
Logical volume format is vg/lv. Fails if the OSD already has an attached WAL.
Example:
Attach vgname/lvname as a WAL volume to OSD 1
ceph-volume lvm new-wal --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_wal
""")
parser = self.make_parser('ceph-volume lvm new-wal', sub_command_help)
if len(self.argv) == 0:
print(sub_command_help)
return
self.args = parser.parse_args(self.argv)
self.new_volume()
class NewDB(NewVolume):
help = 'Allocate new DB volume for OSD at specified Logical Volume'
def __init__(self, argv):
super(NewDB, self).__init__("db", argv)
def main(self):
sub_command_help = dedent("""
Attaches the given logical volume to the given OSD as a DB volume.
Logical volume format is vg/lv. Fails if the OSD already has an attached DB.
Example:
Attach vgname/lvname as a DB volume to OSD 1
ceph-volume lvm new-db --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db
""")
parser = self.make_parser('ceph-volume lvm new-db', sub_command_help)
if len(self.argv) == 0:
print(sub_command_help)
return
self.args = parser.parse_args(self.argv)
self.new_volume()
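For orientation, the new-db/new-wal plumbing above ultimately reduces to a single ceph-bluestore-tool invocation; the sketch below is an assumption-laden illustration (the helper name and the use of subprocess are not from the source, which uses its own process wrapper and LV tag bookkeeping):
import subprocess

def attach_new_bluefs_device(osd_path, target_lv_path, create_type):
    # create_type is 'db' or 'wal', matching the new-db/new-wal subcommand
    cmd = [
        'ceph-bluestore-tool',
        '--path', osd_path,
        '--dev-target', target_lv_path,
        '--command', 'bluefs-bdev-new-{}'.format(create_type),
    ]
    # a non-zero exit code means the attach failed and any LV tag changes
    # made beforehand have to be rolled back by the caller
    return subprocess.call(cmd)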

File diff suppressed because it is too large

View File

@ -24,7 +24,7 @@ def get_partuuid(device):
device
"""
out, err, rc = process.call(
['blkid', '-s', 'PARTUUID', '-o', 'value', device]
['blkid', '-c', '/dev/null', '-s', 'PARTUUID', '-o', 'value', device]
)
return ' '.join(out).strip()
@ -98,7 +98,7 @@ def blkid(device):
PART_ENTRY_UUID PARTUUID
"""
out, err, rc = process.call(
['blkid', '-p', device]
['blkid', '-c', '/dev/null', '-p', device]
)
return _blkid_parser(' '.join(out))
@ -110,7 +110,7 @@ def get_part_entry_type(device):
used for udev rules, but it is useful in this case as it is the only
consistent way to retrieve the GUID used by ceph-disk to identify devices.
"""
out, err, rc = process.call(['blkid', '-p', '-o', 'udev', device])
out, err, rc = process.call(['blkid', '-c', '/dev/null', '-p', '-o', 'udev', device])
for line in out:
if 'ID_PART_ENTRY_TYPE=' in line:
return line.split('=')[-1].strip()
@ -123,7 +123,7 @@ def get_device_from_partuuid(partuuid):
device is
"""
out, err, rc = process.call(
['blkid', '-t', 'PARTUUID="%s"' % partuuid, '-o', 'device']
['blkid', '-c', '/dev/null', '-t', 'PARTUUID="%s"' % partuuid, '-o', 'device']
)
return ' '.join(out).strip()
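The common thread in the hunks above is that every blkid call gains '-c /dev/null', which disables the blkid cache file so the device is probed directly rather than answered from a possibly stale cache. A minimal standalone sketch of the same pattern (using subprocess here is an assumption; ceph-volume goes through its own process helper):
import subprocess

def get_partuuid(device):
    # '-c /dev/null' bypasses the blkid cache so stale entries cannot be
    # returned for a device whose partition table has changed
    out = subprocess.check_output(
        ['blkid', '-c', '/dev/null', '-s', 'PARTUUID', '-o', 'value', device])
    return out.decode().strip()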

View File

@ -1213,18 +1213,6 @@ def main():
errno.errorcode.get(ret, 'Unknown'), outs),
file=sys.stderr)
if ret < 0:
ret = -ret
errstr = errno.errorcode.get(ret, 'Unknown')
print(u'Error {0}: {1}'.format(errstr, outs), file=sys.stderr)
if len(targets) > 1:
final_ret = ret
else:
return ret
if outs:
print(prefix + outs, file=sys.stderr)
sys.stdout.flush()
if parsed_args.output_file:
@ -1250,12 +1238,23 @@ def main():
except IOError as e:
if e.errno != errno.EPIPE:
raise e
final_e = None
try:
sys.stdout.flush()
except IOError as e:
if e.errno != errno.EPIPE:
raise e
final_e = e
if ret < 0:
ret = -ret
errstr = errno.errorcode.get(ret, 'Unknown')
print(u'Error {0}: {1}'.format(errstr, outs), file=sys.stderr)
final_ret = ret
elif outs:
print(prefix + outs, file=sys.stderr)
if final_e:
raise final_e
# Block until command completion (currently scrub and deep_scrub only)
if block:

View File

@ -109,6 +109,14 @@ int obtain_monmap(MonitorDBStore &store, bufferlist &bl)
}
}
if (store.exists("mon_sync", "temp_newer_monmap")) {
dout(10) << __func__ << " found temp_newer_monmap" << dendl;
int err = store.get("mon_sync", "temp_newer_monmap", bl);
ceph_assert(err == 0);
ceph_assert(bl.length() > 0);
return 0;
}
if (store.exists("mkfs", "monmap")) {
dout(10) << __func__ << " found mkfs monmap" << dendl;
int err = store.get("mkfs", "monmap", bl);

View File

@ -125,12 +125,24 @@
#define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)
#ifndef S_IXUGO
#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
#endif
void client_flush_set_callback(void *p, ObjectCacher::ObjectSet *oset)
{
Client *client = static_cast<Client*>(p);
client->flush_set_callback(oset);
}
bool Client::is_reserved_vino(vinodeno_t &vino) {
if (MDS_IS_PRIVATE_INO(vino.ino)) {
ldout(cct, -1) << __func__ << " attempt to access reserved inode number " << vino << dendl;
return true;
}
return false;
}
// -------------
@ -3192,7 +3204,7 @@ void Client::put_cap_ref(Inode *in, int cap)
int put_nref = 0;
int drop = last & ~in->caps_issued();
if (in->snapid == CEPH_NOSNAP) {
if ((last & CEPH_CAP_FILE_WR) &&
if ((last & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) &&
!in->cap_snaps.empty() &&
in->cap_snaps.rbegin()->second.writing) {
ldout(cct, 10) << __func__ << " finishing pending cap_snap on " << *in << dendl;
@ -3661,9 +3673,9 @@ void Client::queue_cap_snap(Inode *in, SnapContext& old_snapc)
capsnap.context = old_snapc;
capsnap.issued = in->caps_issued();
capsnap.dirty = in->caps_dirty();
capsnap.dirty_data = (used & CEPH_CAP_FILE_BUFFER);
capsnap.uid = in->uid;
capsnap.gid = in->gid;
capsnap.mode = in->mode;
@ -3672,7 +3684,7 @@ void Client::queue_cap_snap(Inode *in, SnapContext& old_snapc)
capsnap.xattr_version = in->xattr_version;
capsnap.cap_dirtier_uid = in->cap_dirtier_uid;
capsnap.cap_dirtier_gid = in->cap_dirtier_gid;
if (used & CEPH_CAP_FILE_WR) {
ldout(cct, 10) << __func__ << " WR used on " << *in << dendl;
capsnap.writing = 1;
@ -3707,6 +3719,7 @@ void Client::finish_cap_snap(Inode *in, CapSnap &capsnap, int used)
}
if (used & CEPH_CAP_FILE_BUFFER) {
capsnap.writing = 1;
ldout(cct, 10) << __func__ << " " << *in << " cap_snap " << &capsnap << " used " << used
<< " WRBUFFER, delaying" << dendl;
} else {
@ -3715,13 +3728,6 @@ void Client::finish_cap_snap(Inode *in, CapSnap &capsnap, int used)
}
}
void Client::_flushed_cap_snap(Inode *in, snapid_t seq)
{
ldout(cct, 10) << __func__ << " seq " << seq << " on " << *in << dendl;
in->cap_snaps.at(seq).dirty_data = 0;
flush_snaps(in);
}
void Client::send_flush_snap(Inode *in, MetaSession *session,
snapid_t follows, CapSnap& capsnap)
{
@ -3789,7 +3795,7 @@ void Client::flush_snaps(Inode *in)
<< " on " << *in << dendl;
if (capsnap.dirty_data || capsnap.writing)
break;
capsnap.flush_tid = ++last_flush_tid;
session->flushing_caps_tids.insert(capsnap.flush_tid);
in->flushing_cap_tids[capsnap.flush_tid] = 0;
@ -4336,7 +4342,7 @@ void Client::trim_caps(MetaSession *s, uint64_t max)
++q;
if (dn->lru_is_expireable()) {
if (can_invalidate_dentries &&
dn->dir->parent_inode->ino == MDS_INO_ROOT) {
dn->dir->parent_inode->ino == CEPH_INO_ROOT) {
// Only issue one of these per DN for inodes in root: handle
// others more efficiently by calling for root-child DNs at
// the end of this function.
@ -4349,10 +4355,10 @@ void Client::trim_caps(MetaSession *s, uint64_t max)
all = false;
}
}
if (in->ll_ref == 1 && in->ino != MDS_INO_ROOT) {
if (in->ll_ref == 1 && in->ino != CEPH_INO_ROOT) {
_schedule_ino_release_callback(in.get());
}
if (all && in->ino != MDS_INO_ROOT) {
if (all && in->ino != CEPH_INO_ROOT) {
ldout(cct, 20) << __func__ << " counting as trimmed: " << *in << dendl;
trimmed++;
}
@ -4738,25 +4744,19 @@ void Client::update_snap_trace(const bufferlist& bl, SnapRealm **realm_ret, bool
ldout(cct, 10) << __func__ << " " << *realm << " seq " << info.seq()
<< " <= " << realm->seq << " and same parent, SKIPPING" << dendl;
}
if (!first_realm)
first_realm = realm;
else
put_snap_realm(realm);
}
for (map<SnapRealm*, SnapContext>::iterator q = dirty_realms.begin();
q != dirty_realms.end();
++q) {
SnapRealm *realm = q->first;
for (auto &[realm, snapc] : dirty_realms) {
// if there are new snaps ?
if (has_new_snaps(q->second, realm->get_snap_context())) {
if (has_new_snaps(snapc, realm->get_snap_context())) {
ldout(cct, 10) << " flushing caps on " << *realm << dendl;
xlist<Inode*>::iterator r = realm->inodes_with_caps.begin();
while (!r.end()) {
Inode *in = *r;
++r;
queue_cap_snap(in, q->second);
for (auto&& in : realm->inodes_with_caps) {
queue_cap_snap(in, snapc);
}
} else {
ldout(cct, 10) << " no new snap on " << *realm << dendl;
@ -5383,8 +5383,12 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, const M
int Client::inode_permission(Inode *in, const UserPerm& perms, unsigned want)
{
if (perms.uid() == 0)
if (perms.uid() == 0) {
// Root overrides permission checks, but exec still requires at least one exec bit set
if((want & MAY_EXEC) && !(in->mode & S_IXUGO))
return -EACCES;
return 0;
}
if (perms.uid() != in->uid && (in->mode & S_IRWXG)) {
int ret = _posix_acl_permission(in, perms, want);
@ -8659,33 +8663,44 @@ int Client::lookup_hash(inodeno_t ino, inodeno_t dirino, const char *name,
* the resulting Inode object in one operation, so that caller
* can safely assume inode will still be there after return.
*/
int Client::_lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode)
int Client::_lookup_vino(vinodeno_t vino, const UserPerm& perms, Inode **inode)
{
ldout(cct, 8) << __func__ << " enter(" << ino << ")" << dendl;
ldout(cct, 8) << __func__ << " enter(" << vino << ")" << dendl;
if (unmounting)
return -ENOTCONN;
if (is_reserved_vino(vino))
return -ESTALE;
MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPINO);
filepath path(ino);
filepath path(vino.ino);
req->set_filepath(path);
/*
* The MDS expects either a "real" snapid here or 0. The special value
* carveouts for the snapid are all at the end of the range so we can
* just look for any snapid below this value.
*/
if (vino.snapid < CEPH_NOSNAP)
req->head.args.lookupino.snapid = vino.snapid;
int r = make_request(req, perms, NULL, NULL, rand() % mdsmap->get_num_in_mds());
if (r == 0 && inode != NULL) {
vinodeno_t vino(ino, CEPH_NOSNAP);
unordered_map<vinodeno_t,Inode*>::iterator p = inode_map.find(vino);
ceph_assert(p != inode_map.end());
*inode = p->second;
_ll_get(*inode);
}
ldout(cct, 8) << __func__ << " exit(" << ino << ") = " << r << dendl;
ldout(cct, 8) << __func__ << " exit(" << vino << ") = " << r << dendl;
return r;
}
int Client::lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode)
{
vinodeno_t vino(ino, CEPH_NOSNAP);
std::lock_guard lock(client_lock);
return _lookup_ino(ino, perms, inode);
return _lookup_vino(vino, perms, inode);
}
/**
@ -9055,8 +9070,15 @@ void Client::lock_fh_pos(Fh *f)
void Client::unlock_fh_pos(Fh *f)
{
ceph_assert(client_lock.is_locked_by_me());
ldout(cct, 10) << __func__ << " " << f << dendl;
f->pos_locked = false;
if (!f->pos_waiters.empty()) {
// only wake up the oldest waiter
auto cond = f->pos_waiters.front();
cond->SignalOne();
}
}
int Client::uninline_data(Inode *in, Context *onfinish)
@ -10805,56 +10827,59 @@ int Client::ll_lookup(Inode *parent, const char *name, struct stat *attr,
return r;
}
int Client::ll_lookup_vino(
vinodeno_t vino,
const UserPerm& perms,
Inode **inode)
{
ceph_assert(inode != NULL);
if (unmounting)
return -ENOTCONN;
if (is_reserved_vino(vino))
return -ESTALE;
std::lock_guard lock(client_lock);
ldout(cct, 3) << __func__ << vino << dendl;
// Check the cache first
unordered_map<vinodeno_t,Inode*>::iterator p = inode_map.find(vino);
if (p != inode_map.end()) {
*inode = p->second;
_ll_get(*inode);
return 0;
}
uint64_t snapid = vino.snapid;
// for snapdir, find the non-snapped dir inode
if (snapid == CEPH_SNAPDIR)
vino.snapid = CEPH_NOSNAP;
int r = _lookup_vino(vino, perms, inode);
if (r)
return r;
ceph_assert(*inode != NULL);
if (snapid == CEPH_SNAPDIR) {
Inode *tmp = *inode;
// open the snapdir and put the inode ref
*inode = open_snapdir(tmp);
_ll_forget(tmp, 1);
_ll_get(*inode);
}
return 0;
}
int Client::ll_lookup_inode(
struct inodeno_t ino,
const UserPerm& perms,
Inode **inode)
{
ceph_assert(inode != NULL);
std::lock_guard lock(client_lock);
ldout(cct, 3) << "ll_lookup_inode " << ino << dendl;
if (unmounting)
return -ENOTCONN;
// Num1: get inode and *inode
int r = _lookup_ino(ino, perms, inode);
if (r)
return r;
ceph_assert(*inode != NULL);
if (!(*inode)->dentries.empty()) {
ldout(cct, 8) << __func__ << " dentry already present" << dendl;
return 0;
}
if ((*inode)->is_root()) {
ldout(cct, 8) << "ino is root, no parent" << dendl;
return 0;
}
// Num2: Request the parent inode, so that we can look up the name
Inode *parent;
r = _lookup_parent(*inode, perms, &parent);
if (r) {
_ll_forget(*inode, 1);
return r;
}
ceph_assert(parent != NULL);
// Num3: Finally, get the name (dentry) of the requested inode
r = _lookup_name(*inode, parent, perms);
if (r) {
// Unexpected error
_ll_forget(parent, 1);
_ll_forget(*inode, 1);
return r;
}
_ll_forget(parent, 1);
return 0;
vinodeno_t vino(ino, CEPH_NOSNAP);
return ll_lookup_vino(vino, perms, inode);
}
int Client::ll_lookupx(Inode *parent, const char *name, Inode **out,
@ -11066,6 +11091,9 @@ Inode *Client::ll_get_inode(vinodeno_t vino)
if (unmounting)
return NULL;
if (is_reserved_vino(vino))
return NULL;
unordered_map<vinodeno_t,Inode*>::iterator p = inode_map.find(vino);
if (p == inode_map.end())
return NULL;
@ -14262,6 +14290,10 @@ int Client::check_pool_perm(Inode *in, int need)
if (!cct->_conf->client_check_pool_perm)
return 0;
/* Only need to do this for regular files */
if (!in->is_file())
return 0;
int64_t pool_id = in->layout.pool_id;
std::string pool_ns = in->layout.pool_ns;
std::pair<int64_t, std::string> perm_key(pool_id, pool_ns);
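Stepping back from the Client.cc hunks above, the inode_permission() change is easy to mis-read: root still bypasses ordinary permission checks, but an exec request now succeeds only if at least one exec bit is set on the inode. A small Python sketch of that decision (illustrative only; the mode constants mirror the stat module):
import errno
import stat

def root_inode_permission(mode, want_exec):
    # root may read and write regardless of mode bits, but exec is only
    # granted when at least one of the user/group/other exec bits is set
    if want_exec and not (mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)):
        return -errno.EACCES
    return 0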

View File

@ -479,6 +479,7 @@ public:
int ll_lookup(Inode *parent, const char *name, struct stat *attr,
Inode **out, const UserPerm& perms);
int ll_lookup_inode(struct inodeno_t ino, const UserPerm& perms, Inode **inode);
int ll_lookup_vino(vinodeno_t vino, const UserPerm& perms, Inode **inode);
int ll_lookupx(Inode *parent, const char *name, Inode **out,
struct ceph_statx *stx, unsigned want, unsigned flags,
const UserPerm& perms);
@ -664,7 +665,6 @@ public:
void wait_sync_caps(ceph_tid_t want);
void queue_cap_snap(Inode *in, SnapContext &old_snapc);
void finish_cap_snap(Inode *in, CapSnap &capsnap, int used);
void _flushed_cap_snap(Inode *in, snapid_t seq);
void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
@ -1012,6 +1012,7 @@ private:
static const VXattr _common_vxattrs[];
bool is_reserved_vino(vinodeno_t &vino);
void fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off);
@ -1182,7 +1183,7 @@ private:
int _ll_getattr(Inode *in, int caps, const UserPerm& perms);
int _lookup_parent(Inode *in, const UserPerm& perms, Inode **parent=NULL);
int _lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
int _lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
int _lookup_vino(vinodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
bool _ll_forget(Inode *in, uint64_t count);

View File

@ -163,7 +163,7 @@ struct Inode {
version_t inline_version;
bufferlist inline_data;
bool is_root() const { return ino == MDS_INO_ROOT; }
bool is_root() const { return ino == CEPH_INO_ROOT; }
bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; }
bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; }
bool is_file() const { return (mode & S_IFMT) == S_IFREG; }

View File

@ -86,6 +86,10 @@ if (WITH_RADOSGW)
otp/cls_otp_types.cc
)
add_library(cls_otp_client STATIC ${cls_otp_client_srcs})
if (WITH_BOOST_CONTEXT)
target_include_directories(cls_otp_client PRIVATE
$<TARGET_PROPERTY:spawn,INTERFACE_INCLUDE_DIRECTORIES>)
endif()
endif (WITH_RADOSGW)
# cls_refcount

View File

@ -35,6 +35,9 @@ static std::string bucket_index_prefixes[] = { "", /* special handling for the o
/* this must be the last index */
"9999_",};
static const std::string BI_PREFIX_END = string(1, BI_PREFIX_CHAR) +
bucket_index_prefixes[BI_BUCKET_LAST_INDEX];
static bool bi_is_objs_index(const string& s) {
return ((unsigned char)s[0] != BI_PREFIX_CHAR);
}
@ -2322,29 +2325,29 @@ static int rgw_bi_put_op(cls_method_context_t hctx, bufferlist *in, bufferlist *
return 0;
}
static int list_plain_entries(cls_method_context_t hctx, const string& name, const string& marker, uint32_t max,
list<rgw_cls_bi_entry> *entries, bool *pmore)
static int list_plain_entries(cls_method_context_t hctx,
const string& filter,
const string& start_after_key,
const string& end_key,
uint32_t max,
list<rgw_cls_bi_entry> *entries,
bool *end_key_reached,
bool *pmore)
{
string filter = name;
string start_key = marker;
string end_key; // stop listing at bi_log_prefix
bi_log_prefix(end_key);
int count = 0;
map<string, bufferlist> keys;
int ret = cls_cxx_map_get_vals(hctx, start_key, filter, max, &keys, pmore);
int ret = cls_cxx_map_get_vals(hctx, start_after_key, filter, max, &keys,
pmore);
if (ret < 0) {
return ret;
}
map<string, bufferlist>::iterator iter;
for (iter = keys.begin(); iter != keys.end(); ++iter) {
if (iter->first >= end_key) {
/* past the end of plain namespace */
if (pmore) {
*pmore = false;
}
*end_key_reached = false;
for (auto iter = keys.begin(); iter != keys.end(); ++iter) {
if (!end_key.empty() && iter->first >= end_key) {
*end_key_reached = true;
*pmore = true;
return count;
}
@ -2363,13 +2366,12 @@ static int list_plain_entries(cls_method_context_t hctx, const string& name, con
return -EIO;
}
CLS_LOG(20, "%s(): entry.idx=%s e.key.name=%s", __func__, escape_str(entry.idx).c_str(), escape_str(e.key.name).c_str());
CLS_LOG(20, "%s(): entry.idx=%s e.key.name=%s", __func__,
escape_str(entry.idx).c_str(), escape_str(e.key.name).c_str());
if (!name.empty() && e.key.name != name) {
if (!filter.empty() && e.key.name != filter) {
/* we are skipping the rest of the entries */
if (pmore) {
*pmore = false;
}
*pmore = false;
return count;
}
@ -2378,12 +2380,54 @@ static int list_plain_entries(cls_method_context_t hctx, const string& name, con
if (count >= (int)max) {
return count;
}
start_key = entry.idx;
}
return count;
}
static int list_plain_entries(cls_method_context_t hctx,
const string& name,
const string& marker,
uint32_t max,
list<rgw_cls_bi_entry> *entries,
bool *pmore) {
string start_after_key = marker;
string end_key;
bi_log_prefix(end_key);
int r;
bool end_key_reached;
bool more;
if (start_after_key < end_key) {
// listing ascii plain namespace
int r = list_plain_entries(hctx, name, start_after_key, end_key, max,
entries, &end_key_reached, &more);
if (r < 0) {
return r;
}
if (r >= (int)max || !end_key_reached || !more) {
if (pmore) {
*pmore = more;
}
return r;
}
start_after_key = BI_PREFIX_END;
max = max - r;
}
// listing non-ascii plain namespace
r = list_plain_entries(hctx, name, start_after_key, {}, max, entries,
&end_key_reached, &more);
if (r < 0) {
return r;
}
if (pmore) {
*pmore = more;
}
return r;
}
static int list_instance_entries(cls_method_context_t hctx, const string& name, const string& marker, uint32_t max,
list<rgw_cls_bi_entry> *entries, bool *pmore)
{

View File

@ -5,8 +5,7 @@
#define CEPH_ASYNC_OP_TRACKER_H
#include "common/ceph_mutex.h"
struct Context;
#include "include/Context.h"
class AsyncOpTracker {
public:
@ -27,4 +26,23 @@ private:
};
class C_TrackedOp : public Context {
public:
C_TrackedOp(AsyncOpTracker& async_op_tracker, Context* on_finish)
: m_async_op_tracker(async_op_tracker), m_on_finish(on_finish) {
m_async_op_tracker.start_op();
}
void finish(int r) override {
if (m_on_finish != nullptr) {
m_on_finish->complete(r);
}
m_async_op_tracker.finish_op();
}
private:
AsyncOpTracker& m_async_op_tracker;
Context* m_on_finish;
};
#endif // CEPH_ASYNC_OP_TRACKER_H

View File

@ -168,7 +168,7 @@ elseif(HAVE_ARMV8_CRC)
crc32c_aarch64.c)
endif(HAVE_INTEL)
add_library(crc32 ${crc32_srcs})
add_library(crc32 STATIC ${crc32_srcs})
if(HAVE_ARMV8_CRC)
set_target_properties(crc32 PROPERTIES
COMPILE_FLAGS "${CMAKE_C_FLAGS} ${ARMV8_CRC_COMPILE_FLAGS}")

View File

@ -145,6 +145,14 @@ LogClientTemp::~LogClientTemp()
parent.do_log(type, ss);
}
void LogChannel::set_log_to_monitors(bool v)
{
if (log_to_monitors != v) {
parent->reset();
log_to_monitors = v;
}
}
void LogChannel::update_config(map<string,string> &log_to_monitors,
map<string,string> &log_to_syslog,
map<string,string> &log_channels,
@ -342,6 +350,15 @@ version_t LogClient::queue(LogEntry &entry)
return entry.seq;
}
void LogClient::reset()
{
std::lock_guard l(log_lock);
if (log_queue.size()) {
log_queue.clear();
}
last_log_sent = last_log;
}
uint64_t LogClient::get_next_seq()
{
std::lock_guard l(log_lock);

View File

@ -134,9 +134,7 @@ public:
do_log(CLOG_SEC, s);
}
void set_log_to_monitors(bool v) {
log_to_monitors = v;
}
void set_log_to_monitors(bool v);
void set_log_to_syslog(bool v) {
log_to_syslog = v;
}
@ -253,6 +251,7 @@ public:
const EntityName& get_myname();
entity_name_t get_myrank();
version_t queue(LogEntry &entry);
void reset();
private:
Message *_get_mon_log_message();

View File

@ -22,31 +22,28 @@
#ifndef HAVE_BOOST_CONTEXT
// hide the dependencies on boost::context and boost::coroutines
namespace boost::asio {
// hide the dependency on boost::context
namespace spawn {
struct yield_context;
}
#else // HAVE_BOOST_CONTEXT
#ifndef BOOST_COROUTINES_NO_DEPRECATION_WARNING
#define BOOST_COROUTINES_NO_DEPRECATION_WARNING
#endif
#include <boost/asio/spawn.hpp>
#include <spawn/spawn.hpp>
#endif // HAVE_BOOST_CONTEXT
/// optional-like wrapper for a boost::asio::yield_context and its associated
/// optional-like wrapper for a spawn::yield_context and its associated
/// boost::asio::io_context. operations that take an optional_yield argument
/// will, when passed a non-empty yield context, suspend this coroutine instead
/// of blocking the thread of execution
class optional_yield {
boost::asio::io_context *c = nullptr;
boost::asio::yield_context *y = nullptr;
spawn::yield_context *y = nullptr;
public:
/// construct with a valid io and yield_context
explicit optional_yield(boost::asio::io_context& c,
boost::asio::yield_context& y) noexcept
spawn::yield_context& y) noexcept
: c(&c), y(&y) {}
/// type tag to construct an empty object
@ -60,7 +57,7 @@ class optional_yield {
boost::asio::io_context& get_io_context() const noexcept { return *c; }
/// return a reference to the yield_context. only valid if non-empty
boost::asio::yield_context& get_yield_context() const noexcept { return *y; }
spawn::yield_context& get_yield_context() const noexcept { return *y; }
};
// type tag object to construct an empty optional_yield

View File

@ -108,8 +108,8 @@ static ceph::spinlock debug_lock;
static raw_combined *create(unsigned len,
unsigned align,
int mempool = mempool::mempool_buffer_anon) {
if (!align)
align = sizeof(size_t);
// posix_memalign() requires a multiple of sizeof(void *)
align = std::max<unsigned>(align, sizeof(void *));
size_t rawlen = round_up_to(sizeof(buffer::raw_combined),
alignof(buffer::raw_combined));
size_t datalen = round_up_to(len, alignof(buffer::raw_combined));
@ -169,8 +169,8 @@ static ceph::spinlock debug_lock;
MEMPOOL_CLASS_HELPERS();
raw_posix_aligned(unsigned l, unsigned _align) : raw(l) {
align = _align;
ceph_assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
// posix_memalign() requires a multiple of sizeof(void *)
align = std::max<unsigned>(_align, sizeof(void *));
#ifdef DARWIN
data = (char *) valloc(len);
#else
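The two buffer.cc hunks above share one idea: posix_memalign() only accepts alignments that are power-of-two multiples of sizeof(void *), so a zero or too-small requested alignment is bumped up before allocating. A trivial sketch of the rule (the pointer size of 8 is an assumption for LP64 platforms):
def effective_alignment(requested, pointer_size=8):
    # bump small or zero alignments up to at least sizeof(void *)
    return max(requested, pointer_size)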

View File

@ -651,9 +651,6 @@ int md_config_t::parse_argv(ConfigValues& values,
else if (ceph_argparse_flag(args, i, "--no-mon-config", (char*)NULL)) {
values.no_mon_config = true;
}
else if (ceph_argparse_flag(args, i, "--log-early", (char*)NULL)) {
values.log_early = true;
}
else if (ceph_argparse_flag(args, i, "--mon-config", (char*)NULL)) {
values.no_mon_config = false;
}

View File

@ -28,7 +28,6 @@ public:
string cluster;
ceph::logging::SubsystemMap subsys;
bool no_mon_config = false;
bool log_early = false;
// Set of configuration options that have changed since the last
// apply_changes
using changed_set_t = std::set<std::string>;

View File

@ -3,7 +3,6 @@
#include <ifaddrs.h>
#include <stdlib.h>
#include <string.h>
#include <boost/algorithm/string/predicate.hpp>
#if defined(__FreeBSD__)
#include <sys/types.h>
#include <sys/socket.h>
@ -29,54 +28,23 @@ void netmask_ipv4(const struct in_addr *addr,
out->s_addr = addr->s_addr & mask;
}
static bool match_numa_node(const string& if_name, int numa_node)
bool matches_ipv4_in_subnet(const struct ifaddrs& addrs,
const struct sockaddr_in* net,
unsigned int prefix_len)
{
#ifdef WITH_SEASTAR
return true;
#else
int if_node = -1;
int r = get_iface_numa_node(if_name, &if_node);
if (r < 0) {
if (addrs.ifa_addr == nullptr)
return false;
}
return if_node == numa_node;
#endif
}
const struct ifaddrs *find_ipv4_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr_in *net,
unsigned int prefix_len,
int numa_node) {
struct in_addr want, temp;
if (addrs.ifa_addr->sa_family != net->sin_family)
return false;
struct in_addr want;
netmask_ipv4(&net->sin_addr, prefix_len, &want);
for (; addrs != NULL; addrs = addrs->ifa_next) {
if (addrs->ifa_addr == NULL)
continue;
if (strcmp(addrs->ifa_name, "lo") == 0 || boost::starts_with(addrs->ifa_name, "lo:"))
continue;
if (numa_node >= 0 && !match_numa_node(addrs->ifa_name, numa_node))
continue;
if (addrs->ifa_addr->sa_family != net->sin_family)
continue;
struct in_addr *cur = &((struct sockaddr_in*)addrs->ifa_addr)->sin_addr;
netmask_ipv4(cur, prefix_len, &temp);
if (temp.s_addr == want.s_addr) {
return addrs;
}
}
return NULL;
struct in_addr *cur = &((struct sockaddr_in*)addrs.ifa_addr)->sin_addr;
struct in_addr temp;
netmask_ipv4(cur, prefix_len, &temp);
return temp.s_addr == want.s_addr;
}
void netmask_ipv6(const struct in6_addr *addr,
unsigned int prefix_len,
struct in6_addr *out) {
@ -90,59 +58,25 @@ void netmask_ipv6(const struct in6_addr *addr,
memset(out->s6_addr+prefix_len/8+1, 0, 16-prefix_len/8-1);
}
bool matches_ipv6_in_subnet(const struct ifaddrs& addrs,
const struct sockaddr_in6* net,
unsigned int prefix_len)
{
if (addrs.ifa_addr == nullptr)
return false;
const struct ifaddrs *find_ipv6_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr_in6 *net,
unsigned int prefix_len,
int numa_node) {
struct in6_addr want, temp;
if (addrs.ifa_addr->sa_family != net->sin6_family)
return false;
struct in6_addr want;
netmask_ipv6(&net->sin6_addr, prefix_len, &want);
for (; addrs != NULL; addrs = addrs->ifa_next) {
if (addrs->ifa_addr == NULL)
continue;
if (strcmp(addrs->ifa_name, "lo") == 0 || boost::starts_with(addrs->ifa_name, "lo:"))
continue;
if (numa_node >= 0 && !match_numa_node(addrs->ifa_name, numa_node))
continue;
if (addrs->ifa_addr->sa_family != net->sin6_family)
continue;
struct in6_addr *cur = &((struct sockaddr_in6*)addrs->ifa_addr)->sin6_addr;
if (IN6_IS_ADDR_LINKLOCAL(cur))
continue;
netmask_ipv6(cur, prefix_len, &temp);
if (IN6_ARE_ADDR_EQUAL(&temp, &want))
return addrs;
}
return NULL;
struct in6_addr temp;
struct in6_addr *cur = &((struct sockaddr_in6*)addrs.ifa_addr)->sin6_addr;
if (IN6_IS_ADDR_LINKLOCAL(cur))
return false;
netmask_ipv6(cur, prefix_len, &temp);
return IN6_ARE_ADDR_EQUAL(&temp, &want);
}
const struct ifaddrs *find_ip_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr *net,
unsigned int prefix_len,
int numa_node) {
switch (net->sa_family) {
case AF_INET:
return find_ipv4_in_subnet(addrs, (struct sockaddr_in*)net, prefix_len,
numa_node);
case AF_INET6:
return find_ipv6_in_subnet(addrs, (struct sockaddr_in6*)net, prefix_len,
numa_node);
}
return NULL;
}
bool parse_network(const char *s, struct sockaddr_storage *network, unsigned int *prefix_len) {
char *slash = strchr((char*)s, '/');
if (!slash) {

View File

@ -801,6 +801,7 @@ OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE) // track the op if over thi
OPTION(osd_target_transaction_size, OPT_INT) // to adjust various transactions that batch smaller items
OPTION(osd_failsafe_full_ratio, OPT_FLOAT) // what % full makes an OSD "full" (failsafe)
OPTION(osd_fast_shutdown, OPT_BOOL)
OPTION(osd_fast_shutdown_notify_mon, OPT_BOOL) // tell mon the OSD is shutting down on osd_fast_shutdown
OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL) // immediately mark OSDs as down once they refuse to accept connections
OPTION(osd_pg_object_context_cache_count, OPT_INT)

View File

@ -2198,16 +2198,23 @@ std::vector<Option> get_global_options() {
.add_service("mon")
.set_description(""),
Option("paxos_service_trim_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
Option("paxos_service_trim_min", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(250)
.add_service("mon")
.set_description(""),
Option("paxos_service_trim_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
Option("paxos_service_trim_max", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(500)
.add_service("mon")
.set_description(""),
Option("paxos_service_trim_max_multiplier", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(20)
.set_min(0)
.add_service("mon")
.set_description("factor by which paxos_service_trim_max will be multiplied to get a new upper bound when trim sizes are high (0 disables it)")
.set_flag(Option::FLAG_RUNTIME),
Option("paxos_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
.set_default(0)
.add_service("mon")
@ -3931,6 +3938,12 @@ std::vector<Option> get_global_options() {
.set_description("Fast, immediate shutdown")
.set_long_description("Setting this to false makes the OSD do a slower teardown of all state when it receives a SIGINT or SIGTERM or when shutting down for any other reason. That slow shutdown is primarilyy useful for doing memory leak checking with valgrind."),
Option("osd_fast_shutdown_notify_mon", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
.set_description("Tell mon about OSD shutdown on immediate shutdown")
.set_long_description("Tell the monitor the OSD is shutting down on immediate shutdown. This helps with cluster log messages from other OSDs reporting it immediately failed.")
.add_see_also({"osd_fast_shutdown", "osd_mon_shutdown_timeout"}),
Option("osd_fast_fail_on_connection_refused", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description(""),
@ -4418,9 +4431,9 @@ std::vector<Option> get_global_options() {
.set_description(""),
Option("bluefs_buffered_io", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
.set_default(true)
.set_description("Enabled buffered IO for bluefs reads.")
.set_long_description("When this option is enabled, bluefs will in some cases perform buffered reads. This allows the kernel page cache to act as a secondary cache for things like RocksDB compaction. For example, if the rocksdb block cache isn't large enough to hold blocks from the compressed SST files itself, they can be read from page cache instead of from the disk. This option previously was enabled by default, however in some test cases it appears to cause excessive swap utilization by the linux kernel and a large negative performance impact after several hours of run time. Please exercise caution when enabling."),
.set_long_description("When this option is enabled, bluefs will in some cases perform buffered reads. This allows the kernel page cache to act as a secondary cache for things like RocksDB compaction. For example, if the rocksdb block cache isn't large enough to hold blocks from the compressed SST files itself, they can be read from page cache instead of from the disk."),
Option("bluefs_sync_write", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
@ -4744,7 +4757,7 @@ std::vector<Option> get_global_options() {
.set_description("How frequently we trim the bluestore cache"),
Option("bluestore_cache_trim_max_skip_pinned", Option::TYPE_UINT, Option::LEVEL_DEV)
.set_default(64)
.set_default(1000)
.set_description("Max pinned cache entries we consider before giving up"),
Option("bluestore_cache_type", Option::TYPE_STR, Option::LEVEL_DEV)

View File

@ -14,6 +14,7 @@
#include "common/pick_address.h"
#include "include/ipaddr.h"
#include "include/scope_guard.h"
#include "include/str_list.h"
#include "common/ceph_context.h"
#ifndef WITH_SEASTAR
@ -25,12 +26,111 @@
#include "common/numa.h"
#include <netdb.h>
#include <net/if.h>
#include <netinet/in.h>
#include <string>
#include <string.h>
#include <vector>
#define dout_subsys ceph_subsys_
namespace {
bool matches_with_name(const ifaddrs& ifa, const std::string& if_name)
{
return if_name.compare(ifa.ifa_name) == 0;
}
static int is_loopback_addr(sockaddr* addr)
{
if (addr->sa_family == AF_INET) {
const sockaddr_in* sin = (struct sockaddr_in *)(addr);
const in_addr_t net = ntohl(sin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT;
return net == IN_LOOPBACKNET ? 1 : 0;
} else if (addr->sa_family == AF_INET6) {
sockaddr_in6* sin6 = (struct sockaddr_in6 *)(addr);
return IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) ? 1 : 0;
} else {
return -1;
}
}
static int grade_addr(const ifaddrs& ifa)
{
if (ifa.ifa_addr == nullptr) {
return -1;
}
int score = 0;
if (ifa.ifa_flags & IFF_UP) {
score += 4;
}
switch (is_loopback_addr(ifa.ifa_addr)) {
case 0:
// prefer non-loopback addresses
score += 2;
break;
case 1:
score += 0;
break;
default:
score = -1;
break;
}
return score;
}
bool matches_with_net(const ifaddrs& ifa,
const sockaddr* net,
unsigned int prefix_len,
unsigned ipv)
{
switch (net->sa_family) {
case AF_INET:
if (ipv & CEPH_PICK_ADDRESS_IPV4) {
return matches_ipv4_in_subnet(ifa, (struct sockaddr_in*)net, prefix_len);
}
break;
case AF_INET6:
if (ipv & CEPH_PICK_ADDRESS_IPV6) {
return matches_ipv6_in_subnet(ifa, (struct sockaddr_in6*)net, prefix_len);
}
break;
}
return false;
}
bool matches_with_net(CephContext *cct,
const ifaddrs& ifa,
const std::string& s,
unsigned ipv)
{
struct sockaddr_storage net;
unsigned int prefix_len;
if (!parse_network(s.c_str(), &net, &prefix_len)) {
lderr(cct) << "unable to parse network: " << s << dendl;
exit(1);
}
return matches_with_net(ifa, (sockaddr*)&net, prefix_len, ipv);
}
int grade_with_numa_node(const ifaddrs& ifa, int numa_node)
{
#if defined(WITH_SEASTAR) || defined(_WIN32)
return 0;
#else
if (numa_node < 0) {
return 0;
}
int if_node = -1;
int r = get_iface_numa_node(ifa.ifa_name, &if_node);
if (r < 0) {
return 0;
}
return if_node == numa_node ? 1 : 0;
#endif
}
}
const struct sockaddr *find_ip_in_subnet_list(
CephContext *cct,
const struct ifaddrs *ifa,
@ -39,86 +139,41 @@ const struct sockaddr *find_ip_in_subnet_list(
const std::string &interfaces,
int numa_node)
{
std::list<string> nets;
get_str_list(networks, nets);
std::list<string> ifs;
get_str_list(interfaces, ifs);
// filter interfaces by name
const struct ifaddrs *filtered = nullptr;
if (ifs.empty()) {
filtered = ifa;
} else {
if (nets.empty()) {
const auto ifs = get_str_list(interfaces);
const auto nets = get_str_list(networks);
if (!ifs.empty() && nets.empty()) {
lderr(cct) << "interface names specified but not network names" << dendl;
exit(1);
}
const struct ifaddrs *t = ifa;
struct ifaddrs *head = 0;
while (t) {
bool match = false;
for (auto& i : ifs) {
if (strcmp(i.c_str(), t->ifa_name) == 0) {
match = true;
break;
}
}
if (match) {
struct ifaddrs *n = new ifaddrs;
memcpy(n, t, sizeof(*t));
n->ifa_next = head;
head = n;
}
t = t->ifa_next;
}
if (!head) {
lderr(cct) << "no interfaces matching " << ifs << dendl;
exit(1);
}
filtered = head;
}
struct sockaddr *r = nullptr;
for (auto& s : nets) {
struct sockaddr_storage net;
unsigned int prefix_len;
if (!parse_network(s.c_str(), &net, &prefix_len)) {
lderr(cct) << "unable to parse network: " << s << dendl;
exit(1);
int best_score = 0;
const sockaddr* best_addr = nullptr;
for (const auto* addr = ifa; addr != nullptr; addr = addr->ifa_next) {
if (!ifs.empty() &&
std::none_of(std::begin(ifs), std::end(ifs),
[&](const auto& if_name) {
return matches_with_name(*addr, if_name);
})) {
continue;
}
switch (net.ss_family) {
case AF_INET:
if (!(ipv & CEPH_PICK_ADDRESS_IPV4)) {
continue;
}
break;
case AF_INET6:
if (!(ipv & CEPH_PICK_ADDRESS_IPV6)) {
continue;
}
break;
if (!nets.empty() &&
std::none_of(std::begin(nets), std::end(nets),
[&](const auto& net) {
return matches_with_net(cct, *addr, net, ipv);
})) {
continue;
}
const struct ifaddrs *found = find_ip_in_subnet(
filtered,
(struct sockaddr *) &net, prefix_len, numa_node);
if (found) {
r = found->ifa_addr;
break;
int score = grade_addr(*addr);
if (score < 0) {
continue;
}
score += grade_with_numa_node(*addr, numa_node);
if (score > best_score) {
best_score = score;
best_addr = addr->ifa_addr;
}
}
if (filtered != ifa) {
while (filtered) {
struct ifaddrs *t = filtered->ifa_next;
delete filtered;
filtered = t;
}
}
return r;
return best_addr;
}
#ifndef WITH_SEASTAR
@ -141,8 +196,8 @@ struct Observer : public md_config_obs_t {
static void fill_in_one_address(CephContext *cct,
const struct ifaddrs *ifa,
const string networks,
const string interfaces,
const string &networks,
const string &interfaces,
const char *conf_var,
int numa_node = -1)
{
@ -187,8 +242,6 @@ static void fill_in_one_address(CephContext *cct,
void pick_addresses(CephContext *cct, int needs)
{
struct ifaddrs *ifa;
int r = getifaddrs(&ifa);
auto public_addr = cct->_conf.get_val<entity_addr_t>("public_addr");
auto public_network = cct->_conf.get_val<std::string>("public_network");
auto public_network_interface =
@ -198,33 +251,33 @@ void pick_addresses(CephContext *cct, int needs)
auto cluster_network_interface =
cct->_conf.get_val<std::string>("cluster_network_interface");
struct ifaddrs *ifa;
int r = getifaddrs(&ifa);
if (r < 0) {
string err = cpp_strerror(errno);
lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl;
exit(1);
}
auto free_ifa = make_scope_guard([ifa] { freeifaddrs(ifa); });
if ((needs & CEPH_PICK_ADDRESS_PUBLIC) &&
public_addr.is_blank_ip() && !public_network.empty()) {
fill_in_one_address(cct, ifa, public_network, public_network_interface,
"public_addr");
"public_addr");
}
if ((needs & CEPH_PICK_ADDRESS_CLUSTER) && cluster_addr.is_blank_ip()) {
if (!cluster_network.empty()) {
fill_in_one_address(cct, ifa, cluster_network, cluster_network_interface,
"cluster_addr");
"cluster_addr");
} else {
if (!public_network.empty()) {
lderr(cct) << "Public network was set, but cluster network was not set " << dendl;
lderr(cct) << " Using public network also for cluster network" << dendl;
fill_in_one_address(cct, ifa, public_network, public_network_interface,
"cluster_addr");
"cluster_addr");
}
}
}
freeifaddrs(ifa);
}
#endif // !WITH_SEASTAR
@ -232,13 +285,15 @@ static int fill_in_one_address(
CephContext *cct,
const struct ifaddrs *ifa,
unsigned ipv,
const string networks,
const string interfaces,
const string &networks,
const string &interfaces,
entity_addrvec_t *addrs,
int numa_node = -1)
{
const struct sockaddr *found = find_ip_in_subnet_list(cct, ifa, ipv, networks,
interfaces, numa_node);
const struct sockaddr *found = find_ip_in_subnet_list(cct, ifa, ipv,
networks,
interfaces,
numa_node);
if (!found) {
std::string ip_type = "";
if ((ipv & CEPH_PICK_ADDRESS_IPV4) && (ipv & CEPH_PICK_ADDRESS_IPV6)) {
@ -352,33 +407,29 @@ int pick_addresses(
!networks.empty()) {
int ipv4_r = !(ipv & CEPH_PICK_ADDRESS_IPV4) ? 0 : -1;
int ipv6_r = !(ipv & CEPH_PICK_ADDRESS_IPV6) ? 0 : -1;
// first try on preferred numa node (if >= 0), then anywhere.
while (true) {
// note: pass in ipv to filter the matching addresses
if ((ipv & CEPH_PICK_ADDRESS_IPV4) &&
(flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) {
ipv4_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV4,
networks, interfaces, addrs,
preferred_numa_node);
}
if (ipv & CEPH_PICK_ADDRESS_IPV6) {
ipv6_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV6,
networks, interfaces, addrs,
preferred_numa_node);
}
if ((ipv & CEPH_PICK_ADDRESS_IPV4) &&
!(flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) {
ipv4_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV4,
networks, interfaces, addrs,
preferred_numa_node);
}
if (ipv4_r >= 0 && ipv6_r >= 0) {
break;
}
if (preferred_numa_node < 0) {
return ipv4_r >= 0 && ipv6_r >= 0 ? 0 : -1;
}
preferred_numa_node = -1; // try any numa node
// note: pass in ipv to filter the matching addresses
if ((ipv & CEPH_PICK_ADDRESS_IPV4) &&
(flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) {
ipv4_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV4,
networks, interfaces,
addrs,
preferred_numa_node);
}
if (ipv & CEPH_PICK_ADDRESS_IPV6) {
ipv6_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV6,
networks, interfaces,
addrs,
preferred_numa_node);
}
if ((ipv & CEPH_PICK_ADDRESS_IPV4) &&
!(flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) {
ipv4_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV4,
networks, interfaces,
addrs,
preferred_numa_node);
}
if (ipv4_r < 0 || ipv6_r < 0) {
return -1;
}
}
@ -461,20 +512,15 @@ std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network)
lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl;
return {};
}
const unsigned int prefix_len = max(sizeof(in_addr::s_addr), sizeof(in6_addr::s6_addr)) * CHAR_BIT;
const struct ifaddrs *found = find_ip_in_subnet(
ifa,
(const struct sockaddr *) &network, prefix_len);
std::string result;
if (found) {
result = found->ifa_name;
auto free_ifa = make_scope_guard([ifa] { freeifaddrs(ifa); });
const unsigned int prefix_len = std::max(sizeof(in_addr::s_addr), sizeof(in6_addr::s6_addr)) * CHAR_BIT;
for (auto addr = ifa; addr != nullptr; addr = addr->ifa_next) {
if (matches_with_net(*ifa, (const struct sockaddr *) &network, prefix_len,
CEPH_PICK_ADDRESS_IPV4 | CEPH_PICK_ADDRESS_IPV6)) {
return addr->ifa_name;
}
}
freeifaddrs(ifa);
return result;
return {};
}
@ -486,8 +532,8 @@ bool have_local_addr(CephContext *cct, const list<entity_addr_t>& ls, entity_add
lderr(cct) << "unable to fetch interfaces and addresses: " << cpp_strerror(errno) << dendl;
exit(1);
}
auto free_ifa = make_scope_guard([ifa] { freeifaddrs(ifa); });
bool found = false;
for (struct ifaddrs *addrs = ifa; addrs != nullptr; addrs = addrs->ifa_next) {
if (addrs->ifa_addr) {
entity_addr_t a;
@ -495,16 +541,12 @@ bool have_local_addr(CephContext *cct, const list<entity_addr_t>& ls, entity_add
for (auto& p : ls) {
if (a.is_same_host(p)) {
*match = p;
found = true;
goto out;
return true;
}
}
}
}
out:
freeifaddrs(ifa);
return found;
return false;
}
int get_iface_numa_node(

View File

@ -73,6 +73,20 @@ std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network)
*/
bool have_local_addr(CephContext *cct, const std::list<entity_addr_t>& ls, entity_addr_t *match);
/**
* filter the addresses in @c ifa with specified interfaces, networks and IPv
*
* @param cct
* @param ifa a list of network interface addresses to be filtered
* @param ipv bitmask of CEPH_PICK_ADDRESS_IPV4 and CEPH_PICK_ADDRESS_IPV6.
* it is used to filter the @c networks
* @param networks a comma separated list of networks as the allow list. only
* the addresses in the specified networks are allowed. all addresses
* are accepted if it is empty.
* @param interfaces a comma separated list of interfaces for the allow list.
* all addresses are accepted if it is empty
* @param numa_node if non-negative, prefer addresses on interfaces attached to this NUMA node
*/
const struct sockaddr *find_ip_in_subnet_list(
CephContext *cct,
const struct ifaddrs *ifa,
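Taken together, the doc comment above and the grading logic added to pick_address.cc earlier boil down to: filter candidate interface addresses by interface name and by network, score each survivor, and return the best-scoring address. The Python sketch below mirrors that selection under stated assumptions (the +4 for an interface that is up, +2 for a non-loopback address and +1 for a NUMA match come from the hunk; the candidate object shape and helper names are hypothetical):
import ipaddress

def in_subnet(addr, net):
    # e.g. in_subnet('10.1.2.3', '10.1.0.0/16') -> True
    return ipaddress.ip_address(addr) in ipaddress.ip_network(net)

def pick_best_address(candidates, ifs, nets, numa_node=-1):
    # candidates: objects with .name, .addr, .is_up, .is_loopback and
    # .numa_node attributes (a hypothetical shape for this sketch)
    best_score, best_addr = 0, None
    for c in candidates:
        if ifs and c.name not in ifs:
            continue                      # filtered out by interface name
        if nets and not any(in_subnet(c.addr, n) for n in nets):
            continue                      # filtered out by allowed networks
        score = (4 if c.is_up else 0) + (0 if c.is_loopback else 2)
        if numa_node >= 0 and c.numa_node == numa_node:
            score += 1                    # prefer the requested NUMA node
        if score > best_score:
            best_score, best_addr = score, c.addr
    return best_addr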

View File

@ -1,39 +1,18 @@
# zstd
# libzstd - build it statically
set(ZSTD_C_FLAGS "-fPIC -Wno-unused-variable -O3")
option(WITH_SYSTEM_ZSTD "use prebuilt libzstd in system" OFF)
include(ExternalProject)
ExternalProject_Add(zstd_ext
SOURCE_DIR ${CMAKE_SOURCE_DIR}/src/zstd/build/cmake
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_C_FLAGS=${ZSTD_C_FLAGS}
-DCMAKE_AR=${CMAKE_AR}
-DCMAKE_POSITION_INDEPENDENT_CODE=${ENABLE_SHARED}
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/libzstd
BUILD_COMMAND $(MAKE) libzstd_static
INSTALL_COMMAND "true")
# force zstd make to be called on each time
ExternalProject_Add_Step(zstd_ext forcebuild
DEPENDEES configure
DEPENDERS build
COMMAND "true"
ALWAYS 1)
add_library(zstd STATIC IMPORTED)
set_target_properties(zstd PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/src/zstd/lib"
IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/libzstd/lib/libzstd.a")
add_dependencies(zstd zstd_ext)
if(WITH_SYSTEM_ZSTD)
find_package(Zstd 1.4.4 REQUIRED)
else()
include(BuildZstd)
build_Zstd()
endif()
set(zstd_sources
CompressionPluginZstd.cc
)
CompressionPluginZstd.cc)
add_library(ceph_zstd SHARED ${zstd_sources})
target_link_libraries(ceph_zstd PRIVATE zstd)
target_link_libraries(ceph_zstd PRIVATE Zstd::Zstd)
set_target_properties(ceph_zstd PROPERTIES
VERSION 2.0.0
SOVERSION 2

View File

@ -46,7 +46,7 @@ class ZstdCompressor : public Compressor {
inbuf.size = p.get_ptr_and_advance(left, (const char**)&inbuf.src);
left -= inbuf.size;
ZSTD_EndDirective const zed = (left==0) ? ZSTD_e_end : ZSTD_e_continue;
size_t r = ZSTD_compress_generic(s, &outbuf, &inbuf, zed);
size_t r = ZSTD_compressStream2(s, &outbuf, &inbuf, zed);
if (ZSTD_isError(r)) {
return -EINVAL;
}

View File

@ -119,6 +119,5 @@ int CrushLocation::init_on_startup()
loc.clear();
loc.insert(make_pair<std::string,std::string>("host", hostname));
loc.insert(make_pair<std::string,std::string>("root", "default"));
lgeneric_dout(cct, 10) << "crush_location is (default) " << loc << dendl;
return 0;
}

View File

@ -1376,6 +1376,12 @@ int CrushWrapper::update_item(
<< ((float)old_iweight/(float)0x10000) << " -> " << weight
<< dendl;
adjust_item_weight_in_loc(cct, item, iweight, loc);
ret = rebuild_roots_with_classes(cct);
if (ret < 0) {
ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: "
<< cpp_strerror(ret) << dendl;
return ret;
}
ret = 1;
}
if (get_item_name(item) != name) {
@ -1559,6 +1565,12 @@ int CrushWrapper::adjust_subtree_weight(CephContext *cct, int id, int weight,
}
}
}
int ret = rebuild_roots_with_classes(cct);
if (ret < 0) {
ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: "
<< cpp_strerror(ret) << dendl;
return ret;
}
return changed;
}

View File

@ -145,11 +145,6 @@ void global_pre_init(
// command line (as passed by caller)
conf.parse_argv(args);
if (conf->log_early &&
!cct->_log->is_started()) {
cct->_log->start();
}
if (!cct->_log->is_started()) {
cct->_log->start();
}

View File

@ -20,9 +20,9 @@
typedef void (*signal_handler_t)(int);
#ifdef HAVE_SIGDESCR_NP
#if defined(HAVE_SIGDESCR_NP)
# define sig_str(signum) sigdescr_np(signum)
#elif HAVE_REENTRANT_STRSIGNAL
#elif defined(HAVE_REENTRANT_STRSIGNAL)
# define sig_str(signum) strsignal(signum)
#else
# define sig_str(signum) sys_siglist[signum]

View File

@ -47,9 +47,15 @@
#define CEPH_MONC_PROTOCOL 15 /* server/client */
#define CEPH_INO_ROOT 1
#define CEPH_INO_CEPH 2 /* hidden .ceph dir */
#define CEPH_INO_LOST_AND_FOUND 4 /* reserved ino for use in recovery */
#define CEPH_INO_ROOT 1
/*
* hidden .ceph dir, which is no longer created but
* recognised in existing filesystems so that we
* don't try to fragment it.
*/
#define CEPH_INO_CEPH 2
#define CEPH_INO_GLOBAL_SNAPREALM 3
#define CEPH_INO_LOST_AND_FOUND 4 /* reserved ino for use in recovery */
/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
#define CEPH_MAX_MON 31

Some files were not shown because too many files have changed in this diff