update sources to v12.2.0

This commit is contained in:
Fabian Grünbichler 2017-08-30 08:42:40 +02:00
parent 913cc16a67
commit b5b8bbf502
118 changed files with 2665 additions and 757 deletions

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 2.8.11)
project(ceph)
set(VERSION 12.1.4)
set(VERSION 12.2.0)
if(POLICY CMP0046)
# Tweak policies (this one disables "missing" dependency warning)
@ -274,7 +274,7 @@ find_package(snappy REQUIRED)
option(WITH_LZ4 "LZ4 compression support" OFF)
if(WITH_LZ4)
find_package(LZ4 REQUIRED)
find_package(LZ4 1.7 REQUIRED)
set(HAVE_LZ4 ${LZ4_FOUND})
endif(WITH_LZ4)

View File

@ -1,7 +1,7 @@
# Contributor: John Coyle <dx9err@gmail.com>
# Maintainer: John Coyle <dx9err@gmail.com>
pkgname=ceph
pkgver=12.1.4
pkgver=12.2.0
pkgrel=0
pkgdesc="Ceph is a distributed object store and file system"
pkgusers="ceph"
@ -63,7 +63,7 @@ makedepends="
xmlstarlet
yasm
"
source="ceph-12.1.4.tar.bz2"
source="ceph-12.2.0.tar.bz2"
subpackages="
$pkgname-base
$pkgname-common
@ -116,7 +116,7 @@ _sysconfdir=/etc
_udevrulesdir=/etc/udev/rules.d
_python_sitelib=/usr/lib/python2.7/site-packages
builddir=$srcdir/ceph-12.1.4
builddir=$srcdir/ceph-12.2.0
build() {
export CEPH_BUILD_VIRTUALENV=$builddir

View File

@ -61,7 +61,7 @@
# main package definition
#################################################################################
Name: ceph
Version: 12.1.4
Version: 12.2.0
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
@ -76,7 +76,7 @@ License: LGPL-2.1 and CC-BY-SA-1.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and
Group: System/Filesystems
%endif
URL: http://ceph.com/
Source0: http://ceph.com/download/ceph-12.1.4.tar.bz2
Source0: http://ceph.com/download/ceph-12.2.0.tar.bz2
%if 0%{?suse_version}
%if 0%{?is_opensuse}
ExclusiveArch: x86_64 aarch64 ppc64 ppc64le
@ -772,7 +772,7 @@ python-rbd, python-rgw or python-cephfs instead.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-12.1.4
%autosetup -p1 -n ceph-12.2.0
%build
%if 0%{with cephfs_java}
@ -975,6 +975,8 @@ rm -rf %{buildroot}
%{_mandir}/man8/ceph-detect-init.8*
%{_mandir}/man8/ceph-create-keys.8*
%{_mandir}/man8/ceph-disk.8*
%{_mandir}/man8/ceph-volume.8*
%{_mandir}/man8/ceph-volume-systemd.8*
%{_mandir}/man8/ceph-run.8*
%{_mandir}/man8/crushtool.8*
%{_mandir}/man8/osdmaptool.8*

View File

@ -975,6 +975,8 @@ rm -rf %{buildroot}
%{_mandir}/man8/ceph-detect-init.8*
%{_mandir}/man8/ceph-create-keys.8*
%{_mandir}/man8/ceph-disk.8*
%{_mandir}/man8/ceph-volume.8*
%{_mandir}/man8/ceph-volume-systemd.8*
%{_mandir}/man8/ceph-run.8*
%{_mandir}/man8/crushtool.8*
%{_mandir}/man8/osdmaptool.8*

View File

@ -5,11 +5,30 @@
# LZ4_FOUND
# LZ4_INCLUDE_DIR
# LZ4_LIBRARY
# LZ4_VERSION_STRING
# LZ4_VERSION_MAJOR
# LZ4_VERSION_MINOR
# LZ4_VERSION_RELEASE
find_path(LZ4_INCLUDE_DIR NAMES lz4.h)
if(LZ4_INCLUDE_DIR AND EXISTS "${LZ4_INCLUDE_DIR}/lz4.h")
foreach(ver "MAJOR" "MINOR" "RELEASE")
file(STRINGS "${LZ4_INCLUDE_DIR}/lz4.h" LZ4_VER_${ver}_LINE
REGEX "^#define[ \t]+LZ4_VERSION_${ver}[ \t]+[0-9]+[ \t]+.*$")
string(REGEX REPLACE "^#define[ \t]+LZ4_VERSION_${ver}[ \t]+([0-9]+)[ \t]+.*$"
"\\1" LZ4_VERSION_${ver} "${LZ4_VER_${ver}_LINE}")
unset(${LZ4_VER_${ver}_LINE})
endforeach()
set(LZ4_VERSION_STRING
"${LZ4_VERSION_MAJOR}.${LZ4_VERSION_MINOR}.${LZ4_VERSION_RELEASE}")
endif()
find_library(LZ4_LIBRARY NAMES lz4)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LZ4 DEFAULT_MSG LZ4_LIBRARY LZ4_INCLUDE_DIR)
find_package_handle_standard_args(LZ4
REQUIRED_VARS LZ4_LIBRARY LZ4_INCLUDE_DIR
VERSION_VAR LZ4_VERSION_STRING)
mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARY)
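The version detection added above keys off the ``LZ4_VERSION_MAJOR/MINOR/RELEASE``
defines shipped in ``lz4.h``. As a rough illustration only (the header path and
values are assumptions, not part of this change), the lines the regex matches
look like this:

    $ grep -E '#define[[:space:]]+LZ4_VERSION_(MAJOR|MINOR|RELEASE)' /usr/include/lz4.h
    #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
    #define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
    #define LZ4_VERSION_RELEASE  5    /* for tweaks, bug-fixes, or development */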

View File

@ -11,8 +11,11 @@ usr/lib/libosd_tp.so*
usr/lib/python*/dist-packages/ceph_disk*
usr/sbin/ceph-disk
usr/sbin/ceph-volume
usr/sbin/ceph-volume-systemd
usr/lib/python*/dist-packages/ceph_volume/*
usr/lib/python*/dist-packages/ceph_volume-*
usr/share/man/man8/ceph-clsinfo.8
usr/share/man/man8/ceph-disk.8
usr/share/man/man8/ceph-volume.8
usr/share/man/man8/ceph-volume-systemd.8
usr/share/man/man8/ceph-osd.8

View File

@ -1,3 +1,9 @@
ceph (12.2.0-1) stable; urgency=medium
* New upstream release
-- Ceph Release Team <ceph-maintainers@ceph.com> Mon, 28 Aug 2017 16:30:16 +0000
ceph (12.1.4-1) stable; urgency=medium
* New upstream release

View File

@ -0,0 +1,30 @@
.. _ceph-volume:
ceph-volume
===========
Deploy OSDs with different device technologies like lvm or physical disks using
pluggable tools (:doc:`lvm/index` itself is treated like a plugin). It tries to
follow the workflow of ``ceph-disk`` for deploying OSDs, with a predictable
and robust way of preparing, activating, and starting OSDs.
:ref:`Overview <ceph-volume-overview>` |
:ref:`Plugin Guide <ceph-volume-plugins>` |
**Command Line Subcommands**
Although currently there is support for ``lvm``, the plan is to support other
technologies, including plain disks.
* :ref:`ceph-volume-lvm`
.. toctree::
:hidden:
:maxdepth: 3
:caption: Contents:
intro
lvm/index
lvm/activate
lvm/prepare
lvm/scan
lvm/systemd

View File

@ -0,0 +1,19 @@
.. _ceph-volume-overview:
Overview
--------
The ``ceph-volume`` tool aims to be a single purpose command line tool to deploy
logical volumes as OSDs, trying to maintain a similar API to ``ceph-disk`` when
preparing, activating, and creating OSDs.
It deviates from ``ceph-disk`` by not interacting or relying on the udev rules
that come installed for Ceph. These rules allow automatic detection of
previously setup devices that are in turn fed into ``ceph-disk`` to activate
them.
``ceph-volume lvm``
-------------------
By making use of :term:`LVM tags`, the :ref:`ceph-volume-lvm` sub-command is
able to store and later re-discover and query devices associated with OSDs so
that they can later be activated.

View File

@ -0,0 +1,74 @@
.. _ceph-volume-lvm-activate:
``activate``
============
Once :ref:`ceph-volume-lvm-prepare` is completed, and all the various steps
that entails are done, the volume is ready to get "activated".
This activation process enables a systemd unit that persists the OSD ID and its
UUID (also called ``fsid`` in Ceph CLI tools), so that at boot time it can
understand what OSD is enabled and needs to be mounted.
.. note:: The execution of this call is fully idempotent, and there are no
   side-effects when running it multiple times
New OSDs
--------
To activate newly prepared OSDs both the :term:`OSD id` and :term:`OSD uuid`
need to be supplied. For example::
ceph-volume lvm activate --filestore 0 0263644D-0BF1-4D6D-BC34-28BD98AE3BC8
.. note:: The UUID is stored in the ``osd_fsid`` file in the OSD path, which is
generated when :ref:`ceph-volume-lvm-prepare` is used.
requiring uuids
^^^^^^^^^^^^^^^
The :term:`OSD uuid` is required as an extra step to ensure that the
right OSD is being activated. It is entirely possible that a previous OSD with
the same id exists and would end up activating the incorrect one.
Discovery
---------
With either existing OSDs or new ones being activated, a *discovery* process is
performed using :term:`LVM tags` to enable the systemd units.
The systemd unit will capture the :term:`OSD id` and :term:`OSD uuid` and
persist it. Internally, the activation will enable it like::
systemctl enable ceph-volume@lvm-$id-$uuid
For example::
systemctl enable ceph-volume@lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
Would start the discovery process for the OSD with an id of ``0`` and a UUID of
``8715BEB4-15C5-49DE-BA6F-401086EC7B41``.
.. note:: for more details on the systemd workflow see :ref:`ceph-volume-systemd`
The systemd unit will look for the matching OSD device, and by looking at its
:term:`LVM tags` will proceed to:
#. mount the device in the corresponding location (by convention this is
   ``/var/lib/ceph/osd/<cluster name>-<osd id>/``)
#. ensure that all required devices are ready for that OSD
#. start the ``ceph-osd@0`` systemd unit
Existing OSDs
-------------
For existing OSDs that have been deployed with different tooling, the only way
to port them over to the new mechanism is to prepare them again (losing data).
See :ref:`ceph-volume-lvm-existing-osds` for details on how to proceed.
Summary
-------
To recap the ``activate`` process:
#. require both :term:`OSD id` and :term:`OSD uuid`
#. enable the system unit with matching id and uuid
#. the systemd unit will ensure all devices are ready and mounted (if needed)
#. the matching ``ceph-osd`` systemd unit will get started
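As an illustrative walk-through (reusing the example id and uuid from this
document; the unit names are assumptions based on the conventions described
here), the whole flow could be exercised and verified like this::

    ceph-volume lvm activate --filestore 0 0263644D-0BF1-4D6D-BC34-28BD98AE3BC8
    systemctl is-enabled ceph-volume@lvm-0-0263644D-0BF1-4D6D-BC34-28BD98AE3BC8
    systemctl is-active ceph-osd@0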

View File

@ -0,0 +1,24 @@
.. _ceph-volume-lvm:
``lvm``
=======
Implements the functionality needed to deploy OSDs from the ``lvm`` subcommand:
``ceph-volume lvm``
**Command Line Subcommands**
* :ref:`ceph-volume-lvm-prepare`
* :ref:`ceph-volume-lvm-activate`
.. not yet implemented
.. * :ref:`ceph-volume-lvm-scan`
**Internal functionality**
There are other aspects of the ``lvm`` subcommand that are internal and not
exposed to the user; these sections explain how these pieces work together,
clarifying the workflows of the tool.
:ref:`Systemd Units <ceph-volume-systemd>` |
:ref:`lvm <ceph-volume-lvm-api>`

View File

@ -0,0 +1,145 @@
.. _ceph-volume-lvm-prepare:
``prepare``
===========
This subcommand allows a :term:`filestore` setup (:term:`bluestore` support is
planned) and currently consumes only logical volumes for both the data and
journal. It will not create or modify the logical volumes except for adding
extra metadata.
.. note:: This is part of a two step process to deploy an OSD. If looking for
a single-call way, please see :ref:`ceph-volume-lvm-create`
To help identify volumes, the tool assigns a few pieces of metadata, using
:term:`LVM tags`, as part of the process of preparing a volume (or volumes) to
work with Ceph.
:term:`LVM tags` make volumes easy to discover later, and help identify them as
part of a Ceph system and what role they have (journal, filestore, bluestore,
etc...)
Although initially only :term:`filestore` is supported (and it is the default),
the back end can be specified with:
* :ref:`--filestore <ceph-volume-lvm-prepare_filestore>`
* ``--bluestore``
.. when available, this will need to be updated to:
.. * :ref:`--bluestore <ceph-volume-prepare_bluestore>`
.. _ceph-volume-lvm-prepare_filestore:
``filestore``
-------------
This is the default OSD backend and allows preparation of logical volumes for
a :term:`filestore` OSD.
The process is *very* strict: it requires two logical volumes that are ready to
be used. No special preparation is needed for these volumes other than
following the minimum size requirements for data and journal.
The API call looks like::
ceph-volume lvm prepare --filestore --data data --journal journal
The journal *must* be a logical volume, just like the data volume, and that
argument is always required even if both live under the same group.
A generated uuid is used to ask the cluster for a new OSD. These two pieces are
crucial for identifying an OSD and will later be used throughout the
:ref:`ceph-volume-lvm-activate` process.
The OSD data directory is created using the following convention::
/var/lib/ceph/osd/<cluster name>-<osd id>
At this point the data volume is mounted at this location, and the journal
volume is linked::
ln -s /path/to/journal /var/lib/ceph/osd/<cluster_name>-<osd-id>/journal
The monmap is fetched using the bootstrap key from the OSD::
/usr/bin/ceph --cluster ceph --name client.bootstrap-osd
--keyring /var/lib/ceph/bootstrap-osd/ceph.keyring
mon getmap -o /var/lib/ceph/osd/<cluster name>-<osd id>/activate.monmap
``ceph-osd`` will be called to populate the OSD directory, that is already
mounted, re-using all the pieces of information from the initial steps::
ceph-osd --cluster ceph --mkfs --mkkey -i <osd id> \
--monmap /var/lib/ceph/osd/<cluster name>-<osd id>/activate.monmap --osd-data \
/var/lib/ceph/osd/<cluster name>-<osd id> --osd-journal /var/lib/ceph/osd/<cluster name>-<osd id>/journal \
--osd-uuid <osd uuid> --keyring /var/lib/ceph/osd/<cluster name>-<osd id>/keyring \
--setuser ceph --setgroup ceph
.. _ceph-volume-lvm-existing-osds:
Existing OSDs
-------------
For existing clusters that want to use this new system and have OSDs that are
already running, there are a few things to take into account:
.. warning:: this process will forcefully format the data device, destroying
existing data, if any.
* OSD paths should follow this convention::
/var/lib/ceph/osd/<cluster name>-<osd id>
* Preferably, no other mechanisms to mount the volume should exist, and any
  that do (like ``fstab`` mount points) should be removed
* There is currently no support for encrypted volumes
The one-time process for an existing OSD with an ID of 0, using a ``"ceph"``
cluster name, would look like::
ceph-volume lvm prepare --filestore --osd-id 0 --osd-fsid E3D291C1-E7BF-4984-9794-B60D9FA139CB
The command line tool will not contact the monitor to generate an OSD ID and
will format the LVM device in addition to storing the metadata on it so that it
can later be started (for detailed metadata description see
:ref:`ceph-volume-lvm-tags`).
.. _ceph-volume-lvm-prepare_bluestore:
``bluestore``
-------------
This subcommand is planned but not currently implemented.
Storing metadata
----------------
The following tags will get applied as part of the preparation process
regardless of the type of volume (journal or data) and also regardless of the
OSD backend:
* ``cluster_fsid``
* ``data_device``
* ``journal_device``
* ``encrypted``
* ``osd_fsid``
* ``osd_id``
* ``block``
* ``db``
* ``wal``
* ``lockbox_device``
.. note:: For the complete lvm tag conventions see :ref:`ceph-volume-lvm-tag-api`
Summary
-------
To recap the ``prepare`` process:
#. Accept only logical volumes for data and journal (both required)
#. Generate a UUID for the OSD
#. Ask the monitor for an OSD ID, reusing the generated UUID
#. OSD data directory is created and data volume mounted
#. Journal is symlinked from data volume to journal location
#. monmap is fetched for activation
#. the data device is mounted and the data directory is populated by ``ceph-osd``
#. data and journal volumes are assigned all the Ceph metadata using lvm tags
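As an illustrative sketch only (the volume group and logical volume names below
are made up), a prepare call on two pre-created volumes and a quick look at the
resulting metadata could look like::

    ceph-volume lvm prepare --filestore --data ceph-vg/data-0 --journal ceph-vg/journal-0
    # the metadata listed above ends up as LVM tags on both volumes
    lvs -o lv_name,lv_tags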

View File

@ -0,0 +1,9 @@
scan
====
This sub-command will allow discovering Ceph volumes previously set up by the
tool by looking into the system's logical volumes and their tags.
As part of the :ref:`ceph-volume-lvm-prepare` process, the logical volumes are assigned
a few tags with important pieces of information.
.. note:: This sub-command is not yet implemented

View File

@ -0,0 +1,46 @@
.. _ceph-volume-systemd:
systemd
=======
As part of the :ref:`ceph-volume-lvm-activate` process, a few systemd units will get enabled
that will use the OSD id and uuid as part of their name. These units will be
run when the system boots, and will proceed to activate their corresponding
volumes.
The API for activation requires both the :term:`OSD id` and :term:`OSD uuid`,
which get persisted by systemd. Internally, the activation process enables the
systemd unit using the following convention::
ceph-volume@<type>-<extra metadata>
Where ``type`` is the sub-command used to parse the extra metadata, and ``extra
metadata`` is any additional information needed by the sub-command to be able
to activate the OSD. For example, an OSD with an ID of 0 for the ``lvm``
sub-command would look like::
systemctl enable ceph-volume@lvm-0-0A3E1ED2-DA8A-4F0E-AA95-61DEC71768D6
Process
-------
The systemd unit is a :term:`systemd oneshot` service, meant to start at boot after the
local filesystem is ready to be used.
Upon startup, it will identify the logical volume using :term:`LVM tags`,
finding a matching ID and later ensuring it is the right one with
the :term:`OSD uuid`.
After identifying the correct volume it will then proceed to mount it by using
the OSD destination conventions, that is::
/var/lib/ceph/osd/<cluster name>-<osd id>
For our example OSD with an id of ``0``, that means the identified device will
be mounted at::
/var/lib/ceph/osd/ceph-0
Once that process is complete, a call will be made to start the OSD::
systemctl start ceph-osd@0
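To verify that sequence by hand, one could check the mount and the OSD unit (a
sketch; the unit instance name follows the example used earlier in this
document)::

    findmnt /var/lib/ceph/osd/ceph-0
    systemctl status ceph-volume@lvm-0-0A3E1ED2-DA8A-4F0E-AA95-61DEC71768D6
    systemctl is-active ceph-osd@0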

View File

@ -18,6 +18,8 @@ if tags.has('man'):
'install/*',
'mon/*',
'rados/*',
'mgr/*',
'ceph-volume/*',
'radosgw/*',
'rbd/*',
'start/*']

View File

@ -0,0 +1,13 @@
===================================
ceph-volume developer documentation
===================================
.. rubric:: Contents
.. toctree::
:maxdepth: 1
plugins
lvm
systemd

View File

@ -0,0 +1,127 @@
.. _ceph-volume-lvm-api:
LVM
===
The backend of ``ceph-volume lvm`` is LVM itself; it relies heavily on the usage
of tags, which is a way for LVM to allow extending its volume metadata. These
values can later be queried against devices, and that is how they get
discovered later.
.. warning:: These APIs are not meant to be public, but are documented so that
it is clear what the tool is doing behind the scenes. Do not alter
any of these values.
.. _ceph-volume-lvm-tag-api:
Tag API
-------
The process of identifying logical volumes as part of Ceph relies on applying
tags on all volumes. It follows a naming convention for the namespace that
looks like::
ceph.<tag name>=<tag value>
All tags are prefixed by the ``ceph`` keyword to claim ownership of that
namespace and make it easily identifiable. This is how the OSD ID would be used
in the context of lvm tags::
ceph.osd_id=0
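Because these are plain LVM tags, they can be inspected (or, purely for
illustration, applied by hand) with stock LVM tooling; the volume group and
logical volume names below are made up::

    # list the ceph.* tags attached to each logical volume
    lvs -o lv_name,vg_name,lv_tags
    # how a tag of this form gets attached to a volume
    lvchange --addtag ceph.osd_id=0 ceph-vg/data-0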
.. _ceph-volume-lvm-tags:
Metadata
--------
The following describes all the metadata from Ceph OSDs that is stored on an
LVM volume:
``type``
--------
Describes if the device is an OSD or a journal, with the ability to expand to
other types when supported (for example a lockbox)
Example::
ceph.type=osd
``cluster_fsid``
----------------
Example::
ceph.cluster_fsid=7146B649-AE00-4157-9F5D-1DBFF1D52C26
``data_device``
---------------
Example::
ceph.data_device=/dev/ceph/data-0
``journal_device``
------------------
Example::
ceph.journal_device=/dev/ceph/journal-0
``encrypted``
-------------
Example for enabled encryption with ``luks``::
ceph.encrypted=luks
For plain dmcrypt::
ceph.encrypted=dmcrypt
For disabled encryption::
ceph.encrypted=0
``osd_fsid``
------------
Example::
ceph.osd_fsid=88ab9018-f84b-4d62-90b4-ce7c076728ff
``osd_id``
----------
Example::
ceph.osd_id=1
``block``
---------
Just used on :term:`bluestore` backends.
Example::
ceph.block=/dev/mapper/vg-block-0
``db``
------
Just used on :term:`bluestore` backends.
Example::
ceph.db=/dev/mapper/vg-db-0
``wal``
-------
Just used on :term:`bluestore` backends.
Example::
ceph.wal=/dev/mapper/vg-wal-0
``lockbox_device``
------------------
Only used when encryption is enabled, to store keys in an unencrypted
volume.
Example::
ceph.lockbox_device=/dev/mapper/vg-lockbox-0

View File

@ -0,0 +1,65 @@
.. _ceph-volume-plugins:
Plugins
=======
``ceph-volume`` started initially to provide support for using ``lvm`` as
the underlying system for an OSD. It is included as part of the tool but it is
treated like a plugin.
This modularity allows for other device or device-like technologies to
consume and re-use the utilities and workflows provided.
Adding Plugins
--------------
As a Python tool, plugins are registered via ``setuptools`` entry points. For a new plugin to be
available, it should have an entry similar to this in its ``setup.py`` file:
.. code-block:: python
setup(
...
entry_points = dict(
ceph_volume_handlers = [
'my_command = my_package.my_module:MyClass',
],
),
``MyClass`` should be a class that accepts ``sys.argv`` as its argument;
``ceph-volume`` will pass that in at instantiation and call its ``main``
method.
This is how a plugin for ``ZFS`` could look, for example:
.. code-block:: python
import argparse

class ZFS(object):
help_menu = 'Deploy OSDs with ZFS'
_help = """
Use ZFS as the underlying technology for OSDs
--verbose Increase the verbosity level
"""
def __init__(self, argv):
self.argv = argv
def main(self):
parser = argparse.ArgumentParser()
args = parser.parse_args(self.argv)
...
And its entry point (via ``setuptools``) in ``setup.py`` would look like:
.. code-block:: python
entry_points = {
'ceph_volume_handlers': [
'zfs = ceph_volume_zfs.zfs:ZFS',
],
},
After installation, the ``zfs`` subcommand would be listed and could be used
as::
ceph-volume zfs

View File

@ -0,0 +1,37 @@
.. _ceph-volume-systemd-api:
systemd
=======
The workflow to *"activate"* an OSD relies on systemd unit files and their
ability to persist information as a suffix to the instance name.
``ceph-volume`` exposes the following convention for unit files::
ceph-volume@<sub command>-<extra metadata>
For example, this is how enabling an OSD could look for the
:ref:`ceph-volume-lvm` sub command::
systemctl enable ceph-volume@lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
These 3 pieces of persisted information are needed by the sub-command so that
it understands what OSD it needs to activate.
Since ``lvm`` is not the only subcommand that will be supported, this
is how it will allow other device types to be defined.
At some point for example, for plain disks, it could be::
systemctl enable ceph-volume@disk-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
At startup, the systemd unit will execute a helper script that will parse the
suffix and will end up calling ``ceph-volume`` back. Using the previous
example for lvm, that call will look like::
ceph-volume lvm activate 0 8715BEB4-15C5-49DE-BA6F-401086EC7B41
.. warning:: These workflows are not meant to be public, but are documented so that
it is clear what the tool is doing behind the scenes. Do not alter
any of these values.

View File

@ -39,3 +39,4 @@ in the body of the message.
osd_internals/index*
mds_internals/index*
radosgw/index*
ceph-volume/index*

View File

@ -4,7 +4,7 @@
Ceph is growing rapidly. As firms deploy Ceph, the technical terms such as
"RADOS", "RBD," "RGW" and so forth require corresponding marketing terms
that explain what each component does. The terms in this glossary are
that explain what each component does. The terms in this glossary are
intended to complement the existing technical terminology.
Sometimes more than one term applies to a definition. Generally, the first
@ -12,21 +12,21 @@ term reflects a term consistent with Ceph's marketing, and secondary terms
reflect either technical terms or legacy ways of referring to Ceph systems.
.. glossary::
.. glossary::
Ceph Project
The aggregate term for the people, software, mission and infrastructure
The aggregate term for the people, software, mission and infrastructure
of Ceph.
cephx
The Ceph authentication protocol. Cephx operates like Kerberos, but it
has no single point of failure.
Ceph
Ceph Platform
All Ceph software, which includes any piece of code hosted at
All Ceph software, which includes any piece of code hosted at
`http://github.com/ceph`_.
Ceph System
Ceph Stack
A collection of two or more components of Ceph.
@ -35,7 +35,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
Node
Host
Any single machine or server in a Ceph System.
Ceph Storage Cluster
Ceph Object Store
RADOS
@ -45,7 +45,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
Ceph Cluster Map
cluster map
The set of maps comprising the monitor map, OSD map, PG map, MDS map and
The set of maps comprising the monitor map, OSD map, PG map, MDS map and
CRUSH map. See `Cluster Map`_ for details.
Ceph Object Storage
@ -56,13 +56,13 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
RADOS Gateway
RGW
The S3/Swift gateway component of Ceph.
Ceph Block Device
RBD
The block storage component of Ceph.
Ceph Block Storage
The block storage "product," service or capabilities when used in
The block storage "product," service or capabilities when used in
conjunction with ``librbd``, a hypervisor such as QEMU or Xen, and a
hypervisor abstraction layer such as ``libvirt``.
@ -73,7 +73,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
Cloud Platforms
Cloud Stacks
Third party cloud provisioning platforms such as OpenStack, CloudStack,
Third party cloud provisioning platforms such as OpenStack, CloudStack,
OpenNebula, ProxMox, etc.
Object Storage Device
@ -82,7 +82,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
Sometimes, Ceph users use the
term "OSD" to refer to :term:`Ceph OSD Daemon`, though the
proper term is "Ceph OSD".
Ceph OSD Daemon
Ceph OSD Daemons
Ceph OSD
@ -90,7 +90,29 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
disk (:term:`OSD`). Sometimes, Ceph users use the
term "OSD" to refer to "Ceph OSD Daemon", though the
proper term is "Ceph OSD".
OSD id
The integer that defines an OSD. It is generated by the monitors as part
of the creation of a new OSD.
OSD fsid
This is a unique identifier used to further improve the uniqueness of an
OSD and it is found in the OSD path in a file called ``osd_fsid``. This
``fsid`` term is used interchangeably with ``uuid``
OSD uuid
Just like the OSD fsid, this is the OSD unique identifier and is used
interchangeably with ``fsid``.
bluestore
OSD BlueStore is a new back end for OSD daemons (kraken and newer
versions). Unlike :term:`filestore` it stores objects directly on the
Ceph block devices without any file system interface.
filestore
A back end for OSD daemons, where a Journal is needed and files are
written to the filesystem.
Ceph Monitor
MON
The Ceph monitor software.
@ -106,22 +128,22 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
Ceph Clients
Ceph Client
The collection of Ceph components which can access a Ceph Storage
Cluster. These include the Ceph Object Gateway, the Ceph Block Device,
the Ceph Filesystem, and their corresponding libraries, kernel modules,
The collection of Ceph components which can access a Ceph Storage
Cluster. These include the Ceph Object Gateway, the Ceph Block Device,
the Ceph Filesystem, and their corresponding libraries, kernel modules,
and FUSEs.
Ceph Kernel Modules
The collection of kernel modules which can be used to interact with the
The collection of kernel modules which can be used to interact with the
Ceph System (e.g,. ``ceph.ko``, ``rbd.ko``).
Ceph Client Libraries
The collection of libraries that can be used to interact with components
The collection of libraries that can be used to interact with components
of the Ceph System.
Ceph Release
Any distinct numbered version of Ceph.
Ceph Point Release
Any ad-hoc release that includes only bug or security fixes.
@ -130,11 +152,11 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
testing, but may contain new features.
Ceph Release Candidate
A major version of Ceph that has undergone initial quality assurance
A major version of Ceph that has undergone initial quality assurance
testing and is ready for beta testers.
Ceph Stable Release
A major version of Ceph where all features from the preceding interim
A major version of Ceph where all features from the preceding interim
releases have been put through quality assurance testing successfully.
Ceph Test Framework
@ -144,7 +166,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
CRUSH
Controlled Replication Under Scalable Hashing. It is the algorithm
Ceph uses to compute object storage locations.
ruleset
A set of CRUSH data placement rules that applies to a particular pool(s).
@ -152,5 +174,14 @@ reflect either technical terms or legacy ways of referring to Ceph systems.
Pools
Pools are logical partitions for storing objects.
systemd oneshot
A systemd ``type`` where a command is defined in ``ExecStart`` which will
exit upon completion (it is not intended to daemonize)
LVM tags
Extensible metadata for LVM volumes and groups. It is used to store
Ceph-specific information about devices and their relationship with
OSDs.
.. _http://github.com/ceph: http://github.com/ceph
.. _Cluster Map: ../architecture#cluster-map

View File

@ -102,6 +102,7 @@ about Ceph, see our `Architecture`_ section.
api/index
architecture
Development <dev/index>
ceph-volume/index
release-notes
releases
Glossary <glossary>

View File

@ -12,12 +12,12 @@ default, so it's useful to know about them when setting up your cluster for
production.
Following the same configuration as `Installation (Quick)`_, we will set up a
cluster with ``node1`` as the monitor node, and ``node2`` and ``node3`` for
cluster with ``node1`` as the monitor node, and ``node2`` and ``node3`` for
OSD nodes.
.. ditaa::
.. ditaa::
/------------------\ /----------------\
| Admin Node | | node1 |
| +-------->+ |
@ -43,51 +43,51 @@ Monitor Bootstrapping
Bootstrapping a monitor (a Ceph Storage Cluster, in theory) requires
a number of things:
- **Unique Identifier:** The ``fsid`` is a unique identifier for the cluster,
and stands for File System ID from the days when the Ceph Storage Cluster was
principally for the Ceph Filesystem. Ceph now supports native interfaces,
block devices, and object storage gateway interfaces too, so ``fsid`` is a
- **Unique Identifier:** The ``fsid`` is a unique identifier for the cluster,
and stands for File System ID from the days when the Ceph Storage Cluster was
principally for the Ceph Filesystem. Ceph now supports native interfaces,
block devices, and object storage gateway interfaces too, so ``fsid`` is a
bit of a misnomer.
- **Cluster Name:** Ceph clusters have a cluster name, which is a simple string
without spaces. The default cluster name is ``ceph``, but you may specify
a different cluster name. Overriding the default cluster name is
especially useful when you are working with multiple clusters and you need to
clearly understand which cluster you are working with.
For example, when you run multiple clusters in a `federated architecture`_,
a different cluster name. Overriding the default cluster name is
especially useful when you are working with multiple clusters and you need to
clearly understand which cluster you are working with.
For example, when you run multiple clusters in a `federated architecture`_,
the cluster name (e.g., ``us-west``, ``us-east``) identifies the cluster for
the current CLI session. **Note:** To identify the cluster name on the
command line interface, specify the Ceph configuration file with the
the current CLI session. **Note:** To identify the cluster name on the
command line interface, specify the Ceph configuration file with the
cluster name (e.g., ``ceph.conf``, ``us-west.conf``, ``us-east.conf``, etc.).
Also see CLI usage (``ceph --cluster {cluster-name}``).
- **Monitor Name:** Each monitor instance within a cluster has a unique name.
- **Monitor Name:** Each monitor instance within a cluster has a unique name.
In common practice, the Ceph Monitor name is the host name (we recommend one
Ceph Monitor per host, and no commingling of Ceph OSD Daemons with
Ceph Monitor per host, and no commingling of Ceph OSD Daemons with
Ceph Monitors). You may retrieve the short hostname with ``hostname -s``.
- **Monitor Map:** Bootstrapping the initial monitor(s) requires you to
generate a monitor map. The monitor map requires the ``fsid``, the cluster
- **Monitor Map:** Bootstrapping the initial monitor(s) requires you to
generate a monitor map. The monitor map requires the ``fsid``, the cluster
name (or uses the default), and at least one host name and its IP address.
- **Monitor Keyring**: Monitors communicate with each other via a
secret key. You must generate a keyring with a monitor secret and provide
- **Monitor Keyring**: Monitors communicate with each other via a
secret key. You must generate a keyring with a monitor secret and provide
it when bootstrapping the initial monitor(s).
- **Administrator Keyring**: To use the ``ceph`` CLI tools, you must have
a ``client.admin`` user. So you must generate the admin user and keyring,
and you must also add the ``client.admin`` user to the monitor keyring.
The foregoing requirements do not imply the creation of a Ceph Configuration
file. However, as a best practice, we recommend creating a Ceph configuration
The foregoing requirements do not imply the creation of a Ceph Configuration
file. However, as a best practice, we recommend creating a Ceph configuration
file and populating it with the ``fsid``, the ``mon initial members`` and the
``mon host`` settings.
You can get and set all of the monitor settings at runtime as well. However,
a Ceph Configuration file may contain only those settings that override the
a Ceph Configuration file may contain only those settings that override the
default values. When you add settings to a Ceph configuration file, these
settings override the default settings. Maintaining those settings in a
settings override the default settings. Maintaining those settings in a
Ceph configuration file makes it easier to maintain your cluster.
The procedure is as follows:
@ -97,52 +97,52 @@ The procedure is as follows:
ssh {hostname}
For example::
For example::
ssh node1
#. Ensure you have a directory for the Ceph configuration file. By default,
Ceph uses ``/etc/ceph``. When you install ``ceph``, the installer will
#. Ensure you have a directory for the Ceph configuration file. By default,
Ceph uses ``/etc/ceph``. When you install ``ceph``, the installer will
create the ``/etc/ceph`` directory automatically. ::
ls /etc/ceph
ls /etc/ceph
**Note:** Deployment tools may remove this directory when purging a
cluster (e.g., ``ceph-deploy purgedata {node-name}``, ``ceph-deploy purge
{node-name}``).
#. Create a Ceph configuration file. By default, Ceph uses
#. Create a Ceph configuration file. By default, Ceph uses
``ceph.conf``, where ``ceph`` reflects the cluster name. ::
sudo vim /etc/ceph/ceph.conf
#. Generate a unique ID (i.e., ``fsid``) for your cluster. ::
#. Generate a unique ID (i.e., ``fsid``) for your cluster. ::
uuidgen
#. Add the unique ID to your Ceph configuration file. ::
#. Add the unique ID to your Ceph configuration file. ::
fsid = {UUID}
For example::
For example::
fsid = a7f64266-0894-4f1e-a635-d0aeaca0e993
#. Add the initial monitor(s) to your Ceph configuration file. ::
#. Add the initial monitor(s) to your Ceph configuration file. ::
mon initial members = {hostname}[,{hostname}]
For example::
For example::
mon initial members = node1
#. Add the IP address(es) of the initial monitor(s) to your Ceph configuration
file and save the file. ::
#. Add the IP address(es) of the initial monitor(s) to your Ceph configuration
file and save the file. ::
mon host = {ip-address}[,{ip-address}]
@ -160,18 +160,18 @@ The procedure is as follows:
#. Generate an administrator keyring, generate a ``client.admin`` user and add
the user to the keyring. ::
the user to the keyring. ::
sudo ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'
#. Add the ``client.admin`` key to the ``ceph.mon.keyring``. ::
#. Add the ``client.admin`` key to the ``ceph.mon.keyring``. ::
ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring
#. Generate a monitor map using the hostname(s), host IP address(es) and the FSID.
Save it as ``/tmp/monmap``::
#. Generate a monitor map using the hostname(s), host IP address(es) and the FSID.
Save it as ``/tmp/monmap``::
monmaptool --create --add {hostname} {ip-address} --fsid {uuid} /tmp/monmap
@ -199,7 +199,7 @@ The procedure is as follows:
sudo -u ceph ceph-mon --mkfs -i node1 --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring
#. Consider settings for a Ceph configuration file. Common settings include
#. Consider settings for a Ceph configuration file. Common settings include
the following::
[global]
@ -215,7 +215,7 @@ The procedure is as follows:
osd pool default size = {n} # Write an object n times.
osd pool default min size = {n} # Allow writing n copy in a degraded state.
osd pool default pg num = {n}
osd pool default pgp num = {n}
osd pool default pgp num = {n}
osd crush chooseleaf type = {n}
In the foregoing example, the ``[global]`` section of the configuration might
@ -233,7 +233,7 @@ The procedure is as follows:
osd pool default size = 2
osd pool default min size = 1
osd pool default pg num = 333
osd pool default pgp num = 333
osd pool default pgp num = 333
osd crush chooseleaf type = 1
#. Touch the ``done`` file.
@ -271,13 +271,13 @@ The procedure is as follows:
0 data,1 metadata,2 rbd,
#. Verify that the monitor is running. ::
#. Verify that the monitor is running. ::
ceph -s
You should see output that the monitor you started is up and running, and
you should see a health error indicating that placement groups are stuck
inactive. It should look something like this::
inactive. It should look something like this::
cluster a7f64266-0894-4f1e-a635-d0aeaca0e993
health HEALTH_ERR 192 pgs stuck inactive; 192 pgs stuck unclean; no osds
@ -295,7 +295,7 @@ Manager daemon configuration
On each node where you run a ceph-mon daemon, you should also set up a ceph-mgr daemon.
See :doc:`../mgr/administrator`
See :ref:`mgr-administrator-guide`
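A minimal sketch of that step, assuming the monitor node from this example
(``node1``) and following the commands from the mgr administrator's guide::

    ceph auth get-or-create mgr.node1 mon 'allow profile mgr' osd 'allow *' mds 'allow *'
    # place the resulting keyring in /var/lib/ceph/mgr/ceph-node1/keyring, then:
    ceph-mgr -i node1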
Adding OSDs
===========
@ -304,7 +304,7 @@ Once you have your initial monitor(s) running, you should add OSDs. Your cluster
cannot reach an ``active + clean`` state until you have enough OSDs to handle the
number of copies of an object (e.g., ``osd pool default size = 2`` requires at
least two OSDs). After bootstrapping your monitor, your cluster has a default
CRUSH map; however, the CRUSH map doesn't have any Ceph OSD Daemons mapped to
CRUSH map; however, the CRUSH map doesn't have any Ceph OSD Daemons mapped to
a Ceph Node.
@ -314,7 +314,7 @@ Short Form
Ceph provides the ``ceph-disk`` utility, which can prepare a disk, partition or
directory for use with Ceph. The ``ceph-disk`` utility creates the OSD ID by
incrementing the index. Additionally, ``ceph-disk`` will add the new OSD to the
CRUSH map under the host for you. Execute ``ceph-disk -h`` for CLI details.
CRUSH map under the host for you. Execute ``ceph-disk -h`` for CLI details.
The ``ceph-disk`` utility automates the steps of the `Long Form`_ below. To
create the first two OSDs with the short form procedure, execute the following
on ``node2`` and ``node3``:
@ -335,7 +335,7 @@ on ``node2`` and ``node3``:
sudo ceph-disk activate {data-path} [--activate-key {path}]
For example::
For example::
sudo ceph-disk activate /dev/hdd1
@ -372,7 +372,7 @@ OSDs with the long form procedure, execute the following steps for each OSD.
``client.bootstrap-osd`` key is present on the machine. You may
alternatively execute this command as ``client.admin`` on a
different host where that key is present.::
ID=$(echo "{\"cephx_secret\": \"$OSD_SECRET\"}" | \
ceph osd new $UUID -i - \
-n client.bootstrap-osd -k /var/lib/ceph/bootstrap-osd/ceph.keyring)
@ -381,7 +381,7 @@ OSDs with the long form procedure, execute the following steps for each OSD.
mkdir /var/lib/ceph/osd/ceph-$ID
#. If the OSD is for a drive other than the OS drive, prepare it
#. If the OSD is for a drive other than the OS drive, prepare it
for use with Ceph, and mount it to the directory you just created. ::
mkfs.xfs /dev/{DEV}
@ -400,15 +400,15 @@ OSDs with the long form procedure, execute the following steps for each OSD.
chown -R ceph:ceph /var/lib/ceph/osd/ceph-$ID
#. After you add an OSD to Ceph, the OSD is in your configuration. However,
it is not yet running. You must start
#. After you add an OSD to Ceph, the OSD is in your configuration. However,
it is not yet running. You must start
your new OSD before it can begin receiving data.
For modern systemd distributions::
systemctl enable ceph-osd@$ID
systemctl start ceph-osd@$ID
For example::
systemctl enable ceph-osd@12
@ -427,11 +427,11 @@ In the below instructions, ``{id}`` is an arbitrary name, such as the hostname o
#. Create a keyring.::
ceph-authtool --create-keyring /var/lib/ceph/mds/{cluster-name}-{id}/keyring --gen-key -n mds.{id}
#. Import the keyring and set caps.::
ceph auth add mds.{id} osd "allow rwx" mds "allow" mon "allow profile mds" -i /var/lib/ceph/mds/{cluster}-{id}/keyring
#. Add to ceph.conf.::
[mds.{id}]
@ -458,24 +458,24 @@ Summary
=======
Once you have your monitor and two OSDs up and running, you can watch the
placement groups peer by executing the following::
placement groups peer by executing the following::
ceph -w
To view the tree, execute the following::
To view the tree, execute the following::
ceph osd tree
You should see output that looks something like this::
You should see output that looks something like this::
# id weight type name up/down reweight
-1 2 root default
-2 2 host node1
0 1 osd.0 up 1
-3 1 host node2
1 1 osd.1 up 1
1 1 osd.1 up 1
To add (or remove) additional monitors, see `Add/Remove Monitors`_.
To add (or remove) additional monitors, see `Add/Remove Monitors`_.
To add (or remove) additional Ceph OSD Daemons, see `Add/Remove OSDs`_.

View File

@ -23,6 +23,8 @@ set(osd_srcs
ceph-clsinfo.rst
ceph-detect-init.rst
ceph-disk.rst
ceph-volume.rst
ceph-volume-systemd.rst
ceph-osd.rst
osdmaptool.rst)

View File

@ -0,0 +1,56 @@
:orphan:
=======================================================
ceph-volume-systemd -- systemd ceph-volume helper tool
=======================================================
.. program:: ceph-volume-systemd
Synopsis
========
| **ceph-volume-systemd** *systemd instance name*
Description
===========
:program:`ceph-volume-systemd` is a systemd helper tool that receives input
from (dynamically created) systemd units so that activation of OSDs can
proceed.
It translates the input into a call to ``ceph-volume`` for activation
purposes only.
Examples
========
Its input is the ``systemd instance name`` (represented by ``%i`` in a systemd
unit), and it should be in the following format::
<ceph-volume subcommand>-<extra metadata>
In the case of ``lvm`` a call could look like::
/usr/bin/ceph-volume-systemd lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
Which in turn will call ``ceph-volume`` in the following way::
ceph-volume lvm trigger 0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
Any other subcommand will need to have implemented a ``trigger`` command that
can consume the extra metadata in this format.
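Purely as an illustration (no such subcommand exists yet), a hypothetical
``disk`` subcommand would be driven the same way::

    /usr/bin/ceph-volume-systemd disk-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
    # which would proxy to:
    ceph-volume disk trigger 0-8715BEB4-15C5-49DE-BA6F-401086EC7B41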
Availability
============
:program:`ceph-volume-systemd` is part of Ceph, a massively scalable,
open-source, distributed storage system. Please refer to the documentation at
http://docs.ceph.com/ for more information.
See also
========
:doc:`ceph-osd <ceph-osd>`\(8),
:doc:`ceph-volume <ceph-volume>`\(8),

View File

@ -0,0 +1,122 @@
:orphan:
========================================
ceph-volume -- Ceph OSD deployment tool
========================================
.. program:: ceph-volume
Synopsis
========
| **ceph-volume** [-h] [--cluster CLUSTER] [--log-level LOG_LEVEL]
| [--log-path LOG_PATH]
| **ceph-volume** **lvm** [ *trigger* | *create* | *activate* | *prepare* ]
Description
===========
:program:`ceph-volume` is a single purpose command line tool to deploy logical
volumes as OSDs, trying to maintain a similar API to ``ceph-disk`` when
preparing, activating, and creating OSDs.
It deviates from ``ceph-disk`` by not interacting or relying on the udev rules
that come installed for Ceph. These rules allow automatic detection of
previously setup devices that are in turn fed into ``ceph-disk`` to activate
them.
Commands
========
lvm
---
By making use of LVM tags, the ``lvm`` sub-command is able to store and later
re-discover and query devices associated with OSDs so that they can later
be activated.
Subcommands:
**activate**
Enables a systemd unit that persists the OSD ID and its UUID (also called
``fsid`` in Ceph CLI tools), so that at boot time it can understand what OSD is
enabled and needs to be mounted.
Usage::
ceph-volume lvm activate --filestore <osd id> <osd fsid>
Optional Arguments:
* [-h, --help] show the help message and exit
* [--bluestore] bluestore objectstore (not yet implemented)
* [--filestore] filestore objectstore (current default)
**prepare**
Prepares a logical volume to be used as an OSD and journal using a ``filestore`` setup
(``bluestore`` support is planned). It will not create or modify the logical volumes
except for adding extra metadata.
Usage::
ceph-volume lvm prepare --filestore --data <data lv> --journal <journal device>
Optional arguments:
* [-h, --help] show the help message and exit
* [--journal JOURNAL] A logical group name, path to a logical volume, or path to a device
* [--journal-size GB] Size (in GB) of the journal
* [--bluestore] Use the bluestore objectstore (not currently supported)
* [--filestore] Use the filestore objectstore (currently the only supported object store)
* [--osd-id OSD_ID] Reuse an existing OSD id
* [--osd-fsid OSD_FSID] Reuse an existing OSD fsid
Required arguments:
* --data A logical group name or a path to a logical volume
**create**
Wraps the two-step process to provision a new osd (calling ``prepare`` first
and then ``activate``) into a single one. The reason to prefer ``prepare`` and
then ``activate`` is to gradually introduce new OSDs into a cluster, and to
avoid large amounts of data being rebalanced.
The single-call process unifies exactly what ``prepare`` and ``activate`` do,
with the convenience of doing it all at once. Flags and general usage are
equivalent to those of the ``prepare`` subcommand.
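Usage (a sketch mirroring the ``prepare`` flags documented above, not an
additional interface)::

    ceph-volume lvm create --filestore --data <data lv> --journal <journal device>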
**trigger**
This subcommand is not meant to be used directly; it is used by systemd to
proxy input to ``ceph-volume lvm activate`` by parsing the input from systemd,
detecting the UUID and ID associated with an OSD.
Usage::
ceph-volume lvm trigger <SYSTEMD-DATA>
The systemd "data" is expected to be in the format of::
<OSD ID>-<OSD UUID>
The lvs associated with the OSD need to have been prepared previously,
so that all needed tags and metadata exist.
Positional arguments:
* <SYSTEMD_DATA> Data from a systemd unit containing ID and UUID of the OSD.
Availability
============
:program:`ceph-volume` is part of Ceph, a massively scalable, open-source, distributed storage system. Please refer to
the documentation at http://docs.ceph.com/ for more information.
See also
========
:doc:`ceph-osd <ceph-osd>`\(8),
:doc:`ceph-disk <ceph-disk>`\(8),

View File

@ -560,17 +560,19 @@ Usage::
ceph osd create {<uuid>} {<id>}
Subcommand ``new`` reuses a previously destroyed OSD *id*. The new OSD will
have the specified *uuid*, and the command expects a JSON file containing
the base64 cephx key for auth entity *client.osd.<id>*, as well as optional
base64 cepx key for dm-crypt lockbox access and a dm-crypt key. Specifying
a dm-crypt requires specifying the accompanying lockbox cephx key.
Subcommand ``new`` can be used to create a new OSD or to recreate a previously
destroyed OSD with a specific *id*. The new OSD will have the specified *uuid*,
and the command expects a JSON file containing the base64 cephx key for auth
entity *client.osd.<id>*, as well as an optional base64 cephx key for dm-crypt
lockbox access and a dm-crypt key. Specifying a dm-crypt key requires specifying
the accompanying lockbox cephx key.
Usage::
ceph osd new {<id>} {<uuid>} -i {<secrets.json>}
The secrets JSON file is expected to maintain a form of the following format::
The secrets JSON file is optional but if provided, is expected to maintain
a form of the following format::
{
"cephx_secret": "AQBWtwhZdBO5ExAAIDyjK2Bh16ZXylmzgYYEjg=="

View File

@ -1,3 +1,4 @@
.. _mgr-administrator-guide:
ceph-mgr administrator's guide
==============================
@ -39,7 +40,7 @@ High availability
-----------------
In general, you should set up a ceph-mgr on each of the hosts
running a ceph-mon daemon to achieve the same level of availability.
running a ceph-mon daemon to achieve the same level of availability.
By default, whichever ceph-mgr instance comes up first will be made
active by the monitors, and the others will be standbys. There is

View File

@ -66,7 +66,10 @@
"ec_hash_error",
"ec_size_error",
"oi_attr_missing",
"oi_attr_corrupted"
"oi_attr_corrupted",
"obj_size_oi_mismatch",
"ss_attr_missing",
"ss_attr_corrupted"
]
},
"minItems": 0,
@ -104,6 +107,9 @@
"osd": {
"type": "integer"
},
"primary": {
"type": "boolean"
},
"size": {
"type": "integer"
},
@ -129,7 +135,10 @@
"ec_hash_error",
"ec_size_error",
"oi_attr_missing",
"oi_attr_corrupted"
"oi_attr_corrupted",
"obj_size_oi_mismatch",
"ss_attr_missing",
"ss_attr_corrupted"
]
},
"minItems": 0,
@ -164,6 +173,7 @@
},
"required": [
"osd",
"primary",
"errors"
]
}

View File

@ -6,6 +6,8 @@
:maxdepth: 1
../../man/8/ceph-disk.rst
../../man/8/ceph-volume.rst
../../man/8/ceph-volume-systemd.rst
../../man/8/ceph.rst
../../man/8/ceph-deploy.rst
../../man/8/ceph-rest-api.rst

View File

@ -36,6 +36,13 @@ Ceph clusters.
configuration file of the same name (e.g. /etc/ceph/remote.conf). See the
`ceph-conf`_ documentation for how to configure multiple clusters.
.. note:: Images in a given pool will be mirrored to a pool with the same name
on the remote cluster. Images using a separate data-pool will use a data-pool
with the same name on the remote cluster. E.g., if an image being mirrored is
in the ``rbd`` pool on the local cluster and using a data-pool called
``rbd-ec``, pools called ``rbd`` and ``rbd-ec`` must exist on the remote
cluster and will be used for mirroring the image.
Enable Mirroring
----------------

View File

@ -25,8 +25,8 @@
echo "Scheduling " $2 " branch"
if [ $2 = "master" ] ; then
# run master branch with --newest option looking for good sha1 7 builds back
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 --newest 7 -e $5 $6
# run master branch with --newest option looking for good sha1 7 builds back with /999 jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 --newest 7 -e $5 $6
elif [ $2 = "hammer" ] ; then
# run hammer branch with less jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/56 -e $5 $6
@ -34,11 +34,11 @@ elif [ $2 = "jewel" ] ; then
# run jewel branch with /40 jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6
elif [ $2 = "kraken" ] ; then
# run kraken branch with /40 jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6
# run kraken branch with /999 jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 -e $5 $6
elif [ $2 = "luminous" ] ; then
# run luminous branch with /40 jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6
# run luminous branch with /999 jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 -e $5 $6
else
# run NON master branches without --newest
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 -e $5 $6

View File

@ -1,25 +1,123 @@
#!/bin/sh -ex
#!/usr/bin/env bash
set -e
if [ ! -e Makefile ]; then
if [ ! -e Makefile -o ! -d bin ]; then
echo 'run this from the build dir'
exit 1
fi
if [ ! -d /tmp/ceph-disk-virtualenv -o ! -d /tmp/ceph-detect-init-virtualenv ]; then
echo '/tmp/*-virtualenv directories not built. Please run "make check" first.'
exit 1
fi
if [ `uname` = FreeBSD ]; then
# otherwise module prettytable will not be found
export PYTHONPATH=/usr/local/lib/python2.7/site-packages
exec_mode=+111
KERNCORE="kern.corefile"
COREPATTERN="core.%N.%P"
else
export PYTHONPATH=/usr/lib/python2.7/dist-packages
exec_mode=/111
KERNCORE="kernel.core_pattern"
COREPATTERN="core.%e.%p.%t"
fi
for f in `find ../qa/standalone -perm $exec_mode -type f`
do
echo '--- $f ---'
PATH=$PATH:bin \
CEPH_ROOT=.. \
CEPH_LIB=lib \
$f || exit 1
done
function finish() {
if [ -n "$precore" ]; then
sudo sysctl -w ${KERNCORE}=${precore}
fi
exit 0
}
trap finish TERM HUP INT
PATH=$(pwd)/bin:$PATH
# TODO: Use getopts
dryrun=false
if [[ "$1" = "--dry-run" ]]; then
dryrun=true
shift
fi
all=false
if [ "$1" = "" ]; then
all=true
fi
select=("$@")
location="../qa/standalone"
count=0
errors=0
userargs=""
precore="$(sysctl -n $KERNCORE)"
# If corepattern already set, avoid having to use sudo
if [ "$precore" = "$COREPATTERN" ]; then
precore=""
else
sudo sysctl -w ${KERNCORE}=${COREPATTERN}
fi
ulimit -c unlimited
for f in $(cd $location ; find . -perm $exec_mode -type f)
do
f=$(echo $f | sed 's/\.\///')
# This is tested with misc/test-ceph-helpers.sh
if [[ "$f" = "ceph-helpers.sh" ]]; then
continue
fi
if [[ "$all" = "false" ]]; then
found=false
for c in "${!select[@]}"
do
# Get command and any arguments of subset of tests to run
allargs="${select[$c]}"
arg1=$(echo "$allargs" | cut --delimiter " " --field 1)
# Get user args for this selection for use below
userargs="$(echo $allargs | cut -s --delimiter " " --field 2-)"
if [[ "$arg1" = $(basename $f) ]]; then
found=true
break
fi
if [[ "$arg1" = "$f" ]]; then
found=true
break
fi
done
if [[ "$found" = "false" ]]; then
continue
fi
fi
# Don't run test-failure.sh unless explicitly specified
if [ "$all" = "true" -a "$f" = "special/test-failure.sh" ]; then
continue
fi
cmd="$location/$f $userargs"
count=$(expr $count + 1)
echo "--- $cmd ---"
if [[ "$dryrun" != "true" ]]; then
if ! PATH=$PATH:bin \
CEPH_ROOT=.. \
CEPH_LIB=lib \
LOCALRUN=yes \
$cmd ; then
echo "$f .............. FAILED"
errors=$(expr $errors + 1)
fi
fi
done
if [ -n "$precore" ]; then
sudo sysctl -w ${KERNCORE}=${precore}
fi
if [ "$errors" != "0" ]; then
echo "$errors TESTS FAILED, $count TOTAL TESTS"
exit 1
fi
echo "ALL $count TESTS PASSED"
exit 0

View File

@ -12,7 +12,12 @@ You can run them in a git checkout + build directory as well:
* The qa/run-standalone.sh will run all of them in sequence. This is slow
since there is no parallelism.
* You can run an individual script by passing these environment args. For
example, if you are in the build/ directory,
* You can run individual script(s) by specifying the basename or path below
qa/standalone as arguments to qa/run-standalone.sh.
PATH=$PATH:bin CEPH_ROOT=.. CEPH_LIB=lib ../qa/standalone/mon/misc.sh
../qa/run-standalone.sh misc.sh osd/osd-dup.sh
* You can pass arguments to a selected test by quoting the test name together
  with its arguments as a single argument, as in the example below.
../qa/run-standalone.sh "test-ceph-helpers.sh test_get_last_scrub_stamp"

View File

@ -33,6 +33,7 @@ fi
if [ `uname` = FreeBSD ]; then
SED=gsed
DIFFCOLOPTS=""
KERNCORE="kern.corefile"
else
SED=sed
termwidth=$(stty -a | head -1 | sed -e 's/.*columns \([0-9]*\).*/\1/')
@ -40,6 +41,7 @@ else
termwidth="-W ${termwidth}"
fi
DIFFCOLOPTS="-y $termwidth"
KERNCORE="kernel.core_pattern"
fi
EXTRA_OPTS=""
@ -152,13 +154,43 @@ function test_setup() {
#
function teardown() {
local dir=$1
local dumplogs=$2
kill_daemons $dir KILL
if [ `uname` != FreeBSD ] \
&& [ $(stat -f -c '%T' .) == "btrfs" ]; then
__teardown_btrfs $dir
fi
local cores="no"
local pattern="$(sysctl -n $KERNCORE)"
# See if we have apport core handling
if [ "${pattern:0:1}" = "|" ]; then
# TODO: Where can we get the dumps?
# Not sure where the dumps really are so this will look in the CWD
pattern=""
fi
# Locally core files start with "core", while on teuthology they end with "core"
if ls $(dirname $pattern) | grep -q '^core\|core$' ; then
cores="yes"
if [ -n "$LOCALRUN" ]; then
mkdir /tmp/cores.$$ 2> /dev/null || true
for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do
mv $i /tmp/cores.$$
done
fi
fi
if [ "$cores" = "yes" -o "$dumplogs" = "1" ]; then
display_logs $dir
fi
rm -fr $dir
rm -rf $(get_asok_dir)
if [ "$cores" = "yes" ]; then
echo "ERROR: Failure due to cores found"
if [ -n "$LOCALRUN" ]; then
echo "Find saved core files in /tmp/cores.$$"
fi
return 1
fi
return 0
}
function __teardown_btrfs() {
@ -406,6 +438,7 @@ function run_mon() {
--id $id \
--mon-osd-full-ratio=.99 \
--mon-data-avail-crit=1 \
--mon-data-avail-warn=5 \
--paxos-propose-interval=0.1 \
--osd-crush-chooseleaf-type=0 \
$EXTRA_OPTS \
@ -472,10 +505,15 @@ function test_run_mon() {
function create_rbd_pool() {
ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1
ceph osd pool create rbd $PG_NUM || return 1
create_pool rbd $PG_NUM || return 1
rbd pool init rbd
}
function create_pool() {
ceph osd pool create "$@"
sleep 1
}
#######################################################################
function run_mgr() {
@ -1266,7 +1304,7 @@ function test_get_last_scrub_stamp() {
run_osd $dir 0 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
stamp=$(get_last_scrub_stamp 2.0)
stamp=$(get_last_scrub_stamp 1.0)
test -n "$stamp" || return 1
teardown $dir || return 1
}
@ -1466,9 +1504,9 @@ function test_repair() {
run_osd $dir 0 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
repair 2.0 || return 1
repair 1.0 || return 1
kill_daemons $dir KILL osd || return 1
! TIMEOUT=1 repair 2.0 || return 1
! TIMEOUT=1 repair 1.0 || return 1
teardown $dir || return 1
}
#######################################################################
@ -1506,9 +1544,9 @@ function test_pg_scrub() {
run_osd $dir 0 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
pg_scrub 2.0 || return 1
pg_scrub 1.0 || return 1
kill_daemons $dir KILL osd || return 1
! TIMEOUT=1 pg_scrub 2.0 || return 1
! TIMEOUT=1 pg_scrub 1.0 || return 1
teardown $dir || return 1
}
@ -1581,7 +1619,7 @@ function wait_for_scrub() {
local sname=${3:-last_scrub_stamp}
for ((i=0; i < $TIMEOUT; i++)); do
if test "$last_scrub" != "$(get_last_scrub_stamp $pgid $sname)" ; then
if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then
return 0
fi
sleep 1
@ -1598,7 +1636,7 @@ function test_wait_for_scrub() {
run_osd $dir 0 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
local pgid=2.0
local pgid=1.0
ceph pg repair $pgid
local last_scrub=$(get_last_scrub_stamp $pgid)
wait_for_scrub $pgid "$last_scrub" || return 1
@ -1796,6 +1834,7 @@ function test_flush_pg_stats()
bytes_used=`ceph df detail --format=json | jq "$jq_filter.bytes_used"`
test $raw_bytes_used > 0 || return 1
test $raw_bytes_used == $bytes_used || return 1
teardown $dir
}
#######################################################################
@ -1840,10 +1879,9 @@ function main() {
if run $dir "$@" ; then
code=0
else
display_logs $dir
code=1
fi
teardown $dir || return 1
teardown $dir $code || return 1
return $code
}
@ -1858,7 +1896,7 @@ function run_tests() {
export CEPH_MON="127.0.0.1:7109" # git grep '\<7109\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+=" --fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
export CEPH_CONF=/dev/null
@ -1866,13 +1904,17 @@ function run_tests() {
local dir=td/ceph-helpers
for func in $funcs ; do
$func $dir || return 1
if ! $func $dir; then
teardown $dir 1
return 1
fi
done
}
if test "$1" = TESTS ; then
shift
run_tests "$@"
exit $?
fi
# NOTE:
@ -1915,6 +1957,37 @@ function jq_success() {
return 1
}
function inject_eio() {
local pooltype=$1
shift
local which=$1
shift
local poolname=$1
shift
local objname=$1
shift
local dir=$1
shift
local shard_id=$1
shift
local -a initial_osds=($(get_osds $poolname $objname))
local osd_id=${initial_osds[$shard_id]}
if [ "$pooltype" != "ec" ]; then
shard_id=""
fi
set_config osd $osd_id filestore_debug_inject_read_err true || return 1
local loop=0
while ( CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \
inject${which}err $poolname $objname $shard_id | grep -q Invalid ); do
loop=$(expr $loop + 1)
if [ $loop = "10" ]; then
return 1
fi
sleep 1
done
}
# Local Variables:
# compile-command: "cd ../../src ; make -j4 && ../qa/standalone/ceph-helpers.sh TESTS # test_get_config"
# End:

View File

@ -57,7 +57,7 @@ function create_erasure_coded_pool() {
ceph osd erasure-code-profile set myprofile \
crush-failure-domain=osd || return 1
ceph osd pool create $poolname 12 12 erasure myprofile \
create_pool $poolname 12 12 erasure myprofile \
|| return 1
wait_for_clean || return 1
}
@ -164,7 +164,7 @@ function TEST_rados_put_get_lrc_advanced() {
mapping=DD_ \
crush-steps='[ [ "chooseleaf", "osd", 0 ] ]' \
layers='[ [ "DDc", "" ] ]' || return 1
ceph osd pool create $poolname 12 12 erasure $profile \
create_pool $poolname 12 12 erasure $profile \
|| return 1
rados_put_get $dir $poolname || return 1
@ -182,7 +182,7 @@ function TEST_rados_put_get_lrc_kml() {
plugin=lrc \
k=4 m=2 l=3 \
crush-failure-domain=osd || return 1
ceph osd pool create $poolname 12 12 erasure $profile \
create_pool $poolname 12 12 erasure $profile \
|| return 1
rados_put_get $dir $poolname || return 1
@ -202,7 +202,7 @@ function TEST_rados_put_get_isa() {
ceph osd erasure-code-profile set profile-isa \
plugin=isa \
crush-failure-domain=osd || return 1
ceph osd pool create $poolname 1 1 erasure profile-isa \
create_pool $poolname 1 1 erasure profile-isa \
|| return 1
rados_put_get $dir $poolname || return 1
@ -222,7 +222,7 @@ function TEST_rados_put_get_jerasure() {
plugin=jerasure \
k=4 m=2 \
crush-failure-domain=osd || return 1
ceph osd pool create $poolname 12 12 erasure $profile \
create_pool $poolname 12 12 erasure $profile \
|| return 1
rados_put_get $dir $poolname || return 1
@ -242,7 +242,7 @@ function TEST_rados_put_get_shec() {
plugin=shec \
k=2 m=1 c=1 \
crush-failure-domain=osd || return 1
ceph osd pool create $poolname 12 12 erasure $profile \
create_pool $poolname 12 12 erasure $profile \
|| return 1
rados_put_get $dir $poolname || return 1
@ -318,7 +318,7 @@ function TEST_chunk_mapping() {
mapping='_DD' \
crush-steps='[ [ "choose", "osd", 0 ] ]' || return 1
ceph osd erasure-code-profile get remap-profile
ceph osd pool create remap-pool 12 12 erasure remap-profile \
create_pool remap-pool 12 12 erasure remap-profile \
|| return 1
#

View File

@ -60,7 +60,7 @@ function create_erasure_coded_pool() {
plugin=jerasure \
k=2 m=1 \
crush-failure-domain=osd || return 1
ceph osd pool create $poolname 1 1 erasure myprofile \
create_pool $poolname 1 1 erasure myprofile \
|| return 1
wait_for_clean || return 1
}
@ -142,22 +142,6 @@ function rados_put_get() {
rm $dir/ORIGINAL
}
function inject_eio() {
local objname=$1
shift
local dir=$1
shift
local shard_id=$1
shift
local poolname=pool-jerasure
local -a initial_osds=($(get_osds $poolname $objname))
local osd_id=${initial_osds[$shard_id]}
set_config osd $osd_id filestore_debug_inject_read_err true || return 1
CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \
injectdataerr $poolname $objname $shard_id || return 1
}
function rados_get_data_eio() {
local dir=$1
shift
@ -170,11 +154,11 @@ function rados_get_data_eio() {
#
local poolname=pool-jerasure
local objname=obj-eio-$$-$shard_id
inject_eio $objname $dir $shard_id || return 1
inject_eio ec data $poolname $objname $dir $shard_id || return 1
rados_put_get $dir $poolname $objname $recovery || return 1
shard_id=$(expr $shard_id + 1)
inject_eio $objname $dir $shard_id || return 1
inject_eio ec data $poolname $objname $dir $shard_id || return 1
# Now 2 out of 3 shards get EIO, so should fail
rados_get $dir $poolname $objname fail || return 1
}

View File

@ -18,4 +18,4 @@
# GNU Library Public License for more details.
#
$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS
$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS "$@"

View File

@ -40,7 +40,7 @@ function TEST_osd_pool_get_set() {
setup $dir || return 1
run_mon $dir a || return 1
create_rbd_pool || return 1
ceph osd pool create $TEST_POOL 8
create_pool $TEST_POOL 8
local flag
for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub; do
@ -82,7 +82,7 @@ function TEST_osd_pool_get_set() {
! ceph osd pool set $TEST_POOL min_size 0 || return 1
local ecpool=erasepool
ceph osd pool create $ecpool 12 12 erasure default || return 1
create_pool $ecpool 12 12 erasure default || return 1
#erasure pool size=k+m, min_size=k
local size=$(ceph osd pool get $ecpool size|awk '{print $2}')
local min_size=$(ceph osd pool get $ecpool min_size|awk '{print $2}')

View File

@ -136,7 +136,7 @@ function TEST_put_get() {
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
ceph osd pool create hello 8 || return 1
create_pool hello 8 || return 1
echo "hello world" > $dir/hello
rados --pool hello put foo $dir/hello || return 1

View File

@ -98,7 +98,7 @@ function TEST_rm() {
grep "WRONG does not exist" || return 1
ceph osd erasure-code-profile set $profile || return 1
ceph osd pool create poolname 12 12 erasure $profile || return 1
create_pool poolname 12 12 erasure $profile || return 1
! ceph osd erasure-code-profile rm $profile > $dir/out 2>&1 || return 1
grep "poolname.*using.*$profile" $dir/out || return 1
ceph osd pool delete poolname poolname --yes-i-really-really-mean-it || return 1

View File

@ -34,7 +34,7 @@ function TEST_pool_quota() {
run_osd $dir 2 || return 1
local poolname=testquoa
ceph osd pool create $poolname 20
create_pool $poolname 20
local objects=`ceph df detail | grep -w $poolname|awk '{print $3}'`
local bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'`

View File

@ -2,6 +2,8 @@
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
[ `uname` = FreeBSD ] && exit 0
function run() {
local dir=$1
shift
@ -38,7 +40,7 @@ function TEST_filestore_to_bluestore() {
sleep 5
ceph osd pool create foo 16
create_pool foo 16
# write some objects
rados bench -p foo 10 write -b 4096 --no-cleanup || return 1

View File

@ -0,0 +1,129 @@
#! /bin/bash
#
# Copyright (C) 2017 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
function run() {
local dir=$1
shift
export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
$func $dir || return 1
done
}
function TEST_recovery_scrub() {
local dir=$1
local poolname=test
TESTDATA="testdata.$$"
OSDS=8
PGS=32
OBJECTS=4
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 || return 1
run_mgr $dir x || return 1
for osd in $(seq 0 $(expr $OSDS - 1))
do
run_osd $dir $osd || return 1
done
# Create a pool with $PGS pgs
create_pool $poolname $PGS $PGS
wait_for_clean || return 1
poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
dd if=/dev/urandom of=$TESTDATA bs=1M count=50
for i in $(seq 1 $OBJECTS)
do
rados -p $poolname put obj${i} $TESTDATA
done
rm -f $TESTDATA
ceph osd pool set $poolname size 4
pids=""
for pg in $(seq 0 $(expr $PGS - 1))
do
run_in_background pids pg_scrub $poolid.$(echo "{ obase=16; $pg }" | bc | tr '[:upper:]' '[:lower:]')
done
ceph pg dump pgs
wait_background pids
return_code=$?
if [ $return_code -ne 0 ]; then return $return_code; fi
ERRORS=0
pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid')
pid=$(cat $pidfile)
if ! kill -0 $pid
then
echo "OSD crash occurred"
tail -100 $dir/osd.0.log
ERRORS=$(expr $ERRORS + 1)
fi
kill_daemons $dir || return 1
declare -a err_strings
err_strings[0]="not scheduling scrubs due to active recovery"
# Test with these two strings once the check in OSD::sched_scrub() is disabled
#err_strings[0]="handle_scrub_reserve_request: failed to reserve remotely"
#err_strings[1]="sched_scrub: failed to reserve locally"
for osd in $(seq 0 $(expr $OSDS - 1))
do
grep "failed to reserve\|not scheduling scrubs" $dir/osd.${osd}.log
done
for err_string in "${err_strings[@]}"
do
found=false
for osd in $(seq 0 $(expr $OSDS - 1))
do
if grep "$err_string" $dir/osd.${osd}.log > /dev/null;
then
found=true
fi
done
if [ "$found" = "false" ]; then
echo "Missing log message '$err_string'"
ERRORS=$(expr $ERRORS + 1)
fi
done
teardown $dir || return 1
if [ $ERRORS != "0" ];
then
echo "TEST FAILED WITH $ERRORS ERRORS"
return 1
fi
echo "TEST PASSED"
return 0
}
main osd-recovery-scrub "$@"
# Local Variables:
# compile-command: "cd build ; make -j4 && \
# ../qa/run-standalone.sh osd-recovery-scrub.sh"

File diff suppressed because it is too large

View File

@ -46,7 +46,8 @@ function TEST_scrub_snaps() {
wait_for_clean || return 1
# Create a pool with a single pg
ceph osd pool create $poolname 1 1
create_pool $poolname 1 1
wait_for_clean || return 1
poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
@ -449,15 +450,14 @@ EOF
err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 is missing in clone_size"
err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 is an unexpected clone"
err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 size 1032 != clone_size 1033"
err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 23 errors"
err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 22 errors"
err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head can't decode 'snapset' attr buffer"
err_strings[24]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj12:1 has no oi or legacy_snaps; cannot convert 1=[[]1[]]:[[]1[]].stray_clone_snaps=[{]1=[[]1[]][}]"
for i in `seq 0 ${#err_strings[@]}`
for err_string in "${err_strings[@]}"
do
if ! grep "${err_strings[$i]}" $dir/osd.0.log > /dev/null;
if ! grep "$err_string" $dir/osd.0.log > /dev/null;
then
echo "Missing log message '${err_strings[$i]}'"
echo "Missing log message '$err_string'"
ERRORS=$(expr $ERRORS + 1)
fi
done

View File

@ -0,0 +1,48 @@
#!/usr/bin/env bash
set -ex
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
function run() {
local dir=$1
shift
export CEPH_MON="127.0.0.1:7202" # git grep '\<7202\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
$func $dir || return 1
teardown $dir || return 1
done
}
function TEST_failure_log() {
local dir=$1
cat > $dir/test_failure.log << EOF
This is a fake log file
*
*
*
*
*
This ends the fake log file
EOF
# Test fails
return 1
}
function TEST_failure_core_only() {
local dir=$1
run_mon $dir a || return 1
kill_daemons $dir SEGV mon 5
return 0
}
main test_failure "$@"

View File

@ -2,3 +2,4 @@ overrides:
ceph:
log-whitelist:
- \(MDS_TRIM\)
- Behind on trimming

View File

@ -8,4 +8,6 @@ tasks:
- ceph:
skip_mgr_daemons: true
add_osds_to_crush: true
log-whitelist:
- required past_interval bounds are empty
- print: "**** done ceph"

View File

@ -24,6 +24,8 @@ overrides:
- scrub mismatch
- ScrubResult
- wrongly marked
- (MDS_FAILED)
- \(MDS_FAILED\)
- \(OBJECT_
- is unresponsive
conf:
fs: xfs

View File

@ -23,6 +23,7 @@ tasks:
- \(PG_
- Monitor daemon marked osd
- Behind on trimming
- is unresponsive
conf:
global:
mon warn on pool no app: false

View File

@ -1,11 +0,0 @@
meta:
- desc: |
generate read/write load with rados objects ranging from 1MB to 25MB
workload:
full_sequential:
- workunit:
branch: jewel
clients:
client.0:
- rados/load-gen-big.sh
- print: "**** done rados/load-gen-big.sh 2-workload"

View File

@ -8,4 +8,6 @@ tasks:
- ceph:
skip_mgr_daemons: true
add_osds_to_crush: true
log-whitelist:
- required past_interval bounds are empty
- print: "**** done ceph"

View File

@ -297,7 +297,6 @@ def build_ceph_cluster(ctx, config):
# are taking way more than a minute/monitor to form quorum, so lets
# try the next block which will wait up to 15 minutes to gatherkeys.
execute_ceph_deploy(mon_create_nodes)
execute_ceph_deploy(mgr_create)
# create-keys is explicit now
# http://tracker.ceph.com/issues/16036
@ -307,6 +306,9 @@ def build_ceph_cluster(ctx, config):
'--id', remote.shortname])
estatus_gather = execute_ceph_deploy(gather_keys)
execute_ceph_deploy(mgr_create)
if mds_nodes:
estatus_mds = execute_ceph_deploy(deploy_mds)
if estatus_mds != 0:

View File

@ -142,12 +142,12 @@ def create_pools(ctx, clients):
if ctx.rgw.ec_data_pool:
create_ec_pool(remote, data_pool, client, 64,
ctx.rgw.erasure_code_profile, cluster_name)
ctx.rgw.erasure_code_profile, cluster_name, 'rgw')
else:
create_replicated_pool(remote, data_pool, 64, cluster_name)
create_replicated_pool(remote, data_pool, 64, cluster_name, 'rgw')
if ctx.rgw.cache_pools:
create_cache_pool(remote, data_pool, data_pool + '.cache', 64,
64*1024*1024, cluster_name)
64*1024*1024, cluster_name, 'rgw')
log.debug('Pools created')
yield

View File

@ -409,9 +409,9 @@ def create_zone_pools(ctx, zone):
pool_name = pool_config['val']['data_pool']
if ctx.rgw.ec_data_pool:
create_ec_pool(gateway.remote, pool_name, zone.name, 64,
ctx.rgw.erasure_code_profile, cluster.name)
ctx.rgw.erasure_code_profile, cluster.name, 'rgw')
else:
create_replicated_pool(gateway.remote, pool_name, 64, cluster.name)
create_replicated_pool(gateway.remote, pool_name, 64, cluster.name, 'rgw')
def configure_zone_compression(zone, compression):
""" Set compression type in the zone's default-placement """

View File

@ -24,20 +24,28 @@ def rados(ctx, remote, cmd, wait=True, check_status=False):
else:
return proc
def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph"):
def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph", application=None):
remote.run(args=['sudo', 'ceph'] +
cmd_erasure_code_profile(profile_name, profile) + ['--cluster', cluster_name])
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'create', name,
str(pgnum), str(pgnum), 'erasure', profile_name, '--cluster', cluster_name
])
if application:
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name
])
def create_replicated_pool(remote, name, pgnum, cluster_name="ceph"):
def create_replicated_pool(remote, name, pgnum, cluster_name="ceph", application=None):
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum), '--cluster', cluster_name
])
if application:
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name
])
def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph"):
def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph", application=None):
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum), '--cluster', cluster_name
])
@ -45,6 +53,10 @@ def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="
'sudo', 'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name,
str(size), '--cluster', cluster_name
])
if application:
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name
])
def cmd_erasure_code_profile(profile_name, profile):
"""

View File

@ -26,6 +26,10 @@ ceph osd crush set-device-class ssd osd.0
ceph osd crush set-device-class hdd osd.1
ceph osd crush rule create-replicated foo-ssd default host ssd
ceph osd crush rule create-replicated foo-hdd default host hdd
ceph osd crush rule ls-by-class ssd | grep 'foo-ssd'
ceph osd crush rule ls-by-class ssd | expect_false grep 'foo-hdd'
ceph osd crush rule ls-by-class hdd | grep 'foo-hdd'
ceph osd crush rule ls-by-class hdd | expect_false grep 'foo-ssd'
ceph osd erasure-code-profile set ec-foo-ssd crush-device-class=ssd m=2 k=2
ceph osd pool create ec-foo 2 erasure ec-foo-ssd
@ -33,6 +37,16 @@ ceph osd pool rm ec-foo ec-foo --yes-i-really-really-mean-it
ceph osd crush rule ls | grep foo
ceph osd crush rule rename foo foo-asdf
ceph osd crush rule rename bar bar-asdf
ceph osd crush rule ls | grep 'foo-asdf'
ceph osd crush rule ls | grep 'bar-asdf'
ceph osd crush rule rm foo 2>&1 | grep 'does not exist'
ceph osd crush rule rm bar 2>&1 | grep 'does not exist'
ceph osd crush rule rename foo-asdf foo
ceph osd crush rule rename bar-asdf bar
ceph osd crush rule ls | expect_false grep 'foo-asdf'
ceph osd crush rule ls | expect_false grep 'bar-asdf'
ceph osd crush rule rm foo
ceph osd crush rule rm foo # idempotent
ceph osd crush rule rm bar

View File

@ -216,6 +216,24 @@ compare_images ${POOL} ${clone_image}
expect_failure "is non-primary" clone_image ${CLUSTER1} ${PARENT_POOL} \
${parent_image} ${parent_snap} ${POOL} ${clone_image}1
testlog "TEST: data pool"
dp_image=test_data_pool
create_image ${CLUSTER2} ${POOL} ${dp_image} 128 --data-pool ${PARENT_POOL}
data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image})
test "${data_pool}" = "${PARENT_POOL}"
wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image}
data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image})
test "${data_pool}" = "${PARENT_POOL}"
create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1'
write_image ${CLUSTER2} ${POOL} ${dp_image} 100
create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2'
write_image ${CLUSTER2} ${POOL} ${dp_image} 100
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image}
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'master_position'
compare_images ${POOL} ${dp_image}@snap1
compare_images ${POOL} ${dp_image}@snap2
compare_images ${POOL} ${dp_image}
testlog "TEST: disable mirroring / delete non-primary image"
image2=test2
image3=test3

View File

@ -867,6 +867,16 @@ request_resync_image()
rbd --cluster=${cluster} -p ${pool} mirror image resync ${image}
}
get_image_data_pool()
{
local cluster=$1
local pool=$2
local image=$3
rbd --cluster ${cluster} -p ${pool} info ${image} |
awk '$1 == "data_pool:" {print $2}'
}
#
# Main
#

View File

@ -1,2 +1,2 @@
a5f84b37668fc8e03165aaf5cbb380c78e4deba4
v12.1.4
32ce2a3ae5239ee33d6150705cdb24d43bab910c
v12.2.0

View File

@ -3,11 +3,45 @@ API for CRUD lvm tag operations. Follows the Ceph LVM tag naming convention
that prefixes tags with ``ceph.`` and uses ``=`` for assignment, and provides
set of utilities for interacting with LVM.
"""
import json
from ceph_volume import process
from ceph_volume.exceptions import MultipleLVsError, MultipleVGsError
def _output_parser(output, fields):
"""
Newer versions of LVM allow ``--reportformat=json``, but older versions,
like the one included in Xenial, do not. LVM has the ability to filter and
format its output so we assume the output will be in a format this parser
can handle (using ';' as a delimiter)
:param fields: A string, possibly using ',' to group many items, as it
would be used on the CLI
:param output: The CLI output from the LVM call
"""
field_items = fields.split(',')
report = []
for line in output:
# clear the leading/trailing whitespace
line = line.strip()
# remove the extra '"' in each field
line = line.replace('"', '')
# prevent moving forward with empty contents
if not line:
continue
# splitting on ';' because that is what the lvm call uses as
# '--separator'
output_items = [i.strip() for i in line.split(';')]
# map the output to the fields
report.append(
dict(zip(field_items, output_items))
)
return report
def parse_tags(lv_tags):
"""
Return a dictionary mapping of all the tags associated with
@ -37,49 +71,22 @@ def parse_tags(lv_tags):
def get_api_vgs():
"""
Return the list of group volumes available in the system using flags to include common
metadata associated with them
Return the list of volume groups available in the system using flags to
include common metadata associated with them
Command and sample JSON output, should look like::
Command and sample delimited output should look like::
$ sudo vgs --reportformat=json
{
"report": [
{
"vg": [
{
"vg_name":"VolGroup00",
"pv_count":"1",
"lv_count":"2",
"snap_count":"0",
"vg_attr":"wz--n-",
"vg_size":"38.97g",
"vg_free":"0 "},
{
"vg_name":"osd_vg",
"pv_count":"3",
"lv_count":"1",
"snap_count":"0",
"vg_attr":"wz--n-",
"vg_size":"32.21g",
"vg_free":"9.21g"
}
]
}
]
}
$ sudo vgs --noheadings --separator=';' \
-o vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free
ubuntubox-vg;1;2;0;wz--n-;299.52g;12.00m
osd_vg;3;1;0;wz--n-;29.21g;9.21g
"""
fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free'
stdout, stderr, returncode = process.call(
[
'sudo', 'vgs', '--reportformat=json'
]
['sudo', 'vgs', '--noheadings', '--separator=";"', '-o', fields]
)
report = json.loads(''.join(stdout))
for report_item in report.get('report', []):
# is it possible to get more than one item in "report" ?
return report_item['vg']
return []
return _output_parser(stdout, fields)
def get_api_lvs():
@ -87,37 +94,18 @@ def get_api_lvs():
Return the list of logical volumes available in the system using flags to include common
metadata associated with them
Command and sample JSON output, should look like::
Command and delimited output should look like::
$ sudo lvs -o lv_tags,lv_path,lv_name,vg_name --reportformat=json
{
"report": [
{
"lv": [
{
"lv_tags":"",
"lv_path":"/dev/VolGroup00/LogVol00",
"lv_name":"LogVol00",
"vg_name":"VolGroup00"},
{
"lv_tags":"ceph.osd_fsid=aaa-fff-0000,ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0",
"lv_path":"/dev/osd_vg/OriginLV",
"lv_name":"OriginLV",
"vg_name":"osd_vg"
}
]
}
]
}
$ sudo lvs --noheadings --separator=';' -o lv_tags,lv_path,lv_name,vg_name
;/dev/ubuntubox-vg/root;root;ubuntubox-vg
;/dev/ubuntubox-vg/swap_1;swap_1;ubuntubox-vg
"""
fields = 'lv_tags,lv_path,lv_name,vg_name'
stdout, stderr, returncode = process.call(
['sudo', 'lvs', '-o', 'lv_tags,lv_path,lv_name,vg_name', '--reportformat=json'])
report = json.loads(''.join(stdout))
for report_item in report.get('report', []):
# is it possible to get more than one item in "report" ?
return report_item['lv']
return []
['sudo', 'lvs', '--noheadings', '--separator=";"', '-o', fields]
)
return _output_parser(stdout, fields)
def get_lv(lv_name=None, vg_name=None, lv_path=None, lv_tags=None):

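A minimal standalone sketch of the ';'-delimited parsing introduced above (illustrative only; the helper name is made up and the sample lines are taken from the vgs docstring in this hunk):

# Sketch of what _output_parser does with vgs/lvs-style delimited output.
def output_parser(lines, fields):
    field_items = fields.split(',')
    report = []
    for line in lines:
        line = line.strip().replace('"', '')       # drop padding and quoting
        if not line:                                # skip empty lines
            continue
        values = [item.strip() for item in line.split(';')]
        report.append(dict(zip(field_items, values)))
    return report

fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free'
sample = [
    '  ubuntubox-vg;1;2;0;wz--n-;299.52g;12.00m',
    '  osd_vg;3;1;0;wz--n-;29.21g;9.21g',
]
for vg in output_parser(sample, fields):
    print(vg['vg_name'], vg['vg_free'])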
View File

@ -1,3 +1,4 @@
from ceph_volume.util import arg_validators
import argparse
@ -14,12 +15,13 @@ def common_parser(prog, description):
required_args = parser.add_argument_group('required arguments')
parser.add_argument(
'--journal',
help='A logical group name, path to a logical volume, or path to a device',
help='A logical volume (vg_name/lv_name), or path to a device',
)
required_args.add_argument(
'--data',
required=True,
help='A logical group name or a path to a logical volume',
type=arg_validators.LVPath(),
help='A logical volume (vg_name/lv_name) for OSD data',
)
parser.add_argument(
'--journal-size',

View File

@ -28,28 +28,18 @@ class Create(object):
all the metadata to the logical volumes using LVM tags, and starting
the OSD daemon.
Most basic Usage looks like (journal will be collocated from the same volume group):
ceph-volume lvm create --data {volume group name}
Example calls for supported scenarios:
Dedicated volume group for Journal(s)
-------------------------------------
Filestore
---------
Existing logical volume (lv) or device:
ceph-volume lvm create --data {logical volume} --journal /path/to/{lv}|{device}
ceph-volume lvm create --filestore --data {vg name/lv name} --journal /path/to/device
Or:
ceph-volume lvm create --data {data volume group} --journal {journal volume group}
Collocated (same group) for data and journal
--------------------------------------------
ceph-volume lvm create --data {volume group}
ceph-volume lvm create --filestore --data {vg name/lv name} --journal {vg name/lv name}
""")
parser = create_parser(

View File

@ -9,20 +9,6 @@ from . import api
from .common import prepare_parser
def canonical_device_path(device):
"""
Ensure that a device is canonical (full path) and that it exists so that
it can be used throughout the prepare/activate process
"""
# FIXME: this is obviously super naive
inferred = os.path.join('/dev', device)
if os.path.exists(os.path.abspath(device)):
return device
elif os.path.exists(inferred):
return inferred
raise RuntimeError('Selected device does not exist: %s' % device)
def prepare_filestore(device, journal, secrets, id_=None, fsid=None):
"""
:param device: The name of the volume group or lvm to work with
@ -65,6 +51,19 @@ class Prepare(object):
def __init__(self, argv):
self.argv = argv
def get_journal_lv(self, argument):
"""
Perform some parsing of the value of ``--journal`` so that the process
can determine correctly if it got a device path or an lv
:param argument: The value of ``--journal``, that will need to be split
to retrieve the actual lv
"""
try:
vg_name, lv_name = argument.split('/')
except (ValueError, AttributeError):
return None
return api.get_lv(lv_name=lv_name, vg_name=vg_name)
@decorators.needs_root
def prepare(self, args):
# FIXME we don't allow re-using a keyring, we always generate one for the
@ -78,66 +77,40 @@ class Prepare(object):
#osd_id = args.osd_id or prepare_utils.create_id(fsid)
# allow re-using an id, in case a prepare failed
osd_id = args.osd_id or prepare_utils.create_id(fsid, json.dumps(secrets))
journal_name = "journal_%s" % fsid
osd_name = "osd_%s" % fsid
vg_name, lv_name = args.data.split('/')
if args.filestore:
data_vg = api.get_vg(vg_name=args.data)
data_lv = api.get_lv(lv_name=args.data)
journal_vg = api.get_vg(vg_name=args.journal)
journal_lv = api.get_lv(lv_name=args.journal)
journal_device = None
# it is possible to pass a device as a journal that is not
# an actual logical volume (or group)
if not args.journal:
if data_lv:
raise RuntimeError('--journal is required when not using a vg for OSD data')
# collocated: carve out the journal from the data vg
if data_vg:
journal_lv = api.create_lv(
name=journal_name,
group=data_vg.name,
size=args.journal_size,
osd_fsid=fsid,
osd_id=osd_id,
type='journal',
cluster_fsid=cluster_fsid
)
data_lv = api.get_lv(lv_name=lv_name, vg_name=vg_name)
# if a volume group was defined for the journal create that first
if journal_vg:
journal_lv = api.create_lv(
name=journal_name,
group=args.journal,
size=args.journal_size,
osd_fsid=fsid,
osd_id=osd_id,
type='journal',
cluster_fsid=cluster_fsid
)
if journal_lv:
journal_device = journal_lv.lv_path
# The journal is probably a device, not in LVM
elif args.journal:
journal_device = canonical_device_path(args.journal)
# At this point we must have a journal_lv or a journal device
# now create the osd from the group if that was found
if data_vg:
# XXX make sure that a there aren't more OSDs than physical
# devices from this volume group
data_lv = api.create_lv(
name=osd_name,
group=args.data,
osd_fsid=fsid,
osd_id=osd_id,
type='data',
journal_device=journal_device,
cluster_fsid=cluster_fsid
)
# we must have either an existing data_lv or a newly created, so lets make
# sure that the tags are correct
if not data_lv:
raise RuntimeError('no data logical volume found with: %s' % args.data)
if not args.journal:
raise RuntimeError('--journal is required when using --filestore')
journal_device = None
journal_lv = self.get_journal_lv(args.journal)
# check if we have an actual path to a device, which is allowed
if not journal_lv:
if os.path.exists(args.journal):
journal_device = args.journal
else:
raise RuntimeError(
'--journal specified an invalid or non-existent device: %s' % args.journal
)
# Otherwise the journal_device is the path to the lv
else:
journal_device = journal_lv.lv_path
journal_lv.set_tags({
'ceph.type': 'journal',
'ceph.osd_fsid': fsid,
'ceph.osd_id': osd_id,
'ceph.cluster_fsid': cluster_fsid,
'ceph.journal_device': journal_device,
'ceph.data_device': data_lv.lv_path,
})
data_lv.set_tags({
'ceph.type': 'data',
'ceph.osd_fsid': fsid,

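The new get_journal_lv helper above only treats --journal as a logical volume when the value splits cleanly into 'vg/lv'; otherwise prepare() falls back to checking for a device path. A minimal standalone sketch of that decision, with a fake lookup standing in for api.get_lv (the names below are illustrative, not the ceph-volume API):

import os

def split_vg_lv(value):
    # Return (vg, lv) when the value looks like 'vg/lv', otherwise None.
    try:
        vg_name, lv_name = value.split('/')
    except (ValueError, AttributeError):
        return None
    return (vg_name, lv_name) if vg_name and lv_name else None

def resolve_journal(value, lookup_lv):
    # Prefer an existing lv, then fall back to an existing device path.
    parts = split_vg_lv(value)
    lv = lookup_lv(*parts) if parts else None   # stand-in for api.get_lv(lv_name=..., vg_name=...)
    if lv:
        return 'lv', lv
    if value and os.path.exists(value):
        return 'device', value
    raise RuntimeError('--journal specified an invalid or non-existent device: %s' % value)

# e.g. resolve_journal('journals/journal1', fake_lookup) or resolve_journal('/dev/sdc1', fake_lookup)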
View File

@ -57,7 +57,6 @@ def main(args=None):
Expected input is similar to::
['/path/to/ceph-volume-systemd', '<osd id>-<osd uuid>-<device type>']
['/path/to/ceph-volume-systemd', '<type>-<extra metadata>']
For example::

View File

@ -24,50 +24,53 @@ class TestParseTags(object):
class TestGetAPIVgs(object):
def test_report_is_emtpy(self, monkeypatch):
monkeypatch.setattr(api.process, 'call', lambda x: ('{}', '', 0))
monkeypatch.setattr(api.process, 'call', lambda x: ('\n\n', '', 0))
assert api.get_api_vgs() == []
def test_report_has_stuff(self, monkeypatch):
report = '{"report":[{"vg":[{"vg_name":"VolGroup00"}]}]}'
report = [' VolGroup00']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}]
def test_report_has_stuff_with_empty_attrs(self, monkeypatch):
report = [' VolGroup00 ;;;;;;9g']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
result = api.get_api_vgs()[0]
assert len(result.keys()) == 7
assert result['vg_name'] == 'VolGroup00'
assert result['vg_free'] == '9g'
def test_report_has_multiple_items(self, monkeypatch):
report = '{"report":[{"vg":[{"vg_name":"VolGroup00"},{"vg_name":"ceph_vg"}]}]}'
report = [' VolGroup00;;;;;;;', ' ceph_vg;;;;;;;']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}, {'vg_name': 'ceph_vg'}]
def test_does_not_get_poluted_with_non_vg_items(self, monkeypatch):
report = '{"report":[{"vg":[{"vg_name":"VolGroup00"}],"lv":[{"lv":"1"}]}]}'
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}]
result = api.get_api_vgs()
assert result[0]['vg_name'] == 'VolGroup00'
assert result[1]['vg_name'] == 'ceph_vg'
class TestGetAPILvs(object):
def test_report_is_emtpy(self, monkeypatch):
monkeypatch.setattr(api.process, 'call', lambda x: ('{}', '', 0))
monkeypatch.setattr(api.process, 'call', lambda x: ('', '', 0))
assert api.get_api_lvs() == []
def test_report_has_stuff(self, monkeypatch):
report = '{"report":[{"lv":[{"lv_name":"VolGroup00"}]}]}'
report = [' ;/path;VolGroup00;root']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
assert api.get_api_lvs() == [{'lv_name': 'VolGroup00'}]
result = api.get_api_lvs()
assert result[0]['lv_name'] == 'VolGroup00'
def test_report_has_multiple_items(self, monkeypatch):
report = '{"report":[{"lv":[{"lv_name":"VolName"},{"lv_name":"ceph_lv"}]}]}'
report = [' ;/path;VolName;root', ';/dev/path;ceph_lv;ceph_vg']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
assert api.get_api_lvs() == [{'lv_name': 'VolName'}, {'lv_name': 'ceph_lv'}]
def test_does_not_get_poluted_with_non_lv_items(self, monkeypatch):
report = '{"report":[{"lv":[{"lv_name":"VolName"}],"vg":[{"vg":"1"}]}]}'
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
assert api.get_api_lvs() == [{'lv_name': 'VolName'}]
result = api.get_api_lvs()
assert result[0]['lv_name'] == 'VolName'
assert result[1]['lv_name'] == 'ceph_lv'
@pytest.fixture
def volumes(monkeypatch):
monkeypatch.setattr(process, 'call', lambda x: ('{}', '', 0))
monkeypatch.setattr(process, 'call', lambda x: ('', '', 0))
volumes = api.Volumes()
volumes._purge()
return volumes
@ -75,7 +78,7 @@ def volumes(monkeypatch):
@pytest.fixture
def volume_groups(monkeypatch):
monkeypatch.setattr(process, 'call', lambda x: ('{}', '', 0))
monkeypatch.setattr(process, 'call', lambda x: ('', '', 0))
vgs = api.VolumeGroups()
vgs._purge()
return vgs

View File

@ -37,6 +37,21 @@ class TestPrepare(object):
assert 'A logical group name or a path' in stdout
class TestGetJournalLV(object):
@pytest.mark.parametrize('arg', ['', '///', None, '/dev/sda1'])
def test_no_journal_on_invalid_path(self, monkeypatch, arg):
monkeypatch.setattr(lvm.prepare.api, 'get_lv', lambda **kw: False)
prepare = lvm.prepare.Prepare([])
assert prepare.get_journal_lv(arg) is None
def test_no_journal_lv_found(self, monkeypatch):
# patch it with 0 so we know we are getting to get_lv
monkeypatch.setattr(lvm.prepare.api, 'get_lv', lambda **kw: 0)
prepare = lvm.prepare.Prepare([])
assert prepare.get_journal_lv('vg/lv') == 0
class TestActivate(object):
def test_main_spits_help_with_no_arguments(self, capsys):

View File

@ -11,7 +11,9 @@ osd_scenario: lvm
copy_admin_key: true
# test-volume is created by tests/functional/lvm_setup.yml from /dev/sda
lvm_volumes:
test_volume: /dev/sdc
- data: test_volume
journal: /dev/sdc
data_vg: test_group
os_tuning_params:
- { name: kernel.pid_max, value: 4194303 }
- { name: fs.file-max, value: 26234859 }

View File

@ -11,7 +11,9 @@ osd_scenario: lvm
copy_admin_key: true
# test-volume is created by tests/functional/lvm_setup.yml from /dev/sda
lvm_volumes:
test_volume: /dev/sdc
- data: test_volume
journal: /dev/sdc
data_vg: test_group
os_tuning_params:
- { name: kernel.pid_max, value: 4194303 }
- { name: fs.file-max, value: 26234859 }

View File

@ -0,0 +1,24 @@
import pytest
import argparse
from ceph_volume.util import arg_validators
invalid_lv_paths = [
'', 'lv_name', '///', '/lv_name', 'lv_name/',
'/dev/lv_group/lv_name'
]
class TestLVPath(object):
def setup(self):
self.validator = arg_validators.LVPath()
@pytest.mark.parametrize('path', invalid_lv_paths)
def test_no_slash_is_an_error(self, path):
with pytest.raises(argparse.ArgumentError):
self.validator(path)
def test_is_valid(self):
path = 'vg/lv'
assert self.validator(path) == path

View File

@ -0,0 +1,29 @@
import argparse
class LVPath(object):
"""
A simple validator to ensure that a logical volume is specified like::
<vg name>/<lv name>
Because for LVM it is better to be explicit about which volume group an lv
belongs to.
"""
def __call__(self, string):
error = None
try:
vg, lv = string.split('/')
except ValueError:
error = "Logical volume must be specified as 'volume_group/logical_volume' but got: %s" % string
raise argparse.ArgumentError(None, error)
if not vg:
error = "Didn't specify a volume group like 'volume_group/logical_volume', got: %s" % string
if not lv:
error = "Didn't specify a logical volume like 'volume_group/logical_volume', got: %s" % string
if error:
raise argparse.ArgumentError(None, error)
return string

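For context, this is roughly how the validator plugs into argparse; the parser and option below are illustrative, and the only assumptions carried over from the hunks above are the arg_validators import path and the 'vg/lv' format:

import argparse
from ceph_volume.util import arg_validators   # import path as used in the test hunk above

parser = argparse.ArgumentParser(prog='example')
parser.add_argument('--data', type=arg_validators.LVPath())

parser.parse_args(['--data', 'vg0/data-lv'])            # accepted: 'volume_group/logical_volume'
try:
    parser.parse_args(['--data', '/dev/vg0/data-lv'])   # rejected: full paths do not validate
except SystemExit:
    pass                                                # argparse reports the ArgumentError and exits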
View File

@ -20,6 +20,7 @@
#include "common/config.h"
#include "common/ceph_argparse.h"
#include "common/errno.h"
#include "common/pick_address.h"
#include "global/global_init.h"
#include "mgr/MgrStandby.h"
@ -52,6 +53,8 @@ int main(int argc, const char **argv)
usage();
}
pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
global_init_daemonize(g_ceph_context);
global_init_chdir(g_ceph_context);
common_init_finish(g_ceph_context);

View File

@ -9378,11 +9378,11 @@ int Client::chdir(const char *relpath, std::string &new_cwd,
cwd.swap(in);
ldout(cct, 3) << "chdir(" << relpath << ") cwd now " << cwd->ino << dendl;
getcwd(new_cwd, perms);
_getcwd(new_cwd, perms);
return 0;
}
void Client::getcwd(string& dir, const UserPerm& perms)
void Client::_getcwd(string& dir, const UserPerm& perms)
{
filepath path;
ldout(cct, 10) << "getcwd " << *cwd << dendl;
@ -9422,6 +9422,12 @@ void Client::getcwd(string& dir, const UserPerm& perms)
dir += path.get_path();
}
void Client::getcwd(string& dir, const UserPerm& perms)
{
Mutex::Locker l(client_lock);
_getcwd(dir, perms);
}
int Client::statfs(const char *path, struct statvfs *stbuf,
const UserPerm& perms)
{

View File

@ -956,6 +956,7 @@ public:
// crap
int chdir(const char *s, std::string &new_cwd, const UserPerm& perms);
void _getcwd(std::string& cwd, const UserPerm& perms);
void getcwd(std::string& cwd, const UserPerm& perms);
// namespace ops

View File

@ -112,14 +112,15 @@ static int getgroups(fuse_req_t req, gid_t **sgids)
return 0;
}
*sgids = (gid_t*)malloc(c*sizeof(**sgids));
if (!*sgids) {
gid_t *gids = new (std::nothrow) gid_t[c];
if (!gids) {
return -ENOMEM;
}
c = fuse_req_getgroups(req, c, *sgids);
c = fuse_req_getgroups(req, c, gids);
if (c < 0) {
free(*sgids);
return c;
delete gids;
} else {
*sgids = gids;
}
return c;
#endif

View File

@ -200,9 +200,7 @@ static int cls_log_list(cls_method_context_t hctx, bufferlist *in, bufferlist *o
}
}
if (ret.truncated) {
ret.marker = marker;
}
ret.marker = marker;
::encode(ret, *out);

View File

@ -144,6 +144,16 @@ public:
}
do_queues();
}
/**
* Has reservations
*
* Return true if there are reservations in progress
*/
bool has_reservation() {
Mutex::Locker l(lock);
return !in_progress.empty();
}
static const unsigned MAX_PRIORITY = (unsigned)-1;
};

View File

@ -224,11 +224,17 @@ void LogChannel::do_log(clog_type prio, const std::string& s)
// seq and who should be set for syslog/graylog/log_to_mon
e.who = parent->get_myinst();
e.name = parent->get_myname();
e.seq = parent->get_next_seq();
e.prio = prio;
e.msg = s;
e.channel = get_log_channel();
// log to monitor?
if (log_to_monitors) {
e.seq = parent->queue(e);
} else {
e.seq = parent->get_next_seq();
}
// log to syslog?
if (do_log_to_syslog()) {
ldout(cct,0) << __func__ << " log to syslog" << dendl;
@ -240,11 +246,6 @@ void LogChannel::do_log(clog_type prio, const std::string& s)
ldout(cct,0) << __func__ << " log to graylog" << dendl;
graylog->log_log_entry(&e);
}
// log to monitor?
if (log_to_monitors) {
parent->queue(e);
}
}
Message *LogClient::get_mon_log_message(bool flush)
@ -268,8 +269,8 @@ bool LogClient::are_pending()
Message *LogClient::_get_mon_log_message()
{
assert(log_lock.is_locked());
if (log_queue.empty())
return NULL;
if (log_queue.empty())
return NULL;
// only send entries that haven't been sent yet during this mon
// session! monclient needs to call reset_session() on mon session
@ -324,6 +325,7 @@ void LogClient::_send_to_mon()
version_t LogClient::queue(LogEntry &entry)
{
Mutex::Locker l(log_lock);
entry.seq = ++last_log;
log_queue.push_back(entry);
if (is_mon) {
@ -335,6 +337,7 @@ version_t LogClient::queue(LogEntry &entry)
uint64_t LogClient::get_next_seq()
{
Mutex::Locker l(log_lock);
return ++last_log;
}

View File

@ -245,7 +245,7 @@ private:
bool is_mon;
Mutex log_lock;
version_t last_log_sent;
std::atomic<uint64_t> last_log;
version_t last_log;
std::deque<LogEntry> log_queue;
std::map<std::string, LogChannelRef> channels;

View File

@ -27,7 +27,7 @@ static void netmask_ipv4(const struct in_addr *addr,
}
const struct sockaddr *find_ipv4_in_subnet(const struct ifaddrs *addrs,
const struct ifaddrs *find_ipv4_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr_in *net,
unsigned int prefix_len) {
struct in_addr want, temp;
@ -49,7 +49,7 @@ const struct sockaddr *find_ipv4_in_subnet(const struct ifaddrs *addrs,
netmask_ipv4(cur, prefix_len, &temp);
if (temp.s_addr == want.s_addr) {
return addrs->ifa_addr;
return addrs;
}
}
@ -71,7 +71,7 @@ static void netmask_ipv6(const struct in6_addr *addr,
}
const struct sockaddr *find_ipv6_in_subnet(const struct ifaddrs *addrs,
const struct ifaddrs *find_ipv6_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr_in6 *net,
unsigned int prefix_len) {
struct in6_addr want, temp;
@ -93,14 +93,14 @@ const struct sockaddr *find_ipv6_in_subnet(const struct ifaddrs *addrs,
netmask_ipv6(cur, prefix_len, &temp);
if (IN6_ARE_ADDR_EQUAL(&temp, &want))
return addrs->ifa_addr;
return addrs;
}
return NULL;
}
const struct sockaddr *find_ip_in_subnet(const struct ifaddrs *addrs,
const struct ifaddrs *find_ip_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr *net,
unsigned int prefix_len) {
switch (net->sa_family) {

View File

@ -2510,7 +2510,7 @@ std::vector<Option> get_global_options() {
.set_description(""),
Option("osd_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(1500)
.set_default(3000)
.set_description("minimum number of entries to maintain in the PG log")
.add_service("osd")
.add_see_also("osd_max_pg_log_entries")

View File

@ -38,9 +38,10 @@ static const struct sockaddr *find_ip_in_subnet_list(CephContext *cct,
exit(1);
}
const struct sockaddr *found = find_ip_in_subnet(ifa, (struct sockaddr *) &net, prefix_len);
const struct ifaddrs *found = find_ip_in_subnet(ifa,
(struct sockaddr *) &net, prefix_len);
if (found)
return found;
return found->ifa_addr;
}
return NULL;
@ -133,6 +134,32 @@ void pick_addresses(CephContext *cct, int needs)
freeifaddrs(ifa);
}
std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network)
{
struct ifaddrs *ifa;
int r = getifaddrs(&ifa);
if (r < 0) {
string err = cpp_strerror(errno);
lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl;
return {};
}
unsigned int prefix_len = 0;
const struct ifaddrs *found = find_ip_in_subnet(ifa,
(const struct sockaddr *) &network, prefix_len);
std::string result;
if (found) {
result = found->ifa_name;
}
freeifaddrs(ifa);
return result;
}
bool have_local_addr(CephContext *cct, const list<entity_addr_t>& ls, entity_addr_t *match)
{
struct ifaddrs *ifa;

View File

@ -30,6 +30,12 @@ class CephContext;
*/
void pick_addresses(CephContext *cct, int needs);
/**
* Find a network interface whose address matches the address/netmask
* in `network`.
*/
std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network);
/**
* check for a locally configured address
*

View File

@ -70,8 +70,9 @@ void shard_info_wrapper::set_object(const ScrubMap::object& object)
void shard_info_wrapper::encode(bufferlist& bl) const
{
ENCODE_START(2, 1, bl);
ENCODE_START(3, 3, bl);
::encode(errors, bl);
::encode(primary, bl);
if (has_shard_missing()) {
return;
}
@ -87,8 +88,9 @@ void shard_info_wrapper::encode(bufferlist& bl) const
void shard_info_wrapper::decode(bufferlist::iterator& bp)
{
DECODE_START(2, bp);
DECODE_START(3, bp);
::decode(errors, bp);
::decode(primary, bp);
if (has_shard_missing()) {
return;
}
@ -98,8 +100,7 @@ void shard_info_wrapper::decode(bufferlist::iterator& bp)
::decode(omap_digest, bp);
::decode(data_digest_present, bp);
::decode(data_digest, bp);
if (struct_v > 1)
::decode(selected_oi, bp);
::decode(selected_oi, bp);
DECODE_FINISH(bp);
}
@ -120,10 +121,12 @@ void
inconsistent_obj_wrapper::set_auth_missing(const hobject_t& hoid,
const map<pg_shard_t, ScrubMap*>& maps,
map<pg_shard_t, shard_info_wrapper> &shard_map,
int &shallow_errors, int &deep_errors)
int &shallow_errors, int &deep_errors,
const pg_shard_t &primary)
{
for (auto pg_map : maps) {
auto oid_object = pg_map.second->objects.find(hoid);
shard_map[pg_map.first].primary = (pg_map.first == primary);
if (oid_object == pg_map.second->objects.end())
shard_map[pg_map.first].set_missing();
else

View File

@ -78,6 +78,9 @@ public:
void set_ss_attr_corrupted() {
errors |= err_t::SS_ATTR_CORRUPTED;
}
void set_obj_size_oi_mismatch() {
errors |= err_t::OBJ_SIZE_OI_MISMATCH;
}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);
};
@ -116,7 +119,8 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t {
void set_auth_missing(const hobject_t& hoid,
const map<pg_shard_t, ScrubMap*>&,
map<pg_shard_t, shard_info_wrapper>&,
int &shallow_errors, int &deep_errors);
int &shallow_errors, int &deep_errors,
const pg_shard_t &primary);
void set_version(uint64_t ver) { version = ver; }
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);

View File

@ -27,7 +27,9 @@ const char * Compressor::get_comp_alg_name(int a) {
case COMP_ALG_SNAPPY: return "snappy";
case COMP_ALG_ZLIB: return "zlib";
case COMP_ALG_ZSTD: return "zstd";
#ifdef HAVE_LZ4
case COMP_ALG_LZ4: return "lz4";
#endif
default: return "???";
}
}
@ -39,8 +41,10 @@ boost::optional<Compressor::CompressionAlgorithm> Compressor::get_comp_alg_type(
return COMP_ALG_ZLIB;
if (s == "zstd")
return COMP_ALG_ZSTD;
#ifdef HAVE_LZ4
if (s == "lz4")
return COMP_ALG_LZ4;
#endif
if (s == "" || s == "none")
return COMP_ALG_NONE;

View File

@ -34,7 +34,9 @@ public:
COMP_ALG_SNAPPY = 1,
COMP_ALG_ZLIB = 2,
COMP_ALG_ZSTD = 3,
#ifdef HAVE_LZ4
COMP_ALG_LZ4 = 4,
#endif
COMP_ALG_LAST //the last value for range checks
};
// compression options

View File

@ -587,11 +587,10 @@ int CrushCompiler::parse_bucket(iter_t const& i)
if (verbose) err << "bucket " << name << " id " << maybe_id;
if (sub->children.size() > 2) {
string class_name = string_node(sub->children[3]);
if (!crush.class_exists(class_name)) {
err << " unknown device class '" << class_name << "'" << std::endl;
return -EINVAL;
}
int cid = crush.get_class_id(class_name);
// note that we do not verify class existence here,
// as this bucket might come from an empty shadow tree
// which currently has no OSDs but is still referenced by a rule!
int cid = crush.get_or_create_class_id(class_name);
if (class_id.count(cid) != 0) {
err << "duplicate device class " << class_name << " for bucket " << name << std::endl;
return -ERANGE;
@ -741,7 +740,9 @@ int CrushCompiler::parse_bucket(iter_t const& i)
item_weight[id] = bucketweight;
assert(id != 0);
int r = crush.add_bucket(id, alg, hash, type, size, &items[0], &weights[0], NULL);
int idout;
int r = crush.add_bucket(id, alg, hash, type, size,
&items[0], &weights[0], &idout);
if (r < 0) {
if (r == -EEXIST)
err << "Duplicate bucket id " << id << std::endl;

View File

@ -291,6 +291,33 @@ int CrushWrapper::rename_bucket(const string& srcname,
return set_item_name(oldid, dstname);
}
int CrushWrapper::rename_rule(const string& srcname,
const string& dstname,
ostream *ss)
{
if (!rule_exists(srcname)) {
if (ss) {
*ss << "source rule name '" << srcname << "' does not exist";
}
return -ENOENT;
}
if (rule_exists(dstname)) {
if (ss) {
*ss << "destination rule name '" << dstname << "' already exists";
}
return -EEXIST;
}
int rule_id = get_rule_id(srcname);
auto it = rule_name_map.find(rule_id);
assert(it != rule_name_map.end());
it->second = dstname;
if (have_rmaps) {
rule_name_rmap.erase(srcname);
rule_name_rmap[dstname] = rule_id;
}
return 0;
}
void CrushWrapper::find_takes(set<int>& roots) const
{
for (unsigned i=0; i<crush->max_rules; i++) {
@ -1075,7 +1102,7 @@ int CrushWrapper::swap_bucket(CephContext *cct, int src, int dst)
// swap names
swap_names(src, dst);
return 0;
return rebuild_roots_with_classes();
}
int CrushWrapper::link_bucket(
@ -1667,7 +1694,7 @@ int CrushWrapper::remove_rule(int ruleno)
crush->rules[ruleno] = NULL;
rule_name_map.erase(ruleno);
have_rmaps = false;
return 0;
return rebuild_roots_with_classes();
}
int CrushWrapper::bucket_adjust_item_weight(CephContext *cct, crush_bucket *bucket, int item, int weight)
@ -1678,8 +1705,8 @@ int CrushWrapper::bucket_adjust_item_weight(CephContext *cct, crush_bucket *buck
if (bucket->items[position] == item)
break;
assert(position != bucket->size);
for (auto w : choose_args) {
crush_choose_arg_map arg_map = w.second;
for (auto &w : choose_args) {
crush_choose_arg_map &arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
for (__u32 j = 0; j < arg->weight_set_size; j++) {
crush_weight_set *weight_set = &arg->weight_set[j];
@ -1702,26 +1729,30 @@ int CrushWrapper::add_bucket(
crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items,
weights);
assert(b);
assert(idout);
int r = crush_add_bucket(crush, bucketno, b, idout);
int pos = -1 - *idout;
for (auto& p : choose_args) {
crush_choose_arg_map& cmap = p.second;
if (cmap.args) {
if ((int)cmap.size <= *idout) {
if ((int)cmap.size <= pos) {
cmap.args = (crush_choose_arg*)realloc(
cmap.args,
sizeof(crush_choose_arg) * (*idout + 1));
sizeof(crush_choose_arg) * (pos + 1));
assert(cmap.args);
memset(&cmap.args[cmap.size], 0,
sizeof(crush_choose_arg) * (*idout + 1 - cmap.size));
cmap.size = *idout + 1;
sizeof(crush_choose_arg) * (pos + 1 - cmap.size));
cmap.size = pos + 1;
}
} else {
cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg),
*idout + 1);
cmap.size = *idout + 1;
pos + 1);
assert(cmap.args);
cmap.size = pos + 1;
}
if (size > 0) {
int positions = get_choose_args_positions(cmap);
crush_choose_arg& carg = cmap.args[*idout];
crush_choose_arg& carg = cmap.args[pos];
carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set),
size);
carg.weight_set_size = positions;
@ -1744,8 +1775,8 @@ int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
if (r < 0) {
return r;
}
for (auto w : choose_args) {
crush_choose_arg_map arg_map = w.second;
for (auto &w : choose_args) {
crush_choose_arg_map &arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
for (__u32 j = 0; j < arg->weight_set_size; j++) {
crush_weight_set *weight_set = &arg->weight_set[j];
@ -1777,8 +1808,8 @@ int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
if (r < 0) {
return r;
}
for (auto w : choose_args) {
crush_choose_arg_map arg_map = w.second;
for (auto &w : choose_args) {
crush_choose_arg_map &arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
for (__u32 j = 0; j < arg->weight_set_size; j++) {
crush_weight_set *weight_set = &arg->weight_set[j];
@ -1960,8 +1991,10 @@ int CrushWrapper::device_class_clone(
unsigned new_size = -1-bno + 1;
cmap.args = (crush_choose_arg*)realloc(cmap.args,
new_size * sizeof(cmap.args[0]));
assert(cmap.args);
memset(cmap.args + cmap.size, 0,
(new_size - cmap.size) * sizeof(cmap.args[0]));
cmap.size = new_size;
}
auto& o = cmap.args[-1-original_id];
auto& n = cmap.args[-1-bno];
@ -1990,6 +2023,37 @@ int CrushWrapper::device_class_clone(
return 0;
}
int CrushWrapper::get_rules_by_class(const string &class_name, set<int> *rules)
{
assert(rules);
rules->clear();
if (!class_exists(class_name)) {
return -ENOENT;
}
int class_id = get_class_id(class_name);
for (unsigned i = 0; i < crush->max_rules; ++i) {
crush_rule *r = crush->rules[i];
if (!r)
continue;
for (unsigned j = 0; j < r->len; ++j) {
if (r->steps[j].op == CRUSH_RULE_TAKE) {
int step_item = r->steps[j].arg1;
int original_item;
int c;
int res = split_id_class(step_item, &original_item, &c);
if (res < 0) {
return res;
}
if (c != -1 && c == class_id) {
rules->insert(i);
break;
}
}
}
}
return 0;
}
bool CrushWrapper::_class_is_dead(int class_id)
{
for (auto &p: class_map) {
@ -2299,7 +2363,7 @@ void CrushWrapper::decode(bufferlist::iterator& blp)
__u32 choose_args_size;
::decode(choose_args_size, blp);
for (__u32 i = 0; i < choose_args_size; i++) {
uint64_t choose_args_index;
typename decltype(choose_args)::key_type choose_args_index;
::decode(choose_args_index, blp);
crush_choose_arg_map arg_map;
arg_map.size = crush->max_buckets;

View File

@ -539,6 +539,9 @@ public:
ostream *ss);
// rule names
int rename_rule(const string& srcname,
const string& dstname,
ostream *ss);
bool rule_exists(string name) const {
build_rmaps();
return rule_name_rmap.count(name);
@ -1217,6 +1220,7 @@ public:
int rename_class(const string& srcname, const string& dstname);
int populate_classes(
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
int get_rules_by_class(const string &class_name, set<int> *rules);
bool _class_is_dead(int class_id);
void cleanup_dead_classes();
int rebuild_roots_with_classes();

View File

@ -7,7 +7,7 @@
If there are multiple matches, the first one is returned; this order
is system-dependent and should not be relied on.
*/
const struct sockaddr *find_ip_in_subnet(const struct ifaddrs *addrs,
const struct ifaddrs *find_ip_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr *net,
unsigned int prefix_len);

View File

@ -63,11 +63,12 @@ struct err_t {
OI_ATTR_MISSING = 1 << 14,
OI_ATTR_CORRUPTED = 1 << 15,
SS_ATTR_MISSING = 1 << 16,
SS_ATTR_CORRUPTED = 1 << 17
SS_ATTR_CORRUPTED = 1 << 17,
OBJ_SIZE_OI_MISMATCH = 1 << 18
// When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS
};
uint64_t errors = 0;
static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED;
static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED|OBJ_SIZE_OI_MISMATCH;
static constexpr uint64_t DEEP_ERRORS = SHARD_READ_ERR|DATA_DIGEST_MISMATCH_OI|OMAP_DIGEST_MISMATCH_OI|SHARD_EC_HASH_MISMATCH|SHARD_EC_SIZE_MISMATCH;
bool has_shard_missing() const {
return errors & SHARD_MISSING;
@ -111,6 +112,9 @@ struct err_t {
bool has_deep_errors() const {
return errors & DEEP_ERRORS;
}
bool has_obj_size_oi_mismatch() const {
return errors & OBJ_SIZE_OI_MISMATCH;
}
};
struct shard_info_t : err_t {
@ -121,6 +125,7 @@ struct shard_info_t : err_t {
bool data_digest_present = false;
uint32_t data_digest = 0;
bool selected_oi = false;
bool primary = false;
};
struct osd_shard_t {

View File

@ -1343,26 +1343,28 @@ bool MDSDaemon::ms_verify_authorizer(Connection *con, int peer_type,
if (caps_info.allow_all) {
// Flag for auth providers that don't provide cap strings
s->auth_caps.set_allow_all();
}
} else {
bufferlist::iterator p = caps_info.caps.begin();
string auth_cap_str;
try {
::decode(auth_cap_str, p);
bufferlist::iterator p = caps_info.caps.begin();
string auth_cap_str;
try {
::decode(auth_cap_str, p);
dout(10) << __func__ << ": parsing auth_cap_str='" << auth_cap_str << "'" << dendl;
std::ostringstream errstr;
if (!s->auth_caps.parse(g_ceph_context, auth_cap_str, &errstr)) {
dout(1) << __func__ << ": auth cap parse error: " << errstr.str()
<< " parsing '" << auth_cap_str << "'" << dendl;
clog->warn() << name << " mds cap '" << auth_cap_str
<< "' does not parse: " << errstr.str();
dout(10) << __func__ << ": parsing auth_cap_str='" << auth_cap_str << "'" << dendl;
std::ostringstream errstr;
if (!s->auth_caps.parse(g_ceph_context, auth_cap_str, &errstr)) {
dout(1) << __func__ << ": auth cap parse error: " << errstr.str()
<< " parsing '" << auth_cap_str << "'" << dendl;
clog->warn() << name << " mds cap '" << auth_cap_str
<< "' does not parse: " << errstr.str();
is_valid = false;
}
} catch (buffer::error& e) {
// Assume legacy auth, defaults to:
// * permit all filesystem ops
// * permit no `tell` ops
dout(1) << __func__ << ": cannot decode auth caps bl of length " << caps_info.caps.length() << dendl;
is_valid = false;
}
} catch (buffer::error& e) {
// Assume legacy auth, defaults to:
// * permit all filesystem ops
// * permit no `tell` ops
dout(1) << __func__ << ": cannot decode auth caps bl of length " << caps_info.caps.length() << dendl;
}
}
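For reference, a hedged sketch exercising the same cap-parsing path directly; MDSAuthCaps and its header come from the MDS tree, the CephContext is supplied by the caller, and the capability string is only an example:

#include <iostream>
#include <sstream>
#include "mds/MDSAuthCaps.h"  // assumed in-tree include path

bool demo_parse_caps(CephContext *cct)
{
  MDSAuthCaps caps;
  std::ostringstream err;
  if (!caps.parse(cct, "allow rw", &err)) {
    std::cerr << "cap string rejected: " << err.str() << std::endl;
    return false;
  }
  return true;
}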

View File

@@ -22,13 +22,13 @@
class MStatfs : public PaxosServiceMessage {
static const int HEAD_VERSION = 2;
static const int COMPAT_VERSION = 0;
static const int COMPAT_VERSION = 1;
public:
uuid_d fsid;
boost::optional<int64_t> data_pool;
MStatfs() : PaxosServiceMessage(CEPH_MSG_STATFS, 0, HEAD_VERSION) {}
MStatfs() : PaxosServiceMessage(CEPH_MSG_STATFS, 0, HEAD_VERSION, COMPAT_VERSION) {}
MStatfs(const uuid_d& f, ceph_tid_t t, boost::optional<int64_t> _data_pool,
version_t v) : PaxosServiceMessage(CEPH_MSG_STATFS, v,
HEAD_VERSION, COMPAT_VERSION),
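A minimal sketch, assuming the surrounding message plumbing, of constructing a statfs request that targets a single pool through the optional data_pool field; the pool id and the caller-supplied fsid/tid/version are placeholders:

#include <boost/optional.hpp>
#include "messages/MStatfs.h"  // assumed in-tree include path

MStatfs *make_statfs(const uuid_d& fsid, ceph_tid_t tid, version_t last_seen)
{
  boost::optional<int64_t> data_pool = 3;  // placeholder: restrict to pool id 3
  // the default-constructed form now advertises COMPAT_VERSION = 1 as well
  return new MStatfs(fsid, tid, data_pool, last_seen);
}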

View File

@@ -521,8 +521,17 @@ class AddDataPoolHandler : public FileSystemCommandHandler
return 0;
}
mon->osdmon()->do_application_enable(poolid,
pg_pool_t::APPLICATION_NAME_CEPHFS);
// if we're running as luminous, we have to set the pool application metadata
if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS ||
mon->osdmon()->pending_inc.new_require_osd_release >= CEPH_RELEASE_LUMINOUS) {
if (!mon->osdmon()->is_writeable()) {
// not allowed to write yet, so retry when we can
mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
return -EAGAIN;
}
mon->osdmon()->do_application_enable(poolid, pg_pool_t::APPLICATION_NAME_CEPHFS);
mon->osdmon()->propose_pending();
}
fsmap.modify_filesystem(
fs->fscid,

View File

@@ -509,6 +509,10 @@ COMMAND("osd lspools " \
COMMAND_WITH_FLAG("osd crush rule list", "list crush rules", "osd", "r", "cli,rest",
FLAG(DEPRECATED))
COMMAND("osd crush rule ls", "list crush rules", "osd", "r", "cli,rest")
COMMAND("osd crush rule ls-by-class " \
"name=class,type=CephString,goodchars=[A-Za-z0-9-_.]", \
"list all crush rules that reference the same <class>", \
"osd", "r", "cli,rest")
COMMAND("osd crush rule dump " \
"name=name,type=CephString,goodchars=[A-Za-z0-9-_.],req=false", \
"dump crush rule <name> (default all)", \
@@ -646,6 +650,11 @@ COMMAND("osd crush rule create-erasure " \
COMMAND("osd crush rule rm " \
"name=name,type=CephString,goodchars=[A-Za-z0-9-_.] ", \
"remove crush rule <name>", "osd", "rw", "cli,rest")
COMMAND("osd crush rule rename " \
"name=srcname,type=CephString,goodchars=[A-Za-z0-9-_.] " \
"name=dstname,type=CephString,goodchars=[A-Za-z0-9-_.]", \
"rename crush rule <srcname> to <dstname>",
"osd", "rw", "cli,rest")
COMMAND("osd crush tree "
"name=shadow,type=CephChoices,strings=--show-shadow,req=false", \
"dump crush buckets and items in a tree view",

View File

@@ -796,6 +796,8 @@ int Monitor::init()
mgr_messenger->add_dispatcher_tail(this); // for auth ms_* calls
bootstrap();
// add features of myself into feature_map
session_map.feature_map.add_mon(con_self->get_features());
return 0;
}
@@ -2707,7 +2709,12 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f)
if (f) {
f->dump_stream("fsid") << monmap->get_fsid();
get_health_status(false, f, nullptr);
if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
get_health_status(false, f, nullptr);
} else {
list<string> health_str;
get_health(health_str, nullptr, f);
}
f->dump_unsigned("election_epoch", get_epoch());
{
f->open_array_section("quorum");

View File

@@ -3261,6 +3261,12 @@ epoch_t OSDMonitor::send_pg_creates(int osd, Connection *con, epoch_t next) cons
dout(30) << __func__ << " osd." << osd << " next=" << next
<< " " << creating_pgs_by_osd_epoch << dendl;
std::lock_guard<std::mutex> l(creating_pgs_lock);
if (creating_pgs_epoch <= creating_pgs.last_scan_epoch) {
dout(20) << __func__
<< " not using stale creating_pgs@" << creating_pgs_epoch << dendl;
// the subscribers will be updated when the mapping is completed anyway
return next;
}
auto creating_pgs_by_epoch = creating_pgs_by_osd_epoch.find(osd);
if (creating_pgs_by_epoch == creating_pgs_by_osd_epoch.end())
return next;
@@ -4923,6 +4929,34 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
osdmap.crush->list_rules(&ss);
rdata.append(ss.str());
}
} else if (prefix == "osd crush rule ls-by-class") {
string class_name;
cmd_getval(g_ceph_context, cmdmap, "class", class_name);
if (class_name.empty()) {
ss << "no class specified";
r = -EINVAL;
goto reply;
}
set<int> rules;
r = osdmap.crush->get_rules_by_class(class_name, &rules);
if (r < 0) {
ss << "failed to get rules by class '" << class_name << "'";
goto reply;
}
if (f) {
f->open_array_section("rules");
for (auto &rule: rules) {
f->dump_string("name", osdmap.crush->get_rule_name(rule));
}
f->close_section();
f->flush(rdata);
} else {
ostringstream rs;
for (auto &rule: rules) {
rs << osdmap.crush->get_rule_name(rule) << "\n";
}
rdata.append(rs.str());
}
} else if (prefix == "osd crush rule dump") {
string name;
cmd_getval(g_ceph_context, cmdmap, "name", name);
@@ -5034,14 +5068,24 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
} else if (prefix == "osd crush class ls-osd") {
string name;
cmd_getval(g_ceph_context, cmdmap, "class", name);
boost::scoped_ptr<Formatter> f(Formatter::create(format, "json-pretty", "json-pretty"));
set<int> osds;
osdmap.crush->get_devices_by_class(name, &osds);
f->open_array_section("osds");
for (auto& osd : osds)
f->dump_int("osd", osd);
f->close_section();
f->flush(rdata);
if (f) {
f->open_array_section("osds");
for (auto &osd: osds)
f->dump_int("osd", osd);
f->close_section();
f->flush(rdata);
} else {
bool first = true;
for (auto &osd : osds) {
if (!first)
ds << "\n";
first = false;
ds << osd;
}
rdata.append(ds);
}
} else if (prefix == "osd erasure-code-profile ls") {
const auto &profiles = osdmap.get_erasure_code_profiles();
if (f)
@@ -5719,15 +5763,20 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid,
_get_pending_crush(newcrush);
ostringstream err;
CrushTester tester(newcrush, err);
tester.set_min_x(0);
tester.set_max_x(50);
tester.set_rule(crush_rule);
auto start = ceph::coarse_mono_clock::now();
r = tester.test_with_fork(g_conf->mon_lease);
auto duration = ceph::coarse_mono_clock::now() - start;
if (r < 0) {
dout(10) << " tester.test_with_fork returns " << r
<< ": " << err.str() << dendl;
*ss << "crush test failed with " << r << ": " << err.str();
return r;
}
dout(10) << __func__ << " crush smoke test duration: "
<< duration << dendl;
}
unsigned size, min_size;
r = prepare_pool_size(pool_type, erasure_code_profile, &size, &min_size, ss);
@@ -6703,6 +6752,11 @@ int OSDMonitor::prepare_command_osd_create(
{
dout(10) << __func__ << " id " << id << " uuid " << uuid << dendl;
assert(existing_id);
if (osdmap.is_destroyed(id)) {
ss << "ceph osd create has been deprecated. Please use ceph osd new "
"instead.";
return -EINVAL;
}
if (uuid.is_zero()) {
dout(10) << __func__ << " no uuid; assuming legacy `osd create`" << dendl;
@@ -7294,8 +7348,11 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
dout(10) << " testing map" << dendl;
stringstream ess;
CrushTester tester(crush, ess);
tester.set_min_x(0);
tester.set_max_x(50);
auto start = ceph::coarse_mono_clock::now();
int r = tester.test_with_fork(g_conf->mon_lease);
auto duration = ceph::coarse_mono_clock::now() - start;
if (r < 0) {
dout(10) << " tester.test_with_fork returns " << r
<< ": " << ess.str() << dendl;
@@ -7303,7 +7360,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
err = r;
goto reply;
}
dout(10) << " crush test result " << ess.str() << dendl;
dout(10) << __func__ << " crush smoke test duration: "
<< duration << ", result: " << ess.str() << dendl;
}
pending_inc.crush = data;
@@ -8470,6 +8528,36 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
get_last_committed() + 1));
return true;
} else if (prefix == "osd crush rule rename") {
string srcname;
string dstname;
cmd_getval(g_ceph_context, cmdmap, "srcname", srcname);
cmd_getval(g_ceph_context, cmdmap, "dstname", dstname);
if (srcname.empty() || dstname.empty()) {
ss << "must specify both source rule name and destination rule name";
err = -EINVAL;
goto reply;
}
if (srcname == dstname) {
ss << "destination rule name is equal to source rule name";
err = 0;
goto reply;
}
CrushWrapper newcrush;
_get_pending_crush(newcrush);
err = newcrush.rename_rule(srcname, dstname, &ss);
if (err < 0) {
// ss has reason for failure
goto reply;
}
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
getline(ss, rs);
wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs,
get_last_committed() + 1));
return true;
} else if (prefix == "osd setmaxosd") {
int64_t newmax;
if (!cmd_getval(g_ceph_context, cmdmap, "newmax", newmax)) {
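As a usage illustration for the two rule commands wired up above (osd crush rule ls-by-class and osd crush rule rename), a hedged sketch issuing them through the librados C++ API; the connected cluster handle, class name and rule names are placeholders, not part of this change:

#include <rados/librados.hpp>
#include <iostream>
#include <string>

int demo_rule_commands(librados::Rados& cluster)  // assumed already connected
{
  librados::bufferlist inbl, outbl;
  std::string outs;

  // list rules that reference the "ssd" device class
  int r = cluster.mon_command(
      "{\"prefix\": \"osd crush rule ls-by-class\", \"class\": \"ssd\"}",
      inbl, &outbl, &outs);
  if (r < 0)
    return r;
  std::cout << outbl.c_str() << std::endl;

  // rename a rule
  return cluster.mon_command(
      "{\"prefix\": \"osd crush rule rename\", "
      "\"srcname\": \"replicated_ssd\", \"dstname\": \"fast_ssd\"}",
      inbl, &outbl, &outs);
}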

View File

@@ -396,7 +396,7 @@ void PGMapDigest::recovery_summary(Formatter *f, list<string> *psl,
} else {
ostringstream ss;
ss << delta_sum.stats.sum.num_objects_unfound
<< "/" << delta_sum.stats.sum.num_objects << " unfound (" << b << "%)";
<< "/" << delta_sum.stats.sum.num_objects << " objects unfound (" << b << "%)";
psl->push_back(ss.str());
}
}
@@ -3097,7 +3097,7 @@ void PGMap::get_health_checks(
snprintf(b, sizeof(b), "%.3lf", pc);
ostringstream ss;
ss << pg_sum.stats.sum.num_objects_unfound
<< "/" << pg_sum.stats.sum.num_objects << " unfound (" << b << "%)";
<< "/" << pg_sum.stats.sum.num_objects << " objects unfound (" << b << "%)";
auto& d = checks->add("OBJECT_UNFOUND", HEALTH_WARN, ss.str());
for (auto& p : pg_stat) {
@@ -3188,7 +3188,7 @@ void PGMap::get_health_checks(
}
if (!error_detail.empty()) {
ostringstream ss;
ss << warn << " stuck requests are blocked > "
ss << error << " stuck requests are blocked > "
<< err_age << " sec";
auto& d = checks->add("REQUEST_STUCK", HEALTH_ERR, ss.str());
d.detail.swap(error_detail);
@@ -4567,6 +4567,9 @@ int reweight::by_utilization(
if (pools && pools->count(pg.first.pool()) == 0)
continue;
for (const auto acting : pg.second.acting) {
if (!osdmap.exists(acting)) {
continue;
}
if (acting >= (int)pgs_by_osd.size())
pgs_by_osd.resize(acting);
if (pgs_by_osd[acting] == 0) {

Some files were not shown because too many files have changed in this diff.