update sources to v12.1.4

This commit is contained in:
Fabian Grünbichler 2017-08-16 09:18:31 +02:00
parent cbb314ce1f
commit 35e4c4457e
39 changed files with 681 additions and 102 deletions

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 2.8.11)
project(ceph)
set(VERSION 12.1.3)
set(VERSION 12.1.4)
if(POLICY CMP0046)
# Tweak policies (this one disables "missing" dependency warning)

View File

@ -1,7 +1,7 @@
# Contributor: John Coyle <dx9err@gmail.com>
# Maintainer: John Coyle <dx9err@gmail.com>
pkgname=ceph
pkgver=12.1.3
pkgver=12.1.4
pkgrel=0
pkgdesc="Ceph is a distributed object store and file system"
pkgusers="ceph"
@ -63,7 +63,7 @@ makedepends="
xmlstarlet
yasm
"
source="ceph-12.1.3.tar.bz2"
source="ceph-12.1.4.tar.bz2"
subpackages="
$pkgname-base
$pkgname-common
@ -116,7 +116,7 @@ _sysconfdir=/etc
_udevrulesdir=/etc/udev/rules.d
_python_sitelib=/usr/lib/python2.7/site-packages
builddir=$srcdir/ceph-12.1.3
builddir=$srcdir/ceph-12.1.4
build() {
export CEPH_BUILD_VIRTUALENV=$builddir

View File

@ -61,7 +61,7 @@
# main package definition
#################################################################################
Name: ceph
Version: 12.1.3
Version: 12.1.4
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
@ -76,7 +76,7 @@ License: LGPL-2.1 and CC-BY-SA-1.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and
Group: System/Filesystems
%endif
URL: http://ceph.com/
Source0: http://ceph.com/download/ceph-12.1.3.tar.bz2
Source0: http://ceph.com/download/ceph-12.1.4.tar.bz2
%if 0%{?suse_version}
%if 0%{?is_opensuse}
ExclusiveArch: x86_64 aarch64 ppc64 ppc64le
@ -772,7 +772,7 @@ python-rbd, python-rgw or python-cephfs instead.
# common
#################################################################################
%prep
%autosetup -p1 -n ceph-12.1.3
%autosetup -p1 -n ceph-12.1.4
%build
%if 0%{with cephfs_java}

View File

@ -1,3 +1,9 @@
ceph (12.1.4-1) stable; urgency=medium
* New upstream release
-- Ceph Release Team <ceph-maintainers@ceph.com> Tue, 15 Aug 2017 13:45:08 +0000
ceph (12.1.3-1) stable; urgency=medium
* New upstream release

View File

@ -39,7 +39,7 @@ Synopsis
| **ceph** **mon_status**
| **ceph** **osd** [ *blacklist* \| *blocked-by* \| *create* \| *new* \| *deep-scrub* \| *df* \| *down* \| *dump* \| *erasure-code-profile* \| *find* \| *getcrushmap* \| *getmap* \| *getmaxosd* \| *in* \| *lspools* \| *map* \| *metadata* \| *out* \| *pause* \| *perf* \| *pg-temp* \| *force-create-pg* \| *primary-affinity* \| *primary-temp* \| *repair* \| *reweight* \| *reweight-by-pg* \| *rm* \| *destroy* \| *purge* \| *scrub* \| *set* \| *setcrushmap* \| *setmaxosd* \| *stat* \| *tree* \| *unpause* \| *unset* ] ...
| **ceph** **osd** [ *blacklist* \| *blocked-by* \| *create* \| *new* \| *deep-scrub* \| *df* \| *down* \| *dump* \| *erasure-code-profile* \| *find* \| *getcrushmap* \| *getmap* \| *getmaxosd* \| *in* \| *lspools* \| *map* \| *metadata* \| *ok-to-stop* \| *out* \| *pause* \| *perf* \| *pg-temp* \| *force-create-pg* \| *primary-affinity* \| *primary-temp* \| *repair* \| *reweight* \| *reweight-by-pg* \| *rm* \| *destroy* \| *purge* \| *safe-to-destroy* \| *scrub* \| *set* \| *setcrushmap* \| *setmaxosd* \| *stat* \| *tree* \| *unpause* \| *unset* ] ...
| **ceph** **osd** **crush** [ *add* \| *add-bucket* \| *create-or-move* \| *dump* \| *get-tunable* \| *link* \| *move* \| *remove* \| *rename-bucket* \| *reweight* \| *reweight-all* \| *reweight-subtree* \| *rm* \| *rule* \| *set* \| *set-tunable* \| *show-tunables* \| *tunables* \| *unlink* ] ...
@ -874,6 +874,18 @@ Usage::
ceph osd out <ids> [<ids>...]
Subcommand ``ok-to-stop`` checks whether the list of OSD(s) can be
stopped without immediately making data unavailable. That is, all
data should remain readable and writeable, although data redundancy
may be reduced as some PGs may end up in a degraded (but active)
state. It will return a success code if it is okay to stop the
OSD(s), or an error code and informative message if it is not or if no
conclusion can be drawn at the current time.
Usage::
ceph osd ok-to-stop <id> [<ids>...]
Subcommand ``pause`` pauses osd.
Usage::
@ -1066,6 +1078,16 @@ Usage::
ceph osd purge <id> {--yes-i-really-mean-it}
Subcommand ``safe-to-destroy`` checks whether it is safe to remove or
destroy an OSD without reducing overall data redundancy or durability.
It will return a success code if it is definitely safe, or an error
code and informative message if it is not or if no conclusion can be
drawn at the current time.
Usage::
ceph osd safe-to-destroy <id> [<ids>...]
Subcommand ``scrub`` initiates scrub on specified osd.
Usage::

View File

@ -210,6 +210,11 @@ Major Changes from Kraken
- ``ceph osd {add,rm}-{noout,noin,nodown,noup}`` allow the
`noout`, `noin`, `nodown`, and `noup` flags to be applied to
specific OSDs.
- ``ceph osd safe-to-destroy <osd(s)>`` will report whether it is safe to
remove or destroy OSD(s) without reducing data durability or redundancy.
- ``ceph osd ok-to-stop <osd(s)>`` will report whether it is okay to stop
OSD(s) without immediately compromising availability (i.e., all PGs
should remain active but may be degraded).
- ``ceph log last [n]`` will output the last *n* lines of the cluster
log.
- ``ceph mgr dump`` will dump the MgrMap, including the currently active

View File

@ -201,6 +201,18 @@ function TEST_mon_classes() {
# test set-device-class implicitly change class
ceph osd crush set-device-class hdd osd.0 || return 1
expect_failure $dir EBUSY ceph osd crush set-device-class nvme osd.0 || return 1
# test class rename
ceph osd crush rm-device-class all || return 1
ceph osd crush set-device-class class_1 all || return 1
ceph osd crush class ls | grep 'class_1' || return 1
ceph osd crush tree --show-shadow | grep 'class_1' || return 1
ceph osd crush rule create-replicated class_1_rule default host class_1 || return 1
ceph osd crush class rename class_1 class_2
ceph osd crush class ls | grep 'class_1' && return 1
ceph osd crush tree --show-shadow | grep 'class_1' && return 1
ceph osd crush class ls | grep 'class_2' || return 1
ceph osd crush tree --show-shadow | grep 'class_2' || return 1
}
main crush-classes "$@"

View File

@ -10,6 +10,9 @@ tasks:
- .*clock.*skew.*
- clocks not synchronized
- overall HEALTH_
- (MON_CLOCK_SKEW)
- \(MON_CLOCK_SKEW\)
- \(MGR_DOWN\)
- \(PG_
- No standby daemons available
- mon_clock_skew_check:
expect-skew: true

View File

@ -12,6 +12,10 @@ overrides:
osd heartbeat grace: 40
mon:
mon osd crush smoke test: false
log-whitelist:
- overall HEALTH_
# valgrind is slow.. we might get PGs stuck peering etc
- \(PG_
valgrind:
mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
osd: [--tool=memcheck]

View File

@ -453,6 +453,8 @@ class Filesystem(MDSCluster):
data_pool_name, pgs_per_fs_pool.__str__())
self.mon_manager.raw_cluster_cmd('fs', 'new',
self.name, self.metadata_pool_name, data_pool_name)
self.check_pool_application(self.metadata_pool_name)
self.check_pool_application(data_pool_name)
# Turn off spurious standby count warnings from modifying max_mds in tests.
try:
self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0')
@ -465,6 +467,17 @@ class Filesystem(MDSCluster):
self.getinfo(refresh = True)
def check_pool_application(self, pool_name):
    """
    Verify that the named pool advertises 'cephfs' as an enabled application.

    Looks the pool up by name in the OSD dump and, if the pool carries an
    'application_metadata' section, requires that 'cephfs' is one of its keys.
    Pools without any application metadata are accepted unchanged (pre-luminous
    maps may not have the field at all).

    :param pool_name: name of the pool to check
    :raises RuntimeError: if the pool has application metadata that does not
        include 'cephfs'
    """
    osd_map = self.mon_manager.get_osd_dump_json()
    for pool in osd_map['pools']:
        if pool['pool_name'] == pool_name:
            if "application_metadata" in pool:
                if "cephfs" not in pool['application_metadata']:
                    # Bug fix: the original message used the printf-style
                    # token "%p" together with str.format(), so the pool
                    # name was never substituted into the error text.
                    raise RuntimeError(
                        "Pool {0} does not name cephfs as application!".format(
                            pool_name))
def __del__(self):
if getattr(self._ctx, "filesystem", None) == self:
delattr(self._ctx, "filesystem")

View File

@ -11,3 +11,4 @@ overrides:
- \(OBJECT_
- \(REQUEST_SLOW\)
- \(TOO_FEW_PGS\)
- \(MON_DOWN\)

View File

@ -155,4 +155,35 @@ ceph osd crush weight-set create-compat
ceph osd crush weight-set ls | grep '(compat)'
ceph osd crush weight-set rm-compat
# weight set vs device classes
ceph osd pool create cool 2
ceph osd pool create cold 2
ceph osd pool set cold size 2
ceph osd crush weight-set create-compat
ceph osd crush weight-set create cool flat
ceph osd crush weight-set create cold positional
ceph osd crush rm-device-class osd.0
ceph osd crush weight-set reweight-compat osd.0 10.5
ceph osd crush weight-set reweight cool osd.0 11.5
ceph osd crush weight-set reweight cold osd.0 12.5 12.4
ceph osd crush set-device-class fish osd.0
ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 10\\.
ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 11\\.
ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 12\\.
ceph osd crush rm-device-class osd.0
ceph osd crush set-device-class globster osd.0
ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 10\\.
ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 11\\.
ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 12\\.
ceph osd crush weight-set reweight-compat osd.0 7.5
ceph osd crush weight-set reweight cool osd.0 8.5
ceph osd crush weight-set reweight cold osd.0 6.5 6.6
ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 7\\.
ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 8\\.
ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 6\\.
ceph osd crush rm-device-class osd.0
ceph osd pool rm cool cool --yes-i-really-really-mean-it
ceph osd pool rm cold cold --yes-i-really-really-mean-it
ceph osd crush weight-set rm-compat
echo OK

View File

@ -1,2 +1,2 @@
c56d9c07b342c08419bbc18dcf2a4c5fae62b9cf
v12.1.3
a5f84b37668fc8e03165aaf5cbb380c78e4deba4
v12.1.4

View File

@ -2982,7 +2982,7 @@ std::vector<Option> get_global_options() {
.set_description(""),
Option("memstore_page_set", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_default(false)
.set_description(""),
Option("memstore_page_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)

View File

@ -364,11 +364,8 @@ bool CrushWrapper::_maybe_remove_last_instance(CephContext *cct, int item, bool
return true;
}
int CrushWrapper::remove_root(int item, bool unused)
int CrushWrapper::remove_root(int item)
{
if (unused && _bucket_is_in_use(item))
return 0;
crush_bucket *b = get_bucket(item);
if (IS_ERR(b)) {
// should be idempotent
@ -383,7 +380,7 @@ int CrushWrapper::remove_root(int item, bool unused)
for (unsigned n = 0; n < b->size; n++) {
if (b->items[n] >= 0)
continue;
int r = remove_root(b->items[n], unused);
int r = remove_root(b->items[n]);
if (r < 0)
return r;
}
@ -513,13 +510,13 @@ int CrushWrapper::_remove_item_under(
if (id == item) {
ldout(cct, 5) << "_remove_item_under removing item " << item
<< " from bucket " << b->id << dendl;
bucket_remove_item(b, item);
for (auto& p : choose_args) {
// weight down each weight-set to 0 before we remove the item
vector<int> weightv(get_choose_args_positions(p.second), 0);
_choose_args_adjust_item_weight_in_bucket(
cct, p.second, b->id, item, weightv, nullptr);
}
bucket_remove_item(b, item);
adjust_item_weight(cct, b->id, b->weight);
ret = 0;
} else if (id < 0) {
@ -1364,6 +1361,42 @@ int CrushWrapper::get_parent_of_type(int item, int type) const
return item;
}
// Rename a device class from srcname to dstname, updating both the
// forward (id -> name) and reverse (name -> id) class maps and the
// names of any shadow buckets ("<bucket>~<class>") derived from the
// old class name.
//
// Returns 0 on success, -ENOENT if srcname does not exist, or
// -EEXIST if dstname is already taken.
int CrushWrapper::rename_class(const string& srcname, const string& dstname)
{
// class_rname maps class name -> class id; use it to validate both names.
auto i = class_rname.find(srcname);
if (i == class_rname.end())
return -ENOENT;
auto j = class_rname.find(dstname);
if (j != class_rname.end())
return -EEXIST;
int class_id = i->second;
// forward map must agree with the reverse map we just consulted
assert(class_name.count(class_id));
// rename any shadow buckets of old class name
for (auto &it: class_map) {
// negative ids are buckets; class_map tells us which class each belongs to
if (it.first < 0 && it.second == class_id) {
string old_name = get_item_name(it.first);
// shadow bucket names have the form "<bucket>~<class>"
size_t pos = old_name.find("~");
assert(pos != string::npos);
string name_no_class = old_name.substr(0, pos);
string old_class_name = old_name.substr(pos + 1);
assert(old_class_name == srcname);
string new_name = name_no_class + "~" + dstname;
// we do not use set_item_name
// because the name is intentionally invalid
name_map[it.first] = new_name;
// reverse name map is now stale; force a rebuild on next use
have_rmaps = false;
}
}
// rename class
class_rname.erase(srcname);
class_name.erase(class_id);
class_rname[dstname] = class_id;
class_name[class_id] = dstname;
return 0;
}
int CrushWrapper::populate_classes(
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket)
{
@ -1374,6 +1407,10 @@ int CrushWrapper::populate_classes(
used_ids.insert(q.second);
}
}
// accumulate weight values for each carg and bucket as we go. because it is
// depth first, we will have the nested bucket weights we need when we
// finish constructing the containing buckets.
map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
set<int> roots;
find_nonshadow_roots(roots);
for (auto &r : roots) {
@ -1382,7 +1419,7 @@ int CrushWrapper::populate_classes(
for (auto &c : class_name) {
int clone;
int res = device_class_clone(r, c.first, old_class_bucket, used_ids,
&clone);
&clone, &cmap_item_weight);
if (res < 0)
return res;
}
@ -1390,14 +1427,14 @@ int CrushWrapper::populate_classes(
return 0;
}
int CrushWrapper::trim_roots_with_class(bool unused)
int CrushWrapper::trim_roots_with_class()
{
set<int> roots;
find_shadow_roots(roots);
for (auto &r : roots) {
if (r >= 0)
continue;
int res = remove_root(r, unused);
int res = remove_root(r);
if (res)
return res;
}
@ -1703,6 +1740,10 @@ int CrushWrapper::add_bucket(
int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
{
__u32 new_size = bucket->size + 1;
int r = crush_bucket_add_item(crush, bucket, item, weight);
if (r < 0) {
return r;
}
for (auto w : choose_args) {
crush_choose_arg_map arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
@ -1721,7 +1762,7 @@ int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
arg->ids_size = new_size;
}
}
return crush_bucket_add_item(crush, bucket, item, weight);
return 0;
}
int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
@ -1732,6 +1773,10 @@ int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
if (bucket->items[position] == item)
break;
assert(position != bucket->size);
int r = crush_bucket_remove_item(crush, bucket, item);
if (r < 0) {
return r;
}
for (auto w : choose_args) {
crush_choose_arg_map arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
@ -1740,19 +1785,27 @@ int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
assert(weight_set->size - 1 == new_size);
for (__u32 k = position; k < new_size; k++)
weight_set->weights[k] = weight_set->weights[k+1];
weight_set->weights = (__u32*)realloc(weight_set->weights,
new_size * sizeof(__u32));
if (new_size) {
weight_set->weights = (__u32*)realloc(weight_set->weights,
new_size * sizeof(__u32));
} else {
weight_set->weights = NULL;
}
weight_set->size = new_size;
}
if (arg->ids_size) {
assert(arg->ids_size - 1 == new_size);
for (__u32 k = position; k < new_size; k++)
arg->ids[k] = arg->ids[k+1];
arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
if (new_size) {
arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
} else {
arg->ids = NULL;
}
arg->ids_size = new_size;
}
}
return crush_bucket_remove_item(crush, bucket, item);
return 0;
}
int CrushWrapper::update_device_class(int id,
@ -1818,7 +1871,8 @@ int CrushWrapper::device_class_clone(
int original_id, int device_class,
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
const std::set<int32_t>& used_ids,
int *clone)
int *clone,
map<int,map<int,vector<int>>> *cmap_item_weight)
{
const char *item_name = get_item_name(original_id);
if (item_name == NULL)
@ -1831,6 +1885,7 @@ int CrushWrapper::device_class_clone(
*clone = get_item_id(copy_name);
return 0;
}
crush_bucket *original = get_bucket(original_id);
assert(!IS_ERR(original));
crush_bucket *copy = crush_make_bucket(crush,
@ -1839,28 +1894,37 @@ int CrushWrapper::device_class_clone(
original->type,
0, NULL, NULL);
assert(copy);
vector<unsigned> item_orig_pos; // new item pos -> orig item pos
for (unsigned i = 0; i < original->size; i++) {
int item = original->items[i];
int weight = crush_get_bucket_item_weight(original, i);
if (item >= 0) {
if (class_map.count(item) != 0 && class_map[item] == device_class) {
int res = bucket_add_item(copy, item, weight);
int res = crush_bucket_add_item(crush, copy, item, weight);
if (res)
return res;
} else {
continue;
}
} else {
int child_copy_id;
int res = device_class_clone(item, device_class, old_class_bucket,
used_ids, &child_copy_id);
used_ids, &child_copy_id,
cmap_item_weight);
if (res < 0)
return res;
crush_bucket *child_copy = get_bucket(child_copy_id);
assert(!IS_ERR(child_copy));
res = bucket_add_item(copy, child_copy_id, child_copy->weight);
res = crush_bucket_add_item(crush, copy, child_copy_id,
child_copy->weight);
if (res)
return res;
}
item_orig_pos.push_back(i);
}
assert(item_orig_pos.size() == copy->size);
int bno = 0;
if (old_class_bucket.count(original_id) &&
old_class_bucket.at(original_id).count(device_class)) {
@ -1878,14 +1942,51 @@ int CrushWrapper::device_class_clone(
if (res)
return res;
assert(!bno || bno == *clone);
res = set_item_class(*clone, device_class);
if (res < 0)
return res;
// we do not use set_item_name because the name is intentionally invalid
name_map[*clone] = copy_name;
if (have_rmaps)
name_rmap[copy_name] = *clone;
class_bucket[original_id][device_class] = *clone;
// set up choose_args for the new bucket.
for (auto& w : choose_args) {
crush_choose_arg_map& cmap = w.second;
if (-1-bno >= (int)cmap.size) {
unsigned new_size = -1-bno + 1;
cmap.args = (crush_choose_arg*)realloc(cmap.args,
new_size * sizeof(cmap.args[0]));
memset(cmap.args + cmap.size, 0,
(new_size - cmap.size) * sizeof(cmap.args[0]));
}
auto& o = cmap.args[-1-original_id];
auto& n = cmap.args[-1-bno];
n.ids_size = 0; // FIXME: implement me someday
n.weight_set_size = o.weight_set_size;
n.weight_set = (crush_weight_set*)calloc(
n.weight_set_size, sizeof(crush_weight_set));
for (size_t s = 0; s < n.weight_set_size; ++s) {
n.weight_set[s].size = copy->size;
n.weight_set[s].weights = (__u32*)calloc(copy->size, sizeof(__u32));
}
for (size_t s = 0; s < n.weight_set_size; ++s) {
vector<int> bucket_weights(n.weight_set_size);
for (size_t i = 0; i < copy->size; ++i) {
int item = copy->items[i];
if (item >= 0) {
n.weight_set[s].weights[i] = o.weight_set[s].weights[item_orig_pos[i]];
} else {
n.weight_set[s].weights[i] = (*cmap_item_weight)[w.first][item][s];
}
bucket_weights[s] += n.weight_set[s].weights[i];
}
(*cmap_item_weight)[w.first][bno] = bucket_weights;
}
}
return 0;
}
@ -1918,9 +2019,15 @@ bool CrushWrapper::_class_is_dead(int class_id)
void CrushWrapper::cleanup_dead_classes()
{
for (auto &c: class_name) {
if (_class_is_dead(c.first))
remove_class_name(c.second);
auto p = class_name.begin();
while (p != class_name.end()) {
if (_class_is_dead(p->first)) {
string n = p->second;
++p;
remove_class_name(n);
} else {
++p;
}
}
}
@ -1928,7 +2035,7 @@ int CrushWrapper::rebuild_roots_with_classes()
{
std::map<int32_t, map<int32_t, int32_t> > old_class_bucket = class_bucket;
cleanup_dead_classes();
int r = trim_roots_with_class(false);
int r = trim_roots_with_class();
if (r < 0)
return r;
class_bucket.clear();

View File

@ -834,10 +834,9 @@ public:
* when a bucket is in use.
*
* @param item id to remove
* @param unused true if only unused items should be removed
* @return 0 on success, negative on error
*/
int remove_root(int item, bool unused);
int remove_root(int item);
/**
* remove all instances of an item nested beneath a certain point from the map
@ -1213,14 +1212,16 @@ public:
int original, int device_class,
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
const std::set<int32_t>& used_ids,
int *clone);
int *clone,
map<int,map<int,vector<int>>> *cmap_item_weight);
int rename_class(const string& srcname, const string& dstname);
int populate_classes(
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
bool _class_is_dead(int class_id);
void cleanup_dead_classes();
int rebuild_roots_with_classes();
/* remove unused roots generated for class devices */
int trim_roots_with_class(bool unused);
int trim_roots_with_class();
void start_choose_profile() {
free(crush->choose_tries);

View File

@ -1030,12 +1030,11 @@ int crush_remove_straw_bucket_item(struct crush_map *map,
for (i = 0; i < bucket->h.size; i++) {
if (bucket->h.items[i] == item) {
bucket->h.size--;
if (bucket->item_weights[i] < bucket->h.weight)
bucket->h.weight -= bucket->item_weights[i];
else
bucket->h.weight = 0;
for (j = i; j < bucket->h.size; j++) {
for (j = i; j < bucket->h.size - 1; j++) {
bucket->h.items[j] = bucket->h.items[j+1];
bucket->item_weights[j] = bucket->item_weights[j+1];
}
@ -1044,7 +1043,11 @@ int crush_remove_straw_bucket_item(struct crush_map *map,
}
if (i == bucket->h.size)
return -ENOENT;
bucket->h.size--;
if (bucket->h.size == 0) {
/* don't bother reallocating */
return 0;
}
void *_realloc = NULL;
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
@ -1074,12 +1077,11 @@ int crush_remove_straw2_bucket_item(struct crush_map *map,
for (i = 0; i < bucket->h.size; i++) {
if (bucket->h.items[i] == item) {
bucket->h.size--;
if (bucket->item_weights[i] < bucket->h.weight)
bucket->h.weight -= bucket->item_weights[i];
else
bucket->h.weight = 0;
for (j = i; j < bucket->h.size; j++) {
for (j = i; j < bucket->h.size - 1; j++) {
bucket->h.items[j] = bucket->h.items[j+1];
bucket->item_weights[j] = bucket->item_weights[j+1];
}
@ -1089,6 +1091,12 @@ int crush_remove_straw2_bucket_item(struct crush_map *map,
if (i == bucket->h.size)
return -ENOENT;
bucket->h.size--;
if (!newsize) {
/* don't bother reallocating a 0-length array. */
return 0;
}
void *_realloc = NULL;
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {

View File

@ -924,6 +924,163 @@ bool DaemonServer::handle_command(MCommand *m)
});
cmdctx->reply(r, "");
return true;
} else if (prefix == "osd safe-to-destroy") {
vector<string> ids;
cmd_getval(g_ceph_context, cmdctx->cmdmap, "ids", ids);
set<int> osds;
int r;
cluster_state.with_osdmap([&](const OSDMap& osdmap) {
r = osdmap.parse_osd_id_list(ids, &osds, &ss);
});
if (!r && osds.empty()) {
ss << "must specify one or more OSDs";
r = -EINVAL;
}
if (r < 0) {
cmdctx->reply(r, ss);
return true;
}
set<int> active_osds, missing_stats, stored_pgs;
int affected_pgs = 0;
cluster_state.with_pgmap([&](const PGMap& pg_map) {
if (pg_map.num_pg_unknown > 0) {
ss << pg_map.num_pg_unknown << " pgs have unknown state; cannot draw"
<< " any conclusions";
r = -EAGAIN;
return;
}
int num_active_clean = 0;
for (auto& p : pg_map.num_pg_by_state) {
unsigned want = PG_STATE_ACTIVE|PG_STATE_CLEAN;
if ((p.first & want) == want) {
num_active_clean += p.second;
}
}
cluster_state.with_osdmap([&](const OSDMap& osdmap) {
for (auto osd : osds) {
if (!osdmap.exists(osd)) {
continue; // clearly safe to destroy
}
auto q = pg_map.num_pg_by_osd.find(osd);
if (q != pg_map.num_pg_by_osd.end()) {
if (q->second.acting > 0 || q->second.up > 0) {
active_osds.insert(osd);
affected_pgs += q->second.acting + q->second.up;
continue;
}
}
if (num_active_clean < pg_map.num_pg) {
// all pgs aren't active+clean; we need to be careful.
auto p = pg_map.osd_stat.find(osd);
if (p == pg_map.osd_stat.end()) {
missing_stats.insert(osd);
}
if (p->second.num_pgs > 0) {
stored_pgs.insert(osd);
}
}
}
});
});
if (!r && !active_osds.empty()) {
ss << "OSD(s) " << active_osds << " have " << affected_pgs
<< " pgs currently mapped to them";
r = -EBUSY;
} else if (!missing_stats.empty()) {
ss << "OSD(s) " << missing_stats << " have no reported stats, and not all"
<< " PGs are active+clean; we cannot draw any conclusions";
r = -EAGAIN;
} else if (!stored_pgs.empty()) {
ss << "OSD(s) " << stored_pgs << " last reported they still store some PG"
<< " data, and not all PGs are active+clean; we cannot be sure they"
<< " aren't still needed.";
r = -EBUSY;
}
if (r) {
cmdctx->reply(r, ss);
return true;
}
ss << "OSD(s) " << osds << " are safe to destroy without reducing data"
<< " durability.";
cmdctx->reply(0, ss);
return true;
} else if (prefix == "osd ok-to-stop") {
vector<string> ids;
cmd_getval(g_ceph_context, cmdctx->cmdmap, "ids", ids);
set<int> osds;
int r;
cluster_state.with_osdmap([&](const OSDMap& osdmap) {
r = osdmap.parse_osd_id_list(ids, &osds, &ss);
});
if (!r && osds.empty()) {
ss << "must specify one or more OSDs";
r = -EINVAL;
}
if (r < 0) {
cmdctx->reply(r, ss);
return true;
}
map<pg_t,int> pg_delta; // pgid -> net acting set size change
int dangerous_pgs = 0;
cluster_state.with_pgmap([&](const PGMap& pg_map) {
return cluster_state.with_osdmap([&](const OSDMap& osdmap) {
if (pg_map.num_pg_unknown > 0) {
ss << pg_map.num_pg_unknown << " pgs have unknown state; "
<< "cannot draw any conclusions";
r = -EAGAIN;
return;
}
for (auto osd : osds) {
auto p = pg_map.pg_by_osd.find(osd);
if (p != pg_map.pg_by_osd.end()) {
for (auto& pgid : p->second) {
--pg_delta[pgid];
}
}
}
for (auto& p : pg_delta) {
auto q = pg_map.pg_stat.find(p.first);
if (q == pg_map.pg_stat.end()) {
ss << "missing information about " << p.first << "; cannot draw"
<< " any conclusions";
r = -EAGAIN;
return;
}
if (!(q->second.state & PG_STATE_ACTIVE) ||
(q->second.state & PG_STATE_DEGRADED)) {
// we don't currently have a good way to tell *how* degraded
// a degraded PG is, so we have to assume we cannot remove
// any more replicas/shards.
++dangerous_pgs;
continue;
}
const pg_pool_t *pi = osdmap.get_pg_pool(p.first.pool());
if (!pi) {
++dangerous_pgs; // pool is creating or deleting
} else {
if (q->second.acting.size() + p.second < pi->min_size) {
++dangerous_pgs;
}
}
}
});
});
if (r) {
cmdctx->reply(r, ss);
return true;
}
if (dangerous_pgs) {
ss << dangerous_pgs << " PGs are already degraded or might become "
<< "unavailable";
cmdctx->reply(-EBUSY, ss);
return true;
}
ss << "OSD(s) " << osds << " are ok to stop without reducing"
<< " availability, provided there are no other concurrent failures"
<< " or interventions. " << pg_delta.size() << " PGs are likely to be"
<< " degraded (but remain available) as a result.";
cmdctx->reply(0, ss);
return true;
} else if (prefix == "pg force-recovery" ||
prefix == "pg force-backfill" ||
prefix == "pg cancel-force-recovery" ||

View File

@ -107,6 +107,13 @@ COMMAND("osd test-reweight-by-pg " \
"dry run of reweight OSDs by PG distribution [overload-percentage-for-consideration, default 120]", \
"osd", "r", "cli,rest")
COMMAND("osd safe-to-destroy name=ids,type=CephString,n=N",
"check whether osd(s) can be safely destroyed without reducing data durability",
"osd", "r", "cli,rest")
COMMAND("osd ok-to-stop name=ids,type=CephString,n=N",
"check whether osd(s) can be safely stopped without reducing immediate"\
" data availability", "osd", "r", "cli,rest")
COMMAND("osd scrub " \
"name=who,type=CephString", \
"initiate scrub on osd <who>, or use <all|any|*> to scrub all", \

View File

@ -179,11 +179,6 @@ class FsNewHandler : public FileSystemCommandHandler
pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata);
assert(metadata_pool != NULL); // Checked it existed above
// we must make these checks before we even allow ourselves to *think*
// about requesting a proposal to the osdmonitor and bail out now if
// we believe we must. bailing out *after* we request the proposal is
// bad business as we could have changed the osdmon's state and ending up
// returning an error to the user.
int r = _check_pool(mon->osdmon()->osdmap, data, false, force, &ss);
if (r < 0) {
return r;
@ -193,11 +188,21 @@ class FsNewHandler : public FileSystemCommandHandler
if (r < 0) {
return r;
}
mon->osdmon()->do_application_enable(data,
pg_pool_t::APPLICATION_NAME_CEPHFS);
mon->osdmon()->do_application_enable(metadata,
pg_pool_t::APPLICATION_NAME_CEPHFS);
// if we're running as luminous, we have to set the pool application metadata
if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS ||
mon->osdmon()->pending_inc.new_require_osd_release >= CEPH_RELEASE_LUMINOUS) {
if (!mon->osdmon()->is_writeable()) {
// not allowed to write yet, so retry when we can
mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
return -EAGAIN;
}
mon->osdmon()->do_application_enable(data,
pg_pool_t::APPLICATION_NAME_CEPHFS);
mon->osdmon()->do_application_enable(metadata,
pg_pool_t::APPLICATION_NAME_CEPHFS);
mon->osdmon()->propose_pending();
}
// All checks passed, go ahead and create.
fsmap.create_filesystem(fs_name, metadata, data,

View File

@ -555,6 +555,11 @@ COMMAND("osd crush rm-device-class " \
"remove class of the osd(s) <id> [<id>...]," \
"or use <all|any|*> to remove all.", \
"osd", "rw", "cli,rest")
COMMAND("osd crush class rename " \
"name=srcname,type=CephString,goodchars=[A-Za-z0-9-_] " \
"name=dstname,type=CephString,goodchars=[A-Za-z0-9-_]", \
"rename crush device class <srcname> to <dstname>", \
"osd", "rw", "cli,rest")
COMMAND("osd crush create-or-move " \
"name=id,type=CephOsdName " \
"name=weight,type=CephFloat,range=0.0 " \

View File

@ -3599,7 +3599,7 @@ void Monitor::handle_command(MonOpRequestRef op)
mdsmon()->count_metadata("ceph_version", &mds);
f->open_object_section("mds");
for (auto& p : mon) {
for (auto& p : mds) {
f->dump_int(p.first.c_str(), p.second);
overall[p.first] += p.second;
}

View File

@ -3156,11 +3156,14 @@ void OSDMonitor::check_pg_creates_sub(Subscription *sub)
void OSDMonitor::do_application_enable(int64_t pool_id,
const std::string &app_name)
{
assert(paxos->is_plugged());
assert(paxos->is_plugged() && is_writeable());
dout(20) << __func__ << ": pool_id=" << pool_id << ", app_name=" << app_name
<< dendl;
assert(osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS ||
pending_inc.new_require_osd_release >= CEPH_RELEASE_LUMINOUS);
auto pp = osdmap.get_pg_pool(pool_id);
assert(pp != nullptr);
@ -7454,7 +7457,43 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
new Monitor::C_Command(mon,op, 0, rs, get_last_committed() + 1));
return true;
}
} else if (prefix == "osd crush class rename") {
string srcname, dstname;
if (!cmd_getval(g_ceph_context, cmdmap, "srcname", srcname)) {
err = -EINVAL;
goto reply;
}
if (!cmd_getval(g_ceph_context, cmdmap, "dstname", dstname)) {
err = -EINVAL;
goto reply;
}
CrushWrapper newcrush;
_get_pending_crush(newcrush);
if (!newcrush.class_exists(srcname)) {
err = -ENOENT;
ss << "class '" << srcname << "' does not exist";
goto reply;
}
if (newcrush.class_exists(dstname)) {
err = -EEXIST;
ss << "class '" << dstname << "' already exists";
goto reply;
}
err = newcrush.rename_class(srcname, dstname);
if (err < 0) {
ss << "fail to rename '" << srcname << "' to '" << dstname << "' : "
<< cpp_strerror(err);
goto reply;
}
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
ss << "rename class '" << srcname << "' to '" << dstname << "'";
goto update;
} else if (prefix == "osd crush add-bucket") {
// os crush add-bucket <name> <type>
string name, typestr;
@ -11327,6 +11366,16 @@ int OSDMonitor::_prepare_remove_pool(
pending_inc.old_pg_upmap_items.insert(p.first);
}
}
// remove any choose_args for this pool
CrushWrapper newcrush;
_get_pending_crush(newcrush);
if (newcrush.have_choose_args(pool)) {
dout(10) << __func__ << " removing choose_args for pool " << pool << dendl;
newcrush.rm_choose_args(pool);
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
}
return 0;
}

View File

@ -817,10 +817,22 @@ struct C_Committed : public Context {
// Completion callback fired once the commit transaction is durable.
// Runs under the monitor lock; if a shutdown began while the write was
// in flight, only account for the finished commit (so Paxos::shutdown()
// can stop waiting) instead of proceeding with the normal refresh path.
void finish(int r) override {
assert(r >= 0);  // the store commit must not report failure
Mutex::Locker l(paxos->mon->lock);
if (paxos->is_shutdown()) {
paxos->abort_commit();
return;
}
paxos->commit_finish();
}
};
// Account for a commit that completed after shutdown started: decrement
// the in-flight commit counter and, once it reaches zero, wake whoever
// is blocked in Paxos::shutdown() waiting on shutdown_cond.
void Paxos::abort_commit()
{
  assert(commits_started > 0);
  if (--commits_started == 0) {
    shutdown_cond.Signal();
  }
}
void Paxos::commit_start()
{
dout(10) << __func__ << " " << (last_committed+1) << dendl;
@ -855,6 +867,7 @@ void Paxos::commit_start()
state = STATE_WRITING;
else
ceph_abort();
++commits_started;
if (mon->get_quorum().size() > 1) {
// cancel timeout event
@ -910,6 +923,8 @@ void Paxos::commit_finish()
// it doesn't need to flush the store queue
assert(is_writing() || is_writing_previous());
state = STATE_REFRESH;
assert(commits_started > 0);
--commits_started;
if (do_refresh()) {
commit_proposal();
@ -1301,9 +1316,17 @@ void Paxos::shutdown()
{
dout(10) << __func__ << " cancel all contexts" << dendl;
state = STATE_SHUTDOWN;
// discard pending transaction
pending_proposal.reset();
// Let store finish commits in progress
// XXX: I assume I can't use finish_contexts() because the store
// is going to trigger
while(commits_started > 0)
shutdown_cond.Wait(mon->lock);
finish_contexts(g_ceph_context, waiting_for_writeable, -ECANCELED);
finish_contexts(g_ceph_context, waiting_for_commit, -ECANCELED);
finish_contexts(g_ceph_context, waiting_for_readable, -ECANCELED);

View File

@ -230,6 +230,8 @@ public:
STATE_WRITING_PREVIOUS,
// leader: refresh following a commit
STATE_REFRESH,
// Shutdown after WRITING or WRITING_PREVIOUS
STATE_SHUTDOWN
};
/**
@ -257,6 +259,8 @@ public:
return "writing-previous";
case STATE_REFRESH:
return "refresh";
case STATE_SHUTDOWN:
return "shutdown";
default:
return "UNKNOWN";
}
@ -270,6 +274,9 @@ private:
/**
* @}
*/
int commits_started = 0;
Cond shutdown_cond;
public:
/**
@ -306,6 +313,9 @@ public:
/// @return 'true' if we are refreshing an update just committed
bool is_refresh() const { return state == STATE_REFRESH; }
/// @return 'true' if we are in the process of shutting down
bool is_shutdown() const { return state == STATE_SHUTDOWN; }
private:
/**
* @defgroup Paxos_h_recovery_vars Common recovery-related member variables
@ -880,6 +890,7 @@ private:
*/
void commit_start();
void commit_finish(); ///< finish a commit after txn becomes durable
void abort_commit(); ///< Handle commit finish after shutdown started
/**
* Commit the new value to stable storage as being the latest available
* version.

View File

@ -83,17 +83,17 @@ protected:
* Child must populate this during encode_pending() by calling encode_health().
*/
health_check_map_t health_checks;
public:
const health_check_map_t& get_health_checks() {
return health_checks;
}
protected:
/**
* format of our state in leveldb, 0 for default
*/
version_t format_version;
public:
const health_check_map_t& get_health_checks() {
return health_checks;
}
/**
* @defgroup PaxosService_h_callbacks Callback classes
* @{
@ -128,7 +128,6 @@ protected:
* @}
*/
public:
/**
* @param mn A Monitor instance
* @param p A Paxos instance

View File

@ -1703,9 +1703,13 @@ ostream& operator<<(ostream& out, const BlueStore::Blob& b)
if (b.is_spanning()) {
out << " spanning " << b.id;
}
out << " " << b.get_blob() << " " << b.get_blob_use_tracker()
<< " " << *b.shared_blob
<< ")";
out << " " << b.get_blob() << " " << b.get_blob_use_tracker();
if (b.shared_blob) {
out << " " << *b.shared_blob;
} else {
out << " (shared_blob=NULL)";
}
out << ")";
return out;
}
@ -9009,7 +9013,7 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t)
case Transaction::OP_TRUNCATE:
{
uint64_t off = op->off;
_truncate(txc, c, o, off);
r = _truncate(txc, c, o, off);
}
break;
@ -10329,10 +10333,14 @@ int BlueStore::_write(TransContext *txc,
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< dendl;
_assign_nid(txc, o);
int r = _do_write(txc, c, o, offset, length, bl, fadvise_flags);
txc->write_onode(o);
int r = 0;
if (offset + length >= OBJECT_MAX_SIZE) {
r = -E2BIG;
} else {
_assign_nid(txc, o);
r = _do_write(txc, c, o, offset, length, bl, fadvise_flags);
txc->write_onode(o);
}
dout(10) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< " = " << r << dendl;
@ -10347,8 +10355,13 @@ int BlueStore::_zero(TransContext *txc,
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< dendl;
_assign_nid(txc, o);
int r = _do_zero(txc, c, o, offset, length);
int r = 0;
if (offset + length >= OBJECT_MAX_SIZE) {
r = -E2BIG;
} else {
_assign_nid(txc, o);
r = _do_zero(txc, c, o, offset, length);
}
dout(10) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< " = " << r << dendl;
@ -10423,7 +10436,7 @@ void BlueStore::_do_truncate(
txc->write_onode(o);
}
void BlueStore::_truncate(TransContext *txc,
int BlueStore::_truncate(TransContext *txc,
CollectionRef& c,
OnodeRef& o,
uint64_t offset)
@ -10431,7 +10444,16 @@ void BlueStore::_truncate(TransContext *txc,
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << std::dec
<< dendl;
_do_truncate(txc, c, o, offset);
int r = 0;
if (offset >= OBJECT_MAX_SIZE) {
r = -E2BIG;
} else {
_do_truncate(txc, c, o, offset);
}
dout(10) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << std::dec
<< " = " << r << dendl;
return r;
}
int BlueStore::_do_remove(
@ -10913,6 +10935,7 @@ int BlueStore::_do_clone_range(
uint64_t end = srcoff + length;
uint32_t dirty_range_begin = 0;
uint32_t dirty_range_end = 0;
bool src_dirty = false;
for (auto ep = oldo->extent_map.seek_lextent(srcoff);
ep != oldo->extent_map.extent_map.end();
++ep) {
@ -10933,7 +10956,8 @@ int BlueStore::_do_clone_range(
// make sure it is shared
if (!blob.is_shared()) {
c->make_blob_shared(_assign_blobid(txc), e.blob);
if (dirty_range_begin == 0 && dirty_range_end == 0) {
if (!src_dirty) {
src_dirty = true;
dirty_range_begin = e.logical_offset;
}
assert(e.logical_end() > 0);
@ -10985,7 +11009,7 @@ int BlueStore::_do_clone_range(
dout(20) << __func__ << " dst " << *ne << dendl;
++n;
}
if (dirty_range_end > dirty_range_begin) {
if (src_dirty) {
oldo->extent_map.dirty_range(dirty_range_begin,
dirty_range_end - dirty_range_begin);
txc->write_onode(oldo);
@ -11012,6 +11036,11 @@ int BlueStore::_clone_range(TransContext *txc,
<< " to offset 0x" << dstoff << std::dec << dendl;
int r = 0;
if (srcoff + length >= OBJECT_MAX_SIZE ||
dstoff + length >= OBJECT_MAX_SIZE) {
r = -E2BIG;
goto out;
}
if (srcoff + length > oldo->onode.size) {
r = -EINVAL;
goto out;

View File

@ -2591,7 +2591,7 @@ private:
OnodeRef o,
uint64_t offset,
set<SharedBlob*> *maybe_unshared_blobs=0);
void _truncate(TransContext *txc,
int _truncate(TransContext *txc,
CollectionRef& c,
OnodeRef& o,
uint64_t offset);

View File

@ -923,7 +923,8 @@ void OSDService::set_injectfull(s_names type, int64_t count)
}
osd_stat_t OSDService::set_osd_stat(const struct store_statfs_t &stbuf,
vector<int>& hb_peers)
vector<int>& hb_peers,
int num_pgs)
{
uint64_t bytes = stbuf.total;
uint64_t used = bytes - stbuf.available;
@ -940,6 +941,7 @@ osd_stat_t OSDService::set_osd_stat(const struct store_statfs_t &stbuf,
osd_stat.kb = bytes >> 10;
osd_stat.kb_used = used >> 10;
osd_stat.kb_avail = avail >> 10;
osd_stat.num_pgs = num_pgs;
return osd_stat;
}
}
@ -954,7 +956,7 @@ void OSDService::update_osd_stat(vector<int>& hb_peers)
return;
}
auto new_stat = set_osd_stat(stbuf, hb_peers);
auto new_stat = set_osd_stat(stbuf, hb_peers, osd->get_num_pgs());
dout(20) << "update_osd_stat " << new_stat << dendl;
assert(new_stat.kb);
float ratio = ((float)new_stat.kb_used) / ((float)new_stat.kb);

View File

@ -1047,7 +1047,8 @@ public:
void update_osd_stat(vector<int>& hb_peers);
osd_stat_t set_osd_stat(const struct store_statfs_t &stbuf,
vector<int>& hb_peers);
vector<int>& hb_peers,
int num_pgs);
osd_stat_t get_osd_stat() {
Mutex::Locker l(stat_lock);
++seq;
@ -1951,6 +1952,11 @@ protected:
public:
PG *lookup_lock_pg(spg_t pgid);
int get_num_pgs() {
RWLock::RLocker l(pg_map_lock);
return pg_map.size();
}
protected:
PG *_open_lock_pg(OSDMapRef createmap,
spg_t pg, bool no_lockdep_check=false);

View File

@ -4641,3 +4641,23 @@ void OSDMap::check_health(health_check_map_t *checks) const
}
}
}
// Expand a list of OSD id tokens into a set of OSD ids.
//
// If the *first* token is a wildcard ("any", "all" or "*"), the result
// is every OSD in the map and any remaining tokens are ignored.
// Otherwise each token is parsed as an individual OSD id; a wildcard in
// any later position is treated as an ordinary (invalid) id token.
//
// @param ls  input tokens
// @param out [out] resulting set of osd ids (cleared first)
// @param ss  stream for error diagnostics
// @return 0 on success, -EINVAL if any token fails to parse
int OSDMap::parse_osd_id_list(const vector<string>& ls, set<int> *out,
			      ostream *ss) const
{
  out->clear();
  bool at_front = true;
  for (const auto& token : ls) {
    if (at_front && (token == "any" || token == "all" || token == "*")) {
      get_all_osds(*out);
      break;
    }
    at_front = false;
    long id = parse_osd_id(token.c_str(), ss);
    if (id < 0) {
      *ss << "invalid osd id '" << token << "'";
      return -EINVAL;
    }
    out->insert(id);
  }
  return 0;
}

View File

@ -1369,6 +1369,10 @@ public:
bool check_new_blacklist_entries() const { return new_blacklist_entries; }
void check_health(health_check_map_t *checks) const;
int parse_osd_id_list(const vector<string>& ls,
set<int> *out,
ostream *ss) const;
};
WRITE_CLASS_ENCODER_FEATURES(OSDMap)
WRITE_CLASS_ENCODER_FEATURES(OSDMap::Incremental)

View File

@ -1284,6 +1284,7 @@ public:
eversion_t on_disk_rollback_info_trimmed_to = eversion_t();
ObjectMap::ObjectMapIterator p = store->get_omap_iterator(log_coll, log_oid);
map<eversion_t, hobject_t> divergent_priors;
bool must_rebuild = force_rebuild_missing;
missing.may_include_deletes = false;
list<pg_log_entry_t> entries;
list<pg_log_dup_t> dups;
@ -1298,7 +1299,7 @@ public:
::decode(divergent_priors, bp);
ldpp_dout(dpp, 20) << "read_log_and_missing " << divergent_priors.size()
<< " divergent_priors" << dendl;
assert(force_rebuild_missing);
must_rebuild = true;
debug_verify_stored_missing = false;
} else if (p->key() == "can_rollback_to") {
::decode(on_disk_can_rollback_to, bp);
@ -1345,7 +1346,7 @@ public:
std::move(entries),
std::move(dups));
if (force_rebuild_missing || debug_verify_stored_missing) {
if (must_rebuild || debug_verify_stored_missing) {
// build missing
if (debug_verify_stored_missing || info.last_complete < info.last_update) {
ldpp_dout(dpp, 10)
@ -1438,7 +1439,7 @@ public:
}
}
} else {
assert(force_rebuild_missing);
assert(must_rebuild);
for (map<eversion_t, hobject_t>::reverse_iterator i =
divergent_priors.rbegin();
i != divergent_priors.rend();
@ -1492,7 +1493,7 @@ public:
}
}
if (!force_rebuild_missing) {
if (!must_rebuild) {
if (clear_divergent_priors)
(*clear_divergent_priors) = false;
missing.flush();

View File

@ -309,6 +309,7 @@ void osd_stat_t::dump(Formatter *f) const
{
f->dump_unsigned("up_from", up_from);
f->dump_unsigned("seq", seq);
f->dump_unsigned("num_pgs", num_pgs);
f->dump_unsigned("kb", kb);
f->dump_unsigned("kb_used", kb_used);
f->dump_unsigned("kb_avail", kb_avail);
@ -328,7 +329,7 @@ void osd_stat_t::dump(Formatter *f) const
void osd_stat_t::encode(bufferlist &bl) const
{
ENCODE_START(6, 2, bl);
ENCODE_START(7, 2, bl);
::encode(kb, bl);
::encode(kb_used, bl);
::encode(kb_avail, bl);
@ -340,6 +341,7 @@ void osd_stat_t::encode(bufferlist &bl) const
::encode(os_perf_stat, bl);
::encode(up_from, bl);
::encode(seq, bl);
::encode(num_pgs, bl);
ENCODE_FINISH(bl);
}
@ -362,6 +364,9 @@ void osd_stat_t::decode(bufferlist::iterator &bl)
::decode(up_from, bl);
::decode(seq, bl);
}
if (struct_v >= 7) {
::decode(num_pgs, bl);
}
DECODE_FINISH(bl);
}

View File

@ -904,6 +904,8 @@ struct osd_stat_t {
epoch_t up_from = 0;
uint64_t seq = 0;
uint32_t num_pgs = 0;
osd_stat_t() : kb(0), kb_used(0), kb_avail(0),
snap_trim_queue_len(0), num_snap_trimming(0) {}
@ -915,6 +917,7 @@ struct osd_stat_t {
num_snap_trimming += o.num_snap_trimming;
op_queue_age_hist.add(o.op_queue_age_hist);
os_perf_stat.add(o.os_perf_stat);
num_pgs += o.num_pgs;
}
void sub(const osd_stat_t& o) {
kb -= o.kb;
@ -924,6 +927,7 @@ struct osd_stat_t {
num_snap_trimming -= o.num_snap_trimming;
op_queue_age_hist.sub(o.op_queue_age_hist);
os_perf_stat.sub(o.os_perf_stat);
num_pgs -= o.num_pgs;
}
void dump(Formatter *f) const;
@ -941,7 +945,8 @@ inline bool operator==(const osd_stat_t& l, const osd_stat_t& r) {
l.num_snap_trimming == r.num_snap_trimming &&
l.hb_peers == r.hb_peers &&
l.op_queue_age_hist == r.op_queue_age_hist &&
l.os_perf_stat == r.os_perf_stat;
l.os_perf_stat == r.os_perf_stat &&
l.num_pgs == r.num_pgs;
}
inline bool operator!=(const osd_stat_t& l, const osd_stat_t& r) {
return !(l == r);

View File

@ -1061,7 +1061,7 @@ TEST(CrushWrapper, choose_args_compat) {
}
}
TEST(CrushWrapper, remove_unused_root) {
TEST(CrushWrapper, remove_root) {
CrushWrapper c;
c.create();
c.set_type_name(1, "host");
@ -1087,9 +1087,9 @@ TEST(CrushWrapper, remove_unused_root) {
ASSERT_TRUE(c.name_exists("default"));
ASSERT_TRUE(c.name_exists("r11"));
ASSERT_TRUE(c.name_exists("r12"));
ASSERT_EQ(c.remove_root(c.get_item_id("default"), true), 0);
ASSERT_EQ(c.remove_root(c.get_item_id("default")), 0);
ASSERT_FALSE(c.name_exists("default"));
ASSERT_TRUE(c.name_exists("r11"));
ASSERT_FALSE(c.name_exists("r11"));
ASSERT_FALSE(c.name_exists("r12"));
}
@ -1111,18 +1111,15 @@ TEST(CrushWrapper, trim_roots_with_class) {
int root_id = c.get_item_id("default");
int clone_id;
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
set<int32_t> used_ids;
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
&clone_id), 0);
&clone_id, &cmap_item_weight), 0);
ASSERT_TRUE(c.name_exists("default"));
ASSERT_TRUE(c.name_exists("default~ssd"));
c.trim_roots_with_class(true); // do nothing because still in use
ASSERT_TRUE(c.name_exists("default"));
ASSERT_TRUE(c.name_exists("default~ssd"));
c.class_bucket.clear();
c.trim_roots_with_class(true); // do nothing because still in use
c.trim_roots_with_class();
ASSERT_TRUE(c.name_exists("default"));
ASSERT_FALSE(c.name_exists("default~ssd"));
}
@ -1149,11 +1146,12 @@ TEST(CrushWrapper, device_class_clone) {
c.reweight(g_ceph_context);
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
set<int32_t> used_ids;
int root_id = c.get_item_id("default");
int clone_id;
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
&clone_id), 0);
&clone_id, &cmap_item_weight), 0);
ASSERT_TRUE(c.name_exists("default~ssd"));
ASSERT_EQ(clone_id, c.get_item_id("default~ssd"));
ASSERT_TRUE(c.subtree_contains(clone_id, item));
@ -1164,13 +1162,13 @@ TEST(CrushWrapper, device_class_clone) {
// cloning again does nothing and returns the existing one
int other_clone_id;
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
&other_clone_id), 0);
&other_clone_id, &cmap_item_weight), 0);
ASSERT_EQ(clone_id, other_clone_id);
// invalid arguments
ASSERT_EQ(c.device_class_clone(12345, cl, old_class_bucket, used_ids,
&other_clone_id), -ECHILD);
&other_clone_id, &cmap_item_weight), -ECHILD);
ASSERT_EQ(c.device_class_clone(root_id, 12345, old_class_bucket, used_ids,
&other_clone_id), -EBADF);
&other_clone_id, &cmap_item_weight), -EBADF);
}
TEST(CrushWrapper, split_id_class) {
@ -1188,11 +1186,12 @@ TEST(CrushWrapper, split_id_class) {
c.class_map[item] = class_id;
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
set<int32_t> used_ids;
int item_id = c.get_item_id("default");
int clone_id;
ASSERT_EQ(c.device_class_clone(item_id, class_id, old_class_bucket, used_ids,
&clone_id), 0);
&clone_id, &cmap_item_weight), 0);
int retrieved_item_id;
int retrieved_class_id;
ASSERT_EQ(c.split_id_class(clone_id, &retrieved_item_id, &retrieved_class_id), 0);

View File

@ -2849,7 +2849,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
t.remove_collection(cid);
cerr << "Invalid rm coll" << std::endl;
PrCtl unset_dumpable;
EXPECT_DEATH(apply_transaction(store, &osr, std::move(t)), ".*Directory not empty.*");
EXPECT_DEATH(apply_transaction(store, &osr, std::move(t)), "");
}
{
ObjectStore::Transaction t;
@ -2871,7 +2871,7 @@ TEST_P(StoreTest, SimpleCloneTest) {
t.remove(cid, hoid2);
t.remove_collection(cid);
PrCtl unset_dumpable;
EXPECT_DEATH(apply_transaction(store, &osr, std::move(t)), ".*Directory not empty.*");
EXPECT_DEATH(apply_transaction(store, &osr, std::move(t)), "");
}
{
ObjectStore::Transaction t;

View File

@ -446,6 +446,44 @@ TEST_F(OSDMapTest, PrimaryAffinity) {
}
}
// Unit test for OSDMap::parse_osd_id_list(): single ids in both the
// "osd.N" and bare "N" forms, multi-token and mixed-form lists, the
// "all"/"any"/"*" wildcards, and rejection of unparseable tokens.
TEST_F(OSDMapTest, parse_osd_id_list) {
  set_up_map();
  set<int> out;
  set<int> all;
  osdmap.get_all_osds(all);

  // single id, "osd.N" form
  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, &cout));
  ASSERT_EQ(1, out.size());
  ASSERT_EQ(0, *out.begin());

  // single id, bare numeric form
  ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout));
  ASSERT_EQ(1, out.size());
  ASSERT_EQ(1, *out.begin());

  // two ids, same form
  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout));
  ASSERT_EQ(2, out.size());
  ASSERT_EQ(0, *out.begin());
  ASSERT_EQ(1, *out.rbegin());

  // mixed forms in one list
  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout));
  ASSERT_EQ(2, out.size());
  ASSERT_EQ(0, *out.begin());
  ASSERT_EQ(1, *out.rbegin());

  // each wildcard expands to every osd in the map
  ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout));
  ASSERT_EQ(all.size(), out.size());
  ASSERT_EQ(all, out);

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout));
  ASSERT_EQ(all, out);

  ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout));
  ASSERT_EQ(all, out);

  // unparseable tokens are rejected with -EINVAL
  ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout));
  ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout));
}
TEST(PGTempMap, basic)
{
PGTempMap m;
@ -461,3 +499,4 @@ TEST(PGTempMap, basic)
ASSERT_EQ(m.find(b), m.end());
ASSERT_EQ(998u, m.size());
}

View File

@ -93,7 +93,7 @@ void usage(ostream& out)
" getxattr <obj-name> attr\n"
" setxattr <obj-name> attr val\n"
" rmxattr <obj-name> attr\n"
" stat objname stat the named object\n"
" stat <obj-name> stat the named object\n"
" mapext <obj-name>\n"
" rollback <obj-name> <snap-name> roll back object to snap <snap-name>\n"
"\n"