From b82649cc5200b9a0cecdca19a14fd04af8829ee4 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht
Date: Fri, 23 Nov 2018 12:01:38 +0100
Subject: [PATCH] ceph: add MDS create/delete/list API

Allow creating, listing and destroying a Ceph Metadata Server (MDS) over
the API and the CLI `pveceph` tool.

Besides setting up the local systemd service template and the MDS data
directory we also add a reference to the MDS in ceph.conf. We note the
backing host (node) of the respective MDS and set up a
'mds standby for name' = 'pve' so that the PVE created ones form a single
group. If we decide to add integration for rank/path specific MDS
(possibly useful for CephFS with quite a bit of load) then this may help
as a starting point.

On create, check early if a reference already exists in ceph.conf and
abort in that case. If we only see existing data directories later on we
abort but do not remove them; they could well be from an older manual
create, where it is possibly dangerous to just remove them. Let the users
handle it themselves in that case.

Signed-off-by: Thomas Lamprecht
Co-authored-by: Alwin Antreich
---
 PVE/API2/Ceph.pm       |   7 ++
 PVE/API2/Ceph/MDS.pm   | 243 +++++++++++++++++++++++++++++++++++++++++
 PVE/API2/Ceph/Makefile |  15 +++
 PVE/API2/Makefile      |   2 +-
 PVE/CLI/pveceph.pm     |   3 +
 PVE/CephTools.pm       | 140 +++++++++++++++++++++++-
 6 files changed, 408 insertions(+), 2 deletions(-)
 create mode 100644 PVE/API2/Ceph/MDS.pm
 create mode 100644 PVE/API2/Ceph/Makefile

diff --git a/PVE/API2/Ceph.pm b/PVE/API2/Ceph.pm
index c288906c..ae7bc2ed 100644
--- a/PVE/API2/Ceph.pm
+++ b/PVE/API2/Ceph.pm
@@ -548,6 +548,7 @@ use PVE::RPCEnvironment;
 use PVE::Storage;
 use PVE::Tools qw(run_command file_get_contents file_set_contents);
 
+use PVE::API2::Ceph::MDS;
 use PVE::API2::Storage::Config;
 
 use base qw(PVE::RESTHandler);
@@ -559,6 +560,11 @@ __PACKAGE__->register_method ({
     path => 'osd',
 });
 
+__PACKAGE__->register_method ({
+    subclass => "PVE::API2::Ceph::MDS",
+    path => 'mds',
+});
+
 __PACKAGE__->register_method ({
     name => 'index',
     path => '',
@@ -590,6 +596,7 @@ __PACKAGE__->register_method ({
 	    { name => 'mon' },
 	    { name => 'osd' },
 	    { name => 'pools' },
+	    { name => 'mds' },
 	    { name => 'stop' },
 	    { name => 'start' },
 	    { name => 'status' },
diff --git a/PVE/API2/Ceph/MDS.pm b/PVE/API2/Ceph/MDS.pm
new file mode 100644
index 00000000..9a2791ae
--- /dev/null
+++ b/PVE/API2/Ceph/MDS.pm
@@ -0,0 +1,243 @@
+package PVE::API2::Ceph::MDS;
+
+use strict;
+use warnings;
+
+use PVE::CephTools;
+use PVE::INotify;
+use PVE::JSONSchema qw(get_standard_option);
+use PVE::RADOS;
+use PVE::RESTHandler;
+use PVE::RPCEnvironment;
+
+use base qw(PVE::RESTHandler);
+
+__PACKAGE__->register_method ({
+    name => 'index',
+    path => '',
+    method => 'GET',
+    description => "MDS directory index.",
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
+    },
+    proxyto => 'node',
+    protected => 1,
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	},
+    },
+    returns => {
+	type => 'array',
+	items => {
+	    type => "object",
+	    properties => {
+		name => {
+		    description => "The name (ID) for the MDS",
+		},
+		addr => {
+		    type => 'string',
+		    optional => 1,
+		},
+		host => {
+		    type => 'string',
+		    optional => 1,
+		},
+		state => {
+		    type => 'string',
+		    description => 'State of the MDS',
+		},
+		standby_replay => {
+		    type => 'boolean',
+		    optional => 1,
+		    description => 'If true, the standby MDS is polling the active MDS for faster recovery (hot standby).',
+		},
+		rank => {
+		    type => 'integer',
+		    optional => 1,
+		},
+	    },
+	},
+	links => [ { rel => 'child', href => "{name}" } ],
+    },
+    code => sub {
+	my ($param) = @_;
+
+	my $res = [];
+
+	my $cfg = PVE::CephTools::parse_ceph_config();
+
+	my $mds_hash = {};
+
+	foreach my $section (keys %$cfg) {
+	    my $d = $cfg->{$section};
+
+	    if ($section =~ m/^mds\.(\S+)$/) {
+		my $mds_id = $1;
+		if (defined($d->{host})) {
+		    $mds_hash->{$mds_id} = {
+			name => $mds_id,
+			state => 'unknown',
+			addr => $d->{host},
+			host => $d->{host},
+		    };
+		}
+	    }
+	}
+
+	my $mds_state = PVE::CephTools::get_cluster_mds_state();
+	foreach my $name (keys %$mds_state) {
+	    my $d = $mds_state->{$name};
+	    # just overwrite, this always provides more info
+	    $mds_hash->{$name}->{$_} = $d->{$_} for keys %$d;
+	}
+
+	return PVE::RESTHandler::hash_to_array($mds_hash, 'name');
+    }
+});
+
+__PACKAGE__->register_method ({
+    name => 'createmds',
+    path => '{name}',
+    method => 'POST',
+    description => "Create Ceph Metadata Server (MDS)",
+    proxyto => 'node',
+    protected => 1,
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Modify' ]],
+    },
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	    name => {
+		type => 'string',
+		optional => 1,
+		default => 'nodename',
+		pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
+		description => "The ID for the mds, when omitted the same as the nodename",
+	    },
+	    hotstandby => {
+		type => 'boolean',
+		optional => 1,
+		default => '0',
+		description => "Determines whether a ceph-mds daemon should poll and replay the log of an active MDS. ".
+		    "Faster switch on MDS failure, but needs more idle resources.",
+	    },
+	},
+    },
+    returns => { type => 'string' },
+    code => sub {
+	my ($param) = @_;
+
+	PVE::CephTools::check_ceph_installed('ceph_mds');
+
+	PVE::CephTools::check_ceph_inited();
+
+	my $rpcenv = PVE::RPCEnvironment::get();
+	my $authuser = $rpcenv->get_user();
+
+	my $nodename = $param->{node};
+	$nodename = PVE::INotify::nodename() if $nodename eq 'localhost';
+
+	my $mds_id = $param->{name} // $nodename;
+
+	my $worker = sub {
+	    my $timeout = PVE::CephTools::get_config('long_rados_timeout');
+	    my $rados = PVE::RADOS->new(timeout => $timeout);
+
+	    my $cfg = PVE::CephTools::parse_ceph_config();
+
+	    my $section = "mds.$mds_id";
+
+	    if (defined($cfg->{$section})) {
+		die "MDS '$mds_id' already referenced in ceph config, abort!\n"
+	    }
+
+	    if (!defined($cfg->{mds}->{keyring})) {
+		# $id isn't a perl variable but a ceph metavariable
+		my $keyring = '/var/lib/ceph/mds/ceph-$id/keyring';
+
+		$cfg->{mds}->{keyring} = $keyring;
+	    }
+
+	    $cfg->{$section}->{host} = $nodename;
+	    $cfg->{$section}->{"mds standby for name"} = 'pve';
+
+	    if ($param->{hotstandby}) {
+		$cfg->{$section}->{"mds standby replay"} = 'true';
+	    }
+
+	    PVE::CephTools::write_ceph_config($cfg);
+
+	    eval { PVE::CephTools::create_mds($mds_id, $rados) };
+	    if (my $err = $@) {
+		# we abort early if the section is defined, so we know that we
+		# wrote it at this point. Do not auto remove the service, could
+		# do real harm for previously manual setup MDS
+		warn "Encountered error, remove '$section' from ceph.conf\n";
+		$cfg = PVE::CephTools::parse_ceph_config();
+		delete $cfg->{$section};
+		PVE::CephTools::write_ceph_config($cfg);
+
+		die "$err\n";
+	    }
+	};
+
+	return $rpcenv->fork_worker('cephcreatemds', "mds.$mds_id", $authuser, $worker);
+    }
+});
+
+__PACKAGE__->register_method ({
+    name => 'destroymds',
+    path => '{name}',
+    method => 'DELETE',
+    description => "Destroy Ceph Metadata Server",
+    proxyto => 'node',
+    protected => 1,
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Modify' ]],
+    },
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	    name => {
+		description => 'The name (ID) of the mds',
+		type => 'string',
+		pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
+	    },
+	},
+    },
+    returns => { type => 'string' },
+    code => sub {
+	my ($param) = @_;
+
+	my $rpcenv = PVE::RPCEnvironment::get();
+
+	my $authuser = $rpcenv->get_user();
+
+	PVE::CephTools::check_ceph_inited();
+
+	my $mds_id = $param->{name};
+
+	my $worker = sub {
+	    my $timeout = PVE::CephTools::get_config('long_rados_timeout');
+	    my $rados = PVE::RADOS->new(timeout => $timeout);
+
+	    my $cfg = PVE::CephTools::parse_ceph_config();
+
+	    if (defined($cfg->{"mds.$mds_id"})) {
+		delete $cfg->{"mds.$mds_id"};
+		PVE::CephTools::write_ceph_config($cfg);
+	    }
+
+	    PVE::CephTools::destroy_mds($mds_id, $rados);
+	};
+
+	return $rpcenv->fork_worker('cephdestroymds', "mds.$mds_id", $authuser, $worker);
+    }
+});
+
+1;
diff --git a/PVE/API2/Ceph/Makefile b/PVE/API2/Ceph/Makefile
new file mode 100644
index 00000000..be4d740c
--- /dev/null
+++ b/PVE/API2/Ceph/Makefile
@@ -0,0 +1,15 @@
+include ../../../defines.mk
+
+PERLSOURCE= \
+	MDS.pm
+
+all:
+
+.PHONY: clean
+clean:
+	rm -rf *~
+
+.PHONY: install
+install: ${PERLSOURCE}
+	install -d ${PERLLIBDIR}/PVE/API2/Ceph
+	install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/API2/Ceph
diff --git a/PVE/API2/Makefile b/PVE/API2/Makefile
index a62bf909..c5868d7f 100644
--- a/PVE/API2/Makefile
+++ b/PVE/API2/Makefile
@@ -1,6 +1,6 @@
 include ../../defines.mk
 
-SUBDIRS=Hardware
+SUBDIRS=Hardware Ceph
 
 PERLSOURCE = \
 	Replication.pm \
diff --git a/PVE/CLI/pveceph.pm b/PVE/CLI/pveceph.pm
index a5a04949..90878d9e 100755
--- a/PVE/CLI/pveceph.pm
+++ b/PVE/CLI/pveceph.pm
@@ -19,6 +19,7 @@ use PVE::Tools qw(run_command);
 use PVE::JSONSchema qw(get_standard_option);
 use PVE::CephTools;
 use PVE::API2::Ceph;
+use PVE::API2::Ceph::MDS;
 
 use PVE::CLIHandler;
 
@@ -175,6 +176,8 @@ our $cmddef = {
     destroymon => [ 'PVE::API2::Ceph', 'destroymon', ['monid'], { node => $nodename }, $upid_exit],
     createmgr => [ 'PVE::API2::Ceph', 'createmgr', [], { node => $nodename }, $upid_exit],
     destroymgr => [ 'PVE::API2::Ceph', 'destroymgr', ['id'], { node => $nodename }, $upid_exit],
+    createmds => [ 'PVE::API2::Ceph::MDS', 'createmds', [], { node => $nodename }, $upid_exit],
+    destroymds => [ 'PVE::API2::Ceph::MDS', 'destroymds', ['name'], { node => $nodename }, $upid_exit],
     start => [ 'PVE::API2::Ceph', 'start', ['service'], { node => $nodename }, $upid_exit],
     stop => [ 'PVE::API2::Ceph', 'stop', ['service'], { node => $nodename }, $upid_exit],
     install => [ __PACKAGE__, 'install', [] ],
diff --git a/PVE/CephTools.pm b/PVE/CephTools.pm
index 8a9afa84..cc594f8a 100644
--- a/PVE/CephTools.pm
+++ b/PVE/CephTools.pm
@@ -18,12 +18,14 @@ my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
 my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";
 my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
 my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
+my $ceph_mds_data_dir = '/var/lib/ceph/mds';
 
 my $ceph_service = {
     ceph_bin => "/usr/bin/ceph",
     ceph_mon => "/usr/bin/ceph-mon",
     ceph_mgr => "/usr/bin/ceph-mgr",
-    ceph_osd => "/usr/bin/ceph-osd"
+    ceph_osd => "/usr/bin/ceph-osd",
+    ceph_mds => "/usr/bin/ceph-mds",
 };
 
 my $config_hash = {
@@ -33,6 +35,7 @@ my $config_hash = {
     pve_ckeyring_path => $pve_ckeyring_path,
     ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
     ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
+    ceph_mds_data_dir => $ceph_mds_data_dir,
     long_rados_timeout => 60,
 };
 
@@ -297,4 +300,139 @@ sub systemd_managed {
     }
 }
 
+sub list_local_mds_ids {
+    my $mds_list = [];
+
+    PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, sub {
+	my (undef, $mds_id) = @_;
+	push @$mds_list, $mds_id;
+    });
+
+    return $mds_list;
+}
+
+sub get_cluster_mds_state {
+    my ($rados) = @_;
+
+    my $mds_state = {};
+
+    if (!defined($rados)) {
+	$rados = PVE::RADOS->new();
+    }
+
+    my $add_state = sub {
+	my ($mds) = @_;
+
+	my $state = {};
+	$state->{addr} = $mds->{addr};
+	$state->{rank} = $mds->{rank};
+	$state->{standby_replay} = $mds->{standby_replay} ? 1 : 0;
+	$state->{state} = $mds->{state};
+
+	$mds_state->{$mds->{name}} = $state;
+    };
+
+    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
+    my $fsmap = $mds_dump->{fsmap};
+
+
+    foreach my $mds (@{$fsmap->{standbys}}) {
+	$add_state->($mds);
+    }
+
+    my $fs_info = $fsmap->{filesystems}->[0];
+    my $active_mds = $fs_info->{mdsmap}->{info};
+
+    # normally there's only one active MDS, but we can have multiple active for
+    # different ranks (e.g., different CephFS path hierarchy). So just add all.
+    foreach my $mds (values %$active_mds) {
+	$add_state->($mds);
+    }
+
+    return $mds_state;
+}
+
+sub create_mds {
+    my ($id, $rados) = @_;
+
+    # `ceph fs status` fails with numeric only ID.
+    die "ID: $id, numeric only IDs are not supported\n"
+	if $id =~ /^\d+$/;
+
+    if (!defined($rados)) {
+	$rados = PVE::RADOS->new();
+    }
+
+    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
+    my $service_keyring = "$service_dir/keyring";
+    my $service_name = "mds.$id";
+
+    die "ceph MDS directory '$service_dir' already exists\n"
+	if -d $service_dir;
+
+    print "creating MDS directory '$service_dir'\n";
+    eval { File::Path::mkpath($service_dir) };
+    my $err = $@;
+    die "creation MDS directory '$service_dir' failed: $err" if $err;
+
+    # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
+    my $priv = [
+	mon => 'allow profile mds',
+	osd => 'allow rwx',
+	mds => 'allow *',
+    ];
+
+    print "creating keys for '$service_name'\n";
+    my $output = $rados->mon_command({
+	prefix => 'auth get-or-create',
+	entity => $service_name,
+	caps => $priv,
+	format => 'plain',
+    });
+
+    PVE::Tools::file_set_contents($service_keyring, $output);
+
+    print "setting ceph as owner for service directory\n";
+    run_command(["chown", 'ceph:ceph', '-R', $service_dir]);
+
+    print "enabling service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('enable', $service_name);
+    print "starting service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('start', $service_name);
+
+    return undef;
+}
+
+sub destroy_mds {
+    my ($id, $rados) = @_;
+
+    if (!defined($rados)) {
+	$rados = PVE::RADOS->new();
+    }
+
+    my $service_name = "mds.$id";
+    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
+
+    print "disabling service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('disable', $service_name);
+    print "stopping service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('stop', $service_name);
+
+    if (-d $service_dir) {
+	print "removing ceph-mds directory '$service_dir'\n";
+	File::Path::remove_tree($service_dir);
+    } else {
+	warn "cannot cleanup MDS $id directory, '$service_dir' not found\n"
+    }
+
+    print "removing ceph auth for '$service_name'\n";
+    $rados->mon_command({
+	prefix => 'auth del',
+	entity => $service_name,
+	format => 'plain'
+    });
+
+    return undef;
+}
+
 1;