diff --git a/PVE/API2/Ceph.pm b/PVE/API2/Ceph.pm
index c288906c..ae7bc2ed 100644
--- a/PVE/API2/Ceph.pm
+++ b/PVE/API2/Ceph.pm
@@ -548,6 +548,7 @@ use PVE::RPCEnvironment;
 
 use PVE::Storage;
 use PVE::Tools qw(run_command file_get_contents file_set_contents);
+use PVE::API2::Ceph::MDS;
 use PVE::API2::Storage::Config;
 
 use base qw(PVE::RESTHandler);
@@ -559,6 +560,11 @@ __PACKAGE__->register_method ({
     path => 'osd',
 });
 
+__PACKAGE__->register_method ({
+    subclass => "PVE::API2::Ceph::MDS",
+    path => 'mds',
+});
+
 __PACKAGE__->register_method ({
     name => 'index',
     path => '',
@@ -590,6 +596,7 @@ __PACKAGE__->register_method ({
 	    { name => 'mon' },
 	    { name => 'osd' },
 	    { name => 'pools' },
+	    { name => 'mds' },
 	    { name => 'stop' },
 	    { name => 'start' },
 	    { name => 'status' },
diff --git a/PVE/API2/Ceph/MDS.pm b/PVE/API2/Ceph/MDS.pm
new file mode 100644
index 00000000..9a2791ae
--- /dev/null
+++ b/PVE/API2/Ceph/MDS.pm
@@ -0,0 +1,249 @@
+package PVE::API2::Ceph::MDS;
+
+use strict;
+use warnings;
+
+use PVE::CephTools;
+use PVE::INotify;
+use PVE::JSONSchema qw(get_standard_option);
+use PVE::RADOS;
+use PVE::RESTHandler;
+use PVE::RPCEnvironment;
+
+use base qw(PVE::RESTHandler);
+
+# GET: list MDS daemons by merging the 'mds.<id>' sections of the ceph config
+# (those with a host entry) with PVE::CephTools::get_cluster_mds_state().
+__PACKAGE__->register_method ({
+    name => 'index',
+    path => '',
+    method => 'GET',
+    description => "MDS directory index.",
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
+    },
+    proxyto => 'node',
+    protected => 1,
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	},
+    },
+    returns => {
+	type => 'array',
+	items => {
+	    type => "object",
+	    properties => {
+		name => {
+		    type => 'string',
+		    description => "The name (ID) for the MDS",
+		},
+		addr => {
+		    type => 'string',
+		    optional => 1,
+		},
+		host => {
+		    type => 'string',
+		    optional => 1,
+		},
+		state => {
+		    type => 'string',
+		    description => 'State of the MDS',
+		},
+		standby_replay => {
+		    type => 'boolean',
+		    optional => 1,
+		    description => 'If true, the standby MDS is polling the active MDS for faster recovery (hot standby).',
+		},
+		rank => {
+		    type => 'integer',
+		    optional => 1,
+		},
+	    },
+	},
+	links => [ { rel => 'child', href => "{name}" } ],
+    },
+    code => sub {
+	my ($param) = @_;
+
+	my $cfg = PVE::CephTools::parse_ceph_config();
+
+	my $mds_hash = {};
+
+	foreach my $section (keys %$cfg) {
+	    my $d = $cfg->{$section};
+
+	    if ($section =~ m/^mds\.(\S+)$/) {
+		my $mds_id = $1;
+		if (defined($d->{host})) {
+		    $mds_hash->{$mds_id} = {
+			name => $mds_id,
+			state => 'unknown',
+			addr => $d->{host},
+			host => $d->{host},
+		    };
+		}
+	    }
+	}
+
+	my $mds_state = PVE::CephTools::get_cluster_mds_state();
+	foreach my $name (keys %$mds_state) {
+	    my $d = $mds_state->{$name};
+	    # just overwrite, this always provides more info
+	    $mds_hash->{$name}->{$_} = $d->{$_} for keys %$d;
+	}
+
+	return PVE::RESTHandler::hash_to_array($mds_hash, 'name');
+    }
+});
+
+# POST: write the 'mds.<id>' section to the ceph config, then create the MDS
+# in a forked worker; the section is removed again if create_mds() fails.
+__PACKAGE__->register_method ({
+    name => 'createmds',
+    path => '{name}',
+    method => 'POST',
+    description => "Create Ceph Metadata Server (MDS)",
+    proxyto => 'node',
+    protected => 1,
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Modify' ]],
+    },
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	    name => {
+		type => 'string',
+		optional => 1,
+		default => 'nodename',
+		pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
+		description => "The ID for the mds, when omitted the same as the nodename",
+	    },
+	    hotstandby => {
+		type => 'boolean',
+		optional => 1,
+		default => '0',
+		description => "Determines whether a ceph-mds daemon should poll and replay the log of an active MDS. ".
+		    "Faster switch on MDS failure, but needs more idle resources.",
+	    },
+	},
+    },
+    returns => { type => 'string' },
+    code => sub {
+	my ($param) = @_;
+
+	PVE::CephTools::check_ceph_installed('ceph_mds');
+
+	PVE::CephTools::check_ceph_inited();
+
+	my $rpcenv = PVE::RPCEnvironment::get();
+	my $authuser = $rpcenv->get_user();
+
+	my $nodename = $param->{node};
+	# replace the 'localhost' placeholder with the local node name
+	$nodename = PVE::INotify::nodename() if $nodename eq 'localhost';
+
+	my $mds_id = $param->{name} // $nodename;
+
+	my $worker = sub {
+	    my $timeout = PVE::CephTools::get_config('long_rados_timeout');
+	    my $rados = PVE::RADOS->new(timeout => $timeout);
+
+	    my $cfg = PVE::CephTools::parse_ceph_config();
+
+	    my $section = "mds.$mds_id";
+
+	    if (defined($cfg->{$section})) {
+		die "MDS '$mds_id' already referenced in ceph config, abort!\n"
+	    }
+
+	    if (!defined($cfg->{mds}->{keyring})) {
+		# $id isn't a perl variable but a ceph metavariable
+		my $keyring = '/var/lib/ceph/mds/ceph-$id/keyring';
+
+		$cfg->{mds}->{keyring} = $keyring;
+	    }
+
+	    $cfg->{$section}->{host} = $nodename;
+	    $cfg->{$section}->{"mds standby for name"} = 'pve';
+
+	    if ($param->{hotstandby}) {
+		$cfg->{$section}->{"mds standby replay"} = 'true';
+	    }
+
+	    PVE::CephTools::write_ceph_config($cfg);
+
+	    eval { PVE::CephTools::create_mds($mds_id, $rados) };
+	    if (my $err = $@) {
+		# we abort early if the section is defined, so we know that we
+		# wrote it at this point. Do not auto remove the service, could
+		# do real harm for previously manual setup MDS
+		warn "Encountered error, remove '$section' from ceph.conf\n";
+		$cfg = PVE::CephTools::parse_ceph_config();
+		delete $cfg->{$section};
+		PVE::CephTools::write_ceph_config($cfg);
+
+		die "$err\n";
+	    }
+	};
+
+	return $rpcenv->fork_worker('cephcreatemds', "mds.$mds_id", $authuser, $worker);
+    }
+});
+
+# DELETE: drop the 'mds.<id>' section from the ceph config (if present), then
+# call PVE::CephTools::destroy_mds() for it, all inside a forked worker.
+__PACKAGE__->register_method ({
+    name => 'destroymds',
+    path => '{name}',
+    method => 'DELETE',
+    description => "Destroy Ceph Metadata Server",
+    proxyto => 'node',
+    protected => 1,
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Modify' ]],
+    },
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	    name => {
+		description => 'The name (ID) of the mds',
+		type => 'string',
+		pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
+	    },
+	},
+    },
+    returns => { type => 'string' },
+    code => sub {
+	my ($param) = @_;
+
+	my $rpcenv = PVE::RPCEnvironment::get();
+
+	my $authuser = $rpcenv->get_user();
+
+	PVE::CephTools::check_ceph_inited();
+
+	my $mds_id = $param->{name};
+
+	my $worker = sub {
+	    my $timeout = PVE::CephTools::get_config('long_rados_timeout');
+	    my $rados = PVE::RADOS->new(timeout => $timeout);
+
+	    my $cfg = PVE::CephTools::parse_ceph_config();
+
+	    if (defined($cfg->{"mds.$mds_id"})) {
+		delete $cfg->{"mds.$mds_id"};
+		PVE::CephTools::write_ceph_config($cfg);
+	    }
+
+	    PVE::CephTools::destroy_mds($mds_id, $rados);
+	};
+
+	return $rpcenv->fork_worker('cephdestroymds', "mds.$mds_id", $authuser, $worker);
+    }
+});
+
+1;
diff --git a/PVE/API2/Ceph/Makefile b/PVE/API2/Ceph/Makefile
new file mode 100644
index 00000000..be4d740c
--- /dev/null
+++ b/PVE/API2/Ceph/Makefile
@@ -0,0 +1,15 @@
+include ../../../defines.mk
+
+PERLSOURCE= 			\
+	MDS.pm
+
+all:
+
+.PHONY: clean
+clean:
+	rm -rf *~
+
+.PHONY: install
+install: ${PERLSOURCE}
+	install -d ${PERLLIBDIR}/PVE/API2/Ceph
+	install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/API2/Ceph
diff --git a/PVE/API2/Makefile b/PVE/API2/Makefile
index a62bf909..c5868d7f 100644
--- a/PVE/API2/Makefile
+++ b/PVE/API2/Makefile
@@ -1,6 +1,6 @@
 include ../../defines.mk
 
-SUBDIRS=Hardware
+SUBDIRS=Hardware Ceph
 
 PERLSOURCE = 			\
 	Replication.pm		\
diff --git a/PVE/CLI/pveceph.pm b/PVE/CLI/pveceph.pm
index a5a04949..90878d9e 100755
--- a/PVE/CLI/pveceph.pm
+++ b/PVE/CLI/pveceph.pm
@@ -19,6 +19,7 @@ use PVE::Tools qw(run_command);
 use PVE::JSONSchema qw(get_standard_option);
 use PVE::CephTools;
 use PVE::API2::Ceph;
+use PVE::API2::Ceph::MDS;
 
 use PVE::CLIHandler;
 
@@ -175,6 +176,8 @@ our $cmddef = {
     destroymon => [ 'PVE::API2::Ceph', 'destroymon', ['monid'], { node => $nodename }, $upid_exit],
     createmgr => [ 'PVE::API2::Ceph', 'createmgr', [], { node => $nodename }, $upid_exit],
     destroymgr => [ 'PVE::API2::Ceph', 'destroymgr', ['id'], { node => $nodename }, $upid_exit],
+    createmds => [ 'PVE::API2::Ceph::MDS', 'createmds', [], { node => $nodename }, $upid_exit],
+    destroymds => [ 'PVE::API2::Ceph::MDS', 'destroymds', ['name'], { node => $nodename }, $upid_exit],
     start => [ 'PVE::API2::Ceph', 'start', ['service'], { node => $nodename }, $upid_exit],
     stop => [ 'PVE::API2::Ceph', 'stop', ['service'], { node => $nodename }, $upid_exit],
     install => [ __PACKAGE__, 'install', [] ],
diff --git a/PVE/CephTools.pm b/PVE/CephTools.pm
index 8a9afa84..cc594f8a 100644
--- a/PVE/CephTools.pm
+++ b/PVE/CephTools.pm
@@ -18,12 +18,14 @@ my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
 my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";
 my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
 my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
+my $ceph_mds_data_dir = '/var/lib/ceph/mds';
 
 my $ceph_service = {
     ceph_bin => "/usr/bin/ceph",
     ceph_mon => "/usr/bin/ceph-mon",
     ceph_mgr => "/usr/bin/ceph-mgr",
-    ceph_osd => "/usr/bin/ceph-osd"
+    ceph_osd => "/usr/bin/ceph-osd",
+    ceph_mds => "/usr/bin/ceph-mds",
 };
 
 my $config_hash = {
@@ -33,6 +35,7 @@ my $config_hash = {
     pve_ckeyring_path => $pve_ckeyring_path,
     ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
     ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
+    ceph_mds_data_dir => $ceph_mds_data_dir,
     long_rados_timeout => 60,
 };
 
@@ -297,4 +300,148 @@ sub systemd_managed {
     }
 }
 
+# Return an array ref with the <id> part of every "$ccname-<id>" entry that
+# dir_glob_foreach reports below $ceph_mds_data_dir.
+sub list_local_mds_ids {
+    my $mds_list = [];
+
+    PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, sub {
+	my (undef, $mds_id) = @_;
+	push @$mds_list, $mds_id;
+    });
+
+    return $mds_list;
+}
+
+# Run the 'mds stat' monitor command and return a hash ref keyed by MDS name;
+# each value holds addr, rank, standby_replay (0/1) and state. Covers all
+# standbys plus the MDS listed in the mdsmap of the first filesystem only.
+sub get_cluster_mds_state {
+    my ($rados) = @_;
+
+    my $mds_state = {};
+
+    if (!defined($rados)) {
+	$rados = PVE::RADOS->new();
+    }
+
+    my $add_state = sub {
+	my ($mds) = @_;
+
+	my $state = {};
+	$state->{addr} = $mds->{addr};
+	$state->{rank} = $mds->{rank};
+	$state->{standby_replay} = $mds->{standby_replay} ? 1 : 0;
+	$state->{state} = $mds->{state};
+
+	$mds_state->{$mds->{name}} = $state;
+    };
+
+    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
+    my $fsmap = $mds_dump->{fsmap};
+
+
+    foreach my $mds (@{$fsmap->{standbys}}) {
+	$add_state->($mds);
+    }
+
+    my $fs_info = $fsmap->{filesystems}->[0];
+    my $active_mds = $fs_info->{mdsmap}->{info};
+
+    # normally there's only one active MDS, but we can have multiple active for
+    # different ranks (e.g., different cephs path hierarchy). So just add all.
+    foreach my $mds (values %$active_mds) {
+	$add_state->($mds);
+    }
+
+    return $mds_state;
+}
+
+# Create the data directory and keyring for MDS $id, then enable and start its
+# service. Dies if $id is numeric only or the directory already exists.
+sub create_mds {
+    my ($id, $rados) = @_;
+
+    # `ceph fs status` fails with numeric only ID.
+    die "ID: $id, numeric only IDs are not supported\n"
+	if $id =~ /^\d+$/;
+
+    if (!defined($rados)) {
+	$rados = PVE::RADOS->new();
+    }
+
+    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
+    my $service_keyring = "$service_dir/keyring";
+    my $service_name = "mds.$id";
+
+    die "ceph MDS directory '$service_dir' already exists\n"
+	if -d $service_dir;
+
+    print "creating MDS directory '$service_dir'\n";
+    eval { File::Path::mkpath($service_dir) };
+    my $err = $@;
+    die "creation MDS directory '$service_dir' failed\n" if $err;
+
+    # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
+    my $priv = [
+	mon => 'allow profile mds',
+	osd => 'allow rwx',
+	mds => 'allow *',
+    ];
+
+    print "creating keys for '$service_name'\n";
+    my $output = $rados->mon_command({
+	prefix => 'auth get-or-create',
+	entity => $service_name,
+	caps => $priv,
+	format => 'plain',
+    });
+
+    PVE::Tools::file_set_contents($service_keyring, $output);
+
+    print "setting ceph as owner for service directory\n";
+    run_command(["chown", 'ceph:ceph', '-R', $service_dir]);
+
+    print "enabling service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('enable', $service_name);
+    print "starting service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('start', $service_name);
+
+    return undef;
+}
+
+# Disable and stop the service of MDS $id, remove its data directory (warns
+# if it does not exist) and delete its auth entity via 'auth del'.
+sub destroy_mds {
+    my ($id, $rados) = @_;
+
+    if (!defined($rados)) {
+	$rados = PVE::RADOS->new();
+    }
+
+    my $service_name = "mds.$id";
+    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
+
+    print "disabling service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('disable', $service_name);
+    print "stopping service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('stop', $service_name);
+
+    if (-d $service_dir) {
+	print "removing ceph-mds directory '$service_dir'\n";
+	File::Path::remove_tree($service_dir);
+    } else {
+	warn "cannot cleanup MDS $id directory, '$service_dir' not found\n"
+    }
+
+    print "removing ceph auth for '$service_name'\n";
+    $rados->mon_command({
+	prefix => 'auth del',
+	entity => $service_name,
+	format => 'plain'
+    });
+
+    return undef;
+}
+
 1;