lxcapi_destroy: run in a namespace if we are unprivileged

This is necessary to have the rights to remove files owned by our subuids. Also update lxc_rmdir_onedev to return 0 on success, -1 on failure. Callers were not consistent in using it correctly, and this is more in keeping with the rest of our code. Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> Acked-by: Stéphane Graber <stgraber@ubuntu.com>
2025-08-03 12:06:43 +00:00 · 2013-11-22 14:39:37 -06:00 · 2013-11-22 14:39:37 -06:00 · 4355ab5fab
commit 4355ab5fab
parent dfd6d8f019
6 changed files with 182 additions and 23 deletions
--- a/src/lxc/bdev.c
+++ b/src/lxc/bdev.c
@ -450,7 +450,7 @@ static int dir_clonepaths(struct bdev *orig, struct bdev *new, const char *oldna

 static int dir_destroy(struct bdev *orig)
 {
-	if (!lxc_rmdir_onedev(orig->src))
+	if (lxc_rmdir_onedev(orig->src) < 0)
 		return -1;
 	return 0;
 }
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@ -75,6 +75,7 @@
 #include "bdev.h"
 #include "cgroup.h"
 #include "lxclock.h"
+#include "namespace.h"
 #include "lsm/lsm.h"

 #if HAVE_SYS_CAPABILITY_H
@ -3810,11 +3811,10 @@ int lxc_clear_config_caps(struct lxc_conf *c)
 	return 0;
 }

-int lxc_clear_idmaps(struct lxc_conf *c)
-{
+int lxc_free_idmap(struct lxc_list *id_map) {
 	struct lxc_list *it, *next;

-	lxc_list_for_each_safe(it, &c->id_map, next) {
+	lxc_list_for_each_safe(it, id_map, next) {
 		lxc_list_del(it);
 		free(it->elem);
 		free(it);
@ -3822,6 +3822,11 @@ int lxc_clear_idmaps(struct lxc_conf *c)
 	return 0;
 }

+int lxc_clear_idmaps(struct lxc_conf *c)
+{
+	return lxc_free_idmap(&c->id_map);
+}
+
 int lxc_clear_config_keepcaps(struct lxc_conf *c)
 {
 	struct lxc_list *it,*next;
@ -3941,3 +3946,147 @@ void lxc_conf_free(struct lxc_conf *conf)
 	lxc_clear_idmaps(conf);
 	free(conf);
 }
+
+struct userns_fn_data {
+	int (*fn)(void *);
+	void *arg;
+	int p[2];
+};
+
+static int run_userns_fn(void *data)
+{
+	struct userns_fn_data *d = data;
+	char c;
+	// we're not sharing with the parent any more, if it was a thread
+
+	close(d->p[1]);
+	if (read(d->p[0], &c, 1) != 1)
+		return -1;
+	close(d->p[0]);
+	return d->fn(d->arg);
+}
+
+/*
+ * Add a ID_TYPE_UID entry to an existing lxc_conf, if it is not
+ * alread there.
+ * We may want to generalize this to do gids as well as uids, but right now
+ * it's not necessary.
+ */
+static struct lxc_list *idmap_add_id(struct lxc_conf *conf, uid_t uid)
+{
+	int hostid_mapped = mapped_hostid(uid, conf);
+	struct lxc_list *new = NULL, *tmp, *it, *next;
+	struct id_map *entry;
+
+	if (hostid_mapped < 0) {
+		hostid_mapped = find_unmapped_nsuid(conf);
+		if (hostid_mapped < 0) {
+			ERROR("Could not find free uid to map");
+			return NULL;
+		}
+		new = malloc(sizeof(*new));
+		if (!new) {
+			ERROR("Out of memory building id map");
+			return NULL;
+		}
+		entry = malloc(sizeof(*entry));
+		if (!entry) {
+			free(new);
+			ERROR("Out of memory building idmap entry");
+			return NULL;
+		}
+		new->elem = entry;
+		entry->idtype = ID_TYPE_UID;
+		entry->nsid = hostid_mapped;
+		entry->hostid = (unsigned long)uid;
+		entry->range = 1;
+		lxc_list_init(new);
+	}
+	lxc_list_for_each_safe(it, &conf->id_map, next) {
+		tmp = malloc(sizeof(*tmp));
+		if (!tmp)
+			goto err;
+		entry = malloc(sizeof(*entry));
+		if (!entry) {
+			free(tmp);
+			goto err;
+		}
+		memset(entry, 0, sizeof(*entry));
+		memcpy(entry, it->elem, sizeof(*entry));
+		tmp->elem = entry;
+		if (!new) {
+			new = tmp;
+			lxc_list_init(new);
+		} else
+			lxc_list_add_tail(new, tmp);
+	}
+
+	return new;
+
+err:
+	ERROR("Out of memory building a new uid map");
+	lxc_free_idmap(new);
+	return NULL;
+}
+
+/*
+ * Run a function in a new user namespace.
+ * The caller's euid will be mapped in if it is not already.
+ */
+int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
+{
+	int ret, pid;
+	struct userns_fn_data d;
+	char c = '1';
+	int p[2];
+	struct lxc_list *idmap;
+
+	process_lock();
+	ret = pipe(p);
+	process_unlock();
+	if (ret < 0) {
+		SYSERROR("opening pipe");
+		return -1;
+	}
+	d.fn = fn;
+	d.arg = data;
+	d.p[0] = p[0];
+	d.p[1] = p[1];
+	pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
+	if (pid < 0)
+		goto err;
+	process_lock();
+	close(p[0]);
+	process_unlock();
+	p[0] = -1;
+
+	if ((idmap = idmap_add_id(conf, geteuid())) == NULL) {
+		ERROR("Error adding self to container uid map");
+		goto err;
+	}
+
+	ret = lxc_map_ids(idmap, pid);
+	lxc_free_idmap(idmap);
+	if (ret < 0) {
+		ERROR("Error setting up child mappings");
+		goto err;
+	}
+
+	// kick the child
+	if (write(p[1], &c, 1) != 1) {
+		SYSERROR("writing to pipe to child");
+		goto err;
+	}
+
+	if ((ret = wait_for_pid(pid)) < 0) {
+		ERROR("Child returned an error: %d\n", ret);
+		goto err;
+	}
+err:
+	process_lock();
+	if (p[0] != -1)
+		close(p[0]);
+	close(p[1]);
+	process_unlock();
+	return -1;
+}
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@ -164,6 +164,8 @@ struct id_map {
 	unsigned long hostid, nsid, range;
 };

+extern int lxc_free_idmap(struct lxc_list *idmap);
+
 /*
 * Defines a structure containing a pty information for
 * virtualizing a tty
@ -367,4 +369,5 @@ extern int find_unmapped_nsuid(struct lxc_conf *conf);
 extern int mapped_hostid(int id, struct lxc_conf *conf);
 extern int chown_mapped_root(char *path, struct lxc_conf *conf);
 extern int ttys_shift_ids(struct lxc_conf *c);
+extern int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data);
 #endif
--- a/src/lxc/lxc_destroy.c
+++ b/src/lxc/lxc_destroy.c
@ -64,13 +64,6 @@ int main(int argc, char *argv[])
 {
 	struct lxc_container *c;

-	/* this is a short term test.  We'll probably want to check for
-	 * write access to lxcpath instead */
-	if (geteuid()) {
-		fprintf(stderr, "%s must be run as root\n", argv[0]);
-		exit(1);
-	}
-
 	if (lxc_arguments_parse(&my_args, argc, argv))
 		exit(1);

--- a/src/lxc/lxccontainer.c
+++ b/src/lxc/lxccontainer.c
@ -1863,11 +1863,18 @@ out:
 	return bret;
 }

+static int lxc_rmdir_onedev_wrapper(void *data)
+{
+	char *arg = (char *) data;
+	return lxc_rmdir_onedev(arg);
+}
+
 // do we want the api to support --force, or leave that to the caller?
 static bool lxcapi_destroy(struct lxc_container *c)
 {
 	struct bdev *r = NULL;
 	bool ret = false;
+	bool am_unpriv;

 	if (!c || !lxcapi_is_defined(c))
 		return false;
@ -1881,20 +1888,23 @@ static bool lxcapi_destroy(struct lxc_container *c)
 		goto out;
 	}

+	am_unpriv = c->lxc_conf && geteuid() != 0 && !lxc_list_empty(&c->lxc_conf->id_map);
+
 	if (c->lxc_conf && has_snapshots(c)) {
 		ERROR("container %s has dependent snapshots", c->name);
 		goto out;
 	}

-	if (c->lxc_conf && c->lxc_conf->rootfs.path && c->lxc_conf->rootfs.mount)
+	if (!am_unpriv && c->lxc_conf->rootfs.path && c->lxc_conf->rootfs.mount) {
 		r = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
-	if (r) {
-		if (r->ops->destroy(r) < 0) {
+		if (r) {
+			if (r->ops->destroy(r) < 0) {
+				bdev_put(r);
+				ERROR("Error destroying rootfs for %s", c->name);
+				goto out;
+			}
 			bdev_put(r);
-			ERROR("Error destroying rootfs for %s", c->name);
-			goto out;
 		}
-		bdev_put(r);
 	}

 	mod_all_rdeps(c, false);
@ -1902,7 +1912,11 @@ static bool lxcapi_destroy(struct lxc_container *c)
 	const char *p1 = lxcapi_get_config_path(c);
 	char *path = alloca(strlen(p1) + strlen(c->name) + 2);
 	sprintf(path, "%s/%s", p1, c->name);
-	if (lxc_rmdir_onedev(path) < 0) {
+	if (am_unpriv)
+		ret = userns_exec_1(c->lxc_conf, lxc_rmdir_onedev_wrapper, path);
+	else
+		ret = lxc_rmdir_onedev(path);
+	if (ret < 0) {
 		ERROR("Error destroying container directory for %s", c->name);
 		goto out;
 	}
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@ -66,7 +66,7 @@ static int _recursive_rmdir_onedev(char *dirname, dev_t pdev)
 	process_unlock();
 	if (!dir) {
 		ERROR("%s: failed to open %s", __func__, dirname);
-		return 0;
+		return -1;
 	}

 	while (!readdir_r(dir, &dirent, &direntp)) {
@ -95,7 +95,7 @@ static int _recursive_rmdir_onedev(char *dirname, dev_t pdev)
 		if (mystat.st_dev != pdev)
 			continue;
 		if (S_ISDIR(mystat.st_mode)) {
-			if (!_recursive_rmdir_onedev(pathname, pdev))
+			if (_recursive_rmdir_onedev(pathname, pdev) < 0)
 				failed=1;
 		} else {
 			if (unlink(pathname) < 0) {
@ -118,17 +118,17 @@ static int _recursive_rmdir_onedev(char *dirname, dev_t pdev)
 		failed=1;
 	}

-	return !failed;
+	return failed ? -1 : 0;
 }

-/* returns 1 on success, 0 if there were any failures */
+/* returns 0 on success, -1 if there were any failures */
 extern int lxc_rmdir_onedev(char *path)
 {
 	struct stat mystat;

 	if (lstat(path, &mystat) < 0) {
 		ERROR("%s: failed to stat %s", __func__, path);
-		return 0;
+		return -1;
 	}

 	return _recursive_rmdir_onedev(path, mystat.st_dev);