mirror_iproute2/lib/namespace.c
Matteo Croce 80a931d41c ip: reset netns after each command in batch mode
When creating a new netns or executing a program into an existing one,
the unshare() or setns() calls will change the current netns.
In batch mode, this can run commands on the wrong interfaces, as the
ifindex value is meaningful only in the current netns. For example, this
command fails because veth-c doesn't exist in the init netns:

    # ip -b - <<-'EOF'
        netns add client
        link add name veth-c type veth peer veth-s netns client
        addr add 192.168.2.1/24 dev veth-c
    EOF
    Cannot find device "veth-c"
    Command failed -:7

But if there are two devices with the same name in the init and new netns,
ip will build a wrong ll_map with indexes belonging to the new netns,
and will execute actions in the init netns using this wrong mapping.
This script will flush all eth0 addresses and bring it down, as it has
the same ifindex of veth0 in the new netns:

    # ip addr
    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
        inet 127.0.0.1/8 scope host lo
           valid_lft forever preferred_lft forever
    2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
        link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
        inet 192.168.122.76/24 brd 192.168.122.255 scope global dynamic eth0
           valid_lft 3598sec preferred_lft 3598sec

    # ip -b - <<-'EOF'
        netns add client
        link add name veth0 type veth peer name veth1
        link add name veth-ns type veth peer name veth0 netns client
        link set veth0 down
        address flush veth0
    EOF

    # ip addr
    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
        inet 127.0.0.1/8 scope host lo
           valid_lft forever preferred_lft forever
    2: eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN group default qlen 1000
        link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
    3: veth1@veth0: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop state DOWN group default qlen 1000
        link/ether c2:db:d0:34:13:4a brd ff:ff:ff:ff:ff:ff
    4: veth0@veth1: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop state DOWN group default qlen 1000
        link/ether ca:9d:6b:5f:5f:8f brd ff:ff:ff:ff:ff:ff
    5: veth-ns@if2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
        link/ether 32:ef:22:df:51:0a brd ff:ff:ff:ff:ff:ff link-netns client

The same issue can be triggered by the netns exec subcommand with a
slightly different script:

    # ip netns add client
    # ip -b - <<-'EOF'
        netns exec client true
        link add name veth0 type veth peer name veth1
        link add name veth-ns type veth peer name veth0 netns client
        link set veth0 down
        address flush veth0
    EOF

Fix this by adding two netns_{save,restore} functions, which are used
to get a file descriptor for the init netns, and restore it after
each batch command.
netns_save() is called before the unshare() or setns(),
while netns_restore() is called after each command.

Fixes: 0dc34c7713 ("iproute2: Add processless network namespace support")
Reviewed-and-tested-by: Andrea Claudi <aclaudi@redhat.com>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
2019-06-10 10:42:14 -07:00

171 lines
3.8 KiB
C

/*
* namespace.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <sys/statvfs.h>
#include <fcntl.h>
#include <dirent.h>
#include <limits.h>
#include "utils.h"
#include "namespace.h"
/* Descriptor opened by netns_save() on the namespace to return to; -1 while none is held */
static int saved_netns = -1;
/*
 * Remember the network namespace the process is currently in by keeping an
 * open descriptor on /proc/self/ns/net, so that it can be re-entered later.
 *
 * Does nothing when a descriptor is already held. Prints an error and exits
 * with status 1 if the namespace file cannot be opened.
 */
void netns_save(void)
{
	int fd;

	if (saved_netns != -1)
		return;

	fd = open("/proc/self/ns/net", O_RDONLY | O_CLOEXEC);
	if (fd == -1) {
		perror("Cannot open init namespace");
		exit(1);
	}

	saved_netns = fd;
}
/*
 * Switch back to the network namespace recorded in saved_netns, then close
 * that descriptor and mark it as no longer held.
 *
 * A no-op when nothing has been saved. Prints an error and exits with
 * status 1 if setns() fails.
 */
void netns_restore(void)
{
	if (saved_netns != -1) {
		if (setns(saved_netns, CLONE_NEWNET) != 0) {
			perror("setns");
			exit(1);
		}

		close(saved_netns);
		saved_netns = -1;
	}
}
/*
 * Bind-mount every entry of NETNS_ETC_DIR/<name> over the entry of the same
 * name under /etc.
 *
 * Returns silently when the name is NAME_MAX characters or longer, or when
 * the per-namespace directory cannot be opened. A failed bind mount is
 * reported on stderr and the remaining entries are still processed.
 */
static void bind_etc(const char *name)
{
	char ns_etc_dir[sizeof(NETNS_ETC_DIR) + NAME_MAX];
	char src[PATH_MAX];
	char dst[PATH_MAX];
	struct dirent *de;
	DIR *d;

	if (strlen(name) >= NAME_MAX)
		return;

	snprintf(ns_etc_dir, sizeof(ns_etc_dir), "%s/%s", NETNS_ETC_DIR, name);

	d = opendir(ns_etc_dir);
	if (!d)
		return;

	for (de = readdir(d); de != NULL; de = readdir(d)) {
		const char *fname = de->d_name;

		/* Skip the directory's self and parent links */
		if (!strcmp(fname, ".") || !strcmp(fname, ".."))
			continue;

		snprintf(src, sizeof(src), "%s/%s", ns_etc_dir, fname);
		snprintf(dst, sizeof(dst), "/etc/%s", fname);

		if (mount(src, dst, "none", MS_BIND, NULL) >= 0)
			continue;

		fprintf(stderr, "Bind %s -> %s failed: %s\n",
			src, dst, strerror(errno));
	}

	closedir(d);
}
/*
 * Move the calling process into the network namespace NETNS_RUN_DIR/<name>
 * and give it a matching view of the filesystem.
 *
 * Steps, in order:
 *   1. open the namespace file and save the current namespace,
 *   2. setns() into the target namespace,
 *   3. unshare the mount namespace and mark "/" as a recursive slave,
 *   4. replace /sys with a fresh sysfs mount,
 *   5. bind-mount the per-namespace files over /etc.
 *
 * Returns 0 on success, or -1 after printing a message to stderr.
 */
int netns_switch(char *name)
{
	unsigned long sysfs_flags = 0;
	struct statvfs sys_stat;
	char ns_path[PATH_MAX];
	int ns_fd;

	snprintf(ns_path, sizeof(ns_path), "%s/%s", NETNS_RUN_DIR, name);

	ns_fd = open(ns_path, O_RDONLY | O_CLOEXEC);
	if (ns_fd < 0) {
		fprintf(stderr, "Cannot open network namespace \"%s\": %s\n",
			name, strerror(errno));
		return -1;
	}

	/* Keep a handle on the namespace we are leaving before switching */
	netns_save();

	if (setns(ns_fd, CLONE_NEWNET) < 0) {
		fprintf(stderr, "setting the network namespace \"%s\" failed: %s\n",
			name, strerror(errno));
		close(ns_fd);
		return -1;
	}
	close(ns_fd);

	if (unshare(CLONE_NEWNS) < 0) {
		fprintf(stderr, "unshare failed: %s\n", strerror(errno));
		return -1;
	}

	/* Keep mounts made from here on from propagating to the parent */
	if (mount("", "/", "none", MS_SLAVE | MS_REC, NULL)) {
		fprintf(stderr, "\"mount --make-rslave /\" failed: %s\n",
			strerror(errno));
		return -1;
	}

	/*
	 * Mount a /sys that describes the new network namespace. A failed
	 * umount may just mean no sysfs was mounted; if /sys is still there
	 * it gets overlaid instead, and a read-only instance cannot be
	 * shadowed by a read-write one, so the read-only flag is carried over.
	 */
	if (umount2("/sys", MNT_DETACH) < 0 &&
	    statvfs("/sys", &sys_stat) == 0 &&
	    (sys_stat.f_flag & ST_RDONLY))
		sysfs_flags = MS_RDONLY;

	if (mount(name, "/sys", "sysfs", sysfs_flags, NULL) < 0) {
		fprintf(stderr, "mount of /sys failed: %s\n",strerror(errno));
		return -1;
	}

	/* Set up bind mounts for the namespace's config files in /etc */
	bind_etc(name);

	return 0;
}
/*
 * Open a network namespace file read-only and return the descriptor.
 *
 * A name containing '/' is used as a path as given; any other name is
 * looked up under NETNS_RUN_DIR. Returns whatever open() returns.
 */
int netns_get_fd(const char *name)
{
	char run_path[PATH_MAX];

	if (strchr(name, '/') != NULL)
		return open(name, O_RDONLY);

	snprintf(run_path, sizeof(run_path), "%s/%s",
		 NETNS_RUN_DIR, name );
	return open(run_path, O_RDONLY);
}
/*
 * Call func(entry_name, arg) for each entry in NETNS_RUN_DIR other than
 * "." and "..", stopping early as soon as func returns non-zero.
 *
 * Returns -1 if the directory cannot be opened, otherwise 0 (whether or
 * not the callback stopped the walk).
 */
int netns_foreach(int (*func)(char *nsname, void *arg), void *arg)
{
	struct dirent *de;
	DIR *d;

	d = opendir(NETNS_RUN_DIR);
	if (d == NULL)
		return -1;

	for (de = readdir(d); de != NULL; de = readdir(d)) {
		if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
			continue;

		if (func(de->d_name, arg) != 0)
			break;
	}

	closedir(d);
	return 0;
}