diff --git a/ip/Makefile b/ip/Makefile index 6054e8a7..2ee4e7c0 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -1,4 +1,4 @@ -IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o \ +IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ rtm_map.o iptunnel.o ip6tunnel.o tunnel.o ipneigh.o ipntable.o iplink.o \ ipmaddr.o ipmonitor.o ipmroute.o ipprefix.o iptuntap.o \ ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o \ diff --git a/ip/ip.c b/ip/ip.c index b127d570..7f0c4688 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -44,7 +44,8 @@ static void usage(void) "Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n" " ip [ -force ] -batch filename\n" "where OBJECT := { link | addr | addrlabel | route | rule | neigh | ntable |\n" -" tunnel | tuntap | maddr | mroute | mrule | monitor | xfrm }\n" +" tunnel | tuntap | maddr | mroute | mrule | monitor | xfrm |\n" +" netns }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n" " -f[amily] { inet | inet6 | ipx | dnet | link } |\n" " -l[oops] { maximum-addr-flush-attempts } |\n" @@ -80,6 +81,7 @@ static const struct cmd { { "xfrm", do_xfrm }, { "mroute", do_multiroute }, { "mrule", do_multirule }, + { "netns", do_netns }, { "help", do_help }, { 0 } }; diff --git a/ip/ip_common.h b/ip/ip_common.h index a1141869..5e5fb762 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -38,6 +38,7 @@ extern int do_ipmonitor(int argc, char **argv); extern int do_multiaddr(int argc, char **argv); extern int do_multiroute(int argc, char **argv); extern int do_multirule(int argc, char **argv); +extern int do_netns(int argc, char **argv); extern int do_xfrm(int argc, char **argv); static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb) @@ -64,6 +65,7 @@ struct link_util }; struct link_util *get_link_kind(const char *kind); +int get_netns_fd(const char *name); #ifndef INFINITY_LIFE_TIME #define INFINITY_LIFE_TIME 0xFFFFFFFFU diff --git a/ip/iplink.c b/ip/iplink.c index 48c02548..e5325a69 100644 --- a/ip/iplink.c 
+++ b/ip/iplink.c
@@ -67,6 +67,7 @@ void iplink_usage(void)
 	fprintf(stderr, " [ broadcast LLADDR ]\n");
 	fprintf(stderr, " [ mtu MTU ]\n");
 	fprintf(stderr, " [ netns PID ]\n");
+	fprintf(stderr, " [ netns NAME ]\n");
 	fprintf(stderr, " [ alias NAME ]\n");
 	fprintf(stderr, " [ vf NUM [ mac LLADDR ]\n");
 	fprintf(stderr, " [ vlan VLANID [ qos VLAN-QOS ] ]\n");
@@ -304,9 +305,14 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
 			NEXT_ARG();
 			if (netns != -1)
 				duparg("netns", *argv);
-			if (get_integer(&netns, *argv, 0))
+			/* A namespace name (or path) that can be opened wins;
+			 * otherwise the argument is parsed as a PID. */
+			if ((netns = get_netns_fd(*argv)) >= 0)
+				addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_FD, &netns, 4);
+			else if (get_integer(&netns, *argv, 0) == 0)
+				addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_PID, &netns, 4);
+			else
 				invarg("Invalid \"netns\" value\n", *argv);
-			addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_PID, &netns, 4);
 		} else if (strcmp(*argv, "multicast") == 0) {
 			NEXT_ARG();
 			req->i.ifi_change |= IFF_MULTICAST;
diff --git a/ip/ipnetns.c b/ip/ipnetns.c
new file mode 100644
index 00000000..db7007cf
--- /dev/null
+++ b/ip/ipnetns.c
@@ -0,0 +1,339 @@
+#define _ATFILE_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/inotify.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/syscall.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "ip_common.h"
+
+#define NETNS_RUN_DIR "/var/run/netns"
+#define NETNS_ETC_DIR "/etc/netns"
+
+#ifndef CLONE_NEWNET
+#define CLONE_NEWNET 0x40000000 /* New network namespace (lo, device, names sockets, etc) */
+#endif
+
+#ifndef MNT_DETACH
+#define MNT_DETACH 0x00000002 /* Just detach from the tree */
+#endif /* MNT_DETACH */
+
+/* Fallback for C libraries that lack a setns() wrapper.  Builds against
+ * a libc that already declares setns() should define HAVE_SETNS so this
+ * static definition does not clash with the library prototype.
+ */
+#ifndef HAVE_SETNS
+static int setns(int fd, int nstype)
+{
+#ifdef __NR_setns
+	return syscall(__NR_setns, fd, nstype);
+#else
+	errno = ENOSYS;
+	return -1;
+#endif
+}
+#endif /* HAVE_SETNS */
+
+static void usage(void) __attribute__((noreturn));
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: ip netns list\n");
+	fprintf(stderr, " ip netns add NAME\n");
+	fprintf(stderr, " ip netns delete NAME\n");
+	fprintf(stderr, " ip netns exec NAME cmd ...\n");
+	fprintf(stderr, " ip netns monitor\n");
+	exit(-1);
+}
+
+/* Open a network namespace by name or by path.  An argument containing
+ * a '/' is opened as given; anything else is looked up in NETNS_RUN_DIR.
+ * Returns the open(2) result: a descriptor, or -1 with errno set.
+ */
+int get_netns_fd(const char *name)
+{
+	char pathbuf[MAXPATHLEN];
+	const char *path, *ptr;
+
+	path = name;
+	ptr = strchr(name, '/');
+	if (!ptr) {
+		snprintf(pathbuf, sizeof(pathbuf), "%s/%s",
+			NETNS_RUN_DIR, name );
+		path = pathbuf;
+	}
+	return open(path, O_RDONLY);
+}
+
+/* Print the names found in NETNS_RUN_DIR; a missing directory is not an error. */
+static int netns_list(int argc, char **argv)
+{
+	struct dirent *entry;
+	DIR *dir;
+
+	dir = opendir(NETNS_RUN_DIR);
+	if (!dir)
+		return 0;
+
+	while ((entry = readdir(dir)) != NULL) {
+		if (strcmp(entry->d_name, ".") == 0)
+			continue;
+		if (strcmp(entry->d_name, "..") == 0)
+			continue;
+		printf("%s\n", entry->d_name);
+	}
+	closedir(dir);
+	return 0;
+}
+
+/* Bind mount each entry of NETNS_ETC_DIR/<name>/ over the entry of the
+ * same name in /etc.  Individual failures are reported and skipped.
+ */
+static void bind_etc(const char *name)
+{
+	char etc_netns_path[MAXPATHLEN];
+	char netns_name[MAXPATHLEN];
+	char etc_name[MAXPATHLEN];
+	struct dirent *entry;
+	DIR *dir;
+
+	snprintf(etc_netns_path, sizeof(etc_netns_path), "%s/%s", NETNS_ETC_DIR, name);
+	dir = opendir(etc_netns_path);
+	if (!dir)
+		return;
+
+	while ((entry = readdir(dir)) != NULL) {
+		if (strcmp(entry->d_name, ".") == 0)
+			continue;
+		if (strcmp(entry->d_name, "..") == 0)
+			continue;
+		snprintf(netns_name, sizeof(netns_name), "%s/%s", etc_netns_path, entry->d_name);
+		snprintf(etc_name, sizeof(etc_name), "/etc/%s", entry->d_name);
+		if (mount(netns_name, etc_name, "none", MS_BIND, NULL) < 0) {
+			fprintf(stderr, "Bind %s -> %s failed: %s\n",
+				netns_name, etc_name, strerror(errno));
+		}
+	}
+	closedir(dir);
+}
+
+static int netns_exec(int argc, char **argv)
+{
+	/* Setup the proper environment for apps that are not netns
+	 * aware, and execute a program in that environment.
+	 */
+	const char *name, *cmd;
+	char net_path[MAXPATHLEN];
+	int netns;
+
+	if (argc < 1) {
+		fprintf(stderr, "No netns name specified\n");
+		return -1;
+	}
+	if (argc < 2) {
+		fprintf(stderr, "No cmd specified\n");
+		return -1;
+	}
+	name = argv[0];
+	cmd = argv[1];
+	snprintf(net_path, sizeof(net_path), "%s/%s", NETNS_RUN_DIR, name);
+	netns = open(net_path, O_RDONLY);
+	if (netns < 0) {
+		fprintf(stderr, "Cannot open network namespace: %s\n",
+			strerror(errno));
+		return -1;
+	}
+	if (setns(netns, CLONE_NEWNET) < 0) {
+		fprintf(stderr, "setting the network namespace failed: %s\n",
+			strerror(errno));
+		close(netns);
+		return -1;
+	}
+	/* The task is in the namespace now; do not leak the descriptor
+	 * into the command that is about to be executed.
+	 */
+	close(netns);
+
+	if (unshare(CLONE_NEWNS) < 0) {
+		fprintf(stderr, "unshare failed: %s\n", strerror(errno));
+		return -1;
+	}
+	/* Mount a version of /sys that describes the network namespace */
+	if (umount2("/sys", MNT_DETACH) < 0) {
+		fprintf(stderr, "umount of /sys failed: %s\n", strerror(errno));
+		return -1;
+	}
+	if (mount(name, "/sys", "sysfs", 0, NULL) < 0) {
+		fprintf(stderr, "mount of /sys failed: %s\n",strerror(errno));
+		return -1;
+	}
+
+	/* Setup bind mounts for config files in /etc */
+	bind_etc(name);
+
+	if (execvp(cmd, argv + 1) < 0)
+		fprintf(stderr, "exec of %s failed: %s\n",
+			cmd, strerror(errno));
+	exit(-1);
+}
+
+/* Detach whatever is mounted on NETNS_RUN_DIR/<name> and remove the
+ * file.  The umount2() result is ignored; only unlink() failure is
+ * reported.
+ */
+static int netns_delete(int argc, char **argv)
+{
+	const char *name;
+	char netns_path[MAXPATHLEN];
+
+	if (argc < 1) {
+		fprintf(stderr, "No netns name specified\n");
+		return -1;
+	}
+
+	name = argv[0];
+	snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name);
+	umount2(netns_path, MNT_DETACH);
+	if (unlink(netns_path) < 0) {
+		fprintf(stderr, "Cannot remove %s: %s\n",
+			netns_path, strerror(errno));
+		return -1;
+	}
+	return 0;
+}
+
+static int netns_add(int argc, char **argv)
+{
+	/* This function creates a new network namespace and
+	 * a new mount namespace and bind them into a well known
+	 * location in the filesystem based on the name provided.
+	 *
+	 * The mount namespace is created so that any necessary
+	 * userspace tweaks like remounting /sys, or bind mounting
+	 * a new /etc/resolv.conf can be shared between users.
+	 */
+	char netns_path[MAXPATHLEN];
+	const char *name;
+	int fd;
+
+	if (argc < 1) {
+		fprintf(stderr, "No netns name specified\n");
+		return -1;
+	}
+	name = argv[0];
+
+	snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name);
+
+	/* Create the base netns directory if it doesn't exist */
+	mkdir(NETNS_RUN_DIR, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH);
+
+	/* Create the filesystem state.  O_EXCL rejects a name that is
+	 * already in use, and that failure must not reach out_delete,
+	 * which would remove the existing namespace.
+	 */
+	fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Could not create %s: %s\n",
+			netns_path, strerror(errno));
+		exit(-1);
+	}
+	close(fd);
+	if (unshare(CLONE_NEWNET) < 0) {
+		fprintf(stderr, "Failed to create a new network namespace: %s\n",
+			strerror(errno));
+		goto out_delete;
+	}
+
+	/* Bind the netns last so I can watch for it */
+	if (mount("/proc/self/ns/net", netns_path, "none", MS_BIND, NULL) < 0) {
+		fprintf(stderr, "Bind /proc/self/ns/net -> %s failed: %s\n",
+			netns_path, strerror(errno));
+		goto out_delete;
+	}
+	return 0;
+out_delete:
+	netns_delete(argc, argv);
+	exit(-1);
+	return -1;
+}
+
+
+/* Watch NETNS_RUN_DIR with inotify and print an "add NAME" or
+ * "delete NAME" line for each create/delete event.  Only returns
+ * on error.
+ */
+static int netns_monitor(int argc, char **argv)
+{
+	char buf[4096];
+	struct inotify_event *event;
+	int fd;
+	fd = inotify_init();
+	if (fd < 0) {
+		fprintf(stderr, "inotify_init failed: %s\n",
+			strerror(errno));
+		return -1;
+	}
+	if (inotify_add_watch(fd, NETNS_RUN_DIR, IN_CREATE | IN_DELETE) < 0) {
+		fprintf(stderr, "inotify_add_watch failed: %s\n",
+			strerror(errno));
+		return -1;
+	}
+	for(;;) {
+		ssize_t len = read(fd, buf, sizeof(buf));
+		if (len < 0) {
+			fprintf(stderr, "read failed: %s\n",
+				strerror(errno));
+			return -1;
+		}
+		for (event = (struct inotify_event *)buf;
+		     (char *)event < &buf[len];
+		     event = (struct inotify_event *)((char *)event + sizeof(*event) + event->len)) {
+			if (event->mask & IN_CREATE)
+				printf("add %s\n", event->name);
+			if (event->mask & IN_DELETE)
+				printf("delete %s\n", event->name);
+		}
+	}
+	return 0;
+}
+
+/* Entry point for "ip netns": dispatch on the sub-command in argv[0];
+ * with no arguments, list the named namespaces.
+ */
+int do_netns(int argc, char **argv)
+{
+	if (argc < 1)
+		return netns_list(0, NULL);
+
+	if ((matches(*argv, "list") == 0) || (matches(*argv, "show") == 0) ||
+	    (matches(*argv, "lst") == 0))
+		return netns_list(argc-1, argv+1);
+
+	if (matches(*argv, "help") == 0)
+		usage();
+
+	if (matches(*argv, "add") == 0)
+		return netns_add(argc-1, argv+1);
+
+	if (matches(*argv, "delete") == 0)
+		return netns_delete(argc-1, argv+1);
+
+	if (matches(*argv, "exec") == 0)
+		return netns_exec(argc-1, argv+1);
+
+	if (matches(*argv, "monitor") == 0)
+		return netns_monitor(argc-1, argv+1);
+
+	fprintf(stderr, "Command \"%s\" is unknown, try \"ip netns help\".\n", *argv);
+	exit(-1);
+}
diff --git a/man/man8/ip.8 b/man/man8/ip.8
index 4ddc78c4..8d2ab874 100644
--- a/man/man8/ip.8
+++ b/man/man8/ip.8
@@ -85,6 +85,9 @@ ip \- show / manipulate routing, devices, policy routing and tunnels
 .B netns
 .IR PID " |"
 .br
+.B netns
+.IR NETNSNAME " |"
+.br
 .B alias
 .IR NAME " |"
 .br
@@ -161,6 +164,17 @@ tentative " | " deprecated " | " dadfailed " | " temporary " ]"
 .ti -8
 .BR "ip addrlabel" " { " list " | " flush " }"
 
+.ti -8
+.BR "ip netns" " { " list " | " monitor " } "
+
+.ti -8
+.BR "ip netns" " { " add " | " delete " } "
+.I NETNSNAME
+
+.ti -8
+.BR "ip netns exec "
+.I NETNSNAME command ...
+
 .ti -8
 .BR "ip route" " { "
 .BR list " | " flush " } "
@@ -1031,6 +1045,11 @@ the interface is
 move the device to the network namespace associated with the process
 .IR "PID".
 
+.TP
+.BI netns " NETNSNAME"
+move the device to the network namespace associated with name
+.IR "NETNSNAME".
+
 .TP
 .BI alias " NAME"
 give the device a symbolic name for easy reference.
@@ -2496,6 +2515,43 @@ at any time. It prepends the history with the state snapshot dumped at the moment of starting.
+.SH ip netns - process network namespace management + +A network namespace is logically another copy of the network stack, +with its own routes, firewall rules, and network devices. + +By convention a named network namespace is an object at +.BR "/var/run/netns/" NAME +that can be opened. The file descriptor resulting from opening +.BR "/var/run/netns/" NAME +refers to the specified network namespace. Holding that file +descriptor open keeps the network namespace alive. The file +descriptor can be used with the +.B setns(2) +system call to change the network namespace associated with a task. + +The convention for network namespace aware applications is to look +for global network configuration files first in +.BR "/etc/netns/" NAME "/" +then in +.BR "/etc/". +For example, if you want a different version of +.BR /etc/resolv.conf +for a network namespace used to isolate your vpn you would name it +.BR /etc/netns/myvpn/resolv.conf. + +.B ip netns exec +automates handling of this configuration file convention for network +namespace unaware applications, by creating a mount namespace and +bind mounting all of the per network namespace configuration files into +their traditional location in /etc. + +.SS ip netns list - show all of the named network namespaces +.SS ip netns monitor - report when network namespace names are created and destroyed +.SS ip netns add NAME - create a new named network namespace +.SS ip netns delete NAME - delete the name of a network namespace +.SS ip netns exec NAME cmd ... - Run cmd in the named network namespace + .SH ip xfrm - transform configuration xfrm is an IP framework for transforming packets (such as encrypting their payloads). This framework is used to implement the IPsec protocol