diff --git a/doc/api-extensions.md b/doc/api-extensions.md index d5a0a3af7..b55acf0fb 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -65,3 +65,22 @@ lxc.net[i].ipvlan.isolation=[bridge|private|vepa] (defaults to bridge) lxc.net[i].link=eth0 lxc.net[i].flags=up ``` + +## network\_l2proxy + +This introduces the `lxc.net.[i].l2proxy` option, which can be either `0` or `1`. Defaults to `0`. +When used with `lxc.net.[i].link`, it adds IP neighbour proxy entries on the linked device +for any IPv4 and IPv6 addresses on the container's network device. + +For IPv4 addresses it will check the following sysctl value and fail with an error if it is not set: + +``` +net.ipv4.conf.[link].forwarding=1 +``` + +For IPv6 addresses it will check the following sysctl values and fail with an error if they are not set: + +``` +net.ipv6.conf.[link].proxy_ndp=1 +net.ipv6.conf.[link].forwarding=1 +``` diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in index 19f3acbd7..f8182567a 100644 --- a/doc/lxc.container.conf.sgml.in +++ b/doc/lxc.container.conf.sgml.in @@ -578,6 +578,24 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + + + + + + Controls whether layer 2 IP neighbour proxy entries will be added to the + lxc.net.[i].link interface for the IP addresses of the container. + Can be set to 0 or 1. Defaults to 0. + When used with IPv4 addresses, the following sysctl values need to be set: + net.ipv4.conf.[link].forwarding=1 + When used with IPv6 addresses, the following sysctl values need to be set: + net.ipv6.conf.[link].proxy_ndp=1 + net.ipv6.conf.[link].forwarding=1 + + + + @@ -645,7 +663,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA interface (as specified by the option) and use that as the gateway. is only available when - using the , + using the , and network types.
diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h index 55d5e9c96..1c748a1d4 100644 --- a/src/lxc/api_extensions.h +++ b/src/lxc/api_extensions.h @@ -46,6 +46,7 @@ static char *api_extensions[] = { "seccomp_notify", "network_veth_routes", "network_ipvlan", + "network_l2proxy", }; static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); diff --git a/src/lxc/confile.c b/src/lxc/confile.c index ac7e78eb1..78cb88e4f 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -129,6 +129,7 @@ lxc_config_define(net_ipv4_gateway); lxc_config_define(net_ipv6_address); lxc_config_define(net_ipv6_gateway); lxc_config_define(net_link); +lxc_config_define(net_l2proxy); lxc_config_define(net_macvlan_mode); lxc_config_define(net_ipvlan_mode); lxc_config_define(net_ipvlan_isolation); @@ -222,6 +223,7 @@ static struct lxc_config_t config_jump_table[] = { { "lxc.net.ipv6.address", set_config_net_ipv6_address, get_config_net_ipv6_address, clr_config_net_ipv6_address, }, { "lxc.net.ipv6.gateway", set_config_net_ipv6_gateway, get_config_net_ipv6_gateway, clr_config_net_ipv6_gateway, }, { "lxc.net.link", set_config_net_link, get_config_net_link, clr_config_net_link, }, + { "lxc.net.l2proxy", set_config_net_l2proxy, get_config_net_l2proxy, clr_config_net_l2proxy, }, { "lxc.net.macvlan.mode", set_config_net_macvlan_mode, get_config_net_macvlan_mode, clr_config_net_macvlan_mode, }, { "lxc.net.ipvlan.mode", set_config_net_ipvlan_mode, get_config_net_ipvlan_mode, clr_config_net_ipvlan_mode, }, { "lxc.net.ipvlan.isolation", set_config_net_ipvlan_isolation, get_config_net_ipvlan_isolation, clr_config_net_ipvlan_isolation, }, @@ -403,6 +405,35 @@ static int set_config_net_link(const char *key, const char *value, return ret; } +static int set_config_net_l2proxy(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) +{ + struct lxc_netdev *netdev = data; + unsigned int val = 0; + int ret; + + if (lxc_config_value_empty(value)) +
return clr_config_net_l2proxy(key, lxc_conf, data); + + if (!netdev) + return minus_one_set_errno(EINVAL); + + ret = lxc_safe_uint(value, &val); + if (ret < 0) + return minus_one_set_errno(-ret); + + switch (val) { + case 0: + netdev->l2proxy = false; + return 0; + case 1: + netdev->l2proxy = true; + return 0; + } + + return minus_one_set_errno(EINVAL); +} + static int set_config_net_name(const char *key, const char *value, struct lxc_conf *lxc_conf, void *data) { @@ -4960,6 +4991,19 @@ static int clr_config_net_link(const char *key, struct lxc_conf *lxc_conf, return 0; } +static int clr_config_net_l2proxy(const char *key, struct lxc_conf *lxc_conf, + void *data) +{ + struct lxc_netdev *netdev = data; + + if (!netdev) + return minus_one_set_errno(EINVAL); + + netdev->l2proxy = false; + + return 0; +} + static int clr_config_net_macvlan_mode(const char *key, struct lxc_conf *lxc_conf, void *data) { @@ -5282,6 +5326,18 @@ static int get_config_net_link(const char *key, char *retv, int inlen, return fulllen; } +static int get_config_net_l2proxy(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) +{ + struct lxc_netdev *netdev = data; + + /* Fail like the set/clr handlers instead of dereferencing NULL. */ + if (!netdev) + return minus_one_set_errno(EINVAL); + + return lxc_get_conf_bool(c, retv, inlen, netdev->l2proxy); +} + static int get_config_net_name(const char *key, char *retv, int inlen, struct lxc_conf *c, void *data) { diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c index 5bceb96bd..12a8dbb09 100644 --- a/src/lxc/confile_utils.c +++ b/src/lxc/confile_utils.c @@ -339,6 +339,10 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf) if (netdev->link[0] != '\0') TRACE("link: %s", netdev->link); + /* l2proxy only used when link is specified */ + if (netdev->link[0] != '\0') + TRACE("l2proxy: %s", netdev->l2proxy ?
"true" : "false"); + if (netdev->name[0] != '\0') TRACE("name: %s", netdev->name); diff --git a/src/lxc/file_utils.c b/src/lxc/file_utils.c index 603c0ace6..31411d712 100644 --- a/src/lxc/file_utils.c +++ b/src/lxc/file_utils.c @@ -147,7 +147,7 @@ ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, const void *expe ssize_t ret; ret = lxc_read_nointr(fd, buf, count); - if (ret <= 0) + if (ret < 0) return ret; if ((size_t)ret != count) @@ -158,7 +158,18 @@ ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, const void *expe return -1; } - return ret; + return 0; +} + +ssize_t lxc_read_file_expect(const char *path, void *buf, size_t count, const void *expected_buf) +{ + __do_close_prot_errno int fd = -EBADF; + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return -1; + + return lxc_read_nointr_expect(fd, buf, count, expected_buf); } bool file_exists(const char *f) diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h index cc8f69e18..1b8033d69 100644 --- a/src/lxc/file_utils.h +++ b/src/lxc/file_utils.h @@ -40,6 +40,8 @@ extern ssize_t lxc_send_nointr(int sockfd, void *buf, size_t len, int flags); extern ssize_t lxc_read_nointr(int fd, void *buf, size_t count); extern ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, const void *expected_buf); +extern ssize_t lxc_read_file_expect(const char *path, void *buf, size_t count, + const void *expected_buf); extern ssize_t lxc_recv_nointr(int sockfd, void *buf, size_t len, int flags); extern bool file_exists(const char *f); diff --git a/src/lxc/network.c b/src/lxc/network.c index def484613..a71eb5ddf 100644 --- a/src/lxc/network.c +++ b/src/lxc/network.c @@ -1660,6 +1660,24 @@ static int proc_sys_net_write(const char *path, const char *value) return err; } +static int lxc_is_ip_forwarding_enabled(const char *ifname, int family) +{ + int ret; + char path[PATH_MAX]; + char buf[1] = ""; + + if (family != AF_INET && family != AF_INET6) + return minus_one_set_errno(EINVAL); + +
ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s", + family == AF_INET ? "ipv4" : "ipv6", ifname, + "forwarding"); + if (ret < 0 || (size_t)ret >= PATH_MAX) + return minus_one_set_errno(E2BIG); + + return lxc_read_file_expect(path, buf, 1, "1"); +} + static int neigh_proxy_set(const char *ifname, int family, int flag) { int ret; @@ -1677,6 +1695,24 @@ static int neigh_proxy_set(const char *ifname, int family, int flag) return proc_sys_net_write(path, flag ? "1" : "0"); } +static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family) +{ + int ret; + char path[PATH_MAX]; + char buf[1] = ""; + + if (family != AF_INET && family != AF_INET6) + return minus_one_set_errno(EINVAL); + + ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s", + family == AF_INET ? "ipv4" : "ipv6", ifname, + family == AF_INET ? "proxy_arp" : "proxy_ndp"); + if (ret < 0 || (size_t)ret >= PATH_MAX) + return minus_one_set_errno(E2BIG); + + return lxc_read_file_expect(path, buf, 1, "1"); +} + int lxc_neigh_proxy_on(const char *name, int family) { return neigh_proxy_set(name, family, 1); @@ -2679,6 +2715,155 @@ clear_ifindices: return true; } +struct ip_proxy_args { + const char *ip; + const char *dev; +}; + +static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data) +{ + struct ip_proxy_args *args = data; + + execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL); + return -1; +} + +static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data) +{ + struct ip_proxy_args *args = data; + + execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL); + return -1; +} + +static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev) +{ + int ret; + char cmd_output[PATH_MAX]; + struct ip_proxy_args args = { + .ip = ip, + .dev = dev, + }; + + ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args); + if (ret < 0) { + ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip,
dev, cmd_output); + return -1; + } + + return 0; +} + +static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev) +{ + int ret; + char cmd_output[PATH_MAX]; + struct ip_proxy_args args = { + .ip = ip, + .dev = dev, + }; + + ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args); + if (ret < 0) { + ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output); + return -1; + } + + return 0; +} + +static int lxc_setup_l2proxy(struct lxc_netdev *netdev) { + struct lxc_list *cur, *next; + struct lxc_inetdev *inet4dev; + struct lxc_inet6dev *inet6dev; + char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN]; + + /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */ + if (!lxc_list_empty(&netdev->ipv4)) { + /* Check for net.ipv4.conf.[link].forwarding=1 */ + if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) { + ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link); + return minus_one_set_errno(EINVAL); + } + } + + /* If IPv6 addresses are specified, then check that sysctl is configured correctly.
*/ + if (!lxc_list_empty(&netdev->ipv6)) { + /* Check for net.ipv6.conf.[link].proxy_ndp=1 */ + if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) { + ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link); + return minus_one_set_errno(EINVAL); + } + + /* Check for net.ipv6.conf.[link].forwarding=1 */ + if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) { + ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link); + return minus_one_set_errno(EINVAL); + } + } + + lxc_list_for_each_safe(cur, &netdev->ipv4, next) { + inet4dev = cur->elem; + if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4))) + return -1; /* keep the errno set by inet_ntop(); negating it would store a bogus value */ + + if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0) + return minus_one_set_errno(EINVAL); + } + + lxc_list_for_each_safe(cur, &netdev->ipv6, next) { + inet6dev = cur->elem; + if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6))) + return -1; /* keep the errno set by inet_ntop(); negating it would store a bogus value */ + + if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0) + return minus_one_set_errno(EINVAL); + } + + return 0; +} + +static int lxc_delete_l2proxy(struct lxc_netdev *netdev) { + struct lxc_list *cur, *next; + struct lxc_inetdev *inet4dev; + struct lxc_inet6dev *inet6dev; + char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN]; + int err = 0; + + lxc_list_for_each_safe(cur, &netdev->ipv4, next) { + inet4dev = cur->elem; + if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4))) { + err = -1; + SYSERROR("Failed to convert IP for l2proxy removal on dev \"%s\"", netdev->link); + continue; /* Try to remove any other l2proxy entries */ + } + + if (lxc_del_ip_neigh_proxy(bufinet4, netdev->link) < 0) { + err = -1; + continue; /* Try to remove any other l2proxy entries */ + } + } + + lxc_list_for_each_safe(cur, &netdev->ipv6, next) { + inet6dev = cur->elem; + if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6))) { + err = -1; + SYSERROR("Failed to convert IP
for l2proxy removal on dev \"%s\"", netdev->link); + continue; /* Try to remove any other l2proxy entries */ + } + + if (lxc_del_ip_neigh_proxy(bufinet6, netdev->link) < 0) { + err = -1; + continue; /* Try to remove any other l2proxy entries */ + } + } + + if (err < 0) + return minus_one_set_errno(EINVAL); + + return 0; +} + int lxc_create_network_priv(struct lxc_handler *handler) { struct lxc_list *iterator; @@ -2695,11 +2880,18 @@ int lxc_create_network_priv(struct lxc_handler *handler) return -1; } + /* Setup l2proxy entries if enabled and used with a link property */ + if (netdev->l2proxy && netdev->link[0] != '\0') { + if (lxc_setup_l2proxy(netdev)) { + ERROR("Failed to setup l2proxy"); + return -1; + } + } + if (netdev_conf[netdev->type](handler, netdev)) { ERROR("Failed to create network device"); return -1; } - } return 0; @@ -2795,6 +2987,13 @@ bool lxc_delete_network_priv(struct lxc_handler *handler) if (!netdev->ifindex) continue; + /* Delete l2proxy entries if enabled and used with a link property */ + if (netdev->l2proxy && netdev->link[0] != '\0') { + if (lxc_delete_l2proxy(netdev)) + WARN("Failed to delete all l2proxy config"); + /* Don't return, let the network be cleaned up as normal. */ + } + if (netdev->type == LXC_NET_PHYS) { ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link); if (ret < 0) diff --git a/src/lxc/network.h b/src/lxc/network.h index fa80404bc..468593f5e 100644 --- a/src/lxc/network.h +++ b/src/lxc/network.h @@ -171,6 +171,7 @@ struct lxc_netdev { int type; int flags; char link[IFNAMSIZ]; + bool l2proxy; char name[IFNAMSIZ]; char *hwaddr; char *mtu;