diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index d5a0a3af7..b55acf0fb 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -65,3 +65,22 @@ lxc.net[i].ipvlan.isolation=[bridge|private|vepa] (defaults to bridge)
lxc.net[i].link=eth0
lxc.net[i].flags=up
```
+
+## network\_l2proxy
+
+This introduces the `lxc.net.[i].l2proxy` key, which can be either `0` or `1`. Defaults to `0`.
+When used together with `lxc.net.[i].link`, it adds IP neighbour proxy entries on the linked device
+for every IPv4 and IPv6 address configured on the container's network device.
+
+For IPv4 addresses it checks the following sysctl value and fails with an error if it is not set:
+
+```
+net.ipv4.conf.[link].forwarding=1
+```
+
+For IPv6 addresses it checks the following sysctl values and fails with an error if any of them is not set:
+
+```
+net.ipv6.conf.[link].proxy_ndp=1
+net.ipv6.conf.[link].forwarding=1
+```
diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in
index 19f3acbd7..f8182567a 100644
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -578,6 +578,24 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+
+
+
+
+ Controls whether layer 2 IP neighbour proxy entries will be added to the
+ lxc.net.[i].link interface for the IP addresses of the container.
+ Can be set to 0 or 1. Defaults to 0.
+ When used with IPv4 addresses, the following sysctl values need to be set:
+ net.ipv4.conf.[link].forwarding=1
+ When used with IPv6 addresses, the following sysctl values need to be set:
+ net.ipv6.conf.[link].proxy_ndp=1
+ net.ipv6.conf.[link].forwarding=1
+
+
+
+
@@ -645,7 +663,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
interface (as specified by the
option) and use that as
the gateway. is only available when
- using the ,
+ using the ,
and network types.
diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h
index 55d5e9c96..1c748a1d4 100644
--- a/src/lxc/api_extensions.h
+++ b/src/lxc/api_extensions.h
@@ -46,6 +46,7 @@ static char *api_extensions[] = {
"seccomp_notify",
"network_veth_routes",
"network_ipvlan",
+ "network_l2proxy",
};
static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
diff --git a/src/lxc/confile.c b/src/lxc/confile.c
index ac7e78eb1..78cb88e4f 100644
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -129,6 +129,7 @@ lxc_config_define(net_ipv4_gateway);
lxc_config_define(net_ipv6_address);
lxc_config_define(net_ipv6_gateway);
lxc_config_define(net_link);
+lxc_config_define(net_l2proxy);
lxc_config_define(net_macvlan_mode);
lxc_config_define(net_ipvlan_mode);
lxc_config_define(net_ipvlan_isolation);
@@ -222,6 +223,7 @@ static struct lxc_config_t config_jump_table[] = {
{ "lxc.net.ipv6.address", set_config_net_ipv6_address, get_config_net_ipv6_address, clr_config_net_ipv6_address, },
{ "lxc.net.ipv6.gateway", set_config_net_ipv6_gateway, get_config_net_ipv6_gateway, clr_config_net_ipv6_gateway, },
{ "lxc.net.link", set_config_net_link, get_config_net_link, clr_config_net_link, },
+ { "lxc.net.l2proxy", set_config_net_l2proxy, get_config_net_l2proxy, clr_config_net_l2proxy, },
{ "lxc.net.macvlan.mode", set_config_net_macvlan_mode, get_config_net_macvlan_mode, clr_config_net_macvlan_mode, },
{ "lxc.net.ipvlan.mode", set_config_net_ipvlan_mode, get_config_net_ipvlan_mode, clr_config_net_ipvlan_mode, },
{ "lxc.net.ipvlan.isolation", set_config_net_ipvlan_isolation, get_config_net_ipvlan_isolation, clr_config_net_ipvlan_isolation, },
@@ -403,6 +405,35 @@ static int set_config_net_link(const char *key, const char *value,
return ret;
}
+/* Setter for the lxc.net.[i].l2proxy key.
+ *
+ * An empty value is forwarded to clr_config_net_l2proxy(). Otherwise the
+ * value is parsed as an unsigned integer: 0 disables l2proxy on the netdev
+ * passed in data, 1 enables it, anything else is rejected with EINVAL.
+ *
+ * Returns 0 on success, -1 with errno set on failure.
+ */
+static int set_config_net_l2proxy(const char *key, const char *value,
+				  struct lxc_conf *lxc_conf, void *data)
+{
+	struct lxc_netdev *netdev = data;
+	unsigned int parsed = 0;
+	int err;
+
+	if (lxc_config_value_empty(value))
+		return clr_config_net_l2proxy(key, lxc_conf, data);
+
+	if (!netdev)
+		return minus_one_set_errno(EINVAL);
+
+	err = lxc_safe_uint(value, &parsed);
+	if (err < 0)
+		return minus_one_set_errno(-err);
+
+	/* The key is a boolean: only 0 and 1 are accepted. */
+	if (parsed > 1)
+		return minus_one_set_errno(EINVAL);
+
+	netdev->l2proxy = (parsed == 1);
+	return 0;
+}
+
static int set_config_net_name(const char *key, const char *value,
struct lxc_conf *lxc_conf, void *data)
{
@@ -4960,6 +4991,19 @@ static int clr_config_net_link(const char *key, struct lxc_conf *lxc_conf,
return 0;
}
+/* Clear handler for the lxc.net.[i].l2proxy key: switches l2proxy off on the
+ * netdev passed in data. Returns 0, or -1 with errno EINVAL if data is NULL.
+ */
+static int clr_config_net_l2proxy(const char *key, struct lxc_conf *lxc_conf,
+				  void *data)
+{
+	struct lxc_netdev *netdev = data;
+
+	if (netdev) {
+		netdev->l2proxy = false;
+		return 0;
+	}
+
+	return minus_one_set_errno(EINVAL);
+}
+
static int clr_config_net_macvlan_mode(const char *key,
struct lxc_conf *lxc_conf, void *data)
{
@@ -5282,6 +5326,13 @@ static int get_config_net_link(const char *key, char *retv, int inlen,
return fulllen;
}
+/* Getter for the lxc.net.[i].l2proxy key.
+ *
+ * Formats the l2proxy flag of the netdev passed in data through
+ * lxc_get_conf_bool() and returns its result. Returns -1 with errno EINVAL
+ * when no netdev is supplied, matching the set/clr handlers for this key.
+ */
+static int get_config_net_l2proxy(const char *key, char *retv, int inlen,
+				  struct lxc_conf *c, void *data)
+{
+	struct lxc_netdev *netdev = data;
+
+	/* Guard before the dereference below: data may be NULL. */
+	if (!netdev)
+		return minus_one_set_errno(EINVAL);
+
+	return lxc_get_conf_bool(c, retv, inlen, netdev->l2proxy);
+}
+
static int get_config_net_name(const char *key, char *retv, int inlen,
struct lxc_conf *c, void *data)
{
diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c
index 5bceb96bd..12a8dbb09 100644
--- a/src/lxc/confile_utils.c
+++ b/src/lxc/confile_utils.c
@@ -339,6 +339,10 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf)
if (netdev->link[0] != '\0')
TRACE("link: %s", netdev->link);
+ /* l2proxy only used when link is specified */
+ if (netdev->link[0] != '\0')
+ TRACE("l2proxy: %s", netdev->l2proxy ? "true" : "false");
+
if (netdev->name[0] != '\0')
TRACE("name: %s", netdev->name);
diff --git a/src/lxc/file_utils.c b/src/lxc/file_utils.c
index 603c0ace6..31411d712 100644
--- a/src/lxc/file_utils.c
+++ b/src/lxc/file_utils.c
@@ -147,7 +147,7 @@ ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, const void *expe
ssize_t ret;
ret = lxc_read_nointr(fd, buf, count);
- if (ret <= 0)
+ if (ret < 0)
return ret;
if ((size_t)ret != count)
@@ -158,7 +158,18 @@ ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, const void *expe
return -1;
}
- return ret;
+ return 0;
+}
+
+/* Open path read-only (close-on-exec) and run lxc_read_nointr_expect() on it
+ * with buf, count and expected_buf.
+ *
+ * Returns -1 if the file cannot be opened, otherwise whatever
+ * lxc_read_nointr_expect() returns.
+ */
+ssize_t lxc_read_file_expect(const char *path, void *buf, size_t count, const void *expected_buf)
+{
+	__do_close_prot_errno int fd = open(path, O_RDONLY | O_CLOEXEC);
+
+	if (fd >= 0)
+		return lxc_read_nointr_expect(fd, buf, count, expected_buf);
+
+	return -1;
+}
bool file_exists(const char *f)
diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h
index cc8f69e18..1b8033d69 100644
--- a/src/lxc/file_utils.h
+++ b/src/lxc/file_utils.h
@@ -40,6 +40,8 @@ extern ssize_t lxc_send_nointr(int sockfd, void *buf, size_t len, int flags);
extern ssize_t lxc_read_nointr(int fd, void *buf, size_t count);
extern ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count,
const void *expected_buf);
+extern ssize_t lxc_read_file_expect(const char *path, void *buf, size_t count,
+ const void *expected_buf);
extern ssize_t lxc_recv_nointr(int sockfd, void *buf, size_t len, int flags);
extern bool file_exists(const char *f);
diff --git a/src/lxc/network.c b/src/lxc/network.c
index def484613..a71eb5ddf 100644
--- a/src/lxc/network.c
+++ b/src/lxc/network.c
@@ -1660,6 +1660,24 @@ static int proc_sys_net_write(const char *path, const char *value)
return err;
}
+/* Check the per-interface "forwarding" sysctl of ifname for the given family.
+ *
+ * Builds /proc/sys/net/{ipv4,ipv6}/conf/<ifname>/forwarding and passes it to
+ * lxc_read_file_expect() with a one byte buffer and the expected value "1";
+ * that call's result is returned (callers treat a negative value as "not
+ * enabled"). Fails with -1/EINVAL for a family other than AF_INET/AF_INET6
+ * and -1/E2BIG when the path does not fit into PATH_MAX.
+ */
+static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
+{
+	char buf[1] = "";
+	char path[PATH_MAX];
+	const char *proto;
+	int len;
+
+	switch (family) {
+	case AF_INET:
+		proto = "ipv4";
+		break;
+	case AF_INET6:
+		proto = "ipv6";
+		break;
+	default:
+		return minus_one_set_errno(EINVAL);
+	}
+
+	len = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
+		       proto, ifname, "forwarding");
+	if (len < 0 || (size_t)len >= sizeof(path))
+		return minus_one_set_errno(E2BIG);
+
+	return lxc_read_file_expect(path, buf, 1, "1");
+}
+
static int neigh_proxy_set(const char *ifname, int family, int flag)
{
int ret;
@@ -1677,6 +1695,24 @@ static int neigh_proxy_set(const char *ifname, int family, int flag)
return proc_sys_net_write(path, flag ? "1" : "0");
}
+/* Check the per-interface neighbour proxy sysctl of ifname for the given
+ * family: proxy_arp for AF_INET, proxy_ndp for AF_INET6.
+ *
+ * Builds the /proc/sys/net/... path and passes it to lxc_read_file_expect()
+ * with a one byte buffer and the expected value "1"; that call's result is
+ * returned (callers treat a negative value as "not enabled"). Fails with
+ * -1/EINVAL for any other family and -1/E2BIG when the path does not fit
+ * into PATH_MAX.
+ */
+static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
+{
+	char buf[1] = "";
+	char path[PATH_MAX];
+	const char *proto, *knob;
+	int len;
+
+	switch (family) {
+	case AF_INET:
+		proto = "ipv4";
+		knob = "proxy_arp";
+		break;
+	case AF_INET6:
+		proto = "ipv6";
+		knob = "proxy_ndp";
+		break;
+	default:
+		return minus_one_set_errno(EINVAL);
+	}
+
+	len = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
+		       proto, ifname, knob);
+	if (len < 0 || (size_t)len >= sizeof(path))
+		return minus_one_set_errno(E2BIG);
+
+	return lxc_read_file_expect(path, buf, 1, "1");
+}
+
int lxc_neigh_proxy_on(const char *name, int family)
{
return neigh_proxy_set(name, family, 1);
@@ -2679,6 +2715,155 @@ clear_ifindices:
return true;
}
+/* Arguments handed to the "ip neigh ... proxy" exec wrappers via run_command(). */
+struct ip_proxy_args {
+	const char *ip;  /* address string placed after "proxy" on the ip command line */
+	const char *dev; /* device name placed after "dev" on the ip command line */
+};
+
+/* Callback that execs "ip neigh add proxy <ip> dev <dev>" using the
+ * struct ip_proxy_args passed in data. Returns -1 only if the exec fails.
+ */
+static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
+{
+	const struct ip_proxy_args *proxy = data;
+	const char *ip = proxy->ip;
+	const char *dev = proxy->dev;
+
+	execlp("ip", "ip", "neigh", "add", "proxy", ip, "dev", dev, (char *)NULL);
+
+	/* Reached only when execlp() could not replace the process image. */
+	return -1;
+}
+
+/* Callback that execs "ip neigh flush proxy <ip> dev <dev>" using the
+ * struct ip_proxy_args passed in data. Returns -1 only if the exec fails.
+ */
+static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
+{
+	const struct ip_proxy_args *proxy = data;
+	const char *ip = proxy->ip;
+	const char *dev = proxy->dev;
+
+	execlp("ip", "ip", "neigh", "flush", "proxy", ip, "dev", dev, (char *)NULL);
+
+	/* Reached only when execlp() could not replace the process image. */
+	return -1;
+}
+
+/* Add a neighbour proxy entry for ip on dev by running the "ip neigh add
+ * proxy" exec wrapper through run_command().
+ *
+ * Returns 0 on success; on failure logs the captured command output and
+ * returns -1.
+ */
+static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
+{
+	char cmd_output[PATH_MAX];
+	struct ip_proxy_args args;
+
+	args.ip = ip;
+	args.dev = dev;
+
+	if (run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args) >= 0)
+		return 0;
+
+	ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
+	return -1;
+}
+
+/* Remove the neighbour proxy entry for ip on dev by running the "ip neigh
+ * flush proxy" exec wrapper through run_command().
+ *
+ * Returns 0 on success; on failure logs the captured command output and
+ * returns -1.
+ */
+static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
+{
+	char cmd_output[PATH_MAX];
+	struct ip_proxy_args args;
+
+	args.ip = ip;
+	args.dev = dev;
+
+	if (run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args) >= 0)
+		return 0;
+
+	ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
+	return -1;
+}
+
+/* Set up layer 2 neighbour proxying on netdev->link for the container's IPs.
+ *
+ * For each address family that has addresses configured on netdev, first
+ * verifies the sysctls the feature relies on (IPv4: forwarding; IPv6:
+ * proxy_ndp and forwarding), then adds one neighbour proxy entry on the link
+ * device per address.
+ *
+ * Returns 0 on success and -1 with errno set on the first failure. Entries
+ * added before a failure are not removed here.
+ */
+static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
+	struct lxc_list *cur, *next;
+	struct lxc_inetdev *inet4dev;
+	struct lxc_inet6dev *inet6dev;
+	char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
+
+	/* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
+	if (!lxc_list_empty(&netdev->ipv4)) {
+		/* Check for net.ipv4.conf.[link].forwarding=1 */
+		if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
+			ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
+			return minus_one_set_errno(EINVAL);
+		}
+	}
+
+	/* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
+	if (!lxc_list_empty(&netdev->ipv6)) {
+		/* Check for net.ipv6.conf.[link].proxy_ndp=1 */
+		if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
+			ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
+			return minus_one_set_errno(EINVAL);
+		}
+
+		/* Check for net.ipv6.conf.[link].forwarding=1 */
+		if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
+			ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
+			return minus_one_set_errno(EINVAL);
+		}
+	}
+
+	lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
+		inet4dev = cur->elem;
+		/* inet_ntop() already set errno on failure; keep it instead of
+		 * overwriting it with a negated (invalid) value.
+		 */
+		if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
+			return -1;
+
+		if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
+			return minus_one_set_errno(EINVAL);
+	}
+
+	lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
+		inet6dev = cur->elem;
+		/* As above: errno from inet_ntop() is propagated unchanged. */
+		if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
+			return -1;
+
+		if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
+			return minus_one_set_errno(EINVAL);
+	}
+
+	return 0;
+}
+
+/* Remove the neighbour proxy entries on netdev->link for every IPv4 and IPv6
+ * address of netdev.
+ *
+ * Removal is best effort: a failure for one address is remembered but does
+ * not stop the remaining addresses from being processed. Returns 0 if every
+ * entry was removed, otherwise -1 with errno EINVAL.
+ */
+static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
+	char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
+	struct lxc_list *it, *tmp;
+	int failed = 0;
+
+	lxc_list_for_each_safe(it, &netdev->ipv4, tmp) {
+		struct lxc_inetdev *inet4dev = it->elem;
+
+		if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4))) {
+			failed = 1;
+			SYSERROR("Failed to convert IP for l2proxy removal on dev \"%s\"", netdev->link);
+		} else if (lxc_del_ip_neigh_proxy(bufinet4, netdev->link) < 0) {
+			failed = 1;
+		}
+		/* Either way, carry on with the next address. */
+	}
+
+	lxc_list_for_each_safe(it, &netdev->ipv6, tmp) {
+		struct lxc_inet6dev *inet6dev = it->elem;
+
+		if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6))) {
+			failed = 1;
+			SYSERROR("Failed to convert IP for l2proxy removal on dev \"%s\"", netdev->link);
+		} else if (lxc_del_ip_neigh_proxy(bufinet6, netdev->link) < 0) {
+			failed = 1;
+		}
+		/* Either way, carry on with the next address. */
+	}
+
+	if (failed)
+		return minus_one_set_errno(EINVAL);
+
+	return 0;
+}
+
int lxc_create_network_priv(struct lxc_handler *handler)
{
struct lxc_list *iterator;
@@ -2695,11 +2880,18 @@ int lxc_create_network_priv(struct lxc_handler *handler)
return -1;
}
+ /* Setup l2proxy entries if enabled and used with a link property */
+ if (netdev->l2proxy && netdev->link[0] != '\0') {
+ if (lxc_setup_l2proxy(netdev)) {
+ ERROR("Failed to setup l2proxy");
+ return -1;
+ }
+ }
+
if (netdev_conf[netdev->type](handler, netdev)) {
ERROR("Failed to create network device");
return -1;
}
-
}
return 0;
@@ -2795,6 +2987,13 @@ bool lxc_delete_network_priv(struct lxc_handler *handler)
if (!netdev->ifindex)
continue;
+ /* Delete l2proxy entries if enabled and used with a link property */
+ if (netdev->l2proxy && netdev->link[0] != '\0') {
+ if (lxc_delete_l2proxy(netdev))
+ WARN("Failed to delete all l2proxy config");
+ /* Don't return, let the network be cleaned up as normal. */
+ }
+
if (netdev->type == LXC_NET_PHYS) {
ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
if (ret < 0)
diff --git a/src/lxc/network.h b/src/lxc/network.h
index fa80404bc..468593f5e 100644
--- a/src/lxc/network.h
+++ b/src/lxc/network.h
@@ -171,6 +171,7 @@ struct lxc_netdev {
int type;
int flags;
char link[IFNAMSIZ];
+ bool l2proxy;
char name[IFNAMSIZ];
char *hwaddr;
char *mtu;