From d91fb3f4c7e4dba806541bdc90b1fb60a3581541 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 7 Jul 2016 16:09:03 -0500 Subject: [PATCH 1/6] Add support for configuring Infiniband GUIDs Add two NLA's that allow configuration of Infiniband node or port GUIDs by referencing the IPoIB net device set over the physical function. The format to be used is as follows: ip link set dev ib0 vf 0 node_guid 00:02:c9:03:00:21:6e:70 ip link set dev ib0 vf 0 port_guid 00:02:c9:03:00:21:6e:78 Signed-off-by: Eli Cohen --- include/utils.h | 1 + ip/iplink.c | 20 ++++++++++++++++++++ lib/utils.c | 35 +++++++++++++++++++++++++++++++++++ man/man8/ip-link.8.in | 12 +++++++++++- 4 files changed, 67 insertions(+), 1 deletion(-) diff --git a/include/utils.h b/include/utils.h index 27562a1c..82f1aa7d 100644 --- a/include/utils.h +++ b/include/utils.h @@ -248,5 +248,6 @@ int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, bool show_label); char *int_to_str(int val, char *buf); +int get_guid(__u64 *guid, const char *arg); #endif /* __UTILS_H__ */ diff --git a/ip/iplink.c b/ip/iplink.c index f2a2e13c..28a0a21c 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -420,6 +420,26 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, invarg("Invalid \"state\" value\n", *argv); ivl.vf = vf; addattr_l(&req->n, sizeof(*req), IFLA_VF_LINK_STATE, &ivl, sizeof(ivl)); + } else if (matches(*argv, "node_guid") == 0) { + struct ifla_vf_guid ivg; + + NEXT_ARG(); + ivg.vf = vf; + if (get_guid(&ivg.guid, *argv)) { + invarg("Invalid GUID format\n", *argv); + return -1; + } + addattr_l(&req->n, sizeof(*req), IFLA_VF_IB_NODE_GUID, &ivg, sizeof(ivg)); + } else if (matches(*argv, "port_guid") == 0) { + struct ifla_vf_guid ivg; + + NEXT_ARG(); + ivg.vf = vf; + if (get_guid(&ivg.guid, *argv)) { + invarg("Invalid GUID format\n", *argv); + return -1; + } + addattr_l(&req->n, sizeof(*req), IFLA_VF_IB_PORT_GUID, &ivg, sizeof(ivg)); } else { /* rewind arg */ PREV_ARG(); diff --git a/lib/utils.c 
b/lib/utils.c index 7dceeb58..96604746 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -1121,3 +1121,38 @@ char *int_to_str(int val, char *buf) sprintf(buf, "%d", val); return buf; } + +int get_guid(__u64 *guid, const char *arg) +{ + unsigned long int tmp; + char *endptr; + int i; + +#define GUID_STR_LEN 23 + /* Verify strict format: format string must be + * xx:xx:xx:xx:xx:xx:xx:xx where each x can be an arbitrary + * hex digit + */ + + if (strlen(arg) != GUID_STR_LEN) + return -1; + + /* make sure colons are in place */ + for (i = 0; i < 7; i++) + if (arg[2 + i * 3] != ':') + return -1; + + *guid = 0; + for (i = 0; i < 8; i++) { + tmp = strtoul(arg + i * 3, &endptr, 16); + if (endptr != arg + i * 3 + 2) + return -1; + + if (tmp > 255) + return -1; + + *guid |= (__u64)tmp << (56 - 8 * i); + } + + return 0; +} diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index ad18f755..95fef02c 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -146,7 +146,11 @@ ip-link \- network device configuration .br .RB "[ " state " { " auto " | " enable " | " disable " } ]" .br -.RB "[ " trust " { " on " | " off " } ] ]" +.RB "[ " trust " { " on " | " off " } ]" +.br +.RB "[ " node_guid " eui64 ]" +.br +.RB "[ " port_guid " eui64 ] ]" .br .in -9 .RB "[ " master @@ -1196,6 +1200,12 @@ sent by the VF. .BI trust " on|off" - trust the specified VF user. This enables that VF user can set a specific feature which may impact security and/or performance. (e.g. VF multicast promiscuous mode) +.sp +.BI node_guid " eui64" +- configure node GUID for the VF. +.sp +.BI port_guid " eui64" +- configure port GUID for the VF. 
.in -8 .TP From ef0a738c8d170bc8146954f4bb4e981d0b1eff55 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 15 Jul 2016 11:31:20 -0700 Subject: [PATCH 2/6] ip: link style cleanup break long lines and other trivial changes --- ip/iplink.c | 57 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/ip/iplink.c b/ip/iplink.c index 28a0a21c..365240e6 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -317,7 +317,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, len, halen); return -1; } - addattr_l(&req->n, sizeof(*req), IFLA_VF_MAC, &ivm, sizeof(ivm)); + addattr_l(&req->n, sizeof(*req), IFLA_VF_MAC, + &ivm, sizeof(ivm)); } else if (matches(*argv, "vlan") == 0) { struct ifla_vf_vlan ivv; @@ -338,7 +339,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, PREV_ARG(); } } - addattr_l(&req->n, sizeof(*req), IFLA_VF_VLAN, &ivv, sizeof(ivv)); + addattr_l(&req->n, sizeof(*req), IFLA_VF_VLAN, + &ivv, sizeof(ivv)); } else if (matches(*argv, "rate") == 0) { struct ifla_vf_tx_rate ivt; @@ -378,7 +380,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, else return on_off("spoofchk", *argv); ivs.vf = vf; - addattr_l(&req->n, sizeof(*req), IFLA_VF_SPOOFCHK, &ivs, sizeof(ivs)); + addattr_l(&req->n, sizeof(*req), IFLA_VF_SPOOFCHK, + &ivs, sizeof(ivs)); } else if (matches(*argv, "query_rss") == 0) { struct ifla_vf_rss_query_en ivs; @@ -391,7 +394,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, else return on_off("query_rss", *argv); ivs.vf = vf; - addattr_l(&req->n, sizeof(*req), IFLA_VF_RSS_QUERY_EN, &ivs, sizeof(ivs)); + addattr_l(&req->n, sizeof(*req), IFLA_VF_RSS_QUERY_EN, + &ivs, sizeof(ivs)); } else if (matches(*argv, "trust") == 0) { struct ifla_vf_trust ivt; @@ -404,7 +408,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, else invarg("Invalid \"trust\" value\n", *argv); ivt.vf = vf; - addattr_l(&req->n, sizeof(*req), IFLA_VF_TRUST, 
&ivt, sizeof(ivt)); + addattr_l(&req->n, sizeof(*req), IFLA_VF_TRUST, + &ivt, sizeof(ivt)); } else if (matches(*argv, "state") == 0) { struct ifla_vf_link_state ivl; @@ -419,7 +424,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, else invarg("Invalid \"state\" value\n", *argv); ivl.vf = vf; - addattr_l(&req->n, sizeof(*req), IFLA_VF_LINK_STATE, &ivl, sizeof(ivl)); + addattr_l(&req->n, sizeof(*req), IFLA_VF_LINK_STATE, + &ivl, sizeof(ivl)); } else if (matches(*argv, "node_guid") == 0) { struct ifla_vf_guid ivg; @@ -429,7 +435,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, invarg("Invalid GUID format\n", *argv); return -1; } - addattr_l(&req->n, sizeof(*req), IFLA_VF_IB_NODE_GUID, &ivg, sizeof(ivg)); + addattr_l(&req->n, sizeof(*req), IFLA_VF_IB_NODE_GUID, + &ivg, sizeof(ivg)); } else if (matches(*argv, "port_guid") == 0) { struct ifla_vf_guid ivg; @@ -439,7 +446,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, invarg("Invalid GUID format\n", *argv); return -1; } - addattr_l(&req->n, sizeof(*req), IFLA_VF_IB_PORT_GUID, &ivg, sizeof(ivg)); + addattr_l(&req->n, sizeof(*req), IFLA_VF_IB_PORT_GUID, + &ivg, sizeof(ivg)); } else { /* rewind arg */ PREV_ARG(); @@ -543,9 +551,11 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, duparg("netns", *argv); netns = netns_get_fd(*argv); if (netns >= 0) - addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_FD, &netns, 4); + addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_FD, + &netns, 4); else if (get_integer(&netns, *argv, 0) == 0) - addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_PID, &netns, 4); + addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_PID, + &netns, 4); else invarg("Invalid \"netns\" value\n", *argv); } else if (strcmp(*argv, "multicast") == 0) { @@ -709,7 +719,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, invarg("Invalid address generation mode\n", *argv); afs = addattr_nest(&req->n, sizeof(*req), IFLA_AF_SPEC); afs6 = addattr_nest(&req->n, 
sizeof(*req), AF_INET6); - addattr8(&req->n, sizeof(*req), IFLA_INET6_ADDR_GEN_MODE, mode); + addattr8(&req->n, sizeof(*req), + IFLA_INET6_ADDR_GEN_MODE, mode); addattr_nest_end(&req->n, afs6); addattr_nest_end(&req->n, afs); } else if (matches(*argv, "link-netnsid") == 0) { @@ -748,10 +759,11 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, if (dev_index && addr_len) { int halen = nl_get_ll_addr_len(dev_index); + if (halen >= 0 && halen != addr_len) { fprintf(stderr, - "Invalid address length %d - must be %d bytes\n", - addr_len, halen); + "Invalid address length %d - must be %d bytes\n", + addr_len, halen); return -1; } } @@ -779,7 +791,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) req.n.nlmsg_type = cmd; req.i.ifi_family = preferred_family; - ret = iplink_parse(argc, argv, &req, &name, &type, &link, &dev, &group, &index); + ret = iplink_parse(argc, argv, + &req, &name, &type, &link, &dev, &group, &index); if (ret < 0) return ret; @@ -792,8 +805,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) &group, sizeof(group)); else { if (argc) { - fprintf(stderr, "Garbage instead of arguments \"%s ...\". Try \"ip link " - "help\".\n", *argv); + fprintf(stderr, "Garbage instead of arguments \"%s ...\". 
Try \"ip link help\".\n", + *argv); return -1; } if (flags & NLM_F_CREATE) { @@ -850,7 +863,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) if (name) { len = strlen(name) + 1; if (len == 1) - invarg("\"\" is not a valid device identifier\n", "name"); + invarg("\"\" is not a valid device identifier\n", + "name"); if (len > IFNAMSIZ) invarg("\"name\" too long\n", name); addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, len); @@ -879,7 +893,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) iflatype = IFLA_INFO_DATA; } if (lu && argc) { - struct rtattr *data = addattr_nest(&req.n, sizeof(req), iflatype); + struct rtattr *data = addattr_nest(&req.n, + sizeof(req), iflatype); if (lu->parse_opt && lu->parse_opt(lu, argc, argv, &req.n)) @@ -1110,7 +1125,8 @@ static int parse_address(const char *dev, int hatype, int halen, if (alen < 0) return -1; if (alen != halen) { - fprintf(stderr, "Wrong address (%s) length: expected %d bytes\n", lla, halen); + fprintf(stderr, "Wrong address (%s) length: expected %d bytes\n", + lla, halen); return -1; } return 0; @@ -1250,7 +1266,8 @@ static int do_set(int argc, char **argv) } if (!dev) { - fprintf(stderr, "Not enough of information: \"dev\" argument is required.\n"); + fprintf(stderr, + "Not enough of information: \"dev\" argument is required.\n"); exit(-1); } From 74af8dd9620e4322babf9d2a936b1d333a4e37e0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 12 Jul 2016 21:37:58 +0800 Subject: [PATCH 3/6] ip route: restore route entries in correct order Sometimes we cannot restore route entries, because in kernel [1] fib_check_nh() [2] fib_valid_prefsrc() cause some routes to depend on existence of others while adding. 
For example, suppose we saved all the routes and flushed all tables: [a] default via 192.168.122.1 dev eth0 [b] 192.168.122.0/24 dev eth0 src 192.168.122.21 [c] broadcast 127.0.0.0 dev lo table local src 127.0.0.1 [d] local 127.0.0.0/8 dev lo table local src 127.0.0.1 [e] local 127.0.0.1 dev lo table local src 127.0.0.1 [f] broadcast 127.255.255.255 dev lo table local src 127.0.0.1 [g] broadcast 192.168.122.0 dev eth0 table local src 192.168.122.21 [h] local 192.168.122.21 dev eth0 table local src 192.168.122.21 [i] broadcast 192.168.122.255 dev eth0 table local src 192.168.122.21 Now we start to restore them: To add [a], we have to add [b] first, because of [1] and the 'via 192.168.122.1' in [a]. To add [b], we have to add [h] first, because of [2] and the 'src 192.168.122.21' in [b]. So the correct restore order is: [e][h] -> [b][c][d][f][g][i] -> [a] This patch fixes it by traversing the file three times, restoring only part of the entries in each pass according to the following conditions, so that every entry can be restored successfully: 1. !gw && (!fib_prefsrc || fib_prefsrc == cfg->fc_dst) 2. !gw && (fib_prefsrc != cfg->fc_dst) 3. 
gw Signed-off-by: Xin Long Acked-by: Phil Sutter --- ip/iproute.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/ip/iproute.c b/ip/iproute.c index 24f6b010..c7350009 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -1810,12 +1810,42 @@ static int iproute_get(int argc, char **argv) return 0; } +static int rtattr_cmp(struct rtattr *rta1, struct rtattr *rta2) +{ + if (!rta1 || !rta2 || rta1->rta_len != rta2->rta_len) + return 1; + + return memcmp(RTA_DATA(rta1), RTA_DATA(rta2), RTA_PAYLOAD(rta1)); +} + static int restore_handler(const struct sockaddr_nl *nl, struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n, void *arg) { - int ret; + struct rtmsg *r = NLMSG_DATA(n); + struct rtattr *tb[RTA_MAX+1]; + int len = n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); + int ret, prio = *(int *)arg; + parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len); + + /* Restore routes in correct order: + * 0. ones for local addresses, + * 1. ones for local networks, + * 2. others (remote networks/hosts). 
+ */ + if (!prio && !tb[RTA_GATEWAY] && (!tb[RTA_PREFSRC] || + !rtattr_cmp(tb[RTA_PREFSRC], tb[RTA_DST]))) + goto restore; + else if (prio == 1 && !tb[RTA_GATEWAY] && + rtattr_cmp(tb[RTA_PREFSRC], tb[RTA_DST])) + goto restore; + else if (prio == 2 && tb[RTA_GATEWAY]) + goto restore; + + return 0; + +restore: n->nlmsg_flags |= NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK; ll_init_map(&rth); @@ -1848,10 +1878,23 @@ static int route_dump_check_magic(void) static int iproute_restore(void) { + int pos, prio; + if (route_dump_check_magic()) exit(-1); - exit(rtnl_from_file(stdin, &restore_handler, NULL)); + pos = ftell(stdin); + for (prio = 0; prio < 3; prio++) { + int err; + + err = rtnl_from_file(stdin, &restore_handler, &prio); + if (err) + exit(err); + + fseek(stdin, pos, SEEK_SET); + } + + exit(0); } static int show_handler(const struct sockaddr_nl *nl, From 79f4a39365361732622e114dbc86bc90ed32d25f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 15 Jul 2016 11:34:45 -0700 Subject: [PATCH 4/6] iproute: constify rtattr_cmp --- ip/iproute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/iproute.c b/ip/iproute.c index c7350009..c564fa6d 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -1810,7 +1810,7 @@ static int iproute_get(int argc, char **argv) return 0; } -static int rtattr_cmp(struct rtattr *rta1, struct rtattr *rta2) +static int rtattr_cmp(const struct rtattr *rta1, const struct rtattr *rta2) { if (!rta1 || !rta2 || rta1->rta_len != rta2->rta_len) return 1; From e3da7a45bad1672328d67f016627e026cb41feba Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 13 Jul 2016 09:53:53 +0300 Subject: [PATCH 5/6] man: Add devlink man pages to Makefile Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel --- man/man8/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/man8/Makefile b/man/man8/Makefile index 929826ec..9badbed7 100644 --- a/man/man8/Makefile +++ b/man/man8/Makefile @@ -16,7 +16,8 @@ MAN8PAGES = 
$(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 rtpr.8 ss. tc-basic.8 tc-cgroup.8 tc-flow.8 tc-flower.8 tc-fw.8 tc-route.8 \ tc-tcindex.8 tc-u32.8 \ tc-connmark.8 tc-csum.8 tc-mirred.8 tc-nat.8 tc-pedit.8 tc-police.8 \ - tc-simple.8 tc-skbedit.8 tc-vlan.8 tc-xt.8 + tc-simple.8 tc-skbedit.8 tc-vlan.8 tc-xt.8 \ + devlink.8 devlink-dev.8 devlink-monitor.8 devlink-port.8 devlink-sb.8 all: $(TARGETS) From 78c610e6ea8554e87e2204f540cf0ce61ef52695 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 13 Jul 2016 09:53:54 +0300 Subject: [PATCH 6/6] man: Point to 'devlink-sb' from 'devlink' man page Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel --- man/man8/devlink.8 | 1 + 1 file changed, 1 insertion(+) diff --git a/man/man8/devlink.8 b/man/man8/devlink.8 index df00f4fa..cf0563b9 100644 --- a/man/man8/devlink.8 +++ b/man/man8/devlink.8 @@ -76,6 +76,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. .BR devlink-dev (8), .BR devlink-port (8), .BR devlink-monitor (8), +.BR devlink-sb (8), .br .SH REPORTING BUGS