From fd5c1d4391c79900fee667f53dea744ce11238e4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 9 Jun 2014 12:50:30 -0700 Subject: [PATCH 1/3] Update to current net-next kernel headers Update sanitized headers --- include/linux/can.h | 6 +++--- include/linux/can/netlink.h | 6 +++--- include/linux/filter.h | 3 ++- include/linux/if_link.h | 12 +++++++++++- include/linux/l2tp.h | 2 ++ include/linux/neighbour.h | 1 + 6 files changed, 22 insertions(+), 8 deletions(-) diff --git a/include/linux/can.h b/include/linux/can.h index 5d9d1d14..d9ba97f3 100644 --- a/include/linux/can.h +++ b/include/linux/can.h @@ -42,8 +42,8 @@ * DAMAGE. */ -#ifndef CAN_H -#define CAN_H +#ifndef _CAN_H +#define _CAN_H #include #include @@ -191,4 +191,4 @@ struct can_filter { #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */ -#endif /* CAN_H */ +#endif /* !_UAPI_CAN_H */ diff --git a/include/linux/can/netlink.h b/include/linux/can/netlink.h index 7e2e1863..295cd409 100644 --- a/include/linux/can/netlink.h +++ b/include/linux/can/netlink.h @@ -15,8 +15,8 @@ * GNU General Public License for more details. */ -#ifndef CAN_NETLINK_H -#define CAN_NETLINK_H +#ifndef _CAN_NETLINK_H +#define _CAN_NETLINK_H #include @@ -130,4 +130,4 @@ enum { #define IFLA_CAN_MAX (__IFLA_CAN_MAX - 1) -#endif /* CAN_NETLINK_H */ +#endif /* !_UAPI_CAN_NETLINK_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 9a46cb63..a9ae93c0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -130,7 +130,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. 
*/ #define SKF_AD_VLAN_TAG 44 #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 -#define SKF_AD_MAX 56 +#define SKF_AD_RANDOM 56 +#define SKF_AD_MAX 60 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 84fca1e7..fadef0fd 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -317,6 +317,9 @@ enum { IFLA_VXLAN_PORT, /* destination port */ IFLA_VXLAN_GROUP6, IFLA_VXLAN_LOCAL6, + IFLA_VXLAN_UDP_CSUM, + IFLA_VXLAN_UDP_ZERO_CSUM6_TX, + IFLA_VXLAN_UDP_ZERO_CSUM6_RX, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -397,9 +400,10 @@ enum { IFLA_VF_UNSPEC, IFLA_VF_MAC, /* Hardware queue specific attributes */ IFLA_VF_VLAN, - IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ + IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ + IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ __IFLA_VF_MAX, }; @@ -421,6 +425,12 @@ struct ifla_vf_tx_rate { __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ }; +struct ifla_vf_rate { + __u32 vf; + __u32 min_tx_rate; /* Min Bandwidth in Mbps */ + __u32 max_tx_rate; /* Max Bandwidth in Mbps */ +}; + struct ifla_vf_spoofchk { __u32 vf; __u32 setting; diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h index c4bec823..c0e116a4 100644 --- a/include/linux/l2tp.h +++ b/include/linux/l2tp.h @@ -122,6 +122,8 @@ enum { L2TP_ATTR_STATS, /* nested */ L2TP_ATTR_IP6_SADDR, /* struct in6_addr */ L2TP_ATTR_IP6_DADDR, /* struct in6_addr */ + L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* u8 */ + L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* u8 */ __L2TP_ATTR_MAX, }; diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index d3ef5831..4a1d7e96 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -24,6 +24,7 @@ enum { NDA_PORT, NDA_VNI, NDA_IFINDEX, + NDA_MASTER, __NDA_MAX }; From 
f89a2a05ffa94ac5bec9f50751f761215356092b Mon Sep 17 00:00:00 2001 From: Sucheta Chakraborty Date: Thu, 22 May 2014 09:59:37 -0400 Subject: [PATCH 2/3] Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool o "min_tx_rate" option has been added for minimum Tx rate. Hence, for consistent naming, "max_tx_rate" option has been introduced for maximum Tx rate. o Change in v2: "rate" can be used along with "max_tx_rate". When both are specified, "max_tx_rate" should override. o Change in v3: * IFLA_VF_RATE: When IFLA_VF_RATE is used, and the user has given only one of min_tx_rate or max_tx_rate, reading of previous rate limits is done in userspace instead of in kernel space before ndo_set_vf_rate. * IFLA_VF_TX_RATE: When IFLA_VF_TX_RATE is used, min_tx_rate is always read in kernel space. This takes care of the scenarios below: (1) when old tool sends "rate" but kernel is new (expects min and max) (2) when new tool sends only "rate" but kernel is old (expects only "rate") o Change in v4 as suggested by Stephen Hemminger: * As per iproute policy, input and output formats should match. Changing display of max_tx_rate and min_tx_rate options accordingly. 
./ip/ip link show p3p1 8: p3p1: mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000 link/ether 00:0e:1e:16:ce:40 brd ff:ff:ff:ff:ff:ff vf 0 MAC 2a:18:8f:4d:3d:d4, tx rate 700 (Mbps), max_tx_rate 700Mbps, min_tx_rate 200Mbps vf 1 MAC 72:dc:ba:f9:df:fd Signed-off-by: Sucheta Chakraborty --- ip/ip_common.h | 1 + ip/ipaddress.c | 63 +++++++++++++++++++++++++++++++++++++++++++ ip/iplink.c | 63 ++++++++++++++++++++++++++++++++++++++++--- man/man8/ip-link.8.in | 24 +++++++++++++++-- 4 files changed, 146 insertions(+), 5 deletions(-) diff --git a/ip/ip_common.h b/ip/ip_common.h index 698dc7aa..ac1e4c19 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -17,6 +17,7 @@ extern int iproute_monitor(int argc, char **argv); extern void iplink_usage(void) __attribute__((noreturn)); extern void iproute_reset_filter(void); extern void ipmroute_reset_filter(void); +void ipaddr_get_vf_rate(int, int *, int *, int); extern void ipaddr_reset_filter(int); extern void ipneigh_reset_filter(void); extern void ipntable_reset_filter(void); diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 76f47823..8138e862 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -245,6 +245,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) { struct ifla_vf_mac *vf_mac; struct ifla_vf_vlan *vf_vlan; + struct ifla_vf_rate *vf_rate; struct ifla_vf_tx_rate *vf_tx_rate; struct ifla_vf_spoofchk *vf_spoofchk; struct ifla_vf_link_state *vf_linkstate; @@ -262,6 +263,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) vf_mac = RTA_DATA(vf[IFLA_VF_MAC]); vf_vlan = RTA_DATA(vf[IFLA_VF_VLAN]); vf_tx_rate = RTA_DATA(vf[IFLA_VF_TX_RATE]); + vf_rate = RTA_DATA(vf[IFLA_VF_RATE]); /* Check if the spoof checking vf info type is supported by * this kernel. 
@@ -297,6 +299,10 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) fprintf(fp, ", qos %d", vf_vlan->qos); if (vf_tx_rate->rate) fprintf(fp, ", tx rate %d (Mbps)", vf_tx_rate->rate); + if (vf_rate->max_tx_rate) + fprintf(fp, ", max_tx_rate %dMbps", vf_rate->max_tx_rate); + if (vf_rate->min_tx_rate) + fprintf(fp, ", min_tx_rate %dMbps", vf_rate->min_tx_rate); if (vf_spoofchk && vf_spoofchk->setting != -1) { if (vf_spoofchk->setting) fprintf(fp, ", spoof checking on"); @@ -1278,6 +1284,63 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) return 0; } +static void +ipaddr_loop_each_vf(struct rtattr *tb[], int vfnum, int *min, int *max) +{ + struct rtattr *vflist = tb[IFLA_VFINFO_LIST]; + struct rtattr *i, *vf[IFLA_VF_MAX+1]; + struct ifla_vf_rate *vf_rate; + int rem; + + rem = RTA_PAYLOAD(vflist); + + for (i = RTA_DATA(vflist); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { + parse_rtattr_nested(vf, IFLA_VF_MAX, i); + vf_rate = RTA_DATA(vf[IFLA_VF_RATE]); + if (vf_rate->vf == vfnum) { + *min = vf_rate->min_tx_rate; + *max = vf_rate->max_tx_rate; + return; + } + } + fprintf(stderr, "Cannot find VF %d\n", vfnum); + exit(1); +} + +void ipaddr_get_vf_rate(int vfnum, int *min, int *max, int idx) +{ + struct nlmsg_chain linfo = { NULL, NULL}; + struct rtattr *tb[IFLA_MAX+1]; + struct ifinfomsg *ifi; + struct nlmsg_list *l; + struct nlmsghdr *n; + int len; + + if (rtnl_wilddump_request(&rth, AF_UNSPEC, RTM_GETLINK) < 0) { + perror("Cannot send dump request"); + exit(1); + } + if (rtnl_dump_filter(&rth, store_nlmsg, &linfo) < 0) { + fprintf(stderr, "Dump terminated\n"); + exit(1); + } + for (l = linfo.head; l; l = l->next) { + n = &l->h; + ifi = NLMSG_DATA(n); + + len = n->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); + if (len < 0 || idx && idx != ifi->ifi_index) + continue; + + parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len); + + if ((tb[IFLA_VFINFO_LIST] && tb[IFLA_NUM_VF])) { + ipaddr_loop_each_vf(tb, vfnum, min, max); + return; + } + } +} + int 
ipaddr_list_link(int argc, char **argv) { preferred_family = AF_PACKET; diff --git a/ip/iplink.c b/ip/iplink.c index c94261ac..0d020efc 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -215,14 +215,38 @@ struct iplink_req { }; static int iplink_parse_vf(int vf, int *argcp, char ***argvp, - struct iplink_req *req) + struct iplink_req *req, int dev_index) { + char new_rate_api = 0, count = 0, override_legacy_rate = 0; + struct ifla_vf_rate tivt; int len, argc = *argcp; char **argv = *argvp; struct rtattr *vfinfo; + tivt.min_tx_rate = -1; + tivt.max_tx_rate = -1; + vfinfo = addattr_nest(&req->n, sizeof(*req), IFLA_VF_INFO); + while (NEXT_ARG_OK()) { + NEXT_ARG(); + count++; + if (!matches(*argv, "max_tx_rate")) { + /* new API in use */ + new_rate_api = 1; + /* override legacy rate */ + override_legacy_rate = 1; + } else if (!matches(*argv, "min_tx_rate")) { + /* new API in use */ + new_rate_api = 1; + } + } + + while (count--) { + /* rewind arg */ + PREV_ARG(); + } + while (NEXT_ARG_OK()) { NEXT_ARG(); if (matches(*argv, "mac") == 0) { @@ -261,7 +285,25 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, invarg("Invalid \"rate\" value\n", *argv); } ivt.vf = vf; - addattr_l(&req->n, sizeof(*req), IFLA_VF_TX_RATE, &ivt, sizeof(ivt)); + if (!new_rate_api) + addattr_l(&req->n, sizeof(*req), + IFLA_VF_TX_RATE, &ivt, sizeof(ivt)); + else if (!override_legacy_rate) + tivt.max_tx_rate = ivt.rate; + + } else if (matches(*argv, "max_tx_rate") == 0) { + NEXT_ARG(); + if (get_unsigned(&tivt.max_tx_rate, *argv, 0)) + invarg("Invalid \"max tx rate\" value\n", + *argv); + tivt.vf = vf; + + } else if (matches(*argv, "min_tx_rate") == 0) { + NEXT_ARG(); + if (get_unsigned(&tivt.min_tx_rate, *argv, 0)) + invarg("Invalid \"min tx rate\" value\n", + *argv); + tivt.vf = vf; } else if (matches(*argv, "spoofchk") == 0) { struct ifla_vf_spoofchk ivs; @@ -295,6 +337,19 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, } } + if (new_rate_api) { + int tmin, tmax; + if 
(tivt.min_tx_rate == -1 || tivt.max_tx_rate == -1) { + ipaddr_get_vf_rate(tivt.vf, &tmin, &tmax, dev_index); + if (tivt.min_tx_rate == -1) + tivt.min_tx_rate = tmin; + if (tivt.max_tx_rate == -1) + tivt.max_tx_rate = tmax; + } + addattr_l(&req->n, sizeof(*req), IFLA_VF_RATE, &tivt, + sizeof(tivt)); + } + if (argc == *argcp) incomplete_command(); @@ -316,6 +371,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, int vf = -1; int numtxqueues = -1; int numrxqueues = -1; + int dev_index; *group = -1; ret = argc; @@ -428,7 +484,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, } vflist = addattr_nest(&req->n, sizeof(*req), IFLA_VFINFO_LIST); - len = iplink_parse_vf(vf, &argc, &argv, req); + len = iplink_parse_vf(vf, &argc, &argv, req, dev_index); if (len < 0) return -1; addattr_nest_end(&req->n, vflist); @@ -510,6 +566,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, if (*dev) duparg2("dev", *argv); *dev = *argv; + dev_index = ll_name_to_index(*dev); } argc--; argv++; } diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index e1260469..f89785c5 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -124,6 +124,10 @@ ip-link \- network device configuration .IR VLAN-QOS " ] ] [" .B rate .IR TXRATE " ] [" +.B max_tx_rate +.IR TXRATE " ] [" +.B min_tx_rate +.IR TXRATE " ] [" .B spoofchk { on | off } ] [ .B state { auto | enable | disable} ] | @@ -566,8 +570,24 @@ as 0 disables VLAN tagging and filtering for the VF. .sp .BI rate " TXRATE" -- change the allowed transmit bandwidth, in Mbps, for the specified VF. -Setting this parameter to 0 disables rate limiting. The +-- change the allowed transmit bandwidth, in Mbps, for the specified VF. +Setting this parameter to 0 disables rate limiting. +.B vf +parameter must be specified. +Please use new API +.B "max_tx_rate" +option instead. + +.sp +.BI max_tx_rate " TXRATE" +- change the allowed maximum transmit bandwidth, in Mbps, for the specified VF. 
+.B vf +parameter must be specified. + +.sp +.BI min_tx_rate " TXRATE" +- change the allowed minimum transmit bandwidth, in Mbps, for the specified VF. +Minimum TXRATE should be always <= Maximum TXRATE. .B vf parameter must be specified. From cc273a51d0e3e006780ad5abab3c7261b516854c Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sat, 7 Jun 2014 22:23:42 -0700 Subject: [PATCH 3/3] bridge: Add master device name to bridge fdb show This patch adds master dev name from NDA_MASTER netlink attribute to bridge fdb show output current iproute2 tries to print 'master' in the output if NTF_MASTER is present. But, kernel today does not set NTF_MASTER during dump requests. Which means I have not seen iproute2 bridge cmd print 'master' at all. This patch overrides the NTF_MASTER flag if NDA_MASTER attribute is present. Example output: before this patch: # bridge fdb show 44:38:39:00:27:ba dev bond2.2003 permanent 44:38:39:00:27:bb dev bond4.2003 permanent 44:38:39:00:27:bc dev bond2.2004 permanent After this patch: # bridge fdb show 44:38:39:00:27:ba dev bond2.2003 master br-2003 permanent 44:38:39:00:27:bb dev bond4.2003 master br-2003 permanent 44:38:39:00:27:bc dev bond2.2004 master br-2004 permanent For comparison with the above, below is the output for NTF_SELF today, # bridge fdb show 33:33:00:00:00:01 dev eth0 self permanent 01:00:5e:00:00:01 dev eth0 self permanent 33:33:ff:00:01:cc dev eth0 self permanent If change in output is a concern, 'master' can be put at the end of the fdb output line or made optional with -d[etails] option. 
change from v1 to v2: use 'bridge' instead of 'master' in fdb show output change from v2 to v3: use 'master' instead of 'bridge' in fdb show output (master could also be a vxlan device) Signed-off-by: Wilson Kok Signed-off-by: Roopa Prabhu --- bridge/fdb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bridge/fdb.c b/bridge/fdb.c index cca99ef3..9a07a327 100644 --- a/bridge/fdb.c +++ b/bridge/fdb.c @@ -147,7 +147,10 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) } if (r->ndm_flags & NTF_SELF) fprintf(fp, "self "); - if (r->ndm_flags & NTF_MASTER) + if (tb[NDA_MASTER]) + fprintf(fp, "master %s ", + ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER]))); + else if (r->ndm_flags & NTF_MASTER) fprintf(fp, "master "); if (r->ndm_flags & NTF_ROUTER) fprintf(fp, "router ");