From ee713339d381ca0758a5efa0a54b4eded0c1bcda Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Fri, 12 Jul 2019 19:02:14 +0200 Subject: [PATCH 01/26] tunnel: factorize printout of GRE key and flags print_tunnel() functions in ip6tunnel.c and iptunnel.c contains the same code to print out GRE key and flags This commit factorize the code in a helper function in tunnel.c Signed-off-by: Andrea Claudi Signed-off-by: David Ahern --- ip/ip6tunnel.c | 22 ++-------------------- ip/iptunnel.c | 19 ++----------------- ip/tunnel.c | 26 ++++++++++++++++++++++++++ ip/tunnel.h | 3 +++ 4 files changed, 33 insertions(+), 37 deletions(-) diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index 2e0f099c..d7684a67 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -120,26 +120,8 @@ static void print_tunnel(const void *t) if (p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) printf(" allow-localremote"); - if ((p->i_flags & GRE_KEY) && (p->o_flags & GRE_KEY) && - p->o_key == p->i_key) - printf(" key %u", ntohl(p->i_key)); - else { - if (p->i_flags & GRE_KEY) - printf(" ikey %u", ntohl(p->i_key)); - if (p->o_flags & GRE_KEY) - printf(" okey %u", ntohl(p->o_key)); - } - - if (p->proto == IPPROTO_GRE) { - if (p->i_flags & GRE_SEQ) - printf("%s Drop packets out of sequence.", _SL_); - if (p->i_flags & GRE_CSUM) - printf("%s Checksum in received packet is required.", _SL_); - if (p->o_flags & GRE_SEQ) - printf("%s Sequence packets on output.", _SL_); - if (p->o_flags & GRE_CSUM) - printf("%s Checksum output packets.", _SL_); - } + tnl_print_gre_flags(p->proto, p->i_flags, p->o_flags, + p->i_key, p->o_key); } static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) diff --git a/ip/iptunnel.c b/ip/iptunnel.c index 92a5cb92..66929e75 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -354,23 +354,8 @@ static void print_tunnel(const void *t) } } - if ((p->i_flags & GRE_KEY) && (p->o_flags & GRE_KEY) && p->o_key == p->i_key) - printf(" key %u", ntohl(p->i_key)); - else if ((p->i_flags | 
p->o_flags) & GRE_KEY) { - if (p->i_flags & GRE_KEY) - printf(" ikey %u", ntohl(p->i_key)); - if (p->o_flags & GRE_KEY) - printf(" okey %u", ntohl(p->o_key)); - } - - if (p->i_flags & GRE_SEQ) - printf("%s Drop packets out of sequence.", _SL_); - if (p->i_flags & GRE_CSUM) - printf("%s Checksum in received packet is required.", _SL_); - if (p->o_flags & GRE_SEQ) - printf("%s Sequence packets on output.", _SL_); - if (p->o_flags & GRE_CSUM) - printf("%s Checksum output packets.", _SL_); + tnl_print_gre_flags(p->iph.protocol, p->i_flags, p->o_flags, + p->i_key, p->o_key); } diff --git a/ip/tunnel.c b/ip/tunnel.c index d0d55f37..41b0ef31 100644 --- a/ip/tunnel.c +++ b/ip/tunnel.c @@ -308,6 +308,32 @@ void tnl_print_endpoint(const char *name, const struct rtattr *rta, int family) } } +/* Print the GRE key and flag description shared by the IPv4 and IPv6 tunnel printers. A single "key" is printed when both directions carry GRE_KEY with equal keys; otherwise "ikey" and/or "okey" are printed for whichever direction has GRE_KEY set. Keys are passed through ntohl() before printing. The sequence/checksum descriptions are printed only when proto is IPPROTO_GRE. */ void tnl_print_gre_flags(__u8 proto, + __be16 i_flags, __be16 o_flags, + __be32 i_key, __be32 o_key) +{ + if ((i_flags & GRE_KEY) && (o_flags & GRE_KEY) && + o_key == i_key) { + printf(" key %u", ntohl(i_key)); + } else { + if (i_flags & GRE_KEY) + printf(" ikey %u", ntohl(i_key)); + if (o_flags & GRE_KEY) + printf(" okey %u", ntohl(o_key)); + } + + if (proto == IPPROTO_GRE) { + if (i_flags & GRE_SEQ) + printf("%s Drop packets out of sequence.", _SL_); + if (i_flags & GRE_CSUM) + printf("%s Checksum in received packet is required.", _SL_); + if (o_flags & GRE_SEQ) + printf("%s Sequence packets on output.", _SL_); + if (o_flags & GRE_CSUM) + printf("%s Checksum output packets.", _SL_); + } +} + static void tnl_print_stats(const struct rtnl_link_stats64 *s) { printf("%s", _SL_); diff --git a/ip/tunnel.h b/ip/tunnel.h index e530d07c..604f8cbf 100644 --- a/ip/tunnel.h +++ b/ip/tunnel.h @@ -55,5 +55,8 @@ void tnl_print_encap(struct rtattr *tb[], int encap_sport, int encap_dport); void tnl_print_endpoint(const char *name, const struct rtattr *rta, int family); +void tnl_print_gre_flags(__u8 proto, + __be16 i_flags, __be16 o_flags, + __be32 i_key, __be32 o_key); #endif From 
18aa9f5583e94abc7204b2376b819ede1180da97 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 11 Jul 2019 11:14:25 +0300 Subject: [PATCH 02/26] tc: add NLA_F_NESTED flag to all actions options nested block Strict netlink validation now requires this flag on all nested attributes, add it for action options. Signed-off-by: Paul Blakey Signed-off-by: David Ahern --- tc/m_action.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tc/m_action.c b/tc/m_action.c index ab6bc0ad..2d36a698 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -214,7 +214,8 @@ done0: tail = addattr_nest(n, MAX_MSG, ++prio); addattr_l(n, MAX_MSG, TCA_ACT_KIND, k, strlen(k) + 1); - ret = a->parse_aopt(a, &argc, &argv, TCA_ACT_OPTIONS, + ret = a->parse_aopt(a, &argc, &argv, + TCA_ACT_OPTIONS | NLA_F_NESTED, n); if (ret < 0) { From f47081befffc50a5eef734d0a6654b59047e7808 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 18 Jul 2019 15:40:07 -0700 Subject: [PATCH 03/26] Import tc_act/tc_ct.h uapi file Import include/uapi/linux/tc_act/tc_ct.h header from commit of last kernel headers sync. 
Signed-off-by: David Ahern --- include/uapi/linux/tc_act/tc_ct.h | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/uapi/linux/tc_act/tc_ct.h diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h new file mode 100644 index 00000000..5fb1d7ac --- /dev/null +++ b/include/uapi/linux/tc_act/tc_ct.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __UAPI_TC_CT_H +#define __UAPI_TC_CT_H + +#include <linux/types.h> +#include <linux/pkt_cls.h> + +enum { + TCA_CT_UNSPEC, + TCA_CT_PARMS, + TCA_CT_TM, + TCA_CT_ACTION, /* u16 */ + TCA_CT_ZONE, /* u16 */ + TCA_CT_MARK, /* u32 */ + TCA_CT_MARK_MASK, /* u32 */ + TCA_CT_LABELS, /* u128 */ + TCA_CT_LABELS_MASK, /* u128 */ + TCA_CT_NAT_IPV4_MIN, /* be32 */ + TCA_CT_NAT_IPV4_MAX, /* be32 */ + TCA_CT_NAT_IPV6_MIN, /* struct in6_addr */ + TCA_CT_NAT_IPV6_MAX, /* struct in6_addr */ + TCA_CT_NAT_PORT_MIN, /* be16 */ + TCA_CT_NAT_PORT_MAX, /* be16 */ + TCA_CT_PAD, + __TCA_CT_MAX +}; + +#define TCA_CT_MAX (__TCA_CT_MAX - 1) + +#define TCA_CT_ACT_COMMIT (1 << 0) +#define TCA_CT_ACT_FORCE (1 << 1) +#define TCA_CT_ACT_CLEAR (1 << 2) +#define TCA_CT_ACT_NAT (1 << 3) +#define TCA_CT_ACT_NAT_SRC (1 << 4) +#define TCA_CT_ACT_NAT_DST (1 << 5) + +struct tc_ct { + tc_gen; +}; + +#endif /* __UAPI_TC_CT_H */ From c8a494314c400eb023d7555933ba8ab40345519b Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 11 Jul 2019 11:14:26 +0300 Subject: [PATCH 04/26] tc: Introduce tc ct action New tc action to send packets to conntrack module, commit them, and set a zone, labels, mark, and nat on the connection. It can also clear the packet's conntrack state by using clear. 
Usage: ct clear ct commit [force] [zone] [mark] [label] [nat] ct [nat] [zone] Signed-off-by: Paul Blakey Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Yossi Kuperman Acked-by: Jiri Pirko Acked-by: Roi Dayan Signed-off-by: David Ahern --- tc/Makefile | 1 + tc/m_ct.c | 497 +++++++++++++++++++++++++++++++++++++++++++++++++++ tc/tc_util.c | 44 +++++ tc/tc_util.h | 4 + 4 files changed, 546 insertions(+) create mode 100644 tc/m_ct.c diff --git a/tc/Makefile b/tc/Makefile index 09ff3692..14171a28 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -53,6 +53,7 @@ TCMODULES += m_ctinfo.o TCMODULES += m_bpf.o TCMODULES += m_tunnel_key.o TCMODULES += m_sample.o +TCMODULES += m_ct.o TCMODULES += p_ip.o TCMODULES += p_ip6.o TCMODULES += p_icmp.o diff --git a/tc/m_ct.c b/tc/m_ct.c new file mode 100644 index 00000000..8589cb9a --- /dev/null +++ b/tc/m_ct.c @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* - + * m_ct.c Connection tracking action + * + * Authors: Paul Blakey + * Yossi Kuperman + * Marcelo Ricardo Leitner + */ + +#include +#include +#include +#include +#include "utils.h" +#include "tc_util.h" +#include + +/* Print the ct action usage text to stderr and exit with status -1. */ +static void +usage(void) +{ + fprintf(stderr, + "Usage: ct clear\n" + " ct commit [force] [zone ZONE] [mark MASKED_MARK] [label MASKED_LABEL] [nat NAT_SPEC]\n" + " ct [nat] [zone ZONE]\n" + "Where: ZONE is the conntrack zone table number\n" + " NAT_SPEC is {src|dst} addr addr1[-addr2] [port port1[-port2]]\n" + "\n"); + exit(-1); +} + +/* Parse "addr1[-addr2]" and append the NAT min/max address attributes to n. The IPv4 or IPv6 attribute pair is chosen from the parsed address family; when addr2 is omitted the max attribute repeats addr1. Returns 0 on success or the non-zero get_addr() result. */ +static int ct_parse_nat_addr_range(const char *str, struct nlmsghdr *n) +{ + inet_prefix addr = { .family = AF_UNSPEC, }; + char *addr1, *addr2 = 0; + SPRINT_BUF(buffer); + int attr; + int ret; + + /* strncpy() does not NUL-terminate when it truncates, so terminate the copy explicitly */ + strncpy(buffer, str, sizeof(buffer) - 1); + buffer[sizeof(buffer) - 1] = '\0'; + + addr1 = buffer; + addr2 = strchr(addr1, '-'); + if (addr2) { + *addr2 = '\0'; + addr2++; + } + + ret = get_addr(&addr, addr1, AF_UNSPEC); + if (ret) + return ret; + attr = addr.family == AF_INET ? 
TCA_CT_NAT_IPV4_MIN : + TCA_CT_NAT_IPV6_MIN; + addattr_l(n, MAX_MSG, attr, addr.data, addr.bytelen); + + if (addr2) { + ret = get_addr(&addr, addr2, addr.family); + if (ret) + return ret; + } + attr = addr.family == AF_INET ? TCA_CT_NAT_IPV4_MAX : + TCA_CT_NAT_IPV6_MAX; + addattr_l(n, MAX_MSG, attr, addr.data, addr.bytelen); + + return 0; +} + +/* Parse "port1[-port2]" (decimal) and append the NAT min/max port attributes to n; when port2 is omitted the max attribute repeats port1. Returns 0 on success, -1 on a malformed port. */ +static int ct_parse_nat_port_range(const char *str, struct nlmsghdr *n) +{ + char *port1, *port2 = 0; + SPRINT_BUF(buffer); + __be16 port; + int ret; + + /* strncpy() does not NUL-terminate when it truncates, so terminate the copy explicitly */ + strncpy(buffer, str, sizeof(buffer) - 1); + buffer[sizeof(buffer) - 1] = '\0'; + + port1 = buffer; + port2 = strchr(port1, '-'); + if (port2) { + *port2 = '\0'; + port2++; + } + + ret = get_be16(&port, port1, 10); + if (ret) + return -1; + addattr16(n, MAX_MSG, TCA_CT_NAT_PORT_MIN, port); + + if (port2) { + ret = get_be16(&port, port2, 10); + if (ret) + return -1; + } + addattr16(n, MAX_MSG, TCA_CT_NAT_PORT_MAX, port); + + return 0; +} + + +/* Parse "VALUE[/MASK]" as 16-bit numbers and append value_type to n. The "/MASK" part is only recognised, and the mask attribute only emitted, when mask_type is not TCA_CT_UNSPEC; a missing mask defaults to UINT16_MAX. str is modified in place at the '/'. Returns 0 on success, -1 on a parse error. */ +static int ct_parse_u16(char *str, int value_type, int mask_type, + struct nlmsghdr *n) +{ + __u16 value, mask; + char *slash = 0; + + if (mask_type != TCA_CT_UNSPEC) { + slash = strchr(str, '/'); + if (slash) + *slash = '\0'; + } + + if (get_u16(&value, str, 0)) + return -1; + + if (slash) { + if (get_u16(&mask, slash + 1, 0)) + return -1; + } else { + mask = UINT16_MAX; + } + + addattr16(n, MAX_MSG, value_type, value); + if (mask_type != TCA_CT_UNSPEC) + addattr16(n, MAX_MSG, mask_type, mask); + + return 0; +} + +/* Parse "VALUE[/MASK]" as 32-bit numbers and append both value_type and mask_type to n; a missing mask defaults to UINT32_MAX. str is modified in place at the '/'. Returns 0 on success, -1 on a parse error. */ +static int ct_parse_u32(char *str, int value_type, int mask_type, + struct nlmsghdr *n) +{ + __u32 value, mask; + char *slash; + + slash = strchr(str, '/'); + if (slash) + *slash = '\0'; + + if (get_u32(&value, str, 0)) + return -1; + + if (slash) { + if (get_u32(&mask, slash + 1, 0)) + return -1; + } else { + mask = UINT32_MAX; + } + + addattr32(n, MAX_MSG, value_type, value); + addattr32(n, MAX_MSG, mask_type, mask); + + return 0; +} + +/* Parse a "MARK[/MASK]" argument into TCA_CT_MARK and TCA_CT_MARK_MASK. */ +static int ct_parse_mark(char *str, struct nlmsghdr *n) +{ + return ct_parse_u32(str, TCA_CT_MARK, TCA_CT_MARK_MASK, n); +} + 
+/* Parse "LABEL[/MASK]" given as hex strings and append TCA_CT_LABELS and TCA_CT_LABELS_MASK to n. Each string may be at most LABELS_SIZE*2 hex digits; a missing mask defaults to LABELS_SIZE bytes of 0xff. str is modified in place at the '/'. Invalid input is reported through invarg(). */ +static int ct_parse_labels(char *str, struct nlmsghdr *n) +{ +#define LABELS_SIZE 16 + uint8_t labels[LABELS_SIZE], lmask[LABELS_SIZE]; + char *slash, *mask = NULL; + size_t slen, slen_mask = 0; + + slash = strchr(str, '/'); + if (slash) { + *slash = 0; + mask = slash+1; + slen_mask = strlen(mask); + } + + slen = strlen(str); + if (slen > LABELS_SIZE*2 || slen_mask > LABELS_SIZE*2) { + char errmsg[128]; + + /* slen is a size_t, so it must be formatted with %zu */ + snprintf(errmsg, sizeof(errmsg), + "%zu Max allowed size %d", + slen, LABELS_SIZE*2); + invarg(errmsg, str); + } + + if (hex2mem(str, labels, slen/2) < 0) + invarg("ct: labels must be a hex string\n", str); + addattr_l(n, MAX_MSG, TCA_CT_LABELS, labels, slen/2); + + if (mask) { + if (hex2mem(mask, lmask, slen_mask/2) < 0) + invarg("ct: labels mask must be a hex string\n", mask); + } else { + memset(lmask, 0xff, sizeof(lmask)); + slen_mask = sizeof(lmask)*2; + } + addattr_l(n, MAX_MSG, TCA_CT_LABELS_MASK, lmask, slen_mask/2); + + return 0; +} + +static int +parse_ct(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + struct nlmsghdr *n) +{ + struct tc_ct sel = {}; + char **argv = *argv_p; + struct rtattr *tail; + int argc = *argc_p; + int ct_action = 0; + int ret; + + tail = addattr_nest(n, MAX_MSG, tca_id); + + if (argc && matches(*argv, "ct") == 0) + NEXT_ARG_FWD(); + + while (argc > 0) { + if (matches(*argv, "zone") == 0) { + NEXT_ARG(); + + if (ct_parse_u16(*argv, + TCA_CT_ZONE, TCA_CT_UNSPEC, n)) { + fprintf(stderr, "ct: Illegal \"zone\"\n"); + return -1; + } + } else if (matches(*argv, "nat") == 0) { + ct_action |= TCA_CT_ACT_NAT; + + NEXT_ARG(); + if (matches(*argv, "src") == 0) + ct_action |= TCA_CT_ACT_NAT_SRC; + else if (matches(*argv, "dst") == 0) + ct_action |= TCA_CT_ACT_NAT_DST; + else + continue; + + NEXT_ARG(); + if (matches(*argv, "addr") != 0) + usage(); + + NEXT_ARG(); + ret = ct_parse_nat_addr_range(*argv, n); + if (ret) { + fprintf(stderr, "ct: Illegal nat address range\n"); + return -1; + } + + NEXT_ARG_FWD(); + if 
(matches(*argv, "port") != 0) + continue; + + NEXT_ARG(); + ret = ct_parse_nat_port_range(*argv, n); + if (ret) { + fprintf(stderr, "ct: Illegal nat port range\n"); + return -1; + } + } else if (matches(*argv, "clear") == 0) { + ct_action |= TCA_CT_ACT_CLEAR; + } else if (matches(*argv, "commit") == 0) { + ct_action |= TCA_CT_ACT_COMMIT; + } else if (matches(*argv, "force") == 0) { + ct_action |= TCA_CT_ACT_FORCE; + } else if (matches(*argv, "index") == 0) { + NEXT_ARG(); + if (get_u32(&sel.index, *argv, 10)) { + fprintf(stderr, "ct: Illegal \"index\"\n"); + return -1; + } + } else if (matches(*argv, "mark") == 0) { + NEXT_ARG(); + + ret = ct_parse_mark(*argv, n); + if (ret) { + fprintf(stderr, "ct: Illegal \"mark\"\n"); + return -1; + } + } else if (matches(*argv, "label") == 0) { + NEXT_ARG(); + + ret = ct_parse_labels(*argv, n); + if (ret) { + fprintf(stderr, "ct: Illegal \"label\"\n"); + return -1; + } + } else if (matches(*argv, "help") == 0) { + usage(); + } else { + break; + } + NEXT_ARG_FWD(); + } + + if (ct_action & TCA_CT_ACT_CLEAR && + ct_action & ~TCA_CT_ACT_CLEAR) { + fprintf(stderr, "ct: clear can only be used alone\n"); + return -1; + } + + if (ct_action & TCA_CT_ACT_NAT_SRC && + ct_action & TCA_CT_ACT_NAT_DST) { + fprintf(stderr, "ct: src and dst nat can't be used together\n"); + return -1; + } + + if ((ct_action & TCA_CT_ACT_COMMIT) && + (ct_action & TCA_CT_ACT_NAT) && + !(ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) { + fprintf(stderr, "ct: commit and nat must set src or dst\n"); + return -1; + } + + if (!(ct_action & TCA_CT_ACT_COMMIT) && + (ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) { + fprintf(stderr, "ct: src or dst is only valid if commit is set\n"); + return -1; + } + + parse_action_control_dflt(&argc, &argv, &sel.action, false, + TC_ACT_PIPE); + NEXT_ARG_FWD(); + + addattr16(n, MAX_MSG, TCA_CT_ACTION, ct_action); + addattr_l(n, MAX_MSG, TCA_CT_PARMS, &sel, sizeof(sel)); + addattr_nest_end(n, tail); + + *argc_p = 
argc; + *argv_p = argv; + return 0; +} + +/* Format prefix followed by the 16-bit big-endian port in attr into buf. Returns the number of characters written, or 0 when attr is NULL. */ +static int ct_sprint_port(char *buf, const char *prefix, struct rtattr *attr) +{ + if (!attr) + return 0; + + return sprintf(buf, "%s%d", prefix, rta_getattr_be16(attr)); +} + +/* Format prefix followed by the address in attr into buf; a 4-byte payload is printed as IPv4 and a 16-byte payload as IPv6. Returns the number of characters written, or 0 when attr is NULL or has another length. */ +static int ct_sprint_ip_addr(char *buf, const char *prefix, + struct rtattr *attr) +{ + int family; + size_t len; + + if (!attr) + return 0; + + len = RTA_PAYLOAD(attr); + + if (len == 4) + family = AF_INET; + else if (len == 16) + family = AF_INET6; + else + return 0; + + return sprintf(buf, "%s%s", prefix, rt_addr_n2a_rta(family, attr)); +} + +/* Print the "nat" part of a ct action. Nothing is printed unless TCA_CT_ACT_NAT is set. With a src/dst direction the address range and optional port range are appended; a max value is only shown when it differs from the min. */ +static void ct_print_nat(int ct_action, struct rtattr **tb) +{ + size_t done = 0; + char out[256] = ""; + /* must start false: it is only assigned when a direction flag is set but is read unconditionally below */ + bool nat = false; + + if (!(ct_action & TCA_CT_ACT_NAT)) + return; + + if (ct_action & TCA_CT_ACT_NAT_SRC) { + nat = true; + done += sprintf(out + done, "src"); + } else if (ct_action & TCA_CT_ACT_NAT_DST) { + nat = true; + done += sprintf(out + done, "dst"); + } + + if (nat) { + done += ct_sprint_ip_addr(out + done, " addr ", + tb[TCA_CT_NAT_IPV4_MIN]); + done += ct_sprint_ip_addr(out + done, " addr ", + tb[TCA_CT_NAT_IPV6_MIN]); + if (tb[TCA_CT_NAT_IPV4_MAX] && + memcmp(RTA_DATA(tb[TCA_CT_NAT_IPV4_MIN]), + RTA_DATA(tb[TCA_CT_NAT_IPV4_MAX]), 4)) + done += ct_sprint_ip_addr(out + done, "-", + tb[TCA_CT_NAT_IPV4_MAX]); + else if (tb[TCA_CT_NAT_IPV6_MAX] && + memcmp(RTA_DATA(tb[TCA_CT_NAT_IPV6_MIN]), + RTA_DATA(tb[TCA_CT_NAT_IPV6_MAX]), 16)) + done += ct_sprint_ip_addr(out + done, "-", + tb[TCA_CT_NAT_IPV6_MAX]); + done += ct_sprint_port(out + done, " port ", + tb[TCA_CT_NAT_PORT_MIN]); + if (tb[TCA_CT_NAT_PORT_MAX] && + memcmp(RTA_DATA(tb[TCA_CT_NAT_PORT_MIN]), + RTA_DATA(tb[TCA_CT_NAT_PORT_MAX]), 2)) + done += ct_sprint_port(out + done, "-", + tb[TCA_CT_NAT_PORT_MAX]); + } + + if (done) + print_string(PRINT_ANY, "nat", " nat %s", out); + else + print_string(PRINT_ANY, "nat", " nat", ""); +} + +static void ct_print_labels(struct rtattr *attr, + struct rtattr *mask_attr) +{ + const unsigned char *str; + bool 
print_mask = false; + char out[256], *p; + int data_len, i; + + if (!attr) + return; + + data_len = RTA_PAYLOAD(attr); + hexstring_n2a(RTA_DATA(attr), data_len, out, sizeof(out)); + p = out + data_len*2; + + /* Inspect the mask using its own length; when no mask attribute is present only the label is printed */ + data_len = mask_attr ? RTA_PAYLOAD(mask_attr) : 0; + str = mask_attr ? RTA_DATA(mask_attr) : NULL; + if (mask_attr && data_len != 16) + print_mask = true; + /* a full-length mask of all 0xff bytes is the default and is not printed */ + for (i = 0; !print_mask && i < data_len; i++) { + if (str[i] != 0xff) + print_mask = true; + } + if (print_mask) { + *p++ = '/'; + hexstring_n2a(RTA_DATA(mask_attr), data_len, p, + sizeof(out)-(p-out)); + p += data_len*2; + } + *p = '\0'; + + print_string(PRINT_ANY, "label", " label %s", out); +} + +static int print_ct(struct action_util *au, FILE *f, struct rtattr *arg) +{ + struct rtattr *tb[TCA_CT_MAX + 1]; + const char *commit; + struct tc_ct *p; + int ct_action = 0; + + if (arg == NULL) + return -1; + + parse_rtattr_nested(tb, TCA_CT_MAX, arg); + if (tb[TCA_CT_PARMS] == NULL) { + print_string(PRINT_FP, NULL, "%s", "[NULL ct parameters]"); + return -1; + } + + p = RTA_DATA(tb[TCA_CT_PARMS]); + + print_string(PRINT_ANY, "kind", "%s", "ct"); + + if (tb[TCA_CT_ACTION]) + ct_action = rta_getattr_u16(tb[TCA_CT_ACTION]); + if (ct_action & TCA_CT_ACT_COMMIT) { + commit = ct_action & TCA_CT_ACT_FORCE ? 
+ "commit force" : "commit"; + print_string(PRINT_ANY, "action", " %s", commit); + } else if (ct_action & TCA_CT_ACT_CLEAR) { + print_string(PRINT_ANY, "action", " %s", "clear"); + } + + print_masked_u32("mark", tb[TCA_CT_MARK], tb[TCA_CT_MARK_MASK]); + print_masked_u16("zone", tb[TCA_CT_ZONE], NULL); + ct_print_labels(tb[TCA_CT_LABELS], tb[TCA_CT_LABELS_MASK]); + ct_print_nat(ct_action, tb); + + print_action_control(f, " ", p->action, ""); + + print_uint(PRINT_ANY, "index", "\n\t index %u", p->index); + print_int(PRINT_ANY, "ref", " ref %d", p->refcnt); + print_int(PRINT_ANY, "bind", " bind %d", p->bindcnt); + + /* timing information is only printed when statistics were requested and the TCA_CT_TM attribute is present */ if (show_stats) { + if (tb[TCA_CT_TM]) { + struct tcf_t *tm = RTA_DATA(tb[TCA_CT_TM]); + + print_tm(f, tm); + } + } + print_string(PRINT_FP, NULL, "%s", "\n "); + + return 0; +} + +/* Action table entry for "ct": wires the option parser and printer defined above. */ struct action_util ct_action_util = { + .id = "ct", + .parse_aopt = parse_ct, + .print_aopt = print_ct, +}; diff --git a/tc/tc_util.c b/tc/tc_util.c index 53d15e08..8e461bac 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -913,3 +913,47 @@ compat_xstats: if (tb[TCA_XSTATS] && xstats) *xstats = tb[TCA_XSTATS]; } + +/* Print a 32-bit attribute as " NAME VALUE" with VALUE in decimal, followed by "/0xMASK" when a mask attribute is given and is not UINT32_MAX. Prints nothing when attr is NULL. name is used both as the output key and in the format string. */ void print_masked_u32(const char *name, struct rtattr *attr, + struct rtattr *mask_attr) +{ + __u32 value, mask; + SPRINT_BUF(namefrm); + SPRINT_BUF(out); + size_t done; + + if (!attr) + return; + + value = rta_getattr_u32(attr); + mask = mask_attr ? rta_getattr_u32(mask_attr) : UINT32_MAX; + + done = sprintf(out, "%u", value); + if (mask != UINT32_MAX) + sprintf(out + done, "/0x%x", mask); + + /* build the format " NAME %s" at run time; "%%s" leaves a literal %s for print_string() */ sprintf(namefrm, " %s %%s", name); + print_string(PRINT_ANY, name, namefrm, out); +} + +void print_masked_u16(const char *name, struct rtattr *attr, + struct rtattr *mask_attr) +{ + __u16 value, mask; + SPRINT_BUF(namefrm); + SPRINT_BUF(out); + size_t done; + + if (!attr) + return; + + value = rta_getattr_u16(attr); + mask = mask_attr ? 
rta_getattr_u16(mask_attr) : UINT16_MAX; + + done = sprintf(out, "%u", value); + if (mask != UINT16_MAX) + sprintf(out + done, "/0x%x", mask); + + sprintf(namefrm, " %s %%s", name); + print_string(PRINT_ANY, name, namefrm, out); +} diff --git a/tc/tc_util.h b/tc/tc_util.h index eb4b60db..0c3425ab 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -127,4 +127,8 @@ int action_a2n(char *arg, int *result, bool allow_num); bool tc_qdisc_block_exists(__u32 block_index); +void print_masked_u32(const char *name, struct rtattr *attr, + struct rtattr *mask_attr); +void print_masked_u16(const char *name, struct rtattr *attr, + struct rtattr *mask_attr); #endif From 2fffb1c03056e71d49d623f7ca460883fa6110a6 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 11 Jul 2019 11:14:27 +0300 Subject: [PATCH 05/26] tc: flower: Add matching on conntrack info Matches on conntrack state, zone, mark, and label. Signed-off-by: Paul Blakey Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Yossi Kuperman Acked-by: Jiri Pirko Acked-by: Roi Dayan Signed-off-by: David Ahern --- man/man8/tc-flower.8 | 35 ++++++ tc/f_flower.c | 276 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 310 insertions(+), 1 deletion(-) diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8 index adff41e3..04ee1947 100644 --- a/man/man8/tc-flower.8 +++ b/man/man8/tc-flower.8 @@ -289,6 +289,41 @@ bits is assumed. .TQ .BI enc_ttl " NUMBER" .TQ +.BR +.TP +.BI ct_state " CT_STATE" +.TQ +.BI ct_zone " CT_MASKED_ZONE" +.TQ +.BI ct_mark " CT_MASKED_MARK" +.TQ +.BI ct_label " CT_MASKED_LABEL" +Matches on connection tracking info +.RS +.TP +.I CT_STATE +Match the connection state, and can be a combination of [{+|-}flag] flags, where flag can be one of +.RS +.TP +trk - Tracked connection. +.TP +new - New connection. +.TP +est - Established connection. +.TP +Example: +trk+est +.RE +.TP +.I CT_MASKED_ZONE +Match the connection zone, and can be masked. 
+.TP +.I CT_MASKED_MARK +32bit match on the connection mark, and can be masked. +.TP +.I CT_MASKED_LABEL +128bit match on the connection label, and can be masked. +.RE +.TP .BI geneve_opts " OPTIONS" Match on IP tunnel metadata. Key id .I NUMBER diff --git a/tc/f_flower.c b/tc/f_flower.c index 70d40d3b..a2a23016 100644 --- a/tc/f_flower.c +++ b/tc/f_flower.c @@ -82,9 +82,14 @@ static void explain(void) " enc_ttl MASKED-IP_TTL |\n" " geneve_opts MASKED-OPTIONS |\n" " ip_flags IP-FLAGS | \n" - " enc_dst_port [ port_number ] }\n" + " enc_dst_port [ port_number ] |\n" + " ct_state MASKED_CT_STATE |\n" + " ct_label MASKED_CT_LABEL |\n" + " ct_mark MASKED_CT_MARK |\n" + " ct_zone MASKED_CT_ZONE }\n" " FILTERID := X:Y:Z\n" " MASKED_LLADDR := { LLADDR | LLADDR/MASK | LLADDR/BITS }\n" + " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new\n" " ACTION-SPEC := ... look at individual actions\n" "\n" "NOTE: CLASSID, IP-PROTO are parsed as hexadecimal input.\n" @@ -214,6 +219,159 @@ static int flower_parse_matching_flags(char *str, return 0; } +/* Parse "VALUE[/MASK]" as 16-bit numbers and append value_type and mask_type to n; a missing mask defaults to UINT16_MAX. str is modified in place at the '/'. Returns 0 on success, -1 on a parse error. */ static int flower_parse_u16(char *str, int value_type, int mask_type, + struct nlmsghdr *n) +{ + __u16 value, mask; + char *slash; + + slash = strchr(str, '/'); + if (slash) + *slash = '\0'; + + if (get_u16(&value, str, 0)) + return -1; + + if (slash) { + if (get_u16(&mask, slash + 1, 0)) + return -1; + } else { + mask = UINT16_MAX; + } + + addattr16(n, MAX_MSG, value_type, value); + addattr16(n, MAX_MSG, mask_type, mask); + + return 0; +} + +/* 32-bit counterpart of flower_parse_u16(): parse "VALUE[/MASK]" and append value_type and mask_type to n; a missing mask defaults to UINT32_MAX. Returns 0 on success, -1 on a parse error. */ static int flower_parse_u32(char *str, int value_type, int mask_type, + struct nlmsghdr *n) +{ + __u32 value, mask; + char *slash; + + slash = strchr(str, '/'); + if (slash) + *slash = '\0'; + + if (get_u32(&value, str, 0)) + return -1; + + if (slash) { + if (get_u32(&mask, slash + 1, 0)) + return -1; + } else { + mask = UINT32_MAX; + } + + addattr32(n, MAX_MSG, value_type, value); + addattr32(n, MAX_MSG, mask_type, mask); + + return 0; +} + +static int flower_parse_ct_mark(char *str, 
struct nlmsghdr *n) +{ + return flower_parse_u32(str, + TCA_FLOWER_KEY_CT_MARK, + TCA_FLOWER_KEY_CT_MARK_MASK, + n); +} + +/* Parse a "ZONE[/MASK]" argument into the flower ct_zone key and mask attributes. */ +static int flower_parse_ct_zone(char *str, struct nlmsghdr *n) +{ + return flower_parse_u16(str, + TCA_FLOWER_KEY_CT_ZONE, + TCA_FLOWER_KEY_CT_ZONE_MASK, + n); +} + +/* Parse "LABEL[/MASK]" given as hex strings and append the flower ct_label key and mask attributes to n. Each string may be at most LABELS_SIZE * 2 hex digits; a missing mask defaults to LABELS_SIZE bytes of 0xff. str is modified in place at the '/'. Invalid input is reported through invarg(). */ +static int flower_parse_ct_labels(char *str, struct nlmsghdr *n) +{ +#define LABELS_SIZE 16 + uint8_t labels[LABELS_SIZE], lmask[LABELS_SIZE]; + char *slash, *mask = NULL; + size_t slen, slen_mask = 0; + + slash = strchr(str, '/'); + if (slash) { + *slash = 0; + mask = slash + 1; + slen_mask = strlen(mask); + } + + slen = strlen(str); + if (slen > LABELS_SIZE * 2 || slen_mask > LABELS_SIZE * 2) { + char errmsg[128]; + + /* slen is a size_t, so it must be formatted with %zu */ + snprintf(errmsg, sizeof(errmsg), + "%zu Max allowed size %d", + slen, LABELS_SIZE*2); + invarg(errmsg, str); + } + + if (hex2mem(str, labels, slen / 2) < 0) + invarg("labels must be a hex string\n", str); + addattr_l(n, MAX_MSG, TCA_FLOWER_KEY_CT_LABELS, labels, slen / 2); + + if (mask) { + if (hex2mem(mask, lmask, slen_mask / 2) < 0) + invarg("labels mask must be a hex string\n", mask); + } else { + memset(lmask, 0xff, sizeof(lmask)); + slen_mask = sizeof(lmask) * 2; + } + addattr_l(n, MAX_MSG, TCA_FLOWER_KEY_CT_LABELS_MASK, lmask, + slen_mask / 2); + + return 0; +} + +/* Names accepted by ct_state and the flag bit each one maps to. */ +static struct flower_ct_states { + char *str; + int flag; +} flower_ct_states[] = { + { "trk", TCA_FLOWER_KEY_CT_FLAGS_TRACKED }, + { "new", TCA_FLOWER_KEY_CT_FLAGS_NEW }, + { "est", TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED }, +}; + +static int flower_parse_ct_state(char *str, struct nlmsghdr *n) +{ + int flags = 0, mask = 0, len, i; + bool p; + + while (*str != '\0') { + if (*str == '+') + p = true; + else if (*str == '-') + p = false; + else + return -1; + + for (i = 0; i < ARRAY_SIZE(flower_ct_states); i++) { + len = strlen(flower_ct_states[i].str); + if (strncmp(str + 1, flower_ct_states[i].str, len)) + continue; + + if (p) + flags |= flower_ct_states[i].flag; + mask |= flower_ct_states[i].flag; + 
break; + } + + if (i == ARRAY_SIZE(flower_ct_states)) + return -1; + + str += len + 1; + } + + addattr16(n, MAX_MSG, TCA_FLOWER_KEY_CT_STATE, flags); + addattr16(n, MAX_MSG, TCA_FLOWER_KEY_CT_STATE_MASK, mask); + return 0; +} + static int flower_parse_ip_proto(char *str, __be16 eth_type, int type, __u8 *p_ip_proto, struct nlmsghdr *n) { @@ -898,6 +1056,34 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, flags |= TCA_CLS_FLAGS_SKIP_HW; } else if (matches(*argv, "skip_sw") == 0) { flags |= TCA_CLS_FLAGS_SKIP_SW; + } else if (matches(*argv, "ct_state") == 0) { + NEXT_ARG(); + ret = flower_parse_ct_state(*argv, n); + if (ret < 0) { + fprintf(stderr, "Illegal \"ct_state\"\n"); + return -1; + } + } else if (matches(*argv, "ct_zone") == 0) { + NEXT_ARG(); + ret = flower_parse_ct_zone(*argv, n); + if (ret < 0) { + fprintf(stderr, "Illegal \"ct_zone\"\n"); + return -1; + } + } else if (matches(*argv, "ct_mark") == 0) { + NEXT_ARG(); + ret = flower_parse_ct_mark(*argv, n); + if (ret < 0) { + fprintf(stderr, "Illegal \"ct_mark\"\n"); + return -1; + } + } else if (matches(*argv, "ct_label") == 0) { + NEXT_ARG(); + ret = flower_parse_ct_labels(*argv, n); + if (ret < 0) { + fprintf(stderr, "Illegal \"ct_label\"\n"); + return -1; + } } else if (matches(*argv, "indev") == 0) { NEXT_ARG(); if (check_ifname(*argv)) @@ -1590,6 +1776,85 @@ static void flower_print_tcp_flags(const char *name, struct rtattr *flags_attr, print_string(PRINT_ANY, name, namefrm, out); } +/* Print the ct_state key as a sequence of "+flag" / "-flag" entries, one for each known flag covered by the mask ("+" when the flag is set in the key). Without a mask attribute every known flag is printed. Prints nothing when flags_attr is NULL. */ +static void flower_print_ct_state(struct rtattr *flags_attr, + struct rtattr *mask_attr) +{ + SPRINT_BUF(out); + uint16_t state; + uint16_t state_mask; + size_t done = 0; + int i; + + if (!flags_attr) + return; + + /* start from an empty string: the loop below writes nothing when the mask covers none of the known flags */ + out[0] = '\0'; + + state = rta_getattr_u16(flags_attr); + if (mask_attr) + state_mask = rta_getattr_u16(mask_attr); + else + state_mask = UINT16_MAX; + + for (i = 0; i < ARRAY_SIZE(flower_ct_states); i++) { + if (!(state_mask & flower_ct_states[i].flag)) + continue; + + if (state & flower_ct_states[i].flag) + 
done += sprintf(out + done, "+%s", + flower_ct_states[i].str); + else + done += sprintf(out + done, "-%s", + flower_ct_states[i].str); + } + + print_string(PRINT_ANY, "ct_state", "\n ct_state %s", out); +} + +/* Print the ct_label key as a hex string, followed by "/MASK" unless the mask is 16 bytes of 0xff. Prints nothing when attr is NULL; when no mask attribute is present only the label is printed. */ +static void flower_print_ct_label(struct rtattr *attr, + struct rtattr *mask_attr) +{ + const unsigned char *str; + bool print_mask = false; + int data_len, i; + /* a 16-byte label plus a 16-byte mask needs 32 + 1 + 32 characters and a terminator; same size as ct_print_labels() in m_ct.c */ + char out[256]; + char *p; + + if (!attr) + return; + + data_len = RTA_PAYLOAD(attr); + hexstring_n2a(RTA_DATA(attr), data_len, out, sizeof(out)); + p = out + data_len*2; + + /* Inspect the mask using its own length, and tolerate a missing mask attribute */ + data_len = mask_attr ? RTA_PAYLOAD(mask_attr) : 0; + str = mask_attr ? RTA_DATA(mask_attr) : NULL; + if (mask_attr && data_len != 16) + print_mask = true; + for (i = 0; !print_mask && i < data_len; i++) { + if (str[i] != 0xff) + print_mask = true; + } + if (print_mask) { + *p++ = '/'; + hexstring_n2a(RTA_DATA(mask_attr), data_len, p, + sizeof(out)-(p-out)); + p += data_len*2; + } + *p = '\0'; + + print_string(PRINT_ANY, "ct_label", "\n ct_label %s", out); +} + +/* Print the ct_zone key, with its mask when it is not all-ones. */ +static void flower_print_ct_zone(struct rtattr *attr, + struct rtattr *mask_attr) +{ + print_masked_u16("ct_zone", attr, mask_attr); +} + +/* Print the ct_mark key, with its mask when it is not all-ones. */ +static void flower_print_ct_mark(struct rtattr *attr, + struct rtattr *mask_attr) +{ + print_masked_u32("ct_mark", attr, mask_attr); +} static void flower_print_key_id(const char *name, struct rtattr *attr) { @@ -1949,6 +2214,15 @@ static int flower_print_opt(struct filter_util *qu, FILE *f, tb[TCA_FLOWER_KEY_FLAGS], tb[TCA_FLOWER_KEY_FLAGS_MASK]); + flower_print_ct_state(tb[TCA_FLOWER_KEY_CT_STATE], + tb[TCA_FLOWER_KEY_CT_STATE_MASK]); + flower_print_ct_zone(tb[TCA_FLOWER_KEY_CT_ZONE], + tb[TCA_FLOWER_KEY_CT_ZONE_MASK]); + flower_print_ct_mark(tb[TCA_FLOWER_KEY_CT_MARK], + tb[TCA_FLOWER_KEY_CT_MARK_MASK]); + flower_print_ct_label(tb[TCA_FLOWER_KEY_CT_LABELS], + tb[TCA_FLOWER_KEY_CT_LABELS_MASK]); + close_json_object(); if (tb[TCA_FLOWER_FLAGS]) { From d9114263d00dac608e87aa27761574202bd009c2 Mon Sep 17 00:00:00 2001 From: Vedang Patel Date: Thu, 18 Jul 2019 12:55:39 -0700 Subject: 
[PATCH 06/26] etf: Add skip_sock_check ETF Qdisc currently checks for a socket with SO_TXTIME socket option. If either is not present, the packet is dropped. In the future commits, we want other Qdiscs to add packet with launchtime to the ETF Qdisc. Also, there are some packets (e.g. ICMP packets) which may not have a socket associated with them. So, add an option to skip this check. Signed-off-by: Vedang Patel Signed-off-by: David Ahern --- tc/q_etf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tc/q_etf.c b/tc/q_etf.c index 76aca476..c2090589 100644 --- a/tc/q_etf.c +++ b/tc/q_etf.c @@ -130,6 +130,13 @@ static int etf_parse_opt(struct qdisc_util *qu, int argc, explain_clockid(*argv); return -1; } + } else if (strcmp(*argv, "skip_sock_check") == 0) { + if (opt.flags & TC_ETF_SKIP_SOCK_CHECK) { + fprintf(stderr, "etf: duplicate \"skip_sock_check\" specification\n"); + return -1; + } + + opt.flags |= TC_ETF_SKIP_SOCK_CHECK; } else if (strcmp(*argv, "help") == 0) { explain(); return -1; @@ -171,8 +178,10 @@ static int etf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) print_uint(PRINT_ANY, "delta", "delta %d ", qopt->delta); print_string(PRINT_ANY, "offload", "offload %s ", (qopt->flags & TC_ETF_OFFLOAD_ON) ? "on" : "off"); - print_string(PRINT_ANY, "deadline_mode", "deadline_mode %s", + print_string(PRINT_ANY, "deadline_mode", "deadline_mode %s ", (qopt->flags & TC_ETF_DEADLINE_MODE_ON) ? "on" : "off"); + print_string(PRINT_ANY, "skip_sock_check", "skip_sock_check %s", + (qopt->flags & TC_ETF_SKIP_SOCK_CHECK) ? "on" : "off"); return 0; } From ee000bf217870b6425849c03b309faa64539ff24 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 18 Jul 2019 12:55:40 -0700 Subject: [PATCH 07/26] taprio: Add support for setting flags This allows a new parameter, flags, to be passed to taprio. Currently, it only supports enabling the txtime-assist mode. But, we plan to add different modes for taprio (e.g. 
hardware offloading) and this parameter will be useful in enabling those modes. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Vedang Patel Signed-off-by: David Ahern --- tc/q_taprio.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tc/q_taprio.c b/tc/q_taprio.c index 62c8c591..1db2aba6 100644 --- a/tc/q_taprio.c +++ b/tc/q_taprio.c @@ -159,6 +159,7 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, __s64 cycle_time_extension = 0; struct list_head sched_entries; struct rtattr *tail, *l; + __u32 taprio_flags = 0; __s64 cycle_time = 0; __s64 base_time = 0; int err, idx; @@ -281,6 +282,17 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, explain_clockid(*argv); return -1; } + } else if (strcmp(*argv, "flags") == 0) { + NEXT_ARG(); + if (taprio_flags) { + fprintf(stderr, "taprio: duplicate \"flags\" specification\n"); + return -1; + } + if (get_u32(&taprio_flags, *argv, 0)) { + PREV_ARG(); + return -1; + } + } else if (strcmp(*argv, "help") == 0) { explain(); return -1; @@ -297,6 +309,9 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, if (clockid != CLOCKID_INVALID) addattr_l(n, 1024, TCA_TAPRIO_ATTR_SCHED_CLOCKID, &clockid, sizeof(clockid)); + if (taprio_flags) + addattr_l(n, 1024, TCA_TAPRIO_ATTR_FLAGS, &taprio_flags, sizeof(taprio_flags)); + if (opt.num_tc > 0) addattr_l(n, 1024, TCA_TAPRIO_ATTR_PRIOMAP, &opt, sizeof(opt)); @@ -442,6 +457,13 @@ static int taprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) print_string(PRINT_ANY, "clockid", "clockid %s", get_clock_name(clockid)); + if (tb[TCA_TAPRIO_ATTR_FLAGS]) { + __u32 flags; + + flags = rta_getattr_u32(tb[TCA_TAPRIO_ATTR_FLAGS]); + print_0xhex(PRINT_ANY, "flags", " flags %#x", flags); + } + print_schedule(f, tb); if (tb[TCA_TAPRIO_ATTR_ADMIN_SCHED]) { From a5e6ee3b34226f76c8be4b1e3e3ad82212ea4d50 Mon Sep 17 00:00:00 2001 From: Vedang Patel Date: Thu, 18 Jul 2019 12:55:41 -0700 Subject: [PATCH 08/26] taprio: add support for 
setting txtime_delay. This adds support for setting the txtime_delay parameter which is useful for the txtime offload mode of taprio. Signed-off-by: Vedang Patel Signed-off-by: David Ahern --- tc/q_taprio.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tc/q_taprio.c b/tc/q_taprio.c index 1db2aba6..b9954436 100644 --- a/tc/q_taprio.c +++ b/tc/q_taprio.c @@ -52,7 +52,7 @@ static void explain(void) " [num_tc NUMBER] [map P0 P1 ...] " " [queues COUNT@OFFSET COUNT@OFFSET COUNT@OFFSET ...] " " [ [sched-entry index cmd gate-mask interval] ... ] " - " [base-time time] " + " [base-time time] [txtime-delay delay]" "\n" "CLOCKID must be a valid SYS-V id (i.e. CLOCK_TAI)\n"); } @@ -160,6 +160,7 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, struct list_head sched_entries; struct rtattr *tail, *l; __u32 taprio_flags = 0; + __u32 txtime_delay = 0; __s64 cycle_time = 0; __s64 base_time = 0; int err, idx; @@ -293,6 +294,17 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, return -1; } + } else if (strcmp(*argv, "txtime-delay") == 0) { + NEXT_ARG(); + if (txtime_delay != 0) { + fprintf(stderr, "taprio: duplicate \"txtime-delay\" specification\n"); + return -1; + } + if (get_u32(&txtime_delay, *argv, 0)) { + PREV_ARG(); + return -1; + } + } else if (strcmp(*argv, "help") == 0) { explain(); return -1; @@ -315,6 +327,9 @@ static int taprio_parse_opt(struct qdisc_util *qu, int argc, if (opt.num_tc > 0) addattr_l(n, 1024, TCA_TAPRIO_ATTR_PRIOMAP, &opt, sizeof(opt)); + if (txtime_delay) + addattr_l(n, 1024, TCA_TAPRIO_ATTR_TXTIME_DELAY, &txtime_delay, sizeof(txtime_delay)); + if (base_time) addattr_l(n, 1024, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, &base_time, sizeof(base_time)); @@ -464,6 +479,13 @@ static int taprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) print_0xhex(PRINT_ANY, "flags", " flags %#x", flags); } + if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) { + __u32 txtime_delay; + + txtime_delay = 
rta_getattr_s32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]); + print_uint(PRINT_ANY, "txtime_delay", " txtime delay %d", txtime_delay); + } + print_schedule(f, tb); if (tb[TCA_TAPRIO_ATTR_ADMIN_SCHED]) { From 1738a16de965f13961407ac108cb174f296f0c70 Mon Sep 17 00:00:00 2001 From: Vedang Patel Date: Thu, 18 Jul 2019 12:55:42 -0700 Subject: [PATCH 09/26] tc: etf: Add documentation for skip_sock_check. Document the newly added option (skip_sock_check) on the etf man-page. Signed-off-by: Vedang Patel Signed-off-by: David Ahern --- man/man8/tc-etf.8 | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/man/man8/tc-etf.8 b/man/man8/tc-etf.8 index 30a12de7..4cb3b9e0 100644 --- a/man/man8/tc-etf.8 +++ b/man/man8/tc-etf.8 @@ -106,6 +106,16 @@ referred to as "Launch Time" or "Time-Based Scheduling" by the documentation of network interface controllers. The default is for this option to be disabled. +.TP +skip_sock_check +.br +.BR etf(8) +currently drops any packet which does not have a socket associated with it or +if the socket does not have SO_TXTIME socket option set. But, this will not +work if the launchtime is set by another entity inside the kernel (e.g. some +other Qdisc). Setting the skip_sock_check will skip checking for a socket +associated with the packet. + .SH EXAMPLES ETF is used to enforce a Quality of Service. It controls when each From a794d0523711d5ab4530483b9435ba627e07d28b Mon Sep 17 00:00:00 2001 From: Vedang Patel Date: Thu, 18 Jul 2019 12:55:43 -0700 Subject: [PATCH 10/26] tc: taprio: Update documentation Add documentation for the latest options, flags and txtime-delay, to the taprio manpage. This also adds an example to run tc in txtime offload mode. 
Signed-off-by: Vedang Patel Signed-off-by: David Ahern --- man/man8/tc-taprio.8 | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/man/man8/tc-taprio.8 b/man/man8/tc-taprio.8 index 850be9b0..e1d19ba1 100644 --- a/man/man8/tc-taprio.8 +++ b/man/man8/tc-taprio.8 @@ -112,6 +112,26 @@ means that traffic class 0 is "active" for that schedule entry. long that state defined by <command> and <gate mask> should be held before moving to the next entry. +.TP +flags +.br +Specifies different modes for taprio. Currently, only txtime-assist is +supported which can be enabled by setting it to 0x1. In this mode, taprio will +set the transmit timestamp depending on the interval in which the packet needs +to be transmitted. It will then utilize the +.BR etf(8) +qdisc to sort and transmit the packets at the right time. The second example +can be used as a reference to configure this mode. + +.TP +txtime-delay +.br +This parameter is specific to the txtime offload mode. It specifies the maximum +time a packet might take to reach the network card from the taprio qdisc. The +value should always be greater than the delta specified in the +.BR etf(8) +qdisc. + .SH EXAMPLES The following example shows how an traffic schedule with three traffic @@ -137,6 +157,26 @@ reference CLOCK_TAI. The schedule is composed of three entries each of clockid CLOCK_TAI .EE +Following is an example to enable the txtime offload mode in taprio. See +.BR etf(8) +for more information about configuring the ETF qdisc.
+ +.EX +# tc qdisc replace dev eth0 parent root handle 100 taprio \\ + num_tc 3 \\ + map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \\ + queues 1@0 1@0 1@0 \\ + base-time 1528743495910289987 \\ + sched-entry S 01 300000 \\ + sched-entry S 02 300000 \\ + sched-entry S 04 400000 \\ + flags 0x1 \\ + txtime-delay 200000 \\ + clockid CLOCK_TAI + +# tc qdisc replace dev $IFACE parent 100:1 etf skip_sock_check \\ + offload delta 200000 clockid CLOCK_TAI +.EE .SH AUTHORS Vinicius Costa Gomes From 39307384cea7be7f50036f18ab3e9925026399fd Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 4 Aug 2019 11:07:56 +0300 Subject: [PATCH 11/26] rdma: Add driver QP type string RDMA resource tracker now tracks driver QPs as well, add driver QP type string to qp_types_to_str function. Signed-off-by: Gal Pressman Signed-off-by: David Ahern --- rdma/res.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rdma/res.c b/rdma/res.c index ef863f14..97a7b964 100644 --- a/rdma/res.c +++ b/rdma/res.c @@ -148,9 +148,11 @@ const char *qp_types_to_str(uint8_t idx) "UC", "UD", "RAW_IPV6", "RAW_ETHERTYPE", "UNKNOWN", "RAW_PACKET", - "XRC_INI", "XRC_TGT" }; + "XRC_INI", "XRC_TGT", + [0xFF] = "DRIVER", + }; - if (idx < ARRAY_SIZE(qp_types_str)) + if (idx < ARRAY_SIZE(qp_types_str) && qp_types_str[idx]) return qp_types_str[idx]; return "UNKNOWN"; } From a8360dd3f25fc8fe4730ab39a8d7b359f397149f Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Fri, 2 Aug 2019 19:38:10 +0200 Subject: [PATCH 12/26] ip tunnel: add json output Add json support on iptunnel and ip6tunnel. The plain text output format should remain the same.
Signed-off-by: Andrea Claudi Signed-off-by: David Ahern --- ip/ip6tunnel.c | 66 ++++++++++++++++++++++++--------------- ip/iptunnel.c | 84 ++++++++++++++++++++++++++++++++------------------ ip/tunnel.c | 37 +++++++++++++++++----- 3 files changed, 125 insertions(+), 62 deletions(-) diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index b9b3dbfb..5399f91d 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -71,57 +71,76 @@ static void usage(void) static void print_tunnel(const void *t) { const struct ip6_tnl_parm2 *p = t; - char s1[1024]; - char s2[1024]; + SPRINT_BUF(b1); /* Do not use format_host() for local addr, * symbolic name will not be useful. */ - printf("%s: %s/ipv6 remote %s local %s", - p->name, - tnl_strproto(p->proto), - format_host_r(AF_INET6, 16, &p->raddr, s1, sizeof(s1)), - rt_addr_n2a_r(AF_INET6, 16, &p->laddr, s2, sizeof(s2))); + open_json_object(NULL); + print_color_string(PRINT_ANY, COLOR_IFNAME, "ifname", "%s: ", p->name); + snprintf(b1, sizeof(b1), "%s/ipv6", tnl_strproto(p->proto)); + print_string(PRINT_ANY, "mode", "%s ", b1); + print_string(PRINT_FP, NULL, "%s", "remote "); + print_color_string(PRINT_ANY, COLOR_INET6, "remote", "%s ", + format_host_r(AF_INET6, 16, &p->raddr, b1, sizeof(b1))); + print_string(PRINT_FP, NULL, "%s", "local "); + print_color_string(PRINT_ANY, COLOR_INET6, "local", "%s", + rt_addr_n2a_r(AF_INET6, 16, &p->laddr, b1, sizeof(b1))); + if (p->link) { const char *n = ll_index_to_name(p->link); if (n) - printf(" dev %s", n); + print_string(PRINT_ANY, "link", " dev %s", n); } if (p->flags & IP6_TNL_F_IGN_ENCAP_LIMIT) - printf(" encaplimit none"); + print_null(PRINT_ANY, "ip6_tnl_f_ign_encap_limit", + " encaplimit none", NULL); else - printf(" encaplimit %u", p->encap_limit); + print_uint(PRINT_ANY, "encap_limit", " encaplimit %u", + p->encap_limit); if (p->hop_limit) - printf(" hoplimit %u", p->hop_limit); + print_uint(PRINT_ANY, "hoplimit", " hoplimit %u", p->hop_limit); else - printf(" hoplimit inherit"); + 
print_string(PRINT_FP, "hoplimit", " hoplimit %s", "inherit"); - if (p->flags & IP6_TNL_F_USE_ORIG_TCLASS) - printf(" tclass inherit"); - else { + if (p->flags & IP6_TNL_F_USE_ORIG_TCLASS) { + print_null(PRINT_ANY, "ip6_tnl_f_use_orig_tclass", + " tclass inherit", NULL); + } else { __u32 val = ntohl(p->flowinfo & IP6_FLOWINFO_TCLASS); - printf(" tclass 0x%02x", (__u8)(val >> 20)); + snprintf(b1, sizeof(b1), "0x%02x", (__u8)(val >> 20)); + print_string(PRINT_ANY, "tclass", " tclass %s", b1); } - if (p->flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - printf(" flowlabel inherit"); - else - printf(" flowlabel 0x%05x", ntohl(p->flowinfo & IP6_FLOWINFO_FLOWLABEL)); + if (p->flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) { + print_null(PRINT_ANY, "ip6_tnl_f_use_orig_flowlabel", + " flowlabel inherit", NULL); + } else { + __u32 val = ntohl(p->flowinfo & IP6_FLOWINFO_FLOWLABEL); - printf(" (flowinfo 0x%08x)", ntohl(p->flowinfo)); + snprintf(b1, sizeof(b1), "0x%05x", val); + print_string(PRINT_ANY, "flowlabel", " flowlabel %s", b1); + } + + snprintf(b1, sizeof(b1), "0x%08x", ntohl(p->flowinfo)); + print_string(PRINT_ANY, "flowinfo", " (flowinfo %s)", b1); if (p->flags & IP6_TNL_F_RCV_DSCP_COPY) - printf(" dscp inherit"); + print_null(PRINT_ANY, "ip6_tnl_f_rcv_dscp_copy", + " dscp inherit", NULL); if (p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) - printf(" allow-localremote"); + print_null(PRINT_ANY, "ip6_tnl_f_allow_local_remote", + " allow-localremote", NULL); tnl_print_gre_flags(p->proto, p->i_flags, p->o_flags, p->i_key, p->o_key); + + close_json_object(); } static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) @@ -355,7 +374,6 @@ static int do_show(int argc, char **argv) return -1; print_tunnel(&p); - fputc('\n', stdout); return 0; } diff --git a/ip/iptunnel.c b/ip/iptunnel.c index 66929e75..696f3b92 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -289,17 +289,25 @@ static void print_tunnel(const void *t) { const struct ip_tunnel_parm *p = t; struct ip_tunnel_6rd 
ip6rd = {}; - char s1[1024]; - char s2[1024]; + SPRINT_BUF(b1); /* Do not use format_host() for local addr, * symbolic name will not be useful. */ - printf("%s: %s/ip remote %s local %s", - p->name, - tnl_strproto(p->iph.protocol), - p->iph.daddr ? format_host_r(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any", - p->iph.saddr ? rt_addr_n2a_r(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any"); + open_json_object(NULL); + print_color_string(PRINT_ANY, COLOR_IFNAME, "ifname", "%s: ", p->name); + snprintf(b1, sizeof(b1), "%s/ip", tnl_strproto(p->iph.protocol)); + print_string(PRINT_ANY, "mode", "%s ", b1); + print_null(PRINT_FP, NULL, "remote ", NULL); + print_color_string(PRINT_ANY, COLOR_INET, "remote", "%s ", + p->iph.daddr || is_json_context() + ? format_host_r(AF_INET, 4, &p->iph.daddr, b1, sizeof(b1)) + : "any"); + print_null(PRINT_FP, NULL, "local ", NULL); + print_color_string(PRINT_ANY, COLOR_INET, "local", "%s", + p->iph.saddr || is_json_context() + ? rt_addr_n2a_r(AF_INET, 4, &p->iph.saddr, b1, sizeof(b1)) + : "any"); if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) { struct ip_tunnel_prl prl[16] = {}; @@ -308,54 +316,70 @@ static void print_tunnel(const void *t) prl[0].datalen = sizeof(prl) - sizeof(prl[0]); prl[0].addr = htonl(INADDR_ANY); - if (!tnl_prl_ioctl(SIOCGETPRL, p->name, prl)) + if (!tnl_prl_ioctl(SIOCGETPRL, p->name, prl)) { for (i = 1; i < ARRAY_SIZE(prl); i++) { - if (prl[i].addr != htonl(INADDR_ANY)) { - printf(" %s %s ", - (prl[i].flags & PRL_DEFAULT) ? 
"pdr" : "pr", - format_host(AF_INET, 4, &prl[i].addr)); - } + if (prl[i].addr == htonl(INADDR_ANY)) + continue; + if (prl[i].flags & PRL_DEFAULT) + print_string(PRINT_ANY, "pdr", + " pdr %s", + format_host(AF_INET, 4, &prl[i].addr)); + else + print_string(PRINT_ANY, "pr", " pr %s", + format_host(AF_INET, 4, &prl[i].addr)); } + } } if (p->link) { const char *n = ll_index_to_name(p->link); if (n) - printf(" dev %s", n); + print_string(PRINT_ANY, "dev", " dev %s", n); } if (p->iph.ttl) - printf(" ttl %u", p->iph.ttl); + print_uint(PRINT_ANY, "ttl", " ttl %u", p->iph.ttl); else - printf(" ttl inherit"); + print_string(PRINT_FP, "ttl", " ttl %s", "inherit"); if (p->iph.tos) { - SPRINT_BUF(b1); - printf(" tos"); - if (p->iph.tos & 1) - printf(" inherit"); - if (p->iph.tos & ~1) - printf("%c%s ", p->iph.tos & 1 ? '/' : ' ', - rtnl_dsfield_n2a(p->iph.tos & ~1, b1, sizeof(b1))); + SPRINT_BUF(b2); + + if (p->iph.tos != 1) { + if (!is_json_context() && p->iph.tos & 1) + snprintf(b2, sizeof(b2), "%s%s", + p->iph.tos & 1 ? 
"inherit/" : "", + rtnl_dsfield_n2a(p->iph.tos & ~1, b1, sizeof(b1))); + else + snprintf(b2, sizeof(b2), "%s", + rtnl_dsfield_n2a(p->iph.tos, b1, sizeof(b1))); + print_string(PRINT_ANY, "tos", " tos %s", b2); + } else { + print_string(PRINT_FP, NULL, " tos %s", "inherit"); + } } if (!(p->iph.frag_off & htons(IP_DF))) - printf(" nopmtudisc"); + print_null(PRINT_ANY, "nopmtudisc", " nopmtudisc", NULL); if (p->iph.protocol == IPPROTO_IPV6 && !tnl_ioctl_get_6rd(p->name, &ip6rd) && ip6rd.prefixlen) { - printf(" 6rd-prefix %s/%u", - inet_ntop(AF_INET6, &ip6rd.prefix, s1, sizeof(s1)), - ip6rd.prefixlen); + print_string(PRINT_ANY, "6rd-prefix", " 6rd-prefix %s", + inet_ntop(AF_INET6, &ip6rd.prefix, b1, sizeof(b1))); + print_uint(PRINT_ANY, "6rd-prefixlen", "/%u", ip6rd.prefixlen); if (ip6rd.relay_prefix) { - printf(" 6rd-relay_prefix %s/%u", - format_host(AF_INET, 4, &ip6rd.relay_prefix), - ip6rd.relay_prefixlen); + print_string(PRINT_ANY, "6rd-relay_prefix", + " 6rd-relay_prefix %s", + format_host(AF_INET, 4, &ip6rd.relay_prefix)); + print_uint(PRINT_ANY, "6rd-relay_prefixlen", "/%u", + ip6rd.relay_prefixlen); } } tnl_print_gre_flags(p->iph.protocol, p->i_flags, p->o_flags, p->i_key, p->o_key); + + close_json_object(); } diff --git a/ip/tunnel.c b/ip/tunnel.c index 41b0ef31..88585cf3 100644 --- a/ip/tunnel.c +++ b/ip/tunnel.c @@ -314,24 +314,43 @@ void tnl_print_gre_flags(__u8 proto, { if ((i_flags & GRE_KEY) && (o_flags & GRE_KEY) && o_key == i_key) { - printf(" key %u", ntohl(i_key)); + print_uint(PRINT_ANY, "key", " key %u", ntohl(i_key)); } else { if (i_flags & GRE_KEY) - printf(" ikey %u", ntohl(i_key)); + print_uint(PRINT_ANY, "ikey", " ikey %u", ntohl(i_key)); if (o_flags & GRE_KEY) - printf(" okey %u", ntohl(o_key)); + print_uint(PRINT_ANY, "okey", " okey %u", ntohl(o_key)); } - if (proto == IPPROTO_GRE) { - if (i_flags & GRE_SEQ) + if (proto != IPPROTO_GRE) + return; + + open_json_array(PRINT_JSON, "flags"); + if (i_flags & GRE_SEQ) { + if (is_json_context()) + 
print_string(PRINT_JSON, NULL, "%s", "rx_drop_ooseq"); + else printf("%s Drop packets out of sequence.", _SL_); - if (i_flags & GRE_CSUM) + } + if (i_flags & GRE_CSUM) { + if (is_json_context()) + print_string(PRINT_JSON, NULL, "%s", "rx_csum"); + else printf("%s Checksum in received packet is required.", _SL_); - if (o_flags & GRE_SEQ) + } + if (o_flags & GRE_SEQ) { + if (is_json_context()) + print_string(PRINT_JSON, NULL, "%s", "tx_seq"); + else printf("%s Sequence packets on output.", _SL_); - if (o_flags & GRE_CSUM) + } + if (o_flags & GRE_CSUM) { + if (is_json_context()) + print_string(PRINT_JSON, NULL, "%s", "tx_csum"); + else printf("%s Checksum output packets.", _SL_); } + close_json_array(PRINT_JSON, NULL); } static void tnl_print_stats(const struct rtnl_link_stats64 *s) @@ -417,6 +436,7 @@ static int print_nlmsg_tunnel(struct nlmsghdr *n, void *arg) int do_tunnels_list(struct tnl_print_nlmsg_info *info) { + new_json_obj(json); if (rtnl_linkdump_req(&rth, preferred_family) < 0) { perror("Cannot send dump request\n"); return -1; @@ -426,6 +446,7 @@ int do_tunnels_list(struct tnl_print_nlmsg_info *info) fprintf(stderr, "Dump terminated\n"); return -1; } + delete_json_obj(); return 0; } From e3af717a8d410c97d9e0b985219ab8fc9ff18b79 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 18 Aug 2019 11:48:02 -0700 Subject: [PATCH 13/26] Update kernel headers Update kernel headers to commit: d83d508b74c4 ("Merge branch 'stmmac-next'") Signed-off-by: David Ahern --- include/uapi/linux/bpf.h | 37 ++++++++++++++++++- include/uapi/linux/can/netlink.h | 6 ++-- include/uapi/linux/devlink.h | 62 ++++++++++++++++++++++++++++++++ include/uapi/linux/if_bridge.h | 1 + 4 files changed, 102 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e75f97cf..1e462a53 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -134,6 +134,7 @@ enum bpf_map_type { BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, 
BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, }; /* Note that tracing related programs such as @@ -2713,6 +2714,33 @@ union bpf_attr { * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. + * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2824,7 +2852,8 @@ union bpf_attr { FN(strtoul), \ FN(sk_storage_get), \ FN(sk_storage_delete), \ - FN(send_signal), + FN(send_signal), \ + FN(tcp_gen_syncookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3507,6 +3536,10 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@ -3528,6 +3561,8 @@ struct bpf_flow_keys { __u32 ipv6_dst[4]; /* in6_addr; 
network order */ }; }; + __u32 flags; + __be32 flow_label; }; struct bpf_func_info { diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index f0c5e58b..c1f62640 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -40,15 +40,15 @@ struct can_bittiming { }; /* - * CAN harware-dependent bit-timing constant + * CAN hardware-dependent bit-timing constant * * Used for calculating and checking bit-timing parameters */ struct can_bittiming_const { char name[16]; /* Name of the CAN controller hardware */ - __u32 tseg1_min; /* Time segement 1 = prop_seg + phase_seg1 */ + __u32 tseg1_min; /* Time segment 1 = prop_seg + phase_seg1 */ __u32 tseg1_max; - __u32 tseg2_min; /* Time segement 2 = phase_seg2 */ + __u32 tseg2_min; /* Time segment 2 = phase_seg2 */ __u32 tseg2_max; __u32 sjw_max; /* Synchronisation jump width */ __u32 brp_min; /* Bit-rate prescaler */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index fc195cbd..3fb683be 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -107,6 +107,16 @@ enum devlink_command { DEVLINK_CMD_FLASH_UPDATE_END, /* notification only */ DEVLINK_CMD_FLASH_UPDATE_STATUS, /* notification only */ + DEVLINK_CMD_TRAP_GET, /* can dump */ + DEVLINK_CMD_TRAP_SET, + DEVLINK_CMD_TRAP_NEW, + DEVLINK_CMD_TRAP_DEL, + + DEVLINK_CMD_TRAP_GROUP_GET, /* can dump */ + DEVLINK_CMD_TRAP_GROUP_SET, + DEVLINK_CMD_TRAP_GROUP_NEW, + DEVLINK_CMD_TRAP_GROUP_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -194,6 +204,47 @@ enum devlink_param_fw_load_policy_value { DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH, }; +enum { + DEVLINK_ATTR_STATS_RX_PACKETS, /* u64 */ + DEVLINK_ATTR_STATS_RX_BYTES, /* u64 */ + + __DEVLINK_ATTR_STATS_MAX, + DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1 +}; + +/** + * enum devlink_trap_action - Packet trap action. 
+ * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not + * sent to the CPU. + * @DEVLINK_TRAP_ACTION_TRAP: The sole copy of the packet is sent to the CPU. + */ +enum devlink_trap_action { + DEVLINK_TRAP_ACTION_DROP, + DEVLINK_TRAP_ACTION_TRAP, +}; + +/** + * enum devlink_trap_type - Packet trap type. + * @DEVLINK_TRAP_TYPE_DROP: Trap reason is a drop. Trapped packets are only + * processed by devlink and not injected to the + * kernel's Rx path. + * @DEVLINK_TRAP_TYPE_EXCEPTION: Trap reason is an exception. Packet was not + * forwarded as intended due to an exception + * (e.g., missing neighbour entry) and trapped to + * control plane for resolution. Trapped packets + * are processed by devlink and injected to + * the kernel's Rx path. + */ +enum devlink_trap_type { + DEVLINK_TRAP_TYPE_DROP, + DEVLINK_TRAP_TYPE_EXCEPTION, +}; + +enum { + /* Trap can report input port as metadata */ + DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! 
*/ DEVLINK_ATTR_UNSPEC, @@ -348,6 +399,17 @@ enum devlink_attr { DEVLINK_ATTR_PORT_PCI_PF_NUMBER, /* u16 */ DEVLINK_ATTR_PORT_PCI_VF_NUMBER, /* u16 */ + DEVLINK_ATTR_STATS, /* nested */ + + DEVLINK_ATTR_TRAP_NAME, /* string */ + /* enum devlink_trap_action */ + DEVLINK_ATTR_TRAP_ACTION, /* u8 */ + /* enum devlink_trap_type */ + DEVLINK_ATTR_TRAP_TYPE, /* u8 */ + DEVLINK_ATTR_TRAP_GENERIC, /* flag */ + DEVLINK_ATTR_TRAP_METADATA, /* nested */ + DEVLINK_ATTR_TRAP_GROUP_NAME, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 04f763cf..31fc51bd 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -237,6 +237,7 @@ struct br_mdb_entry { #define MDB_PERMANENT 1 __u8 state; #define MDB_FLAGS_OFFLOAD (1 << 0) +#define MDB_FLAGS_FAST_LEAVE (1 << 1) __u8 flags; __u16 vid; struct { From b83220db3767eb8bcaa6d4c286947a9cbd0472fd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 13 Aug 2019 11:31:40 +0300 Subject: [PATCH 14/26] devlink: Increase number of supported options Currently, the number of supported options is capped at 32 which is a problem given we are about to add a few more and go over the limit. Increase the limit to 64 options. 
Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 91c85dc1..4ed240e2 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -235,7 +235,7 @@ static void ifname_map_free(struct ifname_map *ifname_map) #define DL_OPT_HEALTH_REPORTER_AUTO_RECOVER BIT(28) struct dl_opts { - uint32_t present; /* flags of present items */ + uint64_t present; /* flags of present items */ char *bus_name; char *dev_name; uint32_t port_index; @@ -735,7 +735,7 @@ static int dl_argv_handle_port(struct dl *dl, char **p_bus_name, static int dl_argv_handle_both(struct dl *dl, char **p_bus_name, char **p_dev_name, uint32_t *p_port_index, - uint32_t *p_handle_bit) + uint64_t *p_handle_bit) { char *str = dl_argv_next(dl); unsigned int slash_count; @@ -1015,7 +1015,7 @@ static int param_cmode_get(const char *cmodestr, } struct dl_args_metadata { - uint32_t o_flag; + uint64_t o_flag; char err_msg[DL_ARGS_REQUIRED_MAX_ERR_LEN]; }; @@ -1042,10 +1042,10 @@ static const struct dl_args_metadata dl_args_required[] = { {DL_OPT_HEALTH_REPORTER_NAME, "Reporter's name is expected."}, }; -static int dl_args_finding_required_validate(uint32_t o_required, - uint32_t o_found) +static int dl_args_finding_required_validate(uint64_t o_required, + uint64_t o_found) { - uint32_t o_flag; + uint64_t o_flag; int i; for (i = 0; i < ARRAY_SIZE(dl_args_required); i++) { @@ -1058,16 +1058,16 @@ static int dl_args_finding_required_validate(uint32_t o_required, return 0; } -static int dl_argv_parse(struct dl *dl, uint32_t o_required, - uint32_t o_optional) +static int dl_argv_parse(struct dl *dl, uint64_t o_required, + uint64_t o_optional) { struct dl_opts *opts = &dl->opts; - uint32_t o_all = o_required | o_optional; - uint32_t o_found = 0; + uint64_t o_all = o_required | o_optional; + uint64_t o_found = 0; int err; if (o_required & 
DL_OPT_HANDLE && o_required & DL_OPT_HANDLEP) { - uint32_t handle_bit; + uint64_t handle_bit; err = dl_argv_handle_both(dl, &opts->bus_name, &opts->dev_name, &opts->port_index, &handle_bit); @@ -1446,7 +1446,7 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) } static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl, - uint32_t o_required, uint32_t o_optional) + uint64_t o_required, uint64_t o_optional) { int err; From ef12d6dafaeb9e3fe6dd1c6ebadc01af9e7f476c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 13 Aug 2019 11:31:41 +0300 Subject: [PATCH 15/26] devlink: Add devlink trap set and show commands The trap set command allows the user to set the action of an individual trap. Example: # devlink trap set netdevsim/netdevsim10 trap blackhole_route action trap The trap show command allows the user to get the current status of an individual trap or a dump of all traps in case one is not specified. When '-s' is specified the trap's statistics are shown. When '-v' is specified the metadata types the trap can provide are shown. 
Example: # devlink -jvps trap show netdevsim/netdevsim10 trap blackhole_route { "trap": { "netdevsim/netdevsim10": [ { "name": "blackhole_route", "type": "drop", "generic": true, "action": "trap", "group": "l3_drops", "metadata": [ "input_port" ], "stats": { "rx": { "bytes": 0, "packets": 0 } } } ] } } Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 293 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 288 insertions(+), 5 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 4ed240e2..81fff442 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -233,6 +233,8 @@ static void ifname_map_free(struct ifname_map *ifname_map) #define DL_OPT_HEALTH_REPORTER_NAME BIT(27) #define DL_OPT_HEALTH_REPORTER_GRACEFUL_PERIOD BIT(27) #define DL_OPT_HEALTH_REPORTER_AUTO_RECOVER BIT(28) +#define DL_OPT_TRAP_NAME BIT(29) +#define DL_OPT_TRAP_ACTION BIT(30) struct dl_opts { uint64_t present; /* flags of present items */ @@ -269,6 +271,8 @@ struct dl_opts { const char *reporter_name; uint64_t reporter_graceful_period; bool reporter_auto_recover; + const char *trap_name; + enum devlink_trap_action trap_action; }; struct dl { @@ -282,6 +286,7 @@ struct dl { bool json_output; bool pretty_output; bool verbose; + bool stats; struct { bool present; char *bus_name; @@ -436,6 +441,19 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT] = MNL_TYPE_U64, [DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS] = MNL_TYPE_U64, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = MNL_TYPE_U64, + [DEVLINK_ATTR_STATS] = MNL_TYPE_NESTED, + [DEVLINK_ATTR_TRAP_NAME] = MNL_TYPE_STRING, + [DEVLINK_ATTR_TRAP_ACTION] = MNL_TYPE_U8, + [DEVLINK_ATTR_TRAP_TYPE] = MNL_TYPE_U8, + [DEVLINK_ATTR_TRAP_GENERIC] = MNL_TYPE_FLAG, + [DEVLINK_ATTR_TRAP_METADATA] = MNL_TYPE_NESTED, + [DEVLINK_ATTR_TRAP_GROUP_NAME] = MNL_TYPE_STRING, +}; + +static const enum mnl_attr_data_type 
+devlink_stats_policy[DEVLINK_ATTR_STATS_MAX + 1] = { + [DEVLINK_ATTR_STATS_RX_PACKETS] = MNL_TYPE_U64, + [DEVLINK_ATTR_STATS_RX_BYTES] = MNL_TYPE_U64, }; static int attr_cb(const struct nlattr *attr, void *data) @@ -454,6 +472,25 @@ static int attr_cb(const struct nlattr *attr, void *data) return MNL_CB_OK; } +static int attr_stats_cb(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + int type; + + /* Allow the tool to work on top of newer kernels that might contain + * more attributes. + */ + if (mnl_attr_type_valid(attr, DEVLINK_ATTR_STATS_MAX) < 0) + return MNL_CB_OK; + + type = mnl_attr_get_type(attr); + if (mnl_attr_validate(attr, devlink_stats_policy[type]) < 0) + return MNL_CB_ERROR; + + tb[type] = attr; + return MNL_CB_OK; +} + static int ifname_map_cb(const struct nlmsghdr *nlh, void *data) { struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; @@ -1014,6 +1051,20 @@ static int param_cmode_get(const char *cmodestr, return 0; } +static int trap_action_get(const char *actionstr, + enum devlink_trap_action *p_action) +{ + if (strcmp(actionstr, "drop") == 0) { + *p_action = DEVLINK_TRAP_ACTION_DROP; + } else if (strcmp(actionstr, "trap") == 0) { + *p_action = DEVLINK_TRAP_ACTION_TRAP; + } else { + pr_err("Unknown trap action \"%s\"\n", actionstr); + return -EINVAL; + } + return 0; +} + struct dl_args_metadata { uint64_t o_flag; char err_msg[DL_ARGS_REQUIRED_MAX_ERR_LEN]; @@ -1040,6 +1091,7 @@ static const struct dl_args_metadata dl_args_required[] = { {DL_OPT_REGION_ADDRESS, "Region address value expected."}, {DL_OPT_REGION_LENGTH, "Region length value expected."}, {DL_OPT_HEALTH_REPORTER_NAME, "Reporter's name is expected."}, + {DL_OPT_TRAP_NAME, "Trap's name is expected."}, }; static int dl_args_finding_required_validate(uint64_t o_required, @@ -1329,6 +1381,25 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, if (err) return err; o_found |= DL_OPT_HEALTH_REPORTER_AUTO_RECOVER; + } else if (dl_argv_match(dl, "trap") && + 
(o_all & DL_OPT_TRAP_NAME)) { + dl_arg_inc(dl); + err = dl_argv_str(dl, &opts->trap_name); + if (err) + return err; + o_found |= DL_OPT_TRAP_NAME; + } else if (dl_argv_match(dl, "action") && + (o_all & DL_OPT_TRAP_ACTION)) { + const char *actionstr; + + dl_arg_inc(dl); + err = dl_argv_str(dl, &actionstr); + if (err) + return err; + err = trap_action_get(actionstr, &opts->trap_action); + if (err) + return err; + o_found |= DL_OPT_TRAP_ACTION; } else { pr_err("Unknown option \"%s\"\n", dl_argv(dl)); return -EINVAL; @@ -1442,6 +1513,12 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) if (opts->present & DL_OPT_HEALTH_REPORTER_AUTO_RECOVER) mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, opts->reporter_auto_recover); + if (opts->present & DL_OPT_TRAP_NAME) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_NAME, + opts->trap_name); + if (opts->present & DL_OPT_TRAP_ACTION) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, + opts->trap_action); } @@ -1945,6 +2022,30 @@ static void pr_out_entry_end(struct dl *dl) __pr_out_newline(); } +static void pr_out_stats(struct dl *dl, struct nlattr *nla_stats) +{ + struct nlattr *tb[DEVLINK_ATTR_STATS_MAX + 1] = {}; + int err; + + if (!dl->stats) + return; + + err = mnl_attr_parse_nested(nla_stats, attr_stats_cb, tb); + if (err != MNL_CB_OK) + return; + + pr_out_object_start(dl, "stats"); + pr_out_object_start(dl, "rx"); + if (tb[DEVLINK_ATTR_STATS_RX_BYTES]) + pr_out_u64(dl, "bytes", + mnl_attr_get_u64(tb[DEVLINK_ATTR_STATS_RX_BYTES])); + if (tb[DEVLINK_ATTR_STATS_RX_PACKETS]) + pr_out_u64(dl, "packets", + mnl_attr_get_u64(tb[DEVLINK_ATTR_STATS_RX_PACKETS])); + pr_out_object_end(dl); + pr_out_object_end(dl); +} + static const char *param_cmode_name(uint8_t cmode) { switch (cmode) { @@ -3764,6 +3865,10 @@ static const char *cmd_name(uint8_t cmd) case DEVLINK_CMD_REGION_SET: return "set"; case DEVLINK_CMD_REGION_NEW: return "new"; case DEVLINK_CMD_REGION_DEL: return "del"; + case DEVLINK_CMD_TRAP_GET: return 
"get"; + case DEVLINK_CMD_TRAP_SET: return "set"; + case DEVLINK_CMD_TRAP_NEW: return "new"; + case DEVLINK_CMD_TRAP_DEL: return "del"; default: return ""; } } @@ -3792,6 +3897,11 @@ static const char *cmd_obj(uint8_t cmd) case DEVLINK_CMD_REGION_NEW: case DEVLINK_CMD_REGION_DEL: return "region"; + case DEVLINK_CMD_TRAP_GET: + case DEVLINK_CMD_TRAP_SET: + case DEVLINK_CMD_TRAP_NEW: + case DEVLINK_CMD_TRAP_DEL: + return "trap"; default: return ""; } } @@ -3817,6 +3927,7 @@ static bool cmd_filter_check(struct dl *dl, uint8_t cmd) } static void pr_out_region(struct dl *dl, struct nlattr **tb); +static void pr_out_trap(struct dl *dl, struct nlattr **tb, bool array); static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) { @@ -3872,6 +3983,22 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) pr_out_mon_header(genl->cmd); pr_out_region(dl, tb); break; + case DEVLINK_CMD_TRAP_GET: /* fall through */ + case DEVLINK_CMD_TRAP_SET: /* fall through */ + case DEVLINK_CMD_TRAP_NEW: /* fall through */ + case DEVLINK_CMD_TRAP_DEL: + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_TRAP_NAME] || + !tb[DEVLINK_ATTR_TRAP_TYPE] || + !tb[DEVLINK_ATTR_TRAP_ACTION] || + !tb[DEVLINK_ATTR_TRAP_GROUP_NAME] || + !tb[DEVLINK_ATTR_TRAP_METADATA] || + !tb[DEVLINK_ATTR_STATS]) + return MNL_CB_ERROR; + pr_out_mon_header(genl->cmd); + pr_out_trap(dl, tb, false); + break; } return MNL_CB_OK; } @@ -3885,7 +4012,8 @@ static int cmd_mon_show(struct dl *dl) while ((cur_obj = dl_argv_index(dl, index++))) { if (strcmp(cur_obj, "all") != 0 && strcmp(cur_obj, "dev") != 0 && - strcmp(cur_obj, "port") != 0) { + strcmp(cur_obj, "port") != 0 && + strcmp(cur_obj, "trap") != 0) { pr_err("Unknown object \"%s\"\n", cur_obj); return -EINVAL; } @@ -3902,7 +4030,7 @@ static int cmd_mon_show(struct dl *dl) static void cmd_mon_help(void) { pr_err("Usage: devlink monitor [ all | OBJECT-LIST ]\n" - "where 
OBJECT-LIST := { dev | port }\n"); + "where OBJECT-LIST := { dev | port | trap }\n"); } static int cmd_mon(struct dl *dl) @@ -6330,12 +6458,160 @@ static int cmd_health(struct dl *dl) return -ENOENT; } +static const char *trap_type_name(uint8_t type) +{ + switch (type) { + case DEVLINK_TRAP_TYPE_DROP: + return "drop"; + case DEVLINK_TRAP_TYPE_EXCEPTION: + return "exception"; + default: + return ""; + } +} + +static const char *trap_action_name(uint8_t action) +{ + switch (action) { + case DEVLINK_TRAP_ACTION_DROP: + return "drop"; + case DEVLINK_TRAP_ACTION_TRAP: + return "trap"; + default: + return ""; + } +} + +static const char *trap_metadata_name(const struct nlattr *attr) +{ + switch (attr->nla_type) { + case DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT: + return "input_port"; + default: + return ""; + } +} +static void pr_out_trap_metadata(struct dl *dl, struct nlattr *attr) +{ + struct nlattr *attr_metadata; + + pr_out_array_start(dl, "metadata"); + mnl_attr_for_each_nested(attr_metadata, attr) + pr_out_str_value(dl, trap_metadata_name(attr_metadata)); + pr_out_array_end(dl); +} + +static void pr_out_trap(struct dl *dl, struct nlattr **tb, bool array) +{ + uint8_t action = mnl_attr_get_u8(tb[DEVLINK_ATTR_TRAP_ACTION]); + uint8_t type = mnl_attr_get_u8(tb[DEVLINK_ATTR_TRAP_TYPE]); + + if (array) + pr_out_handle_start_arr(dl, tb); + else + __pr_out_handle_start(dl, tb, true, false); + + pr_out_str(dl, "name", mnl_attr_get_str(tb[DEVLINK_ATTR_TRAP_NAME])); + pr_out_str(dl, "type", trap_type_name(type)); + pr_out_bool(dl, "generic", !!tb[DEVLINK_ATTR_TRAP_GENERIC]); + pr_out_str(dl, "action", trap_action_name(action)); + pr_out_str(dl, "group", + mnl_attr_get_str(tb[DEVLINK_ATTR_TRAP_GROUP_NAME])); + if (dl->verbose) + pr_out_trap_metadata(dl, tb[DEVLINK_ATTR_TRAP_METADATA]); + pr_out_stats(dl, tb[DEVLINK_ATTR_STATS]); + pr_out_handle_end(dl); +} + +static int cmd_trap_show_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = 
mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + struct dl *dl = data; + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_TRAP_NAME] || !tb[DEVLINK_ATTR_TRAP_TYPE] || + !tb[DEVLINK_ATTR_TRAP_ACTION] || + !tb[DEVLINK_ATTR_TRAP_GROUP_NAME] || + !tb[DEVLINK_ATTR_TRAP_METADATA] || !tb[DEVLINK_ATTR_STATS]) + return MNL_CB_ERROR; + + pr_out_trap(dl, tb, true); + + return MNL_CB_OK; +} + +static void cmd_trap_help(void) +{ + pr_err("Usage: devlink trap set DEV trap TRAP [ action { trap | drop } ]\n"); + pr_err(" devlink trap show [ DEV trap TRAP ]\n"); +} + +static int cmd_trap_show(struct dl *dl) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; + struct nlmsghdr *nlh; + int err; + + if (dl_argc(dl) == 0) + flags |= NLM_F_DUMP; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_TRAP_GET, flags); + + if (dl_argc(dl) > 0) { + err = dl_argv_parse_put(nlh, dl, + DL_OPT_HANDLE | DL_OPT_TRAP_NAME, 0); + if (err) + return err; + } + + pr_out_section_start(dl, "trap"); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_trap_show_cb, dl); + pr_out_section_end(dl); + + return err; +} + +static int cmd_trap_set(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_TRAP_SET, + NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE | DL_OPT_TRAP_NAME, + DL_OPT_TRAP_ACTION); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + +static int cmd_trap(struct dl *dl) +{ + if (dl_argv_match(dl, "help")) { + cmd_trap_help(); + return 0; + } else if (dl_argv_match(dl, "show") || + dl_argv_match(dl, "list") || dl_no_arg(dl)) { + dl_arg_inc(dl); + return cmd_trap_show(dl); + } else if (dl_argv_match(dl, "set")) { + dl_arg_inc(dl); + return cmd_trap_set(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +} + static void help(void) { pr_err("Usage: devlink [ 
OPTIONS ] OBJECT { COMMAND | help }\n" " devlink [ -f[orce] ] -b[atch] filename\n" - "where OBJECT := { dev | port | sb | monitor | dpipe | resource | region | health }\n" - " OPTIONS := { -V[ersion] | -n[o-nice-names] | -j[son] | -p[retty] | -v[erbose] }\n"); + "where OBJECT := { dev | port | sb | monitor | dpipe | resource | region | health | trap }\n" + " OPTIONS := { -V[ersion] | -n[o-nice-names] | -j[son] | -p[retty] | -v[erbose] -s[tatistics] }\n"); } static int dl_cmd(struct dl *dl, int argc, char **argv) @@ -6370,6 +6646,9 @@ static int dl_cmd(struct dl *dl, int argc, char **argv) } else if (dl_argv_match(dl, "health")) { dl_arg_inc(dl); return cmd_health(dl); + } else if (dl_argv_match(dl, "trap")) { + dl_arg_inc(dl); + return cmd_trap(dl); } pr_err("Object \"%s\" not found\n", dl_argv(dl)); return -ENOENT; @@ -6479,6 +6758,7 @@ int main(int argc, char **argv) { "json", no_argument, NULL, 'j' }, { "pretty", no_argument, NULL, 'p' }, { "verbose", no_argument, NULL, 'v' }, + { "statistics", no_argument, NULL, 's' }, { NULL, 0, NULL, 0 } }; const char *batch_file = NULL; @@ -6494,7 +6774,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - while ((opt = getopt_long(argc, argv, "Vfb:njpv", + while ((opt = getopt_long(argc, argv, "Vfb:njpvs", long_options, NULL)) >= 0) { switch (opt) { @@ -6520,6 +6800,9 @@ int main(int argc, char **argv) case 'v': dl->verbose = true; break; + case 's': + dl->stats = true; + break; default: pr_err("Unknown option.\n"); help(); From 4ede9e9d56206d8fc6d97b94d079c8a1c2e934e1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 13 Aug 2019 11:31:42 +0300 Subject: [PATCH 16/26] devlink: Add devlink trap group set and show commands These commands are similar to the trap set and show commands, but operate on a trap group and not individual traps. 
Example: # devlink trap group set netdevsim/netdevsim10 group l3_drops action trap # devlink -jps trap group show netdevsim/netdevsim10 group l3_drops { "trap_group": { "netdevsim/netdevsim10": [ { "name": "l3_drops", "generic": true, "stats": { "rx": { "bytes": 0, "packets": 0 } } } ] } } Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 135 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 2 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 81fff442..2f084c02 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -235,6 +235,7 @@ static void ifname_map_free(struct ifname_map *ifname_map) #define DL_OPT_HEALTH_REPORTER_AUTO_RECOVER BIT(28) #define DL_OPT_TRAP_NAME BIT(29) #define DL_OPT_TRAP_ACTION BIT(30) +#define DL_OPT_TRAP_GROUP_NAME BIT(31) struct dl_opts { uint64_t present; /* flags of present items */ @@ -272,6 +273,7 @@ struct dl_opts { uint64_t reporter_graceful_period; bool reporter_auto_recover; const char *trap_name; + const char *trap_group_name; enum devlink_trap_action trap_action; }; @@ -1092,6 +1094,7 @@ static const struct dl_args_metadata dl_args_required[] = { {DL_OPT_REGION_LENGTH, "Region length value expected."}, {DL_OPT_HEALTH_REPORTER_NAME, "Reporter's name is expected."}, {DL_OPT_TRAP_NAME, "Trap's name is expected."}, + {DL_OPT_TRAP_GROUP_NAME, "Trap group's name is expected."}, }; static int dl_args_finding_required_validate(uint64_t o_required, @@ -1388,6 +1391,13 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, if (err) return err; o_found |= DL_OPT_TRAP_NAME; + } else if (dl_argv_match(dl, "group") && + (o_all & DL_OPT_TRAP_GROUP_NAME)) { + dl_arg_inc(dl); + err = dl_argv_str(dl, &opts->trap_group_name); + if (err) + return err; + o_found |= DL_OPT_TRAP_GROUP_NAME; } else if (dl_argv_match(dl, "action") && (o_all & DL_OPT_TRAP_ACTION)) { const char *actionstr; @@ -1516,6 +1526,9 @@ static void dl_opts_put(struct 
nlmsghdr *nlh, struct dl *dl) if (opts->present & DL_OPT_TRAP_NAME) mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_NAME, opts->trap_name); + if (opts->present & DL_OPT_TRAP_GROUP_NAME) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_GROUP_NAME, + opts->trap_group_name); if (opts->present & DL_OPT_TRAP_ACTION) mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, opts->trap_action); @@ -3869,6 +3882,10 @@ static const char *cmd_name(uint8_t cmd) case DEVLINK_CMD_TRAP_SET: return "set"; case DEVLINK_CMD_TRAP_NEW: return "new"; case DEVLINK_CMD_TRAP_DEL: return "del"; + case DEVLINK_CMD_TRAP_GROUP_GET: return "get"; + case DEVLINK_CMD_TRAP_GROUP_SET: return "set"; + case DEVLINK_CMD_TRAP_GROUP_NEW: return "new"; + case DEVLINK_CMD_TRAP_GROUP_DEL: return "del"; default: return ""; } } @@ -3902,6 +3919,11 @@ static const char *cmd_obj(uint8_t cmd) case DEVLINK_CMD_TRAP_NEW: case DEVLINK_CMD_TRAP_DEL: return "trap"; + case DEVLINK_CMD_TRAP_GROUP_GET: + case DEVLINK_CMD_TRAP_GROUP_SET: + case DEVLINK_CMD_TRAP_GROUP_NEW: + case DEVLINK_CMD_TRAP_GROUP_DEL: + return "trap-group"; default: return ""; } } @@ -3928,6 +3950,7 @@ static bool cmd_filter_check(struct dl *dl, uint8_t cmd) static void pr_out_region(struct dl *dl, struct nlattr **tb); static void pr_out_trap(struct dl *dl, struct nlattr **tb, bool array); +static void pr_out_trap_group(struct dl *dl, struct nlattr **tb, bool array); static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) { @@ -3999,6 +4022,18 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) pr_out_mon_header(genl->cmd); pr_out_trap(dl, tb, false); break; + case DEVLINK_CMD_TRAP_GROUP_GET: /* fall through */ + case DEVLINK_CMD_TRAP_GROUP_SET: /* fall through */ + case DEVLINK_CMD_TRAP_GROUP_NEW: /* fall through */ + case DEVLINK_CMD_TRAP_GROUP_DEL: + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_TRAP_GROUP_NAME] || + !tb[DEVLINK_ATTR_STATS]) + return 
MNL_CB_ERROR; + pr_out_mon_header(genl->cmd); + pr_out_trap_group(dl, tb, false); + break; } return MNL_CB_OK; } @@ -4013,7 +4048,8 @@ static int cmd_mon_show(struct dl *dl) if (strcmp(cur_obj, "all") != 0 && strcmp(cur_obj, "dev") != 0 && strcmp(cur_obj, "port") != 0 && - strcmp(cur_obj, "trap") != 0) { + strcmp(cur_obj, "trap") != 0 && + strcmp(cur_obj, "trap-group") != 0) { pr_err("Unknown object \"%s\"\n", cur_obj); return -EINVAL; } @@ -4030,7 +4066,7 @@ static int cmd_mon_show(struct dl *dl) static void cmd_mon_help(void) { pr_err("Usage: devlink monitor [ all | OBJECT-LIST ]\n" - "where OBJECT-LIST := { dev | port | trap }\n"); + "where OBJECT-LIST := { dev | port | trap | trap-group }\n"); } static int cmd_mon(struct dl *dl) @@ -6546,6 +6582,8 @@ static void cmd_trap_help(void) { pr_err("Usage: devlink trap set DEV trap TRAP [ action { trap | drop } ]\n"); pr_err(" devlink trap show [ DEV trap TRAP ]\n"); + pr_err(" devlink trap group set DEV group GROUP [ action { trap | drop } ]\n"); + pr_err(" devlink trap group show [ DEV group GROUP ]\n"); } static int cmd_trap_show(struct dl *dl) @@ -6589,6 +6627,96 @@ static int cmd_trap_set(struct dl *dl) return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); } +static void pr_out_trap_group(struct dl *dl, struct nlattr **tb, bool array) +{ + if (array) + pr_out_handle_start_arr(dl, tb); + else + __pr_out_handle_start(dl, tb, true, false); + + pr_out_str(dl, "name", + mnl_attr_get_str(tb[DEVLINK_ATTR_TRAP_GROUP_NAME])); + pr_out_bool(dl, "generic", !!tb[DEVLINK_ATTR_TRAP_GENERIC]); + pr_out_stats(dl, tb[DEVLINK_ATTR_STATS]); + pr_out_handle_end(dl); +} + +static int cmd_trap_group_show_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + struct dl *dl = data; + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_TRAP_GROUP_NAME] || 
!tb[DEVLINK_ATTR_STATS]) + return MNL_CB_ERROR; + + pr_out_trap_group(dl, tb, true); + + return MNL_CB_OK; +} + +static int cmd_trap_group_show(struct dl *dl) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; + struct nlmsghdr *nlh; + int err; + + if (dl_argc(dl) == 0) + flags |= NLM_F_DUMP; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_TRAP_GROUP_GET, flags); + + if (dl_argc(dl) > 0) { + err = dl_argv_parse_put(nlh, dl, + DL_OPT_HANDLE | DL_OPT_TRAP_GROUP_NAME, + 0); + if (err) + return err; + } + + pr_out_section_start(dl, "trap_group"); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_trap_group_show_cb, dl); + pr_out_section_end(dl); + + return err; +} + +static int cmd_trap_group_set(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_TRAP_GROUP_SET, + NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, + DL_OPT_HANDLE | DL_OPT_TRAP_GROUP_NAME, + DL_OPT_TRAP_ACTION); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + +static int cmd_trap_group(struct dl *dl) +{ + if (dl_argv_match(dl, "help")) { + cmd_trap_help(); + return 0; + } else if (dl_argv_match(dl, "show") || + dl_argv_match(dl, "list") || dl_no_arg(dl)) { + dl_arg_inc(dl); + return cmd_trap_group_show(dl); + } else if (dl_argv_match(dl, "set")) { + dl_arg_inc(dl); + return cmd_trap_group_set(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +} + static int cmd_trap(struct dl *dl) { if (dl_argv_match(dl, "help")) { @@ -6601,6 +6729,9 @@ static int cmd_trap(struct dl *dl) } else if (dl_argv_match(dl, "set")) { dl_arg_inc(dl); return cmd_trap_set(dl); + } else if (dl_argv_match(dl, "group")) { + dl_arg_inc(dl); + return cmd_trap_group(dl); } pr_err("Command \"%s\" not found\n", dl_argv(dl)); return -ENOENT; From a7a56f6f9d0b19fc235ca5961cfaee32854831ba Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 13 Aug 2019 11:31:43 +0300 Subject: [PATCH 17/26] devlink: Add man page 
for devlink-trap Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David Ahern --- man/man8/devlink-monitor.8 | 3 +- man/man8/devlink-trap.8 | 138 +++++++++++++++++++++++++++++++++++++ man/man8/devlink.8 | 11 ++- 3 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 man/man8/devlink-trap.8 diff --git a/man/man8/devlink-monitor.8 b/man/man8/devlink-monitor.8 index 13fe641d..fffab3a4 100644 --- a/man/man8/devlink-monitor.8 +++ b/man/man8/devlink-monitor.8 @@ -21,7 +21,7 @@ command is the first in the command line and then the object list. .I OBJECT-LIST is the list of object types that we want to monitor. It may contain -.BR dev ", " port ". +.BR dev ", " port ", " trap ", " trap-group . .B devlink opens Devlink Netlink socket, listens on it and dumps state changes. @@ -31,6 +31,7 @@ opens Devlink Netlink socket, listens on it and dumps state changes. .BR devlink-dev (8), .BR devlink-sb (8), .BR devlink-port (8), +.BR devlink-trap (8), .br .SH AUTHOR diff --git a/man/man8/devlink-trap.8 b/man/man8/devlink-trap.8 new file mode 100644 index 00000000..4f079eb8 --- /dev/null +++ b/man/man8/devlink-trap.8 @@ -0,0 +1,138 @@ +.TH DEVLINK\-TRAP 8 "2 August 2019" "iproute2" "Linux" +.SH NAME +devlink-trap \- devlink trap configuration +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B devlink +.RI "[ " OPTIONS " ]" +.B trap +.RI "{ " COMMAND " |" +.BR help " }" +.sp + +.ti -8 +.IR OPTIONS " := { " +\fB\-v\fR[\fIerbose\fR] | +\fB\-s\fR[\fItatistics\fR] } + +.ti -8 +.B "devlink trap show" +.RI "[ " DEV +.B trap +.IR TRAP " ]" + +.ti -8 +.BI "devlink trap set " DEV " trap " TRAP +.RB "[ " action " { " trap " | " drop " } ]" + +.ti -8 +.B "devlink trap group show" +.RI "[ " DEV +.B group +.IR GROUP " ]" + +.ti -8 +.BI "devlink trap group set " DEV " group " GROUP +.RB "[ " action " { " trap " | " drop " } ]" + +.ti -8 +.B devlink trap help + +.SH "DESCRIPTION" +.SS devlink trap show - display available packet traps and their attributes + +.PP +.I "DEV" +- 
specifies the devlink device from which to show packet traps. +If this argument is omitted all packet traps of all devices are listed. + +.PP +.BI "trap " TRAP +- specifies the packet trap. +Only applicable if a devlink device is also specified. + +.SS devlink trap set - set attributes of a packet trap + +.PP +.I "DEV" +- specifies the devlink device the packet trap belongs to. + +.PP +.BI "trap " TRAP +- specifies the packet trap. + +.TP +.BR action " { " trap " | " drop " } " +packet trap action. + +.I trap +- the sole copy of the packet is sent to the CPU. + +.I drop +- the packet is dropped by the underlying device and a copy is not sent to the CPU. + +.SS devlink trap group show - display available packet trap groups and their attributes + +.PP +.I "DEV" +- specifies the devlink device from which to show packet trap groups. +If this argument is omitted all packet trap groups of all devices are listed. + +.PP +.BI "group " GROUP +- specifies the packet trap group. +Only applicable if a devlink device is also specified. + +.SS devlink trap group set - set attributes of a packet trap group + +.PP +.I "DEV" +- specifies the devlink device the packet trap group belongs to. + +.PP +.BI "group " GROUP +- specifies the packet trap group. + +.TP +.BR action " { " trap " | " drop " } " +packet trap action. The action is set for all the packet traps that are members of the +trap group. The actions of non-drop traps cannot be changed and are thus +skipped. + +.SH "EXAMPLES" +.PP +devlink trap show +.RS 4 +List available packet traps. +.RE +.PP +devlink trap group show +.RS 4 +List available packet trap groups. +.RE +.PP +devlink -vs trap show pci/0000:01:00.0 trap source_mac_is_multicast +.RS 4 +Show attributes and statistics of a specific packet trap. +.RE +.PP +devlink -s trap group show pci/0000:01:00.0 group l2_drops +.RS 4 +Show attributes and statistics of a specific packet trap group. 
+.RE +.PP +devlink trap set pci/0000:01:00.0 trap source_mac_is_multicast action trap +.RS 4 +Set the action of a specific packet trap to 'trap'. + +.SH SEE ALSO +.BR devlink (8), +.BR devlink-dev (8), +.BR devlink-monitor (8), +.br + +.SH AUTHOR +Ido Schimmel diff --git a/man/man8/devlink.8 b/man/man8/devlink.8 index 13d4dcd9..12d48944 100644 --- a/man/man8/devlink.8 +++ b/man/man8/devlink.8 @@ -7,7 +7,7 @@ devlink \- Devlink tool .in +8 .ti -8 .B devlink -.RI "[ " OPTIONS " ] { " dev | port | monitor | sb | resource | region | health " } { " COMMAND " | " +.RI "[ " OPTIONS " ] { " dev | port | monitor | sb | resource | region | health | trap " } { " COMMAND " | " .BR help " }" .sp @@ -51,6 +51,10 @@ When combined with -j generate a pretty JSON output. .BR "\-v" , " --verbose" Turn on verbose output. +.TP +.BR "\-s" , " --statistics" +Output statistics. + .SS .I OBJECT @@ -82,6 +86,10 @@ Turn on verbose output. .B health - devlink reporting and recovery +.TP +.B trap +- devlink trap configuration + .SS .I COMMAND @@ -114,6 +122,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. .BR devlink-resource (8), .BR devlink-region (8), .BR devlink-health (8), +.BR devlink-trap (8), .br .SH REPORTING BUGS From 84b9168328bf92f6ecbee4e451b5bc34514be3f9 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 9 Aug 2019 20:18:43 -0400 Subject: [PATCH 18/26] ip nexthop: Allow flush|list operations to specify a specific protocol In the case where we have a large number of nexthops from a specific protocol, allow the flush and list operations to take a protocol to limit the commands' scope. 
Signed-off-by: Donald Sharp Signed-off-by: David Ahern --- ip/ipnexthop.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/ip/ipnexthop.c b/ip/ipnexthop.c index f35aab52..bc8ab431 100644 --- a/ip/ipnexthop.c +++ b/ip/ipnexthop.c @@ -19,6 +19,7 @@ static struct { unsigned int groups; unsigned int ifindex; unsigned int master; + unsigned int proto; } filter; enum { @@ -34,7 +35,7 @@ static void usage(void) __attribute__((noreturn)); static void usage(void) { fprintf(stderr, - "Usage: ip nexthop { list | flush } SELECTOR\n" + "Usage: ip nexthop { list | flush } [ protocol ID ] SELECTOR\n" " ip nexthop { add | replace } id ID NH [ protocol ID ]\n" " ip nexthop { get| del } id ID\n" "SELECTOR := [ id ID ] [ dev DEV ] [ vrf NAME ] [ master DEV ]\n" @@ -109,6 +110,9 @@ static int flush_nexthop(struct nlmsghdr *nlh, void *arg) return -1; } + if (filter.proto && nhm->nh_protocol != filter.proto) + return 0; + parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len); if (tb[NHA_ID]) id = rta_getattr_u32(tb[NHA_ID]); @@ -213,6 +217,9 @@ int print_nexthop(struct nlmsghdr *n, void *arg) return -1; } + if (filter.proto && filter.proto != nhm->nh_protocol) + return 0; + parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len); open_json_object(NULL); @@ -473,6 +480,13 @@ static int ipnh_list_flush(int argc, char **argv, int action) if (get_unsigned(&id, *argv, 0)) invarg("invalid id value", *argv); return ipnh_get_id(id); + } else if (!matches(*argv, "protocol")) { + __u32 proto; + + NEXT_ARG(); + if (get_unsigned(&proto, *argv, 0)) + invarg("invalid protocol value", *argv); + filter.proto = proto; } else if (matches(*argv, "help") == 0) { usage(); } else { From 3d72f125c300dd261a5151cf1cac7cfa152376b2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 15 Sep 2019 10:32:58 -0700 Subject: [PATCH 19/26] Update kernel headers Update kernel headers to commit: aa2eaa8c272a ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: David Ahern --- 
include/uapi/linux/bpf.h | 15 ++++++++++++--- include/uapi/linux/can.h | 20 +++++++++++++++++++- include/uapi/linux/devlink.h | 11 +++++++++++ include/uapi/linux/inet_diag.h | 9 +++++++++ include/uapi/linux/pkt_cls.h | 2 ++ include/uapi/linux/sctp.h | 3 +++ 6 files changed, 56 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 79701d3e..f7a75530 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -106,6 +106,7 @@ enum bpf_cmd { BPF_TASK_FD_QUERY, BPF_MAP_LOOKUP_AND_DELETE_ELEM, BPF_MAP_FREEZE, + BPF_BTF_GET_NEXT_ID, }; enum bpf_map_type { @@ -284,6 +285,9 @@ enum bpf_attach_type { */ #define BPF_F_TEST_RND_HI32 (1U << 2) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_STATE_FREQ (1U << 3) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * @@ -337,6 +341,9 @@ enum bpf_attach_type { #define BPF_F_RDONLY_PROG (1U << 7) #define BPF_F_WRONLY_PROG (1U << 8) +/* Clone map from listener for newly accepted socket */ +#define BPF_F_CLONE (1U << 9) + /* flags for BPF_PROG_QUERY */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -576,6 +583,8 @@ union bpf_attr { * limited to five). * * Each time the helper is called, it appends a line to the trace. + * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is + * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in * *\/sys/kernel/debug/tracing/trace_options* (see also the @@ -1014,7 +1023,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. 
* - * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1076,7 +1085,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags) + * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1725,7 +1734,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_reg *regs, u64 rc) + * int bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 9009f0b6..c61cdc7a 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -157,7 +157,8 @@ struct canfd_frame { #define CAN_TP20 4 /* VAG Transport Protocol v2.0 */ #define CAN_MCNET 5 /* Bosch MCNet */ #define CAN_ISOTP 6 /* ISO 15765-2 Transport Protocol */ -#define CAN_NPROTO 7 +#define CAN_J1939 7 /* SAE J1939 */ +#define CAN_NPROTO 8 #define SOL_CAN_BASE 100 @@ -174,6 +175,23 @@ struct sockaddr_can { /* transport protocol class address information (e.g. 
ISOTP) */ struct { canid_t rx_id, tx_id; } tp; + /* J1939 address information */ + struct { + /* 8 byte name when using dynamic addressing */ + __u64 name; + + /* pgn: + * 8 bit: PS in PDU2 case, else 0 + * 8 bit: PF + * 1 bit: DP + * 1 bit: reserved + */ + __u32 pgn; + + /* 1 byte address */ + __u8 addr; + } j1939; + /* reserved for future CAN protocols address information */ } can_addr; }; diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 3fb683be..79e1405d 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -202,6 +202,15 @@ enum devlink_param_cmode { enum devlink_param_fw_load_policy_value { DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER, DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH, + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK, + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_UNKNOWN, +}; + +enum devlink_param_reset_dev_on_drv_probe_value { + DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN, + DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS, + DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER, + DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK, }; enum { @@ -410,6 +419,8 @@ enum devlink_attr { DEVLINK_ATTR_TRAP_METADATA, /* nested */ DEVLINK_ATTR_TRAP_GROUP_NAME, /* string */ + DEVLINK_ATTR_RELOAD_FAILED, /* u8 0 or 1 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index f3bcd7ee..3dff6841 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -153,11 +153,20 @@ enum { INET_DIAG_BBRINFO, /* request as INET_DIAG_VEGASINFO */ INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ INET_DIAG_MD5SIG, + INET_DIAG_ULP_INFO, __INET_DIAG_MAX, }; #define INET_DIAG_MAX (__INET_DIAG_MAX - 1) +enum { + INET_ULP_INFO_UNSPEC, + INET_ULP_INFO_NAME, + INET_ULP_INFO_TLS, + __INET_ULP_INFO_MAX, +}; +#define INET_ULP_INFO_MAX (__INET_ULP_INFO_MAX - 1) + /* INET_DIAG_MEM */ struct inet_diag_meminfo 
{ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index b057aeeb..a6aa466f 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -160,6 +160,8 @@ enum { TCA_POLICE_RESULT, TCA_POLICE_TM, TCA_POLICE_PAD, + TCA_POLICE_RATE64, + TCA_POLICE_PEAKRATE64, __TCA_POLICE_MAX #define TCA_POLICE_RESULT TCA_POLICE_RESULT }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index c4bce0a2..0d4c1507 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -134,6 +134,9 @@ typedef __s32 sctp_assoc_t; #define SCTP_INTERLEAVING_SUPPORTED 125 #define SCTP_SENDMSG_CONNECT 126 #define SCTP_EVENT 127 +#define SCTP_ASCONF_SUPPORTED 128 +#define SCTP_AUTH_SUPPORTED 129 +#define SCTP_ECN_SUPPORTED 130 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 From 1157a6fc362c4fa9bb628e076593dda6a273784f Mon Sep 17 00:00:00 2001 From: David Dai Date: Wed, 4 Sep 2019 10:06:51 -0500 Subject: [PATCH 20/26] iproute2-next: police: support 64bit rate and peakrate in tc utility A high-speed adapter such as the Mellanox CX-5 card can reach up to 100 Gbits per second of bandwidth. Currently htb already supports 64bit rate in tc utility. However police action rate and peakrate are still limited to a 32bit value (up to 32 Gbits per second). Taking advantage of the 2 new attributes TCA_POLICE_RATE64 and TCA_POLICE_PEAKRATE64 from kernel, tc can use them to break the 32bit limit, and still keep the backward binary compatibility. 
Tested-by: David Dai Signed-off-by: David Dai Signed-off-by: David Ahern --- tc/m_police.c | 149 +++++++++++++++++++++++--------------------------- tc/tc_core.c | 29 ++++++++++ tc/tc_core.h | 3 + 3 files changed, 100 insertions(+), 81 deletions(-) diff --git a/tc/m_police.c b/tc/m_police.c index 862a39ff..a5bc20c0 100644 --- a/tc/m_police.c +++ b/tc/m_police.c @@ -49,11 +49,6 @@ static void usage(void) exit(-1); } -static void explain1(char *arg) -{ - fprintf(stderr, "Illegal \"%s\"\n", arg); -} - static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n) { @@ -71,6 +66,7 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */ int Rcell_log = -1, Pcell_log = -1; struct rtattr *tail; + __u64 rate64 = 0, prate64 = 0; if (a) /* new way of doing things */ NEXT_ARG(); @@ -82,73 +78,47 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, if (matches(*argv, "index") == 0) { NEXT_ARG(); - if (get_u32(&p.index, *argv, 10)) { - fprintf(stderr, "Illegal \"index\"\n"); - return -1; - } + if (get_u32(&p.index, *argv, 10)) + invarg("index", *argv); } else if (matches(*argv, "burst") == 0 || strcmp(*argv, "buffer") == 0 || strcmp(*argv, "maxburst") == 0) { NEXT_ARG(); - if (buffer) { - fprintf(stderr, "Double \"buffer/burst\" spec\n"); - return -1; - } - if (get_size_and_cell(&buffer, &Rcell_log, *argv) < 0) { - explain1("buffer"); - return -1; - } + if (buffer) + duparg("buffer/burst", *argv); + if (get_size_and_cell(&buffer, &Rcell_log, *argv) < 0) + invarg("buffer", *argv); } else if (strcmp(*argv, "mtu") == 0 || strcmp(*argv, "minburst") == 0) { NEXT_ARG(); - if (mtu) { - fprintf(stderr, "Double \"mtu/minburst\" spec\n"); - return -1; - } - if (get_size_and_cell(&mtu, &Pcell_log, *argv) < 0) { - explain1("mtu"); - return -1; - } + if (mtu) + duparg("mtu/minburst", *argv); + if (get_size_and_cell(&mtu, 
&Pcell_log, *argv) < 0) + invarg("mtu", *argv); } else if (strcmp(*argv, "mpu") == 0) { NEXT_ARG(); - if (mpu) { - fprintf(stderr, "Double \"mpu\" spec\n"); - return -1; - } - if (get_size(&mpu, *argv)) { - explain1("mpu"); - return -1; - } + if (mpu) + duparg("mpu", *argv); + if (get_size(&mpu, *argv)) + invarg("mpu", *argv); } else if (strcmp(*argv, "rate") == 0) { NEXT_ARG(); - if (p.rate.rate) { - fprintf(stderr, "Double \"rate\" spec\n"); - return -1; - } - if (get_rate(&p.rate.rate, *argv)) { - explain1("rate"); - return -1; - } + if (rate64) + duparg("rate", *argv); + if (get_rate64(&rate64, *argv)) + invarg("rate", *argv); } else if (strcmp(*argv, "avrate") == 0) { NEXT_ARG(); - if (avrate) { - fprintf(stderr, "Double \"avrate\" spec\n"); - return -1; - } - if (get_rate(&avrate, *argv)) { - explain1("avrate"); - return -1; - } + if (avrate) + duparg("avrate", *argv); + if (get_rate(&avrate, *argv)) + invarg("avrate", *argv); } else if (matches(*argv, "peakrate") == 0) { NEXT_ARG(); - if (p.peakrate.rate) { - fprintf(stderr, "Double \"peakrate\" spec\n"); - return -1; - } - if (get_rate(&p.peakrate.rate, *argv)) { - explain1("peakrate"); - return -1; - } + if (prate64) + duparg("peakrate", *argv); + if (get_rate64(&prate64, *argv)) + invarg("peakrate", *argv); } else if (matches(*argv, "reclassify") == 0 || matches(*argv, "drop") == 0 || matches(*argv, "shot") == 0 || @@ -168,14 +138,12 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, return -1; } else if (matches(*argv, "overhead") == 0) { NEXT_ARG(); - if (get_u16(&overhead, *argv, 10)) { - explain1("overhead"); return -1; - } + if (get_u16(&overhead, *argv, 10)) + invarg("overhead", *argv); } else if (matches(*argv, "linklayer") == 0) { NEXT_ARG(); - if (get_linklayer(&linklayer, *argv)) { - explain1("linklayer"); return -1; - } + if (get_linklayer(&linklayer, *argv)) + invarg("linklayer", *argv); } else if (strcmp(*argv, "help") == 0) { usage(); } else { @@ -189,23 
+157,23 @@ action_ctrl_ok: if (!ok) return -1; - if (p.rate.rate && avrate) + if (rate64 && avrate) return -1; /* Must at least do late binding, use TB or ewma policing */ - if (!p.rate.rate && !avrate && !p.index) { + if (!rate64 && !avrate && !p.index) { fprintf(stderr, "\"rate\" or \"avrate\" MUST be specified.\n"); return -1; } /* When the TB policer is used, burst is required */ - if (p.rate.rate && !buffer && !avrate) { + if (rate64 && !buffer && !avrate) { fprintf(stderr, "\"burst\" requires \"rate\".\n"); return -1; } - if (p.peakrate.rate) { - if (!p.rate.rate) { + if (prate64) { + if (!rate64) { fprintf(stderr, "\"peakrate\" requires \"rate\".\n"); return -1; } @@ -215,22 +183,24 @@ action_ctrl_ok: } } - if (p.rate.rate) { + if (rate64) { + p.rate.rate = (rate64 >= (1ULL << 32)) ? ~0U : rate64; p.rate.mpu = mpu; p.rate.overhead = overhead; - if (tc_calc_rtable(&p.rate, rtab, Rcell_log, mtu, - linklayer) < 0) { + if (tc_calc_rtable_64(&p.rate, rtab, Rcell_log, mtu, + linklayer, rate64) < 0) { fprintf(stderr, "POLICE: failed to calculate rate table.\n"); return -1; } - p.burst = tc_calc_xmittime(p.rate.rate, buffer); + p.burst = tc_calc_xmittime(rate64, buffer); } p.mtu = mtu; - if (p.peakrate.rate) { + if (prate64) { + p.peakrate.rate = (prate64 >= (1ULL << 32)) ? 
~0U : prate64; p.peakrate.mpu = mpu; p.peakrate.overhead = overhead; - if (tc_calc_rtable(&p.peakrate, ptab, Pcell_log, mtu, - linklayer) < 0) { + if (tc_calc_rtable_64(&p.peakrate, ptab, Pcell_log, mtu, + linklayer, prate64) < 0) { fprintf(stderr, "POLICE: failed to calculate peak rate table.\n"); return -1; } @@ -238,10 +208,16 @@ action_ctrl_ok: tail = addattr_nest(n, MAX_MSG, tca_id); addattr_l(n, MAX_MSG, TCA_POLICE_TBF, &p, sizeof(p)); - if (p.rate.rate) + if (rate64) { addattr_l(n, MAX_MSG, TCA_POLICE_RATE, rtab, 1024); - if (p.peakrate.rate) + if (rate64 >= (1ULL << 32)) + addattr64(n, MAX_MSG, TCA_POLICE_RATE64, rate64); + } + if (prate64) { addattr_l(n, MAX_MSG, TCA_POLICE_PEAKRATE, ptab, 1024); + if (prate64 >= (1ULL << 32)) + addattr64(n, MAX_MSG, TCA_POLICE_PEAKRATE64, prate64); + } if (avrate) addattr32(n, MAX_MSG, TCA_POLICE_AVRATE, avrate); if (presult) @@ -268,6 +244,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) struct rtattr *tb[TCA_POLICE_MAX+1]; unsigned int buffer; unsigned int linklayer; + __u64 rate64, prate64; if (arg == NULL) return 0; @@ -286,16 +263,26 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) #endif p = RTA_DATA(tb[TCA_POLICE_TBF]); + rate64 = p->rate.rate; + if (tb[TCA_POLICE_RATE64] && + RTA_PAYLOAD(tb[TCA_POLICE_RATE64]) >= sizeof(rate64)) + rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]); + fprintf(f, " police 0x%x ", p->index); - fprintf(f, "rate %s ", sprint_rate(p->rate.rate, b1)); - buffer = tc_calc_xmitsize(p->rate.rate, p->burst); + fprintf(f, "rate %s ", sprint_rate(rate64, b1)); + buffer = tc_calc_xmitsize(rate64, p->burst); fprintf(f, "burst %s ", sprint_size(buffer, b1)); fprintf(f, "mtu %s ", sprint_size(p->mtu, b1)); if (show_raw) fprintf(f, "[%08x] ", p->burst); - if (p->peakrate.rate) - fprintf(f, "peakrate %s ", sprint_rate(p->peakrate.rate, b1)); + prate64 = p->peakrate.rate; + if (tb[TCA_POLICE_PEAKRATE64] && + 
RTA_PAYLOAD(tb[TCA_POLICE_PEAKRATE64]) >= sizeof(prate64)) + prate64 = rta_getattr_u64(tb[TCA_POLICE_PEAKRATE64]); + + if (prate64) + fprintf(f, "peakrate %s ", sprint_rate(prate64, b1)); if (tb[TCA_POLICE_AVRATE]) fprintf(f, "avrate %s ", diff --git a/tc/tc_core.c b/tc/tc_core.c index 8eb11223..498d35dc 100644 --- a/tc/tc_core.c +++ b/tc/tc_core.c @@ -152,6 +152,35 @@ int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab, return cell_log; } +int tc_calc_rtable_64(struct tc_ratespec *r, __u32 *rtab, + int cell_log, unsigned int mtu, + enum link_layer linklayer, __u64 rate) +{ + int i; + unsigned int sz; + __u64 bps = rate; + unsigned int mpu = r->mpu; + + if (mtu == 0) + mtu = 2047; + + if (cell_log < 0) { + cell_log = 0; + while ((mtu >> cell_log) > 255) + cell_log++; + } + + for (i = 0; i < 256; i++) { + sz = tc_adjust_size((i + 1) << cell_log, mpu, linklayer); + rtab[i] = tc_calc_xmittime(bps, sz); + } + + r->cell_align = -1; + r->cell_log = cell_log; + r->linklayer = (linklayer & TC_LINKLAYER_MASK); + return cell_log; +} + /* stab[pkt_len>>cell_log] = pkt_xmit_size>>size_log */ diff --git a/tc/tc_core.h b/tc/tc_core.h index bd4a99f0..6dab2727 100644 --- a/tc/tc_core.h +++ b/tc/tc_core.h @@ -21,6 +21,9 @@ unsigned tc_calc_xmittime(__u64 rate, unsigned size); unsigned tc_calc_xmitsize(__u64 rate, unsigned ticks); int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab, int cell_log, unsigned mtu, enum link_layer link_layer); +int tc_calc_rtable_64(struct tc_ratespec *r, __u32 *rtab, + int cell_log, unsigned mtu, enum link_layer link_layer, + __u64 rate); int tc_calc_size_table(struct tc_sizespec *s, __u16 **stab); int tc_setup_estimator(unsigned A, unsigned time_const, struct tc_estimator *est); From c240e6748e5f13ad8d4d595b444725a3bdf6dc1a Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Wed, 11 Sep 2019 14:05:17 +0100 Subject: [PATCH 21/26] devlink: add 'reset_dev_on_drv_probe' devlink param Add support for the new devlink parameter along with string to 
uint conversion. Signed-off-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David Ahern --- devlink/devlink.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/devlink/devlink.c b/devlink/devlink.c index 2f084c02..15877a04 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -2253,6 +2253,26 @@ static const struct param_val_conv param_val_conv[] = { .vstr = "flash", .vuint = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH, }, + { + .name = "reset_dev_on_drv_probe", + .vstr = "unknown", + .vuint = DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN, + }, + { + .name = "reset_dev_on_drv_probe", + .vstr = "always", + .vuint = DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS, + }, + { + .name = "reset_dev_on_drv_probe", + .vstr = "never", + .vuint = DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER, + }, + { + .name = "reset_dev_on_drv_probe", + .vstr = "disk", + .vuint = DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK, + }, }; #define PARAM_VAL_CONV_LEN ARRAY_SIZE(param_val_conv) From 850de16f12a9221694145760624c3c9ab9b4b668 Mon Sep 17 00:00:00 2001 From: Dirk van der Merwe Date: Wed, 11 Sep 2019 15:56:29 +0100 Subject: [PATCH 22/26] devlink: unknown 'fw_load_policy' string validation The 'fw_load_policy' devlink parameter now supports an unknown value. 
Suggested-by: Jakub Kicinski Signed-off-by: Dirk van der Merwe Signed-off-by: Simon Horman Signed-off-by: David Ahern --- devlink/devlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/devlink/devlink.c b/devlink/devlink.c index 15877a04..e4b494eb 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -2258,6 +2258,11 @@ static const struct param_val_conv param_val_conv[] = { .vstr = "unknown", .vuint = DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN, }, + { + .name = "fw_load_policy", + .vstr = "unknown", + .vuint = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_UNKNOWN, + }, { .name = "reset_dev_on_drv_probe", .vstr = "always", From 853be43f9eb52dcebb18fbf0c19618253ea7da1b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Sep 2019 13:29:37 +0200 Subject: [PATCH 23/26] devlink: implement flash update status monitoring Kernel sends notifications about flash update status, so implement these messages for monitoring. Signed-off-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/devlink/devlink.c b/devlink/devlink.c index e4b494eb..26520d34 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -443,6 +443,10 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT] = MNL_TYPE_U64, [DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS] = MNL_TYPE_U64, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = MNL_TYPE_U64, + [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = MNL_TYPE_STRING, + [DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG] = MNL_TYPE_STRING, + [DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE] = MNL_TYPE_U64, + [DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL] = MNL_TYPE_U64, [DEVLINK_ATTR_STATS] = MNL_TYPE_NESTED, [DEVLINK_ATTR_TRAP_NAME] = MNL_TYPE_STRING, [DEVLINK_ATTR_TRAP_ACTION] = MNL_TYPE_U8, @@ -3903,6 +3907,9 @@ static const char *cmd_name(uint8_t cmd) case DEVLINK_CMD_REGION_SET: return "set"; case 
DEVLINK_CMD_REGION_NEW: return "new"; case DEVLINK_CMD_REGION_DEL: return "del"; + case DEVLINK_CMD_FLASH_UPDATE: return "begin"; + case DEVLINK_CMD_FLASH_UPDATE_END: return "end"; + case DEVLINK_CMD_FLASH_UPDATE_STATUS: return "status"; case DEVLINK_CMD_TRAP_GET: return "get"; case DEVLINK_CMD_TRAP_SET: return "set"; case DEVLINK_CMD_TRAP_NEW: return "new"; @@ -3939,6 +3946,10 @@ static const char *cmd_obj(uint8_t cmd) case DEVLINK_CMD_REGION_NEW: case DEVLINK_CMD_REGION_DEL: return "region"; + case DEVLINK_CMD_FLASH_UPDATE: + case DEVLINK_CMD_FLASH_UPDATE_END: + case DEVLINK_CMD_FLASH_UPDATE_STATUS: + return "flash"; case DEVLINK_CMD_TRAP_GET: case DEVLINK_CMD_TRAP_SET: case DEVLINK_CMD_TRAP_NEW: @@ -3973,6 +3984,29 @@ static bool cmd_filter_check(struct dl *dl, uint8_t cmd) return false; } +static void pr_out_flash_update(struct dl *dl, struct nlattr **tb) +{ + __pr_out_handle_start(dl, tb, true, false); + + if (tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG]) + pr_out_str(dl, "msg", + mnl_attr_get_str(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG])); + + if (tb[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT]) + pr_out_str(dl, "component", + mnl_attr_get_str(tb[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT])); + + if (tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE]) + pr_out_u64(dl, "done", + mnl_attr_get_u64(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE])); + + if (tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL]) + pr_out_u64(dl, "total", + mnl_attr_get_u64(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL])); + + pr_out_handle_end(dl); +} + static void pr_out_region(struct dl *dl, struct nlattr **tb); static void pr_out_trap(struct dl *dl, struct nlattr **tb, bool array); static void pr_out_trap_group(struct dl *dl, struct nlattr **tb, bool array); @@ -4031,6 +4065,15 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) pr_out_mon_header(genl->cmd); pr_out_region(dl, tb); break; + case DEVLINK_CMD_FLASH_UPDATE: /* fall through */ + case DEVLINK_CMD_FLASH_UPDATE_END: /* fall through */ + case 
DEVLINK_CMD_FLASH_UPDATE_STATUS: + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME]) + return MNL_CB_ERROR; + pr_out_mon_header(genl->cmd); + pr_out_flash_update(dl, tb); + break; case DEVLINK_CMD_TRAP_GET: /* fall through */ case DEVLINK_CMD_TRAP_SET: /* fall through */ case DEVLINK_CMD_TRAP_NEW: /* fall through */ From 9b13cddfe26869a51329203f6866cfbb164ca951 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Sep 2019 13:29:38 +0200 Subject: [PATCH 24/26] devlink: implement flash status monitoring Listen to status notifications coming from kernel during flashing and put them on stdout to inform user about the status. Signed-off-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 215 ++++++++++++++++++++++++++++++++++++++++- devlink/mnlg.c | 5 + devlink/mnlg.h | 1 + man/man8/devlink-dev.8 | 11 +++ 4 files changed, 228 insertions(+), 4 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 26520d34..5896e22d 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "SNAPSHOT.h" #include "list.h" @@ -96,6 +97,18 @@ pr_out_sp(unsigned int num, const char *fmt, ...) g_new_line_count = 0; \ } +static void __attribute__((format(printf, 1, 2))) +pr_out_tty(const char *fmt, ...) 
+{ + va_list ap; + + if (!isatty(STDOUT_FILENO)) + return; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + static void __pr_out_indent_inc(void) { if (g_indent_level + INDENT_STR_STEP > INDENT_STR_MAXLEN) @@ -135,9 +148,8 @@ static int _mnlg_socket_recv_run(struct mnlg_socket *nlg, return 0; } -static int _mnlg_socket_sndrcv(struct mnlg_socket *nlg, - const struct nlmsghdr *nlh, - mnl_cb_t data_cb, void *data) +static int _mnlg_socket_send(struct mnlg_socket *nlg, + const struct nlmsghdr *nlh) { int err; @@ -146,6 +158,18 @@ static int _mnlg_socket_sndrcv(struct mnlg_socket *nlg, pr_err("Failed to call mnlg_socket_send\n"); return -errno; } + return 0; +} + +static int _mnlg_socket_sndrcv(struct mnlg_socket *nlg, + const struct nlmsghdr *nlh, + mnl_cb_t data_cb, void *data) +{ + int err; + + err = _mnlg_socket_send(nlg, nlh); + if (err) + return err; return _mnlg_socket_recv_run(nlg, data_cb, data); } @@ -2855,9 +2879,151 @@ static void cmd_dev_flash_help(void) pr_err("Usage: devlink dev flash DEV file PATH [ component NAME ]\n"); } + +struct cmd_dev_flash_status_ctx { + struct dl *dl; + char *last_msg; + char *last_component; + uint8_t not_first:1, + last_pc:1, + received_end:1, + flash_done:1; +}; + +static int nullstrcmp(const char *str1, const char *str2) +{ + if (str1 && str2) + return strcmp(str1, str2); + if (!str1 && !str2) + return 0; + return str1 ? 
1 : -1; +} + +static int cmd_dev_flash_status_cb(const struct nlmsghdr *nlh, void *data) +{ + struct cmd_dev_flash_status_ctx *ctx = data; + struct dl_opts *opts = &ctx->dl->opts; + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + const char *component = NULL; + uint64_t done = 0, total = 0; + const char *msg = NULL; + const char *bus_name; + const char *dev_name; + + if (genl->cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS && + genl->cmd != DEVLINK_CMD_FLASH_UPDATE_END) + return MNL_CB_STOP; + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME]) + return MNL_CB_ERROR; + bus_name = mnl_attr_get_str(tb[DEVLINK_ATTR_BUS_NAME]); + dev_name = mnl_attr_get_str(tb[DEVLINK_ATTR_DEV_NAME]); + if (strcmp(bus_name, opts->bus_name) || + strcmp(dev_name, opts->dev_name)) + return MNL_CB_ERROR; + + if (genl->cmd == DEVLINK_CMD_FLASH_UPDATE_END && ctx->not_first) { + pr_out("\n"); + free(ctx->last_msg); + free(ctx->last_component); + ctx->received_end = 1; + return MNL_CB_STOP; + } + + if (tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG]) + msg = mnl_attr_get_str(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG]); + if (tb[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT]) + component = mnl_attr_get_str(tb[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT]); + if (tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE]) + done = mnl_attr_get_u64(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE]); + if (tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL]) + total = mnl_attr_get_u64(tb[DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL]); + + if (!nullstrcmp(msg, ctx->last_msg) && + !nullstrcmp(component, ctx->last_component) && + ctx->last_pc && ctx->not_first) { + pr_out_tty("\b\b\b\b\b"); /* clean percentage */ + } else { + if (ctx->not_first) + pr_out("\n"); + if (component) { + pr_out("[%s] ", component); + free(ctx->last_component); + ctx->last_component = strdup(component); + } + if (msg) { + pr_out("%s", msg); + free(ctx->last_msg); + ctx->last_msg = 
strdup(msg); + } + } + if (total) { + pr_out_tty(" %3lu%%", (done * 100) / total); + ctx->last_pc = 1; + } else { + ctx->last_pc = 0; + } + fflush(stdout); + ctx->not_first = 1; + + return MNL_CB_STOP; +} + +static int cmd_dev_flash_fds_process(struct cmd_dev_flash_status_ctx *ctx, + struct mnlg_socket *nlg_ntf, + int pipe_r) +{ + int nlfd = mnlg_socket_get_fd(nlg_ntf); + fd_set fds[3]; + int fdmax; + int i; + int err; + int err2; + + for (i = 0; i < 3; i++) + FD_ZERO(&fds[i]); + FD_SET(pipe_r, &fds[0]); + fdmax = pipe_r + 1; + FD_SET(nlfd, &fds[0]); + if (nlfd >= fdmax) + fdmax = nlfd + 1; + + while (select(fdmax, &fds[0], &fds[1], &fds[2], NULL) < 0) { + if (errno == EINTR) + continue; + pr_err("select() failed\n"); + return -errno; + } + if (FD_ISSET(nlfd, &fds[0])) { + err = _mnlg_socket_recv_run(nlg_ntf, + cmd_dev_flash_status_cb, ctx); + if (err) + return err; + } + if (FD_ISSET(pipe_r, &fds[0])) { + err = read(pipe_r, &err2, sizeof(err2)); + if (err == -1) { + pr_err("Failed to read pipe\n"); + return -errno; + } + if (err2) + return err2; + ctx->flash_done = 1; + } + return 0; +} + + static int cmd_dev_flash(struct dl *dl) { + struct cmd_dev_flash_status_ctx ctx = {.dl = dl,}; + struct mnlg_socket *nlg_ntf; struct nlmsghdr *nlh; + int pipe_r, pipe_w; + int pipe_fds[2]; + pid_t pid; int err; if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { @@ -2873,7 +3039,48 @@ static int cmd_dev_flash(struct dl *dl) if (err) return err; - return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); + nlg_ntf = mnlg_socket_open(DEVLINK_GENL_NAME, DEVLINK_GENL_VERSION); + if (!nlg_ntf) + return err; + + err = _mnlg_socket_group_add(nlg_ntf, DEVLINK_GENL_MCGRP_CONFIG_NAME); + if (err) + return err; + + err = pipe(pipe_fds); + if (err == -1) + return -errno; + pipe_r = pipe_fds[0]; + pipe_w = pipe_fds[1]; + + pid = fork(); + if (pid == -1) { + close(pipe_r); + close(pipe_w); + return -errno; + } else if (!pid) { + /* In child, just execute the flash and pass returned + * value 
through pipe once it is done. + */ + close(pipe_r); + err = _mnlg_socket_send(dl->nlg, nlh); + write(pipe_w, &err, sizeof(err)); + close(pipe_w); + exit(0); + } + close(pipe_w); + + do { + err = cmd_dev_flash_fds_process(&ctx, nlg_ntf, pipe_r); + if (err) + goto out; + } while (!ctx.flash_done || (ctx.not_first && !ctx.received_end)); + + err = _mnlg_socket_recv_run(dl->nlg, NULL, NULL); +out: + close(pipe_r); + mnlg_socket_close(nlg_ntf); + return err; } static int cmd_dev(struct dl *dl) diff --git a/devlink/mnlg.c b/devlink/mnlg.c index ee125df0..c7d25e87 100644 --- a/devlink/mnlg.c +++ b/devlink/mnlg.c @@ -320,3 +320,8 @@ void mnlg_socket_close(struct mnlg_socket *nlg) free(nlg->buf); free(nlg); } + +int mnlg_socket_get_fd(struct mnlg_socket *nlg) +{ + return mnl_socket_get_fd(nlg->nl); +} diff --git a/devlink/mnlg.h b/devlink/mnlg.h index 4d1babf3..61bc5a3f 100644 --- a/devlink/mnlg.h +++ b/devlink/mnlg.h @@ -23,5 +23,6 @@ int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data); int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name); struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version); void mnlg_socket_close(struct mnlg_socket *nlg); +int mnlg_socket_get_fd(struct mnlg_socket *nlg); #endif /* _MNLG_H_ */ diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8 index 1804463b..1021ee8d 100644 --- a/man/man8/devlink-dev.8 +++ b/man/man8/devlink-dev.8 @@ -244,6 +244,17 @@ Sets the parameter internal_error_reset of specified devlink device to true. devlink dev reload pci/0000:01:00.0 .RS 4 Performs hot reload of specified devlink device. +.RE +.PP +devlink dev flash pci/0000:01:00.0 file firmware.bin +.RS 4 +Flashes the specified devlink device with provided firmware file name. If the driver supports it, user gets updates about the flash status. 
For example: +.br +Preparing to flash +.br +Flashing 100% +.br +Flashing done .SH SEE ALSO .BR devlink (8), From c0325b06382cb4f7ebfaf80c29c8800d74666fd9 Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Mon, 16 Sep 2019 15:00:55 +0200 Subject: [PATCH 25/26] bpf: replace snprintf with asprintf when dealing with long buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reduces stack usage, as asprintf allocates memory on the heap. This indirectly fixes a snprintf truncation warning (from gcc v9.2.1): bpf.c: In function ‘bpf_get_work_dir’: bpf.c:784:49: warning: ‘snprintf’ output may be truncated before the last format character [-Wformat-truncation=] 784 | snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt); | ^ bpf.c:784:2: note: ‘snprintf’ output between 2 and 4097 bytes into a destination of size 4096 784 | snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: e42256699cac ("bpf: make tc's bpf loader generic and move into lib") Signed-off-by: Andrea Claudi Signed-off-by: David Ahern --- lib/bpf.c | 155 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 116 insertions(+), 39 deletions(-) diff --git a/lib/bpf.c b/lib/bpf.c index 7d2a322f..0fe2e9eb 100644 --- a/lib/bpf.c +++ b/lib/bpf.c @@ -406,13 +406,21 @@ static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map, struct bpf_map_ext *ext) { unsigned int val, owner_type = 0, owner_jited = 0; - char file[PATH_MAX], buff[4096]; + char *file = NULL; + char buff[4096]; FILE *fp; + int ret; - snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); + ret = asprintf(&file, "/proc/%d/fdinfo/%d", getpid(), fd); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + free(file); + return ret; + } memset(map, 0, sizeof(*map)); fp = fopen(file, "r"); + free(file); if (!fp) { fprintf(stderr, "No procfs support?!\n"); return -EIO; @@ -600,8 
+608,9 @@ int bpf_trace_pipe(void) 0, }; int fd_in, fd_out = STDERR_FILENO; - char tpipe[PATH_MAX]; + char *tpipe = NULL; const char *mnt; + int ret; mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt, sizeof(tracefs_mnt), tracefs_known_mnts); @@ -610,9 +619,15 @@ int bpf_trace_pipe(void) return -1; } - snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt); + ret = asprintf(&tpipe, "%s/trace_pipe", mnt); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + free(tpipe); + return ret; + } fd_in = open(tpipe, O_RDONLY); + free(tpipe); if (fd_in < 0) return -1; @@ -633,37 +648,50 @@ int bpf_trace_pipe(void) static int bpf_gen_global(const char *bpf_sub_dir) { - char bpf_glo_dir[PATH_MAX]; + char *bpf_glo_dir = NULL; int ret; - snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/", - bpf_sub_dir, BPF_DIR_GLOBALS); + ret = asprintf(&bpf_glo_dir, "%s/%s/", bpf_sub_dir, BPF_DIR_GLOBALS); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + goto out; + } ret = mkdir(bpf_glo_dir, S_IRWXU); if (ret && errno != EEXIST) { fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir, strerror(errno)); - return ret; + goto out; } - return 0; + ret = 0; +out: + free(bpf_glo_dir); + return ret; } static int bpf_gen_master(const char *base, const char *name) { - char bpf_sub_dir[PATH_MAX + NAME_MAX + 1]; + char *bpf_sub_dir = NULL; int ret; - snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name); + ret = asprintf(&bpf_sub_dir, "%s%s/", base, name); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + goto out; + } ret = mkdir(bpf_sub_dir, S_IRWXU); if (ret && errno != EEXIST) { fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir, strerror(errno)); - return ret; + goto out; } - return bpf_gen_global(bpf_sub_dir); + ret = bpf_gen_global(bpf_sub_dir); +out: + free(bpf_sub_dir); + return ret; } static int bpf_slave_via_bind_mnt(const char *full_name, @@ -692,13 +720,22 @@ static int 
bpf_slave_via_bind_mnt(const char *full_name, static int bpf_gen_slave(const char *base, const char *name, const char *link) { - char bpf_lnk_dir[PATH_MAX + NAME_MAX + 1]; - char bpf_sub_dir[PATH_MAX + NAME_MAX]; + char *bpf_lnk_dir = NULL; + char *bpf_sub_dir = NULL; struct stat sb = {}; int ret; - snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link); - snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name); + ret = asprintf(&bpf_lnk_dir, "%s%s/", base, link); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + goto out; + } + + ret = asprintf(&bpf_sub_dir, "%s%s", base, name); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + goto out; + } ret = symlink(bpf_lnk_dir, bpf_sub_dir); if (ret) { @@ -706,25 +743,30 @@ static int bpf_gen_slave(const char *base, const char *name, if (errno != EPERM) { fprintf(stderr, "symlink %s failed: %s\n", bpf_sub_dir, strerror(errno)); - return ret; + goto out; } - return bpf_slave_via_bind_mnt(bpf_sub_dir, - bpf_lnk_dir); + ret = bpf_slave_via_bind_mnt(bpf_sub_dir, bpf_lnk_dir); + goto out; } ret = lstat(bpf_sub_dir, &sb); if (ret) { fprintf(stderr, "lstat %s failed: %s\n", bpf_sub_dir, strerror(errno)); - return ret; + goto out; } - if ((sb.st_mode & S_IFMT) != S_IFLNK) - return bpf_gen_global(bpf_sub_dir); + if ((sb.st_mode & S_IFMT) != S_IFLNK) { + ret = bpf_gen_global(bpf_sub_dir); + goto out; + } } - return 0; +out: + free(bpf_lnk_dir); + free(bpf_sub_dir); + return ret; } static int bpf_gen_hierarchy(const char *base) @@ -742,7 +784,7 @@ static int bpf_gen_hierarchy(const char *base) static const char *bpf_get_work_dir(enum bpf_prog_type type) { static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT; - static char bpf_wrk_dir[PATH_MAX]; + static char *bpf_wrk_dir; static const char *mnt; static bool bpf_mnt_cached; const char *mnt_env = getenv(BPF_ENV_MNT); @@ -781,7 +823,12 @@ static const char *bpf_get_work_dir(enum bpf_prog_type type) } } - 
snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt); + ret = asprintf(&bpf_wrk_dir, "%s/", mnt); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + free(bpf_wrk_dir); + goto out; + } ret = bpf_gen_hierarchy(bpf_wrk_dir); if (ret) { @@ -1438,31 +1485,48 @@ static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx, static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx) { - char tmp[PATH_MAX]; + char *tmp = NULL; int ret; - snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type), - ctx->obj_uid); + ret = asprintf(&tmp, "%s/%s", bpf_get_work_dir(ctx->type), ctx->obj_uid); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + goto out; + } ret = mkdir(tmp, S_IRWXU); if (ret && errno != EEXIST) { fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno)); - return ret; + goto out; } - return 0; + ret = 0; +out: + free(tmp); + return ret; } static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx, const char *todo) { - char tmp[PATH_MAX], rem[PATH_MAX], *sub; + char *tmp = NULL; + char *rem = NULL; + char *sub; int ret; - snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type)); - snprintf(rem, sizeof(rem), "%s/", todo); - sub = strtok(rem, "/"); + ret = asprintf(&tmp, "%s/../", bpf_get_work_dir(ctx->type)); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + goto out; + } + ret = asprintf(&rem, "%s/", todo); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + goto out; + } + + sub = strtok(rem, "/"); while (sub) { if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX) return -EINVAL; @@ -1474,13 +1538,17 @@ static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx, if (ret && errno != EEXIST) { fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno)); - return ret; + goto out; } sub = strtok(NULL, "/"); } - return 0; + ret = 0; +out: + free(rem); + free(tmp); + return ret; } static int bpf_place_pinned(int fd, 
const char *name, @@ -2581,14 +2649,23 @@ struct bpf_jited_aux { static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog) { - char file[PATH_MAX], buff[4096]; + char *file = NULL; + char buff[4096]; unsigned int val; FILE *fp; + int ret; + + ret = asprintf(&file, "/proc/%d/fdinfo/%d", getpid(), fd); + if (ret < 0) { + fprintf(stderr, "asprintf failed: %s\n", strerror(errno)); + free(file); + return ret; + } - snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); memset(prog, 0, sizeof(*prog)); fp = fopen(file, "r"); + free(file); if (!fp) { fprintf(stderr, "No procfs support?!\n"); return -EIO; From 88fdbf80303838bb36f1b5e059f780b0335aeb91 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 16 Sep 2019 11:44:48 +0200 Subject: [PATCH 26/26] devlink: add reload failed indication Add indication about previous failed devlink reload. Example outputs: $ devlink dev netdevsim/netdevsim10: reload_failed true $ devlink dev -j -p { "dev": { "netdevsim/netdevsim10": { "reload_failed": true } } } Signed-off-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 5896e22d..fd3f7901 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -478,6 +478,7 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_TRAP_GENERIC] = MNL_TYPE_FLAG, [DEVLINK_ATTR_TRAP_METADATA] = MNL_TYPE_NESTED, [DEVLINK_ATTR_TRAP_GROUP_NAME] = MNL_TYPE_STRING, + [DEVLINK_ATTR_RELOAD_FAILED] = MNL_TYPE_U8, }; static const enum mnl_attr_data_type @@ -1977,11 +1978,6 @@ static void pr_out_region_chunk(struct dl *dl, uint8_t *data, uint32_t len, pr_out_region_chunk_end(dl); } -static void pr_out_dev(struct dl *dl, struct nlattr **tb) -{ - pr_out_handle(dl, tb); -} - static void pr_out_section_start(struct dl *dl, const char *name) { if (dl->json_output) { @@ -2682,11 +2678,23 @@ static int 
cmd_dev_show_cb(const struct nlmsghdr *nlh, void *data) struct dl *dl = data; struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + uint8_t reload_failed = 0; mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME]) return MNL_CB_ERROR; - pr_out_dev(dl, tb); + + if (tb[DEVLINK_ATTR_RELOAD_FAILED]) + reload_failed = mnl_attr_get_u8(tb[DEVLINK_ATTR_RELOAD_FAILED]); + + if (reload_failed) { + __pr_out_handle_start(dl, tb, true, false); + pr_out_bool(dl, "reload_failed", true); + pr_out_handle_end(dl); + } else { + pr_out_handle(dl, tb); + } + return MNL_CB_OK; } @@ -4237,7 +4245,7 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME]) return MNL_CB_ERROR; pr_out_mon_header(genl->cmd); - pr_out_dev(dl, tb); + pr_out_handle(dl, tb); break; case DEVLINK_CMD_PORT_GET: /* fall through */ case DEVLINK_CMD_PORT_SET: /* fall through */