From 8f0807023d067e2bb585a2ae8da93e59689d10f1 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 12 Feb 2018 20:23:12 +0100 Subject: [PATCH 01/25] lib/namespace: don't try to mount rw /sys over a ro one It will fail with EPERM on Linux 4.15. Signed-off-by: Lubomir Rintel Acked-by: Phil Sutter Signed-off-by: Stephen Hemminger --- lib/namespace.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/namespace.c b/lib/namespace.c index 30b51388..6f3356d0 100644 --- a/lib/namespace.c +++ b/lib/namespace.c @@ -7,6 +7,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include #include #include @@ -46,6 +47,8 @@ int netns_switch(char *name) { char net_path[PATH_MAX]; int netns; + unsigned long mountflags = 0; + struct statvfs fsstat; snprintf(net_path, sizeof(net_path), "%s/%s", NETNS_RUN_DIR, name); netns = open(net_path, O_RDONLY | O_CLOEXEC); @@ -73,12 +76,25 @@ int netns_switch(char *name) strerror(errno)); return -1; } + /* Mount a version of /sys that describes the network namespace */ - if (umount2("/sys", MNT_DETACH) < 0) { - fprintf(stderr, "umount of /sys failed: %s\n", strerror(errno)); + + if (statvfs("/sys", &fsstat) < 0) { + fprintf(stderr, "could not stat /sys (not mounted?): %s\n",strerror(errno)); return -1; } - if (mount(name, "/sys", "sysfs", 0, NULL) < 0) { + if (fsstat.f_flag & ST_RDONLY) { + /* If /sys is not writable (e.g. in a container), we can't + * unmount the old /sys instance, but we can still mount a new + * read-only instance over it. 
*/ + mountflags = MS_RDONLY; + } else { + if (umount2("/sys", MNT_DETACH) < 0) { + fprintf(stderr, "umount of /sys failed: %s\n", strerror(errno)); + return -1; + } + } + if (mount(name, "/sys", "sysfs", mountflags, NULL) < 0) { fprintf(stderr, "mount of /sys failed: %s\n",strerror(errno)); return -1; } From 7bdd62394883a0c38489da105996c35c9bacf18d Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Mon, 19 Feb 2018 17:13:06 +0100 Subject: [PATCH 02/25] bridge: Prevent a double space in bridge mdb show Prevent a double space in "bridge mdb show" when the MDB entry is not marked as "offload". Signed-off-by: Timothy Redaelli Signed-off-by: Stephen Hemminger --- bridge/mdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridge/mdb.c b/bridge/mdb.c index 62dc8a0c..58c20b82 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -166,12 +166,12 @@ static void print_mdb_entry(FILE *f, int ifindex, struct br_mdb_entry *e, if (mdb_flags) jsonw_end_array(jw_global); } else{ - fprintf(f, "dev %s port %s grp %s %s %s", + fprintf(f, "dev %s port %s grp %s %s%s", ll_index_to_name(ifindex), ll_index_to_name(e->ifindex), inet_ntop(af, src, abuf, sizeof(abuf)), (e->state & MDB_PERMANENT) ? "permanent" : "temp", - (e->flags & MDB_FLAGS_OFFLOAD) ? "offload" : ""); + (e->flags & MDB_FLAGS_OFFLOAD) ? " offload" : ""); } if (e->vid) { if (jw_global) From 2fb854d07cb80fe11593b9da8f34feb62b7e401e Mon Sep 17 00:00:00 2001 From: Adam Vyskovsky Date: Sun, 18 Feb 2018 20:50:10 +0100 Subject: [PATCH 03/25] tc: fix an off-by-one error while printing tc actions The tc_print_action() function did not print all tc actions when e.g. TCA_ACT_MAX_PRIO actions were defined for a single tc filter. 
Signed-off-by: Adam Vyskovsky Signed-off-by: Stephen Hemminger --- tc/m_action.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tc/m_action.c b/tc/m_action.c index 445d0b69..148f1372 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -368,7 +368,7 @@ tc_print_action(FILE *f, const struct rtattr *arg, unsigned short tot_acts) return tc_print_action_flush(f, tb[0]); open_json_array(PRINT_JSON, "actions"); - for (i = 0; i < tot_acts; i++) { + for (i = 0; i <= tot_acts; i++) { if (tb[i]) { open_json_object(NULL); print_uint(PRINT_ANY, "order", From 1ca4341d2c6bcdaf19fb7e3cdd0b39207a29941d Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Wed, 21 Feb 2018 00:28:04 +0100 Subject: [PATCH 04/25] color: disable color when json output is requested Instead of declaring -color and -json exclusive, ignore -color when -json is provided. The rationale is to allow to put -color in an alias for ip while still being able to use -json. -color is merely a presentation suggestion and we can assume there is nothing to color in the JSON output. 
Signed-off-by: Vincent Bernat Signed-off-by: Stephen Hemminger --- include/color.h | 1 - ip/ip.c | 7 ++++--- lib/color.c | 8 -------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/include/color.h b/include/color.h index f6c351b7..c80359d3 100644 --- a/include/color.h +++ b/include/color.h @@ -13,7 +13,6 @@ enum color_attr { }; void enable_color(void); -void check_if_color_enabled(void); void set_color_palette(void); int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...); enum color_attr ifa_family_color(__u8 ifa_family); diff --git a/ip/ip.c b/ip/ip.c index ee0d6346..e0cd96cb 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -172,6 +172,7 @@ int main(int argc, char **argv) { char *basename; char *batch_file = NULL; + int color = 0; basename = strrchr(argv[0], '/'); if (basename == NULL) @@ -269,7 +270,7 @@ int main(int argc, char **argv) } rcvbuf = size; } else if (matches(opt, "-color") == 0) { - enable_color(); + ++color; } else if (matches(opt, "-help") == 0) { usage(); } else if (matches(opt, "-netns") == 0) { @@ -289,8 +290,8 @@ int main(int argc, char **argv) _SL_ = oneline ? "\\" : "\n"; - if (json) - check_if_color_enabled(); + if (color && !json) + enable_color(); if (batch_file) return batch(batch_file); diff --git a/lib/color.c b/lib/color.c index a13a4930..da1f516c 100644 --- a/lib/color.c +++ b/lib/color.c @@ -92,14 +92,6 @@ void set_color_palette(void) is_dark_bg = 1; } -void check_if_color_enabled(void) -{ - if (color_is_enabled) { - fprintf(stderr, "Option \"-json\" conflicts with \"-color\".\n"); - exit(1); - } -} - int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...) 
{ int ret = 0; From a883dd8b063699deaadfc9f0dae30cf53c0f1885 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 21 Feb 2018 19:22:14 -0800 Subject: [PATCH 05/25] README: re-add updated information link The "Information" link was removed from README file in commit d7843207e6fd ("README: update location of git repositories, remove broken info link"), because it redirected to a page that no longer existed on the Linux Foundation wiki. This page has just been restored, so we can add the link back again. Since the previous link was a redirection, use the updated link instead. Thanks to Luca Boccassi for investigating this issue, restoring and updating the page. Signed-off-by: Quentin Monnet --- README | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README b/README index 1b7f4427..f66fd5fa 100644 --- a/README +++ b/README @@ -1,5 +1,8 @@ This is a set of utilities for Linux networking. +Information: + https://wiki.linuxfoundation.org/networking/iproute2 + Download: http://www.kernel.org/pub/linux/utils/net/iproute2/ From 97352f1b334f3dc73882e387e2fcaf6e3342e41b Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Fri, 16 Feb 2018 04:11:20 +0900 Subject: [PATCH 06/25] ss: prepare rth when killing inet sock kill_inet_sock() expects an rtnl_handle instance to be passed via its inet_diag_arg argument. However, on the following calling path: generic_show_sock => show_one_inet_sock => kill_inet_sock the rth field of inet_diag_arg is not filled with the address of an rtnl_handle instance. As a result, ss crashes. This commit fills the field with a newly created rtnl_handle instance. Changes in v2: Instead of creating an rtnl_handle instance for each socket, create one in the upper layer and reuse it.
Signed-off-by: Masatake YAMATO Signed-off-by: Stephen Hemminger --- misc/ss.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/misc/ss.c b/misc/ss.c index 29a25070..e047f9c0 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -239,6 +239,7 @@ struct filter { uint64_t families; struct ssfilter *f; bool kill; + struct rtnl_handle *rth_for_killing; }; #define FAMILY_MASK(family) ((uint64_t)1 << (family)) @@ -4262,6 +4263,7 @@ static int generic_show_sock(const struct sockaddr_nl *addr, switch (r->sdiag_family) { case AF_INET: case AF_INET6: + inet_arg.rth = inet_arg.f->rth_for_killing; return show_one_inet_sock(addr, nlh, &inet_arg); case AF_UNIX: return unix_show_sock(addr, nlh, arg); @@ -4280,7 +4282,7 @@ static int handle_follow_request(struct filter *f) { int ret = 0; int groups = 0; - struct rtnl_handle rth; + struct rtnl_handle rth, rth2; if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << TCP_DB)) groups |= 1 << (SKNLGRP_INET_TCP_DESTROY - 1); @@ -4300,10 +4302,20 @@ static int handle_follow_request(struct filter *f) rth.dump = 0; rth.local.nl_pid = 0; + if (f->kill) { + if (rtnl_open_byproto(&rth2, groups, NETLINK_SOCK_DIAG)) { + rtnl_close(&rth); + return -1; + } + f->rth_for_killing = &rth2; + } + if (rtnl_dump_filter(&rth, generic_show_sock, f)) ret = -1; rtnl_close(&rth); + if (f->rth_for_killing) + rtnl_close(f->rth_for_killing); return ret; } From 844646a52837f4e37f9736bfb5a9b935e3cb7318 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Wed, 14 Feb 2018 10:55:16 +0200 Subject: [PATCH 07/25] devlink: Change empty line indication with indentations Currently multi-line objects are separated by new-lines. This patch changes this behavior by using indentations for separation. 
Signed-off-by: Arkadi Sharhsevsky Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 220b2bb2..8e089f80 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -35,6 +35,8 @@ #define ESWITCH_INLINE_MODE_NETWORK "network" #define ESWITCH_INLINE_MODE_TRANSPORT "transport" +static int g_new_line_count; + #define pr_err(args...) fprintf(stderr, ##args) #define pr_out(args...) \ do { \ @@ -43,6 +45,7 @@ g_indent_newline = false; \ } \ fprintf(stdout, ##args); \ + g_new_line_count = 0; \ } while (0) #define pr_out_sp(num, args...) \ @@ -50,6 +53,7 @@ int ret = fprintf(stdout, ##args); \ if (ret < num) \ fprintf(stdout, "%*s", num - ret, ""); \ + g_new_line_count = 0; \ } while (0) static int g_indent_level; @@ -77,8 +81,11 @@ static void __pr_out_indent_dec(void) static void __pr_out_newline(void) { - pr_out("\n"); - g_indent_newline = true; + if (g_new_line_count < 1) { + pr_out("\n"); + g_indent_newline = true; + } + g_new_line_count++; } static int _mnlg_socket_recv_run(struct mnlg_socket *nlg, @@ -1401,20 +1408,22 @@ static void pr_out_array_start(struct dl *dl, const char *name) jsonw_name(dl->jw, name); jsonw_start_array(dl->jw); } else { - if (!g_indent_newline) - __pr_out_newline(); - pr_out("%s:", name); - __pr_out_newline(); __pr_out_indent_inc(); + __pr_out_newline(); + pr_out("%s:", name); + __pr_out_indent_inc(); + __pr_out_newline(); } } static void pr_out_array_end(struct dl *dl) { - if (dl->json_output) + if (dl->json_output) { jsonw_end_array(dl->jw); - else + } else { __pr_out_indent_dec(); + __pr_out_indent_dec(); + } } static void pr_out_entry_start(struct dl *dl) From 049c58539f5d4037b06aba085356a4a000308b33 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Wed, 14 Feb 2018 10:55:17 +0200 Subject: [PATCH 08/25] devlink: mnlg: Add support for extended ack Add support for extended ack. 
Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/mnlg.c | 53 ++++++++++++++++++++++++++++++++++++++++++-- include/libnetlink.h | 1 + lib/libnetlink.c | 4 ++-- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/devlink/mnlg.c b/devlink/mnlg.c index 9e27de27..37c56873 100644 --- a/devlink/mnlg.c +++ b/devlink/mnlg.c @@ -18,6 +18,8 @@ #include #include +#include "libnetlink.h" +#include "utils.h" #include "mnlg.h" struct mnlg_socket { @@ -60,6 +62,39 @@ int mnlg_socket_send(struct mnlg_socket *nlg, const struct nlmsghdr *nlh) return mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len); } +static int mnlg_cb_noop(const struct nlmsghdr *nlh, void *data) +{ + return MNL_CB_OK; +} + +static int mnlg_cb_error(const struct nlmsghdr *nlh, void *data) +{ + const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); + + if (nl_dump_ext_ack(nlh, NULL)) + return MNL_CB_STOP; + + /* Netlink subsystems returns the errno value with different signess */ + if (err->error < 0) + errno = -err->error; + else + errno = err->error; + + return err->error == 0 ? 
MNL_CB_STOP : MNL_CB_ERROR; +} + +static int mnlg_cb_stop(const struct nlmsghdr *nlh, void *data) +{ + return MNL_CB_STOP; +} + +static mnl_cb_t mnlg_cb_array[NLMSG_MIN_TYPE] = { + [NLMSG_NOOP] = mnlg_cb_noop, + [NLMSG_ERROR] = mnlg_cb_error, + [NLMSG_DONE] = mnlg_cb_stop, + [NLMSG_OVERRUN] = mnlg_cb_noop, +}; + int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data) { int err; @@ -69,8 +104,9 @@ int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data) MNL_SOCKET_BUFFER_SIZE); if (err <= 0) break; - err = mnl_cb_run(nlg->buf, err, nlg->seq, nlg->portid, - data_cb, data); + err = mnl_cb_run2(nlg->buf, err, nlg->seq, nlg->portid, + data_cb, data, mnlg_cb_array, + ARRAY_SIZE(mnlg_cb_array)); } while (err > 0); return err; @@ -220,6 +256,7 @@ struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version) { struct mnlg_socket *nlg; struct nlmsghdr *nlh; + int one = 1; int err; nlg = malloc(sizeof(*nlg)); @@ -234,6 +271,16 @@ struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version) if (!nlg->nl) goto err_mnl_socket_open; + err = mnl_socket_setsockopt(nlg->nl, NETLINK_CAP_ACK, &one, + sizeof(one)); + if (err) + goto err_mnl_set_ack; + + err = mnl_socket_setsockopt(nlg->nl, NETLINK_EXT_ACK, &one, + sizeof(one)); + if (err) + goto err_mnl_set_ext_ack; + err = mnl_socket_bind(nlg->nl, 0, MNL_SOCKET_AUTOPID); if (err < 0) goto err_mnl_socket_bind; @@ -258,6 +305,8 @@ struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version) err_mnlg_socket_recv_run: err_mnlg_socket_send: err_mnl_socket_bind: +err_mnl_set_ext_ack: +err_mnl_set_ack: mnl_socket_close(nlg->nl); err_mnl_socket_open: free(nlg->buf); diff --git a/include/libnetlink.h b/include/libnetlink.h index d6322190..9d9249e6 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -109,6 +109,7 @@ int rtnl_send(struct rtnl_handle *rth, const void *buf, int) __attribute__((warn_unused_result)); int 
rtnl_send_check(struct rtnl_handle *rth, const void *buf, int) __attribute__((warn_unused_result)); +int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn); int addattr(struct nlmsghdr *n, int maxlen, int type); int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data); diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 7ca47b22..8bb1c8d7 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -65,7 +65,7 @@ static int err_attr_cb(const struct nlattr *attr, void *data) } /* dump netlink extended ack error message */ -static int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn) +int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn) { struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {}; const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); @@ -120,7 +120,7 @@ static int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn) #warning "libmnl required for error support" /* No extended error ack without libmnl */ -static int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn) +int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn) { return 0; } From 8cd644095842af3107320e86eeb01be6af6c77bb Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Wed, 14 Feb 2018 10:55:18 +0200 Subject: [PATCH 09/25] devlink: Add support for devlink resource abstraction Add support for devlink resource abstraction. The resources are represented by a tree based structure and are identified by a name and a size. Some resources can present their real time occupancy. 
First, the resources exposed by the driver can be observed, for example: $devlink resource show pci/0000:03:00.0 pci/0000:03:00.0: name kvd size 245760 unit entry resources: name linear size 98304 occ 0 unit entry size_min 0 size_max 147456 size_gran 128 name hash_double size 60416 unit entry size_min 32768 size_max 180224 size_gran 128 name hash_single size 87040 unit entry size_min 65536 size_max 212992 size_gran 128 The size of some resources can be changed. Examples: $devlink resource set pci/0000:03:00.0 path /kvd/hash_single size 73088 $devlink resource set pci/0000:03:00.0 path /kvd/hash_double size 74368 The changes do not apply immediately; this can be verified via the 'size_new' attribute, which represents the pending new size. For example: $devlink resource show pci/0000:03:00.0 pci/0000:03:00.0: name kvd size 245760 unit entry size_valid false resources: name linear size 98304 size_new 147456 occ 0 unit entry size_min 0 size_max 147456 size_gran 128 name hash_double size 60416 unit entry size_min 32768 size_max 180224 size_gran 128 name hash_single size 87040 unit entry size_min 65536 size_max 212992 size_gran 128 When a change is pending, a resource that has nested resources reports, via 'size_valid', whether the configuration of its children is valid (the sum of the children's sizes does not exceed the parent's size). A hot reload is needed for the changes to take effect. The hot reload through devlink will be introduced in the following patch.
Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 490 +++++++++++++++++++++++++++++++++++++++++++++- include/list.h | 5 + 2 files changed, 494 insertions(+), 1 deletion(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 8e089f80..51b9bf9a 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -185,6 +185,8 @@ static void ifname_map_free(struct ifname_map *ifname_map) #define DL_OPT_DPIPE_TABLE_NAME BIT(13) #define DL_OPT_DPIPE_TABLE_COUNTERS BIT(14) #define DL_OPT_ESWITCH_ENCAP_MODE BIT(15) +#define DL_OPT_RESOURCE_PATH BIT(16) +#define DL_OPT_RESOURCE_SIZE BIT(17) struct dl_opts { uint32_t present; /* flags of present items */ @@ -205,6 +207,10 @@ struct dl_opts { const char *dpipe_table_name; bool dpipe_counters_enable; bool eswitch_encap_mode; + const char *resource_path; + uint32_t resource_size; + uint32_t resource_id; + bool resource_id_valid; }; struct dl { @@ -953,6 +959,20 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required, if (err) return err; o_found |= DL_OPT_ESWITCH_ENCAP_MODE; + } else if (dl_argv_match(dl, "path") && + (o_all & DL_OPT_RESOURCE_PATH)) { + dl_arg_inc(dl); + err = dl_argv_str(dl, &opts->resource_path); + if (err) + return err; + o_found |= DL_OPT_RESOURCE_PATH; + } else if (dl_argv_match(dl, "size") && + (o_all & DL_OPT_RESOURCE_SIZE)) { + dl_arg_inc(dl); + err = dl_argv_uint32_t(dl, &opts->resource_size); + if (err) + return err; + o_found |= DL_OPT_RESOURCE_SIZE; } else { pr_err("Unknown option \"%s\"\n", dl_argv(dl)); return -EINVAL; @@ -1095,6 +1115,12 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) if (opts->present & DL_OPT_ESWITCH_ENCAP_MODE) mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, opts->eswitch_encap_mode); + if ((opts->present & DL_OPT_RESOURCE_PATH) && opts->resource_id_valid) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_ID, + opts->resource_id); + if (opts->present & DL_OPT_RESOURCE_SIZE) + 
mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, + opts->resource_size); } static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl, @@ -2684,6 +2710,91 @@ struct dpipe_header { unsigned int fields_count; }; +struct resource { + char *name; + uint64_t size; + uint64_t size_new; + uint64_t size_min; + uint64_t size_max; + uint64_t size_gran; + enum devlink_resource_unit unit; + bool size_valid; + uint64_t size_occ; + bool occ_valid; + uint64_t id; + struct list_head list; + struct list_head resource_list; + struct resource *parent; +}; + +struct resources { + struct list_head resource_list; +}; + +struct resource_ctx { + struct dl *dl; + int err; + struct resources *resources; + bool print_resources; + bool pending_change; +}; + +static struct resource *resource_alloc(void) +{ + struct resource *resource; + + resource = calloc(1, sizeof(struct resource)); + if (!resource) + return NULL; + INIT_LIST_HEAD(&resource->resource_list); + return resource; +} + +static void resource_free(struct resource *resource) +{ + struct resource *child_resource, *tmp; + + list_for_each_entry_safe(child_resource, tmp, &resource->resource_list, + list) { + free(child_resource->name); + resource_free(child_resource); + } + free(resource); +} + +static struct resources *resources_alloc(void) +{ + struct resources *resources; + + resources = calloc(1, sizeof(struct resources)); + if (!resources) + return NULL; + INIT_LIST_HEAD(&resources->resource_list); + return resources; +} + +static void resources_free(struct resources *resources) +{ + struct resource *resource, *tmp; + + list_for_each_entry_safe(resource, tmp, &resources->resource_list, list) + resource_free(resource); +} + +static int resource_ctx_init(struct resource_ctx *ctx, struct dl *dl) +{ + ctx->resources = resources_alloc(); + if (!ctx->resources) + return -ENOMEM; + ctx->dl = dl; + return 0; +} + +static void resource_ctx_fini(struct resource_ctx *ctx) +{ + resources_free(ctx->resources); +} + struct dpipe_ctx { 
struct dl *dl; int err; @@ -3248,6 +3359,66 @@ err_match_parse: return -EINVAL; } +static struct resource * +resource_find(struct resources *resources, struct resource *resource, + uint64_t resource_id) +{ + struct list_head *list_head; + + if (!resource) + list_head = &resources->resource_list; + else + list_head = &resource->resource_list; + + list_for_each_entry(resource, list_head, list) { + struct resource *child_resource; + + if (resource->id == resource_id) + return resource; + + child_resource = resource_find(resources, resource, + resource_id); + if (child_resource) + return child_resource; + } + return NULL; +} + +static void +resource_path_print(struct dl *dl, struct resources *resources, + uint64_t resource_id) +{ + struct resource *resource, *parent_resource; + const char del[] = "/"; + int path_len = 0; + char *path; + + resource = resource_find(resources, NULL, resource_id); + if (!resource) + return; + + for (parent_resource = resource; parent_resource; + parent_resource = parent_resource->parent) + path_len += strlen(parent_resource->name) + 1; + + path_len++; + path = calloc(1, path_len); + if (!path) + return; + + path += path_len - 1; + for (parent_resource = resource; parent_resource; + parent_resource = parent_resource->parent) { + path -= strlen(parent_resource->name); + memcpy(path, parent_resource->name, + strlen(parent_resource->name)); + path -= strlen(del); + memcpy(path, del, strlen(del)); + } + pr_out_str(dl, "resource_path", path); + free(path); +} + static int dpipe_table_show(struct dpipe_ctx *ctx, struct nlattr *nl) { struct nlattr *nla_table[DEVLINK_ATTR_MAX + 1] = {}; @@ -3809,11 +3980,325 @@ static int cmd_dpipe(struct dl *dl) return -ENOENT; } +static int +resource_parse(struct resource_ctx *ctx, struct resource *resource, + struct nlattr **nla_resource) +{ + if (!nla_resource[DEVLINK_ATTR_RESOURCE_NAME] || + !nla_resource[DEVLINK_ATTR_RESOURCE_SIZE] || + !nla_resource[DEVLINK_ATTR_RESOURCE_ID] || + 
!nla_resource[DEVLINK_ATTR_RESOURCE_UNIT] || + !nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_MIN] || + !nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_MAX] || + !nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_GRAN]) { + return -EINVAL; + } + + resource->name = strdup(mnl_attr_get_str(nla_resource[DEVLINK_ATTR_RESOURCE_NAME])); + resource->size = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE]); + resource->id = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_ID]); + resource->unit = mnl_attr_get_u8(nla_resource[DEVLINK_ATTR_RESOURCE_UNIT]); + resource->size_min = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_MIN]); + resource->size_max = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_MAX]); + resource->size_gran = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_GRAN]); + + if (nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_NEW]) + resource->size_new = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_NEW]); + else + resource->size_new = resource->size; + + if (nla_resource[DEVLINK_ATTR_RESOURCE_OCC]) { + resource->size_occ = mnl_attr_get_u64(nla_resource[DEVLINK_ATTR_RESOURCE_OCC]); + resource->occ_valid = true; + } + + if (resource->size_new != resource->size) + ctx->pending_change = true; + + return 0; +} + +static int +resource_get(struct resource_ctx *ctx, struct resource *resource, + struct resource *parent_resource, struct nlattr *nl) +{ + struct nlattr *nla_resource[DEVLINK_ATTR_MAX + 1] = {}; + struct nlattr *nla_child_resource; + struct nlattr *nla_resources; + bool top = false; + int err; + + if (!resource) { + nla_resources = nl; + top = true; + goto out; + } + + err = mnl_attr_parse_nested(nl, attr_cb, nla_resource); + if (err != MNL_CB_OK) + return -EINVAL; + + err = resource_parse(ctx, resource, nla_resource); + if (err) + return err; + + resource->parent = parent_resource; + if (!nla_resource[DEVLINK_ATTR_RESOURCE_LIST]) + return 0; + + resource->size_valid = !!mnl_attr_get_u8(nla_resource[DEVLINK_ATTR_RESOURCE_SIZE_VALID]); 
+ nla_resources = nla_resource[DEVLINK_ATTR_RESOURCE_LIST]; +out: + mnl_attr_for_each_nested(nla_child_resource, nla_resources) { + struct resource *child_resource; + struct list_head *list; + + child_resource = resource_alloc(); + if (!child_resource) + return -ENOMEM; + + if (top) + list = &ctx->resources->resource_list; + else + list = &resource->resource_list; + + list_add_tail(&child_resource->list, list); + err = resource_get(ctx, child_resource, resource, + nla_child_resource); + if (err) + return err; + } + + return 0; +} + +static const char *resource_unit_str_get(enum devlink_resource_unit unit) +{ + switch (unit) { + case DEVLINK_RESOURCE_UNIT_ENTRY: return "entry"; + default: return ""; + } +} + +static void resource_show(struct resource *resource, + struct resource_ctx *ctx) +{ + struct resource *child_resource; + struct dl *dl = ctx->dl; + + pr_out_str(dl, "name", resource->name); + if (dl->verbose) + resource_path_print(dl, ctx->resources, resource->id); + pr_out_uint(dl, "size", resource->size); + if (resource->size != resource->size_new) + pr_out_uint(dl, "size_new", resource->size_new); + if (resource->occ_valid) + pr_out_uint(dl, "occ", resource->size_occ); + pr_out_str(dl, "unit", resource_unit_str_get(resource->unit)); + + if (resource->size_min != resource->size_max) { + pr_out_uint(dl, "size_min", resource->size_min); + pr_out_uint(dl, "size_max", resource->size_max); + pr_out_uint(dl, "size_gran", resource->size_gran); + } + + if (list_empty(&resource->resource_list)) + return; + + if (ctx->pending_change) + pr_out_str(dl, "size_valid", resource->size_valid ? 
+ "true" : "false"); + pr_out_array_start(dl, "resources"); + list_for_each_entry(child_resource, &resource->resource_list, list) { + pr_out_entry_start(dl); + resource_show(child_resource, ctx); + pr_out_entry_end(dl); + } + pr_out_array_end(dl); +} + +static void +resources_show(struct resource_ctx *ctx, struct nlattr **tb) +{ + struct resources *resources = ctx->resources; + struct resource *resource; + + list_for_each_entry(resource, &resources->resource_list, list) { + pr_out_handle_start_arr(ctx->dl, tb); + resource_show(resource, ctx); + pr_out_handle_end(ctx->dl); + } +} + +static int resources_get(struct resource_ctx *ctx, struct nlattr **tb) +{ + return resource_get(ctx, NULL, NULL, tb[DEVLINK_ATTR_RESOURCE_LIST]); +} + +static int cmd_resource_dump_cb(const struct nlmsghdr *nlh, void *data) +{ + struct resource_ctx *ctx = data; + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + int err; + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_RESOURCE_LIST]) + return MNL_CB_ERROR; + + err = resources_get(ctx, tb); + if (err) { + ctx->err = err; + return MNL_CB_ERROR; + } + + if (ctx->print_resources) + resources_show(ctx, tb); + + return MNL_CB_OK; +} + +static int cmd_resource_show(struct dl *dl) +{ + struct nlmsghdr *nlh; + struct resource_ctx ctx = {}; + int err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RESOURCE_DUMP, + NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, 0); + if (err) + return err; + + err = resource_ctx_init(&ctx, dl); + if (err) + return err; + + ctx.print_resources = true; + pr_out_section_start(dl, "resources"); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_resource_dump_cb, &ctx); + pr_out_section_end(dl); + resource_ctx_fini(&ctx); + return err; +} + +static void cmd_resource_help(void) +{ + pr_err("Usage: devlink resource show DEV\n" + " devlink resource set 
DEV path PATH size SIZE\n"); +} + +static struct resource * +resource_find_by_name(struct list_head *list, char *name) +{ + struct resource *resource; + + list_for_each_entry(resource, list, list) { + if (!strcmp(resource->name, name)) + return resource; + } + return NULL; +} + +static int +resource_path_parse(struct resource_ctx *ctx, const char *resource_path, + uint32_t *p_resource_id, bool *p_resource_valid) +{ + struct resource *resource; + uint32_t resource_id = 0; + char *resource_path_dup; + struct list_head *list; + const char del[] = "/"; + char *resource_name; + + resource_path_dup = strdup(resource_path); + list = &ctx->resources->resource_list; + resource_name = strtok(resource_path_dup, del); + while (resource_name != NULL) { + resource = resource_find_by_name(list, resource_name); + if (!resource) + goto err_resource_lookup; + + list = &resource->resource_list; + resource_name = strtok(NULL, del); + resource_id = resource->id; + } + free(resource_path_dup); + *p_resource_valid = true; + *p_resource_id = resource_id; + return 0; + +err_resource_lookup: + free(resource_path_dup); + return -EINVAL; +} + +static int cmd_resource_set(struct dl *dl) +{ + struct nlmsghdr *nlh; + struct resource_ctx ctx = {}; + int err; + + err = resource_ctx_init(&ctx, dl); + if (err) + return err; + + ctx.print_resources = false; + err = dl_argv_parse(dl, DL_OPT_HANDLE | DL_OPT_RESOURCE_PATH | + DL_OPT_RESOURCE_SIZE, 0); + if (err) + goto out; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RESOURCE_DUMP, + NLM_F_REQUEST); + dl_opts_put(nlh, dl); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_resource_dump_cb, &ctx); + if (err) { + pr_err("error getting resources %s\n", strerror(ctx.err)); + goto out; + } + + err = resource_path_parse(&ctx, dl->opts.resource_path, + &dl->opts.resource_id, + &dl->opts.resource_id_valid); + if (err) { + pr_err("error parsing resource path %s\n", strerror(err)); + goto out; + } + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RESOURCE_SET, + 
NLM_F_REQUEST | NLM_F_ACK); + + dl_opts_put(nlh, dl); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +out: + resource_ctx_fini(&ctx); + return err; +} + +static int cmd_resource(struct dl *dl) +{ + if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { + cmd_resource_help(); + return 0; + } else if (dl_argv_match(dl, "show")) { + dl_arg_inc(dl); + return cmd_resource_show(dl); + } else if (dl_argv_match(dl, "set")) { + dl_arg_inc(dl); + return cmd_resource_set(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +} + static void help(void) { pr_err("Usage: devlink [ OPTIONS ] OBJECT { COMMAND | help }\n" " devlink [ -f[orce] ] -b[atch] filename\n" - "where OBJECT := { dev | port | sb | monitor | dpipe }\n" + "where OBJECT := { dev | port | sb | monitor | dpipe | resource }\n" " OPTIONS := { -V[ersion] | -n[no-nice-names] | -j[json] | -p[pretty] | -v[verbose] }\n"); } @@ -3840,6 +4325,9 @@ static int dl_cmd(struct dl *dl, int argc, char **argv) } else if (dl_argv_match(dl, "dpipe")) { dl_arg_inc(dl); return cmd_dpipe(dl); + } else if (dl_argv_match(dl, "resource")) { + dl_arg_inc(dl); + return cmd_resource(dl); } pr_err("Object \"%s\" not found\n", dl_argv(dl)); return -ENOENT; diff --git a/include/list.h b/include/list.h index 5af737c7..5d86b131 100644 --- a/include/list.h +++ b/include/list.h @@ -108,6 +108,11 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->pprev = &h->first; } +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + #define hlist_for_each(pos, head) \ for (pos = (head)->first; pos ; pos = pos->next) From 06dd94f952e50edeffe5ea8b7b95b5cd562b9365 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Wed, 14 Feb 2018 10:55:19 +0200 Subject: [PATCH 10/25] devlink: Add support for hot reload Add support for hot reload. It should be used in order for resource updates to take place. 
Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/devlink/devlink.c b/devlink/devlink.c index 51b9bf9a..96b33e98 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -1179,6 +1179,7 @@ static void cmd_dev_help(void) pr_err(" [ inline-mode { none | link | network | transport } ]\n"); pr_err(" [ encap { disable | enable } ]\n"); pr_err(" devlink dev eswitch show DEV\n"); + pr_err(" devlink dev reload DEV\n"); } static bool cmp_arr_last_handle(struct dl *dl, const char *bus_name, @@ -1620,6 +1621,31 @@ static int cmd_dev_show(struct dl *dl) return err; } +static void cmd_dev_reload_help(void) +{ + pr_err("Usage: devlink dev reload [ DEV ]\n"); +} + +static int cmd_dev_reload(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { + cmd_dev_reload_help(); + return 0; + } + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RELOAD, + NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, 0); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + static int cmd_dev(struct dl *dl) { if (dl_argv_match(dl, "help")) { @@ -1632,6 +1658,9 @@ static int cmd_dev(struct dl *dl) } else if (dl_argv_match(dl, "eswitch")) { dl_arg_inc(dl); return cmd_dev_eswitch(dl); + } else if (dl_argv_match(dl, "reload")) { + dl_arg_inc(dl); + return cmd_dev_reload(dl); } pr_err("Command \"%s\" not found\n", dl_argv(dl)); return -ENOENT; From 06a2cda9b0636fa97abe41035aaff39bd97f0414 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Wed, 14 Feb 2018 10:55:20 +0200 Subject: [PATCH 11/25] devlink: Move dpipe context from heap to stack Move dpipe context to stack instead of dynamically. 
Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 67 +++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 40 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 96b33e98..70521823 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -2882,25 +2882,15 @@ static void dpipe_header_del(struct dpipe_header *header) list_del(&header->list); } -static struct dpipe_ctx *dpipe_ctx_alloc(struct dl *dl) +static int dpipe_ctx_init(struct dpipe_ctx *ctx, struct dl *dl) { - struct dpipe_ctx *ctx; - - ctx = calloc(1, sizeof(struct dpipe_ctx)); - if (!ctx) - return NULL; ctx->dl = dl; INIT_LIST_HEAD(&ctx->global_headers); INIT_LIST_HEAD(&ctx->local_headers); - return ctx; + return 0; } -static void dpipe_ctx_free(struct dpipe_ctx *ctx) -{ - free(ctx); -} - -static void dpipe_ctx_clear(struct dpipe_ctx *ctx) +static void dpipe_ctx_fini(struct dpipe_ctx *ctx) { struct dpipe_header *header, *tmp; @@ -3171,7 +3161,7 @@ static int cmd_dpipe_header_cb(const struct nlmsghdr *nlh, void *data) static int cmd_dpipe_headers_show(struct dl *dl) { struct nlmsghdr *nlh; - struct dpipe_ctx *ctx; + struct dpipe_ctx ctx = {}; uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; int err; @@ -3181,20 +3171,19 @@ static int cmd_dpipe_headers_show(struct dl *dl) if (err) return err; - ctx = dpipe_ctx_alloc(dl); - if (!ctx) - return -ENOMEM; + err = dpipe_ctx_init(&ctx, dl); + if (err) + return err; - ctx->print_headers = true; + ctx.print_headers = true; pr_out_section_start(dl, "header"); - err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_header_cb, ctx); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_header_cb, &ctx); if (err) - pr_err("error get headers %s\n", strerror(ctx->err)); + pr_err("error get headers %s\n", strerror(ctx.err)); pr_out_section_end(dl); - dpipe_ctx_clear(ctx); - dpipe_ctx_free(ctx); + dpipe_ctx_fini(&ctx); return err; } @@ -3532,13 +3521,13 @@ static int 
cmd_dpipe_table_show_cb(const struct nlmsghdr *nlh, void *data) static int cmd_dpipe_table_show(struct dl *dl) { struct nlmsghdr *nlh; - struct dpipe_ctx *ctx; + struct dpipe_ctx ctx = {}; uint16_t flags = NLM_F_REQUEST; int err; - ctx = dpipe_ctx_alloc(dl); - if (!ctx) - return -ENOMEM; + err = dpipe_ctx_init(&ctx, dl); + if (err) + return err; err = dl_argv_parse(dl, DL_OPT_HANDLE, DL_OPT_DPIPE_TABLE_NAME); if (err) @@ -3546,9 +3535,9 @@ static int cmd_dpipe_table_show(struct dl *dl) nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_DPIPE_HEADERS_GET, flags); dl_opts_put(nlh, dl); - err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_header_cb, ctx); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_header_cb, &ctx); if (err) { - pr_err("error get headers %s\n", strerror(ctx->err)); + pr_err("error get headers %s\n", strerror(ctx.err)); goto out; } @@ -3557,11 +3546,10 @@ static int cmd_dpipe_table_show(struct dl *dl) dl_opts_put(nlh, dl); pr_out_section_start(dl, "table"); - _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_table_show_cb, ctx); + _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_table_show_cb, &ctx); pr_out_section_end(dl); out: - dpipe_ctx_clear(ctx); - dpipe_ctx_free(ctx); + dpipe_ctx_fini(&ctx); return err; } @@ -3929,13 +3917,13 @@ static int cmd_dpipe_table_entry_dump_cb(const struct nlmsghdr *nlh, void *data) static int cmd_dpipe_table_dump(struct dl *dl) { struct nlmsghdr *nlh; - struct dpipe_ctx *ctx; + struct dpipe_ctx ctx = {}; uint16_t flags = NLM_F_REQUEST; int err; - ctx = dpipe_ctx_alloc(dl); - if (!ctx) - return -ENOMEM; + err = dpipe_ctx_init(&ctx, dl); + if (err) + return err; err = dl_argv_parse(dl, DL_OPT_HANDLE | DL_OPT_DPIPE_TABLE_NAME, 0); if (err) @@ -3943,9 +3931,9 @@ static int cmd_dpipe_table_dump(struct dl *dl) nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_DPIPE_HEADERS_GET, flags); dl_opts_put(nlh, dl); - err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_header_cb, ctx); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_header_cb, 
&ctx); if (err) { - pr_err("error get headers %s\n", strerror(ctx->err)); + pr_err("error get headers %s\n", strerror(ctx.err)); goto out; } @@ -3954,11 +3942,10 @@ static int cmd_dpipe_table_dump(struct dl *dl) dl_opts_put(nlh, dl); pr_out_section_start(dl, "table_entry"); - _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_table_entry_dump_cb, ctx); + _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_table_entry_dump_cb, &ctx); pr_out_section_end(dl); out: - dpipe_ctx_clear(ctx); - dpipe_ctx_free(ctx); + dpipe_ctx_fini(&ctx); return err; } From ead180274caf409c85af388ad0c945e1bdf05e5d Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Wed, 14 Feb 2018 10:55:21 +0200 Subject: [PATCH 12/25] devlink: Add support for resource/dpipe relation Dpipe - Each dpipe table can have one resource which is mapped to it. The resource is presented via its full path. Furthermore, the number of units consumed by single table entry is presented. Resource - Each resource presents the dpipe tables that use it. Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 211 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 180 insertions(+), 31 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 70521823..69c3c5d9 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -2739,6 +2739,17 @@ struct dpipe_header { unsigned int fields_count; }; +struct dpipe_table { + struct list_head list; + char *name; + unsigned int resource_id; + bool resource_valid; +}; + +struct dpipe_tables { + struct list_head table_list; +}; + struct resource { char *name; uint64_t size; @@ -2764,6 +2775,7 @@ struct resource_ctx { struct dl *dl; int err; struct resources *resources; + struct dpipe_tables *tables; bool print_resources; bool pending_change; }; @@ -2829,7 +2841,10 @@ struct dpipe_ctx { int err; struct list_head global_headers; struct list_head local_headers; + struct dpipe_tables *tables; + struct resources *resources; bool 
print_headers; + bool print_tables; }; static struct dpipe_header *dpipe_header_alloc(unsigned int fields_count) @@ -2882,8 +2897,42 @@ static void dpipe_header_del(struct dpipe_header *header) list_del(&header->list); } +static struct dpipe_table *dpipe_table_alloc(void) +{ + return calloc(1, sizeof(struct dpipe_table)); +} + +static void dpipe_table_free(struct dpipe_table *table) +{ + free(table); +} + +static struct dpipe_tables *dpipe_tables_alloc(void) +{ + struct dpipe_tables *tables; + + tables = calloc(1, sizeof(struct dpipe_tables)); + if (!tables) + return NULL; + INIT_LIST_HEAD(&tables->table_list); + return tables; +} + +static void dpipe_tables_free(struct dpipe_tables *tables) +{ + struct dpipe_table *table, *tmp; + + list_for_each_entry_safe(table, tmp, &tables->table_list, list) + dpipe_table_free(table); + free(tables); +} + static int dpipe_ctx_init(struct dpipe_ctx *ctx, struct dl *dl) { + ctx->tables = dpipe_tables_alloc(); + if (!ctx->tables) + return -ENOMEM; + ctx->dl = dl; INIT_LIST_HEAD(&ctx->global_headers); INIT_LIST_HEAD(&ctx->local_headers); @@ -2906,6 +2955,7 @@ static void dpipe_ctx_fini(struct dpipe_ctx *ctx) dpipe_header_clear(header); dpipe_header_free(header); } + dpipe_tables_free(ctx->tables); } static const char *dpipe_header_id2s(struct dpipe_ctx *ctx, @@ -3440,8 +3490,10 @@ resource_path_print(struct dl *dl, struct resources *resources, static int dpipe_table_show(struct dpipe_ctx *ctx, struct nlattr *nl) { struct nlattr *nla_table[DEVLINK_ATTR_MAX + 1] = {}; + struct dpipe_table *table; + uint32_t resource_units; bool counters_enabled; - const char *name; + bool resource_valid; uint32_t size; int err; @@ -3457,15 +3509,36 @@ static int dpipe_table_show(struct dpipe_ctx *ctx, struct nlattr *nl) return -EINVAL; } - name = mnl_attr_get_str(nla_table[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + table = dpipe_table_alloc(); + if (!table) + return -ENOMEM; + + table->name = 
strdup(mnl_attr_get_str(nla_table[DEVLINK_ATTR_DPIPE_TABLE_NAME])); size = mnl_attr_get_u32(nla_table[DEVLINK_ATTR_DPIPE_TABLE_SIZE]); counters_enabled = !!mnl_attr_get_u8(nla_table[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]); - pr_out_str(ctx->dl, "name", name); + resource_valid = !!nla_table[DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID]; + if (resource_valid) { + table->resource_id = mnl_attr_get_u64(nla_table[DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID]); + table->resource_valid = true; + } + + list_add_tail(&table->list, &ctx->tables->table_list); + if (!ctx->print_tables) + return 0; + + pr_out_str(ctx->dl, "name", table->name); pr_out_uint(ctx->dl, "size", size); pr_out_str(ctx->dl, "counters_enabled", counters_enabled ? "true" : "false"); + if (resource_valid) { + resource_units = mnl_attr_get_u32(nla_table[DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS]); + resource_path_print(ctx->dl, ctx->resources, + table->resource_id); + pr_out_uint(ctx->dl, "resource_units", resource_units); + } + pr_out_array_start(ctx->dl, "match"); if (dpipe_table_matches_show(ctx, nla_table[DEVLINK_ATTR_DPIPE_TABLE_MATCHES])) goto err_matches_show; @@ -3490,15 +3563,18 @@ static int dpipe_tables_show(struct dpipe_ctx *ctx, struct nlattr **tb) struct nlattr *nla_table; mnl_attr_for_each_nested(nla_table, nla_tables) { - pr_out_handle_start_arr(ctx->dl, tb); + if (ctx->print_tables) + pr_out_handle_start_arr(ctx->dl, tb); if (dpipe_table_show(ctx, nla_table)) goto err_table_show; - pr_out_handle_end(ctx->dl); + if (ctx->print_tables) + pr_out_handle_end(ctx->dl); } return 0; err_table_show: - pr_out_handle_end(ctx->dl); + if (ctx->print_tables) + pr_out_handle_end(ctx->dl); return -EINVAL; } @@ -3518,38 +3594,68 @@ static int cmd_dpipe_table_show_cb(const struct nlmsghdr *nlh, void *data) return MNL_CB_OK; } +static int cmd_resource_dump_cb(const struct nlmsghdr *nlh, void *data); + static int cmd_dpipe_table_show(struct dl *dl) { struct nlmsghdr *nlh; - struct dpipe_ctx ctx = {}; + struct dpipe_ctx 
dpipe_ctx = {}; + struct resource_ctx resource_ctx = {}; uint16_t flags = NLM_F_REQUEST; int err; - err = dpipe_ctx_init(&ctx, dl); + err = dl_argv_parse(dl, DL_OPT_HANDLE, DL_OPT_DPIPE_TABLE_NAME); if (err) return err; - err = dl_argv_parse(dl, DL_OPT_HANDLE, DL_OPT_DPIPE_TABLE_NAME); - if (err) - goto out; - nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_DPIPE_HEADERS_GET, flags); + + err = dpipe_ctx_init(&dpipe_ctx, dl); + if (err) + return err; + + dpipe_ctx.print_tables = true; + dl_opts_put(nlh, dl); - err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_header_cb, &ctx); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_header_cb, + &dpipe_ctx); if (err) { - pr_err("error get headers %s\n", strerror(ctx.err)); - goto out; + pr_err("error get headers %s\n", strerror(dpipe_ctx.err)); + goto err_headers_get; } + err = resource_ctx_init(&resource_ctx, dl); + if (err) + goto err_resource_ctx_init; + + resource_ctx.print_resources = false; + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RESOURCE_DUMP, flags); + dl_opts_put(nlh, dl); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_resource_dump_cb, + &resource_ctx); + if (err) { + pr_err("error get resources %s\n", strerror(resource_ctx.err)); + goto err_resource_dump; + } + + dpipe_ctx.resources = resource_ctx.resources; flags = NLM_F_REQUEST | NLM_F_ACK; nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_DPIPE_TABLE_GET, flags); dl_opts_put(nlh, dl); pr_out_section_start(dl, "table"); - _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_table_show_cb, &ctx); + _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_table_show_cb, &dpipe_ctx); pr_out_section_end(dl); -out: - dpipe_ctx_fini(&ctx); + + resource_ctx_fini(&resource_ctx); + dpipe_ctx_fini(&dpipe_ctx); + return 0; + +err_resource_dump: + resource_ctx_fini(&resource_ctx); +err_resource_ctx_init: +err_headers_get: + dpipe_ctx_fini(&dpipe_ctx); return err; } @@ -4100,7 +4206,9 @@ static void resource_show(struct resource *resource, struct resource_ctx *ctx) { struct resource 
*child_resource; + struct dpipe_table *table; struct dl *dl = ctx->dl; + bool array = false; pr_out_str(dl, "name", resource->name); if (dl->verbose) @@ -4118,6 +4226,27 @@ static void resource_show(struct resource *resource, pr_out_uint(dl, "size_gran", resource->size_gran); } + list_for_each_entry(table, &ctx->tables->table_list, list) + if (table->resource_id == resource->id && + table->resource_valid) + array = true; + + if (array) + pr_out_array_start(dl, "dpipe_tables"); + else + pr_out_str(dl, "dpipe_tables", "none"); + + list_for_each_entry(table, &ctx->tables->table_list, list) { + if (table->resource_id != resource->id || + !table->resource_valid) + continue; + pr_out_entry_start(dl); + pr_out_str(dl, "table_name", table->name); + pr_out_entry_end(dl); + } + if (array) + pr_out_array_end(dl); + if (list_empty(&resource->resource_list)) return; @@ -4178,25 +4307,45 @@ static int cmd_resource_dump_cb(const struct nlmsghdr *nlh, void *data) static int cmd_resource_show(struct dl *dl) { struct nlmsghdr *nlh; - struct resource_ctx ctx = {}; + struct dpipe_ctx dpipe_ctx = {}; + struct resource_ctx resource_ctx = {}; int err; + err = dl_argv_parse(dl, DL_OPT_HANDLE, 0); + if (err) + return err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_DPIPE_TABLE_GET, + NLM_F_REQUEST); + dl_opts_put(nlh, dl); + + err = dpipe_ctx_init(&dpipe_ctx, dl); + if (err) + return err; + + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_dpipe_table_show_cb, + &dpipe_ctx); + if (err) { + pr_err("error get tables %s\n", strerror(dpipe_ctx.err)); + goto out; + } + + err = resource_ctx_init(&resource_ctx, dl); + if (err) + goto out; + + resource_ctx.print_resources = true; + resource_ctx.tables = dpipe_ctx.tables; nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_RESOURCE_DUMP, NLM_F_REQUEST | NLM_F_ACK); - - err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE, 0); - if (err) - return err; - - err = resource_ctx_init(&ctx, dl); - if (err) - return err; - - ctx.print_resources = true; + 
dl_opts_put(nlh, dl); pr_out_section_start(dl, "resources"); - err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_resource_dump_cb, &ctx); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_resource_dump_cb, + &resource_ctx); pr_out_section_end(dl); - resource_ctx_fini(&ctx); + resource_ctx_fini(&resource_ctx); +out: + dpipe_ctx_fini(&dpipe_ctx); return err; } From 58b48c5d75e2960dfcd947975911a170ae765975 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Wed, 14 Feb 2018 10:55:22 +0200 Subject: [PATCH 13/25] devlink: Update man pages and add resource man Add resource man, and update dev manual for reload command. Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- man/man8/devlink-dev.8 | 15 +++++++ man/man8/devlink-resource.8 | 78 +++++++++++++++++++++++++++++++++++++ man/man8/devlink.8 | 1 + 3 files changed, 94 insertions(+) create mode 100644 man/man8/devlink-resource.8 diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8 index b074d57a..7c749dda 100644 --- a/man/man8/devlink-dev.8 +++ b/man/man8/devlink-dev.8 @@ -42,6 +42,10 @@ devlink-dev \- devlink device configuration .BR "devlink dev eswitch show" .IR DEV +.ti -8 +.BR "devlink dev reload" +.IR DEV + .SH "DESCRIPTION" .SS devlink dev show - display devlink device attributes @@ -94,6 +98,12 @@ Set eswitch encapsulation support .I enable - Enable encapsulation support +.SS devlink dev reload - perform hot reload of the driver. + +.PP +.I "DEV" +- Specifies the devlink device to reload. + .SH "EXAMPLES" .PP devlink dev show @@ -114,6 +124,11 @@ Shows the eswitch mode of specified devlink device. devlink dev eswitch set pci/0000:01:00.0 mode switchdev .RS 4 Sets the eswitch mode of specified devlink device to switchdev. +.RE +.PP +devlink dev reload pci/0000:01:00.0 +.RS 4 +Performs hot reload of specified devlink device. 
.SH SEE ALSO .BR devlink (8), diff --git a/man/man8/devlink-resource.8 b/man/man8/devlink-resource.8 new file mode 100644 index 00000000..b8f78806 --- /dev/null +++ b/man/man8/devlink-resource.8 @@ -0,0 +1,78 @@ +.TH DEVLINK\-RESOURCE 8 "11 Feb 2018" "iproute2" "Linux" +.SH NAME +devlink-resource \- devlink device resource configuration +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B devlink +.RI "[ " OPTIONS " ]" +.B resource +.RI " { " COMMAND " | " +.BR help " }" +.sp + +.ti -8 +.IR OPTIONS " := { " +\fB\-v\fR[\fIerbose\fR] } + +.ti -8 +.B devlink resource show +.IR DEV + +.ti -8 +.B devlink resource help + +.ti -8 +.BR "devlink resource set" +.IR DEV +.BI path " RESOURCE_PATH" +.BI size " RESOURCE_SIZE" + +.SH "DESCRIPTION" +.SS devlink resource show - display devlink device's resources + +.PP +.I "DEV" +- specifies the devlink device to show. + +.in +4 +Format is: +.in +2 +BUS_NAME/BUS_ADDRESS + +.SS devlink resource set - sets resource size of specific resource + +.PP +.I "DEV" +- specifies the devlink device. + +.TP +.BI path " RESOURCE_PATH" +Resource's path. + +.TP +.BI size " RESOURCE_SIZE" +The new resource's size. + +.SH "EXAMPLES" +.PP +devlink resource show pci/0000:01:00.0 +.RS 4 +Shows the resources of the specified devlink device. +.RE +.PP +devlink resource set pci/0000:01:00.0 path /kvd/linear size 98304 +.RS 4 +Sets the size of the specified resource for the specified devlink device. + +.SH SEE ALSO +.BR devlink (8), +.BR devlink-port (8), +.BR devlink-sb (8), +.BR devlink-monitor (8), +.br + +.SH AUTHOR +Arkadi Sharshevsky diff --git a/man/man8/devlink.8 b/man/man8/devlink.8 index a975ef34..b83909da 100644 --- a/man/man8/devlink.8 +++ b/man/man8/devlink.8 @@ -103,6 +103,7 @@ Exit status is 0 if command was successful or a positive integer upon failure.
.BR devlink-port (8), .BR devlink-monitor (8), .BR devlink-sb (8), +.BR devlink-resource (8), .br .SH REPORTING BUGS From 4ac152d003311c43a337b60a5123ba9919d65121 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Feb 2018 14:47:18 +0200 Subject: [PATCH 14/25] rdma: Avoid memory leak for skipper resource The call to get_task_name() allocates memory which is not freed in case of skipping the object. Fixes: 8ecac46a60ff ("rdma: Add QP resource tracking information") Signed-off-by: Leon Romanovsky Signed-off-by: Stephen Hemminger --- rdma/res.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rdma/res.c b/rdma/res.c index 2a63e712..62f5c544 100644 --- a/rdma/res.c +++ b/rdma/res.c @@ -395,8 +395,10 @@ static int res_qp_parse_cb(const struct nlmsghdr *nlh, void *data) comm = get_task_name(pid); } - if (rd_check_is_filtered(rd, "pid", pid)) + if (rd_check_is_filtered(rd, "pid", pid)) { + free(comm); continue; + } if (nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]) /* discard const from mnl_attr_get_str */ From 728eb8d00b3b87adcaf25b19cf5fb2dbb1965b60 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 23 Feb 2018 14:10:09 -0500 Subject: [PATCH 15/25] ip: Properly display AF_BRIDGE address information for neighbor events The vxlan driver when a neighbor add/delete event occurs sends NDA_DST filled with a union: union vxlan_addr { struct sockaddr_in sin; struct sockaddr_in6 sin6; struct sockaddr sa; }; This eventually calls rt_addr_n2a_r which had no handler for the AF_BRIDGE family and "???" was being printed. Add code to properly display this data when requested. 
Signed-off-by: Donald Sharp Signed-off-by: Stephen Hemminger --- lib/utils.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/utils.c b/lib/utils.c index 8e15625e..379739d6 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -983,6 +983,25 @@ const char *rt_addr_n2a_r(int af, int len, } case AF_PACKET: return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen); + case AF_BRIDGE: + { + const union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } *sa = addr; + + switch (sa->sa.sa_family) { + case AF_INET: + return inet_ntop(AF_INET, &sa->sin.sin_addr, + buf, buflen); + case AF_INET6: + return inet_ntop(AF_INET6, &sa->sin6.sin6_addr, + buf, buflen); + } + + /* fallthrough */ + } default: return "???"; } From f85adc61ddd2d401af790781cc16b82dfe39b7de Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Wed, 28 Feb 2018 11:24:22 +0200 Subject: [PATCH 16/25] devlink: Fix error reporting The current code doesn't set errno in case of extended ack. Fixes: 049c58539f5d ("devlink: mnlg: Add support for extended ack") Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/mnlg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/devlink/mnlg.c b/devlink/mnlg.c index 37c56873..3d28453a 100644 --- a/devlink/mnlg.c +++ b/devlink/mnlg.c @@ -71,15 +71,15 @@ static int mnlg_cb_error(const struct nlmsghdr *nlh, void *data) { const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); - if (nl_dump_ext_ack(nlh, NULL)) - return MNL_CB_STOP; - /* Netlink subsystems returns the errno value with different signess */ if (err->error < 0) errno = -err->error; else errno = err->error; + if (nl_dump_ext_ack(nlh, NULL)) + return MNL_CB_ERROR; + return err->error == 0 ? 
MNL_CB_STOP : MNL_CB_ERROR; } From a0405444f7614d9dd76cb9fb1b7a696cab5059ba Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 28 Feb 2018 14:16:42 -0800 Subject: [PATCH 17/25] bpf: Print section name when hitting non ld64 issue It's useful to be able to tell which section is being processed in the ELF when this error is triggered, so print that detail. Signed-off-by: Joe Stringer Acked-by: Daniel Borkmann Signed-off-by: Stephen Hemminger --- lib/bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/bpf.c b/lib/bpf.c index 2db151e4..c38d92d8 100644 --- a/lib/bpf.c +++ b/lib/bpf.c @@ -2039,6 +2039,7 @@ static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) { fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n", ioff); + fprintf(stderr, " - Current section: %s\n", data_relo->sec_name); if (ioff < num_insns && insns[ioff].code == (BPF_JMP | BPF_CALL)) fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n"); From 06867c371958773e39b4ccac07cfe3e2fff2ea55 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 1 Mar 2018 10:35:12 +0100 Subject: [PATCH 18/25] ip-link: Fix use after free in nl_get_ll_addr_len() Immediately after freeing the buffer returned from rtnl_talk(), it is accessed again via pointer in struct rtattr array. This leads to some builds not allowing to set an interface's MAC address because the expected length value is garbage. 
Fixes: 86bf43c7c2fdc ("lib/libnetlink: update rtnl_talk to support malloc buff at run time") Signed-off-by: Phil Sutter --- ip/iplink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ip/iplink.c b/ip/iplink.c index 230f4c53..d401311b 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -276,8 +276,9 @@ static int nl_get_ll_addr_len(unsigned int dev_index) return -1; } + len = RTA_PAYLOAD(tb[IFLA_ADDRESS]); free(answer); - return RTA_PAYLOAD(tb[IFLA_ADDRESS]); + return len; } static void iplink_parse_vf_vlan_info(int vf, int *argcp, char ***argvp, From 3dec72672f7b293ad773568c2c4a0d11fa8c4529 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 1 Mar 2018 14:43:08 -0800 Subject: [PATCH 19/25] libnetlink: __rtnl_talk_iov should only loop max iovlen times William reported ip hanging and bisected to a recent commit for batching allowing more than 1 command to be sent per message. The loop over recvmsg should never cycle more than iovlen times -- 1 response for each command in the message. Fixes: 72a2ff3916e5 ("lib/libnetlink: Add a new function rtnl_talk_iov") Signed-off-by: David Ahern --- lib/libnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 8bb1c8d7..928de1dd 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -670,8 +670,9 @@ next: free(buf); if (h->nlmsg_seq == seq) return 0; - else + else if (i < iovlen) goto next; + return 0; } if (rtnl->proto != NETLINK_SOCK_DIAG && From eb8559eff124221bfbafe934c4dbfe30f20604c0 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Sat, 3 Mar 2018 16:59:44 +0000 Subject: [PATCH 20/25] ss: fix NULL dereference when rendering without header When ss is invoked with the no-header flag, if the query doesn't return any result, render() is called with 'buffer' uninitialized. This currently leads to a segfault. Ensure that buffer is initialized before rendering. 
The bug can be triggered with: ss -H sport = 100000 Signed-off-by: Jean-Philippe Brucker Acked-by: Stefano Brivio Signed-off-by: Stephen Hemminger --- misc/ss.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/misc/ss.c b/misc/ss.c index e047f9c0..e087bef7 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -1197,10 +1197,15 @@ newline: /* Render buffered output with spacing and delimiters, then free up buffers */ static void render(int screen_width) { - struct buf_token *token = (struct buf_token *)buffer.head->data; + struct buf_token *token; int printed, line_started = 0; struct column *f; + if (!buffer.head) + return; + + token = (struct buf_token *)buffer.head->data; + /* Ensure end alignment of last token, it wasn't necessarily flushed */ buffer.tail->end += buffer.cur->len % 2; From 75ef7b18d2a13657056706895bf8d8dd3ac93e46 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 2 Mar 2018 19:36:16 +0100 Subject: [PATCH 21/25] tc: fix parsing of the control action If the user didn't specify any control action, don't pop the command line arguments: otherwise, parsing of the next argument (typically the 'index' keyword) results in an error, causing the following 'tc-testing' failures: Test a6d6: Add skbedit action with index Test 38f3: Delete skbedit action Test a568: Add action with ife type Test b983: Add action without ife type Test 7d50: Add skbmod action to set destination mac Test 9b29: Add skbmod action to set source mac Test e93a: Delete an skbmod action Also, add missing parse for 'ok' control action to m_police, to fix the following 'tc-testing' failure: Test 8dd5: Add police action with control ok tested with: # ./tdc.py test results: all tests ok using kernel 4.16-rc2, except 9aa8 "Get a single skbmod action from a list" (which is failing also before this commit) Fixes: 3572e01a090a ("tc: util: Don't call NEXT_ARG_FWD() in __parse_action_control()") Cc: Michal Privoznik Cc: Wolfgang Bumiller Signed-off-by: Davide Caratti Signed-off-by:
Stephen Hemminger --- tc/m_bpf.c | 1 - tc/m_connmark.c | 1 - tc/m_csum.c | 1 - tc/m_gact.c | 9 +++------ tc/m_ife.c | 1 - tc/m_mirred.c | 5 ++--- tc/m_nat.c | 1 - tc/m_pedit.c | 1 - tc/m_police.c | 16 ++++++++++------ tc/m_sample.c | 1 - tc/m_skbedit.c | 1 - tc/m_skbmod.c | 1 - tc/m_tunnel_key.c | 1 - tc/m_vlan.c | 1 - tc/tc_util.c | 6 +++++- 15 files changed, 20 insertions(+), 27 deletions(-) diff --git a/tc/m_bpf.c b/tc/m_bpf.c index 576f69cc..1c1f71cd 100644 --- a/tc/m_bpf.c +++ b/tc/m_bpf.c @@ -129,7 +129,6 @@ opt_bpf: parse_action_control_dflt(&argc, &argv, &parm.action, false, TC_ACT_PIPE); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { diff --git a/tc/m_connmark.c b/tc/m_connmark.c index 47c7a8c2..37d71854 100644 --- a/tc/m_connmark.c +++ b/tc/m_connmark.c @@ -82,7 +82,6 @@ parse_connmark(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, } parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_PIPE); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { diff --git a/tc/m_csum.c b/tc/m_csum.c index e1352c08..7b156734 100644 --- a/tc/m_csum.c +++ b/tc/m_csum.c @@ -124,7 +124,6 @@ parse_csum(struct action_util *a, int *argc_p, } parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_OK); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { diff --git a/tc/m_gact.c b/tc/m_gact.c index b30b0420..16c4413f 100644 --- a/tc/m_gact.c +++ b/tc/m_gact.c @@ -87,12 +87,10 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, if (argc < 0) return -1; - if (matches(*argv, "gact") != 0 && - parse_action_control(&argc, &argv, &p.action, false) == -1) { + if (!matches(*argv, "gact")) + NEXT_ARG_FWD(); + if (parse_action_control(&argc, &argv, &p.action, false)) usage(); /* does not return */ - } - - NEXT_ARG_FWD(); #ifdef CONFIG_GACT_PROB if (argc > 0) { @@ -113,7 +111,6 @@ parse_gact(struct action_util *a, int *argc_p, char ***argv_p, if (parse_action_control(&argc, &argv, 
&pp.paction, false) == -1) usage(); - NEXT_ARG_FWD(); if (get_u16(&pp.pval, *argv, 10)) { fprintf(stderr, "Illegal probability val 0x%x\n", diff --git a/tc/m_ife.c b/tc/m_ife.c index 4647f6a6..205efc9f 100644 --- a/tc/m_ife.c +++ b/tc/m_ife.c @@ -159,7 +159,6 @@ static int parse_ife(struct action_util *a, int *argc_p, char ***argv_p, parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { NEXT_ARG(); diff --git a/tc/m_mirred.c b/tc/m_mirred.c index aa7ce6d9..14e5c88d 100644 --- a/tc/m_mirred.c +++ b/tc/m_mirred.c @@ -103,6 +103,7 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p, while (argc > 0) { if (matches(*argv, "action") == 0) { + NEXT_ARG(); break; } else if (!egress && matches(*argv, "egress") == 0) { egress = 1; @@ -202,10 +203,8 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p, } - if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) { + if (p.eaction == TCA_EGRESS_MIRROR || p.eaction == TCA_INGRESS_MIRROR) parse_action_control(&argc, &argv, &p.action, false); - NEXT_ARG_FWD(); - } if (argc) { if (iok && matches(*argv, "index") == 0) { diff --git a/tc/m_nat.c b/tc/m_nat.c index f5de4d4c..1e4ff51f 100644 --- a/tc/m_nat.c +++ b/tc/m_nat.c @@ -116,7 +116,6 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_OK); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { NEXT_ARG(); diff --git a/tc/m_pedit.c b/tc/m_pedit.c index dc57f14a..26549eee 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -672,7 +672,6 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, parse_action_control_dflt(&argc, &argv, &sel.sel.action, false, TC_ACT_OK); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { NEXT_ARG(); diff --git a/tc/m_police.c b/tc/m_police.c index ff1dcb7d..055b50ee 100644 
--- a/tc/m_police.c +++ b/tc/m_police.c @@ -150,15 +150,18 @@ int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, matches(*argv, "shot") == 0 || matches(*argv, "continue") == 0 || matches(*argv, "pass") == 0 || + matches(*argv, "ok") == 0 || matches(*argv, "pipe") == 0 || matches(*argv, "goto") == 0) { - if (parse_action_control(&argc, &argv, &p.action, false)) - return -1; + if (!parse_action_control(&argc, &argv, &p.action, false)) + goto action_ctrl_ok; + return -1; } else if (strcmp(*argv, "conform-exceed") == 0) { NEXT_ARG(); - if (parse_action_control_slash(&argc, &argv, &p.action, - &presult, true)) - return -1; + if (!parse_action_control_slash(&argc, &argv, &p.action, + &presult, true)) + goto action_ctrl_ok; + return -1; } else if (matches(*argv, "overhead") == 0) { NEXT_ARG(); if (get_u16(&overhead, *argv, 10)) { @@ -174,8 +177,9 @@ int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, } else { break; } + NEXT_ARG_FWD(); +action_ctrl_ok: ok++; - argc--; argv++; } if (!ok) diff --git a/tc/m_sample.c b/tc/m_sample.c index 31774c0e..ff5ee6bd 100644 --- a/tc/m_sample.c +++ b/tc/m_sample.c @@ -100,7 +100,6 @@ static int parse_sample(struct action_util *a, int *argc_p, char ***argv_p, parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { NEXT_ARG(); diff --git a/tc/m_skbedit.c b/tc/m_skbedit.c index c41a7bb0..aa374fcb 100644 --- a/tc/m_skbedit.c +++ b/tc/m_skbedit.c @@ -123,7 +123,6 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, parse_action_control_dflt(&argc, &argv, &sel.action, false, TC_ACT_PIPE); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { NEXT_ARG(); diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c index bc268dfd..561b73fb 100644 --- a/tc/m_skbmod.c +++ b/tc/m_skbmod.c @@ -124,7 +124,6 @@ static int parse_skbmod(struct action_util *a, int *argc_p, char ***argv_p, 
parse_action_control_dflt(&argc, &argv, &p.action, false, TC_ACT_PIPE); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { NEXT_ARG(); diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c index 2dc91879..1cdd0356 100644 --- a/tc/m_tunnel_key.c +++ b/tc/m_tunnel_key.c @@ -175,7 +175,6 @@ static int parse_tunnel_key(struct action_util *a, int *argc_p, char ***argv_p, parse_action_control_dflt(&argc, &argv, &parm.action, false, TC_ACT_PIPE); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { NEXT_ARG(); diff --git a/tc/m_vlan.c b/tc/m_vlan.c index edae0d1e..161759fd 100644 --- a/tc/m_vlan.c +++ b/tc/m_vlan.c @@ -131,7 +131,6 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, parse_action_control_dflt(&argc, &argv, &parm.action, false, TC_ACT_PIPE); - NEXT_ARG_FWD(); if (argc) { if (matches(*argv, "index") == 0) { NEXT_ARG(); diff --git a/tc/tc_util.c b/tc/tc_util.c index aceb0d94..8eadbbcf 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -588,6 +588,7 @@ static int __parse_action_control(int *argc_p, char ***argv_p, int *result_p, } result |= jump_cnt; } + NEXT_ARG_FWD(); *argc_p = argc; *argv_p = argv; *result_p = result; @@ -684,8 +685,8 @@ out: int parse_action_control_slash(int *argc_p, char ***argv_p, int *result1_p, int *result2_p, bool allow_num) { + int result1, result2, argc = *argc_p; char **argv = *argv_p; - int result1, result2; char *p = strchr(*argv, '/'); if (!p) @@ -704,6 +705,9 @@ int parse_action_control_slash(int *argc_p, char ***argv_p, *result1_p = result1; *result2_p = result2; + NEXT_ARG_FWD(); + *argc_p = argc; + *argv_p = argv; return 0; } From 9426673910b26bb2fd9af61c83e15b9cb651b9bd Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Mon, 5 Mar 2018 11:36:16 -0500 Subject: [PATCH 22/25] tc: added tc monitor description in man page Signed-off-by: Roman Mashak Signed-off-by: Stephen Hemminger --- man/man8/tc.8 | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) 
diff --git a/man/man8/tc.8 b/man/man8/tc.8 index 5ffea373..cc94faa9 100644 --- a/man/man8/tc.8 +++ b/man/man8/tc.8 @@ -81,13 +81,21 @@ tc \- show / manipulate traffic control settings .B filter show block \fIBLOCK_INDEX\fR +.P +.B tc +.RI "[ " OPTIONS " ]" +.B monitor [ file +\fIFILENAME\fR +.B ] + .P .ti 8 .IR OPTIONS " := {" \fB[ -force ] -b\fR[\fIatch\fR] \fB[ filename ] \fR| \fB[ \fB-n\fR[\fIetns\fR] name \fB] \fR| \fB[ \fB-nm \fR| \fB-nam\fR[\fIes\fR] \fB] \fR| -\fB[ \fR{ \fB-cf \fR| \fB-c\fR[\fIonf\fR] \fR} \fB[ filename ] \fB] \fR} +\fB[ \fR{ \fB-cf \fR| \fB-c\fR[\fIonf\fR] \fR} \fB[ filename ] \fB] \fR +\fB[ -t\fR[imestamp\fR] \fB\] \fR| \fB[ -t\fR[short\fR] \fB]\fR } .ti 8 .IR FORMAT " := {" @@ -616,6 +624,17 @@ link Only available for qdiscs and performs a replace where the node must exist already. +.SH MONITOR +The\fB\ tc\fR\ utility can monitor events generated by the kernel such as +adding/deleting qdiscs, filters or actions, or modifying existing ones. + +The following command is available for\fB\ monitor\fR\ : +.TP +\fBfile\fR +If the file option is given, the \fBtc\fR does not listen to kernel events, but opens +the given file and dumps its contents. The file has to be in binary +format and contain netlink messages. + .SH OPTIONS .TP @@ -653,6 +672,16 @@ to specifies path to the config file. This option is used in conjunction with other options (e.g. .BR -nm ")." +.TP +.BR "\-t", " \-timestamp" +When\fB\ tc monitor\fR\ runs, print timestamp before the event message in format: + Timestamp:
usec + +.TP +.BR "\-ts", " \-tshort" +When\fB\ tc monitor\fR\ runs, prints short timestamp before the event message in format: + [--
T.] + .SH FORMAT The show command has additional formatting options: From d9d8c8393e02e60a0105e2bf540675ce158c2578 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 6 Mar 2018 14:39:19 -0800 Subject: [PATCH 23/25] json_writer: add SPDX Identifier (GPL-2/BSD-2) I wrote this code so put SPDX License on it and intentionally allow use in BSD code. Signed-off-by: Stephen Hemminger --- include/json_writer.h | 6 +----- lib/json_writer.c | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/include/json_writer.h b/include/json_writer.h index 1516aafb..45459fa2 100644 --- a/include/json_writer.h +++ b/include/json_writer.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ /* * Simple streaming JSON writer * * This takes care of the annoying bits of JSON syntax like the commas * after elements * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Stephen Hemminger */ diff --git a/lib/json_writer.c b/lib/json_writer.c index f3eeaf7b..cddd4dca 100644 --- a/lib/json_writer.c +++ b/lib/json_writer.c @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ /* * Simple streaming JSON writer * * This takes care of the annoying bits of JSON syntax like the commas * after elements * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Stephen Hemminger */ From b80c9af8a464496d646ee1b17b15f3a6755569a2 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 7 Mar 2018 09:35:39 -0500 Subject: [PATCH 24/25] tc: updated tc-bpf man page Added description of direct-action parameter. 
Signed-off-by: Roman Mashak Acked-by: Daniel Borkmann --- man/man8/tc-bpf.8 | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/man/man8/tc-bpf.8 b/man/man8/tc-bpf.8 index 2e9812ed..d311f295 100644 --- a/man/man8/tc-bpf.8 +++ b/man/man8/tc-bpf.8 @@ -14,6 +14,10 @@ CLS_NAME ] [ UDS_FILE ] [ .B verbose ] [ +.B direct-action +| +.B da +] [ .B skip_hw | .B skip_sw @@ -141,6 +145,11 @@ if set, it will dump the eBPF verifier output, even if loading the eBPF program was successful. By default, only on error, the verifier log is being emitted to the user. +.SS direct-action | da +instructs eBPF classifier to not invoke external TC actions, instead use the +TC actions return codes (\fBTC_ACT_OK\fR, \fBTC_ACT_SHOT\fR etc.) for +classifiers. + .SS skip_hw | skip_sw hardware offload control flags. By default TC will try to offload filters to hardware if possible. From 527f85141c9e6982f73f043f85949eaf7ff498bc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 8 Mar 2018 18:08:26 +0200 Subject: [PATCH 25/25] ip-address: Fix negative prints of large TX rate limits TX rate limit fields are unsigned (__u32). Use %u and print_uint when printing. Tested: $ ip link set ens1 vf 1 rate 2294967296 $ ip link show |grep -iE "vf 1" | grep rate before: vf 1 MAC 00:00:00:00:00:00, tx rate -2000000000 (Mbps), max_tx_rate -2000000000Mbps, ... after: vf 1 MAC 00:00:00:00:00:00, tx rate 2294967296 (Mbps), max_tx_rate 2294967296Mbps, ... 
Fixes: 3fd86630876a ("iproute2: rework SR-IOV VF support") Fixes: 8c29ae7cc249 ("ip link: Fix crash on older kernels when show VF dev") Fixes: f89a2a05ffa9 ("Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool") Fixes: ae7229d5f99e ("ip: Add support for setting and showing SR-IOV virtual funtion link params") Fixes: d0e720111aad ("ip: ipaddress.c: add support for json output") Signed-off-by: Tariq Toukan --- ip/ipaddress.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 051a05f0..955ef72e 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -417,10 +417,10 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) } if (vf_tx_rate->rate) - print_int(PRINT_ANY, - "tx_rate", - ", tx rate %d (Mbps)", - vf_tx_rate->rate); + print_uint(PRINT_ANY, + "tx_rate", + ", tx rate %u (Mbps)", + vf_tx_rate->rate); if (vf[IFLA_VF_RATE]) { struct ifla_vf_rate *vf_rate = RTA_DATA(vf[IFLA_VF_RATE]); @@ -429,14 +429,14 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) if (is_json_context()) { open_json_object("rate"); - print_int(PRINT_JSON, "max_tx", NULL, max_tx); - print_int(PRINT_ANY, "min_tx", NULL, min_tx); + print_uint(PRINT_JSON, "max_tx", NULL, max_tx); + print_uint(PRINT_ANY, "min_tx", NULL, min_tx); close_json_object(); } else { if (max_tx) - fprintf(fp, ", max_tx_rate %dMbps", max_tx); + fprintf(fp, ", max_tx_rate %uMbps", max_tx); if (min_tx) - fprintf(fp, ", min_tx_rate %dMbps", min_tx); + fprintf(fp, ", min_tx_rate %uMbps", min_tx); } }