From d5ddb441a52cc0da65de69a667569aa12f5c30cb Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 9 Jul 2019 14:25:14 -0700 Subject: [PATCH 01/36] tc: print all error messages to stderr Many tc modules were printing error messages to stdout. This is problematic if using JSON or other output formats. Change all these places to use fprintf(stderr, ...) instead. Also, remove unnecessary initialization and places where else is used after error return. Signed-off-by: Stephen Hemminger --- tc/m_bpf.c | 2 +- tc/m_connmark.c | 2 +- tc/m_csum.c | 2 +- tc/m_gact.c | 2 +- tc/m_ife.c | 4 +- tc/m_ipt.c | 94 +++++++++++++++++++++++------------------------ tc/m_mirred.c | 2 +- tc/m_nat.c | 2 +- tc/m_pedit.c | 2 +- tc/m_sample.c | 2 +- tc/m_simple.c | 4 +- tc/m_skbedit.c | 4 +- tc/m_skbmod.c | 4 +- tc/m_tunnel_key.c | 3 +- tc/m_vlan.c | 2 +- tc/m_xt.c | 15 ++++---- tc/m_xt_old.c | 92 +++++++++++++++++++++++----------------------- tc/tc_filter.c | 3 +- tc/tc_qdisc.c | 3 +- 19 files changed, 119 insertions(+), 125 deletions(-) diff --git a/tc/m_bpf.c b/tc/m_bpf.c index e247da8d..e8d704b5 100644 --- a/tc/m_bpf.c +++ b/tc/m_bpf.c @@ -167,7 +167,7 @@ static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_ACT_BPF_MAX, arg); if (!tb[TCA_ACT_BPF_PARMS]) { - fprintf(f, "[NULL bpf parameters]"); + fprintf(stderr, "Missing bpf parameters\n"); return -1; } diff --git a/tc/m_connmark.c b/tc/m_connmark.c index af5ebfc4..eac23489 100644 --- a/tc/m_connmark.c +++ b/tc/m_connmark.c @@ -115,7 +115,7 @@ static int print_connmark(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_CONNMARK_MAX, arg); if (tb[TCA_CONNMARK_PARMS] == NULL) { - print_string(PRINT_FP, NULL, "%s", "[NULL connmark parameters]"); + fprintf(stderr, "Missing connmark parameters\n"); return -1; } diff --git a/tc/m_csum.c b/tc/m_csum.c index 84396d6a..3e3dc251 100644 --- a/tc/m_csum.c +++ b/tc/m_csum.c @@ -172,7 +172,7 @@ print_csum(struct 
action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_CSUM_MAX, arg); if (tb[TCA_CSUM_PARMS] == NULL) { - fprintf(f, "[NULL csum parameters]"); + fprintf(stderr, "Missing csum parameters\n"); return -1; } sel = RTA_DATA(tb[TCA_CSUM_PARMS]); diff --git a/tc/m_gact.c b/tc/m_gact.c index 32617d4c..dca2a2f9 100644 --- a/tc/m_gact.c +++ b/tc/m_gact.c @@ -177,7 +177,7 @@ print_gact(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_GACT_MAX, arg); if (tb[TCA_GACT_PARMS] == NULL) { - print_string(PRINT_FP, NULL, "%s", "[NULL gact parameters]"); + fprintf(stderr, "Missing gact parameters\n"); return -1; } p = RTA_DATA(tb[TCA_GACT_PARMS]); diff --git a/tc/m_ife.c b/tc/m_ife.c index 67c1df75..7c612c02 100644 --- a/tc/m_ife.c +++ b/tc/m_ife.c @@ -218,7 +218,7 @@ skip_encode: static int print_ife(struct action_util *au, FILE *f, struct rtattr *arg) { - struct tc_ife *p = NULL; + struct tc_ife *p; struct rtattr *tb[TCA_IFE_MAX + 1]; __u16 ife_type = 0; __u32 mmark = 0; @@ -233,7 +233,7 @@ static int print_ife(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_IFE_MAX, arg); if (tb[TCA_IFE_PARMS] == NULL) { - print_string(PRINT_FP, NULL, "%s", "[NULL ife parameters]"); + fprintf(stderr, "Missing ife parameters\n"); return -1; } p = RTA_DATA(tb[TCA_IFE_PARMS]); diff --git a/tc/m_ipt.c b/tc/m_ipt.c index 1d73cb98..cc95eab7 100644 --- a/tc/m_ipt.c +++ b/tc/m_ipt.c @@ -429,6 +429,8 @@ print_ipt(struct action_util *au, FILE * f, struct rtattr *arg) { struct rtattr *tb[TCA_IPT_MAX + 1]; struct ipt_entry_target *t = NULL; + struct xtables_target *m; + __u32 hook; if (arg == NULL) return -1; @@ -440,70 +442,68 @@ print_ipt(struct action_util *au, FILE * f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_IPT_MAX, arg); if (tb[TCA_IPT_TABLE] == NULL) { - fprintf(f, "[NULL ipt table name ] assuming mangle "); + fprintf(stderr, "Missing ipt table name, assuming mangle\n"); } else { fprintf(f, "tablename: %s ", 
rta_getattr_str(tb[TCA_IPT_TABLE])); } if (tb[TCA_IPT_HOOK] == NULL) { - fprintf(f, "[NULL ipt hook name ]\n "); + fprintf(stderr, "Missing ipt hook name\n "); return -1; - } else { - __u32 hook; - - hook = rta_getattr_u32(tb[TCA_IPT_HOOK]); - fprintf(f, " hook: %s\n", ipthooks[hook]); } + hook = rta_getattr_u32(tb[TCA_IPT_HOOK]); + fprintf(f, " hook: %s\n", ipthooks[hook]); + if (tb[TCA_IPT_TARG] == NULL) { - fprintf(f, "\t[NULL ipt target parameters ]\n"); + fprintf(stderr, "Missing ipt target parameters\n"); return -1; - } else { - struct xtables_target *m = NULL; + } - t = RTA_DATA(tb[TCA_IPT_TARG]); - m = get_target_name(t->u.user.name); - if (m != NULL) { - if (build_st(m, t) < 0) { - fprintf(stderr, " %s error\n", m->name); - return -1; - } - opts = - merge_options(opts, m->extra_opts, - &m->option_offset); - } else { - fprintf(stderr, " failed to find target %s\n\n", - t->u.user.name); + t = RTA_DATA(tb[TCA_IPT_TARG]); + m = get_target_name(t->u.user.name); + if (m != NULL) { + if (build_st(m, t) < 0) { + fprintf(stderr, " %s error\n", m->name); return -1; } - fprintf(f, "\ttarget "); - m->print(NULL, m->t, 0); - if (tb[TCA_IPT_INDEX] == NULL) { - fprintf(f, " [NULL ipt target index ]\n"); - } else { - __u32 index; - - index = rta_getattr_u32(tb[TCA_IPT_INDEX]); - fprintf(f, "\n\tindex %u", index); - } - - if (tb[TCA_IPT_CNT]) { - struct tc_cnt *c = RTA_DATA(tb[TCA_IPT_CNT]); - - fprintf(f, " ref %d bind %d", c->refcnt, c->bindcnt); - } - if (show_stats) { - if (tb[TCA_IPT_TM]) { - struct tcf_t *tm = RTA_DATA(tb[TCA_IPT_TM]); - - print_tm(f, tm); - } - } - fprintf(f, "\n"); + opts = + merge_options(opts, m->extra_opts, + &m->option_offset); + } else { + fprintf(stderr, " failed to find target %s\n\n", + t->u.user.name); + return -1; } + + fprintf(f, "\ttarget "); + m->print(NULL, m->t, 0); + if (tb[TCA_IPT_INDEX] == NULL) { + fprintf(stderr, "Missing ipt target index\n"); + } else { + __u32 index; + + index = rta_getattr_u32(tb[TCA_IPT_INDEX]); + fprintf(f, 
"\n\tindex %u", index); + } + + if (tb[TCA_IPT_CNT]) { + struct tc_cnt *c = RTA_DATA(tb[TCA_IPT_CNT]); + + fprintf(f, " ref %d bind %d", c->refcnt, c->bindcnt); + } + if (show_stats) { + if (tb[TCA_IPT_TM]) { + struct tcf_t *tm = RTA_DATA(tb[TCA_IPT_TM]); + + print_tm(f, tm); + } + } + fprintf(f, "\n"); + free_opts(opts); return 0; diff --git a/tc/m_mirred.c b/tc/m_mirred.c index 23ba638a..13209523 100644 --- a/tc/m_mirred.c +++ b/tc/m_mirred.c @@ -287,7 +287,7 @@ print_mirred(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_MIRRED_MAX, arg); if (tb[TCA_MIRRED_PARMS] == NULL) { - print_string(PRINT_FP, NULL, "%s", "[NULL mirred parameters]"); + fprintf(stderr, "Missing mirred parameters\n"); return -1; } p = RTA_DATA(tb[TCA_MIRRED_PARMS]); diff --git a/tc/m_nat.c b/tc/m_nat.c index ee0b7520..c4b02a83 100644 --- a/tc/m_nat.c +++ b/tc/m_nat.c @@ -152,7 +152,7 @@ print_nat(struct action_util *au, FILE * f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_NAT_MAX, arg); if (tb[TCA_NAT_PARMS] == NULL) { - print_string(PRINT_FP, NULL, "%s", "[NULL nat parameters]"); + fprintf(stderr, "Missing nat parameters\n"); return -1; } sel = RTA_DATA(tb[TCA_NAT_PARMS]); diff --git a/tc/m_pedit.c b/tc/m_pedit.c index 8eb15f4b..1cd2d162 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -742,7 +742,7 @@ static int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_PEDIT_MAX, arg); if (!tb[TCA_PEDIT_PARMS] && !tb[TCA_PEDIT_PARMS_EX]) { - fprintf(f, "[NULL pedit parameters]"); + fprintf(stderr, "Missing pedit parameters\n"); return -1; } diff --git a/tc/m_sample.c b/tc/m_sample.c index 3c840d3f..c068e632 100644 --- a/tc/m_sample.c +++ b/tc/m_sample.c @@ -150,7 +150,7 @@ static int print_sample(struct action_util *au, FILE *f, struct rtattr *arg) if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] || !tb[TCA_SAMPLE_PSAMPLE_GROUP]) { - print_string(PRINT_FP, NULL, "%s", "[NULL sample parameters]"); + fprintf(stderr, 
"Missing sample parameters\n"); return -1; } p = RTA_DATA(tb[TCA_SAMPLE_PARMS]); diff --git a/tc/m_simple.c b/tc/m_simple.c index 34d1bab4..49e25047 100644 --- a/tc/m_simple.c +++ b/tc/m_simple.c @@ -171,13 +171,13 @@ static int print_simple(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_DEF_MAX, arg); if (tb[TCA_DEF_PARMS] == NULL) { - fprintf(f, "[NULL simple parameters]"); + fprintf(stderr, "Missing simple parameters\n"); return -1; } sel = RTA_DATA(tb[TCA_DEF_PARMS]); if (tb[TCA_DEF_DATA] == NULL) { - fprintf(f, "[missing simple string]"); + fprintf(stderr, "Missing simple string\n"); return -1; } diff --git a/tc/m_skbedit.c b/tc/m_skbedit.c index 70e3a2e4..761cad58 100644 --- a/tc/m_skbedit.c +++ b/tc/m_skbedit.c @@ -196,7 +196,7 @@ static int print_skbedit(struct action_util *au, FILE *f, struct rtattr *arg) SPRINT_BUF(b1); __u32 priority; __u16 ptype; - struct tc_skbedit *p = NULL; + struct tc_skbedit *p; if (arg == NULL) return -1; @@ -204,7 +204,7 @@ static int print_skbedit(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_SKBEDIT_MAX, arg); if (tb[TCA_SKBEDIT_PARMS] == NULL) { - print_string(PRINT_FP, NULL, "%s", "[NULL skbedit parameters]"); + fprintf(stderr, "Missing skbedit parameters\n"); return -1; } p = RTA_DATA(tb[TCA_SKBEDIT_PARMS]); diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c index 2dd1bb7e..d38a5c19 100644 --- a/tc/m_skbmod.c +++ b/tc/m_skbmod.c @@ -161,7 +161,7 @@ static int parse_skbmod(struct action_util *a, int *argc_p, char ***argv_p, static int print_skbmod(struct action_util *au, FILE *f, struct rtattr *arg) { - struct tc_skbmod *p = NULL; + struct tc_skbmod *p; struct rtattr *tb[TCA_SKBMOD_MAX + 1]; __u16 skbmod_etype = 0; int has_optional = 0; @@ -174,7 +174,7 @@ static int print_skbmod(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_SKBMOD_MAX, arg); if (tb[TCA_SKBMOD_PARMS] == NULL) { - fprintf(f, "[NULL skbmod parameters]"); + 
fprintf(stderr, "Missing skbmod parameters\n"); return -1; } diff --git a/tc/m_tunnel_key.c b/tc/m_tunnel_key.c index fd699017..4e65e444 100644 --- a/tc/m_tunnel_key.c +++ b/tc/m_tunnel_key.c @@ -493,8 +493,7 @@ static int print_tunnel_key(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_TUNNEL_KEY_MAX, arg); if (!tb[TCA_TUNNEL_KEY_PARMS]) { - print_string(PRINT_FP, NULL, "%s", - "[NULL tunnel_key parameters]"); + fprintf(stderr, "Missing tunnel_key parameters\n"); return -1; } parm = RTA_DATA(tb[TCA_TUNNEL_KEY_PARMS]); diff --git a/tc/m_vlan.c b/tc/m_vlan.c index 412f6aa1..9c8071e9 100644 --- a/tc/m_vlan.c +++ b/tc/m_vlan.c @@ -188,7 +188,7 @@ static int print_vlan(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_VLAN_MAX, arg); if (!tb[TCA_VLAN_PARMS]) { - print_string(PRINT_FP, NULL, "%s", "[NULL vlan parameters]"); + fprintf(stderr, "Missing vlanparameters\n"); return -1; } parm = RTA_DATA(tb[TCA_VLAN_PARMS]); diff --git a/tc/m_xt.c b/tc/m_xt.c index 29574bd4..bf0db2be 100644 --- a/tc/m_xt.c +++ b/tc/m_xt.c @@ -317,6 +317,7 @@ print_ipt(struct action_util *au, FILE *f, struct rtattr *arg) struct xtables_target *m; struct rtattr *tb[TCA_IPT_MAX + 1]; struct xt_entry_target *t = NULL; + __u32 hook; if (arg == NULL) return -1; @@ -330,27 +331,25 @@ print_ipt(struct action_util *au, FILE *f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_IPT_MAX, arg); if (tb[TCA_IPT_TABLE] == NULL) { - fprintf(f, "[NULL ipt table name ] assuming mangle "); + fprintf(stderr, "Missing ipt table name, assuming mangle\n"); } else { fprintf(f, "tablename: %s ", rta_getattr_str(tb[TCA_IPT_TABLE])); } if (tb[TCA_IPT_HOOK] == NULL) { - fprintf(f, "[NULL ipt hook name ]\n "); + fprintf(stderr, "Missing ipt hook name\n "); return -1; - } else { - __u32 hook; - - hook = rta_getattr_u32(tb[TCA_IPT_HOOK]); - fprintf(f, " hook: %s\n", ipthooks[hook]); } if (tb[TCA_IPT_TARG] == NULL) { - fprintf(f, "\t[NULL ipt target parameters 
]\n"); + fprintf(stderr, "Missing ipt target parameters\n"); return -1; } + hook = rta_getattr_u32(tb[TCA_IPT_HOOK]); + fprintf(f, " hook: %s\n", ipthooks[hook]); + t = RTA_DATA(tb[TCA_IPT_TARG]); m = xtables_find_target(t->u.user.name, XTF_TRY_LOAD); if (!m) { diff --git a/tc/m_xt_old.c b/tc/m_xt_old.c index 25d36778..6a4509a9 100644 --- a/tc/m_xt_old.c +++ b/tc/m_xt_old.c @@ -354,6 +354,8 @@ print_ipt(struct action_util *au, FILE * f, struct rtattr *arg) { struct rtattr *tb[TCA_IPT_MAX + 1]; struct xt_entry_target *t = NULL; + struct xtables_target *m; + __u32 hook; if (arg == NULL) return -1; @@ -363,70 +365,66 @@ print_ipt(struct action_util *au, FILE * f, struct rtattr *arg) parse_rtattr_nested(tb, TCA_IPT_MAX, arg); if (tb[TCA_IPT_TABLE] == NULL) { - fprintf(f, "[NULL ipt table name ] assuming mangle "); + fprintf(stderr, "Missing ipt table name, assuming mangle\n"); } else { fprintf(f, "tablename: %s ", rta_getattr_str(tb[TCA_IPT_TABLE])); } if (tb[TCA_IPT_HOOK] == NULL) { - fprintf(f, "[NULL ipt hook name ]\n "); + fprintf(stderr, "Missing ipt hook name\n"); return -1; - } else { - __u32 hook; - - hook = rta_getattr_u32(tb[TCA_IPT_HOOK]); - fprintf(f, " hook: %s\n", ipthooks[hook]); } if (tb[TCA_IPT_TARG] == NULL) { - fprintf(f, "\t[NULL ipt target parameters ]\n"); + fprintf(stderr, "Missing ipt target parameters\n"); return -1; - } else { - struct xtables_target *m = NULL; + } - t = RTA_DATA(tb[TCA_IPT_TARG]); - m = find_target(t->u.user.name, TRY_LOAD); - if (m != NULL) { - if (build_st(m, t) < 0) { - fprintf(stderr, " %s error\n", m->name); - return -1; - } + hook = rta_getattr_u32(tb[TCA_IPT_HOOK]); + fprintf(f, " hook: %s\n", ipthooks[hook]); - opts = - merge_options(opts, m->extra_opts, - &m->option_offset); - } else { - fprintf(stderr, " failed to find target %s\n\n", - t->u.user.name); + t = RTA_DATA(tb[TCA_IPT_TARG]); + m = find_target(t->u.user.name, TRY_LOAD); + if (m != NULL) { + if (build_st(m, t) < 0) { + fprintf(stderr, " %s error\n", 
m->name); return -1; } - fprintf(f, "\ttarget "); - m->print(NULL, m->t, 0); - if (tb[TCA_IPT_INDEX] == NULL) { - fprintf(f, " [NULL ipt target index ]\n"); - } else { - __u32 index; - - index = rta_getattr_u32(tb[TCA_IPT_INDEX]); - fprintf(f, "\n\tindex %u", index); - } - - if (tb[TCA_IPT_CNT]) { - struct tc_cnt *c = RTA_DATA(tb[TCA_IPT_CNT]); - - fprintf(f, " ref %d bind %d", c->refcnt, c->bindcnt); - } - if (show_stats) { - if (tb[TCA_IPT_TM]) { - struct tcf_t *tm = RTA_DATA(tb[TCA_IPT_TM]); - - print_tm(f, tm); - } - } - fprintf(f, "\n"); + opts = + merge_options(opts, m->extra_opts, + &m->option_offset); + } else { + fprintf(stderr, " failed to find target %s\n\n", + t->u.user.name); + return -1; } + fprintf(f, "\ttarget "); + m->print(NULL, m->t, 0); + if (tb[TCA_IPT_INDEX] == NULL) { + fprintf(f, " [NULL ipt target index ]\n"); + } else { + __u32 index; + + index = rta_getattr_u32(tb[TCA_IPT_INDEX]); + fprintf(f, "\n\tindex %u", index); + } + + if (tb[TCA_IPT_CNT]) { + struct tc_cnt *c = RTA_DATA(tb[TCA_IPT_CNT]); + + fprintf(f, " ref %d bind %d", c->refcnt, c->bindcnt); + } + if (show_stats) { + if (tb[TCA_IPT_TM]) { + struct tcf_t *tm = RTA_DATA(tb[TCA_IPT_TM]); + + print_tm(f, tm); + } + } + fprintf(f, "\n"); + free_opts(opts); return 0; diff --git a/tc/tc_filter.c b/tc/tc_filter.c index e5c7bc46..cd78c244 100644 --- a/tc/tc_filter.c +++ b/tc/tc_filter.c @@ -375,8 +375,7 @@ int print_filter(struct nlmsghdr *n, void *arg) if (q) q->print_fopt(q, fp, tb[TCA_OPTIONS], t->tcm_handle); else - print_string(PRINT_FP, NULL, - "[cannot parse parameters]", NULL); + fprintf(stderr, "cannot parse option parameters\n"); close_json_object(); } } diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c index e573a1df..17e39983 100644 --- a/tc/tc_qdisc.c +++ b/tc/tc_qdisc.c @@ -313,8 +313,7 @@ int print_qdisc(struct nlmsghdr *n, void *arg) if (q) q->print_qopt(q, fp, tb[TCA_OPTIONS]); else - print_string(PRINT_FP, NULL, - "[cannot parse qdisc parameters]", NULL); + fprintf(stderr, 
"Cannot parse qdisc parameters\n"); } close_json_object(); From fda6f26e9b364ffff567a3ae10535c538f7db7a0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 11 Jul 2019 15:36:29 -0700 Subject: [PATCH 02/36] uapi: fix bpf.h link Signed-off-by: Stephen Hemminger --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0c3e3d9a..4c955172 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -806,7 +806,7 @@ union bpf_attr { * based on a user-provided identifier for all traffic coming from * the tasks belonging to the related cgroup. See also the related * kernel documentation, available from the Linux sources in file - * *Documentation/cgroup-v1/net_cls.txt*. + * *Documentation/cgroup-v1/net_cls.rst*. * * The Linux kernel has two versions for cgroups: there are * cgroups v1 and cgroups v2. Both are available to users, who can From 6bc13e4a20f50e9c37d5a504c78222913c433fd3 Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Sat, 13 Jul 2019 11:44:07 +0200 Subject: [PATCH 03/36] tc: util: constrain percentage in 0-100 interval parse_percent() currently allows specifying negative percentages or values above 100%. However, this does not seem to make sense, as the function is used for probabilities or bandwidth rates. 
Moreover, using negative values leads to erroneous results (using Bernoulli loss model as example): $ ip link add test type dummy $ ip link set test up $ tc qdisc add dev test root netem loss gemodel -10% limit 10 $ tc qdisc show dev test qdisc netem 800c: root refcnt 2 limit 10 loss gemodel p 90% r 10% 1-h 100% 1-k 0% Using values above 100% we have instead: $ ip link add test type dummy $ ip link set test up $ tc qdisc add dev test root netem loss gemodel 140% limit 10 $ tc qdisc show dev test qdisc netem 800f: root refcnt 2 limit 10 loss gemodel p 40% r 60% 1-h 100% 1-k 0% This commit changes parse_percent() with a check to ensure percentage values stay between 1.0 and 0.0. parse_percent_rate() function, which already employs a similar check, is adjusted accordingly. With this check in place, we have: $ ip link add test type dummy $ ip link set test up $ tc qdisc add dev test root netem loss gemodel -10% limit 10 Illegal "loss gemodel p" Fixes: 927e3cfb52b58 ("tc: B.W limits can now be specified in %.") Signed-off-by: Andrea Claudi Signed-off-by: Stephen Hemminger --- tc/tc_util.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tc/tc_util.c b/tc/tc_util.c index 53d15e08..b90d256c 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -198,7 +198,7 @@ int parse_percent(double *val, const char *str) char *p; *val = strtod(str, &p) / 100.; - if (*val == HUGE_VALF || *val == HUGE_VALL) + if (*val > 1.0 || *val < 0.0) return 1; if (*p && strcmp(p, "%")) return -1; @@ -226,16 +226,16 @@ static int parse_percent_rate(char *rate, size_t len, if (ret != 1) goto malf; - if (parse_percent(&perc, str_perc)) + ret = parse_percent(&perc, str_perc); + if (ret == 1) { + fprintf(stderr, "Invalid rate specified; should be between [0,100]%% but is %s\n", str); + goto err; + } else if (ret == -1) { goto malf; + } free(str_perc); - if (perc > 1.0 || perc < 0.0) { - fprintf(stderr, "Invalid rate specified; should be between [0,100]%% but is %s\n", str); - 
return -1; - } - rate_bit = perc * dev_mbit * 1000 * 1000; ret = snprintf(rate, len, "%lf", rate_bit); @@ -247,8 +247,9 @@ static int parse_percent_rate(char *rate, size_t len, return 0; malf: - free(str_perc); fprintf(stderr, "Specified rate value could not be read or is malformed\n"); +err: + free(str_perc); return -1; } From 1f420318bda3cc62156e89e1b56d60cc744b48ad Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 15 Jul 2019 20:04:30 +0200 Subject: [PATCH 04/36] utils: don't match empty strings as prefixes iproute has a utility function which checks if a string is a prefix for another one, to allow use of abbreviated commands, e.g. 'addr' or 'a' instead of 'address'. This routine unfortunately considers an empty string as a prefix of any pattern, leading to undefined behaviour when an empty argument is passed to ip: # ip '' 1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever # tc '' qdisc noqueue 0: dev lo root refcnt 2 # ip address add 192.0.2.0/24 '' 198.51.100.1 dev dummy0 # ip addr show dev dummy0 6: dummy0: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 02:9d:5e:e9:3f:c0 brd ff:ff:ff:ff:ff:ff inet 192.0.2.0/24 brd 198.51.100.1 scope global dummy0 valid_lft forever preferred_lft forever Rewrite matches() so it takes care of an empty input, and doesn't scan the input strings three times: the current implementation does 2 strlen and a memcmp to accomplish the same task. 
Signed-off-by: Matteo Croce Signed-off-by: Stephen Hemminger --- include/utils.h | 2 +- lib/utils.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/utils.h b/include/utils.h index 1d9c1127..794d3605 100644 --- a/include/utils.h +++ b/include/utils.h @@ -198,7 +198,7 @@ int nodev(const char *dev); int check_ifname(const char *); int get_ifname(char *, const char *); const char *get_ifname_rta(int ifindex, const struct rtattr *rta); -int matches(const char *arg, const char *pattern); +bool matches(const char *prefix, const char *string); int inet_addr_match(const inet_prefix *a, const inet_prefix *b, int bits); int inet_addr_match_rta(const inet_prefix *m, const struct rtattr *rta); diff --git a/lib/utils.c b/lib/utils.c index 5da9a478..9ea21fa1 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -871,13 +871,18 @@ const char *get_ifname_rta(int ifindex, const struct rtattr *rta) return name; } -int matches(const char *cmd, const char *pattern) +/* Returns false if 'prefix' is a non-empty prefix of 'string', true otherwise. + */ +bool matches(const char *prefix, const char *string) { - int len = strlen(cmd); + if (!*prefix) + return true; + while (*string && *prefix == *string) { + prefix++; + string++; + } - if (len > strlen(pattern)) - return -1; - return memcmp(pattern, cmd, len); + return !!*prefix; } int inet_addr_match(const inet_prefix *a, const inet_prefix *b, int bits) From b4d97ef57fd4b7669971ed209065a72d115dffc2 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 10 Jul 2019 14:03:19 +0300 Subject: [PATCH 05/36] devlink: Change devlink health dump show command to dumpit Although devlink health dump show command is given per reporter, it returns large amounts of data. Trying to use the doit cb results in OUT-OF-BUFFER error. This complementary patch raises the DUMP flag in order to invoke the dumpit cb. We're safe as no existing drivers implement the dump health reporter option yet. 
Fixes: 041e6e651a8e ("devlink: Add devlink health dump show command") Signed-off-by: Aya Levin Signed-off-by: Tariq Toukan Acked-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index d8197ea3..637cb5fc 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -6105,13 +6105,13 @@ static int cmd_fmsg_object_cb(const struct nlmsghdr *nlh, void *data) return MNL_CB_OK; } -static int cmd_health_object_common(struct dl *dl, uint8_t cmd) +static int cmd_health_object_common(struct dl *dl, uint8_t cmd, uint16_t flags) { struct fmsg_cb_data data; struct nlmsghdr *nlh; int err; - nlh = mnlg_msg_prepare(dl->nlg, cmd, NLM_F_REQUEST | NLM_F_ACK); + nlh = mnlg_msg_prepare(dl->nlg, cmd, flags | NLM_F_REQUEST | NLM_F_ACK); err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE | DL_OPT_HEALTH_REPORTER_NAME, 0); @@ -6126,12 +6126,16 @@ static int cmd_health_object_common(struct dl *dl, uint8_t cmd) static int cmd_health_dump_show(struct dl *dl) { - return cmd_health_object_common(dl, DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET); + return cmd_health_object_common(dl, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + NLM_F_DUMP); } static int cmd_health_diagnose(struct dl *dl) { - return cmd_health_object_common(dl, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE); + return cmd_health_object_common(dl, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + 0); } static int cmd_health_recover(struct dl *dl) From 1d05cca2fd70a5bc8a9f4e978aa5629dbc99a973 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 10 Jul 2019 14:03:20 +0300 Subject: [PATCH 06/36] devlink: Fix binary values print Fix function pr_out_binary_value() to start printing the binary buffer from offset 0 instead of offset 1. 
Remove redundant new line at the beginning of the output Example: With patch: mlx5e_txqsq: 05 00 00 00 05 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 8e 6e 3a 13 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c0 Without patch mlx5e_txqsq: 00 00 00 05 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 8e 6e 3a 13 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c0 Fixes: 844a61764c6f ("devlink: Add helper functions for name and value separately") Signed-off-by: Aya Levin Signed-off-by: Tariq Toukan Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 637cb5fc..6b28138d 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -1779,29 +1779,31 @@ static void pr_out_uint64_value(struct dl *dl, uint64_t value) pr_out(" %"PRIu64, value); } +static bool is_binary_eol(int i) +{ + return !(i%16); +} + static void pr_out_binary_value(struct dl *dl, uint8_t *data, uint32_t len) { - int i = 1; + int i = 0; if (dl->json_output) jsonw_start_array(dl->jw); - else - pr_out("\n"); while (i < len) { - if (dl->json_output) { + if (dl->json_output) jsonw_printf(dl->jw, "%d", data[i]); - } else { - pr_out(" %02x", data[i]); - if (!(i % 16)) - pr_out("\n"); - } + else + pr_out("%02x ", data[i]); i++; + if (!dl->json_output && is_binary_eol(i)) + __pr_out_newline(); } if (dl->json_output) jsonw_end_array(dl->jw); - else if ((i - 1) % 16) - pr_out("\n"); + else if (!is_binary_eol(i)) + __pr_out_newline(); } static void pr_out_str_value(struct dl *dl, const char *value) From f359942a25d368ccf2e47b79f95db2798e09f7a4 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 10 Jul 2019 14:03:21 +0300 Subject: [PATCH 07/36] devlink: Remove enclosing array brackets binary print with json format Keep pr_out_binary_value function only for printing. Inner relations like array grouping should be done outside the function. 
Fixes: 844a61764c6f ("devlink: Add helper functions for name and value separately") Signed-off-by: Aya Levin Signed-off-by: Tariq Toukan Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 6b28138d..bb023c0c 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -1788,9 +1788,6 @@ static void pr_out_binary_value(struct dl *dl, uint8_t *data, uint32_t len) { int i = 0; - if (dl->json_output) - jsonw_start_array(dl->jw); - while (i < len) { if (dl->json_output) jsonw_printf(dl->jw, "%d", data[i]); @@ -1800,9 +1797,7 @@ static void pr_out_binary_value(struct dl *dl, uint8_t *data, uint32_t len) if (!dl->json_output && is_binary_eol(i)) __pr_out_newline(); } - if (dl->json_output) - jsonw_end_array(dl->jw); - else if (!is_binary_eol(i)) + if (!dl->json_output && !is_binary_eol(i)) __pr_out_newline(); } From 03dafe13f4c42f308e62df06d859e05462ecca1c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 16 Jul 2019 11:56:58 -0700 Subject: [PATCH 08/36] uapi: update uapi/magic.h From upstream Signed-off-by: Stephen Hemminger --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f8c00045..665e1862 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -91,5 +91,6 @@ #define UDF_SUPER_MAGIC 0x15013346 #define BALLOON_KVM_MAGIC 0x13661366 #define ZSMALLOC_MAGIC 0x58295829 +#define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ #endif /* __LINUX_MAGIC_H__ */ From 78d3832335d514dd70402daedab06a383ff9fc42 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 16 Jul 2019 11:58:44 -0700 Subject: [PATCH 09/36] uapi: rdma netlink.h update From upstream 5.3-rc Signed-off-by: Stephen Hemminger --- rdma/include/uapi/rdma/rdma_netlink.h | 86 +++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git 
a/rdma/include/uapi/rdma/rdma_netlink.h b/rdma/include/uapi/rdma/rdma_netlink.h index 41cfa84c..ae5a77a1 100644 --- a/rdma/include/uapi/rdma/rdma_netlink.h +++ b/rdma/include/uapi/rdma/rdma_netlink.h @@ -147,6 +147,18 @@ enum { IWPM_NLA_HELLO_MAX }; +/* For RDMA_NLDEV_ATTR_DEV_NODE_TYPE */ +enum { + /* IB values map to NodeInfo:NodeType. */ + RDMA_NODE_IB_CA = 1, + RDMA_NODE_IB_SWITCH, + RDMA_NODE_IB_ROUTER, + RDMA_NODE_RNIC, + RDMA_NODE_USNIC, + RDMA_NODE_USNIC_UDP, + RDMA_NODE_UNSPECIFIED, +}; + /* * Local service operations: * RESOLVE - The client requests the local service to resolve a path. @@ -267,11 +279,15 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */ - RDMA_NLDEV_NUM_OPS -}; + RDMA_NLDEV_CMD_GET_CHARDEV, -enum { - RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, + RDMA_NLDEV_CMD_STAT_SET, + + RDMA_NLDEV_CMD_STAT_GET, /* can dump */ + + RDMA_NLDEV_CMD_STAT_DEL, + + RDMA_NLDEV_NUM_OPS }; enum rdma_nldev_print_type { @@ -478,10 +494,72 @@ enum rdma_nldev_attr { * File descriptor handle of the net namespace object */ RDMA_NLDEV_NET_NS_FD, /* u32 */ + /* + * Information about a chardev. + * CHARDEV_TYPE is the name of the chardev ABI (ie uverbs, umad, etc) + * CHARDEV_ABI signals the ABI revision (historical) + * CHARDEV_NAME is the kernel name for the /dev/ file (no directory) + * CHARDEV is the 64 bit dev_t for the inode + */ + RDMA_NLDEV_ATTR_CHARDEV_TYPE, /* string */ + RDMA_NLDEV_ATTR_CHARDEV_NAME, /* string */ + RDMA_NLDEV_ATTR_CHARDEV_ABI, /* u64 */ + RDMA_NLDEV_ATTR_CHARDEV, /* u64 */ + RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID, /* u64 */ + /* + * Counter-specific attributes. 
+ */ + RDMA_NLDEV_ATTR_STAT_MODE, /* u32 */ + RDMA_NLDEV_ATTR_STAT_RES, /* u32 */ + RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, /* u32 */ + RDMA_NLDEV_ATTR_STAT_COUNTER, /* nested table */ + RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_STAT_COUNTER_ID, /* u32 */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS, /* nested table */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, /* string */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, /* u64 */ + + /* + * CQ adaptive moderatio (DIM) + */ + RDMA_NLDEV_ATTR_DEV_DIM, /* u8 */ /* * Always the end */ RDMA_NLDEV_ATTR_MAX }; + +/* + * Supported counter bind modes. All modes are mutual-exclusive. + */ +enum rdma_nl_counter_mode { + RDMA_COUNTER_MODE_NONE, + + /* + * A qp is bound with a counter automatically during initialization + * based on the auto mode (e.g., qp type, ...) + */ + RDMA_COUNTER_MODE_AUTO, + + /* + * Which qp are bound with which counter is explicitly specified + * by the user + */ + RDMA_COUNTER_MODE_MANUAL, + + /* + * Always the end + */ + RDMA_COUNTER_MODE_MAX, +}; + +/* + * Supported criteria in counter auto mode. + * Currently only "qp type" is supported + */ +enum rdma_nl_counter_mask { + RDMA_COUNTER_MASK_QP_TYPE = 1, +}; #endif /* _RDMA_NETLINK_H */ From ad04dbc5b41df509cd6925eab36af73000632fd2 Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Tue, 9 Jul 2019 15:16:50 +0200 Subject: [PATCH 10/36] Revert "ip6tunnel: fix 'ip -6 {show|change} dev ' cmds" This reverts commit ba126dcad20e6d0e472586541d78bdd1ac4f1123. It breaks tunnel creation when using the 'dev' parameter: $ ip link add type dummy $ ip -6 tunnel add ip6tnl1 mode ip6ip6 remote 2001:db8:ffff:100::2 local 2001:db8:ffff:100::1 hoplimit 1 tclass 0x0 dev dummy0 add tunnel "ip6tnl0" failed: File exists dev parameter must be used to specify the device to which the tunnel is bound, and not the tunnel itself. 
Reported-by: Jianwen Ji Reviewed-by: Matteo Croce Signed-off-by: Andrea Claudi Signed-off-by: Stephen Hemminger --- ip/ip6tunnel.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index 2e0f099c..a1bf366b 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -299,8 +299,6 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) p->link = ll_name_to_index(medium); if (!p->link) return nodev(medium); - else - strlcpy(p->name, medium, sizeof(p->name)); } return 0; } From d035cc1b4e83e2589ea2115cdc2fa7c6d3693a5a Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Tue, 9 Jul 2019 15:16:51 +0200 Subject: [PATCH 11/36] ip tunnel: warn when changing IPv6 tunnel without tunnel name Tunnel change fails if a tunnel name is not specified while using 'ip -6 tunnel change'. However, no warning message is printed and no error code is returned. $ ip -6 tunnel add ip6tnl1 mode ip6gre local fd::1 remote fd::2 tos inherit ttl 127 encaplimit none dev dummy0 $ ip -6 tunnel change dev dummy0 local 2001:1234::1 remote 2001:1234::2 $ ip -6 tunnel show ip6tnl1 ip6tnl1: gre/ipv6 remote fd::2 local fd::1 dev dummy0 encaplimit none hoplimit 127 tclass inherit flowlabel 0x00000 (flowinfo 0x00000000) This commit checks whether the tunnel interface name is an empty string; in that case, it prints a warning message to the user. It intentionally avoids returning an error so as not to break existing script setups.
This is the output after this commit: $ ip -6 tunnel add ip6tnl1 mode ip6gre local fd::1 remote fd::2 tos inherit ttl 127 encaplimit none dev dummy0 $ ip -6 tunnel change dev dummy0 local 2001:1234::1 remote 2001:1234::2 Tunnel interface name not specified $ ip -6 tunnel show ip6tnl1 ip6tnl1: gre/ipv6 remote fd::2 local fd::1 dev dummy0 encaplimit none hoplimit 127 tclass inherit flowlabel 0x00000 (flowinfo 0x00000000) Reviewed-by: Matteo Croce Signed-off-by: Andrea Claudi Signed-off-by: Stephen Hemminger --- ip/ip6tunnel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index a1bf366b..42535fcf 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -387,6 +387,9 @@ static int do_add(int cmd, int argc, char **argv) if (parse_args(argc, argv, cmd, &p) < 0) return -1; + if (!*p.name) + fprintf(stderr, "Tunnel interface name not specified\n"); + if (p.proto == IPPROTO_GRE) basedev = "ip6gre0"; else if (p.i_flags & VTI_ISVTI) From ed54f76484b5ee47b190a202ecf29fce60d0d878 Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Wed, 17 Jul 2019 18:15:31 -0700 Subject: [PATCH 12/36] json: fix backslash escape typo in jsonw_puts Fixes: fcc16c22 ("provide common json output formatter") Signed-off-by: Ivan Delalande Signed-off-by: Stephen Hemminger --- lib/json_writer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/json_writer.c b/lib/json_writer.c index 5004c181..88c5eb88 100644 --- a/lib/json_writer.c +++ b/lib/json_writer.c @@ -75,7 +75,7 @@ static void jsonw_puts(json_writer_t *self, const char *str) fputs("\\b", self->out); break; case '\\': - fputs("\\n", self->out); + fputs("\\\\", self->out); break; case '"': fputs("\\\"", self->out); From 51a8f9f8fb9ec3189938a7e2aa6b2b58bee0ffa7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 19 Jul 2019 10:49:36 -0700 Subject: [PATCH 13/36] uapi: fix bpf comment typo From upstream. 
Signed-off-by: Stephen Hemminger --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4c955172..bf39f61b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -806,7 +806,7 @@ union bpf_attr { * based on a user-provided identifier for all traffic coming from * the tasks belonging to the related cgroup. See also the related * kernel documentation, available from the Linux sources in file - * *Documentation/cgroup-v1/net_cls.rst*. + * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. * * The Linux kernel has two versions for cgroups: there are * cgroups v1 and cgroups v2. Both are available to users, who can From 5937552b42e4bfdb5890a155fc6d349f0720a2e0 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 17 Jul 2019 17:31:50 +0300 Subject: [PATCH 14/36] rdma: Add "stat qp show" support This patch presents link, id, task name, lqpn, as well as all sub counters of a QP counter. A QP counter is a dynamically allocated statistic counter that is bound with one or more QPs. It has several sub-counters, each is used for a different purpose. 
Examples: $ rdma stat qp show link mlx5_2/1 cntn 5 pid 31609 comm client.1 rx_write_requests 0 rx_read_requests 0 rx_atomic_requests 0 out_of_buffer 0 out_of_sequence 0 duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0 resp_local_length_error 0 resp_cqe_error 0 req_cqe_error 0 req_remote_invalid_request 0 req_remote_access_errors 0 resp_remote_access_errors 0 resp_cqe_flush_error 0 req_cqe_flush_error 0 LQPN: <178> $ rdma stat show link rocep1s0f5/1 link rocep1s0f5/1 rx_write_requests 0 rx_read_requests 0 rx_atomic_requests 0 out_of_buffer 0 duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0 resp_local_length_error 0 resp_cqe_error 0 req_cqe_error 0 req_remote_invalid_request 0 req_remote_access_errors 0 resp_remote_access_errors 0 resp_cqe_flush_error 0 req_cqe_flush_error 0 rp_cnp_ignored 0 rp_cnp_handled 0 np_ecn_marked_roce_packets 0 np_cnp_sent 0 $ rdma stat show link rocep1s0f5/1 -p link rocep1s0f5/1 rx_write_requests 0 rx_read_requests 0 rx_atomic_requests 0 out_of_buffer 0 duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0 resp_local_length_error 0 resp_cqe_error 0 req_cqe_error 0 req_remote_invalid_request 0 req_remote_access_errors 0 resp_remote_access_errors 0 resp_cqe_flush_error 0 req_cqe_flush_error 0 rp_cnp_ignored 0 rp_cnp_handled 0 np_ecn_marked_roce_packets 0 np_cnp_sent 0 Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Stephen Hemminger --- rdma/Makefile | 2 +- rdma/rdma.c | 3 +- rdma/rdma.h | 1 + rdma/stat.c | 268 ++++++++++++++++++++++++++++++++++++++++++++++++++ rdma/utils.c | 7 ++ 5 files changed, 279 insertions(+), 2 deletions(-) create mode 100644 rdma/stat.c diff --git a/rdma/Makefile b/rdma/Makefile index 4847f27e..e3f550bf 100644 --- a/rdma/Makefile +++ b/rdma/Makefile @@ -7,7 +7,7 @@ ifeq ($(HAVE_MNL),y) CFLAGS += -I./include/uapi/ RDMA_OBJ = rdma.o utils.o 
dev.o link.o res.o res-pd.o res-mr.o res-cq.o \ - res-cmid.o res-qp.o sys.o + res-cmid.o res-qp.o sys.o stat.o TARGETS += rdma endif diff --git a/rdma/rdma.c b/rdma/rdma.c index e9f1b4bb..4e34da92 100644 --- a/rdma/rdma.c +++ b/rdma/rdma.c @@ -11,7 +11,7 @@ static void help(char *name) { pr_out("Usage: %s [ OPTIONS ] OBJECT { COMMAND | help }\n" " %s [ -f[orce] ] -b[atch] filename\n" - "where OBJECT := { dev | link | resource | system | help }\n" + "where OBJECT := { dev | link | resource | system | statistic | help }\n" " OPTIONS := { -V[ersion] | -d[etails] | -j[son] | -p[retty]}\n", name, name); } @@ -30,6 +30,7 @@ static int rd_cmd(struct rd *rd, int argc, char **argv) { "link", cmd_link }, { "resource", cmd_res }, { "system", cmd_sys }, + { "statistic", cmd_stat }, { 0 } }; diff --git a/rdma/rdma.h b/rdma/rdma.h index 885a751e..23157743 100644 --- a/rdma/rdma.h +++ b/rdma/rdma.h @@ -94,6 +94,7 @@ int cmd_dev(struct rd *rd); int cmd_link(struct rd *rd); int cmd_res(struct rd *rd); int cmd_sys(struct rd *rd); +int cmd_stat(struct rd *rd); int rd_exec_cmd(struct rd *rd, const struct rd_cmd *c, const char *str); int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd)); int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd)); diff --git a/rdma/stat.c b/rdma/stat.c new file mode 100644 index 00000000..da35ef7d --- /dev/null +++ b/rdma/stat.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * rdma.c RDMA tool + * Authors: Mark Zhang + */ + +#include "rdma.h" +#include "res.h" +#include + +static int stat_help(struct rd *rd) +{ + pr_out("Usage: %s [ OPTIONS ] statistic { COMMAND | help }\n", rd->filename); + pr_out(" %s statistic OBJECT show\n", rd->filename); + pr_out(" %s statistic OBJECT show link [ DEV/PORT_INDEX ] [ FILTER-NAME FILTER-VALUE ]\n", rd->filename); + pr_out("Examples:\n"); + pr_out(" %s statistic qp show\n", rd->filename); + pr_out(" %s statistic qp show link mlx5_2/1\n", rd->filename); + + return 0; +} + +static 
int res_get_hwcounters(struct rd *rd, struct nlattr *hwc_table, bool print) +{ + struct nlattr *nla_entry; + const char *nm; + uint64_t v; + int err; + + mnl_attr_for_each_nested(nla_entry, hwc_table) { + struct nlattr *hw_line[RDMA_NLDEV_ATTR_MAX] = {}; + + err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, hw_line); + if (err != MNL_CB_OK) + return -EINVAL; + + if (!hw_line[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] || + !hw_line[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE]) { + return -EINVAL; + } + + if (!print) + continue; + + nm = mnl_attr_get_str(hw_line[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME]); + v = mnl_attr_get_u64(hw_line[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE]); + if (rd->pretty_output && !rd->json_output) + newline_indent(rd); + res_print_uint(rd, nm, v, hw_line[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME]); + } + + return MNL_CB_OK; +} + +static int res_counter_line(struct rd *rd, const char *name, int index, + struct nlattr **nla_line) +{ + uint32_t cntn, port = 0, pid = 0, qpn; + struct nlattr *hwc_table, *qp_table; + struct nlattr *nla_entry; + const char *comm = NULL; + bool isfirst; + int err; + + if (nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]) + port = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]); + + hwc_table = nla_line[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]; + qp_table = nla_line[RDMA_NLDEV_ATTR_RES_QP]; + if (!hwc_table || !qp_table || + !nla_line[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) + return MNL_CB_ERROR; + + cntn = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); + if (rd_is_filtered_attr(rd, "cntn", cntn, + nla_line[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])) + return MNL_CB_OK; + + if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) { + pid = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_PID]); + comm = get_task_name(pid); + } + if (rd_is_filtered_attr(rd, "pid", pid, + nla_line[RDMA_NLDEV_ATTR_RES_PID])) + return MNL_CB_OK; + + if (nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]) + comm = (char *)mnl_attr_get_str( + 
nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]); + + mnl_attr_for_each_nested(nla_entry, qp_table) { + struct nlattr *qp_line[RDMA_NLDEV_ATTR_MAX] = {}; + + err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, qp_line); + if (err != MNL_CB_OK) + return -EINVAL; + + if (!qp_line[RDMA_NLDEV_ATTR_RES_LQPN]) + return -EINVAL; + + qpn = mnl_attr_get_u32(qp_line[RDMA_NLDEV_ATTR_RES_LQPN]); + if (rd_is_filtered_attr(rd, "lqpn", qpn, + qp_line[RDMA_NLDEV_ATTR_RES_LQPN])) + return MNL_CB_OK; + } + + err = res_get_hwcounters(rd, hwc_table, false); + if (err != MNL_CB_OK) + return err; + + if (rd->json_output) { + jsonw_string_field(rd->jw, "ifname", name); + if (port) + jsonw_uint_field(rd->jw, "port", port); + jsonw_uint_field(rd->jw, "cntn", cntn); + } else { + if (port) + pr_out("link %s/%u cntn %u ", name, port, cntn); + else + pr_out("dev %s cntn %u ", name, cntn); + } + + res_print_uint(rd, "pid", pid, nla_line[RDMA_NLDEV_ATTR_RES_PID]); + print_comm(rd, comm, nla_line); + + res_get_hwcounters(rd, hwc_table, true); + + isfirst = true; + mnl_attr_for_each_nested(nla_entry, qp_table) { + struct nlattr *qp_line[RDMA_NLDEV_ATTR_MAX] = {}; + + if (isfirst && !rd->json_output) + pr_out("\n LQPN: <"); + + err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, qp_line); + if (err != MNL_CB_OK) + return -EINVAL; + + if (!qp_line[RDMA_NLDEV_ATTR_RES_LQPN]) + return -EINVAL; + + qpn = mnl_attr_get_u32(qp_line[RDMA_NLDEV_ATTR_RES_LQPN]); + if (rd->json_output) { + jsonw_uint_field(rd->jw, "lqpn", qpn); + } else { + if (isfirst) + pr_out("%d", qpn); + else + pr_out(", %d", qpn); + } + isfirst = false; + } + + if (!rd->json_output) + pr_out(">\n"); + return MNL_CB_OK; +} + +static int stat_qp_show_parse_cb(const struct nlmsghdr *nlh, void *data) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; + struct nlattr *nla_table, *nla_entry; + struct rd *rd = data; + const char *name; + uint32_t idx; + int ret; + + mnl_attr_parse(nlh, 0, rd_attr_cb, tb); + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 
!tb[RDMA_NLDEV_ATTR_DEV_NAME] || + !tb[RDMA_NLDEV_ATTR_STAT_COUNTER]) + return MNL_CB_ERROR; + + name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); + idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + nla_table = tb[RDMA_NLDEV_ATTR_STAT_COUNTER]; + + mnl_attr_for_each_nested(nla_entry, nla_table) { + struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {}; + + ret = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line); + if (ret != MNL_CB_OK) + break; + + ret = res_counter_line(rd, name, idx, nla_line); + if (ret != MNL_CB_OK) + break; + } + + return ret; +} + +static const struct filters stat_valid_filters[MAX_NUMBER_OF_FILTERS] = { + { .name = "cntn", .is_number = true }, + { .name = "lqpn", .is_number = true }, + { .name = "pid", .is_number = true }, +}; + +static int stat_qp_show_one_link(struct rd *rd) +{ + int flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP; + uint32_t seq; + int ret; + + if (!rd->port_idx) + return 0; + + ret = rd_build_filter(rd, stat_valid_filters); + if (ret) + return ret; + + rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_GET, &seq, flags); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP); + ret = rd_send_msg(rd); + if (ret) + return ret; + + if (rd->json_output) + jsonw_start_object(rd->jw); + ret = rd_recv_msg(rd, stat_qp_show_parse_cb, rd, seq); + if (rd->json_output) + jsonw_end_object(rd->jw); + + return ret; +} + +static int stat_qp_show_link(struct rd *rd) +{ + return rd_exec_link(rd, stat_qp_show_one_link, false); +} + +static int stat_qp_show(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_qp_show_link }, + { "link", stat_qp_show_link }, + { "help", stat_help }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + +static int stat_qp(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_qp_show }, + { "show", stat_qp_show }, 
+ { "list", stat_qp_show }, + { "help", stat_help }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + +int cmd_stat(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_help }, + { "help", stat_help }, + { "qp", stat_qp }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "statistic command"); +} diff --git a/rdma/utils.c b/rdma/utils.c index 558d1c29..7bc0439a 100644 --- a/rdma/utils.c +++ b/rdma/utils.c @@ -436,6 +436,13 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_DRIVER_S64] = MNL_TYPE_U64, [RDMA_NLDEV_ATTR_DRIVER_U64] = MNL_TYPE_U64, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = MNL_TYPE_U8, + [RDMA_NLDEV_ATTR_STAT_COUNTER] = MNL_TYPE_NESTED, + [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = MNL_TYPE_NESTED, + [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = MNL_TYPE_U32, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = MNL_TYPE_NESTED, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = MNL_TYPE_NESTED, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = MNL_TYPE_NUL_STRING, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = MNL_TYPE_U64, }; int rd_attr_check(const struct nlattr *attr, int *typep) From 1b2ca7ada7806a8da03e65e3af40f82ad910f281 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 17 Jul 2019 17:31:51 +0300 Subject: [PATCH 15/36] rdma: Add get per-port counter mode support Add an interface to show which mode is active. Two modes are supported: - "auto": In this mode all QPs belonging to one category are bound automatically to a single counter set. Currently only "qp type" is supported; - "manual": In this mode QPs are bound to a counter manually.
Examples: $ rdma statistic qp mode 0/1: mlx5_0/1: qp auto off 1/1: mlx5_1/1: qp auto off 2/1: mlx5_2/1: qp auto type on 3/1: mlx5_3/1: qp auto off $ rdma statistic qp mode link mlx5_0 0/1: mlx5_0/1: qp auto off Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Stephen Hemminger --- rdma/stat.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++ rdma/utils.c | 2 + 2 files changed, 142 insertions(+) diff --git a/rdma/stat.c b/rdma/stat.c index da35ef7d..0c239851 100644 --- a/rdma/stat.c +++ b/rdma/stat.c @@ -13,13 +13,152 @@ static int stat_help(struct rd *rd) pr_out("Usage: %s [ OPTIONS ] statistic { COMMAND | help }\n", rd->filename); pr_out(" %s statistic OBJECT show\n", rd->filename); pr_out(" %s statistic OBJECT show link [ DEV/PORT_INDEX ] [ FILTER-NAME FILTER-VALUE ]\n", rd->filename); + pr_out(" %s statistic OBJECT mode\n", rd->filename); + pr_out("where OBJECT: = { qp }\n"); pr_out("Examples:\n"); pr_out(" %s statistic qp show\n", rd->filename); pr_out(" %s statistic qp show link mlx5_2/1\n", rd->filename); + pr_out(" %s statistic qp mode\n", rd->filename); + pr_out(" %s statistic qp mode link mlx5_0\n", rd->filename); return 0; } +struct counter_param { + char *name; + uint32_t attr; +}; + +static struct counter_param auto_params[] = { + { "type", RDMA_COUNTER_MASK_QP_TYPE, }, + { NULL }, +}; + +static int prepare_auto_mode_str(struct nlattr **tb, uint32_t mask, + char *output, int len) +{ + char s[] = "qp auto"; + int i, outlen = strlen(s); + + memset(output, 0, len); + snprintf(output, len, "%s", s); + + if (mask) { + for (i = 0; auto_params[i].name != NULL; i++) { + if (mask & auto_params[i].attr) { + outlen += strlen(auto_params[i].name) + 1; + if (outlen >= len) + return -EINVAL; + strcat(output, " "); + strcat(output, auto_params[i].name); + } + } + + if (outlen + strlen(" on") >= len) + return -EINVAL; + strcat(output, " on"); + } else { + if (outlen + strlen(" off") >= len) + return -EINVAL; + strcat(output, " off"); + } 
+ + return 0; +} + +static int qp_link_get_mode_parse_cb(const struct nlmsghdr *nlh, void *data) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; + uint32_t mode = 0, mask = 0; + char output[128] = {}; + struct rd *rd = data; + uint32_t idx, port; + const char *name; + + mnl_attr_parse(nlh, 0, rd_attr_cb, tb); + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME]) + return MNL_CB_ERROR; + + if (!tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { + pr_err("This tool doesn't support switches yet\n"); + return MNL_CB_ERROR; + } + + idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + port = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); + if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) + mode = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); + + if (mode == RDMA_COUNTER_MODE_AUTO) { + if (!tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) + return MNL_CB_ERROR; + mask = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); + prepare_auto_mode_str(tb, mask, output, sizeof(output)); + } else { + snprintf(output, sizeof(output), "qp auto off"); + } + + if (rd->json_output) { + jsonw_uint_field(rd->jw, "ifindex", idx); + jsonw_uint_field(rd->jw, "port", port); + jsonw_string_field(rd->jw, "mode", output); + } else { + pr_out("%u/%u: %s/%u: %s\n", idx, port, name, port, output); + } + + return MNL_CB_OK; +} + +static int stat_one_qp_link_get_mode(struct rd *rd) +{ + uint32_t seq; + int ret; + + if (!rd->port_idx) + return 0; + + rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_GET, + &seq, (NLM_F_REQUEST | NLM_F_ACK)); + + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); + /* Make RDMA_NLDEV_ATTR_STAT_MODE valid so that kernel knows + * return only mode instead of all counters + */ + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_MODE, + RDMA_COUNTER_MODE_MANUAL); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP); + ret 
= rd_send_msg(rd); + if (ret) + return ret; + + if (rd->json_output) + jsonw_start_object(rd->jw); + ret = rd_recv_msg(rd, qp_link_get_mode_parse_cb, rd, seq); + if (rd->json_output) + jsonw_end_object(rd->jw); + + return ret; +} + +static int stat_qp_link_get_mode(struct rd *rd) +{ + return rd_exec_link(rd, stat_one_qp_link_get_mode, false); +} + +static int stat_qp_get_mode(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_qp_link_get_mode }, + { "link", stat_qp_link_get_mode }, + { "help", stat_help }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + static int res_get_hwcounters(struct rd *rd, struct nlattr *hwc_table, bool print) { struct nlattr *nla_entry; @@ -248,6 +387,7 @@ static int stat_qp(struct rd *rd) { NULL, stat_qp_show }, { "show", stat_qp_show }, { "list", stat_qp_show }, + { "mode", stat_qp_get_mode }, { "help", stat_help }, { 0 } }; diff --git a/rdma/utils.c b/rdma/utils.c index 7bc0439a..9c885ad7 100644 --- a/rdma/utils.c +++ b/rdma/utils.c @@ -443,6 +443,8 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = MNL_TYPE_NESTED, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = MNL_TYPE_NUL_STRING, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = MNL_TYPE_U64, + [RDMA_NLDEV_ATTR_STAT_MODE] = MNL_TYPE_U32, + [RDMA_NLDEV_ATTR_STAT_RES] = MNL_TYPE_U32, }; int rd_attr_check(const struct nlattr *attr, int *typep) From 887fc739eb969f4948bffa8c0a9df908fadc2838 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 17 Jul 2019 17:31:52 +0300 Subject: [PATCH 16/36] rdma: Add rdma statistic counter per-port auto mode support With per-QP statistic counter support, a user is allowed to monitor specific QPs categories, which are bound to/unbound from counters dynamically allocated/deallocated. In per-port "auto" mode, QPs are bound to counters automatically according to common criteria. 
For example a per "type"(qp type) scheme, where in each process all QPs have same qp type are bind automatically to a single counter. Currently only "type" (qp type) is supported. Examples: $ rdma statistic qp set link mlx5_2/1 auto type on $ rdma statistic qp set link mlx5_2/1 auto off Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Stephen Hemminger --- rdma/stat.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++ rdma/utils.c | 1 + 2 files changed, 88 insertions(+) diff --git a/rdma/stat.c b/rdma/stat.c index 0c239851..ad1cc063 100644 --- a/rdma/stat.c +++ b/rdma/stat.c @@ -14,12 +14,17 @@ static int stat_help(struct rd *rd) pr_out(" %s statistic OBJECT show\n", rd->filename); pr_out(" %s statistic OBJECT show link [ DEV/PORT_INDEX ] [ FILTER-NAME FILTER-VALUE ]\n", rd->filename); pr_out(" %s statistic OBJECT mode\n", rd->filename); + pr_out(" %s statistic OBJECT set COUNTER_SCOPE [DEV/PORT_INDEX] auto {CRITERIA | off}\n", rd->filename); pr_out("where OBJECT: = { qp }\n"); + pr_out(" CRITERIA : = { type }\n"); + pr_out(" COUNTER_SCOPE: = { link | dev }\n"); pr_out("Examples:\n"); pr_out(" %s statistic qp show\n", rd->filename); pr_out(" %s statistic qp show link mlx5_2/1\n", rd->filename); pr_out(" %s statistic qp mode\n", rd->filename); pr_out(" %s statistic qp mode link mlx5_0\n", rd->filename); + pr_out(" %s statistic qp set link mlx5_2/1 auto type on\n", rd->filename); + pr_out(" %s statistic qp set link mlx5_2/1 auto off\n", rd->filename); return 0; } @@ -381,6 +386,87 @@ static int stat_qp_show(struct rd *rd) return rd_exec_cmd(rd, cmds, "parameter"); } +static int stat_qp_set_link_auto_sendmsg(struct rd *rd, uint32_t mask) +{ + uint32_t seq; + + rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_SET, + &seq, (NLM_F_REQUEST | NLM_F_ACK)); + + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, 
RDMA_NLDEV_ATTR_RES_QP); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_MODE, + RDMA_COUNTER_MODE_AUTO); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask); + + return rd_sendrecv_msg(rd, seq); +} + +static int stat_one_qp_set_link_auto_off(struct rd *rd) +{ + return stat_qp_set_link_auto_sendmsg(rd, 0); +} + +static int stat_one_qp_set_auto_type_on(struct rd *rd) +{ + return stat_qp_set_link_auto_sendmsg(rd, RDMA_COUNTER_MASK_QP_TYPE); +} + +static int stat_one_qp_set_link_auto_type(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_help }, + { "on", stat_one_qp_set_auto_type_on }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + +static int stat_one_qp_set_link_auto(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_one_qp_link_get_mode }, + { "off", stat_one_qp_set_link_auto_off }, + { "type", stat_one_qp_set_link_auto_type }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + +static int stat_one_qp_set_link(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_one_qp_link_get_mode }, + { "auto", stat_one_qp_set_link_auto }, + { 0 } + }; + + if (!rd->port_idx) + return 0; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + +static int stat_qp_set_link(struct rd *rd) +{ + return rd_exec_link(rd, stat_one_qp_set_link, false); +} + +static int stat_qp_set(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_help }, + { "link", stat_qp_set_link }, + { "help", stat_help }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + static int stat_qp(struct rd *rd) { const struct rd_cmd cmds[] = { @@ -388,6 +474,7 @@ static int stat_qp(struct rd *rd) { "show", stat_qp_show }, { "list", stat_qp_show }, { "mode", stat_qp_get_mode }, + { "set", stat_qp_set }, { "help", stat_help }, { 0 } }; diff --git a/rdma/utils.c b/rdma/utils.c index 9c885ad7..aed1a3d0 100644 --- a/rdma/utils.c +++ b/rdma/utils.c @@ -445,6 +445,7 @@ static const enum mnl_attr_data_type 
nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = MNL_TYPE_U64, [RDMA_NLDEV_ATTR_STAT_MODE] = MNL_TYPE_U32, [RDMA_NLDEV_ATTR_STAT_RES] = MNL_TYPE_U32, + [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = MNL_TYPE_U32, }; int rd_attr_check(const struct nlattr *attr, int *typep) From cbe10b4e44f7f8d15bec5e31d16daa993a6b5d7a Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 17 Jul 2019 17:31:53 +0300 Subject: [PATCH 17/36] rdma: Make get_port_from_argv() returns valid port in strict port mode When strict_port is set, make get_port_from_argv() return failure if no valid port is specified. Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Stephen Hemminger --- rdma/utils.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rdma/utils.c b/rdma/utils.c index aed1a3d0..95b669f3 100644 --- a/rdma/utils.c +++ b/rdma/utils.c @@ -56,7 +56,7 @@ bool rd_no_arg(struct rd *rd) * mlx5_1/1 | 1 | false * mlx5_1/- | 0 | false * - * In strict mode, /- will return error. + * In strict port mode, a non-0 port must be provided */ static int get_port_from_argv(struct rd *rd, uint32_t *port, bool *is_dump_all, bool strict_port) @@ -64,7 +64,7 @@ static int get_port_from_argv(struct rd *rd, uint32_t *port, char *slash; *port = 0; - *is_dump_all = true; + *is_dump_all = strict_port ? false : true; slash = strchr(rd_argv(rd), '/'); /* if no port found, return 0 */ @@ -83,6 +83,9 @@ static int get_port_from_argv(struct rd *rd, uint32_t *port, if (!*port && strlen(slash)) return -EINVAL; } + if (strict_port && (*port == 0)) + return -EINVAL; + return 0; } From a6d0773ebeccc4afd292d470e2f365f1ee74dda4 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 17 Jul 2019 17:31:54 +0300 Subject: [PATCH 18/36] rdma: Add stat manual mode support In manual mode a QP can be manually bound to a counter. If the counter id (cntn) is not specified then the kernel will allocate one.
After a successful bind, the cntn can be seen through "rdma statistic qp show". And in unbind if lqpn is not specified then all QPs on this counter will be unbound. The manual and auto mode are mutual-exclusive. Examples: $ rdma statistic qp bind link mlx5_2/1 lqpn 178 $ rdma statistic qp bind link mlx5_2/1 lqpn 178 cntn 4 $ rdma statistic qp unbind link mlx5_2/1 cntn 4 $ rdma statistic qp unbind link mlx5_2/1 cntn 4 lqpn 178 Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Stephen Hemminger --- rdma/stat.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/rdma/stat.c b/rdma/stat.c index ad1cc063..942c1ac3 100644 --- a/rdma/stat.c +++ b/rdma/stat.c @@ -15,6 +15,8 @@ static int stat_help(struct rd *rd) pr_out(" %s statistic OBJECT show link [ DEV/PORT_INDEX ] [ FILTER-NAME FILTER-VALUE ]\n", rd->filename); pr_out(" %s statistic OBJECT mode\n", rd->filename); pr_out(" %s statistic OBJECT set COUNTER_SCOPE [DEV/PORT_INDEX] auto {CRITERIA | off}\n", rd->filename); + pr_out(" %s statistic OBJECT bind COUNTER_SCOPE [DEV/PORT_INDEX] [OBJECT-ID] [COUNTER-ID]\n", rd->filename); + pr_out(" %s statistic OBJECT unbind COUNTER_SCOPE [DEV/PORT_INDEX] [COUNTER-ID]\n", rd->filename); pr_out("where OBJECT: = { qp }\n"); pr_out(" CRITERIA : = { type }\n"); pr_out(" COUNTER_SCOPE: = { link | dev }\n"); @@ -25,6 +27,10 @@ static int stat_help(struct rd *rd) pr_out(" %s statistic qp mode link mlx5_0\n", rd->filename); pr_out(" %s statistic qp set link mlx5_2/1 auto type on\n", rd->filename); pr_out(" %s statistic qp set link mlx5_2/1 auto off\n", rd->filename); + pr_out(" %s statistic qp bind link mlx5_2/1 lqpn 178\n", rd->filename); + pr_out(" %s statistic qp bind link mlx5_2/1 lqpn 178 cntn 4\n", rd->filename); + pr_out(" %s statistic qp unbind link mlx5_2/1 cntn 4\n", rd->filename); + pr_out(" %s statistic qp unbind link mlx5_2/1 cntn 4 lqpn 178\n", rd->filename); return 0; } @@ -467,6 +473,190 @@ static 
int stat_qp_set(struct rd *rd) return rd_exec_cmd(rd, cmds, "parameter"); } +static int stat_get_arg(struct rd *rd, const char *arg) +{ + int value = 0; + char *endp; + + if (strcmpx(rd_argv(rd), arg) != 0) + return -EINVAL; + + rd_arg_inc(rd); + value = strtol(rd_argv(rd), &endp, 10); + rd_arg_inc(rd); + + return value; +} + +static int stat_one_qp_bind(struct rd *rd) +{ + int lqpn = 0, cntn = 0, ret; + uint32_t seq; + + if (rd_no_arg(rd)) { + stat_help(rd); + return -EINVAL; + } + + ret = rd_build_filter(rd, stat_valid_filters); + if (ret) + return ret; + + lqpn = stat_get_arg(rd, "lqpn"); + + rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_SET, + &seq, (NLM_F_REQUEST | NLM_F_ACK)); + + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_MODE, + RDMA_COUNTER_MODE_MANUAL); + + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_RES_LQPN, lqpn); + + if (rd_argc(rd)) { + cntn = stat_get_arg(rd, "cntn"); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, + cntn); + } + + return rd_sendrecv_msg(rd, seq); +} + +static int do_stat_qp_unbind_lqpn(struct rd *rd, uint32_t cntn, uint32_t lqpn) +{ + uint32_t seq; + + rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_DEL, + &seq, (NLM_F_REQUEST | NLM_F_ACK)); + + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_MODE, + RDMA_COUNTER_MODE_MANUAL); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_RES_LQPN, lqpn); + + return rd_sendrecv_msg(rd, seq); +} + +static int stat_get_counter_parse_cb(const struct nlmsghdr *nlh, void *data) +{ + struct nlattr 
*tb[RDMA_NLDEV_ATTR_MAX] = {}; + struct nlattr *nla_table, *nla_entry; + struct rd *rd = data; + uint32_t lqpn, cntn; + int err; + + mnl_attr_parse(nlh, 0, rd_attr_cb, tb); + + if (!tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) + return MNL_CB_ERROR; + cntn = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); + + nla_table = tb[RDMA_NLDEV_ATTR_RES_QP]; + if (!nla_table) + return MNL_CB_ERROR; + + mnl_attr_for_each_nested(nla_entry, nla_table) { + struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {}; + + err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line); + if (err != MNL_CB_OK) + return -EINVAL; + + if (!nla_line[RDMA_NLDEV_ATTR_RES_LQPN]) + return -EINVAL; + + lqpn = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_LQPN]); + err = do_stat_qp_unbind_lqpn(rd, cntn, lqpn); + if (err) + return MNL_CB_ERROR; + } + + return MNL_CB_OK; +} + +static int stat_one_qp_unbind(struct rd *rd) +{ + int flags = NLM_F_REQUEST | NLM_F_ACK, ret; + char buf[MNL_SOCKET_BUFFER_SIZE]; + int lqpn = 0, cntn = 0; + unsigned int portid; + uint32_t seq; + + ret = rd_build_filter(rd, stat_valid_filters); + if (ret) + return ret; + + cntn = stat_get_arg(rd, "cntn"); + if (rd_argc(rd)) { + lqpn = stat_get_arg(rd, "lqpn"); + return do_stat_qp_unbind_lqpn(rd, cntn, lqpn); + } + + rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_GET, &seq, flags); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn); + ret = rd_send_msg(rd); + if (ret) + return ret; + + + /* Can't use rd_recv_msg() since the callback also calls it (recursively), + * then rd_recv_msg() always return -1 here + */ + portid = mnl_socket_get_portid(rd->nl); + ret = mnl_socket_recvfrom(rd->nl, buf, sizeof(buf)); + if (ret <= 0) + return ret; + + ret = mnl_cb_run(buf, ret, seq, portid, stat_get_counter_parse_cb, rd); + 
mnl_socket_close(rd->nl); + if (ret != MNL_CB_OK) + return ret; + + return 0; +} + +static int stat_qp_bind_link(struct rd *rd) +{ + return rd_exec_link(rd, stat_one_qp_bind, true); +} + +static int stat_qp_bind(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_help }, + { "link", stat_qp_bind_link }, + { "help", stat_help }, + { 0 }, + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + +static int stat_qp_unbind_link(struct rd *rd) +{ + return rd_exec_link(rd, stat_one_qp_unbind, true); +} + +static int stat_qp_unbind(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_help }, + { "link", stat_qp_unbind_link }, + { "help", stat_help }, + { 0 }, + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + static int stat_qp(struct rd *rd) { const struct rd_cmd cmds[] = { @@ -475,6 +665,8 @@ static int stat_qp(struct rd *rd) { "list", stat_qp_show }, { "mode", stat_qp_get_mode }, { "set", stat_qp_set }, + { "bind", stat_qp_bind }, + { "unbind", stat_qp_unbind }, { "help", stat_help }, { 0 } }; From a7137e517fef4fccbf56afd06e8fe155be3ec8c8 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 17 Jul 2019 17:31:55 +0300 Subject: [PATCH 19/36] rdma: Add default counter show support Show default counter statistics, which are same through the sysfs interface: /sys/class/infiniband//ports//hw_counters/ Example: $ rdma stat show link mlx5_2/1 link mlx5_2/1 rx_write_requests 8 rx_read_requests 4 rx_atomic_requests 0 out_of_buffer 0 out_of_sequence 0 duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0 resp_local_length_error 0 resp_cqe_error 0 req_cqe_error 0 req_remote_invalid_request 0 req_remote_access_errors 0 resp_remote_access_errors 0 resp_cqe_flush_error 0 req_cqe_flush_error 0 rp_cnp_ignored 0 rp_cnp_handled 0 np_ecn_marked_roce_packets 0 np_cnp_sent 0 rx_icrc_encapsulated 0 Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Stephen Hemminger --- rdma/stat.c | 74 
++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/rdma/stat.c b/rdma/stat.c index 942c1ac3..ef0bbcf1 100644 --- a/rdma/stat.c +++ b/rdma/stat.c @@ -17,6 +17,8 @@ static int stat_help(struct rd *rd) pr_out(" %s statistic OBJECT set COUNTER_SCOPE [DEV/PORT_INDEX] auto {CRITERIA | off}\n", rd->filename); pr_out(" %s statistic OBJECT bind COUNTER_SCOPE [DEV/PORT_INDEX] [OBJECT-ID] [COUNTER-ID]\n", rd->filename); pr_out(" %s statistic OBJECT unbind COUNTER_SCOPE [DEV/PORT_INDEX] [COUNTER-ID]\n", rd->filename); + pr_out(" %s statistic show\n", rd->filename); + pr_out(" %s statistic show link [ DEV/PORT_INDEX ]\n", rd->filename); pr_out("where OBJECT: = { qp }\n"); pr_out(" CRITERIA : = { type }\n"); pr_out(" COUNTER_SCOPE: = { link | dev }\n"); @@ -31,6 +33,8 @@ static int stat_help(struct rd *rd) pr_out(" %s statistic qp bind link mlx5_2/1 lqpn 178 cntn 4\n", rd->filename); pr_out(" %s statistic qp unbind link mlx5_2/1 cntn 4\n", rd->filename); pr_out(" %s statistic qp unbind link mlx5_2/1 cntn 4 lqpn 178\n", rd->filename); + pr_out(" %s statistic show\n", rd->filename); + pr_out(" %s statistic show link mlx5_2/1\n", rd->filename); return 0; } @@ -674,10 +678,78 @@ static int stat_qp(struct rd *rd) return rd_exec_cmd(rd, cmds, "parameter"); } +static int stat_show_parse_cb(const struct nlmsghdr *nlh, void *data) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; + struct rd *rd = data; + const char *name; + uint32_t port; + int ret; + + mnl_attr_parse(nlh, 0, rd_attr_cb, tb); + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || + !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) + return MNL_CB_ERROR; + + name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); + port = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (rd->json_output) { + jsonw_string_field(rd->jw, "ifname", name); + jsonw_uint_field(rd->jw, "port", port); + } else { + pr_out("link %s/%u ", 
name, port); + } + + ret = res_get_hwcounters(rd, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], true); + + if (!rd->json_output) + pr_out("\n"); + return ret; +} + +static int stat_show_one_link(struct rd *rd) +{ + int flags = NLM_F_REQUEST | NLM_F_ACK; + uint32_t seq; + int ret; + + if (!rd->port_idx) + return 0; + + rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_GET, &seq, flags); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx); + ret = rd_send_msg(rd); + if (ret) + return ret; + + return rd_recv_msg(rd, stat_show_parse_cb, rd, seq); +} + +static int stat_show_link(struct rd *rd) +{ + return rd_exec_link(rd, stat_show_one_link, false); +} + +static int stat_show(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, stat_show_link }, + { "link", stat_show_link }, + { "help", stat_help }, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + int cmd_stat(struct rd *rd) { const struct rd_cmd cmds[] = { - { NULL, stat_help }, + { NULL, stat_show }, + { "show", stat_show }, + { "list", stat_show }, { "help", stat_help }, { "qp", stat_qp }, { 0 } From ca084842dac457538ea3068ce0bcab046e2024e7 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 17 Jul 2019 17:31:56 +0300 Subject: [PATCH 20/36] rdma: Document counter statistic Add document of accessing the QP counter, including bind/unbind a QP to a counter manually or automatically, and dump counter statistics. 
Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Stephen Hemminger --- man/man8/rdma-dev.8 | 1 + man/man8/rdma-link.8 | 1 + man/man8/rdma-resource.8 | 1 + man/man8/rdma-statistic.8 | 167 ++++++++++++++++++++++++++++++++++++++ man/man8/rdma.8 | 7 +- 5 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 man/man8/rdma-statistic.8 diff --git a/man/man8/rdma-dev.8 b/man/man8/rdma-dev.8 index 38e34b3b..e77e7cd0 100644 --- a/man/man8/rdma-dev.8 +++ b/man/man8/rdma-dev.8 @@ -77,6 +77,7 @@ previously created using iproute2 ip command. .BR rdma-link (8), .BR rdma-resource (8), .BR rdma-system (8), +.BR rdma-statistic (8), .br .SH AUTHOR diff --git a/man/man8/rdma-link.8 b/man/man8/rdma-link.8 index b3b40de7..32f80228 100644 --- a/man/man8/rdma-link.8 +++ b/man/man8/rdma-link.8 @@ -97,6 +97,7 @@ Removes RXE link rxe_eth0 .BR rdma (8), .BR rdma-dev (8), .BR rdma-resource (8), +.BR rdma-statistic (8), .br .SH AUTHOR diff --git a/man/man8/rdma-resource.8 b/man/man8/rdma-resource.8 index 40b073db..05030d0a 100644 --- a/man/man8/rdma-resource.8 +++ b/man/man8/rdma-resource.8 @@ -103,6 +103,7 @@ Show CQs belonging to pid 30489 .BR rdma (8), .BR rdma-dev (8), .BR rdma-link (8), +.BR rdma-statistic (8), .br .SH AUTHOR diff --git a/man/man8/rdma-statistic.8 b/man/man8/rdma-statistic.8 new file mode 100644 index 00000000..dea6ff24 --- /dev/null +++ b/man/man8/rdma-statistic.8 @@ -0,0 +1,167 @@ +.TH RDMA\-STATISTIC 8 "17 Mar 2019" "iproute2" "Linux" +.SH NAME +rdma-statistic \- RDMA statistic counter configuration +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B rdma +.RI "[ " OPTIONS " ]" +.B statistic +.RI " { " COMMAND " | " +.BR help " }" +.sp + +.ti -8 +.B rdma statistic +.RI "[ " OBJECT " ]" +.B show + +.ti -8 +.B rdma statistic +.RI "[ " OBJECT " ]" +.B show link +.RI "[ " DEV/PORT_INDEX " ]" + +.ti -8 +.B rdma statistic +.IR OBJECT +.B mode + +.ti -8 +.B rdma statistic +.IR OBJECT +.B set +.IR COUNTER_SCOPE +.RI "[ " DEV/PORT_INDEX "]" +.B auto
+.RI "{ " CRITERIA " | " +.BR off " }" + +.ti -8 +.B rdma statistic +.IR OBJECT +.B bind +.IR COUNTER_SCOPE +.RI "[ " DEV/PORT_INDEX "]" +.RI "[ " OBJECT-ID " ]" +.RI "[ " COUNTER-ID " ]" + +.ti -8 +.B rdma statistic +.IR OBJECT +.B unbind +.IR COUNTER_SCOPE +.RI "[ " DEV/PORT_INDEX "]" +.RI "[ " COUNTER-ID " ]" +.RI "[ " OBJECT-ID " ]" + +.ti -8 +.IR COUNTER_SCOPE " := " +.RB "{ " link " | " dev " }" + +.ti -8 +.IR OBJECT " := " +.RB "{ " qp " }" + +.ti -8 +.IR CRITERIA " := " +.RB "{ " type " }" + +.SH "DESCRIPTION" +.SS rdma statistic [object] show - Queries the specified RDMA device for RDMA and driver-specific statistics. Show the default hw counters if object is not specified + +.PP +.I "DEV" +- specifies counters on this RDMA device to show. + +.I "PORT_INDEX" +- specifies counters on this RDMA port to show. + +.SS rdma statistic set - configure counter statistic auto-mode for a specific device/port +In auto mode all objects belonging to one category are bound automatically to a single counter set. + +.SS rdma statistic bind - manually bind an object (e.g., a qp) with a counter +When bound the statistics of this object are available in this counter. + +.SS rdma statistic unbind - manually unbind an object (e.g., a qp) from the counter previously bound +When unbound the statistics of this object are no longer available in this counter; and if object id is not specified then all objects on this counter will be unbound. + +.I "COUNTER-ID" +- specifies the id of the counter to be bound. +If this argument is omitted then a new counter will be allocated. + +.SH "EXAMPLES" +.PP +rdma statistic show +.RS 4 +Shows the state of the default counter of all RDMA devices on the system. +.RE +.PP +rdma statistic show link mlx5_2/1 +.RS 4 +Shows the state of the default counter of specified RDMA port +.RE +.PP +rdma statistic qp show +.RS 4 +Shows the state of all qp counters of all RDMA devices on the system.
+.RE +.PP +rdma statistic qp show link mlx5_2/1 +.RS 4 +Shows the state of all qp counters of specified RDMA port. +.RE +.PP +rdma statistic qp show link mlx5_2 pid 30489 +.RS 4 +Shows the state of all qp counters of specified RDMA port and belonging to pid 30489 +.RE +.PP +rdma statistic qp mode +.RS 4 +List current counter mode on all devices +.RE +.PP +rdma statistic qp mode link mlx5_2/1 +.RS 4 +List current counter mode of device mlx5_2 port 1 +.RE +.PP +rdma statistic qp set link mlx5_2/1 auto type on +.RS 4 +On device mlx5_2 port 1, for each new QP bind it with a counter automatically. Per counter for QPs with same qp type in each process. Currently only "type" is supported. +.RE +.PP +rdma statistic qp set link mlx5_2/1 auto off +.RS 4 +Turn-off auto mode on device mlx5_2 port 1. The allocated counters can be manually accessed. +.RE +.PP +rdma statistic qp bind link mlx5_2/1 lqpn 178 +.RS 4 +On device mlx5_2 port 1, allocate a counter and bind the specified qp on it +.RE +.PP +rdma statistic qp unbind link mlx5_2/1 cntn 4 lqpn 178 +.RS 4 +On device mlx5_2 port 1, unbind the specified qp from the specified counter +.RE +.PP +rdma statistic qp unbind link mlx5_2/1 cntn 4 +.RS 4 +On device mlx5_2 port 1, unbind all QPs on the specified counter. After that this counter will be released automatically by the kernel. + +.RE +.PP + +.SH SEE ALSO +.BR rdma (8), +.BR rdma-dev (8), +.BR rdma-link (8), +.BR rdma-resource (8), +.br + +.SH AUTHOR +Mark Zhang diff --git a/man/man8/rdma.8 b/man/man8/rdma.8 index 3ae33987..ef29b1c6 100644 --- a/man/man8/rdma.8 +++ b/man/man8/rdma.8 @@ -19,7 +19,7 @@ rdma \- RDMA tool .ti -8 .IR OBJECT " := { " -.BR dev " | " link " | " system " }" +.BR dev " | " link " | " system " | " statistic " }" .sp .ti -8 @@ -74,6 +74,10 @@ Generate JSON output. .B sys - RDMA subsystem related. +.TP +.B statistic +- RDMA counter statistic related.
+ .PP The names of all objects may be written in full or abbreviated form, for example @@ -112,6 +116,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. .BR rdma-link (8), .BR rdma-resource (8), .BR rdma-system (8), +.BR rdma-statistic (8), .br .SH REPORTING BUGS From b89d6202c98dd875cc2aea2065718e2cfe453439 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 22 Jul 2019 09:45:09 -0700 Subject: [PATCH 21/36] uapi: update kernel headers from 5.3-rc1 Signed-off-by: Stephen Hemminger --- include/uapi/linux/bpf.h | 4 ++-- include/uapi/linux/magic.h | 1 + include/uapi/linux/pkt_sched.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf39f61b..e75f97cf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3248,7 +3248,7 @@ struct bpf_sock_addr { __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __u32 user_port; /* Allows 4-byte read and write. @@ -3260,7 +3260,7 @@ struct bpf_sock_addr { __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. 
*/ __bpf_md_ptr(struct bpf_sock *, sk); diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 665e1862..1274c692 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -92,5 +92,6 @@ #define BALLOON_KVM_MAGIC 0x13661366 #define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ +#define Z3FOLD_MAGIC 0x33 #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 1f623252..18f18529 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1174,7 +1174,7 @@ enum { TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ TCA_TAPRIO_ATTR_FLAGS, /* u32 */ - TCA_TAPRIO_ATTR_TXTIME_DELAY, /* s32 */ + TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */ __TCA_TAPRIO_ATTR_MAX, }; From 33267017faf1a188d1286f5c423454a060517e39 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 26 Jul 2019 22:01:05 +0100 Subject: [PATCH 22/36] iproute2: devlink: port from sys/queue.h to list.h sys/queue.h does not exist on linux-musl targets and fails build as: devlink.c:28:10: fatal error: sys/queue.h: No such file or directory 28 | #include | ^~~~~~~~~~~~~ The change ports to list.h API and drops dependency of 'sys/queue.h'. The API maps one-to-one. Build-tested on linux-musl and linux-glibc. 
Bug: https://bugs.gentoo.org/690486 CC: Stephen Hemminger CC: netdev@vger.kernel.org Signed-off-by: Sergei Trofimovich Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index bb023c0c..0ea401ae 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -25,7 +25,6 @@ #include #include #include -#include #include "SNAPSHOT.h" #include "list.h" @@ -5981,13 +5980,13 @@ static int fmsg_value_show(struct dl *dl, int type, struct nlattr *nl_data) struct nest_qentry { int attr_type; - TAILQ_ENTRY(nest_qentry) nest_entries; + struct list_head nest_entries; }; struct fmsg_cb_data { struct dl *dl; uint8_t value_type; - TAILQ_HEAD(, nest_qentry) qhead; + struct list_head qhead; }; static int cmd_fmsg_nest_queue(struct fmsg_cb_data *fmsg_data, @@ -6001,13 +6000,13 @@ static int cmd_fmsg_nest_queue(struct fmsg_cb_data *fmsg_data, return -ENOMEM; entry->attr_type = *attr_value; - TAILQ_INSERT_HEAD(&fmsg_data->qhead, entry, nest_entries); + list_add(&fmsg_data->qhead, &entry->nest_entries); } else { - if (TAILQ_EMPTY(&fmsg_data->qhead)) + if (list_empty(&fmsg_data->qhead)) return MNL_CB_ERROR; - entry = TAILQ_FIRST(&fmsg_data->qhead); + entry = list_first_entry(&fmsg_data->qhead, struct nest_qentry, nest_entries); *attr_value = entry->attr_type; - TAILQ_REMOVE(&fmsg_data->qhead, entry, nest_entries); + list_del(&entry->nest_entries); free(entry); } return MNL_CB_OK; @@ -6116,7 +6115,7 @@ static int cmd_health_object_common(struct dl *dl, uint8_t cmd, uint16_t flags) return err; data.dl = dl; - TAILQ_INIT(&data.qhead); + INIT_LIST_HEAD(&data.qhead); err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_fmsg_object_cb, &data); return err; } From 36e584ad8af68a07e1b652fe15a450f299e1d3fe Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Fri, 26 Jul 2019 15:06:09 +0200 Subject: [PATCH 23/36] iplink_can: fix format output of clock with flag -details The command ip 
-details link show can0 prints in the last line the value of the clock frequency attached to the name of the following value "numtxqueues", e.g. clock 49500000numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 Add the missing space after the clock value. Signed-off-by: Antonio Borneo --- ip/iplink_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/iplink_can.c b/ip/iplink_can.c index 5bf490a9..735ab941 100644 --- a/ip/iplink_can.c +++ b/ip/iplink_can.c @@ -545,7 +545,7 @@ static void can_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) print_int(PRINT_ANY, "clock", - "\n clock %d", + "\n clock %d ", clock->freq); } From ab45d91d6af402d54f6d1f982986c48b93ca34f0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 26 Jul 2019 14:59:59 -0700 Subject: [PATCH 24/36] iplink: document 'change' option to ip link Add the command alias "change" to man page. Don't show it on usage, since it is not commonly used. Reported-off-by: Matteo Croce Signed-off-by: Stephen Hemminger Acked-by: Matteo Croce --- man/man8/ip-link.8.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 883d8807..a8ae72d2 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -1815,6 +1815,11 @@ can move the system to an unpredictable state. The solution is to avoid changing several parameters with one .B ip link set call. +The modifier +.B change +is equivalent to +.BR "set" . + .TP .BI dev " DEVICE " From c875433b145e33645798ecfe4d99bcb28c80d1e9 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Thu, 4 Jul 2019 14:24:27 +0200 Subject: [PATCH 25/36] utils: Fix get_s64() function get_s64() uses internally strtoll() to parse the value out of a given string. strtoll() returns a long long. However, the intermediate variable is long only which might be 32 bit on some systems. So, fix it. 
Signed-off-by: Kurt Kanzenbach Signed-off-by: Stephen Hemminger --- lib/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/utils.c b/lib/utils.c index 9ea21fa1..4c43f8fd 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -374,7 +374,7 @@ int get_u8(__u8 *val, const char *arg, int base) int get_s64(__s64 *val, const char *arg, int base) { - long res; + long long res; char *ptr; errno = 0; From 067925e2e1acd476420c746d8032582248bb75b1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 29 Jul 2019 08:45:32 -0700 Subject: [PATCH 26/36] json_print: drop extra semi-colons The _PRINT_FUNC() macro expands to a function call. Putting a semi-colon is unnecessary and causes warnings with -pedantic Signed-off-by: Stephen Hemminger --- include/json_print.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/include/json_print.h b/include/json_print.h index dbdc90e2..fe92d14c 100644 --- a/include/json_print.h +++ b/include/json_print.h @@ -57,20 +57,21 @@ void print_nl(void); { \ print_color_##type_name(t, COLOR_NONE, key, fmt, value); \ } -_PRINT_FUNC(int, int); -_PRINT_FUNC(s64, int64_t); -_PRINT_FUNC(bool, bool); -_PRINT_FUNC(null, const char*); -_PRINT_FUNC(string, const char*); -_PRINT_FUNC(uint, unsigned int); -_PRINT_FUNC(u64, uint64_t); -_PRINT_FUNC(hhu, unsigned char); -_PRINT_FUNC(hu, unsigned short); -_PRINT_FUNC(hex, unsigned int); -_PRINT_FUNC(0xhex, unsigned long long); -_PRINT_FUNC(luint, unsigned long); -_PRINT_FUNC(lluint, unsigned long long); -_PRINT_FUNC(float, double); + +_PRINT_FUNC(int, int) +_PRINT_FUNC(s64, int64_t) +_PRINT_FUNC(bool, bool) +_PRINT_FUNC(null, const char*) +_PRINT_FUNC(string, const char*) +_PRINT_FUNC(uint, unsigned int) +_PRINT_FUNC(u64, uint64_t) +_PRINT_FUNC(hhu, unsigned char) +_PRINT_FUNC(hu, unsigned short) +_PRINT_FUNC(hex, unsigned int) +_PRINT_FUNC(0xhex, unsigned long long) +_PRINT_FUNC(luint, unsigned long) +_PRINT_FUNC(lluint, unsigned long long) 
+_PRINT_FUNC(float, double) #undef _PRINT_FUNC #endif /* _JSON_PRINT_H_ */ From 8a56ef325c1b7c7a1f4cbb52cdc17291ba8d548f Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Mon, 29 Jul 2019 10:42:25 +0300 Subject: [PATCH 27/36] rdma: Control CQ adaptive moderation (DIM) In order to set adaptive-moderation for an ib device the command is: rdma dev set [DEV] adaptive-moderation [on|off] rdma dev show -d 0: mlx5_0: node_type ca fw 16.25.0319 node_guid 248a:0703:00a5:29d0 sys_image_guid 248a:0703:00a5:29d0 adaptive-moderation on caps: rdma resource show cq dev mlx5_0 cqn 0 cqe 1023 users 4 poll-ctx UNBOUND_WORKQUEUE adaptive-moderation off comm [ib_core] Signed-off-by: Yamin Friedman Signed-off-by: Leon Romanovsky --- rdma/dev.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++- rdma/rdma.h | 1 + rdma/res-cq.c | 15 ++++++++++++++ rdma/utils.c | 6 ++++++ 4 files changed, 76 insertions(+), 1 deletion(-) diff --git a/rdma/dev.c b/rdma/dev.c index d28bf6b3..c597cba5 100644 --- a/rdma/dev.c +++ b/rdma/dev.c @@ -12,6 +12,7 @@ static int dev_help(struct rd *rd) pr_out("Usage: %s dev show [DEV]\n", rd->filename); pr_out(" %s dev set [DEV] name DEVNAME\n", rd->filename); pr_out(" %s dev set [DEV] netns NSNAME\n", rd->filename); + pr_out(" %s dev set [DEV] adaptive-moderation [on|off]\n", rd->filename); return 0; } @@ -167,6 +168,21 @@ static void dev_print_sys_image_guid(struct rd *rd, struct nlattr **tb) pr_out("sys_image_guid %s ", str); } +static void dev_print_dim_setting(struct rd *rd, struct nlattr **tb) +{ + uint8_t dim_setting; + + if (!tb[RDMA_NLDEV_ATTR_DEV_DIM]) + return; + + dim_setting = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]); + if (dim_setting > 1) + return; + + print_on_off(rd, "adaptive-moderation", dim_setting); + +} + static const char *node_type_to_str(uint8_t node_type) { static const char * const node_type_str[] = { "unknown", "ca", @@ -219,8 +235,10 @@ static int dev_parse_cb(const struct nlmsghdr *nlh, void *data) dev_print_fw(rd, tb); 
dev_print_node_guid(rd, tb); dev_print_sys_image_guid(rd, tb); - if (rd->show_details) + if (rd->show_details) { + dev_print_dim_setting(rd, tb); dev_print_caps(rd, tb); + } if (!rd->json_output) pr_out("\n"); @@ -308,12 +326,47 @@ done: return ret; } +static int dev_set_dim_sendmsg(struct rd *rd, uint8_t dim_setting) +{ + uint32_t seq; + + rd_prepare_msg(rd, RDMA_NLDEV_CMD_SET, &seq, + (NLM_F_REQUEST | NLM_F_ACK)); + mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx); + mnl_attr_put_u8(rd->nlh, RDMA_NLDEV_ATTR_DEV_DIM, dim_setting); + + return rd_sendrecv_msg(rd, seq); +} + +static int dev_set_dim_off(struct rd *rd) +{ + return dev_set_dim_sendmsg(rd, 0); +} + +static int dev_set_dim_on(struct rd *rd) +{ + return dev_set_dim_sendmsg(rd, 1); +} + +static int dev_set_dim(struct rd *rd) +{ + const struct rd_cmd cmds[] = { + { NULL, dev_help}, + { "on", dev_set_dim_on}, + { "off", dev_set_dim_off}, + { 0 } + }; + + return rd_exec_cmd(rd, cmds, "parameter"); +} + static int dev_one_set(struct rd *rd) { const struct rd_cmd cmds[] = { { NULL, dev_help}, { "name", dev_set_name}, { "netns", dev_set_netns}, + { "adaptive-moderation", dev_set_dim}, { 0 } }; diff --git a/rdma/rdma.h b/rdma/rdma.h index 23157743..dfd1b70b 100644 --- a/rdma/rdma.h +++ b/rdma/rdma.h @@ -136,6 +136,7 @@ int rd_attr_check(const struct nlattr *attr, int *typep); void print_driver_table(struct rd *rd, struct nlattr *tb); void newline(struct rd *rd); void newline_indent(struct rd *rd); +void print_on_off(struct rd *rd, const char *key_str, bool on); #define MAX_LINE_LENGTH 80 #endif /* _RDMA_TOOL_H_ */ diff --git a/rdma/res-cq.c b/rdma/res-cq.c index 5afb97c5..d2591fbe 100644 --- a/rdma/res-cq.c +++ b/rdma/res-cq.c @@ -30,6 +30,20 @@ static void print_poll_ctx(struct rd *rd, uint8_t poll_ctx, struct nlattr *attr) pr_out("poll-ctx %s ", poll_ctx_to_str(poll_ctx)); } +static void print_cq_dim_setting(struct rd *rd, struct nlattr *attr) +{ + uint8_t dim_setting; + + if (!attr) + return; + 
+ dim_setting = mnl_attr_get_u8(attr); + if (dim_setting > 1) + return; + + print_on_off(rd, "adaptive-moderation", dim_setting); +} + static int res_cq_line(struct rd *rd, const char *name, int idx, struct nlattr **nla_line) { @@ -97,6 +111,7 @@ static int res_cq_line(struct rd *rd, const char *name, int idx, res_print_uint(rd, "users", users, nla_line[RDMA_NLDEV_ATTR_RES_USECNT]); print_poll_ctx(rd, poll_ctx, nla_line[RDMA_NLDEV_ATTR_RES_POLL_CTX]); + print_cq_dim_setting(rd, nla_line[RDMA_NLDEV_ATTR_DEV_DIM]); res_print_uint(rd, "ctxn", ctxn, nla_line[RDMA_NLDEV_ATTR_RES_CTXN]); res_print_uint(rd, "pid", pid, nla_line[RDMA_NLDEV_ATTR_RES_PID]); print_comm(rd, comm, nla_line); diff --git a/rdma/utils.c b/rdma/utils.c index 95b669f3..37659011 100644 --- a/rdma/utils.c +++ b/rdma/utils.c @@ -449,6 +449,7 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_STAT_MODE] = MNL_TYPE_U32, [RDMA_NLDEV_ATTR_STAT_RES] = MNL_TYPE_U32, [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = MNL_TYPE_U32, + [RDMA_NLDEV_ATTR_DEV_DIM] = MNL_TYPE_U8, }; int rd_attr_check(const struct nlattr *attr, int *typep) @@ -789,6 +790,11 @@ static int print_driver_string(struct rd *rd, const char *key_str, } } +void print_on_off(struct rd *rd, const char *key_str, bool on) +{ + print_driver_string(rd, key_str, (on) ? "on":"off"); +} + static int print_driver_s32(struct rd *rd, const char *key_str, int32_t val, enum rdma_nldev_print_type print_type) { From 432b21bec7baa50624e5d3a61360574d039098a4 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Mon, 29 Jul 2019 10:42:26 +0300 Subject: [PATCH 28/36] rdma: Document adaptive-moderation Add document of setting the adaptive-moderation for the ib device. 
Signed-off-by: Yamin Friedman Signed-off-by: Leon Romanovsky --- man/man8/rdma-dev.8 | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/man/man8/rdma-dev.8 b/man/man8/rdma-dev.8 index e77e7cd0..368cdc7c 100644 --- a/man/man8/rdma-dev.8 +++ b/man/man8/rdma-dev.8 @@ -34,11 +34,17 @@ rdma-dev \- RDMA device configuration .BR netns .BR NSNAME +.ti -8 +.B rdma dev set +.RI "[ " DEV " ]" +.BR adaptive-moderation +.BR [on/off] + .ti -8 .B rdma dev help .SH "DESCRIPTION" -.SS rdma dev set - rename RDMA device or set network namespace +.SS rdma dev set - rename RDMA device or set network namespace or set RDMA device adaptive-moderation .SS rdma dev show - display RDMA device attributes @@ -70,6 +76,14 @@ Changes the network namespace of RDMA device to foo where foo is previously created using iproute2 ip command. .RE .PP +rdma dev set mlx5_3 adaptive-moderation [on/off] +.RS 4 +Sets the state of adaptive interrupt moderation for the RDMA device. +.RE +.RS 4 +This is a global setting for the RDMA device but the value is printed for each CQ individually because the state is constant from CQ allocation. +.RE +.PP .SH SEE ALSO .BR ip (8), From 11120881d9a99b7f3b6f812374fde5c85864120e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 31 Jul 2019 17:16:54 -0700 Subject: [PATCH 29/36] Revert "tc: Remove pointless assignments in batch()" This reverts commit 6358bbc381c6e38465838370bcbbdeb77ec3565a. 
Signed-off-by: Stephen Hemminger --- tc/tc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tc/tc.c b/tc/tc.c index 64e342dd..1f23971a 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -326,11 +326,11 @@ static int batch(const char *name) struct batch_buf *head = NULL, *tail = NULL, *buf_pool = NULL; char *largv[100], *largv_next[100]; char *line, *line_next = NULL; + bool bs_enabled_next = false; bool bs_enabled = false; bool lastline = false; int largc, largc_next; bool bs_enabled_saved; - bool bs_enabled_next; int batchsize = 0; size_t len = 0; int ret = 0; @@ -359,6 +359,7 @@ static int batch(const char *name) goto Exit; largc = makeargs(line, largv, 100); bs_enabled = batchsize_enabled(largc, largv); + bs_enabled_saved = bs_enabled; do { if (getcmdline(&line_next, &len, stdin) == -1) lastline = true; @@ -394,6 +395,7 @@ static int batch(const char *name) len = 0; bs_enabled_saved = bs_enabled; bs_enabled = bs_enabled_next; + bs_enabled_next = false; if (largc == 0) { largc = largc_next; From 350bc27cf300c95a59fe73206bbbae12aa2d025d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 31 Jul 2019 17:19:18 -0700 Subject: [PATCH 30/36] Revert "tc: flush after each command in batch mode" This reverts commit d66fdfda71e4a30c1ca0ddb7b1a048bef30fe79e. Signed-off-by: Stephen Hemminger --- tc/tc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tc/tc.c b/tc/tc.c index 1f23971a..c115155b 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -405,7 +405,6 @@ static int batch(const char *name) err = do_cmd(largc, largv, tail == NULL ? NULL : tail->buf, tail == NULL ? 0 : sizeof(tail->buf)); - fflush(stdout); if (err != 0) { fprintf(stderr, "Command failed %s:%d\n", name, cmdlineno - 1); From bfdda70d596fb86472cb4343e6ef4d489d8b1ec1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 31 Jul 2019 17:19:33 -0700 Subject: [PATCH 31/36] Revert "tc: fix batch force option" This reverts commit b133392468d1f404077a8f3554d1f63d48bb45e8. 
Signed-off-by: Stephen Hemminger --- tc/tc.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tc/tc.c b/tc/tc.c index c115155b..b7b6bd28 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -334,7 +334,6 @@ static int batch(const char *name) int batchsize = 0; size_t len = 0; int ret = 0; - int err; bool send; batch_mode = 1; @@ -403,9 +402,9 @@ static int batch(const char *name) continue; /* blank line */ } - err = do_cmd(largc, largv, tail == NULL ? NULL : tail->buf, + ret = do_cmd(largc, largv, tail == NULL ? NULL : tail->buf, tail == NULL ? 0 : sizeof(tail->buf)); - if (err != 0) { + if (ret != 0) { fprintf(stderr, "Command failed %s:%d\n", name, cmdlineno - 1); ret = 1; @@ -427,17 +426,15 @@ static int batch(const char *name) iov->iov_len = n->nlmsg_len; } - err = rtnl_talk_iov(&rth, iovs, batchsize, NULL); - put_batch_bufs(&buf_pool, &head, &tail); - free(iovs); - if (err < 0) { + ret = rtnl_talk_iov(&rth, iovs, batchsize, NULL); + if (ret < 0) { fprintf(stderr, "Command failed %s:%d\n", name, - cmdlineno - (batchsize + err) - 1); - ret = 1; - if (!force) - break; + cmdlineno - (batchsize + ret) - 1); + return 2; } + put_batch_bufs(&buf_pool, &head, &tail); batchsize = 0; + free(iovs); } } while (!lastline); From e991c04d64c0ed2782c953351634e257c4fddb99 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 31 Jul 2019 17:27:59 -0700 Subject: [PATCH 32/36] Revert "tc: Add batchsize feature for filter and actions" This reverts commit 485d0c6001c4aa134b99c86913d6a7089b7b2ab0. 
Signed-off-by: Stephen Hemminger --- tc/m_action.c | 65 ++++++---------- tc/tc.c | 199 ++++--------------------------------------------- tc/tc_common.h | 7 +- tc/tc_filter.c | 131 ++++++++++++-------------------- 4 files changed, 88 insertions(+), 314 deletions(-) diff --git a/tc/m_action.c b/tc/m_action.c index ab6bc0ad..bdc62720 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -556,61 +556,40 @@ bad_val: return ret; } -struct tc_action_req { - struct nlmsghdr n; - struct tcamsg t; - char buf[MAX_MSG]; -}; - static int tc_action_modify(int cmd, unsigned int flags, - int *argc_p, char ***argv_p, - void *buf, size_t buflen) + int *argc_p, char ***argv_p) { - struct tc_action_req *req, action_req; - char **argv = *argv_p; - struct rtattr *tail; int argc = *argc_p; - struct iovec iov; + char **argv = *argv_p; int ret = 0; - - if (buf) { - req = buf; - if (buflen < sizeof (struct tc_action_req)) { - fprintf(stderr, "buffer is too small: %zu\n", buflen); - return -1; - } - } else { - memset(&action_req, 0, sizeof (struct tc_action_req)); - req = &action_req; - } - - req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)); - req->n.nlmsg_flags = NLM_F_REQUEST | flags; - req->n.nlmsg_type = cmd; - req->t.tca_family = AF_UNSPEC; - tail = NLMSG_TAIL(&req->n); + struct { + struct nlmsghdr n; + struct tcamsg t; + char buf[MAX_MSG]; + } req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)), + .n.nlmsg_flags = NLM_F_REQUEST | flags, + .n.nlmsg_type = cmd, + .t.tca_family = AF_UNSPEC, + }; + struct rtattr *tail = NLMSG_TAIL(&req.n); argc -= 1; argv += 1; - if (parse_action(&argc, &argv, TCA_ACT_TAB, &req->n)) { + if (parse_action(&argc, &argv, TCA_ACT_TAB, &req.n)) { fprintf(stderr, "Illegal \"action\"\n"); return -1; } - tail->rta_len = (void *) NLMSG_TAIL(&req->n) - (void *) tail; + tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail; - *argc_p = argc; - *argv_p = argv; - - if (buf) - return 0; - - iov.iov_base = &req->n; - iov.iov_len = req->n.nlmsg_len; - if 
(rtnl_talk_iov(&rth, &iov, 1, NULL) < 0) { + if (rtnl_talk(&rth, &req.n, NULL) < 0) { fprintf(stderr, "We have an error talking to the kernel\n"); ret = -1; } + *argc_p = argc; + *argv_p = argv; + return ret; } @@ -711,7 +690,7 @@ bad_val: return ret; } -int do_action(int argc, char **argv, void *buf, size_t buflen) +int do_action(int argc, char **argv) { int ret = 0; @@ -721,12 +700,12 @@ int do_action(int argc, char **argv, void *buf, size_t buflen) if (matches(*argv, "add") == 0) { ret = tc_action_modify(RTM_NEWACTION, NLM_F_EXCL | NLM_F_CREATE, - &argc, &argv, buf, buflen); + &argc, &argv); } else if (matches(*argv, "change") == 0 || matches(*argv, "replace") == 0) { ret = tc_action_modify(RTM_NEWACTION, NLM_F_CREATE | NLM_F_REPLACE, - &argc, &argv, buf, buflen); + &argc, &argv); } else if (matches(*argv, "delete") == 0) { argc -= 1; argv += 1; diff --git a/tc/tc.c b/tc/tc.c index b7b6bd28..a0a18f38 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -205,18 +205,18 @@ static void usage(void) " -nm | -nam[es] | { -cf | -conf } path }\n"); } -static int do_cmd(int argc, char **argv, void *buf, size_t buflen) +static int do_cmd(int argc, char **argv) { if (matches(*argv, "qdisc") == 0) return do_qdisc(argc-1, argv+1); if (matches(*argv, "class") == 0) return do_class(argc-1, argv+1); if (matches(*argv, "filter") == 0) - return do_filter(argc-1, argv+1, buf, buflen); + return do_filter(argc-1, argv+1); if (matches(*argv, "chain") == 0) - return do_chain(argc-1, argv+1, buf, buflen); + return do_chain(argc-1, argv+1); if (matches(*argv, "actions") == 0) - return do_action(argc-1, argv+1, buf, buflen); + return do_action(argc-1, argv+1); if (matches(*argv, "monitor") == 0) return do_tcmonitor(argc-1, argv+1); if (matches(*argv, "exec") == 0) @@ -231,110 +231,11 @@ static int do_cmd(int argc, char **argv, void *buf, size_t buflen) return -1; } -#define TC_MAX_SUBC 10 -static bool batchsize_enabled(int argc, char *argv[]) -{ - struct { - char *c; - char *subc[TC_MAX_SUBC]; - } 
table[] = { - { "filter", { "add", "delete", "change", "replace", NULL} }, - { "actions", { "add", "change", "replace", NULL} }, - { NULL }, - }, *iter; - char *s; - int i; - - if (argc < 2) - return false; - - for (iter = table; iter->c; iter++) { - if (matches(argv[0], iter->c)) - continue; - for (i = 0; i < TC_MAX_SUBC; i++) { - s = iter->subc[i]; - if (s && matches(argv[1], s) == 0) - return true; - } - } - - return false; -} - -struct batch_buf { - struct batch_buf *next; - char buf[16420]; /* sizeof (struct nlmsghdr) + - max(sizeof (struct tcmsg) + - sizeof (struct tcamsg)) + - MAX_MSG */ -}; - -static struct batch_buf *get_batch_buf(struct batch_buf **pool, - struct batch_buf **head, - struct batch_buf **tail) -{ - struct batch_buf *buf; - - if (*pool == NULL) - buf = calloc(1, sizeof(struct batch_buf)); - else { - buf = *pool; - *pool = (*pool)->next; - memset(buf, 0, sizeof(struct batch_buf)); - } - - if (*head == NULL) - *head = *tail = buf; - else { - (*tail)->next = buf; - (*tail) = buf; - } - - return buf; -} - -static void put_batch_bufs(struct batch_buf **pool, - struct batch_buf **head, - struct batch_buf **tail) -{ - if (*head == NULL || *tail == NULL) - return; - - if (*pool == NULL) - *pool = *head; - else { - (*tail)->next = *pool; - *pool = *head; - } - *head = NULL; - *tail = NULL; -} - -static void free_batch_bufs(struct batch_buf **pool) -{ - struct batch_buf *buf; - - for (buf = *pool; buf != NULL; buf = *pool) { - *pool = buf->next; - free(buf); - } - *pool = NULL; -} - static int batch(const char *name) { - struct batch_buf *head = NULL, *tail = NULL, *buf_pool = NULL; - char *largv[100], *largv_next[100]; - char *line, *line_next = NULL; - bool bs_enabled_next = false; - bool bs_enabled = false; - bool lastline = false; - int largc, largc_next; - bool bs_enabled_saved; - int batchsize = 0; + char *line = NULL; size_t len = 0; int ret = 0; - bool send; batch_mode = 1; if (name && strcmp(name, "-") != 0) { @@ -354,95 +255,25 @@ static int 
batch(const char *name) } cmdlineno = 0; - if (getcmdline(&line, &len, stdin) == -1) - goto Exit; - largc = makeargs(line, largv, 100); - bs_enabled = batchsize_enabled(largc, largv); - bs_enabled_saved = bs_enabled; - do { - if (getcmdline(&line_next, &len, stdin) == -1) - lastline = true; + while (getcmdline(&line, &len, stdin) != -1) { + char *largv[100]; + int largc; - largc_next = makeargs(line_next, largv_next, 100); - bs_enabled_next = batchsize_enabled(largc_next, largv_next); - if (bs_enabled) { - struct batch_buf *buf; - - buf = get_batch_buf(&buf_pool, &head, &tail); - if (!buf) { - fprintf(stderr, - "failed to allocate batch_buf\n"); - return -1; - } - ++batchsize; - } - - /* - * In batch mode, if we haven't accumulated enough commands - * and this is not the last command and this command & next - * command both support the batchsize feature, don't send the - * message immediately. - */ - if (!lastline && bs_enabled && bs_enabled_next - && batchsize != MSG_IOV_MAX) - send = false; - else - send = true; - - line = line_next; - line_next = NULL; - len = 0; - bs_enabled_saved = bs_enabled; - bs_enabled = bs_enabled_next; - bs_enabled_next = false; - - if (largc == 0) { - largc = largc_next; - memcpy(largv, largv_next, largc * sizeof(char *)); + largc = makeargs(line, largv, 100); + if (largc == 0) continue; /* blank line */ - } - ret = do_cmd(largc, largv, tail == NULL ? NULL : tail->buf, - tail == NULL ? 
0 : sizeof(tail->buf)); - if (ret != 0) { - fprintf(stderr, "Command failed %s:%d\n", name, - cmdlineno - 1); + if (do_cmd(largc, largv)) { + fprintf(stderr, "Command failed %s:%d\n", + name, cmdlineno); ret = 1; if (!force) break; } - largc = largc_next; - memcpy(largv, largv_next, largc * sizeof(char *)); + } - if (send && bs_enabled_saved) { - struct iovec *iov, *iovs; - struct batch_buf *buf; - struct nlmsghdr *n; - - iov = iovs = malloc(batchsize * sizeof(struct iovec)); - for (buf = head; buf != NULL; buf = buf->next, ++iov) { - n = (struct nlmsghdr *)&buf->buf; - iov->iov_base = n; - iov->iov_len = n->nlmsg_len; - } - - ret = rtnl_talk_iov(&rth, iovs, batchsize, NULL); - if (ret < 0) { - fprintf(stderr, "Command failed %s:%d\n", name, - cmdlineno - (batchsize + ret) - 1); - return 2; - } - put_batch_bufs(&buf_pool, &head, &tail); - batchsize = 0; - free(iovs); - } - } while (!lastline); - - free_batch_bufs(&buf_pool); -Exit: free(line); rtnl_close(&rth); - return ret; } @@ -536,7 +367,7 @@ int main(int argc, char **argv) goto Exit; } - ret = do_cmd(argc-1, argv+1, NULL, 0); + ret = do_cmd(argc-1, argv+1); Exit: rtnl_close(&rth); diff --git a/tc/tc_common.h b/tc/tc_common.h index d8a6dfde..802fb7f0 100644 --- a/tc/tc_common.h +++ b/tc/tc_common.h @@ -1,15 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ #define TCA_BUF_MAX (64*1024) -#define MSG_IOV_MAX 128 extern struct rtnl_handle rth; int do_qdisc(int argc, char **argv); int do_class(int argc, char **argv); -int do_filter(int argc, char **argv, void *buf, size_t buflen); -int do_chain(int argc, char **argv, void *buf, size_t buflen); -int do_action(int argc, char **argv, void *buf, size_t buflen); +int do_filter(int argc, char **argv); +int do_chain(int argc, char **argv); +int do_action(int argc, char **argv); int do_tcmonitor(int argc, char **argv); int do_exec(int argc, char **argv); diff --git a/tc/tc_filter.c b/tc/tc_filter.c index cd78c244..53759a7a 100644 --- a/tc/tc_filter.c +++ b/tc/tc_filter.c @@ 
-58,42 +58,30 @@ struct tc_filter_req { char buf[MAX_MSG]; }; -static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv, - void *buf, size_t buflen) +static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv) { - struct tc_filter_req *req, filter_req; + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[MAX_MSG]; + } req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .n.nlmsg_flags = NLM_F_REQUEST | flags, + .n.nlmsg_type = cmd, + .t.tcm_family = AF_UNSPEC, + }; struct filter_util *q = NULL; - struct tc_estimator est = {}; - char k[FILTER_NAMESZ] = {}; - int chain_index_set = 0; - char d[IFNAMSIZ] = {}; - int protocol_set = 0; - __u32 block_index = 0; - char *fhandle = NULL; - __u32 protocol = 0; - __u32 chain_index; - struct iovec iov; __u32 prio = 0; - int ret; + __u32 protocol = 0; + int protocol_set = 0; + __u32 chain_index; + int chain_index_set = 0; + char *fhandle = NULL; + char d[IFNAMSIZ] = {}; + char k[FILTER_NAMESZ] = {}; + struct tc_estimator est = {}; - if (buf) { - req = buf; - if (buflen < sizeof (struct tc_filter_req)) { - fprintf(stderr, "buffer is too small: %zu\n", buflen); - return -1; - } - } else { - memset(&filter_req, 0, sizeof (struct tc_filter_req)); - req = &filter_req; - } - - req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); - req->n.nlmsg_flags = NLM_F_REQUEST | flags; - req->n.nlmsg_type = cmd; - req->t.tcm_family = AF_UNSPEC; - - if ((cmd == RTM_NEWTFILTER || cmd == RTM_NEWCHAIN) && - flags & NLM_F_CREATE) + if (cmd == RTM_NEWTFILTER && flags & NLM_F_CREATE) protocol = htons(ETH_P_ALL); while (argc > 0) { @@ -101,53 +89,39 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv, NEXT_ARG(); if (d[0]) duparg("dev", *argv); - if (block_index) { - fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); - return -1; - } strncpy(d, *argv, sizeof(d)-1); - } else if (matches(*argv, "block") == 0) { - NEXT_ARG(); - if (block_index) 
- duparg("block", *argv); - if (d[0]) { - fprintf(stderr, "Error: \"dev\" and \"block\" are mutually exlusive\n"); - return -1; - } - if (get_u32(&block_index, *argv, 0) || !block_index) - invarg("invalid block index value", *argv); } else if (strcmp(*argv, "root") == 0) { - if (req->t.tcm_parent) { + if (req.t.tcm_parent) { fprintf(stderr, "Error: \"root\" is duplicate parent ID\n"); return -1; } - req->t.tcm_parent = TC_H_ROOT; + req.t.tcm_parent = TC_H_ROOT; } else if (strcmp(*argv, "ingress") == 0) { - if (req->t.tcm_parent) { + if (req.t.tcm_parent) { fprintf(stderr, "Error: \"ingress\" is duplicate parent ID\n"); return -1; } - req->t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, + req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); } else if (strcmp(*argv, "egress") == 0) { - if (req->t.tcm_parent) { + if (req.t.tcm_parent) { fprintf(stderr, "Error: \"egress\" is duplicate parent ID\n"); return -1; } - req->t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, + req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); } else if (strcmp(*argv, "parent") == 0) { __u32 handle; NEXT_ARG(); - if (req->t.tcm_parent) + if (req.t.tcm_parent) duparg("parent", *argv); if (get_tc_classid(&handle, *argv)) invarg("Invalid parent ID", *argv); - req->t.tcm_parent = handle; + req.t.tcm_parent = handle; } else if (strcmp(*argv, "handle") == 0) { NEXT_ARG(); if (fhandle) @@ -194,27 +168,26 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv, argc--; argv++; } - req->t.tcm_info = TC_H_MAKE(prio<<16, protocol); + req.t.tcm_info = TC_H_MAKE(prio<<16, protocol); if (chain_index_set) - addattr32(&req->n, sizeof(*req), TCA_CHAIN, chain_index); + addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index); if (k[0]) - addattr_l(&req->n, sizeof(*req), TCA_KIND, k, strlen(k)+1); + addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1); if (d[0]) { ll_init_map(&rth); - req->t.tcm_ifindex = ll_name_to_index(d); - if (!req->t.tcm_ifindex) - return -nodev(d); - } else if 
(block_index) { - req->t.tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; - req->t.tcm_block_index = block_index; + req.t.tcm_ifindex = ll_name_to_index(d); + if (req.t.tcm_ifindex == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", d); + return 1; + } } if (q) { - if (q->parse_fopt(q, fhandle, argc, argv, &req->n)) + if (q->parse_fopt(q, fhandle, argc, argv, &req.n)) return 1; } else { if (fhandle) { @@ -233,16 +206,10 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv, } if (est.ewma_log) - addattr_l(&req->n, sizeof(*req), TCA_RATE, &est, sizeof(est)); + addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); - if (buf) - return 0; - - iov.iov_base = &req->n; - iov.iov_len = req->n.nlmsg_len; - ret = rtnl_talk_iov(&rth, &iov, 1, NULL); - if (ret < 0) { - fprintf(stderr, "We have an error talking to the kernel, %d\n", ret); + if (rtnl_talk(&rth, &req.n, NULL) < 0) { + fprintf(stderr, "We have an error talking to the kernel\n"); return 2; } @@ -751,22 +718,20 @@ static int tc_filter_list(int cmd, int argc, char **argv) return 0; } -int do_filter(int argc, char **argv, void *buf, size_t buflen) +int do_filter(int argc, char **argv) { if (argc < 1) return tc_filter_list(RTM_GETTFILTER, 0, NULL); if (matches(*argv, "add") == 0) return tc_filter_modify(RTM_NEWTFILTER, NLM_F_EXCL|NLM_F_CREATE, - argc-1, argv+1, buf, buflen); + argc-1, argv+1); if (matches(*argv, "change") == 0) - return tc_filter_modify(RTM_NEWTFILTER, 0, argc-1, argv+1, - buf, buflen); + return tc_filter_modify(RTM_NEWTFILTER, 0, argc-1, argv+1); if (matches(*argv, "replace") == 0) return tc_filter_modify(RTM_NEWTFILTER, NLM_F_CREATE, argc-1, - argv+1, buf, buflen); + argv+1); if (matches(*argv, "delete") == 0) - return tc_filter_modify(RTM_DELTFILTER, 0, argc-1, argv+1, - buf, buflen); + return tc_filter_modify(RTM_DELTFILTER, 0, argc-1, argv+1); if (matches(*argv, "get") == 0) return tc_filter_get(RTM_GETTFILTER, 0, argc-1, argv+1); if (matches(*argv, "list") == 0 || 
matches(*argv, "show") == 0 @@ -781,16 +746,16 @@ int do_filter(int argc, char **argv, void *buf, size_t buflen) return -1; } -int do_chain(int argc, char **argv, void *buf, size_t buflen) +int do_chain(int argc, char **argv) { if (argc < 1) return tc_filter_list(RTM_GETCHAIN, 0, NULL); if (matches(*argv, "add") == 0) { return tc_filter_modify(RTM_NEWCHAIN, NLM_F_EXCL | NLM_F_CREATE, - argc - 1, argv + 1, buf, buflen); + argc - 1, argv + 1); } else if (matches(*argv, "delete") == 0) { return tc_filter_modify(RTM_DELCHAIN, 0, - argc - 1, argv + 1, buf, buflen); + argc - 1, argv + 1); } else if (matches(*argv, "get") == 0) { return tc_filter_get(RTM_GETCHAIN, 0, argc - 1, argv + 1); From 4dd599fdb83f143c492695e80626f1298eeb4fe7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Aug 2019 09:33:39 -0700 Subject: [PATCH 33/36] tc: fflush after each command in batch mode Restore behaviour of tc batch mode. Flush stdout after each command. Signed-off-by: Stephen Hemminger --- tc/tc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tc/tc.c b/tc/tc.c index a0a18f38..37294b31 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -270,6 +270,7 @@ static int batch(const char *name) if (!force) break; } + fflush(stdout); } free(line); From efd12cd2d904064510f11a3c2d96609885ce9a75 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 5 Aug 2019 11:56:56 +0200 Subject: [PATCH 34/36] devlink: finish queue.h to list.h transition Lose the "q" from the names and name the structure fields in the same way the rest of the code does. Also, fix list_add arg order which leads to segfault.
Fixes: 33267017faf1 ("iproute2: devlink: port from sys/queue.h to list.h") Signed-off-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 0ea401ae..91c85dc1 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -5978,35 +5978,36 @@ static int fmsg_value_show(struct dl *dl, int type, struct nlattr *nl_data) return MNL_CB_OK; } -struct nest_qentry { +struct nest_entry { int attr_type; - struct list_head nest_entries; + struct list_head list; }; struct fmsg_cb_data { struct dl *dl; uint8_t value_type; - struct list_head qhead; + struct list_head entry_list; }; static int cmd_fmsg_nest_queue(struct fmsg_cb_data *fmsg_data, uint8_t *attr_value, bool insert) { - struct nest_qentry *entry = NULL; + struct nest_entry *entry; if (insert) { - entry = malloc(sizeof(struct nest_qentry)); + entry = malloc(sizeof(struct nest_entry)); if (!entry) return -ENOMEM; entry->attr_type = *attr_value; - list_add(&fmsg_data->qhead, &entry->nest_entries); + list_add(&entry->list, &fmsg_data->entry_list); } else { - if (list_empty(&fmsg_data->qhead)) + if (list_empty(&fmsg_data->entry_list)) return MNL_CB_ERROR; - entry = list_first_entry(&fmsg_data->qhead, struct nest_qentry, nest_entries); + entry = list_first_entry(&fmsg_data->entry_list, + struct nest_entry, list); *attr_value = entry->attr_type; - list_del(&entry->nest_entries); + list_del(&entry->list); free(entry); } return MNL_CB_OK; @@ -6115,7 +6116,7 @@ static int cmd_health_object_common(struct dl *dl, uint8_t cmd, uint16_t flags) return err; data.dl = dl; - INIT_LIST_HEAD(&data.qhead); + INIT_LIST_HEAD(&data.entry_list); err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_fmsg_object_cb, &data); return err; } From 18db049f6fc44dbcb9ac745b4b2804e6f0e94f91 Mon Sep 17 00:00:00 2001 From: Patrick Talbert Date: Sat, 3 Aug 2019 10:37:41 +0200 Subject: [PATCH 35/36] ss: sctp: fix typo for 
nodelay nodealy should be nodelay. Signed-off-by: Patrick Talbert Signed-off-by: Stephen Hemminger --- misc/ss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/ss.c b/misc/ss.c index 0927b192..01b47fed 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -2414,7 +2414,7 @@ static void sctp_stats_print(struct sctp_info *s) if (s->sctpi_s_pd_point) out(" pdpoint:%d", s->sctpi_s_pd_point); if (s->sctpi_s_nodelay) - out(" nodealy:%d", s->sctpi_s_nodelay); + out(" nodelay:%d", s->sctpi_s_nodelay); if (s->sctpi_s_disable_fragments) out(" nofrag:%d", s->sctpi_s_disable_fragments); if (s->sctpi_s_v4mapped) From 2d7cb22240b5b52294fc04ee3545694e5fe35605 Mon Sep 17 00:00:00 2001 From: Patrick Talbert Date: Sat, 3 Aug 2019 10:47:08 +0200 Subject: [PATCH 36/36] ss: sctp: Formatting tweak in sctp_show_info for locals 'locals' output does not include a leading space so it runs up against skmem:() output. Add a leading space to fix it. Signed-off-by: Patrick Talbert Signed-off-by: Stephen Hemminger --- misc/ss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/ss.c b/misc/ss.c index 01b47fed..363b4c8d 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -2937,7 +2937,7 @@ static void sctp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, len = RTA_PAYLOAD(tb[INET_DIAG_LOCALS]); sa = RTA_DATA(tb[INET_DIAG_LOCALS]); - out("locals:%s", format_host_sa(sa)); + out(" locals:%s", format_host_sa(sa)); for (sa++, len -= sizeof(*sa); len > 0; sa++, len -= sizeof(*sa)) out(",%s", format_host_sa(sa));