From 843fc90068270f4f3e5c44c49c13653e4cf1e6e0 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 24 Apr 2017 17:35:37 +0200 Subject: [PATCH 01/13] man: ip-rule.8: Further clarify how to interpret priority value Despite the past changes, users seemed to get confused by the seemingly contradictory relation of priority value and actual rule priority. Signed-off-by: Phil Sutter --- man/man8/ip-rule.8 | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8 index 7de80f3e..a5c47981 100644 --- a/man/man8/ip-rule.8 +++ b/man/man8/ip-rule.8 @@ -95,7 +95,10 @@ Each policy routing rule consists of a .B selector and an .B action predicate. -The RPDB is scanned in order of decreasing priority. The selector +The RPDB is scanned in order of decreasing priority (note that lower number +means higher priority, see the description of +.I PREFERENCE +below). The selector of each rule is applied to {source address, destination address, incoming interface, tos, fwmark} and, if the selector matches the packet, the action is performed. The action predicate may return with success. @@ -225,7 +228,8 @@ value to match. .BI priority " PREFERENCE" the priority of this rule. .I PREFERENCE -is an unsigned integer value, higher number means lower priority. Each rule +is an unsigned integer value, higher number means lower priority, and rules get +processed in order of increasing number. Each rule should have an explicitly set .I unique priority value. From 6ec14f1abb82bdaa6d0fd98e491a505bcd284d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Thu, 27 Apr 2017 11:43:47 +0200 Subject: [PATCH 02/13] routel: fix infinite loop in line parser As noticed by one of the few users of routel script, it ends up in an infinite loop when they pull out the cable from the NIC used for some route. 
This is caused by its parser expecting each line of "ip route show" output to consist of "key value" pairs (except for the initial target range), together with an old trap of Bourne style shells that "shift 2" does nothing if there is only one argument left. Some keywords, e.g. "linkdown", are not followed by a value. Improve the parser to (1) only set variables for keywords we care about and (2) recognize (currently) known keywords without a value. This is still far from perfect (and certainly not future proof) but to fully fix the script, one would probably have to rewrite the logic completely (and I'm not sure it's worth the effort). Signed-off-by: Michal Kubecek --- ip/routel | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/ip/routel b/ip/routel index 8d1d352a..9a30462a 100644 --- a/ip/routel +++ b/ip/routel @@ -32,10 +32,22 @@ ip route list table "$@" | esac while test $# != 0 do - key=$1 - val=$2 - eval "$key=$val" - shift 2 + case "$1" in + proto|via|dev|scope|src|table) + key=$1 + val=$2 + eval "$key='$val'" + shift 2 + ;; + dead|onlink|pervasive|offload|notify|linkdown|unresolved) + shift + ;; + *) + # avoid infinite loop on unknown keyword without value at line end + shift + shift + ;; + esac done echo "$network $via $src $proto $scope $dev $table" done | awk -F ' ' ' From 432b92a70287e54db6217510f97d4669582933b1 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Wed, 26 Apr 2017 15:08:39 +0800 Subject: [PATCH 03/13] iplink: add support for IFLA_CARRIER attribute Add support to set IFLA_CARRIER attribute. 
Signed-off-by: Zhang Shengju --- ip/iplink.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ip/iplink.c b/ip/iplink.c index 866ad723..263bfdde 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -72,6 +72,7 @@ void iplink_usage(void) " [ allmulticast { on | off } ]\n" " [ promisc { on | off } ]\n" " [ trailers { on | off } ]\n" + " [ carrier { on | off } ]\n" " [ txqueuelen PACKETS ]\n" " [ name NEWNAME ]\n" " [ address LLADDR ]\n" @@ -673,6 +674,17 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, req->i.ifi_flags |= IFF_NOARP; else return on_off("arp", *argv); + } else if (strcmp(*argv, "carrier") == 0) { + int carrier; + NEXT_ARG(); + if (strcmp(*argv, "on") == 0) + carrier = 1; + else if (strcmp(*argv, "off") == 0) + carrier = 0; + else + return on_off("carrier", *argv); + + addattr8(&req->n, sizeof(*req), IFLA_CARRIER, carrier); } else if (strcmp(*argv, "vf") == 0) { struct rtattr *vflist; From bb6ab47b162e37cf5c6846f3700dd74eb97a11ae Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 1 May 2017 09:12:27 -0700 Subject: [PATCH 04/13] iplink: whitespace cleanup Break lines to conform to 80 col guideline. 
Signed-off-by: Stephen Hemminger --- ip/iplink.c | 61 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/ip/iplink.c b/ip/iplink.c index 263bfdde..da3f9a77 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -63,7 +63,8 @@ void iplink_usage(void) " [ { up | down } ]\n" " [ type TYPE ARGS ]\n"); } else - fprintf(stderr, "Usage: ip link set DEVICE [ { up | down } ]\n"); + fprintf(stderr, + "Usage: ip link set DEVICE [ { up | down } ]\n"); fprintf(stderr, " [ arp { on | off } ]\n" @@ -298,7 +299,8 @@ static void iplink_parse_vf_vlan_info(int vf, int *argcp, char ***argvp, SPRINT_BUF(b2); char msg[64 + sizeof(b1) + sizeof(b2)]; - sprintf(msg, "Invalid \"vlan protocol\" value - supported %s, %s\n", + sprintf(msg, + "Invalid \"vlan protocol\" value - supported %s, %s\n", ll_proto_n2a(htons(ETH_P_8021Q), b1, sizeof(b1)), ll_proto_n2a(htons(ETH_P_8021AD), @@ -583,14 +585,16 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, addr_len = ll_addr_a2n(abuf, sizeof(abuf), *argv); if (addr_len < 0) return -1; - addattr_l(&req->n, sizeof(*req), IFLA_ADDRESS, abuf, addr_len); + addattr_l(&req->n, sizeof(*req), + IFLA_ADDRESS, abuf, addr_len); } else if (matches(*argv, "broadcast") == 0 || strcmp(*argv, "brd") == 0) { NEXT_ARG(); len = ll_addr_a2n(abuf, sizeof(abuf), *argv); if (len < 0) return -1; - addattr_l(&req->n, sizeof(*req), IFLA_BROADCAST, abuf, len); + addattr_l(&req->n, sizeof(*req), + IFLA_BROADCAST, abuf, len); } else if (matches(*argv, "txqueuelen") == 0 || strcmp(*argv, "qlen") == 0 || matches(*argv, "txqlen") == 0) { @@ -599,7 +603,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, duparg("txqueuelen", *argv); if (get_integer(&qlen, *argv, 0)) invarg("Invalid \"txqueuelen\" value\n", *argv); - addattr_l(&req->n, sizeof(*req), IFLA_TXQLEN, &qlen, 4); + addattr_l(&req->n, sizeof(*req), + IFLA_TXQLEN, &qlen, 4); } else if (strcmp(*argv, "mtu") == 0) { NEXT_ARG(); if (mtu != -1) 
@@ -620,8 +625,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_FD, &netns, 4); else if (get_integer(&netns, *argv, 0) == 0) - addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_PID, - &netns, 4); + addattr_l(&req->n, sizeof(*req), + IFLA_NET_NS_PID, &netns, 4); else invarg("Invalid \"netns\" value\n", *argv); } else if (strcmp(*argv, "multicast") == 0) { @@ -676,6 +681,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, return on_off("arp", *argv); } else if (strcmp(*argv, "carrier") == 0) { int carrier; + NEXT_ARG(); if (strcmp(*argv, "on") == 0) carrier = 1; @@ -775,7 +781,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, if (numtxqueues != -1) duparg("numtxqueues", *argv); if (get_integer(&numtxqueues, *argv, 0)) - invarg("Invalid \"numtxqueues\" value\n", *argv); + invarg("Invalid \"numtxqueues\" value\n", + *argv); addattr_l(&req->n, sizeof(*req), IFLA_NUM_TX_QUEUES, &numtxqueues, 4); } else if (matches(*argv, "numrxqueues") == 0) { @@ -783,7 +790,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, if (numrxqueues != -1) duparg("numrxqueues", *argv); if (get_integer(&numrxqueues, *argv, 0)) - invarg("Invalid \"numrxqueues\" value\n", *argv); + invarg("Invalid \"numrxqueues\" value\n", + *argv); addattr_l(&req->n, sizeof(*req), IFLA_NUM_RX_QUEUES, &numrxqueues, 4); } else if (matches(*argv, "addrgenmode") == 0) { @@ -793,7 +801,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, NEXT_ARG(); mode = get_addr_gen_mode(*argv); if (mode < 0) - invarg("Invalid address generation mode\n", *argv); + invarg("Invalid address generation mode\n", + *argv); afs = addattr_nest(&req->n, sizeof(*req), IFLA_AF_SPEC); afs6 = addattr_nest(&req->n, sizeof(*req), AF_INET6); addattr8(&req->n, sizeof(*req), @@ -805,7 +814,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, if (link_netnsid != -1) duparg("link-netnsid", *argv); if 
(get_integer(&link_netnsid, *argv, 0)) - invarg("Invalid \"link-netnsid\" value\n", *argv); + invarg("Invalid \"link-netnsid\" value\n", + *argv); addattr32(&req->n, sizeof(*req), IFLA_LINK_NETNSID, link_netnsid); } else if (strcmp(*argv, "protodown") == 0) { @@ -886,7 +896,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) return -1; } if (flags & NLM_F_CREATE) { - fprintf(stderr, "group cannot be used when creating devices.\n"); + fprintf(stderr, + "group cannot be used when creating devices.\n"); return -1; } @@ -900,11 +911,13 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) if (!(flags & NLM_F_CREATE)) { if (!dev) { - fprintf(stderr, "Not enough information: \"dev\" argument is required.\n"); + fprintf(stderr, + "Not enough information: \"dev\" argument is required.\n"); exit(-1); } if (cmd == RTM_NEWLINK && index != -1) { - fprintf(stderr, "index can be used only when creating devices.\n"); + fprintf(stderr, + "index can be used only when creating devices.\n"); exit(-1); } @@ -961,8 +974,9 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) else iflatype = IFLA_INFO_DATA; if (lu && argc) { - struct rtattr *data = addattr_nest(&req.n, - sizeof(req), iflatype); + struct rtattr *data + = addattr_nest(&req.n, + sizeof(req), iflatype); if (lu->parse_opt && lu->parse_opt(lu, argc, argv, &req.n)) @@ -979,7 +993,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) } addattr_nest_end(&req.n, linkinfo); } else if (flags & NLM_F_CREATE) { - fprintf(stderr, "Not enough information: \"type\" argument is required\n"); + fprintf(stderr, + "Not enough information: \"type\" argument is required\n"); return -1; } @@ -1187,7 +1202,8 @@ static int parse_address(const char *dev, int hatype, int halen, if (alen < 0) return -1; if (alen != halen) { - fprintf(stderr, "Wrong address (%s) length: expected %d bytes\n", + fprintf(stderr, + "Wrong address (%s) length: 
expected %d bytes\n", lla, halen); return -1; } @@ -1338,18 +1354,21 @@ static int do_set(int argc, char **argv) if (halen < 0) return -1; if (newaddr) { - if (parse_address(dev, htype, halen, newaddr, &ifr0) < 0) + if (parse_address(dev, htype, halen, + newaddr, &ifr0) < 0) return -1; } if (newbrd) { - if (parse_address(dev, htype, halen, newbrd, &ifr1) < 0) + if (parse_address(dev, htype, halen, + newbrd, &ifr1) < 0) return -1; } } if (newname && strcmp(dev, newname)) { if (strlen(newname) == 0) - invarg("\"\" is not a valid device identifier\n", "name"); + invarg("\"\" is not a valid device identifier\n", + "name"); if (do_changename(dev, newname) < 0) return -1; dev = newname; From 51536ebbe869fe029d36c869677d0d2310f45eee Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 23 Apr 2017 15:53:49 +0300 Subject: [PATCH 05/13] tc/pedit: Fix a typo in pedit usage message Signed-off-by: Amir Vadai --- tc/m_pedit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tc/m_pedit.c b/tc/m_pedit.c index 8e9bf072..939a6a14 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -40,7 +40,7 @@ static void explain(void) "\t:= [ATC]\n \t\tOFFSETC:= offset \n" "\t\tATC:= at offmask shift \n" "\t\tNOTE: offval is byte offset, must be multiple of 4\n" - "\t\tNOTE: maskval is a 32 bit hex number\n \t\tNOTE: shiftval is a is a shift value\n" + "\t\tNOTE: maskval is a 32 bit hex number\n \t\tNOTE: shiftval is a shift value\n" "\t\tCMD:= clear | invert | set | retain\n" "\t:= ip | ip6 \n" " \t\t| udp | tcp | icmp \n" From 7c71a40cbd3180a64a5fb997fa3efba3335c7002 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 23 Apr 2017 15:53:50 +0300 Subject: [PATCH 06/13] tc/pedit: Extend pedit to specify offset relative to mac/transport headers Utilize the extended pedit netlink to set an offset relative to a specific header type. Old netlink only enabled the user to set approximated offset relative to the IPv4 header. 
To use this extended functionality, the 'ex' keyword must be given after 'pedit' and before any 'munge', e.g.: $ tc filter add dev ens9 protocol ip parent ffff: \ flower \ ip_proto udp \ dst_port 80 \ action pedit ex munge \ ip dst set 1.1.1.1 \ pipe \ action mirred egress redirect dev veth0 Signed-off-by: Amir Vadai --- man/man8/tc-pedit.8 | 41 ++++++--- tc/m_pedit.c | 213 ++++++++++++++++++++++++++++++++++++++------ tc/m_pedit.h | 43 +++++++-- tc/p_icmp.c | 3 +- tc/p_ip.c | 15 +++- tc/p_tcp.c | 3 +- tc/p_udp.c | 3 +- 7 files changed, 270 insertions(+), 51 deletions(-) diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8 index c34520c0..761d5c8e 100644 --- a/man/man8/tc-pedit.8 +++ b/man/man8/tc-pedit.8 @@ -5,8 +5,8 @@ pedit - generic packet editor action .SH SYNOPSIS .in +8 .ti -8 -.BR tc " ... " "action pedit munge " { -.IR RAW_OP " | " LAYERED_OP " } [ " CONTROL " ]" +.BR tc " ... " "action pedit [ex] munge " { +.IR RAW_OP " | " LAYERED_OP " | " EXTENDED_LAYERED_OP " } [ " CONTROL " ]" .ti -8 .IR RAW_OP " := " @@ -22,20 +22,22 @@ pedit - generic packet editor action .IR LAYERED_OP " := { " .BI ip " IPHDR_FIELD" | -.BI ip6 " IP6HDR_FIELD" -| -.BI udp " UDPHDR_FIELD" -| -.BI tcp " TCPHDR_FIELD" -| -.BI icmp " ICMPHDR_FIELD" +.BI ip " BEYOND_IPHDR_FIELD" +.RI } " CMD_SPEC" + +.ti -8 +.IR EXTENDED_LAYERED_OP " := { " +.BI ip " IPHDR_FIELD" .RI } " CMD_SPEC" .ti -8 .IR IPHDR_FIELD " := { " .BR src " | " dst " | " tos " | " dsfield " | " ihl " | " protocol " |" -.BR precedence " | " nofrag " | " firstfrag " | " ce " | " df " |" -.BR mf " | " dport " | " sport " | " icmp_type " | " icmp_code " }" +.BR precedence " | " nofrag " | " firstfrag " | " ce " | " df " }" + +.ti -8 +.IR BEYOND_IPHDR_FIELD " := { " +.BR dport " | " sport " | " icmp_type " | " icmp_code " }" .ti -8 .IR CMD_SPEC " := {" @@ -58,6 +60,11 @@ chosen automatically based on the header field size. Currently this is supported only for IPv4 headers. .SH OPTIONS .TP +.B ex +Use extended pedit. 
+.I EXTENDED_LAYERED_OP +is allowed only in this mode. +.TP .BI offset " OFFSET " "\fR{ \fBu32 \fR| \fBu16 \fR| \fBu8 \fR}" Specify the offset at which to change data. .I OFFSET @@ -123,6 +130,15 @@ Change IP header flags. Note that the value to pass to the .B set command is not just a bit value, but the full byte including the flags field. Though only the relevant bits of that value are respected, the rest ignored. +.RE +.TP +.BI ip " BEYOND_IPHDR_FIELD" +Supported only for non-extended layered op. It is passed to the kernel as +offsets relative to the beginning of the IP header and assumes the IP header is +of minimum size (20 bytes). The supported keywords for +.I BEYOND_IPHDR_FIELD +are: +.RS .TP .B dport .TQ @@ -222,6 +238,9 @@ tc filter add dev eth0 parent 1: u32 \\ tc filter add dev eth0 parent ffff: u32 \\ match ip sport 22 0xffff \\ action pedit pedit munge ip sport set 23 +tc filter add dev eth0 parent ffff: u32 \\ + match ip sport 22 0xffff \\ + action pedit ex munge ip dst set 192.168.1.199 .EE .RE .SH SEE ALSO diff --git a/tc/m_pedit.c b/tc/m_pedit.c index 939a6a14..a26fd3e5 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -34,7 +34,7 @@ static int pedit_debug; static void explain(void) { - fprintf(stderr, "Usage: ... pedit munge [CONTROL]\n"); + fprintf(stderr, "Usage: ... 
pedit munge [ex] [CONTROL]\n"); fprintf(stderr, "Where: MUNGE := |\n" "\t:= [ATC]\n \t\tOFFSETC:= offset \n" @@ -45,6 +45,7 @@ static void explain(void) "\t:= ip | ip6 \n" " \t\t| udp | tcp | icmp \n" "\tCONTROL:= reclassify | pipe | drop | continue | pass\n" + "\tNOTE: if 'ex' is set, extended functionality will be supported (kernel >= 4.11)\n" "For Example usage look at the examples directory\n"); } @@ -56,8 +57,8 @@ static void usage(void) } static int pedit_parse_nopopt(int *argc_p, char ***argv_p, - struct tc_pedit_sel *sel, - struct tc_pedit_key *tkey) + struct m_pedit_sel *sel, + struct m_pedit_key *tkey) { int argc = *argc_p; char **argv = *argv_p; @@ -116,8 +117,10 @@ noexist: return p; } -int pack_key(struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) +int pack_key(struct m_pedit_sel *_sel, struct m_pedit_key *tkey) { + struct tc_pedit_sel *sel = &_sel->sel; + struct m_pedit_key_ex *keys_ex = _sel->keys_ex; int hwm = sel->nkeys; if (hwm >= MAX_OFFS) @@ -134,12 +137,24 @@ int pack_key(struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) sel->keys[hwm].at = tkey->at; sel->keys[hwm].offmask = tkey->offmask; sel->keys[hwm].shift = tkey->shift; + + if (_sel->extended) { + keys_ex[hwm].htype = tkey->htype; + keys_ex[hwm].cmd = tkey->cmd; + } else { + if (tkey->htype != TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK || + tkey->cmd != TCA_PEDIT_KEY_EX_CMD_SET) { + fprintf(stderr, "Munge parameters not supported. 
Use 'munge ex'.\n"); + return -1; + } + } + sel->nkeys++; return 0; } -int pack_key32(__u32 retain, struct tc_pedit_sel *sel, - struct tc_pedit_key *tkey) +int pack_key32(__u32 retain, struct m_pedit_sel *sel, + struct m_pedit_key *tkey) { if (tkey->off > (tkey->off & ~3)) { fprintf(stderr, @@ -152,8 +167,8 @@ int pack_key32(__u32 retain, struct tc_pedit_sel *sel, return pack_key(sel, tkey); } -int pack_key16(__u32 retain, struct tc_pedit_sel *sel, - struct tc_pedit_key *tkey) +int pack_key16(__u32 retain, struct m_pedit_sel *sel, + struct m_pedit_key *tkey) { int ind, stride; __u32 m[4] = { 0x0000FFFF, 0xFF0000FF, 0xFFFF0000 }; @@ -183,7 +198,7 @@ int pack_key16(__u32 retain, struct tc_pedit_sel *sel, } -int pack_key8(__u32 retain, struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) +int pack_key8(__u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey) { int ind, stride; __u32 m[4] = { 0x00FFFFFF, 0xFF00FFFF, 0xFFFF00FF, 0xFFFFFF00 }; @@ -239,7 +254,7 @@ int parse_val(int *argc_p, char ***argv_p, __u32 *val, int type) } int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, - struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) + struct m_pedit_sel *sel, struct m_pedit_key *tkey) { __u32 mask = 0, val = 0; __u32 o = 0xFF; @@ -313,8 +328,8 @@ done: } -int parse_offset(int *argc_p, char ***argv_p, struct tc_pedit_sel *sel, - struct tc_pedit_key *tkey) +int parse_offset(int *argc_p, char ***argv_p, struct m_pedit_sel *sel, + struct m_pedit_key *tkey) { int off; __u32 len, retain; @@ -389,9 +404,9 @@ done: return res; } -static int parse_munge(int *argc_p, char ***argv_p, struct tc_pedit_sel *sel) +static int parse_munge(int *argc_p, char ***argv_p, struct m_pedit_sel *sel) { - struct tc_pedit_key tkey = {}; + struct m_pedit_key tkey = {}; int argc = *argc_p; char **argv = *argv_p; int res = -1; @@ -433,13 +448,69 @@ done: return res; } +static int pedit_keys_ex_getattr(struct rtattr *attr, + struct m_pedit_key_ex *keys_ex, int n) +{ + 
struct rtattr *i; + int rem = RTA_PAYLOAD(attr); + struct rtattr *tb[TCA_PEDIT_KEY_EX_MAX + 1]; + struct m_pedit_key_ex *k = keys_ex; + + for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { + if (!n) + return -1; + + if (i->rta_type != TCA_PEDIT_KEY_EX) + return -1; + + parse_rtattr_nested(tb, TCA_PEDIT_KEY_EX_MAX, i); + + k->htype = rta_getattr_u16(tb[TCA_PEDIT_KEY_EX_HTYPE]); + k->cmd = rta_getattr_u16(tb[TCA_PEDIT_KEY_EX_CMD]); + + k++; + n--; + } + + return !!n; +} + +static int pedit_keys_ex_addattr(struct m_pedit_sel *sel, struct nlmsghdr *n) +{ + struct m_pedit_key_ex *k = sel->keys_ex; + struct rtattr *keys_start; + int i; + + if (!sel->extended) + return 0; + + keys_start = addattr_nest(n, MAX_MSG, TCA_PEDIT_KEYS_EX | NLA_F_NESTED); + + for (i = 0; i < sel->sel.nkeys; i++) { + struct rtattr *key_start; + + key_start = addattr_nest(n, MAX_MSG, + TCA_PEDIT_KEY_EX | NLA_F_NESTED); + + if (addattr16(n, MAX_MSG, TCA_PEDIT_KEY_EX_HTYPE, k->htype) || + addattr16(n, MAX_MSG, TCA_PEDIT_KEY_EX_CMD, k->cmd)) { + return -1; + } + + addattr_nest_end(n, key_start); + + k++; + } + + addattr_nest_end(n, keys_start); + + return 0; +} + int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n) { - struct { - struct tc_pedit_sel sel; - struct tc_pedit_key keys[MAX_OFFS]; - } sel = {}; + struct m_pedit_sel sel = {}; int argc = *argc_p; char **argv = *argv_p; @@ -452,6 +523,17 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, if (matches(*argv, "pedit") == 0) { NEXT_ARG(); ok++; + + if (matches(*argv, "ex") == 0) { + if (ok > 1) { + fprintf(stderr, "'ex' must be before first 'munge'\n"); + explain(); + return -1; + } + sel.extended = true; + NEXT_ARG(); + } + continue; } else if (matches(*argv, "help") == 0) { usage(); @@ -463,7 +545,8 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, return -1; } NEXT_ARG(); - if (parse_munge(&argc, &argv, &sel.sel)) { + + 
if (parse_munge(&argc, &argv, &sel)) { fprintf(stderr, "Bad pedit construct (%s)\n", *argv); explain(); @@ -499,9 +582,18 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, tail = NLMSG_TAIL(n); addattr_l(n, MAX_MSG, tca_id, NULL, 0); - addattr_l(n, MAX_MSG, TCA_PEDIT_PARMS, &sel, - sizeof(sel.sel) + - sel.sel.nkeys * sizeof(struct tc_pedit_key)); + if (!sel.extended) { + addattr_l(n, MAX_MSG, TCA_PEDIT_PARMS, &sel, + sizeof(sel.sel) + + sel.sel.nkeys * sizeof(struct tc_pedit_key)); + } else { + addattr_l(n, MAX_MSG, TCA_PEDIT_PARMS_EX, &sel, + sizeof(sel.sel) + + sel.sel.nkeys * sizeof(struct tc_pedit_key)); + + pedit_keys_ex_addattr(&sel, n); + } + tail->rta_len = (void *)NLMSG_TAIL(n) - (void *)tail; *argc_p = argc; @@ -509,21 +601,74 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, return 0; } +const char *pedit_htype_str[] = { + [TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK] = "", + [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = "eth", + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = "ipv4", + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = "ipv6", + [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = "tcp", + [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = "udp", +}; + +static void print_pedit_location(FILE *f, + enum pedit_header_type htype, __u32 off) +{ + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { + fprintf(f, "%d", (unsigned int)off); + return; + } + + if (htype < ARRAY_SIZE(pedit_htype_str)) + fprintf(f, "%s", pedit_htype_str[htype]); + else + fprintf(f, "unknown(%d)", htype); + + fprintf(f, "%c%d", (int)off >= 0 ? 
'+' : '-', abs((int)off)); +} + int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) { struct tc_pedit_sel *sel; struct rtattr *tb[TCA_PEDIT_MAX + 1]; + struct m_pedit_key_ex *keys_ex = NULL; if (arg == NULL) return -1; parse_rtattr_nested(tb, TCA_PEDIT_MAX, arg); - if (tb[TCA_PEDIT_PARMS] == NULL) { + if (!tb[TCA_PEDIT_PARMS] && !tb[TCA_PEDIT_PARMS_EX]) { fprintf(f, "[NULL pedit parameters]"); return -1; } - sel = RTA_DATA(tb[TCA_PEDIT_PARMS]); + + if (tb[TCA_PEDIT_PARMS]) { + sel = RTA_DATA(tb[TCA_PEDIT_PARMS]); + } else { + int err; + + sel = RTA_DATA(tb[TCA_PEDIT_PARMS_EX]); + + if (!tb[TCA_PEDIT_KEYS_EX]) { + fprintf(f, "Netlink error\n"); + return -1; + } + + keys_ex = calloc(sel->nkeys, sizeof(*keys_ex)); + if (!keys_ex) { + fprintf(f, "Out of memory\n"); + return -1; + } + + err = pedit_keys_ex_getattr(tb[TCA_PEDIT_KEYS_EX], keys_ex, + sel->nkeys); + if (err) { + fprintf(f, "Netlink error\n"); + + free(keys_ex); + return -1; + } + } fprintf(f, " pedit action %s keys %d\n ", action_n2a(sel->action), sel->nkeys); @@ -540,11 +685,25 @@ int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) if (sel->nkeys) { int i; struct tc_pedit_key *key = sel->keys; + struct m_pedit_key_ex *key_ex = keys_ex; for (i = 0; i < sel->nkeys; i++, key++) { + enum pedit_header_type htype = + TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + + if (keys_ex) { + htype = key_ex->htype; + + key_ex++; + } + fprintf(f, "\n\t key #%d", i); - fprintf(f, " at %d: val %08x mask %08x", - (unsigned int)key->off, + + fprintf(f, " at "); + + print_pedit_location(f, htype, key->off); + + fprintf(f, ": val %08x mask %08x", (unsigned int)ntohl(key->val), (unsigned int)ntohl(key->mask)); } @@ -554,6 +713,8 @@ int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) } fprintf(f, "\n "); + + free(keys_ex); return 0; } diff --git a/tc/m_pedit.h b/tc/m_pedit.h index 1698c954..e2897b0c 100644 --- a/tc/m_pedit.h +++ b/tc/m_pedit.h @@ -39,22 +39,47 @@ #define PEDITKINDSIZ 16 
+struct m_pedit_key { + __u32 mask; /* AND */ + __u32 val; /*XOR */ + __u32 off; /*offset */ + __u32 at; + __u32 offmask; + __u32 shift; + + enum pedit_header_type htype; + enum pedit_cmd cmd; +}; + +struct m_pedit_key_ex { + enum pedit_header_type htype; + enum pedit_cmd cmd; +}; + +struct m_pedit_sel { + struct tc_pedit_sel sel; + struct tc_pedit_key keys[MAX_OFFS]; + struct m_pedit_key_ex keys_ex[MAX_OFFS]; + bool extended; +}; + struct m_pedit_util { struct m_pedit_util *next; char id[PEDITKINDSIZ]; - int (*parse_peopt)(int *argc_p, char ***argv_p,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey); + int (*parse_peopt)(int *argc_p, char ***argv_p, + struct m_pedit_sel *sel, struct m_pedit_key *tkey); }; - -extern int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type,__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey); -extern int pack_key(struct tc_pedit_sel *sel,struct tc_pedit_key *tkey); -extern int pack_key32(__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey); -extern int pack_key16(__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey); -extern int pack_key8(__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey); +extern int pack_key(struct m_pedit_sel *sel, struct m_pedit_key *tkey); +extern int pack_key32(__u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey); +extern int pack_key16(__u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey); +extern int pack_key8(__u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey); extern int parse_val(int *argc_p, char ***argv_p, __u32 * val, int type); -extern int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type,__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey); -extern int parse_offset(int *argc_p, char ***argv_p,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey); +extern int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, + struct m_pedit_sel *sel, struct m_pedit_key *tkey); 
+extern int parse_offset(int *argc_p, char ***argv_p, + struct m_pedit_sel *sel, struct m_pedit_key *tkey); int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n); extern int print_pedit(struct action_util *au,FILE * f, struct rtattr *arg); extern int pedit_print_xstats(struct action_util *au, FILE *f, struct rtattr *xstats); diff --git a/tc/p_icmp.c b/tc/p_icmp.c index c2a6fcd6..1c3a5d90 100644 --- a/tc/p_icmp.c +++ b/tc/p_icmp.c @@ -25,7 +25,8 @@ static int -parse_icmp(int *argc_p, char ***argv_p, struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) +parse_icmp(int *argc_p, char ***argv_p, + struct m_pedit_sel *sel, struct m_pedit_key *tkey) { int res = -1; #if 0 diff --git a/tc/p_ip.c b/tc/p_ip.c index 535151e5..e56eb393 100644 --- a/tc/p_ip.c +++ b/tc/p_ip.c @@ -25,7 +25,7 @@ static int parse_ip(int *argc_p, char ***argv_p, - struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) + struct m_pedit_sel *sel, struct m_pedit_key *tkey) { int res = -1; int argc = *argc_p; @@ -34,6 +34,10 @@ parse_ip(int *argc_p, char ***argv_p, if (argc < 2) return -1; + tkey->htype = sel->extended ? 
+ TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 : + TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + if (strcmp(*argv, "src") == 0) { NEXT_ARG(); tkey->off = 12; @@ -107,6 +111,13 @@ parse_ip(int *argc_p, char ***argv_p, res = parse_cmd(&argc, &argv, 1, TU32, 0x20, sel, tkey); goto done; } + + if (sel->extended) + return -1; /* fields located outside IP header should be + * addressed using the relevant header type in + * extended pedit kABI + */ + if (strcmp(*argv, "dport") == 0) { NEXT_ARG(); tkey->off = 22; @@ -141,7 +152,7 @@ done: static int parse_ip6(int *argc_p, char ***argv_p, - struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) + struct m_pedit_sel *sel, struct m_pedit_key *tkey) { int res = -1; return res; diff --git a/tc/p_tcp.c b/tc/p_tcp.c index 79f16c58..53ee9842 100644 --- a/tc/p_tcp.c +++ b/tc/p_tcp.c @@ -24,7 +24,8 @@ #include "m_pedit.h" static int -parse_tcp(int *argc_p, char ***argv_p, struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) +parse_tcp(int *argc_p, char ***argv_p, + struct m_pedit_sel *sel, struct m_pedit_key *tkey) { int res = -1; return res; diff --git a/tc/p_udp.c b/tc/p_udp.c index c056414e..3a86ba38 100644 --- a/tc/p_udp.c +++ b/tc/p_udp.c @@ -24,7 +24,8 @@ #include "m_pedit.h" static int -parse_udp(int *argc_p, char ***argv_p, struct tc_pedit_sel *sel, struct tc_pedit_key *tkey) +parse_udp(int *argc_p, char ***argv_p, + struct m_pedit_sel *sel, struct m_pedit_key *tkey) { int res = -1; return res; From c05ddaf9e07e8fe3d4acf5c6b28c561afd342fbd Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 23 Apr 2017 15:53:51 +0300 Subject: [PATCH 07/13] tc/pedit: Introduce 'add' operation This command could be useful to increase/decrease fields value. 
Signed-off-by: Amir Vadai --- man/man8/tc-pedit.8 | 13 ++++++++++++- tc/m_pedit.c | 18 +++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8 index 761d5c8e..6bba7419 100644 --- a/man/man8/tc-pedit.8 +++ b/man/man8/tc-pedit.8 @@ -43,6 +43,8 @@ pedit - generic packet editor action .IR CMD_SPEC " := {" .BR clear " | " invert " | " set .IR VAL " | " +.BR add +.IR VAL " | " .BR preserve " } [ " retain .IR RVAL " ]" @@ -63,7 +65,9 @@ only for IPv4 headers. .B ex Use extended pedit. .I EXTENDED_LAYERED_OP -is allowed only in this mode. +and the add +.I CMD_SPEC +are allowed only in this mode. .TP .BI offset " OFFSET " "\fR{ \fBu32 \fR| \fBu16 \fR| \fBu8 \fR}" Specify the offset at which to change data. @@ -173,6 +177,13 @@ keywords in or the size of the addressed header field in .IR LAYERED_OP . .TP +.BI add " VAL" +Add the addressed data by a specific value. The size of +.I VAL +is defined by the size of the addressed header field in +.IR EXTENDED_LAYERED_OP . +This operation is supported only for extended layered op. +.TP .B preserve Keep the addressed data as is. 
.TP diff --git a/tc/m_pedit.c b/tc/m_pedit.c index a26fd3e5..7af074a5 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -41,7 +41,7 @@ static void explain(void) "\t\tATC:= at offmask shift \n" "\t\tNOTE: offval is byte offset, must be multiple of 4\n" "\t\tNOTE: maskval is a 32 bit hex number\n \t\tNOTE: shiftval is a shift value\n" - "\t\tCMD:= clear | invert | set | retain\n" + "\t\tCMD:= clear | invert | set | add | retain\n" "\t:= ip | ip6 \n" " \t\t| udp | tcp | icmp \n" "\tCONTROL:= reclassify | pipe | drop | continue | pass\n" @@ -276,7 +276,16 @@ int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, if (matches(*argv, "invert") == 0) { val = mask = o; - } else if (matches(*argv, "set") == 0) { + } else if (matches(*argv, "set") == 0 || + matches(*argv, "add") == 0) { + if (matches(*argv, "add") == 0) + tkey->cmd = TCA_PEDIT_KEY_EX_CMD_ADD; + + if (!sel->extended && tkey->cmd) { + fprintf(stderr, "Non extended mode. only 'set' command is supported\n"); + return -1; + } + NEXT_ARG(); if (parse_val(&argc, &argv, &val, type)) return -1; @@ -690,9 +699,11 @@ int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) for (i = 0; i < sel->nkeys; i++, key++) { enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET; if (keys_ex) { htype = key_ex->htype; + cmd = key_ex->cmd; key_ex++; } @@ -703,7 +714,8 @@ int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg) print_pedit_location(f, htype, key->off); - fprintf(f, ": val %08x mask %08x", + fprintf(f, ": %s %08x mask %08x", + cmd ? "add" : "val", (unsigned int)ntohl(key->val), (unsigned int)ntohl(key->mask)); } From 8d193d96074bdc103d7e1738742aa371709cf7d4 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 23 Apr 2017 15:53:52 +0300 Subject: [PATCH 08/13] tc/pedit: p_ip: introduce editing ttl header Enable user to edit IP header ttl field. 
For example, to forward any TCP packet and decrease its TTL by one: $ tc filter add dev enp0s9 protocol ip parent ffff: \ flower \ ip_proto tcp \ action pedit ex munge \ ip ttl add 0xff pipe \ action mirred egress \ redirect dev veth0 Signed-off-by: Amir Vadai --- man/man8/tc-pedit.8 | 17 +++++++++++++++++ tc/p_ip.c | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8 index 6bba7419..c98d95cb 100644 --- a/man/man8/tc-pedit.8 +++ b/man/man8/tc-pedit.8 @@ -28,6 +28,8 @@ pedit - generic packet editor action .ti -8 .IR EXTENDED_LAYERED_OP " := { " .BI ip " IPHDR_FIELD" +| +.BI ip " EX_IPHDR_FIELD" .RI } " CMD_SPEC" .ti -8 @@ -39,6 +41,10 @@ pedit - generic packet editor action .IR BEYOND_IPHDR_FIELD " := { " .BR dport " | " sport " | " icmp_type " | " icmp_code " }" +.ti -8 +.IR EX_IPHDR_FIELD " := { " +.BR ttl " }" + .ti -8 .IR CMD_SPEC " := {" .BR clear " | " invert " | " set @@ -161,6 +167,17 @@ If it is not or the latter is bigger than the minimum of 20 bytes, this will do unexpected things. These fields are eight-bit values. .RE .TP +.BI ip " EX_IPHDR_FIELD" +Supported only when +.I ex +is used. The supported keywords for +.I EX_IPHDR_FIELD +are: +.RS +.TP +.B ttl +.RE +.TP .B clear Clear the addressed data (i.e., set it to zero). 
.TP diff --git a/tc/p_ip.c b/tc/p_ip.c index e56eb393..22fe6505 100644 --- a/tc/p_ip.c +++ b/tc/p_ip.c @@ -66,6 +66,12 @@ parse_ip(int *argc_p, char ***argv_p, res = parse_cmd(&argc, &argv, 1, TU32, 0x0f, sel, tkey); goto done; } + if (strcmp(*argv, "ttl") == 0) { + NEXT_ARG(); + tkey->off = 8; + res = parse_cmd(&argc, &argv, 1, TU32, RU8, sel, tkey); + goto done; + } if (strcmp(*argv, "protocol") == 0) { NEXT_ARG(); tkey->off = 9; From fa4652ff3b5d151dd5c9ec2a9faa92983b38603f Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 23 Apr 2017 15:53:53 +0300 Subject: [PATCH 09/13] tc/pedit: Support fields bigger than 32 bits Make parse_val() accept fields up to 128 bits long, this should be enough for current use cases and involves a minimal change to code. Signed-off-by: Amir Vadai --- tc/m_pedit.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tc/m_pedit.c b/tc/m_pedit.c index 7af074a5..d982c91a 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -256,7 +256,10 @@ int parse_val(int *argc_p, char ***argv_p, __u32 *val, int type) int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey) { - __u32 mask = 0, val = 0; + __u32 mask[4] = { 0 }; + __u32 val[4] = { 0 }; + __u32 *m = &mask[0]; + __u32 *v = &val[0]; __u32 o = 0xFF; int res = -1; int argc = *argc_p; @@ -275,7 +278,7 @@ int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, o = 0xFFFFFFFF; if (matches(*argv, "invert") == 0) { - val = mask = o; + *v = *m = o; } else if (matches(*argv, "set") == 0 || matches(*argv, "add") == 0) { if (matches(*argv, "add") == 0) @@ -287,7 +290,7 @@ int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, } NEXT_ARG(); - if (parse_val(&argc, &argv, &val, type)) + if (parse_val(&argc, &argv, val, type)) return -1; } else if (matches(*argv, "preserve") == 0) { retain = 0; @@ -307,8 +310,8 @@ int parse_cmd(int *argc_p, char ***argv_p, __u32 
len, int type, __u32 retain, argv++; } - tkey->val = val; - tkey->mask = mask; + tkey->val = *v; + tkey->mask = *m; if (type == TIPV4) tkey->val = ntohl(tkey->val); From 3cd5149ecd78b88852fb3d120527b26e70b471cd Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 23 Apr 2017 15:53:54 +0300 Subject: [PATCH 10/13] tc/pedit: p_eth: ETH header editor For example, forward tcp traffic to veth0 and set destination mac address to 11:22:33:44:55:66 : $ tc filter add dev enp0s9 protocol ip parent ffff: \ flower \ ip_proto tcp \ action pedit ex munge \ eth dst set 11:22:33:44:55:66 \ action mirred egress \ redirect dev veth0 Signed-off-by: Amir Vadai --- man/man8/tc-pedit.8 | 24 +++++++++++++++ tc/Makefile | 1 + tc/m_pedit.c | 46 +++++++++++++++++++++++++++++ tc/m_pedit.h | 1 + tc/p_eth.c | 72 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 144 insertions(+) create mode 100644 tc/p_eth.c diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8 index c98d95cb..8febdfe2 100644 --- a/man/man8/tc-pedit.8 +++ b/man/man8/tc-pedit.8 @@ -27,11 +27,17 @@ pedit - generic packet editor action .ti -8 .IR EXTENDED_LAYERED_OP " := { " +.BI eth " ETHHDR_FIELD" +| .BI ip " IPHDR_FIELD" | .BI ip " EX_IPHDR_FIELD" .RI } " CMD_SPEC" +.ti -8 +.IR ETHHDR_FIELD " := { " +.BR src " | " dst " | " type " }" + .ti -8 .IR IPHDR_FIELD " := { " .BR src " | " dst " | " tos " | " dsfield " | " ihl " | " protocol " |" @@ -103,6 +109,21 @@ and right-shifted by before adding it to .IR OFFSET . .TP +.BI eth " ETHHDR_FIELD" +Change an ETH header field. The supported keywords for +.I ETHHDR_FIELD +are: +.RS +.TP +.B src +.TQ +.B dst +Source or destination MAC address in the standard format: XX:XX:XX:XX:XX:XX +.TP +.B type +Ether-type in numeric value +.RE +.TP .BI ip " IPHDR_FIELD" Change an IPv4 header field. 
The supported keywords for .I IPHDR_FIELD @@ -269,6 +290,9 @@ tc filter add dev eth0 parent ffff: u32 \\ tc filter add dev eth0 parent ffff: u32 \\ match ip sport 22 0xffff \\ action pedit ex munge ip dst set 192.168.1.199 +tc filter add dev eth0 parent ffff: u32 \\ + match ip sport 22 0xffff \\ + action pedit ex munge eth dst set 11:22:33:44:55:66 .EE .RE .SH SEE ALSO diff --git a/tc/Makefile b/tc/Makefile index 3f7fc939..446a1139 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -54,6 +54,7 @@ TCMODULES += m_tunnel_key.o TCMODULES += m_sample.o TCMODULES += p_ip.o TCMODULES += p_icmp.o +TCMODULES += p_eth.o TCMODULES += p_tcp.o TCMODULES += p_udp.o TCMODULES += em_nbyte.o diff --git a/tc/m_pedit.c b/tc/m_pedit.c index d982c91a..0be42343 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -28,6 +28,7 @@ #include "utils.h" #include "tc_util.h" #include "m_pedit.h" +#include "rt_names.h" static struct m_pedit_util *pedit_list; static int pedit_debug; @@ -223,6 +224,38 @@ int pack_key8(__u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey) return pack_key(sel, tkey); } +static int pack_mac(struct m_pedit_sel *sel, struct m_pedit_key *tkey, + __u8 *mac) +{ + int ret = 0; + + if (!(tkey->off & 0x3)) { + tkey->mask = 0; + tkey->val = ntohl(*((__u32 *)mac)); + ret |= pack_key32(~0, sel, tkey); + + tkey->off += 4; + tkey->mask = 0; + tkey->val = ntohs(*((__u16 *)&mac[4])); + ret |= pack_key16(~0, sel, tkey); + } else if (!(tkey->off & 0x1)) { + tkey->mask = 0; + tkey->val = ntohs(*((__u16 *)mac)); + ret |= pack_key16(~0, sel, tkey); + + tkey->off += 4; + tkey->mask = 0; + tkey->val = ntohl(*((__u32 *)(mac + 2))); + ret |= pack_key32(~0, sel, tkey); + } else { + fprintf(stderr, + "pack_mac: mac offsets must begin in 32bit or 16bit boundaries\n"); + return -1; + } + + return ret; +} + int parse_val(int *argc_p, char ***argv_p, __u32 *val, int type) { int argc = *argc_p; @@ -250,6 +283,14 @@ int parse_val(int *argc_p, char ***argv_p, __u32 *val, int type) if (type == 
TIPV6) return -1; /* not implemented yet */ + if (type == TMAC) { +#define MAC_ALEN 6 + int ret = ll_addr_a2n((char *)val, MAC_ALEN, *argv); + + if (ret == MAC_ALEN) + return 0; + } + return -1; } @@ -310,6 +351,11 @@ int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, argv++; } + if (type == TMAC) { + res = pack_mac(sel, tkey, (__u8 *)val); + goto done; + } + tkey->val = *v; tkey->mask = *m; diff --git a/tc/m_pedit.h b/tc/m_pedit.h index e2897b0c..ecfb6add 100644 --- a/tc/m_pedit.h +++ b/tc/m_pedit.h @@ -32,6 +32,7 @@ #define TIPV6 2 #define TINT 3 #define TU32 4 +#define TMAC 5 #define RU32 0xFFFFFFFF #define RU16 0xFFFF diff --git a/tc/p_eth.c b/tc/p_eth.c new file mode 100644 index 00000000..ad3e28f8 --- /dev/null +++ b/tc/p_eth.c @@ -0,0 +1,72 @@ +/* + * m_pedit_eth.c packet editor: ETH header + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Amir Vadai (amir@vadai.me) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "tc_util.h" +#include "m_pedit.h" + +static int +parse_eth(int *argc_p, char ***argv_p, + struct m_pedit_sel *sel, struct m_pedit_key *tkey) +{ + int res = -1; + int argc = *argc_p; + char **argv = *argv_p; + + if (argc < 2) + return -1; + + tkey->htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH; + + if (strcmp(*argv, "type") == 0) { + NEXT_ARG(); + tkey->off = 12; + res = parse_cmd(&argc, &argv, 2, TU32, RU16, sel, tkey); + goto done; + } + + if (strcmp(*argv, "dst") == 0) { + NEXT_ARG(); + tkey->off = 0; + res = parse_cmd(&argc, &argv, 6, TMAC, RU32, sel, tkey); + goto done; + } + + if (strcmp(*argv, "src") == 0) { + NEXT_ARG(); + tkey->off = 6; + res = parse_cmd(&argc, &argv, 6, TMAC, RU32, sel, tkey); + goto done; + } + + return -1; + +done: + *argc_p = argc; + *argv_p = argv; + return res; +} + +struct m_pedit_util p_pedit_eth = { + NULL, + "eth", + parse_eth, +}; From 2c6eb12ab82548cd376bd2d24eb034b385f23d18 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 23 Apr 2017 15:53:55 +0300 Subject: [PATCH 11/13] tc/pedit: p_tcp: introduce pedit tcp support For example, forward tcp traffic destined to port 80 to veth0 and set tcp port to 8080: $ tc filter add dev enp0s9 protocol ip parent ffff: \ flower \ ip_proto tcp \ dst_port 80 \ action pedit ex munge \ tcp dport set 8080 \ action mirred egress \ redirect dev veth0 Signed-off-by: Amir Vadai --- man/man8/tc-pedit.8 | 23 +++++++++++++++++++++++ tc/p_tcp.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8 index 8febdfe2..ad192959 100644 --- a/man/man8/tc-pedit.8 +++ b/man/man8/tc-pedit.8 @@ -32,6 +32,8 @@ pedit - generic packet editor action .BI ip " IPHDR_FIELD" | .BI ip " EX_IPHDR_FIELD" +| +.BI tcp " TCPHDR_FIELD" .RI } " CMD_SPEC" .ti -8 @@ -51,6 +53,10 @@ pedit - 
generic packet editor action .IR EX_IPHDR_FIELD " := { " .BR ttl " }" +.ti -8 +.IR TCPHDR_FIELD " := { " +.BR sport " | " dport " | " flags " }" + .ti -8 .IR CMD_SPEC " := {" .BR clear " | " invert " | " set @@ -199,6 +205,20 @@ are: .B ttl .RE .TP +.BI tcp " TCPHDR_FIELD" +The supported keywords for +.I TCPHDR_FIELD +are: +.RS +.TP +.B sport +.TQ +.B dport +Source or destination TCP port number, a 16-bit value. +.TP +.B flags +.RE +.TP .B clear Clear the addressed data (i.e., set it to zero). .TP @@ -293,6 +313,9 @@ tc filter add dev eth0 parent ffff: u32 \\ tc filter add dev eth0 parent ffff: u32 \\ match ip sport 22 0xffff \\ action pedit ex munge eth dst set 11:22:33:44:55:66 +tc filter add dev eth0 parent ffff: u32 \\ + match ip dport 23 0xffff \\ + action pedit ex munge tcp dport set 22 .EE .RE .SH SEE ALSO diff --git a/tc/p_tcp.c b/tc/p_tcp.c index 53ee9842..cf14574c 100644 --- a/tc/p_tcp.c +++ b/tc/p_tcp.c @@ -28,6 +28,43 @@ parse_tcp(int *argc_p, char ***argv_p, struct m_pedit_sel *sel, struct m_pedit_key *tkey) { int res = -1; + int argc = *argc_p; + char **argv = *argv_p; + + if (argc < 2) + return -1; + + if (!sel->extended) + return -1; + + tkey->htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP; + + if (strcmp(*argv, "sport") == 0) { + NEXT_ARG(); + tkey->off = 0; + res = parse_cmd(&argc, &argv, 2, TU32, RU16, sel, tkey); + goto done; + } + + if (strcmp(*argv, "dport") == 0) { + NEXT_ARG(); + tkey->off = 2; + res = parse_cmd(&argc, &argv, 2, TU32, RU16, sel, tkey); + goto done; + } + + if (strcmp(*argv, "flags") == 0) { + NEXT_ARG(); + tkey->off = 13; + res = parse_cmd(&argc, &argv, 1, TU32, RU8, sel, tkey); + goto done; + } + + return -1; + +done: + *argc_p = argc; + *argv_p = argv; return res; } struct m_pedit_util p_pedit_tcp = { From 3d2a7781ec0b9843c21f48455021ff252bfc85ac Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 23 Apr 2017 15:53:56 +0300 Subject: [PATCH 12/13] tc/pedit: p_udp: introduce pedit udp support For example, forward udp traffic 
destined to port 999 to veth0 and set udp port to 888: $ tc filter add dev enp0s9 protocol ip parent ffff: \ flower \ ip_proto udp \ dst_port 999 \ action pedit ex munge \ udp dport set 888 \ action mirred egress \ redirect dev veth0 Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai --- man/man8/tc-pedit.8 | 18 ++++++++++++++++++ tc/p_udp.c | 27 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8 index ad192959..7f482eaf 100644 --- a/man/man8/tc-pedit.8 +++ b/man/man8/tc-pedit.8 @@ -34,6 +34,8 @@ pedit - generic packet editor action .BI ip " EX_IPHDR_FIELD" | .BI tcp " TCPHDR_FIELD" +| +.BI udp " UDPHDR_FIELD" .RI } " CMD_SPEC" .ti -8 @@ -57,6 +59,10 @@ pedit - generic packet editor action .IR TCPHDR_FIELD " := { " .BR sport " | " dport " | " flags " }" +.ti -8 +.IR UDPHDR_FIELD " := { " +.BR sport " | " dport " }" + .ti -8 .IR CMD_SPEC " := {" .BR clear " | " invert " | " set @@ -219,6 +225,18 @@ Source or destination TCP port number, a 16-bit value. .B flags .RE .TP +.BI udp " UDPHDR_FIELD" +The supported keywords for +.I UDPHDR_FIELD +are: +.RS +.TP +.B sport +.TQ +.B dport +Source or destination UDP port number, a 16-bit value. +.RE +.TP .B clear Clear the addressed data (i.e., set it to zero).
.TP diff --git a/tc/p_udp.c b/tc/p_udp.c index 3a86ba38..a56a1b51 100644 --- a/tc/p_udp.c +++ b/tc/p_udp.c @@ -28,6 +28,33 @@ parse_udp(int *argc_p, char ***argv_p, struct m_pedit_sel *sel, struct m_pedit_key *tkey) { int res = -1; + int argc = *argc_p; + char **argv = *argv_p; + + if (argc < 2) + return -1; + + tkey->htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP; + + if (strcmp(*argv, "sport") == 0) { + NEXT_ARG(); + tkey->off = 0; + res = parse_cmd(&argc, &argv, 2, TU32, RU16, sel, tkey); + goto done; + } + + if (strcmp(*argv, "dport") == 0) { + NEXT_ARG(); + tkey->off = 2; + res = parse_cmd(&argc, &argv, 2, TU32, RU16, sel, tkey); + goto done; + } + + return -1; + +done: + *argc_p = argc; + *argv_p = argv; return res; } From 1e600da057a63db348f1122e76732ab74adf2ca3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 1 May 2017 09:25:22 -0700 Subject: [PATCH 13/13] pedit: fix whitespace Add newlines to break long lines. Signed-off-by: Stephen Hemminger --- tc/m_pedit.c | 9 ++++++--- tc/m_pedit.h | 28 +++++++++++++++++----------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/tc/m_pedit.c b/tc/m_pedit.c index 0be42343..6498dd91 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -145,7 +145,8 @@ int pack_key(struct m_pedit_sel *_sel, struct m_pedit_key *tkey) } else { if (tkey->htype != TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK || tkey->cmd != TCA_PEDIT_KEY_EX_CMD_SET) { - fprintf(stderr, "Munge parameters not supported. Use 'munge ex'.\n"); + fprintf(stderr, + "Munge parameters not supported. Use 'munge ex'.\n"); return -1; } } @@ -326,7 +327,8 @@ int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, tkey->cmd = TCA_PEDIT_KEY_EX_CMD_ADD; if (!sel->extended && tkey->cmd) { - fprintf(stderr, "Non extended mode. only 'set' command is supported\n"); + fprintf(stderr, + "Non extended mode. 
only 'set' command is supported\n"); return -1; } @@ -584,7 +586,8 @@ int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, if (matches(*argv, "ex") == 0) { if (ok > 1) { - fprintf(stderr, "'ex' must be before first 'munge'\n"); + fprintf(stderr, + "'ex' must be before first 'munge'\n"); explain(); return -1; } diff --git a/tc/m_pedit.h b/tc/m_pedit.h index ecfb6add..0bc02971 100644 --- a/tc/m_pedit.h +++ b/tc/m_pedit.h @@ -64,25 +64,31 @@ struct m_pedit_sel { bool extended; }; -struct m_pedit_util -{ +struct m_pedit_util { struct m_pedit_util *next; char id[PEDITKINDSIZ]; int (*parse_peopt)(int *argc_p, char ***argv_p, - struct m_pedit_sel *sel, struct m_pedit_key *tkey); + struct m_pedit_sel *sel, + struct m_pedit_key *tkey); }; extern int pack_key(struct m_pedit_sel *sel, struct m_pedit_key *tkey); -extern int pack_key32(__u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey); -extern int pack_key16(__u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey); -extern int pack_key8(__u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey); -extern int parse_val(int *argc_p, char ***argv_p, __u32 * val, int type); -extern int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain, +extern int pack_key32(__u32 retain, struct m_pedit_sel *sel, + struct m_pedit_key *tkey); +extern int pack_key16(__u32 retain, struct m_pedit_sel *sel, + struct m_pedit_key *tkey); +extern int pack_key8(__u32 retain, struct m_pedit_sel *sel, + struct m_pedit_key *tkey); +extern int parse_val(int *argc_p, char ***argv_p, __u32 *val, int type); +extern int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, + __u32 retain, struct m_pedit_sel *sel, struct m_pedit_key *tkey); extern int parse_offset(int *argc_p, char ***argv_p, struct m_pedit_sel *sel, struct m_pedit_key *tkey); -int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n); -extern int print_pedit(struct 
action_util *au,FILE * f, struct rtattr *arg); -extern int pedit_print_xstats(struct action_util *au, FILE *f, struct rtattr *xstats); +int parse_pedit(struct action_util *a, int *argc_p, char ***argv_p, + int tca_id, struct nlmsghdr *n); +extern int print_pedit(struct action_util *au, FILE *f, struct rtattr *arg); +extern int pedit_print_xstats(struct action_util *au, FILE *f, + struct rtattr *xstats); #endif