Merge pull request #11694 from sigeryang/master

zebra: add basic traffic control API
This commit is contained in:
Stephen Worley 2022-08-16 11:21:04 -04:00 committed by GitHub
commit d30d63f4f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 1602 additions and 0 deletions

776
include/linux/pkt_cls.h Normal file
View File

@ -0,0 +1,776 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __LINUX_PKT_CLS_H
#define __LINUX_PKT_CLS_H
#include <linux/types.h>
#include <linux/pkt_sched.h>
#define TC_COOKIE_MAX_SIZE 16
/* Action attributes */
enum {
TCA_ACT_UNSPEC,
TCA_ACT_KIND,
TCA_ACT_OPTIONS,
TCA_ACT_INDEX,
TCA_ACT_STATS,
TCA_ACT_PAD,
TCA_ACT_COOKIE,
TCA_ACT_FLAGS,
TCA_ACT_HW_STATS,
TCA_ACT_USED_HW_STATS,
__TCA_ACT_MAX
};
#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
* actions stats.
*/
/* tca HW stats type
* When user does not pass the attribute, he does not care.
* It is the same as if he would pass the attribute with
* all supported bits set.
* In case no bits are set, user is not interested in getting any HW statistics.
*/
#define TCA_ACT_HW_STATS_IMMEDIATE (1 << 0) /* Means that in dump, user
* gets the current HW stats
* state from the device
* queried at the dump time.
*/
#define TCA_ACT_HW_STATS_DELAYED (1 << 1) /* Means that in dump, user gets
* HW stats that might be out of date
* for some time, maybe couple of
* seconds. This is the case when
* driver polls stats updates
* periodically or when it gets async
* stats update from the device.
*/
#define TCA_ACT_MAX __TCA_ACT_MAX
#define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
#define TCA_ACT_MAX_PRIO 32
#define TCA_ACT_BIND 1
#define TCA_ACT_NOBIND 0
#define TCA_ACT_UNBIND 1
#define TCA_ACT_NOUNBIND 0
#define TCA_ACT_REPLACE 1
#define TCA_ACT_NOREPLACE 0
#define TC_ACT_UNSPEC (-1)
#define TC_ACT_OK 0
#define TC_ACT_RECLASSIFY 1
#define TC_ACT_SHOT 2
#define TC_ACT_PIPE 3
#define TC_ACT_STOLEN 4
#define TC_ACT_QUEUED 5
#define TC_ACT_REPEAT 6
#define TC_ACT_REDIRECT 7
#define TC_ACT_TRAP 8 /* For hw path, this means "trap to cpu"
* and don't further process the frame
* in hardware. For sw path, this is
* equivalent of TC_ACT_STOLEN - drop
* the skb and act like everything
* is alright.
*/
#define TC_ACT_VALUE_MAX TC_ACT_TRAP
/* There is a special kind of actions called "extended actions",
* which need a value parameter. These have a local opcode located in
* the highest nibble, starting from 1. The rest of the bits
* are used to carry the value. These two parts together make
* a combined opcode.
*/
/* Bit position at which the extended-action opcode (highest nibble) starts. */
#define __TC_ACT_EXT_SHIFT 28
/* Build an extended-action opcode from its local opcode number. */
#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
/* Mask selecting the value bits, i.e. everything below the opcode nibble. */
#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
/* Strip the value bits from a combined opcode, leaving only the opcode. */
#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
/* True when the opcode part of 'combined' equals 'opcode'. 'opcode' is
 * parenthesized so that an expression argument (e.g. a conditional) is
 * compared as a whole instead of being split by operator precedence.
 */
#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == (opcode))
#define TC_ACT_JUMP __TC_ACT_EXT(1)
#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN
/* These macros are put here for binary compatibility with userspace apps that
* make use of them. For kernel code and new userspace apps, use the TCA_ID_*
* versions.
*/
#define TCA_ACT_GACT 5
#define TCA_ACT_IPT 6
#define TCA_ACT_PEDIT 7
#define TCA_ACT_MIRRED 8
#define TCA_ACT_NAT 9
#define TCA_ACT_XT 10
#define TCA_ACT_SKBEDIT 11
#define TCA_ACT_VLAN 12
#define TCA_ACT_BPF 13
#define TCA_ACT_CONNMARK 14
#define TCA_ACT_SKBMOD 15
#define TCA_ACT_CSUM 16
#define TCA_ACT_TUNNEL_KEY 17
#define TCA_ACT_SIMP 22
#define TCA_ACT_IFE 25
#define TCA_ACT_SAMPLE 26
/* Action type identifiers */
enum tca_id {
TCA_ID_UNSPEC = 0,
TCA_ID_POLICE = 1,
TCA_ID_GACT = TCA_ACT_GACT,
TCA_ID_IPT = TCA_ACT_IPT,
TCA_ID_PEDIT = TCA_ACT_PEDIT,
TCA_ID_MIRRED = TCA_ACT_MIRRED,
TCA_ID_NAT = TCA_ACT_NAT,
TCA_ID_XT = TCA_ACT_XT,
TCA_ID_SKBEDIT = TCA_ACT_SKBEDIT,
TCA_ID_VLAN = TCA_ACT_VLAN,
TCA_ID_BPF = TCA_ACT_BPF,
TCA_ID_CONNMARK = TCA_ACT_CONNMARK,
TCA_ID_SKBMOD = TCA_ACT_SKBMOD,
TCA_ID_CSUM = TCA_ACT_CSUM,
TCA_ID_TUNNEL_KEY = TCA_ACT_TUNNEL_KEY,
TCA_ID_SIMP = TCA_ACT_SIMP,
TCA_ID_IFE = TCA_ACT_IFE,
TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
TCA_ID_CTINFO,
TCA_ID_MPLS,
TCA_ID_CT,
TCA_ID_GATE,
/* other actions go here */
__TCA_ID_MAX = 255
};
#define TCA_ID_MAX __TCA_ID_MAX
/* Policer parameter block. Only the field types are visible here; the
 * layout must stay unchanged because the structure is part of the
 * kernel/userspace interface.
 */
struct tc_police {
__u32 index;
int action;
/* TC_POLICE_* are aliases of the generic TC_ACT_* verdicts; presumably the
 * values accepted in 'action' - TODO confirm against the kernel policer.
 */
#define TC_POLICE_UNSPEC TC_ACT_UNSPEC
#define TC_POLICE_OK TC_ACT_OK
#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY
#define TC_POLICE_SHOT TC_ACT_SHOT
#define TC_POLICE_PIPE TC_ACT_PIPE
__u32 limit;
__u32 burst;
__u32 mtu;
/* rate specifications use the struct declared in <linux/pkt_sched.h> */
struct tc_ratespec rate;
struct tc_ratespec peakrate;
int refcnt;
int bindcnt;
__u32 capab;
};
struct tcf_t {
__u64 install;
__u64 lastuse;
__u64 expires;
__u64 firstuse;
};
struct tc_cnt {
int refcnt;
int bindcnt;
};
#define tc_gen \
__u32 index; \
__u32 capab; \
int action; \
int refcnt; \
int bindcnt
enum {
TCA_POLICE_UNSPEC,
TCA_POLICE_TBF,
TCA_POLICE_RATE,
TCA_POLICE_PEAKRATE,
TCA_POLICE_AVRATE,
TCA_POLICE_RESULT,
TCA_POLICE_TM,
TCA_POLICE_PAD,
TCA_POLICE_RATE64,
TCA_POLICE_PEAKRATE64,
__TCA_POLICE_MAX
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
};
#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1)
/* tca flags definitions */
#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) /* don't offload filter to HW */
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */
#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */
#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */
#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */
/* U32 filters */
/* Accessors for the fields packed into a 32-bit u32 handle 'h':
 *   bits 20-31: HTID (TC_U32_USERHTID shifts it down to bits 0-11)
 *   bits 12-19: HASH
 *   bits  0-11: NODE
 * TC_U32_KEY keeps the low 20 bits (HASH and NODE together).
 */
#define TC_U32_HTID(h) ((h)&0xFFF00000)
#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20)
#define TC_U32_HASH(h) (((h)>>12)&0xFF)
#define TC_U32_NODE(h) ((h)&0xFFF)
#define TC_U32_KEY(h) ((h)&0xFFFFF)
#define TC_U32_UNSPEC 0
/* Handle with all HTID bits set and HASH/NODE zero. */
#define TC_U32_ROOT (0xFFF00000)
enum {
TCA_U32_UNSPEC,
TCA_U32_CLASSID,
TCA_U32_HASH,
TCA_U32_LINK,
TCA_U32_DIVISOR,
TCA_U32_SEL,
TCA_U32_POLICE,
TCA_U32_ACT,
TCA_U32_INDEV,
TCA_U32_PCNT,
TCA_U32_MARK,
TCA_U32_FLAGS,
TCA_U32_PAD,
__TCA_U32_MAX
};
#define TCA_U32_MAX (__TCA_U32_MAX - 1)
struct tc_u32_key {
__be32 mask;
__be32 val;
int off;
int offmask;
};
struct tc_u32_sel {
unsigned char flags;
unsigned char offshift;
unsigned char nkeys;
__be16 offmask;
__u16 off;
short offoff;
short hoff;
__be32 hmask;
struct tc_u32_key keys[0];
};
struct tc_u32_mark {
__u32 val;
__u32 mask;
__u32 success;
};
struct tc_u32_pcnt {
__u64 rcnt;
__u64 rhit;
__u64 kcnts[0];
};
/* Flags */
#define TC_U32_TERMINAL 1
#define TC_U32_OFFSET 2
#define TC_U32_VAROFFSET 4
#define TC_U32_EAT 8
#define TC_U32_MAXDEPTH 8
/* RSVP filter */
enum {
TCA_RSVP_UNSPEC,
TCA_RSVP_CLASSID,
TCA_RSVP_DST,
TCA_RSVP_SRC,
TCA_RSVP_PINFO,
TCA_RSVP_POLICE,
TCA_RSVP_ACT,
__TCA_RSVP_MAX
};
#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
struct tc_rsvp_gpi {
__u32 key;
__u32 mask;
int offset;
};
struct tc_rsvp_pinfo {
struct tc_rsvp_gpi dpi;
struct tc_rsvp_gpi spi;
__u8 protocol;
__u8 tunnelid;
__u8 tunnelhdr;
__u8 pad;
};
/* ROUTE filter */
enum {
TCA_ROUTE4_UNSPEC,
TCA_ROUTE4_CLASSID,
TCA_ROUTE4_TO,
TCA_ROUTE4_FROM,
TCA_ROUTE4_IIF,
TCA_ROUTE4_POLICE,
TCA_ROUTE4_ACT,
__TCA_ROUTE4_MAX
};
#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1)
/* FW filter */
enum {
TCA_FW_UNSPEC,
TCA_FW_CLASSID,
TCA_FW_POLICE,
TCA_FW_INDEV,
TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
TCA_FW_MASK,
__TCA_FW_MAX
};
#define TCA_FW_MAX (__TCA_FW_MAX - 1)
/* TC index filter */
enum {
TCA_TCINDEX_UNSPEC,
TCA_TCINDEX_HASH,
TCA_TCINDEX_MASK,
TCA_TCINDEX_SHIFT,
TCA_TCINDEX_FALL_THROUGH,
TCA_TCINDEX_CLASSID,
TCA_TCINDEX_POLICE,
TCA_TCINDEX_ACT,
__TCA_TCINDEX_MAX
};
#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1)
/* Flow filter */
enum {
FLOW_KEY_SRC,
FLOW_KEY_DST,
FLOW_KEY_PROTO,
FLOW_KEY_PROTO_SRC,
FLOW_KEY_PROTO_DST,
FLOW_KEY_IIF,
FLOW_KEY_PRIORITY,
FLOW_KEY_MARK,
FLOW_KEY_NFCT,
FLOW_KEY_NFCT_SRC,
FLOW_KEY_NFCT_DST,
FLOW_KEY_NFCT_PROTO_SRC,
FLOW_KEY_NFCT_PROTO_DST,
FLOW_KEY_RTCLASSID,
FLOW_KEY_SKUID,
FLOW_KEY_SKGID,
FLOW_KEY_VLAN_TAG,
FLOW_KEY_RXHASH,
__FLOW_KEY_MAX,
};
#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1)
enum {
FLOW_MODE_MAP,
FLOW_MODE_HASH,
};
enum {
TCA_FLOW_UNSPEC,
TCA_FLOW_KEYS,
TCA_FLOW_MODE,
TCA_FLOW_BASECLASS,
TCA_FLOW_RSHIFT,
TCA_FLOW_ADDEND,
TCA_FLOW_MASK,
TCA_FLOW_XOR,
TCA_FLOW_DIVISOR,
TCA_FLOW_ACT,
TCA_FLOW_POLICE,
TCA_FLOW_EMATCHES,
TCA_FLOW_PERTURB,
__TCA_FLOW_MAX
};
#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1)
/* Basic filter */
struct tc_basic_pcnt {
__u64 rcnt;
__u64 rhit;
};
enum {
TCA_BASIC_UNSPEC,
TCA_BASIC_CLASSID,
TCA_BASIC_EMATCHES,
TCA_BASIC_ACT,
TCA_BASIC_POLICE,
TCA_BASIC_PCNT,
TCA_BASIC_PAD,
__TCA_BASIC_MAX
};
#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
/* Cgroup classifier */
enum {
TCA_CGROUP_UNSPEC,
TCA_CGROUP_ACT,
TCA_CGROUP_POLICE,
TCA_CGROUP_EMATCHES,
__TCA_CGROUP_MAX,
};
#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1)
/* BPF classifier */
#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0)
enum {
TCA_BPF_UNSPEC,
TCA_BPF_ACT,
TCA_BPF_POLICE,
TCA_BPF_CLASSID,
TCA_BPF_OPS_LEN,
TCA_BPF_OPS,
TCA_BPF_FD,
TCA_BPF_NAME,
TCA_BPF_FLAGS,
TCA_BPF_FLAGS_GEN,
TCA_BPF_TAG,
TCA_BPF_ID,
__TCA_BPF_MAX,
};
#define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
/* Flower classifier */
enum {
TCA_FLOWER_UNSPEC,
TCA_FLOWER_CLASSID,
TCA_FLOWER_INDEV,
TCA_FLOWER_ACT,
TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_TYPE, /* be16 */
TCA_FLOWER_KEY_IP_PROTO, /* u8 */
TCA_FLOWER_KEY_IPV4_SRC, /* be32 */
TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */
TCA_FLOWER_KEY_IPV4_DST, /* be32 */
TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */
TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */
TCA_FLOWER_KEY_TCP_SRC, /* be16 */
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
TCA_FLOWER_FLAGS,
TCA_FLOWER_KEY_VLAN_ID, /* be16 */
TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */
TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */
TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */
TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */
TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */
TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */
TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */
TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */
TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */
TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */
TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */
TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */
TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */
TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */
TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */
TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */
TCA_FLOWER_KEY_SCTP_SRC, /* be16 */
TCA_FLOWER_KEY_SCTP_DST, /* be16 */
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */
TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */
TCA_FLOWER_KEY_FLAGS, /* be32 */
TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */
TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */
TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */
TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */
TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */
TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */
TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */
TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */
TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */
TCA_FLOWER_KEY_ARP_SIP, /* be32 */
TCA_FLOWER_KEY_ARP_SIP_MASK, /* be32 */
TCA_FLOWER_KEY_ARP_TIP, /* be32 */
TCA_FLOWER_KEY_ARP_TIP_MASK, /* be32 */
TCA_FLOWER_KEY_ARP_OP, /* u8 */
TCA_FLOWER_KEY_ARP_OP_MASK, /* u8 */
TCA_FLOWER_KEY_ARP_SHA, /* ETH_ALEN */
TCA_FLOWER_KEY_ARP_SHA_MASK, /* ETH_ALEN */
TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */
TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */
TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */
TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */
TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */
TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */
TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */
TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */
TCA_FLOWER_KEY_IP_TOS, /* u8 */
TCA_FLOWER_KEY_IP_TOS_MASK, /* u8 */
TCA_FLOWER_KEY_IP_TTL, /* u8 */
TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */
TCA_FLOWER_KEY_CVLAN_ID, /* be16 */
TCA_FLOWER_KEY_CVLAN_PRIO, /* u8 */
TCA_FLOWER_KEY_CVLAN_ETH_TYPE, /* be16 */
TCA_FLOWER_KEY_ENC_IP_TOS, /* u8 */
TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */
TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */
TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */
TCA_FLOWER_KEY_ENC_OPTS,
TCA_FLOWER_KEY_ENC_OPTS_MASK,
TCA_FLOWER_IN_HW_COUNT,
TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */
TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */
TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */
TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */
TCA_FLOWER_KEY_CT_STATE, /* u16 */
TCA_FLOWER_KEY_CT_STATE_MASK, /* u16 */
TCA_FLOWER_KEY_CT_ZONE, /* u16 */
TCA_FLOWER_KEY_CT_ZONE_MASK, /* u16 */
TCA_FLOWER_KEY_CT_MARK, /* u32 */
TCA_FLOWER_KEY_CT_MARK_MASK, /* u32 */
TCA_FLOWER_KEY_CT_LABELS, /* u128 */
TCA_FLOWER_KEY_CT_LABELS_MASK, /* u128 */
TCA_FLOWER_KEY_MPLS_OPTS,
TCA_FLOWER_KEY_HASH, /* u32 */
TCA_FLOWER_KEY_HASH_MASK, /* u32 */
__TCA_FLOWER_MAX,
};
#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
enum {
TCA_FLOWER_KEY_CT_FLAGS_NEW = 1 << 0, /* Beginning of a new connection. */
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */
TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */
TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */
TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */
TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. */
__TCA_FLOWER_KEY_CT_FLAGS_MAX,
};
enum {
TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
* TCA_FLOWER_KEY_ENC_OPT_GENEVE_
* attributes
*/
TCA_FLOWER_KEY_ENC_OPTS_VXLAN, /* Nested
* TCA_FLOWER_KEY_ENC_OPT_VXLAN_
* attributes
*/
TCA_FLOWER_KEY_ENC_OPTS_ERSPAN, /* Nested
* TCA_FLOWER_KEY_ENC_OPT_ERSPAN_
* attributes
*/
__TCA_FLOWER_KEY_ENC_OPTS_MAX,
};
#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
enum {
TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */
TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */
TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */
__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
};
#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
(__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
enum {
TCA_FLOWER_KEY_ENC_OPT_VXLAN_UNSPEC,
TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP, /* u32 */
__TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX,
};
#define TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX \
(__TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX - 1)
enum {
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_UNSPEC,
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER, /* u8 */
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX, /* be32 */
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR, /* u8 */
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID, /* u8 */
__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX,
};
#define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \
(__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1)
enum {
TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC,
TCA_FLOWER_KEY_MPLS_OPTS_LSE,
__TCA_FLOWER_KEY_MPLS_OPTS_MAX,
};
#define TCA_FLOWER_KEY_MPLS_OPTS_MAX (__TCA_FLOWER_KEY_MPLS_OPTS_MAX - 1)
enum {
TCA_FLOWER_KEY_MPLS_OPT_LSE_UNSPEC,
TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,
TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,
TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,
TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,
TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
__TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX,
};
#define TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX \
(__TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX - 1)
enum {
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
};
#define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */
/* Match-all classifier */
struct tc_matchall_pcnt {
__u64 rhit;
};
enum {
TCA_MATCHALL_UNSPEC,
TCA_MATCHALL_CLASSID,
TCA_MATCHALL_ACT,
TCA_MATCHALL_FLAGS,
TCA_MATCHALL_PCNT,
TCA_MATCHALL_PAD,
__TCA_MATCHALL_MAX,
};
#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1)
/* Extended Matches */
struct tcf_ematch_tree_hdr {
__u16 nmatches;
__u16 progid;
};
enum {
TCA_EMATCH_TREE_UNSPEC,
TCA_EMATCH_TREE_HDR,
TCA_EMATCH_TREE_LIST,
__TCA_EMATCH_TREE_MAX
};
#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
/* Header of a single extended match. */
struct tcf_ematch_hdr {
__u16 matchid;
__u16 kind; /* presumably one of the TCF_EM_* type assignments - TODO confirm */
__u16 flags; /* bit layout is drawn in the diagram following this struct (TCF_EM_* flag macros) */
__u16 pad; /* currently unused */
};
/* 0 1
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
* +-----------------------+-+-+---+
* | Unused |S|I| R |
* +-----------------------+-+-+---+
*
* R(2) ::= relation to next ematch
* where: 0 0 END (last ematch)
* 0 1 AND
* 1 0 OR
* 1 1 Unused (invalid)
* I(1) ::= invert result
* S(1) ::= simple payload
*/
#define TCF_EM_REL_END 0
#define TCF_EM_REL_AND (1<<0)
#define TCF_EM_REL_OR (1<<1)
#define TCF_EM_INVERT (1<<2)
#define TCF_EM_SIMPLE (1<<3)
#define TCF_EM_REL_MASK 3
#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
enum {
TCF_LAYER_LINK,
TCF_LAYER_NETWORK,
TCF_LAYER_TRANSPORT,
__TCF_LAYER_MAX
};
#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1)
/* Ematch type assignments
* 1..32767 Reserved for ematches inside kernel tree
* 32768..65535 Free to use, not reliable
*/
#define TCF_EM_CONTAINER 0
#define TCF_EM_CMP 1
#define TCF_EM_NBYTE 2
#define TCF_EM_U32 3
#define TCF_EM_META 4
#define TCF_EM_TEXT 5
#define TCF_EM_VLAN 6
#define TCF_EM_CANID 7
#define TCF_EM_IPSET 8
#define TCF_EM_IPT 9
#define TCF_EM_MAX 9
enum {
TCF_EM_PROG_TC
};
enum {
TCF_EM_OPND_EQ,
TCF_EM_OPND_GT,
TCF_EM_OPND_LT
};
#endif

View File

@ -1536,6 +1536,24 @@ next_rta:
goto next_rta;
}
/*
 * Map a traffic-control netlink message type to the short object name used
 * in debug dumps ("qdisc", "tclass" or "tfilter"). Any other type yields
 * "unknown", which is not expected to happen.
 */
static const char *tcm_nltype2str(int nltype)
{
	if (nltype == RTM_NEWQDISC || nltype == RTM_DELQDISC)
		return "qdisc";

	if (nltype == RTM_NEWTCLASS || nltype == RTM_DELTCLASS)
		return "tclass";

	if (nltype == RTM_NEWTFILTER || nltype == RTM_DELTFILTER)
		return "tfilter";

	/* should never hit */
	return "unknown";
}
static void nlncm_dump(const struct netconfmsg *ncm, size_t msglen)
{
const struct rtattr *rta;
@ -1595,6 +1613,8 @@ void nl_dump(void *msg, size_t msglen)
struct ifinfomsg *ifi;
struct tunnel_msg *tnlm;
struct fib_rule_hdr *frh;
struct tcmsg *tcm;
char fbuf[128];
char ibuf[128];
@ -1730,6 +1750,21 @@ next_header:
nlncm_dump(ncm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ncm)));
break;
case RTM_NEWQDISC:
case RTM_DELQDISC:
case RTM_NEWTCLASS:
case RTM_DELTCLASS:
case RTM_NEWTFILTER:
case RTM_DELTFILTER:
tcm = NLMSG_DATA(nlmsg);
zlog_debug(
" tcm [type=%s family=%s (%d) ifindex=%d handle=%04x:%04x]",
tcm_nltype2str(nlmsg->nlmsg_type),
af_type2str(tcm->tcm_family), tcm->tcm_family,
tcm->tcm_ifindex, tcm->tcm_handle >> 16,
tcm->tcm_handle & 0xffff);
break;
default:
break;
}

View File

@ -815,6 +815,9 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx)
case DPLANE_OP_INTF_INSTALL:
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
case DPLANE_OP_NONE:
break;

View File

@ -1573,6 +1573,9 @@ void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx)
case DPLANE_OP_IPSET_ENTRY_DELETE:
case DPLANE_OP_NEIGH_TABLE_UPDATE:
case DPLANE_OP_GRE_SET:
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
break; /* should never hit here */
}
}

View File

@ -47,6 +47,7 @@
#include "zebra/rt_netlink.h"
#include "zebra/if_netlink.h"
#include "zebra/rule_netlink.h"
#include "zebra/tc_netlink.h"
#include "zebra/netconf_netlink.h"
#include "zebra/zebra_errors.h"
@ -114,6 +115,15 @@ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
{RTM_NEWTUNNEL, "RTM_NEWTUNNEL"},
{RTM_DELTUNNEL, "RTM_DELTUNNEL"},
{RTM_GETTUNNEL, "RTM_GETTUNNEL"},
{RTM_NEWQDISC, "RTM_NEWQDISC"},
{RTM_DELQDISC, "RTM_DELQDISC"},
{RTM_GETQDISC, "RTM_GETQDISC"},
{RTM_NEWTCLASS, "RTM_NEWTCLASS"},
{RTM_DELTCLASS, "RTM_DELTCLASS"},
{RTM_GETTCLASS, "RTM_GETTCLASS"},
{RTM_NEWTFILTER, "RTM_NEWTFILTER"},
{RTM_DELTFILTER, "RTM_DELTFILTER"},
{RTM_GETTFILTER, "RTM_GETTFILTER"},
{0}};
static const struct message rtproto_str[] = {
@ -1623,6 +1633,11 @@ static enum netlink_msg_status nl_put_msg(struct nl_batch *bth,
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
return netlink_put_intf_update_msg(bth, ctx);
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
return netlink_put_tc_update_msg(bth, ctx);
}
return FRR_NETLINK_ERROR;

View File

@ -1603,6 +1603,12 @@ void kernel_update_multi(struct dplane_ctx_q *ctx_list)
res = kernel_intf_update(ctx);
break;
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
res = kernel_tc_update(ctx);
break;
/* Ignore 'notifications' - no-op */
case DPLANE_OP_SYS_ROUTE_ADD:
case DPLANE_OP_SYS_ROUTE_DELETE:

View File

@ -71,6 +71,7 @@ kernel_intf_update(struct zebra_dplane_ctx *ctx);
extern enum zebra_dplane_result
kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx);
extern enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx);
#endif /* !HAVE_NETLINK */

View File

@ -82,6 +82,8 @@ zebra_zebra_SOURCES = \
zebra/rule_netlink.c \
zebra/rule_socket.c \
zebra/table_manager.c \
zebra/tc_netlink.c \
zebra/tc_socket.c \
zebra/zapi_msg.c \
zebra/zebra_dplane.c \
zebra/zebra_errors.c \
@ -163,6 +165,7 @@ noinst_HEADERS += \
zebra/rtadv.h \
zebra/rule_netlink.h \
zebra/table_manager.h \
zebra/tc_netlink.h \
zebra/zapi_msg.h \
zebra/zebra_dplane.h \
zebra/zebra_errors.h \

468
zebra/tc_netlink.c Normal file
View File

@ -0,0 +1,468 @@
/*
* Zebra Traffic Control (TC) interaction with the kernel using netlink.
*
* Copyright (C) 2022 Shichu Yang
*
* This file is part of FRR.
*
* FRR is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* FRR is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FRR; see the file COPYING. If not, write to the Free
* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
#include <zebra.h>
#ifdef HAVE_NETLINK
#include <linux/if_ether.h>
#include <sys/socket.h>
#include "if.h"
#include "prefix.h"
#include "vrf.h"
#include <linux/fib_rules.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include "zebra/zserv.h"
#include "zebra/zebra_ns.h"
#include "zebra/zebra_vrf.h"
#include "zebra/rt.h"
#include "zebra/interface.h"
#include "zebra/debug.h"
#include "zebra/rtadv.h"
#include "zebra/kernel_netlink.h"
#include "zebra/tc_netlink.h"
#include "zebra/zebra_errors.h"
#include "zebra/zebra_dplane.h"
#include "zebra/zebra_trace.h"
/* TODO: move these bitflags to zebra_tc.h */
/* Bits tested against the bitmap returned by dplane_ctx_tc_get_filter_bm()
 * to decide which flower match keys are added to a filter message.
 */
#define TC_FILTER_SRC_IP (1 << 0)
#define TC_FILTER_DST_IP (1 << 1)
#define TC_FILTER_IP_PROTOCOL (1 << 9)
/* Value returned by tc_get_freq() when /proc/net/psched yields nothing. */
#define TC_FREQ_DEFAULT (100)
/* Added to the interface index to form the major number of a handle. */
#define TC_MAJOR_BASE (0x1000u)
/* Used as the default class (defcls) of the htb qdisc. */
#define TC_MINOR_NOCLASS (0xffffu)
/* NOTE(review): not referenced in the visible part of this file. */
#define TC_FILTER_MASK (0x8000u)
/* Scale factor of xmittime(); presumably microseconds per second - TODO confirm. */
#define TIME_UNITS_PER_SEC (1000000)
/* Time (in TIME_UNITS_PER_SEC-ths of a second) to send size s at rate r,
 * computed in double precision. r must not be zero.
 */
#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r)))
/*
 * Read the scheduler frequency from /proc/net/psched.
 *
 * The first two hexadecimal fields of the file are skipped. When the third
 * field equals 1000000, the fourth field is taken as the frequency.
 *
 * Returns TC_FREQ_DEFAULT when the file cannot be opened or parsed, when the
 * third field has another value, or when the frequency read is zero; the
 * result is therefore never zero.
 */
static uint32_t tc_get_freq(void)
{
	/* kept unsigned end to end: the value is returned as uint32_t */
	uint32_t freq = 0;
	/* "%x" stores through an unsigned int pointer */
	unsigned int nom = 0;
	unsigned int denom = 0;
	FILE *fp = fopen("/proc/net/psched", "r");

	if (fp) {
		if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2
		    && nom == 1000000)
			freq = denom;
		fclose(fp);
	}

	return freq == 0 ? TC_FREQ_DEFAULT : freq;
}
/*
 * Combine a 16-bit major and a 16-bit minor number into a 32-bit handle:
 * major in the upper half, minor in the lower half.
 *
 * major is widened to uint32_t before shifting; shifting the promoted
 * (signed int) value would overflow for major >= 0x8000.
 */
static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor)
{
	return ((uint32_t)major << 16) | (uint32_t)minor;
}
/*
 * Build the handle for the given minor number on the interface carried by
 * the dplane context. The major number is TC_MAJOR_BASE plus the interface
 * index, both truncated to 16 bits.
 */
static inline uint32_t tc_get_handle(struct zebra_dplane_ctx *ctx,
				     uint16_t minor)
{
	uint16_t ifidx = (uint16_t)dplane_ctx_get_ifindex(ctx);
	uint16_t major = TC_MAJOR_BASE + ifidx;

	return tc_make_handle(major, minor);
}
/*
 * Fill a 256-entry rate table for the given rate specification.
 *
 * ratespec: rate to compute for; cell_align, cell_log and linklayer are
 *           written on return.
 * table:    output array of 256 entries; entry i receives
 *           xmittime(rate, (i + 1) << cell_log).
 * mtu:      largest size the table has to cover; 0 selects 2047.
 */
static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table,
			       uint32_t mtu)
{
	int shift = 0;
	int slot;

	if (!mtu)
		mtu = 2047;

	/* smallest shift for which (mtu >> shift) fits into 0..255 */
	for (; (mtu >> shift) > 255; shift++)
		;

	for (slot = 0; slot < 256; slot++)
		table[slot] = xmittime(ratespec->rate, (slot + 1) << shift);

	ratespec->cell_align = -1;
	ratespec->cell_log = shift;
	ratespec->linklayer = TC_LINKLAYER_ETHERNET;
}
/*
 * Convert a prefix into the inet_prefix form used when building flower
 * match attributes: family, address length, prefix length, flags and the
 * raw address bytes.
 *
 * Returns 0 on success, -1 when the family is neither AF_INET nor AF_INET6
 * (in which case only addr->family has been written).
 */
static int tc_flower_get_inet_prefix(const struct prefix *prefix,
				     struct inet_prefix *addr)
{
	addr->family = prefix->family;

	switch (addr->family) {
	case AF_INET:
		addr->bytelen = 4;
		memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32));
		break;
	case AF_INET6:
		addr->bytelen = 16;
		memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val));
		break;
	default:
		return -1;
	}

	addr->bitlen = prefix->prefixlen;
	/* both families carry the same flag set */
	addr->flags = PREFIXLEN_SPECIFIED | ADDRTYPE_INET;

	return 0;
}
/*
 * Build the network mask matching the length of a prefix, in the inet_prefix
 * form used for flower mask attributes. The mask is produced one 32-bit
 * word at a time, each partially covered word being converted with htonl().
 *
 * Returns 0 on success, -1 when the family is neither AF_INET nor AF_INET6
 * (in which case only addr->family has been written).
 */
static int tc_flower_get_inet_mask(const struct prefix *prefix,
				   struct inet_prefix *addr)
{
	int remaining;
	int word;

	addr->family = prefix->family;

	switch (addr->family) {
	case AF_INET:
		addr->bytelen = 4;
		break;
	case AF_INET6:
		addr->bytelen = 16;
		break;
	default:
		return -1;
	}

	addr->bitlen = prefix->prefixlen;
	addr->flags = PREFIXLEN_SPECIFIED | ADDRTYPE_INET;

	/* start from an all-ones mask and clear what the prefix leaves out */
	memset(addr->data, 0xff, addr->bytelen);

	remaining = prefix->prefixlen;
	for (word = 0; word < addr->bytelen / 4; word++) {
		if (remaining >= 32) {
			/* word fully covered by the prefix: keep all ones */
			remaining -= 32;
		} else if (remaining > 0) {
			/* word partially covered: keep the top bits only */
			addr->data[word] =
				htonl(addr->data[word] << (32 - remaining));
			remaining = 0;
		} else {
			/* word entirely past the prefix */
			addr->data[word] = 0;
		}
	}

	return 0;
}
/*
* Traffic control queue discipline encoding (only "htb" supported)
*/
static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
void *data, size_t datalen)
{
struct nlsock *nl;
const char *kind = "htb";
struct tc_htb_glob htb_glob = {
.rate2quantum = 10, .version = 3, .defcls = TC_MINOR_NOCLASS};
struct rtattr *nest;
struct {
struct nlmsghdr n;
struct tcmsg t;
char buf[0];
} *req = (void *)data;
if (datalen < sizeof(*req))
return 0;
nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
memset(req, 0, sizeof(*req));
req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
req->n.nlmsg_flags |= NLM_F_REPLACE;
req->n.nlmsg_type = cmd;
req->n.nlmsg_pid = nl->snl.nl_pid;
req->t.tcm_family = AF_UNSPEC;
req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
req->t.tcm_handle = tc_get_handle(ctx, 0);
req->t.tcm_parent = TC_H_ROOT;
nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob,
sizeof(htb_glob));
nl_attr_nest_end(&req->n, nest);
return NLMSG_ALIGN(req->n.nlmsg_len);
}
/*
* Traffic control class encoding
*/
static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
void *data, size_t datalen)
{
struct nlsock *nl;
struct tc_htb_opt htb_opt = {};
uint64_t rate, ceil;
uint64_t buffer, cbuffer;
/* TODO: fetch mtu from interface */
uint32_t mtu = 0;
uint32_t rtab[256];
uint32_t ctab[256];
struct rtattr *nest;
struct {
struct nlmsghdr n;
struct tcmsg t;
char buf[0];
} *req = (void *)data;
if (datalen < sizeof(*req))
return 0;
nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
memset(req, 0, sizeof(*req));
req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
req->n.nlmsg_type = cmd;
req->n.nlmsg_pid = nl->snl.nl_pid;
req->t.tcm_family = AF_UNSPEC;
req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
req->t.tcm_handle = tc_get_handle(ctx, 1);
req->t.tcm_parent = tc_get_handle(ctx, 0);
rate = dplane_ctx_tc_get_rate(ctx);
ceil = dplane_ctx_tc_get_ceil(ctx);
ceil = ceil < rate ? rate : ceil;
htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate;
htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil;
buffer = rate / tc_get_freq(), cbuffer = ceil / tc_get_freq();
htb_opt.buffer = buffer;
htb_opt.cbuffer = cbuffer;
tc_calc_rate_table(&htb_opt.rate, rtab, mtu);
tc_calc_rate_table(&htb_opt.ceil, rtab, mtu);
htb_opt.ceil.mpu = htb_opt.rate.mpu = 0;
htb_opt.ceil.overhead = htb_opt.rate.overhead = 0;
nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
if (rate >> 32 != 0) {
nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &rate,
sizeof(rate));
}
if (ceil >> 32 != 0) {
nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &ceil,
sizeof(ceil));
}
nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, sizeof(htb_opt));
nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, sizeof(rtab));
nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, sizeof(ctab));
nl_attr_nest_end(&req->n, nest);
return NLMSG_ALIGN(req->n.nlmsg_len);
}
/*
* Traffic control filter encoding (only "flower" supported)
*/
/*
 * Append one flower address match for prefix 'p': the address attribute
 * followed by its mask attribute. The IPv4 attribute types are used when the
 * prefix family is AF_INET, the IPv6 ones otherwise.
 *
 * Returns 0 on success, -1 when the prefix cannot be converted.
 */
static int tc_flower_put_prefix(struct nlmsghdr *n, size_t datalen,
				const struct prefix *p, int v4_key,
				int v4_mask, int v6_key, int v6_mask)
{
	struct inet_prefix addr;

	if (tc_flower_get_inet_prefix(p, &addr) != 0)
		return -1;

	nl_attr_put(n, datalen, (addr.family == AF_INET) ? v4_key : v6_key,
		    addr.data, addr.bytelen);

	if (tc_flower_get_inet_mask(p, &addr) != 0)
		return -1;

	nl_attr_put(n, datalen, (addr.family == AF_INET) ? v4_mask : v6_mask,
		    addr.data, addr.bytelen);

	return 0;
}

/*
 * Encode a traffic-control filter netlink message of type 'cmd' into 'data'.
 *
 * The filter is of kind "flower", attached to the qdisc with handle
 * tc_get_handle(ctx, 0) and directing matches to class
 * tc_get_handle(ctx, 1). Source prefix, destination prefix and IP protocol
 * matches are added according to the filter bitmap of the dplane context.
 *
 * Returns the aligned message length, or 0 when 'datalen' cannot even hold
 * the fixed headers or when a prefix has an unsupported family.
 */
static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
					  void *data, size_t datalen)
{
	static const char filter_kind[] = "flower";
	struct nlsock *sock;
	struct rtattr *opts;
	uint16_t priority;
	uint16_t protocol;
	uint32_t classid;
	uint32_t match_bits;
	uint32_t flags = 0;
	struct {
		struct nlmsghdr n;
		struct tcmsg t;
		char buf[0];
	} *req = (void *)data;

	if (datalen < sizeof(*req))
		return 0;

	sock = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));

	memset(req, 0, sizeof(*req));

	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
	req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST | NLM_F_EXCL;
	req->n.nlmsg_type = cmd;
	req->n.nlmsg_pid = sock->snl.nl_pid;

	req->t.tcm_family = AF_UNSPEC;
	req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);

	/* TODO: priority and layer-3 protocol support */
	priority = 0;
	protocol = htons(ETH_P_IP);
	classid = tc_get_handle(ctx, 1);
	match_bits = dplane_ctx_tc_get_filter_bm(ctx);

	/* priority in the upper half, protocol in the lower half */
	req->t.tcm_info = tc_make_handle(priority, protocol);
	req->t.tcm_handle = 1;
	req->t.tcm_parent = tc_get_handle(ctx, 0);

	/* sizeof includes the terminating NUL, like strlen() + 1 */
	nl_attr_put(&req->n, datalen, TCA_KIND, filter_kind,
		    sizeof(filter_kind));

	opts = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);

	nl_attr_put(&req->n, datalen, TCA_FLOWER_CLASSID, &classid,
		    sizeof(classid));

	if (match_bits & TC_FILTER_SRC_IP) {
		if (tc_flower_put_prefix(&req->n, datalen,
					 dplane_ctx_tc_get_src_ip(ctx),
					 TCA_FLOWER_KEY_IPV4_SRC,
					 TCA_FLOWER_KEY_IPV4_SRC_MASK,
					 TCA_FLOWER_KEY_IPV6_SRC,
					 TCA_FLOWER_KEY_IPV6_SRC_MASK) != 0)
			return 0;
	}

	if (match_bits & TC_FILTER_DST_IP) {
		if (tc_flower_put_prefix(&req->n, datalen,
					 dplane_ctx_tc_get_dst_ip(ctx),
					 TCA_FLOWER_KEY_IPV4_DST,
					 TCA_FLOWER_KEY_IPV4_DST_MASK,
					 TCA_FLOWER_KEY_IPV6_DST,
					 TCA_FLOWER_KEY_IPV6_DST_MASK) != 0)
			return 0;
	}

	if (match_bits & TC_FILTER_IP_PROTOCOL)
		nl_attr_put8(&req->n, datalen, TCA_FLOWER_KEY_IP_PROTO,
			     dplane_ctx_tc_get_ip_proto(ctx));

	nl_attr_put32(&req->n, datalen, TCA_FLOWER_FLAGS, flags);

	nl_attr_put16(&req->n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol);
	nl_attr_nest_end(&req->n, opts);

	return NLMSG_ALIGN(req->n.nlmsg_len);
}
/*
 * Batch encoder callback: encode an RTM_NEWQDISC message for 'ctx' into
 * 'buf' and return what netlink_qdisc_msg_encode() returns.
 */
static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx,
void *buf, size_t buflen)
{
return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen);
}
/*
 * Batch encoder callback: encode an RTM_NEWTCLASS message for 'ctx' into
 * 'buf' and return what netlink_tclass_msg_encode() returns.
 */
static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx,
void *buf, size_t buflen)
{
return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen);
}
/*
 * Batch encoder callback: encode an RTM_NEWTFILTER message for 'ctx' into
 * 'buf' and return what netlink_tfilter_msg_encode() returns.
 */
static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx,
void *buf, size_t buflen)
{
return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen);
}
/*
 * Add the netlink messages for a traffic-control dplane context to a batch:
 * a qdisc message, then a class message, then a filter message.
 *
 * Only the status of the last netlink_batch_add_msg() call (the filter
 * message) is returned; the results of the first two calls are discarded.
 */
enum netlink_msg_status netlink_put_tc_update_msg(struct nl_batch *bth,
struct zebra_dplane_ctx *ctx)
{
/* TODO: error handling and other actions (delete, replace, ...) */
/* NOTE(review): the three encoders always build RTM_NEW* messages, whatever
 * operation the context carries - confirm this is intended for deletes.
 */
netlink_batch_add_msg(bth, ctx, netlink_newqdisc_msg_encoder, false);
netlink_batch_add_msg(bth, ctx, netlink_newtclass_msg_encoder, false);
return netlink_batch_add_msg(bth, ctx, netlink_newtfilter_msg_encoder,
false);
}
#endif /* HAVE_NETLINK */

62
zebra/tc_netlink.h Normal file
View File

@ -0,0 +1,62 @@
/*
* Zebra Traffic Control (TC) interaction with the kernel using netlink.
*
* Copyright (C) 2022 Shichu Yang
*
* This file is part of FRR.
*
* FRR is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* FRR is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FRR; see the file COPYING. If not, write to the Free
* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
#ifndef _ZEBRA_TC_NETLINK_H
#define _ZEBRA_TC_NETLINK_H
#ifdef HAVE_NETLINK
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Represent a prefixed address in flower filter.
 *
 * The flower filter encoder in tc_netlink.c fills one of these from a
 * struct prefix and then emits 'data'/'bytelen' as the payload and length
 * of the address and mask netlink attributes, choosing the IPv4 or IPv6
 * attribute type by comparing 'family' against AF_INET.
 */
struct inet_prefix {
/* NOTE(review): presumably a combination of the PREFIXLEN_ / ADDRTYPE_ bits
 * from the enum below - confirm against the code that fills this struct.
 */
uint16_t flags;
/* Number of bytes of 'data' that are passed as the attribute payload */
uint16_t bytelen;
/* NOTE(review): presumably the prefix length in bits - confirm */
uint16_t bitlen;
/* Address family; AF_INET selects the IPv4 flower keys, else IPv6 keys */
uint16_t family;
/* Address (or mask) storage handed to nl_attr_put() together with bytelen */
uint32_t data[64];
};
/*
 * Single-bit flag values plus two convenience combinations of them.
 * NOTE(review): these look like the values meant for inet_prefix.flags;
 * no user is visible in this header - confirm before relying on that.
 */
enum {
PREFIXLEN_SPECIFIED = (1 << 0),
ADDRTYPE_INET = (1 << 1),
ADDRTYPE_UNSPEC = (1 << 2),
ADDRTYPE_MULTI = (1 << 3),
/* Combined masks built from the single-bit ADDRTYPE_ values above */
ADDRTYPE_INET_UNSPEC = ADDRTYPE_INET | ADDRTYPE_UNSPEC,
ADDRTYPE_INET_MULTI = ADDRTYPE_INET | ADDRTYPE_MULTI
};
/*
 * Add the netlink messages for the traffic control update described by
 * 'ctx' to the batch 'bth', and return a netlink_msg_status for the
 * batching operation.
 */
extern enum netlink_msg_status
netlink_put_tc_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx);
#ifdef __cplusplus
}
#endif
#endif /* HAVE_NETLINK */
#endif /* _ZEBRA_TC_NETLINK_H */

41
zebra/tc_socket.c Normal file
View File

@ -0,0 +1,41 @@
/*
* Zebra Traffic Control (TC) interaction with the kernel using socket.
*
* Copyright (C) 2022 Shichu Yang
*
* This file is part of FRR.
*
* FRR is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* FRR is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FRR; see the file COPYING. If not, write to the Free
* Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
#include <zebra.h>
#ifndef HAVE_NETLINK
#include "lib_errors.h"
#include "zebra/rt.h"
#include "zebra/zebra_dplane.h"
#include "zebra/zebra_errors.h"
enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx)
{
flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform",
__func__);
return ZEBRA_DPLANE_REQUEST_FAILURE;
}
#endif /* !HAVE_NETLINK */

View File

@ -313,6 +313,25 @@ struct dplane_netconf_info {
enum dplane_netconf_status_e linkdown_val;
};
/*
 * Traffic control contexts for the dplane.
 *
 * Stored as the 'tc' member of the per-operation union in
 * struct zebra_dplane_ctx and read through the dplane_ctx_tc_get_*()
 * accessors.
 */
struct dplane_tc_info {
/* Rate spec (unit: Bytes/s) */
uint64_t rate;
uint64_t ceil;
/* TODO: custom burst */
/* Filter components for "tfilter" */
/* Bitmask of TC_FILTER_* flags (e.g. TC_FILTER_DST_IP,
 * TC_FILTER_IP_PROTOCOL); the netlink flower encoder only emits the
 * match fields whose bit is set here.
 */
uint32_t filter_bm;
/* Source and destination prefixes; encoded as address + mask attributes */
struct prefix src_ip;
struct prefix dst_ip;
/* Value written to the TCA_FLOWER_KEY_IP_PROTO attribute */
uint8_t ip_proto;
/* TODO: more filter components */
};
/*
* The context block used to exchange info about route updates across
* the boundary between the zebra main context (and pthread) and the
@ -362,6 +381,7 @@ struct zebra_dplane_ctx {
struct dplane_mac_info macinfo;
struct dplane_neigh_info neigh;
struct dplane_rule_info rule;
struct dplane_tc_info tc;
struct zebra_pbr_iptable iptable;
struct zebra_pbr_ipset ipset;
struct {
@ -540,6 +560,9 @@ static struct zebra_dplane_globals {
_Atomic uint32_t dg_intfs_in;
_Atomic uint32_t dg_intf_errors;
_Atomic uint32_t dg_tcs_in;
_Atomic uint32_t dg_tcs_errors;
/* Dataplane pthread */
struct frr_pthread *dg_pthread;
@ -777,6 +800,9 @@ static void dplane_ctx_free_internal(struct zebra_dplane_ctx *ctx)
case DPLANE_OP_INTF_INSTALL:
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
break;
case DPLANE_OP_IPSET_ENTRY_ADD:
@ -1100,6 +1126,16 @@ const char *dplane_op2str(enum dplane_op_e op)
case DPLANE_OP_INTF_DELETE:
ret = "INTF_DELETE";
break;
case DPLANE_OP_TC_INSTALL:
ret = "TC_INSTALL";
break;
case DPLANE_OP_TC_UPDATE:
ret = "TC_UPDATE";
break;
case DPLANE_OP_TC_DELETE:
ret = "TC_DELETE";
break;
}
return ret;
@ -1419,6 +1455,50 @@ uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx)
return ctx->u.rinfo.zd_old_distance;
}
uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
return ctx->u.tc.rate;
}
uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
return ctx->u.tc.ceil;
}
uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
return ctx->u.tc.filter_bm;
}
/*
 * Accessor: pointer to the source prefix stored inside 'ctx'. The pointer
 * refers to storage within the context itself, not to a copy.
 */
const struct prefix *
dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx)
{
	const struct prefix *src;

	DPLANE_CTX_VALID(ctx);

	src = &ctx->u.tc.src_ip;
	return src;
}
/*
 * Accessor: pointer to the destination prefix stored inside 'ctx'. The
 * pointer refers to storage within the context itself, not to a copy.
 */
const struct prefix *
dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx)
{
	const struct prefix *dst;

	DPLANE_CTX_VALID(ctx);

	dst = &ctx->u.tc.dst_ip;
	return dst;
}
uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx)
{
DPLANE_CTX_VALID(ctx);
return ctx->u.tc.ip_proto;
}
/*
* Set the nexthops associated with a context: note that processing code
* may well expect that nexthops are in canonical (sorted) order, so we
@ -2691,6 +2771,26 @@ done:
return ret;
}
/*
 * Initialize a context block for a traffic control operation.
 *
 * Records 'op' in the context, marks its status as
 * ZEBRA_DPLANE_REQUEST_SUCCESS and attaches the namespace returned by
 * zebra_ns_lookup(NS_DEFAULT). No vrf is associated with the request yet,
 * so the default namespace is always used. Always returns AOK.
 */
int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op)
{
	struct zebra_ns *default_ns;

	ctx->zd_op = op;
	ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS;

	/* TODO: init traffic control qdisc */

	default_ns = zebra_ns_lookup(NS_DEFAULT);
	dplane_ctx_ns_init(ctx, default_ns, true);

	return AOK;
}
/**
* dplane_ctx_nexthop_init() - Initialize a context block for a nexthop update
*
@ -3410,6 +3510,47 @@ dplane_route_update_internal(struct route_node *rn,
return result;
}
/*
 * Helper for enqueuing traffic control changes.
 *
 * Allocates a context, initializes it for 'op' via dplane_ctx_tc_init()
 * and hands it to dplane_update_enqueue(). Every call increments the
 * dg_tcs_in counter; a failure at any step also increments dg_tcs_errors
 * and releases the context if one was allocated.
 *
 * Returns ZEBRA_DPLANE_REQUEST_QUEUED when the context was enqueued,
 * ZEBRA_DPLANE_REQUEST_FAILURE otherwise.
 */
static enum zebra_dplane_result dplane_tc_update_internal(enum dplane_op_e op)
{
	struct zebra_dplane_ctx *ctx;
	int rc;

	/* Obtain context block */
	ctx = dplane_ctx_alloc();
	if (ctx == NULL) {
		rc = ENOMEM;
	} else {
		/* Init context with info from zebra data structs */
		rc = dplane_ctx_tc_init(ctx, op);
		if (rc == AOK)
			rc = dplane_update_enqueue(ctx);
	}

	/* Update counter: counts attempts, successful or not */
	atomic_fetch_add_explicit(&zdplane_info.dg_tcs_in, 1,
				  memory_order_relaxed);

	if (rc == AOK)
		return ZEBRA_DPLANE_REQUEST_QUEUED;

	/* Failure path: account for the error and drop the context */
	atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, 1,
				  memory_order_relaxed);
	if (ctx)
		dplane_ctx_free(&ctx);

	return ZEBRA_DPLANE_REQUEST_FAILURE;
}
enum zebra_dplane_result dplane_tc_update(void)
{
return dplane_tc_update_internal(DPLANE_OP_TC_UPDATE);
}
/**
* dplane_nexthop_update_internal() - Helper for enqueuing nexthop changes
*
@ -5591,6 +5732,13 @@ static void kernel_dplane_log_detail(struct zebra_dplane_ctx *ctx)
dplane_ctx_get_ifindex(ctx),
dplane_ctx_intf_is_protodown(ctx));
break;
/* TODO: more detailed log */
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
zlog_debug("Dplane tc ifidx %u", dplane_ctx_get_ifindex(ctx));
break;
}
}
@ -5734,6 +5882,14 @@ static void kernel_dplane_handle_result(struct zebra_dplane_ctx *ctx)
1, memory_order_relaxed);
break;
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
if (res != ZEBRA_DPLANE_REQUEST_SUCCESS)
atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors,
1, memory_order_relaxed);
break;
/* Ignore 'notifications' - no-op */
case DPLANE_OP_SYS_ROUTE_ADD:
case DPLANE_OP_SYS_ROUTE_DELETE:

View File

@ -193,6 +193,11 @@ enum dplane_op_e {
DPLANE_OP_INTF_INSTALL,
DPLANE_OP_INTF_UPDATE,
DPLANE_OP_INTF_DELETE,
/* Traffic control */
DPLANE_OP_TC_INSTALL,
DPLANE_OP_TC_UPDATE,
DPLANE_OP_TC_DELETE,
};
/*
@ -378,6 +383,16 @@ uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx);
void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance);
uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx);
/* Accessors for traffic control context */
uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx);
uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx);
uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx);
const struct prefix *
dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx);
const struct prefix *
dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx);
uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx);
void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh);
void dplane_ctx_set_backup_nhg(struct zebra_dplane_ctx *ctx,
const struct nexthop_group *nhg);
@ -707,6 +722,13 @@ enum zebra_dplane_result dplane_intf_add(const struct interface *ifp);
enum zebra_dplane_result dplane_intf_update(const struct interface *ifp);
enum zebra_dplane_result dplane_intf_delete(const struct interface *ifp);
/*
 * Enqueue traffic control changes for the dataplane.
 */
enum zebra_dplane_result dplane_tc_add(void);
enum zebra_dplane_result dplane_tc_update(void);
enum zebra_dplane_result dplane_tc_delete(void);
/*
* Link layer operations for the dataplane.
*/
@ -849,6 +871,9 @@ int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op,
int dplane_ctx_intf_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op,
const struct interface *ifp);
/* Encode traffic control information into data plane context. */
int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op);
/* Retrieve the limit on the number of pending, unprocessed updates. */
uint32_t dplane_get_in_queue_limit(void);

View File

@ -3125,6 +3125,9 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx)
case DPLANE_OP_INTF_INSTALL:
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
break;
}
}

View File

@ -4391,6 +4391,11 @@ static void rib_process_dplane_results(struct thread *thread)
zebra_if_dplane_result(ctx);
break;
case DPLANE_OP_TC_INSTALL:
case DPLANE_OP_TC_UPDATE:
case DPLANE_OP_TC_DELETE:
break;
/* Some op codes not handled here */
case DPLANE_OP_ADDR_INSTALL:
case DPLANE_OP_ADDR_UNINSTALL: