diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h new file mode 100644 index 0000000000..7ea59cfe1f --- /dev/null +++ b/include/linux/pkt_cls.h @@ -0,0 +1,776 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_PKT_CLS_H +#define __LINUX_PKT_CLS_H + +#include +#include + +#define TC_COOKIE_MAX_SIZE 16 + +/* Action attributes */ +enum { + TCA_ACT_UNSPEC, + TCA_ACT_KIND, + TCA_ACT_OPTIONS, + TCA_ACT_INDEX, + TCA_ACT_STATS, + TCA_ACT_PAD, + TCA_ACT_COOKIE, + TCA_ACT_FLAGS, + TCA_ACT_HW_STATS, + TCA_ACT_USED_HW_STATS, + __TCA_ACT_MAX +}; + +#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for + * actions stats. + */ + +/* tca HW stats type + * When user does not pass the attribute, he does not care. + * It is the same as if he would pass the attribute with + * all supported bits set. + * In case no bits are set, user is not interested in getting any HW statistics. + */ +#define TCA_ACT_HW_STATS_IMMEDIATE (1 << 0) /* Means that in dump, user + * gets the current HW stats + * state from the device + * queried at the dump time. + */ +#define TCA_ACT_HW_STATS_DELAYED (1 << 1) /* Means that in dump, user gets + * HW stats that might be out of date + * for some time, maybe couple of + * seconds. This is the case when + * driver polls stats updates + * periodically or when it gets async + * stats update from the device. 
+ */ + +#define TCA_ACT_MAX __TCA_ACT_MAX +#define TCA_OLD_COMPAT (TCA_ACT_MAX+1) +#define TCA_ACT_MAX_PRIO 32 +#define TCA_ACT_BIND 1 +#define TCA_ACT_NOBIND 0 +#define TCA_ACT_UNBIND 1 +#define TCA_ACT_NOUNBIND 0 +#define TCA_ACT_REPLACE 1 +#define TCA_ACT_NOREPLACE 0 + +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 +#define TC_ACT_RECLASSIFY 1 +#define TC_ACT_SHOT 2 +#define TC_ACT_PIPE 3 +#define TC_ACT_STOLEN 4 +#define TC_ACT_QUEUED 5 +#define TC_ACT_REPEAT 6 +#define TC_ACT_REDIRECT 7 +#define TC_ACT_TRAP 8 /* For hw path, this means "trap to cpu" + * and don't further process the frame + * in hardware. For sw path, this is + * equivalent of TC_ACT_STOLEN - drop + * the skb and act like everything + * is alright. + */ +#define TC_ACT_VALUE_MAX TC_ACT_TRAP + +/* There is a special kind of actions called "extended actions", + * which need a value parameter. These have a local opcode located in + * the highest nibble, starting from 1. The rest of the bits + * are used to carry the value. These two parts together make + * a combined opcode. + */ +#define __TC_ACT_EXT_SHIFT 28 +#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT) +#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1) +#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK)) +#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode) + +#define TC_ACT_JUMP __TC_ACT_EXT(1) +#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) +#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN + +/* These macros are put here for binary compatibility with userspace apps that + * make use of them. For kernel code and new userspace apps, use the TCA_ID_* + * versions. 
+ */ +#define TCA_ACT_GACT 5 +#define TCA_ACT_IPT 6 +#define TCA_ACT_PEDIT 7 +#define TCA_ACT_MIRRED 8 +#define TCA_ACT_NAT 9 +#define TCA_ACT_XT 10 +#define TCA_ACT_SKBEDIT 11 +#define TCA_ACT_VLAN 12 +#define TCA_ACT_BPF 13 +#define TCA_ACT_CONNMARK 14 +#define TCA_ACT_SKBMOD 15 +#define TCA_ACT_CSUM 16 +#define TCA_ACT_TUNNEL_KEY 17 +#define TCA_ACT_SIMP 22 +#define TCA_ACT_IFE 25 +#define TCA_ACT_SAMPLE 26 + +/* Action type identifiers*/ +enum tca_id { + TCA_ID_UNSPEC = 0, + TCA_ID_POLICE = 1, + TCA_ID_GACT = TCA_ACT_GACT, + TCA_ID_IPT = TCA_ACT_IPT, + TCA_ID_PEDIT = TCA_ACT_PEDIT, + TCA_ID_MIRRED = TCA_ACT_MIRRED, + TCA_ID_NAT = TCA_ACT_NAT, + TCA_ID_XT = TCA_ACT_XT, + TCA_ID_SKBEDIT = TCA_ACT_SKBEDIT, + TCA_ID_VLAN = TCA_ACT_VLAN, + TCA_ID_BPF = TCA_ACT_BPF, + TCA_ID_CONNMARK = TCA_ACT_CONNMARK, + TCA_ID_SKBMOD = TCA_ACT_SKBMOD, + TCA_ID_CSUM = TCA_ACT_CSUM, + TCA_ID_TUNNEL_KEY = TCA_ACT_TUNNEL_KEY, + TCA_ID_SIMP = TCA_ACT_SIMP, + TCA_ID_IFE = TCA_ACT_IFE, + TCA_ID_SAMPLE = TCA_ACT_SAMPLE, + TCA_ID_CTINFO, + TCA_ID_MPLS, + TCA_ID_CT, + TCA_ID_GATE, + /* other actions go here */ + __TCA_ID_MAX = 255 +}; + +#define TCA_ID_MAX __TCA_ID_MAX + +struct tc_police { + __u32 index; + int action; +#define TC_POLICE_UNSPEC TC_ACT_UNSPEC +#define TC_POLICE_OK TC_ACT_OK +#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY +#define TC_POLICE_SHOT TC_ACT_SHOT +#define TC_POLICE_PIPE TC_ACT_PIPE + + __u32 limit; + __u32 burst; + __u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; + int refcnt; + int bindcnt; + __u32 capab; +}; + +struct tcf_t { + __u64 install; + __u64 lastuse; + __u64 expires; + __u64 firstuse; +}; + +struct tc_cnt { + int refcnt; + int bindcnt; +}; + +#define tc_gen \ + __u32 index; \ + __u32 capab; \ + int action; \ + int refcnt; \ + int bindcnt + +enum { + TCA_POLICE_UNSPEC, + TCA_POLICE_TBF, + TCA_POLICE_RATE, + TCA_POLICE_PEAKRATE, + TCA_POLICE_AVRATE, + TCA_POLICE_RESULT, + TCA_POLICE_TM, + TCA_POLICE_PAD, + TCA_POLICE_RATE64, + 
TCA_POLICE_PEAKRATE64, + __TCA_POLICE_MAX +#define TCA_POLICE_RESULT TCA_POLICE_RESULT +}; + +#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1) + +/* tca flags definitions */ +#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) /* don't offload filter to HW */ +#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */ +#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */ +#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */ +#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */ + +/* U32 filters */ + +#define TC_U32_HTID(h) ((h)&0xFFF00000) +#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20) +#define TC_U32_HASH(h) (((h)>>12)&0xFF) +#define TC_U32_NODE(h) ((h)&0xFFF) +#define TC_U32_KEY(h) ((h)&0xFFFFF) +#define TC_U32_UNSPEC 0 +#define TC_U32_ROOT (0xFFF00000) + +enum { + TCA_U32_UNSPEC, + TCA_U32_CLASSID, + TCA_U32_HASH, + TCA_U32_LINK, + TCA_U32_DIVISOR, + TCA_U32_SEL, + TCA_U32_POLICE, + TCA_U32_ACT, + TCA_U32_INDEV, + TCA_U32_PCNT, + TCA_U32_MARK, + TCA_U32_FLAGS, + TCA_U32_PAD, + __TCA_U32_MAX +}; + +#define TCA_U32_MAX (__TCA_U32_MAX - 1) + +struct tc_u32_key { + __be32 mask; + __be32 val; + int off; + int offmask; +}; + +struct tc_u32_sel { + unsigned char flags; + unsigned char offshift; + unsigned char nkeys; + + __be16 offmask; + __u16 off; + short offoff; + + short hoff; + __be32 hmask; + struct tc_u32_key keys[0]; +}; + +struct tc_u32_mark { + __u32 val; + __u32 mask; + __u32 success; +}; + +struct tc_u32_pcnt { + __u64 rcnt; + __u64 rhit; + __u64 kcnts[0]; +}; + +/* Flags */ + +#define TC_U32_TERMINAL 1 +#define TC_U32_OFFSET 2 +#define TC_U32_VAROFFSET 4 +#define TC_U32_EAT 8 + +#define TC_U32_MAXDEPTH 8 + + +/* RSVP filter */ + +enum { + TCA_RSVP_UNSPEC, + TCA_RSVP_CLASSID, + TCA_RSVP_DST, + TCA_RSVP_SRC, + TCA_RSVP_PINFO, + TCA_RSVP_POLICE, + TCA_RSVP_ACT, + __TCA_RSVP_MAX +}; + +#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) + +struct tc_rsvp_gpi { + __u32 key; + __u32 mask; + int offset; +}; + +struct tc_rsvp_pinfo 
{ + struct tc_rsvp_gpi dpi; + struct tc_rsvp_gpi spi; + __u8 protocol; + __u8 tunnelid; + __u8 tunnelhdr; + __u8 pad; +}; + +/* ROUTE filter */ + +enum { + TCA_ROUTE4_UNSPEC, + TCA_ROUTE4_CLASSID, + TCA_ROUTE4_TO, + TCA_ROUTE4_FROM, + TCA_ROUTE4_IIF, + TCA_ROUTE4_POLICE, + TCA_ROUTE4_ACT, + __TCA_ROUTE4_MAX +}; + +#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1) + + +/* FW filter */ + +enum { + TCA_FW_UNSPEC, + TCA_FW_CLASSID, + TCA_FW_POLICE, + TCA_FW_INDEV, + TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, + __TCA_FW_MAX +}; + +#define TCA_FW_MAX (__TCA_FW_MAX - 1) + +/* TC index filter */ + +enum { + TCA_TCINDEX_UNSPEC, + TCA_TCINDEX_HASH, + TCA_TCINDEX_MASK, + TCA_TCINDEX_SHIFT, + TCA_TCINDEX_FALL_THROUGH, + TCA_TCINDEX_CLASSID, + TCA_TCINDEX_POLICE, + TCA_TCINDEX_ACT, + __TCA_TCINDEX_MAX +}; + +#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) + +/* Flow filter */ + +enum { + FLOW_KEY_SRC, + FLOW_KEY_DST, + FLOW_KEY_PROTO, + FLOW_KEY_PROTO_SRC, + FLOW_KEY_PROTO_DST, + FLOW_KEY_IIF, + FLOW_KEY_PRIORITY, + FLOW_KEY_MARK, + FLOW_KEY_NFCT, + FLOW_KEY_NFCT_SRC, + FLOW_KEY_NFCT_DST, + FLOW_KEY_NFCT_PROTO_SRC, + FLOW_KEY_NFCT_PROTO_DST, + FLOW_KEY_RTCLASSID, + FLOW_KEY_SKUID, + FLOW_KEY_SKGID, + FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, + __FLOW_KEY_MAX, +}; + +#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1) + +enum { + FLOW_MODE_MAP, + FLOW_MODE_HASH, +}; + +enum { + TCA_FLOW_UNSPEC, + TCA_FLOW_KEYS, + TCA_FLOW_MODE, + TCA_FLOW_BASECLASS, + TCA_FLOW_RSHIFT, + TCA_FLOW_ADDEND, + TCA_FLOW_MASK, + TCA_FLOW_XOR, + TCA_FLOW_DIVISOR, + TCA_FLOW_ACT, + TCA_FLOW_POLICE, + TCA_FLOW_EMATCHES, + TCA_FLOW_PERTURB, + __TCA_FLOW_MAX +}; + +#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1) + +/* Basic filter */ + +struct tc_basic_pcnt { + __u64 rcnt; + __u64 rhit; +}; + +enum { + TCA_BASIC_UNSPEC, + TCA_BASIC_CLASSID, + TCA_BASIC_EMATCHES, + TCA_BASIC_ACT, + TCA_BASIC_POLICE, + TCA_BASIC_PCNT, + TCA_BASIC_PAD, + __TCA_BASIC_MAX +}; + +#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) + + +/* 
Cgroup classifier */ + +enum { + TCA_CGROUP_UNSPEC, + TCA_CGROUP_ACT, + TCA_CGROUP_POLICE, + TCA_CGROUP_EMATCHES, + __TCA_CGROUP_MAX, +}; + +#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) + +/* BPF classifier */ + +#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0) + +enum { + TCA_BPF_UNSPEC, + TCA_BPF_ACT, + TCA_BPF_POLICE, + TCA_BPF_CLASSID, + TCA_BPF_OPS_LEN, + TCA_BPF_OPS, + TCA_BPF_FD, + TCA_BPF_NAME, + TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, + TCA_BPF_TAG, + TCA_BPF_ID, + __TCA_BPF_MAX, +}; + +#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) + +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + + TCA_FLOWER_FLAGS, + TCA_FLOWER_KEY_VLAN_ID, /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + + 
TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ + + TCA_FLOWER_KEY_FLAGS, /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */ + + TCA_FLOWER_KEY_ARP_SIP, /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK, /* be32 */ + TCA_FLOWER_KEY_ARP_TIP, /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK, /* be32 */ + TCA_FLOWER_KEY_ARP_OP, /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK, /* u8 */ + TCA_FLOWER_KEY_ARP_SHA, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */ + + TCA_FLOWER_KEY_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID, /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 
*/ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS, + TCA_FLOWER_KEY_ENC_OPTS_MASK, + + TCA_FLOWER_IN_HW_COUNT, + + TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */ + TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */ + TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */ + TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */ + + TCA_FLOWER_KEY_CT_STATE, /* u16 */ + TCA_FLOWER_KEY_CT_STATE_MASK, /* u16 */ + TCA_FLOWER_KEY_CT_ZONE, /* u16 */ + TCA_FLOWER_KEY_CT_ZONE_MASK, /* u16 */ + TCA_FLOWER_KEY_CT_MARK, /* u32 */ + TCA_FLOWER_KEY_CT_MARK_MASK, /* u32 */ + TCA_FLOWER_KEY_CT_LABELS, /* u128 */ + TCA_FLOWER_KEY_CT_LABELS_MASK, /* u128 */ + + TCA_FLOWER_KEY_MPLS_OPTS, + + TCA_FLOWER_KEY_HASH, /* u32 */ + TCA_FLOWER_KEY_HASH_MASK, /* u32 */ + + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + +enum { + TCA_FLOWER_KEY_CT_FLAGS_NEW = 1 << 0, /* Beginning of a new connection. */ + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */ + TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */ + TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */ + TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */ + TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. 
*/ + __TCA_FLOWER_KEY_CT_FLAGS_MAX, +}; + +enum { + TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, + TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ + * attributes + */ + TCA_FLOWER_KEY_ENC_OPTS_VXLAN, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_VXLAN_ + * attributes + */ + TCA_FLOWER_KEY_ENC_OPTS_ERSPAN, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_ERSPAN_ + * attributes + */ + __TCA_FLOWER_KEY_ENC_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ + + __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_VXLAN_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP, /* u32 */ + __TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_ERSPAN_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX, /* be32 */ + TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID, /* u8 */ + __TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1) + +enum { + TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC, + TCA_FLOWER_KEY_MPLS_OPTS_LSE, + __TCA_FLOWER_KEY_MPLS_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_MPLS_OPTS_MAX (__TCA_FLOWER_KEY_MPLS_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_MPLS_OPT_LSE_UNSPEC, + TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH, + TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL, + TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS, + TCA_FLOWER_KEY_MPLS_OPT_LSE_TC, + TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, + __TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, +}; + +#define TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX \ + (__TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX - 1) + +enum { + 
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), +}; + +#define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */ + +/* Match-all classifier */ + +struct tc_matchall_pcnt { + __u64 rhit; +}; + +enum { + TCA_MATCHALL_UNSPEC, + TCA_MATCHALL_CLASSID, + TCA_MATCHALL_ACT, + TCA_MATCHALL_FLAGS, + TCA_MATCHALL_PCNT, + TCA_MATCHALL_PAD, + __TCA_MATCHALL_MAX, +}; + +#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1) + +/* Extended Matches */ + +struct tcf_ematch_tree_hdr { + __u16 nmatches; + __u16 progid; +}; + +enum { + TCA_EMATCH_TREE_UNSPEC, + TCA_EMATCH_TREE_HDR, + TCA_EMATCH_TREE_LIST, + __TCA_EMATCH_TREE_MAX +}; +#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1) + +struct tcf_ematch_hdr { + __u16 matchid; + __u16 kind; + __u16 flags; + __u16 pad; /* currently unused */ +}; + +/* 0 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + * +-----------------------+-+-+---+ + * | Unused |S|I| R | + * +-----------------------+-+-+---+ + * + * R(2) ::= relation to next ematch + * where: 0 0 END (last ematch) + * 0 1 AND + * 1 0 OR + * 1 1 Unused (invalid) + * I(1) ::= invert result + * S(1) ::= simple payload + */ +#define TCF_EM_REL_END 0 +#define TCF_EM_REL_AND (1<<0) +#define TCF_EM_REL_OR (1<<1) +#define TCF_EM_INVERT (1<<2) +#define TCF_EM_SIMPLE (1<<3) + +#define TCF_EM_REL_MASK 3 +#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK) + +enum { + TCF_LAYER_LINK, + TCF_LAYER_NETWORK, + TCF_LAYER_TRANSPORT, + __TCF_LAYER_MAX +}; +#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1) + +/* Ematch type assignments + * 1..32767 Reserved for ematches inside kernel tree + * 32768..65535 Free to use, not reliable + */ +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_VLAN 6 +#define TCF_EM_CANID 7 +#define TCF_EM_IPSET 8 +#define TCF_EM_IPT 9 +#define TCF_EM_MAX 9 + +enum { + TCF_EM_PROG_TC +}; + +enum { + 
TCF_EM_OPND_EQ, + TCF_EM_OPND_GT, + TCF_EM_OPND_LT +}; + +#endif diff --git a/zebra/debug_nl.c b/zebra/debug_nl.c index a16d442521..afefab6674 100644 --- a/zebra/debug_nl.c +++ b/zebra/debug_nl.c @@ -1536,6 +1536,24 @@ next_rta: goto next_rta; } +static const char *tcm_nltype2str(int nltype) +{ + switch (nltype) { + case RTM_NEWQDISC: + case RTM_DELQDISC: + return "qdisc"; + case RTM_NEWTCLASS: + case RTM_DELTCLASS: + return "tclass"; + case RTM_NEWTFILTER: + case RTM_DELTFILTER: + return "tfilter"; + default: + /* should never hit */ + return "unknown"; + } +} + static void nlncm_dump(const struct netconfmsg *ncm, size_t msglen) { const struct rtattr *rta; @@ -1595,6 +1613,8 @@ void nl_dump(void *msg, size_t msglen) struct ifinfomsg *ifi; struct tunnel_msg *tnlm; struct fib_rule_hdr *frh; + struct tcmsg *tcm; + char fbuf[128]; char ibuf[128]; @@ -1730,6 +1750,21 @@ next_header: nlncm_dump(ncm, nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(*ncm))); break; + case RTM_NEWQDISC: + case RTM_DELQDISC: + case RTM_NEWTCLASS: + case RTM_DELTCLASS: + case RTM_NEWTFILTER: + case RTM_DELTFILTER: + tcm = NLMSG_DATA(nlmsg); + zlog_debug( + " tcm [type=%s family=%s (%d) ifindex=%d handle=%04x:%04x]", + tcm_nltype2str(nlmsg->nlmsg_type), + af_type2str(tcm->tcm_family), tcm->tcm_family, + tcm->tcm_ifindex, tcm->tcm_handle >> 16, + tcm->tcm_handle & 0xffff); + break; + default: break; } diff --git a/zebra/dplane_fpm_nl.c b/zebra/dplane_fpm_nl.c index ec4ea372f1..d07c4c6332 100644 --- a/zebra/dplane_fpm_nl.c +++ b/zebra/dplane_fpm_nl.c @@ -815,6 +815,9 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx) case DPLANE_OP_INTF_INSTALL: case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: case DPLANE_OP_NONE: break; diff --git a/zebra/interface.c b/zebra/interface.c index 205fa88293..c674b499ac 100644 --- a/zebra/interface.c +++ b/zebra/interface.c @@ -1573,6 +1573,9 @@ void 
zebra_if_dplane_result(struct zebra_dplane_ctx *ctx) case DPLANE_OP_IPSET_ENTRY_DELETE: case DPLANE_OP_NEIGH_TABLE_UPDATE: case DPLANE_OP_GRE_SET: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: break; /* should never hit here */ } } diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index 396ccb34bd..45a372f88c 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -47,6 +47,7 @@ #include "zebra/rt_netlink.h" #include "zebra/if_netlink.h" #include "zebra/rule_netlink.h" +#include "zebra/tc_netlink.h" #include "zebra/netconf_netlink.h" #include "zebra/zebra_errors.h" @@ -114,6 +115,15 @@ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"}, {RTM_NEWTUNNEL, "RTM_NEWTUNNEL"}, {RTM_DELTUNNEL, "RTM_DELTUNNEL"}, {RTM_GETTUNNEL, "RTM_GETTUNNEL"}, + {RTM_NEWQDISC, "RTM_NEWQDISC"}, + {RTM_DELQDISC, "RTM_DELQDISC"}, + {RTM_GETQDISC, "RTM_GETQDISC"}, + {RTM_NEWTCLASS, "RTM_NEWTCLASS"}, + {RTM_DELTCLASS, "RTM_DELTCLASS"}, + {RTM_GETTCLASS, "RTM_GETTCLASS"}, + {RTM_NEWTFILTER, "RTM_NEWTFILTER"}, + {RTM_DELTFILTER, "RTM_DELTFILTER"}, + {RTM_GETTFILTER, "RTM_GETTFILTER"}, {0}}; static const struct message rtproto_str[] = { @@ -1623,6 +1633,11 @@ static enum netlink_msg_status nl_put_msg(struct nl_batch *bth, case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: return netlink_put_intf_update_msg(bth, ctx); + + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + return netlink_put_tc_update_msg(bth, ctx); } return FRR_NETLINK_ERROR; diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index 076e9c4dfa..cb549339af 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -1603,6 +1603,12 @@ void kernel_update_multi(struct dplane_ctx_q *ctx_list) res = kernel_intf_update(ctx); break; + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + res = kernel_tc_update(ctx); + break; + /* Ignore 'notifications' - no-op */ case 
DPLANE_OP_SYS_ROUTE_ADD: case DPLANE_OP_SYS_ROUTE_DELETE: diff --git a/zebra/rt.h b/zebra/rt.h index 0a86a2897c..d8a22d2cfc 100644 --- a/zebra/rt.h +++ b/zebra/rt.h @@ -71,6 +71,7 @@ kernel_intf_update(struct zebra_dplane_ctx *ctx); extern enum zebra_dplane_result kernel_intf_netconf_update(struct zebra_dplane_ctx *ctx); +extern enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx); #endif /* !HAVE_NETLINK */ diff --git a/zebra/subdir.am b/zebra/subdir.am index a926c14adf..298b71598c 100644 --- a/zebra/subdir.am +++ b/zebra/subdir.am @@ -82,6 +82,8 @@ zebra_zebra_SOURCES = \ zebra/rule_netlink.c \ zebra/rule_socket.c \ zebra/table_manager.c \ + zebra/tc_netlink.c \ + zebra/tc_socket.c \ zebra/zapi_msg.c \ zebra/zebra_dplane.c \ zebra/zebra_errors.c \ @@ -163,6 +165,7 @@ noinst_HEADERS += \ zebra/rtadv.h \ zebra/rule_netlink.h \ zebra/table_manager.h \ + zebra/tc_netlink.h \ zebra/zapi_msg.h \ zebra/zebra_dplane.h \ zebra/zebra_errors.h \ diff --git a/zebra/tc_netlink.c b/zebra/tc_netlink.c new file mode 100644 index 0000000000..89ce075454 --- /dev/null +++ b/zebra/tc_netlink.c @@ -0,0 +1,468 @@ +/* + * Zebra Traffic Control (TC) interaction with the kernel using netlink. + * + * Copyright (C) 2022 Shichu Yang + * + * This file is part of FRR. + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include + +#ifdef HAVE_NETLINK + +#include +#include + +#include "if.h" +#include "prefix.h" +#include "vrf.h" + +#include +#include +#include +#include "zebra/zserv.h" +#include "zebra/zebra_ns.h" +#include "zebra/zebra_vrf.h" +#include "zebra/rt.h" +#include "zebra/interface.h" +#include "zebra/debug.h" +#include "zebra/rtadv.h" +#include "zebra/kernel_netlink.h" +#include "zebra/tc_netlink.h" +#include "zebra/zebra_errors.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_trace.h" + +/* TODO: move these bitflags to zebra_tc.h */ +#define TC_FILTER_SRC_IP (1 << 0) +#define TC_FILTER_DST_IP (1 << 1) +#define TC_FILTER_IP_PROTOCOL (1 << 9) + +#define TC_FREQ_DEFAULT (100) + +#define TC_MAJOR_BASE (0x1000u) +#define TC_MINOR_NOCLASS (0xffffu) + +#define TC_FILTER_MASK (0x8000u) + +#define TIME_UNITS_PER_SEC (1000000) +#define xmittime(r, s) (TIME_UNITS_PER_SEC * ((double)(s) / (double)(r))) + +static uint32_t tc_get_freq(void) +{ + int freq = 0; + FILE *fp = fopen("/proc/net/psched", "r"); + + if (fp) { + uint32_t nom, denom; + + if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2) { + if (nom == 1000000) + freq = denom; + } + fclose(fp); + } + + return freq == 0 ? 
TC_FREQ_DEFAULT : freq;
+}
+
+/*
+ * Pack a 16-bit major and a 16-bit minor number into a 32-bit TC handle:
+ * major occupies the upper half, minor the lower half.
+ */
+static inline uint32_t tc_make_handle(uint16_t major, uint16_t minor)
+{
+	/*
+	 * Widen before shifting.  A bare uint16_t is promoted to (signed)
+	 * int, and shifting a value >= 0x8000 left by 16 bits overflows it,
+	 * which is undefined behaviour.
+	 */
+	return ((uint32_t)major << 16) | (uint32_t)minor;
+}
+
+/*
+ * Build the handle of a TC object attached to the interface of 'ctx'.
+ * The major number is TC_MAJOR_BASE plus the low 16 bits of the ifindex,
+ * the minor number is supplied by the caller.
+ */
+static inline uint32_t tc_get_handle(struct zebra_dplane_ctx *ctx,
+				     uint16_t minor)
+{
+	uint16_t major = TC_MAJOR_BASE + (uint16_t)dplane_ctx_get_ifindex(ctx);
+
+	return tc_make_handle(major, minor);
+}
+
+/*
+ * Fill the 256-entry transmit time table for 'ratespec' and set its cell
+ * parameters.
+ *
+ * cell_log is the smallest shift for which (mtu >> cell_log) <= 255; entry
+ * i of the table is xmittime() for (i + 1) << cell_log bytes at
+ * ratespec->rate.  An mtu of 0 selects the default of 2047.
+ *
+ * 'table' must have room for 256 entries.
+ *
+ * NOTE(review): ratespec->rate is used as a divisor; callers presumably
+ * never pass a zero rate - confirm.
+ */
+static void tc_calc_rate_table(struct tc_ratespec *ratespec, uint32_t *table,
+			       uint32_t mtu)
+{
+	int cell_log = 0;
+
+	if (mtu == 0)
+		mtu = 2047;
+
+	while ((mtu >> cell_log) > 255)
+		cell_log++;
+
+	for (int i = 0; i < 256; i++)
+		table[i] = xmittime(ratespec->rate, (i + 1) << cell_log);
+
+	ratespec->cell_align = -1;
+	ratespec->cell_log = cell_log;
+	ratespec->linklayer = TC_LINKLAYER_ETHERNET;
+}
+
+/*
+ * Convert an FRR prefix into the inet_prefix representation used when
+ * emitting flower address attributes.  Only AF_INET and AF_INET6 are
+ * handled.
+ *
+ * Returns 0 on success, -1 for any other address family.
+ */
+static int tc_flower_get_inet_prefix(const struct prefix *prefix,
+				     struct inet_prefix *addr)
+{
+	addr->family = prefix->family;
+
+	if (addr->family == AF_INET) {
+		addr->bytelen = 4;
+		addr->bitlen = prefix->prefixlen;
+		addr->flags = 0;
+		addr->flags |= PREFIXLEN_SPECIFIED;
+		addr->flags |= ADDRTYPE_INET;
+		memcpy(addr->data, prefix->u.val32, sizeof(prefix->u.val32));
+	} else if (addr->family == AF_INET6) {
+		addr->bytelen = 16;
+		addr->bitlen = prefix->prefixlen;
+		addr->flags = 0;
+		addr->flags |= PREFIXLEN_SPECIFIED;
+		addr->flags |= ADDRTYPE_INET;
+		memcpy(addr->data, prefix->u.val, sizeof(prefix->u.val));
+	} else {
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the netmask matching prefix->prefixlen as an inet_prefix: the
+ * leading 'prefixlen' bits of addr->data are set, the remaining bits are
+ * cleared, and each 32-bit word is stored in network byte order.
+ *
+ * Returns 0 on success, -1 if the family is neither AF_INET nor AF_INET6.
+ */
+static int tc_flower_get_inet_mask(const struct prefix *prefix,
+				   struct inet_prefix *addr)
+{
+	addr->family = prefix->family;
+
+	if (addr->family == AF_INET) {
+		addr->bytelen = 4;
+		addr->bitlen = prefix->prefixlen;
+		addr->flags = 0;
+		addr->flags |= PREFIXLEN_SPECIFIED;
+		addr->flags |= ADDRTYPE_INET;
+	} else if (addr->family == AF_INET6) {
+		addr->bytelen = 16;
+		addr->bitlen = prefix->prefixlen;
+		addr->flags = 0;
+		addr->flags |= PREFIXLEN_SPECIFIED;
+		addr->flags |= ADDRTYPE_INET;
+	} else {
+		return -1;
+	
}
+
+	memset(addr->data, 0xff, addr->bytelen);
+
+	int rest = prefix->prefixlen;
+
+	for (int i = 0; i < addr->bytelen / 4; i++) {
+		if (!rest) {
+			addr->data[i] = 0;
+		} else if (rest / 32 >= 1) {
+			rest -= 32;
+		} else {
+			addr->data[i] <<= 32 - rest;
+			addr->data[i] = htonl(addr->data[i]);
+			rest = 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Traffic control queue discipline encoding (only "htb" supported)
+ *
+ * Encodes a message of type 'cmd' that creates or replaces the root "htb"
+ * qdisc (handle minor 0) on the interface of 'ctx' into 'data'.
+ *
+ * Returns the aligned length of the encoded message, or 0 when 'datalen'
+ * cannot hold the fixed netlink and tc headers.
+ */
+static ssize_t netlink_qdisc_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+					void *data, size_t datalen)
+{
+	struct nlsock *nl;
+
+	const char *kind = "htb";
+
+	struct tc_htb_glob htb_glob = {
+		.rate2quantum = 10, .version = 3, .defcls = TC_MINOR_NOCLASS};
+
+	struct rtattr *nest;
+
+	struct {
+		struct nlmsghdr n;
+		struct tcmsg t;
+		char buf[0];
+	} *req = (void *)data;
+
+	if (datalen < sizeof(*req))
+		return 0;
+
+	nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+	memset(req, 0, sizeof(*req));
+
+	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+	req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+
+	req->n.nlmsg_flags |= NLM_F_REPLACE;
+
+	req->n.nlmsg_type = cmd;
+
+	req->n.nlmsg_pid = nl->snl.nl_pid;
+
+	req->t.tcm_family = AF_UNSPEC;
+	req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+	req->t.tcm_handle = tc_get_handle(ctx, 0);
+	req->t.tcm_parent = TC_H_ROOT;
+
+	nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
+
+	nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+
+	nl_attr_put(&req->n, datalen, TCA_HTB_INIT, &htb_glob,
+		    sizeof(htb_glob));
+	nl_attr_nest_end(&req->n, nest);
+
+	return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Traffic control class encoding
+ *
+ * Encodes a message of type 'cmd' for the HTB class with handle minor 1
+ * under the interface's qdisc (handle minor 0) into 'data'.
+ *
+ * Rate and ceil come from the dplane context; ceil is raised to rate when
+ * it is lower.  A value that does not fit in 32 bits is clamped to ~0U in
+ * struct tc_htb_opt and carried in full by TCA_HTB_RATE64 (rate) or
+ * TCA_HTB_CEIL64 (ceil).
+ *
+ * Returns the aligned length of the encoded message, or 0 when 'datalen'
+ * cannot hold the fixed netlink and tc headers.
+ */
+static ssize_t netlink_tclass_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+					 void *data, size_t datalen)
+{
+	struct nlsock *nl;
+	struct tc_htb_opt htb_opt = {};
+
+	uint64_t rate, ceil;
+	uint64_t buffer, cbuffer;
+	uint32_t freq;
+
+	/* TODO: fetch mtu from interface */
+	uint32_t mtu = 0;
+
+	uint32_t rtab[256];
+	uint32_t ctab[256];
+
+	struct rtattr *nest;
+
+	struct {
+		struct nlmsghdr n;
+		struct tcmsg t;
+		char buf[0];
+	} *req = (void *)data;
+
+	if (datalen < sizeof(*req))
+		return 0;
+
+	nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+	memset(req, 0, sizeof(*req));
+
+	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+	req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+
+	req->n.nlmsg_type = cmd;
+
+	req->n.nlmsg_pid = nl->snl.nl_pid;
+
+	req->t.tcm_family = AF_UNSPEC;
+	req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+	req->t.tcm_handle = tc_get_handle(ctx, 1);
+	req->t.tcm_parent = tc_get_handle(ctx, 0);
+
+	rate = dplane_ctx_tc_get_rate(ctx);
+	ceil = dplane_ctx_tc_get_ceil(ctx);
+
+	/* The ceiling may never be below the guaranteed rate */
+	ceil = ceil < rate ? rate : ceil;
+
+	htb_opt.rate.rate = (rate >> 32 != 0) ? ~0U : rate;
+	htb_opt.ceil.rate = (ceil >> 32 != 0) ? ~0U : ceil;
+
+	/* tc_get_freq() re-reads /proc/net/psched, so query it only once */
+	freq = tc_get_freq();
+	buffer = rate / freq;
+	cbuffer = ceil / freq;
+
+	htb_opt.buffer = buffer;
+	htb_opt.cbuffer = cbuffer;
+
+	/* rtab describes the rate, ctab the ceil; both are sent below */
+	tc_calc_rate_table(&htb_opt.rate, rtab, mtu);
+	tc_calc_rate_table(&htb_opt.ceil, ctab, mtu);
+
+	htb_opt.ceil.mpu = htb_opt.rate.mpu = 0;
+	htb_opt.ceil.overhead = htb_opt.rate.overhead = 0;
+
+	nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+
+	if (rate >> 32 != 0) {
+		nl_attr_put(&req->n, datalen, TCA_HTB_RATE64, &rate,
+			    sizeof(rate));
+	}
+
+	if (ceil >> 32 != 0) {
+		nl_attr_put(&req->n, datalen, TCA_HTB_CEIL64, &ceil,
+			    sizeof(ceil));
+	}
+
+	nl_attr_put(&req->n, datalen, TCA_HTB_PARMS, &htb_opt, sizeof(htb_opt));
+
+	nl_attr_put(&req->n, datalen, TCA_HTB_RTAB, rtab, sizeof(rtab));
+	nl_attr_put(&req->n, datalen, TCA_HTB_CTAB, ctab, sizeof(ctab));
+	nl_attr_nest_end(&req->n, nest);
+
+	return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Traffic control filter encoding (only "flower" supported)
+ */
+static ssize_t netlink_tfilter_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
+					  void *data, size_t datalen)
+{
+	struct nlsock *nl;
+	struct rtattr *nest;
+
+	const char *kind = "flower";
+
+	uint16_t priority;
+	uint16_t protocol;
+	uint32_t classid;
+	uint32_t filter_bm;
+	uint32_t flags = 0;
+
+	struct inet_prefix addr;
+
+	struct {
+		struct nlmsghdr n;
+		struct tcmsg t;
+		char buf[0];
+	} *req = (void *)data;
+
+	/* The buffer must at least hold the fixed netlink and tc headers */
+	if (datalen < sizeof(*req))
+		return 0;
+
+	nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
+
+	memset(req, 0, sizeof(*req));
+
+	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+	req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
+
+	req->n.nlmsg_flags |= NLM_F_EXCL;
+
+	req->n.nlmsg_type = cmd;
+
+	req->n.nlmsg_pid = nl->snl.nl_pid;
+
+	req->t.tcm_family = AF_UNSPEC;
+	req->t.tcm_ifindex = dplane_ctx_get_ifindex(ctx);
+
+	/* TODO: priority and layer-3 protocol support */
+	priority = 0;
+	protocol = htons(ETH_P_IP);
+	/* The filter steers matching traffic into class minor 1 */
+	classid = tc_get_handle(ctx, 1);
+	filter_bm = dplane_ctx_tc_get_filter_bm(ctx);
+
+	/* tcm_info: priority in the upper 16 bits, protocol in the lower */
+	req->t.tcm_info = tc_make_handle(priority, protocol);
+
+	req->t.tcm_handle = 1;
+	req->t.tcm_parent = tc_get_handle(ctx, 0);
+
+	nl_attr_put(&req->n, datalen, TCA_KIND, kind, strlen(kind) + 1);
+	nest = nl_attr_nest(&req->n, datalen, TCA_OPTIONS);
+
+	nl_attr_put(&req->n, datalen, TCA_FLOWER_CLASSID, &classid,
+		    sizeof(classid));
+
+	/*
+	 * Each address match is emitted as a key/mask attribute pair whose
+	 * type depends on the address family of the prefix.
+	 *
+	 * NOTE(review): an unsupported family returns 0, the same value as
+	 * the size check above - confirm the caller handles that as intended.
+	 */
+	if (filter_bm & TC_FILTER_SRC_IP) {
+		const struct prefix *src_p = dplane_ctx_tc_get_src_ip(ctx);
+
+		if (tc_flower_get_inet_prefix(src_p, &addr) != 0)
+			return 0;
+
+		nl_attr_put(&req->n, datalen,
+			    (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_SRC
+						     : TCA_FLOWER_KEY_IPV6_SRC,
+			    addr.data, addr.bytelen);
+
+		if (tc_flower_get_inet_mask(src_p, &addr) != 0)
+			return 0;
+
+		nl_attr_put(&req->n, datalen,
+			    (addr.family == AF_INET)
+				    ? TCA_FLOWER_KEY_IPV4_SRC_MASK
+				    : TCA_FLOWER_KEY_IPV6_SRC_MASK,
+			    addr.data, addr.bytelen);
+	}
+
+	if (filter_bm & TC_FILTER_DST_IP) {
+		const struct prefix *dst_p = dplane_ctx_tc_get_dst_ip(ctx);
+
+		if (tc_flower_get_inet_prefix(dst_p, &addr) != 0)
+			return 0;
+
+		nl_attr_put(&req->n, datalen,
+			    (addr.family == AF_INET) ? TCA_FLOWER_KEY_IPV4_DST
+						     : TCA_FLOWER_KEY_IPV6_DST,
+			    addr.data, addr.bytelen);
+
+		if (tc_flower_get_inet_mask(dst_p, &addr) != 0)
+			return 0;
+
+		nl_attr_put(&req->n, datalen,
+			    (addr.family == AF_INET)
+				    ? TCA_FLOWER_KEY_IPV4_DST_MASK
+				    : TCA_FLOWER_KEY_IPV6_DST_MASK,
+			    addr.data, addr.bytelen);
+	}
+
+	if (filter_bm & TC_FILTER_IP_PROTOCOL) {
+		nl_attr_put8(&req->n, datalen, TCA_FLOWER_KEY_IP_PROTO,
+			     dplane_ctx_tc_get_ip_proto(ctx));
+	}
+
+	/* 'flags' is always 0 here: no TCA_CLS_FLAGS_* bit is requested */
+	nl_attr_put32(&req->n, datalen, TCA_FLOWER_FLAGS, flags);
+
+	nl_attr_put16(&req->n, datalen, TCA_FLOWER_KEY_ETH_TYPE, protocol);
+	nl_attr_nest_end(&req->n, nest);
+
+	return NLMSG_ALIGN(req->n.nlmsg_len);
+}
+
+/*
+ * Encoder callbacks handed to netlink_batch_add_msg(): each one fixes the
+ * message type to the RTM_NEW* variant of the matching encoder above.
+ */
+static ssize_t netlink_newqdisc_msg_encoder(struct zebra_dplane_ctx *ctx,
+					    void *buf, size_t buflen)
+{
+	return netlink_qdisc_msg_encode(RTM_NEWQDISC, ctx, buf, buflen);
+}
+
+static ssize_t netlink_newtclass_msg_encoder(struct zebra_dplane_ctx *ctx,
+					     void *buf, size_t buflen)
+{
+	return netlink_tclass_msg_encode(RTM_NEWTCLASS, ctx, buf, buflen);
+}
+
+static ssize_t netlink_newtfilter_msg_encoder(struct zebra_dplane_ctx *ctx,
+					      void *buf, size_t buflen)
+{
+	return netlink_tfilter_msg_encode(RTM_NEWTFILTER, ctx, buf, buflen);
+}
+
+/*
+ * Add the qdisc, class and filter messages for 'ctx' to the batch, in that
+ * order.  Only the status of the last (filter) call is returned; the
+ * results of the first two calls are discarded.
+ */
+enum netlink_msg_status netlink_put_tc_update_msg(struct nl_batch *bth,
+						  struct zebra_dplane_ctx *ctx)
+{
+	/* TODO: error handling and other actions (delete, replace, ...) */
+
+	netlink_batch_add_msg(bth, ctx, netlink_newqdisc_msg_encoder, false);
+	netlink_batch_add_msg(bth, ctx, netlink_newtclass_msg_encoder, false);
+	return netlink_batch_add_msg(bth, ctx, netlink_newtfilter_msg_encoder,
+				     false);
+}
+
+#endif /* HAVE_NETLINK */
diff --git a/zebra/tc_netlink.h b/zebra/tc_netlink.h
new file mode 100644
index 0000000000..2190bca4f9
--- /dev/null
+++ b/zebra/tc_netlink.h
@@ -0,0 +1,62 @@
+/*
+ * Zebra Traffic Control (TC) interaction with the kernel using netlink.
+ *
+ * Copyright (C) 2022 Shichu Yang
+ *
+ * This file is part of FRR.
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_TC_NETLINK_H +#define _ZEBRA_TC_NETLINK_H + +#ifdef HAVE_NETLINK + +#ifdef __cplusplus +extern "C" { +#endif + +/* Represent a prefixed address in flower filter */ + +struct inet_prefix { + uint16_t flags; /* presumably a mask of the flag bits in the enum below -- TODO confirm */ + uint16_t bytelen; /* number of bytes of data[] sent as the netlink attribute payload */ + uint16_t bitlen; /* presumably the prefix length in bits -- TODO confirm */ + uint16_t family; /* AF_INET selects the IPv4 flower keys, anything else the IPv6 keys */ + uint32_t data[64]; /* address (or mask) bytes sent as the attribute payload */ +}; + +enum { /* flag bits; presumably values for inet_prefix.flags -- TODO confirm */ + PREFIXLEN_SPECIFIED = (1 << 0), + ADDRTYPE_INET = (1 << 1), + ADDRTYPE_UNSPEC = (1 << 2), + ADDRTYPE_MULTI = (1 << 3), + + ADDRTYPE_INET_UNSPEC = ADDRTYPE_INET | ADDRTYPE_UNSPEC, + ADDRTYPE_INET_MULTI = ADDRTYPE_INET | ADDRTYPE_MULTI +}; + +/* Add the qdisc, class and filter messages for ctx to the netlink batch bth. */ extern enum netlink_msg_status +netlink_put_tc_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* HAVE_NETLINK */ + +#endif /* _ZEBRA_TC_NETLINK_H */ diff --git a/zebra/tc_socket.c b/zebra/tc_socket.c new file mode 100644 index 0000000000..0bf9e487b0 --- /dev/null +++ b/zebra/tc_socket.c @@ -0,0 +1,41 @@ +/* + * Zebra Traffic Control (TC) interaction with the kernel using socket. + * + * Copyright (C) 2022 Shichu Yang + * + * This file is part of FRR. 
+ * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FRR; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include + +#ifndef HAVE_NETLINK + +#include "lib_errors.h" + +#include "zebra/rt.h" +#include "zebra/zebra_dplane.h" +#include "zebra/zebra_errors.h" + +/* Stub compiled only when HAVE_NETLINK is not defined: logs that the operation is unavailable and returns ZEBRA_DPLANE_REQUEST_FAILURE. */ enum zebra_dplane_result kernel_tc_update(struct zebra_dplane_ctx *ctx) +{ + flog_err(EC_LIB_UNAVAILABLE, "%s not Implemented for this platform", + __func__); + return ZEBRA_DPLANE_REQUEST_FAILURE; +} + +#endif /* !HAVE_NETLINK */ diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index fd170450f1..763c92ebb6 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -313,6 +313,25 @@ struct dplane_netconf_info { enum dplane_netconf_status_e linkdown_val; }; +/* + * Traffic control contexts for the dplane + */ +struct dplane_tc_info { + /* Rate spec (unit: Bytes/s) */ + uint64_t rate; + uint64_t ceil; /* the netlink class encoder raises this to rate when it is smaller */ + + /* TODO: custom burst */ + + /* Filter components for "tfilter" */ + uint32_t filter_bm; /* bitmask tested against TC_FILTER_SRC_IP, TC_FILTER_DST_IP and TC_FILTER_IP_PROTOCOL by the netlink filter encoder */ + struct prefix src_ip; /* used only when TC_FILTER_SRC_IP is set in filter_bm */ + struct prefix dst_ip; /* used only when TC_FILTER_DST_IP is set in filter_bm */ + uint8_t ip_proto; /* sent as TCA_FLOWER_KEY_IP_PROTO when TC_FILTER_IP_PROTOCOL is set */ + + /* TODO: more filter components */ +}; + /* * The context block used to exchange info about route updates across * the boundary between the zebra main context (and pthread) and the @@ -362,6 +381,7 @@ struct zebra_dplane_ctx { struct dplane_mac_info macinfo; struct dplane_neigh_info neigh; struct dplane_rule_info rule; + struct 
dplane_tc_info tc; struct zebra_pbr_iptable iptable; struct zebra_pbr_ipset ipset; struct { @@ -540,6 +560,9 @@ static struct zebra_dplane_globals { _Atomic uint32_t dg_intfs_in; _Atomic uint32_t dg_intf_errors; + _Atomic uint32_t dg_tcs_in; /* incremented once per dplane_tc_update_internal() call */ + _Atomic uint32_t dg_tcs_errors; /* TC requests that failed to enqueue or completed without success */ + /* Dataplane pthread */ struct frr_pthread *dg_pthread; @@ -777,6 +800,9 @@ static void dplane_ctx_free_internal(struct zebra_dplane_ctx *ctx) case DPLANE_OP_INTF_INSTALL: case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: break; case DPLANE_OP_IPSET_ENTRY_ADD: @@ -1100,6 +1126,16 @@ const char *dplane_op2str(enum dplane_op_e op) case DPLANE_OP_INTF_DELETE: ret = "INTF_DELETE"; break; + + case DPLANE_OP_TC_INSTALL: + ret = "TC_INSTALL"; + break; + case DPLANE_OP_TC_UPDATE: + ret = "TC_UPDATE"; + break; + case DPLANE_OP_TC_DELETE: + ret = "TC_DELETE"; + break; } return ret; @@ -1419,6 +1455,50 @@ uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx) return ctx->u.rinfo.zd_old_distance; } +/* Return the traffic control rate stored in the context (ctx->u.tc.rate). */ uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.rate; +} + +/* Return the traffic control ceiling stored in the context (ctx->u.tc.ceil). */ uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.ceil; +} + +/* Return the bitmask of filter components stored in the context. */ uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.filter_bm; +} + +/* Return a pointer to the filter's source prefix inside the context (not a copy). */ const struct prefix * +dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.tc.src_ip); +} + +/* Return a pointer to the filter's destination prefix inside the context (not a copy). */ const struct prefix * +dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return &(ctx->u.tc.dst_ip); +} + +/* Return the filter's IP protocol value stored in the context. */ uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx) +{ + DPLANE_CTX_VALID(ctx); + + return ctx->u.tc.ip_proto; +} + /* * Set the nexthops associated with a context: note that processing code * may 
well expect that nexthops are in canonical (sorted) order, so we @@ -2691,6 +2771,26 @@ done: return ret; } +/* Prepare ctx for a traffic control operation: stores op, presets the status to ZEBRA_DPLANE_REQUEST_SUCCESS and passes the namespace to dplane_ctx_ns_init(). Always returns AOK. */ int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op) +{ + int ret = EINVAL; + + struct zebra_vrf *zvrf = NULL; /* NOTE(review): never assigned, so the lookup below always uses NS_DEFAULT */ + struct zebra_ns *zns = NULL; + + ctx->zd_op = op; + ctx->zd_status = ZEBRA_DPLANE_REQUEST_SUCCESS; + + /* TODO: init traffic control qdisc */ + zns = zvrf ? zvrf->zns : zebra_ns_lookup(NS_DEFAULT); + + dplane_ctx_ns_init(ctx, zns, true); + + ret = AOK; + + return ret; +} + /** * dplane_ctx_nexthop_init() - Initialize a context block for a nexthop update * @@ -3410,6 +3510,47 @@ dplane_route_update_internal(struct route_node *rn, return result; } +/* Allocate a context for op, initialise it and enqueue it to the dataplane. Returns ZEBRA_DPLANE_REQUEST_QUEUED when enqueued; otherwise counts an error, frees the context (if one was allocated) and returns ZEBRA_DPLANE_REQUEST_FAILURE. */ static enum zebra_dplane_result dplane_tc_update_internal(enum dplane_op_e op) +{ + enum zebra_dplane_result result = ZEBRA_DPLANE_REQUEST_FAILURE; + int ret = EINVAL; + struct zebra_dplane_ctx *ctx = NULL; + + /* Obtain context block */ + ctx = dplane_ctx_alloc(); + + if (!ctx) { + ret = ENOMEM; + goto done; + } + + /* Init context with info from zebra data structs */ + ret = dplane_ctx_tc_init(ctx, op); + + if (ret == AOK) + ret = dplane_update_enqueue(ctx); + +done: + /* Update counter */ /* the request counter is bumped on success and on failure alike */ + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_in, 1, + memory_order_relaxed); + if (ret == AOK) { + result = ZEBRA_DPLANE_REQUEST_QUEUED; + } else { + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, 1, + memory_order_relaxed); + if (ctx) + dplane_ctx_free(&ctx); + } + + return result; +} + +/* Enqueue a DPLANE_OP_TC_UPDATE request; see dplane_tc_update_internal() for the result values. */ enum zebra_dplane_result dplane_tc_update(void) +{ + return dplane_tc_update_internal(DPLANE_OP_TC_UPDATE); +} + /** * dplane_nexthop_update_internal() - Helper for enqueuing nexthop changes * @@ -5591,6 +5732,13 @@ static void kernel_dplane_log_detail(struct zebra_dplane_ctx *ctx) dplane_ctx_get_ifindex(ctx), dplane_ctx_intf_is_protodown(ctx)); break; + + /* TODO: more detailed log */ + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + zlog_debug("Dplane tc ifidx 
%u", dplane_ctx_get_ifindex(ctx)); + break; } } @@ -5734,6 +5882,14 @@ static void kernel_dplane_handle_result(struct zebra_dplane_ctx *ctx) 1, memory_order_relaxed); break; + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + if (res != ZEBRA_DPLANE_REQUEST_SUCCESS) + atomic_fetch_add_explicit(&zdplane_info.dg_tcs_errors, + 1, memory_order_relaxed); + break; + /* Ignore 'notifications' - no-op */ case DPLANE_OP_SYS_ROUTE_ADD: case DPLANE_OP_SYS_ROUTE_DELETE: diff --git a/zebra/zebra_dplane.h b/zebra/zebra_dplane.h index c96ea40094..8b239a9ba1 100644 --- a/zebra/zebra_dplane.h +++ b/zebra/zebra_dplane.h @@ -193,6 +193,11 @@ enum dplane_op_e { DPLANE_OP_INTF_INSTALL, DPLANE_OP_INTF_UPDATE, DPLANE_OP_INTF_DELETE, + + /* Traffic control */ + DPLANE_OP_TC_INSTALL, + DPLANE_OP_TC_UPDATE, + DPLANE_OP_TC_DELETE, }; /* @@ -378,6 +383,16 @@ uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx *ctx); void dplane_ctx_set_distance(struct zebra_dplane_ctx *ctx, uint8_t distance); uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx *ctx); +/* Accessors for traffic control context */ +uint64_t dplane_ctx_tc_get_rate(const struct zebra_dplane_ctx *ctx); +uint64_t dplane_ctx_tc_get_ceil(const struct zebra_dplane_ctx *ctx); +uint32_t dplane_ctx_tc_get_filter_bm(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_tc_get_src_ip(const struct zebra_dplane_ctx *ctx); +const struct prefix * +dplane_ctx_tc_get_dst_ip(const struct zebra_dplane_ctx *ctx); +uint8_t dplane_ctx_tc_get_ip_proto(const struct zebra_dplane_ctx *ctx); + void dplane_ctx_set_nexthops(struct zebra_dplane_ctx *ctx, struct nexthop *nh); void dplane_ctx_set_backup_nhg(struct zebra_dplane_ctx *ctx, const struct nexthop_group *nhg); @@ -707,6 +722,13 @@ enum zebra_dplane_result dplane_intf_add(const struct interface *ifp); enum zebra_dplane_result dplane_intf_update(const struct interface *ifp); enum zebra_dplane_result dplane_intf_delete(const 
struct interface *ifp); +/* + * Enqueue traffic control changes for the dataplane. + */ +enum zebra_dplane_result dplane_tc_add(void); /* NOTE(review): no definition is visible in this change -- confirm */ +enum zebra_dplane_result dplane_tc_update(void); +enum zebra_dplane_result dplane_tc_delete(void); /* NOTE(review): no definition is visible in this change -- confirm */ + /* * Link layer operations for the dataplane. */ @@ -849,6 +871,9 @@ int dplane_ctx_nexthop_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, int dplane_ctx_intf_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op, const struct interface *ifp); +/* Encode traffic control information into data plane context. */ +int dplane_ctx_tc_init(struct zebra_dplane_ctx *ctx, enum dplane_op_e op); + /* Retrieve the limit on the number of pending, unprocessed updates. */ uint32_t dplane_get_in_queue_limit(void); diff --git a/zebra/zebra_nhg.c b/zebra/zebra_nhg.c index c5b533fc22..1964c763c5 100644 --- a/zebra/zebra_nhg.c +++ b/zebra/zebra_nhg.c @@ -3125,6 +3125,9 @@ void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx) case DPLANE_OP_INTF_INSTALL: case DPLANE_OP_INTF_UPDATE: case DPLANE_OP_INTF_DELETE: + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: break; } } diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 79eb99ddf9..03bda8cc33 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -4391,6 +4391,11 @@ static void rib_process_dplane_results(struct thread *thread) zebra_if_dplane_result(ctx); break; + case DPLANE_OP_TC_INSTALL: + case DPLANE_OP_TC_UPDATE: + case DPLANE_OP_TC_DELETE: + break; + /* Some op codes not handled here */ case DPLANE_OP_ADDR_INSTALL: case DPLANE_OP_ADDR_UNINSTALL: