diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am index a2880b7b94..8a410adca1 100644 --- a/bgpd/Makefile.am +++ b/bgpd/Makefile.am @@ -87,7 +87,7 @@ libbgp_a_SOURCES = \ bgp_encap_tlv.c $(BGP_VNC_RFAPI_SRC) bgp_attr_evpn.c \ bgp_evpn.c bgp_evpn_vty.c bgp_vpn.c bgp_label.c bgp_rd.c \ bgp_keepalives.c bgp_io.c bgp_flowspec.c bgp_flowspec_util.c \ - bgp_flowspec_vty.c bgp_labelpool.c + bgp_flowspec_vty.c bgp_labelpool.c bgp_pbr.c noinst_HEADERS = \ bgp_memory.h \ @@ -101,7 +101,7 @@ noinst_HEADERS = \ $(BGP_VNC_RFAPI_HD) bgp_attr_evpn.h bgp_evpn.h bgp_evpn_vty.h \ bgp_vpn.h bgp_label.h bgp_rd.h bgp_evpn_private.h bgp_keepalives.h \ bgp_io.h bgp_flowspec.h bgp_flowspec_private.h bgp_flowspec_util.h \ - bgp_labelpool.h + bgp_labelpool.h bgp_pbr.h bgpd_SOURCES = bgp_main.c bgpd_LDADD = libbgp.a $(BGP_VNC_RFP_LIB) ../lib/libfrr.la @LIBCAP@ @LIBM@ diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c index 29ac5f520d..3e3fbcbfe8 100644 --- a/bgpd/bgp_debug.c +++ b/bgpd/bgp_debug.c @@ -59,6 +59,7 @@ unsigned long conf_bgp_debug_update_groups; unsigned long conf_bgp_debug_vpn; unsigned long conf_bgp_debug_flowspec; unsigned long conf_bgp_debug_labelpool; +unsigned long conf_bgp_debug_pbr; unsigned long term_bgp_debug_as4; unsigned long term_bgp_debug_neighbor_events; @@ -75,6 +76,7 @@ unsigned long term_bgp_debug_update_groups; unsigned long term_bgp_debug_vpn; unsigned long term_bgp_debug_flowspec; unsigned long term_bgp_debug_labelpool; +unsigned long term_bgp_debug_pbr; struct list *bgp_debug_neighbor_events_peers = NULL; struct list *bgp_debug_keepalive_peers = NULL; @@ -1653,7 +1655,40 @@ DEFUN (no_debug_bgp_vpn, if (vty->node != CONFIG_NODE) vty_out(vty, "disabled debug bgp vpn %s\n", argv[idx]->text); + return CMD_SUCCESS; +} +/* debug bgp pbr */ +DEFUN (debug_bgp_pbr, + debug_bgp_pbr_cmd, + "debug bgp pbr", + DEBUG_STR + BGP_STR + "BGP policy based routing\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_ON(pbr, PBR); + else { + TERM_DEBUG_ON(pbr, PBR); + vty_out(vty, "BGP policy based routing is on\n"); + } + return CMD_SUCCESS; +} + +DEFUN (no_debug_bgp_pbr, + no_debug_bgp_pbr_cmd, + "no debug bgp pbr", + NO_STR + DEBUG_STR + BGP_STR + "BGP policy based routing\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_OFF(pbr, PBR); + else { + TERM_DEBUG_OFF(pbr, PBR); + vty_out(vty, "BGP policy based routing is off\n"); + } return CMD_SUCCESS; } @@ -1733,6 +1768,7 @@ DEFUN (no_debug_bgp, TERM_DEBUG_OFF(vpn, VPN_LEAK_LABEL); TERM_DEBUG_OFF(flowspec, FLOWSPEC); TERM_DEBUG_OFF(labelpool, LABELPOOL); + TERM_DEBUG_OFF(pbr, PBR); vty_out(vty, "All possible debugging has been turned off\n"); return CMD_SUCCESS; @@ -1808,6 +1844,9 @@ DEFUN_NOSH (show_debugging_bgp, if (BGP_DEBUG(labelpool, LABELPOOL)) vty_out(vty, " BGP labelpool debugging is on\n"); + if (BGP_DEBUG(pbr, PBR)) + vty_out(vty, " BGP policy based routing debugging is on\n"); + vty_out(vty, "\n"); return CMD_SUCCESS; } @@ -1865,6 +1904,9 @@ int bgp_debug_count(void) if (BGP_DEBUG(labelpool, LABELPOOL)) ret++; + if (BGP_DEBUG(pbr, PBR)) + ret++; + return ret; } @@ -1966,6 +2008,10 @@ static int bgp_config_write_debug(struct vty *vty) write++; } + if (CONF_BGP_DEBUG(pbr, PBR)) { + vty_out(vty, "debug bgp pbr\n"); + write++; + } return write; } @@ -2069,6 +2115,13 @@ void bgp_debug_init(void) install_element(CONFIG_NODE, &debug_bgp_labelpool_cmd); install_element(ENABLE_NODE, &no_debug_bgp_labelpool_cmd); install_element(CONFIG_NODE, &no_debug_bgp_labelpool_cmd); + + /* debug bgp pbr */ + install_element(ENABLE_NODE, &debug_bgp_pbr_cmd); + install_element(CONFIG_NODE, &debug_bgp_pbr_cmd); + install_element(ENABLE_NODE, &no_debug_bgp_pbr_cmd); + install_element(CONFIG_NODE, &no_debug_bgp_pbr_cmd); + } /* Return true if this prefix is on the per_prefix_list of prefixes to debug diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h index ad476ee918..d5d8fbb505 100644 --- a/bgpd/bgp_debug.h +++ b/bgpd/bgp_debug.h @@ -75,6 +75,7 @@ extern unsigned long conf_bgp_debug_update_groups; extern unsigned long conf_bgp_debug_vpn; extern unsigned long conf_bgp_debug_flowspec; extern unsigned long conf_bgp_debug_labelpool; +extern unsigned long conf_bgp_debug_pbr; extern unsigned long term_bgp_debug_as4; extern unsigned long term_bgp_debug_neighbor_events; @@ -89,6 +90,7 @@ extern unsigned long term_bgp_debug_update_groups; extern unsigned long term_bgp_debug_vpn; extern unsigned long term_bgp_debug_flowspec; extern unsigned long term_bgp_debug_labelpool; +extern unsigned long term_bgp_debug_pbr; extern struct list *bgp_debug_neighbor_events_peers; extern struct list *bgp_debug_keepalive_peers; @@ -123,6 +125,8 @@ struct bgp_debug_filter { #define BGP_DEBUG_VPN_LEAK_LABEL 0x08 #define BGP_DEBUG_FLOWSPEC 0x01 #define BGP_DEBUG_LABELPOOL 0x01 +#define BGP_DEBUG_PBR 0x01 +#define BGP_DEBUG_PBR_ERROR 0x02 #define BGP_DEBUG_PACKET_SEND 0x01 #define BGP_DEBUG_PACKET_SEND_DETAIL 0x02 diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c index 8eb0222a1b..85b9ffd8ca 100644 --- a/bgpd/bgp_ecommunity.c +++ b/bgpd/bgp_ecommunity.c @@ -34,6 +34,7 @@ #include "bgpd/bgp_lcommunity.h" #include "bgpd/bgp_aspath.h" #include "bgpd/bgp_flowspec_private.h" +#include "bgpd/bgp_pbr.h" /* struct used to dump the rate contained in FS set traffic-rate EC */ union traffic_rate { @@ -931,3 +932,52 @@ int ecommunity_del_val(struct ecommunity *ecom, struct ecommunity_val *eval) ecom->val = p; return 1; } + +int ecommunity_fill_pbr_action(struct ecommunity_val *ecom_eval, + struct bgp_pbr_entry_action *api) +{ + if (ecom_eval->val[1] == ECOMMUNITY_TRAFFIC_RATE) { + api->action = ACTION_TRAFFICRATE; + api->u.r.rate_info[3] = ecom_eval->val[4]; + api->u.r.rate_info[2] = ecom_eval->val[5]; + api->u.r.rate_info[1] = ecom_eval->val[6]; + api->u.r.rate_info[0] = ecom_eval->val[7]; + } else if (ecom_eval->val[1] == ECOMMUNITY_TRAFFIC_ACTION) { + api->action = ACTION_TRAFFIC_ACTION; + /* else distribute code is set by default */ + if (ecom_eval->val[5] & (1 << FLOWSPEC_TRAFFIC_ACTION_TERMINAL)) + api->u.za.filter |= TRAFFIC_ACTION_TERMINATE; + else + api->u.za.filter |= TRAFFIC_ACTION_DISTRIBUTE; + if (ecom_eval->val[5] == 1 << FLOWSPEC_TRAFFIC_ACTION_SAMPLE) + api->u.za.filter |= TRAFFIC_ACTION_SAMPLE; + + } else if (ecom_eval->val[1] == ECOMMUNITY_TRAFFIC_MARKING) { + api->action = ACTION_MARKING; + api->u.marking_dscp = ecom_eval->val[7]; + } else if (ecom_eval->val[1] == ECOMMUNITY_REDIRECT_VRF) { + /* must use external function */ + return 0; + } else if (ecom_eval->val[1] == ECOMMUNITY_REDIRECT_IP_NH) { + /* see draft-ietf-idr-flowspec-redirect-ip-02 + * Q1: how come a ext. community can host ipv6 address + * Q2 : from cisco documentation: + * Announces the reachability of one or more flowspec NLRI. + * When a BGP speaker receives an UPDATE message with the + * redirect-to-IP extended community, it is expected to + * create a traffic filtering rule for every flow-spec + * NLRI in the message that has this path as its best + * path. The filter entry matches the IP packets + * described in the NLRI field and redirects them or + * copies them towards the IPv4 or IPv6 address specified + * in the 'Network Address of Next- Hop' + * field of the associated MP_REACH_NLRI. + */ + struct ecommunity_ip *ip_ecom = (struct ecommunity_ip *) + ecom_eval + 2; + + api->u.zr.redirect_ip_v4 = ip_ecom->ip; + } else + return -1; + return 0; +} diff --git a/bgpd/bgp_ecommunity.h b/bgpd/bgp_ecommunity.h index 3aeb458dc6..88bdb5e2ae 100644 --- a/bgpd/bgp_ecommunity.h +++ b/bgpd/bgp_ecommunity.h @@ -172,4 +172,8 @@ extern int ecommunity_strip(struct ecommunity *ecom, uint8_t type, extern struct ecommunity *ecommunity_new(void); extern int ecommunity_del_val(struct ecommunity *ecom, struct ecommunity_val *eval); +struct bgp_pbr_entry_action; +extern int ecommunity_fill_pbr_action(struct ecommunity_val *ecom_eval, + struct bgp_pbr_entry_action *api); + #endif /* _QUAGGA_BGP_ECOMMUNITY_H */ diff --git a/bgpd/bgp_flowspec_util.c b/bgpd/bgp_flowspec_util.c index 007b27f17e..956cf28c21 100644 --- a/bgpd/bgp_flowspec_util.c +++ b/bgpd/bgp_flowspec_util.c @@ -25,6 +25,7 @@ #include "bgp_table.h" #include "bgp_flowspec_util.h" #include "bgp_flowspec_private.h" +#include "bgp_pbr.h" static void hex2bin(uint8_t *hex, int *bin) { @@ -50,311 +51,28 @@ static int hexstr2num(uint8_t *hexstr, int len) return num; } - -/* - * handle the flowspec address src/dst or generic address NLRI - * return number of bytes analysed ( >= 0). +/* call bgp_flowspec_op_decode + * returns offset */ -int bgp_flowspec_ip_address(enum bgp_flowspec_util_nlri_t type, - uint8_t *nlri_ptr, - uint32_t max_len, - void *result, int *error) +static int bgp_flowspec_call_non_opaque_decode(uint8_t *nlri_content, int len, + struct bgp_pbr_match_val *mval, + uint8_t *match_num, int *error) { - char *display = (char *)result; /* for return_string */ - struct prefix *prefix = (struct prefix *)result; - uint32_t offset = 0; - struct prefix prefix_local; - int psize; + int ret; - *error = 0; - memset(&prefix_local, 0, sizeof(struct prefix)); - /* read the prefix length */ - prefix_local.prefixlen = nlri_ptr[offset]; - psize = PSIZE(prefix_local.prefixlen); - offset++; - /* TODO Flowspec IPv6 Support */ - prefix_local.family = AF_INET; - /* Prefix length check. */ - if (prefix_local.prefixlen > prefix_blen(&prefix_local) * 8) - *error = -1; - /* When packet overflow occur return immediately. */ - if (psize + offset > max_len) - *error = -1; - /* Defensive coding, double-check - * the psize fits in a struct prefix - */ - if (psize > (ssize_t)sizeof(prefix_local.u)) - *error = -1; - memcpy(&prefix_local.u.prefix, &nlri_ptr[offset], psize); - offset += psize; - switch (type) { - case BGP_FLOWSPEC_RETURN_STRING: - prefix2str(&prefix_local, display, - BGP_FLOWSPEC_STRING_DISPLAY_MAX); - break; - case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: - PREFIX_COPY_IPV4(prefix, &prefix_local) - break; - case BGP_FLOWSPEC_VALIDATE_ONLY: - default: - break; - } - return offset; + ret = bgp_flowspec_op_decode( + BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, + nlri_content, + len, + mval, error); + if (*error < 0) + zlog_err("%s: flowspec_op_decode error %d", + __func__, *error); + else + *match_num = *error; + return ret; } -/* - * handle the flowspec operator NLRI - * return number of bytes analysed - * if there is an error, the passed error param is used to give error: - * -1 if decoding error, - * if result is a string, its assumed length - * is BGP_FLOWSPEC_STRING_DISPLAY_MAX - */ -int bgp_flowspec_op_decode(enum bgp_flowspec_util_nlri_t type, - uint8_t *nlri_ptr, - uint32_t max_len, - void *result, int *error) -{ - int op[8]; - int len, value, value_size; - int loop = 0; - char *ptr = (char *)result; /* for return_string */ - uint32_t offset = 0; - int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; - int len_written; - - *error = 0; - do { - hex2bin(&nlri_ptr[offset], op); - offset++; - len = 2*op[2]+op[3]; - value_size = 1 << len; - value = hexstr2num(&nlri_ptr[offset], value_size); - /* can not be < and > at the same time */ - if (op[5] == 1 && op[6] == 1) - *error = -1; - /* if first element, AND bit can not be set */ - if (op[1] == 1 && loop == 0) - *error = -1; - switch (type) { - case BGP_FLOWSPEC_RETURN_STRING: - if (loop) { - len_written = snprintf(ptr, len_string, - ", "); - len_string -= len_written; - ptr += len_written; - } - if (op[5] == 1) { - len_written = snprintf(ptr, len_string, - "<"); - len_string -= len_written; - ptr += len_written; - } - if (op[6] == 1) { - len_written = snprintf(ptr, len_string, - ">"); - len_string -= len_written; - ptr += len_written; - } - if (op[7] == 1) { - len_written = snprintf(ptr, len_string, - "="); - len_string -= len_written; - ptr += len_written; - } - len_written = snprintf(ptr, len_string, - " %d ", value); - len_string -= len_written; - ptr += len_written; - break; - case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: - /* TODO : FS OPAQUE */ - break; - case BGP_FLOWSPEC_VALIDATE_ONLY: - default: - /* no action */ - break; - } - offset += value_size; - loop++; - } while (op[0] == 0 && offset < max_len - 1); - if (offset > max_len) - *error = -1; - /* use error parameter to count the number of entries */ - if (*error == 0) - *error = loop; - return offset; -} - - -/* - * handle the flowspec tcpflags field - * return number of bytes analysed - * if there is an error, the passed error param is used to give error: - * -1 if decoding error, - * if result is a string, its assumed length - * is BGP_FLOWSPEC_STRING_DISPLAY_MAX - */ -int bgp_flowspec_tcpflags_decode(enum bgp_flowspec_util_nlri_t type, - uint8_t *nlri_ptr, - uint32_t max_len, - void *result, int *error) -{ - int op[8]; - int len, value_size, loop = 0, value; - char *ptr = (char *)result; /* for return_string */ - uint32_t offset = 0; - int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; - int len_written; - - *error = 0; - do { - hex2bin(&nlri_ptr[offset], op); - /* if first element, AND bit can not be set */ - if (op[1] == 1 && loop == 0) - *error = -1; - offset++; - len = 2 * op[2] + op[3]; - value_size = 1 << len; - value = hexstr2num(&nlri_ptr[offset], value_size); - switch (type) { - case BGP_FLOWSPEC_RETURN_STRING: - if (op[1] == 1 && loop != 0) { - len_written = snprintf(ptr, len_string, - ", and "); - len_string -= len_written; - ptr += len_written; - } else if (op[1] == 0 && loop != 0) { - len_written = snprintf(ptr, len_string, - ", or "); - len_string -= len_written; - ptr += len_written; - } - len_written = snprintf(ptr, len_string, - "tcp flags is "); - len_string -= len_written; - ptr += len_written; - if (op[6] == 1) { - ptr += snprintf(ptr, len_string, - "not "); - len_string -= len_written; - ptr += len_written; - } - if (op[7] == 1) { - ptr += snprintf(ptr, len_string, - "exactly match "); - len_string -= len_written; - ptr += len_written; - } - ptr += snprintf(ptr, len_string, - "%d", value); - len_string -= len_written; - ptr += len_written; - break; - case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: - /* TODO : FS OPAQUE */ - break; - case BGP_FLOWSPEC_VALIDATE_ONLY: - default: - /* no action */ - break; - } - offset += value_size; - loop++; - } while (op[0] == 0 && offset < max_len - 1); - if (offset > max_len) - *error = -1; - /* use error parameter to count the number of entries */ - if (*error == 0) - *error = loop; - return offset; -} - -/* - * handle the flowspec fragment type field - * return error (returned values are invalid) or number of bytes analysed - * -1 if error in decoding - * >= 0 : number of bytes analysed (ok). - */ -int bgp_flowspec_fragment_type_decode(enum bgp_flowspec_util_nlri_t type, - uint8_t *nlri_ptr, - uint32_t max_len, - void *result, int *error) -{ - int op[8]; - int len, value, value_size, loop = 0; - char *ptr = (char *)result; /* for return_string */ - uint32_t offset = 0; - int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; - int len_written; - - *error = 0; - do { - hex2bin(&nlri_ptr[offset], op); - offset++; - len = 2 * op[2] + op[3]; - value_size = 1 << len; - value = hexstr2num(&nlri_ptr[offset], value_size); - if (value != 1 && value != 2 && value != 4 && value != 8) - *error = -1; - offset += value_size; - /* TODO : as per RFC5574 : first Fragment bits are Reserved - * does that mean that it is not possible - * to handle multiple occurences ? - * as of today, we only grab the first TCP fragment - */ - if (loop) { - *error = -2; - loop++; - continue; - } - switch (type) { - case BGP_FLOWSPEC_RETURN_STRING: - switch (value) { - case 1: - len_written = snprintf(ptr, len_string, - "dont-fragment"); - len_string -= len_written; - ptr += len_written; - break; - case 2: - len_written = snprintf(ptr, len_string, - "is-fragment"); - len_string -= len_written; - ptr += len_written; - break; - case 4: - len_written = snprintf(ptr, len_string, - "first-fragment"); - len_string -= len_written; - ptr += len_written; - break; - case 8: - len_written = snprintf(ptr, len_string, - "last-fragment"); - len_string -= len_written; - ptr += len_written; - break; - default: - {} - } - break; - case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: - /* TODO : FS OPAQUE */ - break; - case BGP_FLOWSPEC_VALIDATE_ONLY: - default: - /* no action */ - break; - } - loop++; - } while (op[0] == 0 && offset < max_len - 1); - if (offset > max_len) - *error = -1; - return offset; -} - - static bool bgp_flowspec_contains_prefix(struct prefix *pfs, struct prefix *input, int prefix_check) @@ -437,6 +155,508 @@ static bool bgp_flowspec_contains_prefix(struct prefix *pfs, return false; } +/* + * handle the flowspec address src/dst or generic address NLRI + * return number of bytes analysed ( >= 0). + */ +int bgp_flowspec_ip_address(enum bgp_flowspec_util_nlri_t type, + uint8_t *nlri_ptr, + uint32_t max_len, + void *result, int *error) +{ + char *display = (char *)result; /* for return_string */ + struct prefix *prefix = (struct prefix *)result; + uint32_t offset = 0; + struct prefix prefix_local; + int psize; + + *error = 0; + memset(&prefix_local, 0, sizeof(struct prefix)); + /* read the prefix length */ + prefix_local.prefixlen = nlri_ptr[offset]; + psize = PSIZE(prefix_local.prefixlen); + offset++; + /* TODO Flowspec IPv6 Support */ + prefix_local.family = AF_INET; + /* Prefix length check. */ + if (prefix_local.prefixlen > prefix_blen(&prefix_local) * 8) + *error = -1; + /* When packet overflow occur return immediately. */ + if (psize + offset > max_len) + *error = -1; + /* Defensive coding, double-check + * the psize fits in a struct prefix + */ + if (psize > (ssize_t)sizeof(prefix_local.u)) + *error = -1; + memcpy(&prefix_local.u.prefix, &nlri_ptr[offset], psize); + offset += psize; + switch (type) { + case BGP_FLOWSPEC_RETURN_STRING: + prefix2str(&prefix_local, display, + BGP_FLOWSPEC_STRING_DISPLAY_MAX); + break; + case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: + PREFIX_COPY_IPV4(prefix, &prefix_local) + break; + case BGP_FLOWSPEC_VALIDATE_ONLY: + default: + break; + } + return offset; +} + +/* + * handle the flowspec operator NLRI + * return number of bytes analysed + * if there is an error, the passed error param is used to give error: + * -1 if decoding error, + * if result is a string, its assumed length + * is BGP_FLOWSPEC_STRING_DISPLAY_MAX + */ +int bgp_flowspec_op_decode(enum bgp_flowspec_util_nlri_t type, + uint8_t *nlri_ptr, + uint32_t max_len, + void *result, int *error) +{ + int op[8]; + int len, value, value_size; + int loop = 0; + char *ptr = (char *)result; /* for return_string */ + uint32_t offset = 0; + int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; + int len_written; + struct bgp_pbr_match_val *mval = (struct bgp_pbr_match_val *)result; + + *error = 0; + do { + if (loop > BGP_PBR_MATCH_VAL_MAX) + *error = -2; + hex2bin(&nlri_ptr[offset], op); + offset++; + len = 2*op[2]+op[3]; + value_size = 1 << len; + value = hexstr2num(&nlri_ptr[offset], value_size); + /* can not be < and > at the same time */ + if (op[5] == 1 && op[6] == 1) + *error = -1; + /* if first element, AND bit can not be set */ + if (op[1] == 1 && loop == 0) + *error = -1; + switch (type) { + case BGP_FLOWSPEC_RETURN_STRING: + if (loop) { + len_written = snprintf(ptr, len_string, + ", "); + len_string -= len_written; + ptr += len_written; + } + if (op[5] == 1) { + len_written = snprintf(ptr, len_string, + "<"); + len_string -= len_written; + ptr += len_written; + } + if (op[6] == 1) { + len_written = snprintf(ptr, len_string, + ">"); + len_string -= len_written; + ptr += len_written; + } + if (op[7] == 1) { + len_written = snprintf(ptr, len_string, + "="); + len_string -= len_written; + ptr += len_written; + } + len_written = snprintf(ptr, len_string, + " %d ", value); + len_string -= len_written; + ptr += len_written; + break; + case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: + /* limitation: stop converting */ + if (*error == -2) + break; + mval->value = value; + if (op[5] == 1) + mval->compare_operator |= + OPERATOR_COMPARE_LESS_THAN; + if (op[6] == 1) + mval->compare_operator |= + OPERATOR_COMPARE_GREATER_THAN; + if (op[7] == 1) + mval->compare_operator |= + OPERATOR_COMPARE_EQUAL_TO; + if (op[1] == 1) + mval->unary_operator = OPERATOR_UNARY_AND; + else + mval->unary_operator = OPERATOR_UNARY_OR; + mval++; + break; + case BGP_FLOWSPEC_VALIDATE_ONLY: + default: + /* no action */ + break; + } + offset += value_size; + loop++; + } while (op[0] == 0 && offset < max_len - 1); + if (offset > max_len) + *error = -1; + /* use error parameter to count the number of entries */ + if (*error == 0) + *error = loop; + return offset; +} + + +/* + * handle the flowspec tcpflags field + * return number of bytes analysed + * if there is an error, the passed error param is used to give error: + * -1 if decoding error, + * if result is a string, its assumed length + * is BGP_FLOWSPEC_STRING_DISPLAY_MAX + */ +int bgp_flowspec_tcpflags_decode(enum bgp_flowspec_util_nlri_t type, + uint8_t *nlri_ptr, + uint32_t max_len, + void *result, int *error) +{ + int op[8]; + int len, value_size, loop = 0, value; + char *ptr = (char *)result; /* for return_string */ + struct bgp_pbr_match_val *mval = (struct bgp_pbr_match_val *)result; + uint32_t offset = 0; + int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; + int len_written; + + *error = 0; + do { + if (loop > BGP_PBR_MATCH_VAL_MAX) + *error = -2; + hex2bin(&nlri_ptr[offset], op); + /* if first element, AND bit can not be set */ + if (op[1] == 1 && loop == 0) + *error = -1; + offset++; + len = 2 * op[2] + op[3]; + value_size = 1 << len; + value = hexstr2num(&nlri_ptr[offset], value_size); + switch (type) { + case BGP_FLOWSPEC_RETURN_STRING: + if (op[1] == 1 && loop != 0) { + len_written = snprintf(ptr, len_string, + ", and "); + len_string -= len_written; + ptr += len_written; + } else if (op[1] == 0 && loop != 0) { + len_written = snprintf(ptr, len_string, + ", or "); + len_string -= len_written; + ptr += len_written; + } + len_written = snprintf(ptr, len_string, + "tcp flags is "); + len_string -= len_written; + ptr += len_written; + if (op[6] == 1) { + ptr += snprintf(ptr, len_string, + "not "); + len_string -= len_written; + ptr += len_written; + } + if (op[7] == 1) { + ptr += snprintf(ptr, len_string, + "exactly match "); + len_string -= len_written; + ptr += len_written; + } + ptr += snprintf(ptr, len_string, + "%d", value); + len_string -= len_written; + ptr += len_written; + break; + case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: + /* limitation: stop converting */ + if (*error == -2) + break; + mval->value = value; + if (op[6] == 1) { + /* different from */ + mval->compare_operator |= + OPERATOR_COMPARE_LESS_THAN; + mval->compare_operator |= + OPERATOR_COMPARE_GREATER_THAN; + } else + mval->compare_operator |= + OPERATOR_COMPARE_EQUAL_TO; + if (op[7] == 1) + mval->compare_operator |= + OPERATOR_COMPARE_EXACT_MATCH; + if (op[1] == 1) + mval->unary_operator = + OPERATOR_UNARY_AND; + else + mval->unary_operator = + OPERATOR_UNARY_OR; + mval++; + break; + case BGP_FLOWSPEC_VALIDATE_ONLY: + default: + /* no action */ + break; + } + offset += value_size; + loop++; + } while (op[0] == 0 && offset < max_len - 1); + if (offset > max_len) + *error = -1; + /* use error parameter to count the number of entries */ + if (*error == 0) + *error = loop; + return offset; +} + +/* + * handle the flowspec fragment type field + * return error (returned values are invalid) or number of bytes analysed + * -1 if error in decoding + * >= 0 : number of bytes analysed (ok). + */ +int bgp_flowspec_fragment_type_decode(enum bgp_flowspec_util_nlri_t type, + uint8_t *nlri_ptr, + uint32_t max_len, + void *result, int *error) +{ + int op[8]; + int len, value, value_size, loop = 0; + char *ptr = (char *)result; /* for return_string */ + struct bgp_pbr_fragment_val *mval = + (struct bgp_pbr_fragment_val *)result; + uint32_t offset = 0; + int len_string = BGP_FLOWSPEC_STRING_DISPLAY_MAX; + int len_written; + + *error = 0; + do { + hex2bin(&nlri_ptr[offset], op); + offset++; + len = 2 * op[2] + op[3]; + value_size = 1 << len; + value = hexstr2num(&nlri_ptr[offset], value_size); + if (value != 1 && value != 2 && value != 4 && value != 8) + *error = -1; + offset += value_size; + /* TODO : as per RFC5574 : first Fragment bits are Reserved + * does that mean that it is not possible + * to handle multiple occurences ? + * as of today, we only grab the first TCP fragment + */ + if (loop) { + *error = -2; + loop++; + continue; + } + switch (type) { + case BGP_FLOWSPEC_RETURN_STRING: + switch (value) { + case 1: + len_written = snprintf(ptr, len_string, + "dont-fragment"); + len_string -= len_written; + ptr += len_written; + break; + case 2: + len_written = snprintf(ptr, len_string, + "is-fragment"); + len_string -= len_written; + ptr += len_written; + break; + case 4: + len_written = snprintf(ptr, len_string, + "first-fragment"); + len_string -= len_written; + ptr += len_written; + break; + case 8: + len_written = snprintf(ptr, len_string, + "last-fragment"); + len_string -= len_written; + ptr += len_written; + break; + default: + {} + } + break; + case BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE: + mval->bitmask = (uint8_t)value; + break; + case BGP_FLOWSPEC_VALIDATE_ONLY: + default: + /* no action */ + break; + } + loop++; + } while (op[0] == 0 && offset < max_len - 1); + if (offset > max_len) + *error = -1; + return offset; +} + +int bgp_flowspec_match_rules_fill(uint8_t *nlri_content, int len, + struct bgp_pbr_entry_main *bpem) +{ + int offset = 0, error = 0; + struct prefix *prefix; + struct bgp_pbr_match_val *mval; + uint8_t *match_num; + uint8_t bitmask = 0; + int ret = 0, type; + + while (offset < len - 1 && error >= 0) { + type = nlri_content[offset]; + offset++; + switch (type) { + case FLOWSPEC_DEST_PREFIX: + case FLOWSPEC_SRC_PREFIX: + bitmask = 0; + if (type == FLOWSPEC_DEST_PREFIX) { + bitmask |= PREFIX_DST_PRESENT; + prefix = &bpem->dst_prefix; + } else { + bitmask |= PREFIX_SRC_PRESENT; + prefix = &bpem->src_prefix; + } + ret = bgp_flowspec_ip_address( + BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, + nlri_content + offset, + len - offset, + prefix, &error); + if (error < 0) + zlog_err("%s: flowspec_ip_address error %d", + __func__, error); + else + bpem->match_bitmask |= bitmask; + offset += ret; + break; + case FLOWSPEC_IP_PROTOCOL: + match_num = &(bpem->match_protocol_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->protocol); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_PORT: + match_num = &(bpem->match_port_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->port); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_DEST_PORT: + match_num = &(bpem->match_dst_port_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->dst_port); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_SRC_PORT: + match_num = &(bpem->match_src_port_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->src_port); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_ICMP_TYPE: + match_num = &(bpem->match_icmp_type_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->icmp_type); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_ICMP_CODE: + match_num = &(bpem->match_icmp_code_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->icmp_code); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_PKT_LEN: + match_num = + &(bpem->match_packet_length_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->packet_length); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_DSCP: + match_num = &(bpem->match_dscp_num); + mval = (struct bgp_pbr_match_val *) + &(bpem->dscp); + offset += bgp_flowspec_call_non_opaque_decode( + nlri_content + offset, + len - offset, + mval, match_num, + &error); + break; + case FLOWSPEC_TCP_FLAGS: + ret = bgp_flowspec_tcpflags_decode( + BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, + nlri_content + offset, + len - offset, + &bpem->tcpflags, &error); + if (error < 0) + zlog_err("%s: flowspec_tcpflags_decode error %d", + __func__, error); + else + bpem->match_tcpflags_num = error; + /* contains the number of slots used */ + offset += ret; + break; + case FLOWSPEC_FRAGMENT: + ret = bgp_flowspec_fragment_type_decode( + BGP_FLOWSPEC_CONVERT_TO_NON_OPAQUE, + nlri_content + offset, + len - offset, &bpem->fragment, + &error); + if (error < 0) + zlog_err("%s: flowspec_fragment_type_decode error %d", + __func__, error); + else + bpem->match_bitmask |= FRAGMENT_PRESENT; + offset += ret; + break; + default: + zlog_err("%s: unknown type %d\n", __func__, type); + } + } + return error; +} + + struct bgp_node *bgp_flowspec_get_match_per_ip(afi_t afi, struct bgp_table *rib, struct prefix *match, diff --git a/bgpd/bgp_flowspec_util.h b/bgpd/bgp_flowspec_util.h index aa21461102..e4454ab4db 100644 --- a/bgpd/bgp_flowspec_util.h +++ b/bgpd/bgp_flowspec_util.h @@ -50,6 +50,9 @@ extern int bgp_flowspec_fragment_type_decode(enum bgp_flowspec_util_nlri_t type, uint8_t *nlri_ptr, uint32_t max_len, void *result, int *error); +struct bgp_pbr_entry_main; +extern int bgp_flowspec_match_rules_fill(uint8_t *nlri_content, int len, + struct bgp_pbr_entry_main *bpem); extern struct bgp_node *bgp_flowspec_get_match_per_ip(afi_t afi, struct bgp_table *rib, diff --git a/bgpd/bgp_pbr.c b/bgpd/bgp_pbr.c new file mode 100644 index 0000000000..04d6314fd7 --- /dev/null +++ b/bgpd/bgp_pbr.c @@ -0,0 +1,1140 @@ +/* + * BGP pbr + * Copyright (C) 6WIND + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "zebra.h" +#include "prefix.h" +#include "zclient.h" +#include "jhash.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_pbr.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_flowspec_util.h" +#include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_zebra.h" +#include "bgpd/bgp_mplsvpn.h" + +DEFINE_MTYPE_STATIC(BGPD, PBR_MATCH_ENTRY, "PBR match entry") +DEFINE_MTYPE_STATIC(BGPD, PBR_MATCH, "PBR match") +DEFINE_MTYPE_STATIC(BGPD, PBR_ACTION, "PBR action") + +static int bgp_pbr_match_counter_unique; +static int bgp_pbr_match_entry_counter_unique; +static int bgp_pbr_action_counter_unique; +static int bgp_pbr_match_iptable_counter_unique; + +struct bgp_pbr_match_iptable_unique { + uint32_t unique; + struct bgp_pbr_match *bpm_found; +}; + +struct bgp_pbr_match_entry_unique { + uint32_t unique; + struct bgp_pbr_match_entry *bpme_found; +}; + +struct bgp_pbr_action_unique { + uint32_t unique; + struct bgp_pbr_action *bpa_found; +}; + +static int bgp_pbr_action_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_action *bpa = (struct bgp_pbr_action *)backet->data; + struct bgp_pbr_action_unique *bpau = (struct bgp_pbr_action_unique *) + arg; + uint32_t unique = bpau->unique; + + if (bpa->unique == unique) { + bpau->bpa_found = bpa; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +static int bgp_pbr_match_entry_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match_entry *bpme = + (struct bgp_pbr_match_entry *)backet->data; + struct bgp_pbr_match_entry_unique *bpmeu = + (struct bgp_pbr_match_entry_unique *)arg; + uint32_t unique = bpmeu->unique; + + if (bpme->unique == unique) { + bpmeu->bpme_found = bpme; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +struct bgp_pbr_match_ipsetname { + char *ipsetname; + struct bgp_pbr_match *bpm_found; +}; + +static int bgp_pbr_match_pername_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match *bpm = (struct bgp_pbr_match *)backet->data; + struct bgp_pbr_match_ipsetname *bpmi = + (struct bgp_pbr_match_ipsetname *)arg; + char *ipset_name = bpmi->ipsetname; + + if (!strncmp(ipset_name, bpm->ipset_name, + ZEBRA_IPSET_NAME_SIZE)) { + bpmi->bpm_found = bpm; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +static int bgp_pbr_match_iptable_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match *bpm = (struct bgp_pbr_match *)backet->data; + struct bgp_pbr_match_iptable_unique *bpmiu = + (struct bgp_pbr_match_iptable_unique *)arg; + uint32_t unique = bpmiu->unique; + + if (bpm->unique2 == unique) { + bpmiu->bpm_found = bpm; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +struct bgp_pbr_match_unique { + uint32_t unique; + struct bgp_pbr_match *bpm_found; +}; + +static int bgp_pbr_match_walkcb(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match *bpm = (struct bgp_pbr_match *)backet->data; + struct bgp_pbr_match_unique *bpmu = (struct bgp_pbr_match_unique *) + arg; + uint32_t unique = bpmu->unique; + + if (bpm->unique == unique) { + bpmu->bpm_found = bpm; + return HASHWALK_ABORT; + } + return HASHWALK_CONTINUE; +} + +static int sprintf_bgp_pbr_match_val(char *str, struct bgp_pbr_match_val *mval, + const char *prepend) +{ + char *ptr = str; + + if (prepend) + ptr += sprintf(ptr, "%s", prepend); + else { + if (mval->unary_operator & OPERATOR_UNARY_OR) + ptr += sprintf(ptr, ", or "); + if (mval->unary_operator & OPERATOR_UNARY_AND) + ptr += sprintf(ptr, ", and "); + } + if (mval->compare_operator & OPERATOR_COMPARE_LESS_THAN) + ptr += sprintf(ptr, "<"); + if (mval->compare_operator & OPERATOR_COMPARE_GREATER_THAN) + ptr += sprintf(ptr, ">"); + if (mval->compare_operator & OPERATOR_COMPARE_EQUAL_TO) + ptr += sprintf(ptr, "="); + if (mval->compare_operator & OPERATOR_COMPARE_EXACT_MATCH) + ptr += sprintf(ptr, "match"); + ptr += sprintf(ptr, " %u", mval->value); + return (int)(ptr - str); +} + +#define INCREMENT_DISPLAY(_ptr, _cnt) do { \ + if (_cnt) \ + (_ptr) += sprintf((_ptr), "; "); \ + _cnt++; \ + } while (0) + +/* return 1 if OK, 0 if validation should stop) */ +static int bgp_pbr_validate_policy_route(struct bgp_pbr_entry_main *api) +{ + /* because bgp pbr entry may contain unsupported + * combinations, a message will be displayed here if + * not supported. + * for now, only match/set supported is + * - combination src/dst => redirect nexthop [ + rate] + * - combination src/dst => redirect VRF [ + rate] + * - combination src/dst => drop + */ + if (api->match_src_port_num || api->match_dst_port_num + || api->match_port_num || api->match_protocol_num + || api->match_icmp_type_num || api->match_icmp_type_num + || api->match_packet_length_num || api->match_dscp_num + || api->match_tcpflags_num) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_debug("BGP: some SET actions not supported by Zebra. ignoring."); + } + return 0; + } + if (!(api->match_bitmask & PREFIX_SRC_PRESENT) && + !(api->match_bitmask & PREFIX_DST_PRESENT)) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_debug("BGP: match actions without src" + " or dst address can not operate." + " ignoring."); + } + return 0; + } + return 1; +} + +/* return -1 if build or validation failed */ +static int bgp_pbr_build_and_validate_entry(struct prefix *p, + struct bgp_info *info, + struct bgp_pbr_entry_main *api) +{ + int ret; + int i, action_count = 0; + struct ecommunity *ecom; + struct ecommunity_val *ecom_eval; + struct bgp_pbr_entry_action *api_action; + struct prefix *src = NULL, *dst = NULL; + int valid_prefix = 0; + afi_t afi = AFI_IP; + + /* extract match from flowspec entries */ + ret = bgp_flowspec_match_rules_fill((uint8_t *)p->u.prefix_flowspec.ptr, + p->u.prefix_flowspec.prefixlen, api); + if (ret < 0) + return -1; + /* extract actiosn from flowspec ecom list */ + if (info && info->attr && info->attr->ecommunity) { + ecom = info->attr->ecommunity; + for (i = 0; i < ecom->size; i++) { + ecom_eval = (struct ecommunity_val *) + ecom->val + (i * ECOMMUNITY_SIZE); + + if (action_count > ACTIONS_MAX_NUM) { + if (BGP_DEBUG(pbr, PBR_ERROR)) + zlog_err("%s: flowspec actions exceeds limit (max %u)", + __func__, action_count); + break; + } + api_action = &api->actions[action_count]; + + if ((ecom_eval->val[1] == + (char)ECOMMUNITY_REDIRECT_VRF) && + (ecom_eval->val[0] == + (char)ECOMMUNITY_ENCODE_TRANS_EXP || + ecom_eval->val[0] == + (char)ECOMMUNITY_EXTENDED_COMMUNITY_PART_2 || + ecom_eval->val[0] == + (char)ECOMMUNITY_EXTENDED_COMMUNITY_PART_3)) { + struct ecommunity *eckey = ecommunity_new(); + struct ecommunity_val ecom_copy; + + memcpy(&ecom_copy, ecom_eval, + sizeof(struct ecommunity_val)); + ecom_copy.val[0] &= + ~ECOMMUNITY_ENCODE_TRANS_EXP; + ecom_copy.val[1] = ECOMMUNITY_ROUTE_TARGET; + ecommunity_add_val(eckey, &ecom_copy); + + api_action->action = ACTION_REDIRECT; + api_action->u.redirect_vrf = + get_first_vrf_for_redirect_with_rt( + eckey); + ecommunity_free(&eckey); + } else if ((ecom_eval->val[0] == + (char)ECOMMUNITY_ENCODE_REDIRECT_IP_NH) && + (ecom_eval->val[1] == + (char)ECOMMUNITY_REDIRECT_IP_NH)) { + api_action->action = ACTION_REDIRECT_IP; + api_action->u.zr.redirect_ip_v4.s_addr = + info->attr->nexthop.s_addr; + api_action->u.zr.duplicate = ecom_eval->val[7]; + } else { + if (ecom_eval->val[0] != + (char)ECOMMUNITY_ENCODE_TRANS_EXP) + continue; + ret = ecommunity_fill_pbr_action(ecom_eval, + api_action); + if (ret != 0) + continue; + } + api->action_num++; + } + } + + /* validate if incoming matc/action is compatible + * with our policy routing engine + */ + if (!bgp_pbr_validate_policy_route(api)) + return -1; + + /* check inconsistency in the match rule */ + if (api->match_bitmask & PREFIX_SRC_PRESENT) { + src = &api->src_prefix; + afi = family2afi(src->family); + valid_prefix = 1; + } + if (api->match_bitmask & PREFIX_DST_PRESENT) { + dst = &api->dst_prefix; + if (valid_prefix && afi != family2afi(dst->family)) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_debug("%s: inconsistency:" + " no match for afi src and dst (%u/%u)", + __func__, afi, family2afi(dst->family)); + } + return -1; + } + } + return 0; +} + +static void bgp_pbr_match_entry_free(void *arg) +{ + struct bgp_pbr_match_entry *bpme; + + bpme = (struct bgp_pbr_match_entry *)arg; + + if (bpme->installed) { + bgp_send_pbr_ipset_entry_match(bpme, false); + bpme->installed = false; + bpme->backpointer = NULL; + } + XFREE(MTYPE_PBR_MATCH_ENTRY, bpme); +} + +static void bgp_pbr_match_free(void *arg) +{ + struct bgp_pbr_match *bpm; + + bpm = (struct bgp_pbr_match *)arg; + + hash_clean(bpm->entry_hash, bgp_pbr_match_entry_free); + + if (hashcount(bpm->entry_hash) == 0) { + /* delete iptable entry first */ + /* then delete ipset match */ + if (bpm->installed) { + if (bpm->installed_in_iptable) { + bgp_send_pbr_iptable(bpm->action, + bpm, false); + bpm->installed_in_iptable = false; + bpm->action->refcnt--; + } + bgp_send_pbr_ipset_match(bpm, false); + bpm->installed = false; + bpm->action = NULL; + } + } + hash_free(bpm->entry_hash); + + XFREE(MTYPE_PBR_MATCH, bpm); +} + +static void *bgp_pbr_match_alloc_intern(void *arg) +{ + struct bgp_pbr_match *bpm, *new; + + bpm = (struct bgp_pbr_match *)arg; + + new = XCALLOC(MTYPE_PBR_MATCH, sizeof(*new)); + memcpy(new, bpm, sizeof(*bpm)); + + return new; +} + +static void bgp_pbr_action_free(void *arg) +{ + struct bgp_pbr_action *bpa; + + bpa = (struct bgp_pbr_action *)arg; + + if (bpa->refcnt == 0) { + if (bpa->installed && bpa->table_id != 0) { + bgp_send_pbr_rule_action(bpa, false); + bgp_zebra_announce_default(bpa->bgp, &(bpa->nh), + AFI_IP, + bpa->table_id, + false); + } + } + XFREE(MTYPE_PBR_ACTION, bpa); +} + +static void *bgp_pbr_action_alloc_intern(void *arg) +{ + struct bgp_pbr_action *bpa, *new; + + bpa = (struct bgp_pbr_action *)arg; + + new = XCALLOC(MTYPE_PBR_ACTION, sizeof(*new)); + + memcpy(new, bpa, sizeof(*bpa)); + + return new; +} + +static void *bgp_pbr_match_entry_alloc_intern(void *arg) +{ + struct bgp_pbr_match_entry *bpme, *new; + + bpme = (struct bgp_pbr_match_entry *)arg; + + new = XCALLOC(MTYPE_PBR_MATCH_ENTRY, sizeof(*new)); + + memcpy(new, bpme, sizeof(*bpme)); + + return new; +} + +uint32_t bgp_pbr_match_hash_key(void *arg) +{ + struct bgp_pbr_match *pbm = (struct bgp_pbr_match *)arg; + uint32_t key; + + key = jhash_1word(pbm->vrf_id, 0x4312abde); + key = jhash_1word(pbm->flags, key); + return jhash_1word(pbm->type, key); +} + +int bgp_pbr_match_hash_equal(const void *arg1, const void *arg2) +{ + const struct bgp_pbr_match *r1, *r2; + + r1 = (const struct bgp_pbr_match *)arg1; + r2 = (const struct bgp_pbr_match *)arg2; + + if (r1->vrf_id != r2->vrf_id) + return 0; + + if (r1->type != r2->type) + return 0; + + if (r1->flags != r2->flags) + return 0; + + if (r1->action != r2->action) + return 0; + + return 1; +} + +uint32_t bgp_pbr_match_entry_hash_key(void *arg) +{ + struct bgp_pbr_match_entry *pbme; + uint32_t key; + + pbme = (struct bgp_pbr_match_entry *)arg; + key = prefix_hash_key(&pbme->src); + key = jhash_1word(prefix_hash_key(&pbme->dst), key); + + return key; +} + +int bgp_pbr_match_entry_hash_equal(const void *arg1, const void *arg2) +{ + const struct bgp_pbr_match_entry *r1, *r2; + + r1 = (const struct bgp_pbr_match_entry *)arg1; + r2 = (const struct bgp_pbr_match_entry *)arg2; + + /* on updates, comparing + * backpointer is not necessary + */ + + /* unique value is self calculated + */ + + /* rate is ignored for now + */ + + if (!prefix_same(&r1->src, &r2->src)) + return 0; + + if (!prefix_same(&r1->dst, &r2->dst)) + return 0; + + return 1; +} + +uint32_t bgp_pbr_action_hash_key(void *arg) +{ + struct bgp_pbr_action *pbra; + uint32_t key; + + pbra = (struct bgp_pbr_action *)arg; + key = jhash_1word(pbra->table_id, 0x4312abde); + key = jhash_1word(pbra->fwmark, key); + return key; +} + +int bgp_pbr_action_hash_equal(const void *arg1, const void *arg2) +{ + const struct bgp_pbr_action *r1, *r2; + + r1 = (const struct bgp_pbr_action *)arg1; + r2 = (const struct bgp_pbr_action *)arg2; + + /* unique value is self calculated + * table and fwmark is self calculated + */ + if (r1->rate != r2->rate) + return 0; + + if (r1->vrf_id != r2->vrf_id) + return 0; + + if (memcmp(&r1->nh, &r2->nh, sizeof(struct nexthop))) + return 0; + return 1; +} + +struct bgp_pbr_action *bgp_pbr_action_rule_lookup(vrf_id_t vrf_id, + uint32_t unique) +{ + struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id); + struct bgp_pbr_action_unique bpau; + + if (!bgp || unique == 0) + return NULL; + bpau.unique = unique; + bpau.bpa_found = NULL; + hash_walk(bgp->pbr_action_hash, bgp_pbr_action_walkcb, &bpau); + return bpau.bpa_found; +} + +struct bgp_pbr_match *bgp_pbr_match_ipset_lookup(vrf_id_t vrf_id, + uint32_t unique) +{ + struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id); + struct bgp_pbr_match_unique bpmu; + + if (!bgp || unique == 0) + return NULL; + bpmu.unique = unique; + bpmu.bpm_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_match_walkcb, &bpmu); + return bpmu.bpm_found; +} + +struct bgp_pbr_match_entry *bgp_pbr_match_ipset_entry_lookup(vrf_id_t vrf_id, + char *ipset_name, + uint32_t unique) +{ + struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id); + struct bgp_pbr_match_entry_unique bpmeu; + struct bgp_pbr_match_ipsetname bpmi; + + if (!bgp || unique == 0) + return NULL; + bpmi.ipsetname = XCALLOC(MTYPE_TMP, ZEBRA_IPSET_NAME_SIZE); + snprintf(bpmi.ipsetname, ZEBRA_IPSET_NAME_SIZE, "%s", ipset_name); + bpmi.bpm_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_match_pername_walkcb, &bpmi); + XFREE(MTYPE_TMP, bpmi.ipsetname); + if (!bpmi.bpm_found) + return NULL; + bpmeu.bpme_found = NULL; + bpmeu.unique = unique; + hash_walk(bpmi.bpm_found->entry_hash, + bgp_pbr_match_entry_walkcb, &bpmeu); + return bpmeu.bpme_found; +} + +struct bgp_pbr_match *bgp_pbr_match_iptable_lookup(vrf_id_t vrf_id, + uint32_t unique) +{ + struct bgp *bgp = bgp_lookup_by_vrf_id(vrf_id); + struct bgp_pbr_match_iptable_unique bpmiu; + + if (!bgp || unique == 0) + return NULL; + bpmiu.unique = unique; + bpmiu.bpm_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_match_iptable_walkcb, &bpmiu); + return bpmiu.bpm_found; +} + +void bgp_pbr_cleanup(struct bgp *bgp) +{ + if (bgp->pbr_match_hash) { + hash_clean(bgp->pbr_match_hash, bgp_pbr_match_free); + hash_free(bgp->pbr_match_hash); + bgp->pbr_match_hash = NULL; + } + if (bgp->pbr_action_hash) { + hash_clean(bgp->pbr_action_hash, bgp_pbr_action_free); + hash_free(bgp->pbr_action_hash); + bgp->pbr_action_hash = NULL; + } +} + +void bgp_pbr_init(struct bgp *bgp) +{ + bgp->pbr_match_hash = + hash_create_size(8, bgp_pbr_match_hash_key, + bgp_pbr_match_hash_equal, + "Match Hash"); + bgp->pbr_action_hash = + hash_create_size(8, bgp_pbr_action_hash_key, + bgp_pbr_action_hash_equal, + "Match Hash Entry"); +} + +void bgp_pbr_print_policy_route(struct bgp_pbr_entry_main *api) +{ + int i = 0; + char return_string[512]; + char *ptr = return_string; + char buff[64]; + int nb_items = 0; + + ptr += sprintf(ptr, "MATCH : "); + if (api->match_bitmask & PREFIX_SRC_PRESENT) { + struct prefix *p = &(api->src_prefix); + + ptr += sprintf(ptr, "@src %s", prefix2str(p, buff, 64)); + INCREMENT_DISPLAY(ptr, nb_items); + } + if (api->match_bitmask & PREFIX_DST_PRESENT) { + struct prefix *p = &(api->dst_prefix); + + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@dst %s", prefix2str(p, buff, 64)); + } + + if (api->match_protocol_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_protocol_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->protocol[i], + i > 0 ? NULL : "@proto "); + + if (api->match_src_port_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_src_port_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->src_port[i], + i > 0 ? NULL : "@srcport "); + + if (api->match_dst_port_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_dst_port_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->dst_port[i], + i > 0 ? NULL : "@dstport "); + + if (api->match_port_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_port_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->port[i], + i > 0 ? NULL : "@port "); + + if (api->match_icmp_type_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_icmp_type_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->icmp_type[i], + i > 0 ? NULL : "@icmptype "); + + if (api->match_icmp_code_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_icmp_code_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->icmp_code[i], + i > 0 ? NULL : "@icmpcode "); + + if (api->match_packet_length_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_packet_length_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->packet_length[i], + i > 0 ? NULL : "@plen "); + + if (api->match_dscp_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_dscp_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->dscp[i], + i > 0 ? NULL : "@dscp "); + + if (api->match_tcpflags_num) + INCREMENT_DISPLAY(ptr, nb_items); + for (i = 0; i < api->match_tcpflags_num; i++) + ptr += sprintf_bgp_pbr_match_val(ptr, &api->tcpflags[i], + i > 0 ? NULL : "@tcpflags "); + + if (api->match_bitmask & FRAGMENT_PRESENT) { + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@fragment %u", api->fragment.bitmask); + } + if (!nb_items) + ptr = return_string; + else + ptr += sprintf(ptr, "; "); + if (api->action_num) + ptr += sprintf(ptr, "SET : "); + nb_items = 0; + for (i = 0; i < api->action_num; i++) { + switch (api->actions[i].action) { + case ACTION_TRAFFICRATE: + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@set rate %f", + api->actions[i].u.r.rate); + break; + case ACTION_TRAFFIC_ACTION: + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@action "); + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_TERMINATE) + ptr += sprintf(ptr, + " terminate (apply filter(s))"); + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_DISTRIBUTE) + ptr += sprintf(ptr, " distribute"); + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_SAMPLE) + ptr += sprintf(ptr, " sample"); + break; + case ACTION_REDIRECT_IP: + INCREMENT_DISPLAY(ptr, nb_items); + char local_buff[INET_ADDRSTRLEN]; + + if (inet_ntop(AF_INET, + &api->actions[i].u.zr.redirect_ip_v4, + local_buff, INET_ADDRSTRLEN) != NULL) + ptr += sprintf(ptr, + "@redirect ip nh %s", local_buff); + break; + case ACTION_REDIRECT: + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@redirect vrf %u", + api->actions[i].u.redirect_vrf); + break; + case ACTION_MARKING: + INCREMENT_DISPLAY(ptr, nb_items); + ptr += sprintf(ptr, "@set dscp %u", + api->actions[i].u.marking_dscp); + break; + default: + break; + } + } + zlog_info("%s", return_string); +} + +static void bgp_pbr_flush_entry(struct bgp *bgp, struct bgp_pbr_action *bpa, + struct bgp_pbr_match *bpm, + struct bgp_pbr_match_entry *bpme) +{ + /* if bpme is null, bpm is also null + */ + if (bpme == NULL) + return; + /* ipset del entry */ + if (bpme->installed) { + bgp_send_pbr_ipset_entry_match(bpme, false); + bpme->installed = false; + bpme->backpointer = NULL; + } + hash_release(bpm->entry_hash, bpme); + if (hashcount(bpm->entry_hash) == 0) { + /* delete iptable entry first */ + /* then delete ipset match */ + if (bpm->installed) { + if (bpm->installed_in_iptable) { + bgp_send_pbr_iptable(bpm->action, + bpm, false); + bpm->installed_in_iptable = false; + bpm->action->refcnt--; + } + bgp_send_pbr_ipset_match(bpm, false); + bpm->installed = false; + bpm->action = NULL; + } + hash_release(bgp->pbr_match_hash, bpm); + /* XXX release pbr_match_action if not used + * note that drop does not need to call send_pbr_action + */ + } + if (bpa->refcnt == 0) { + if (bpa->installed && bpa->table_id != 0) { + bgp_send_pbr_rule_action(bpa, false); + bgp_zebra_announce_default(bpa->bgp, &(bpa->nh), + AFI_IP, + bpa->table_id, + false); + } + } +} + +struct bgp_pbr_match_entry_remain { + struct bgp_pbr_match_entry *bpme_to_match; + struct bgp_pbr_match_entry *bpme_found; +}; + +static int bgp_pbr_get_remaining_entry(struct hash_backet *backet, void *arg) +{ + struct bgp_pbr_match *bpm = (struct bgp_pbr_match *)backet->data; + struct bgp_pbr_match_entry_remain *bpmer = + (struct bgp_pbr_match_entry_remain *)arg; + struct bgp_pbr_match *bpm_temp; + struct bgp_pbr_match_entry *bpme = bpmer->bpme_to_match; + + if (!bpme->backpointer || + bpm == bpme->backpointer || + bpme->backpointer->action == bpm->action) + return HASHWALK_CONTINUE; + /* ensure bpm other characteristics are equal */ + bpm_temp = bpme->backpointer; + if (bpm_temp->vrf_id != bpm->vrf_id || + bpm_temp->type != bpm->type || + bpm_temp->flags != bpm->flags) + return HASHWALK_CONTINUE; + + /* look for remaining bpme */ + bpmer->bpme_found = hash_lookup(bpm->entry_hash, bpme); + if (!bpmer->bpme_found) + return HASHWALK_CONTINUE; + return HASHWALK_ABORT; +} + +static void bgp_pbr_policyroute_remove_from_zebra(struct bgp *bgp, + struct bgp_info *binfo, + vrf_id_t vrf_id, + struct prefix *src, + struct prefix *dst) +{ + struct bgp_pbr_match temp; + struct bgp_pbr_match_entry temp2; + struct bgp_pbr_match *bpm; + struct bgp_pbr_match_entry *bpme; + struct bgp_pbr_match_entry_remain bpmer; + + /* as we don't know information from EC + * look for bpm that have the bpm + * with vrf_id characteristics + */ + memset(&temp2, 0, sizeof(temp2)); + memset(&temp, 0, sizeof(temp)); + if (src) { + temp.flags |= MATCH_IP_SRC_SET; + prefix_copy(&temp2.src, src); + } else + temp2.src.family = AF_INET; + if (dst) { + temp.flags |= MATCH_IP_DST_SET; + prefix_copy(&temp2.dst, dst); + } else + temp2.dst.family = AF_INET; + + if (src == NULL || dst == NULL) + temp.type = IPSET_NET; + else + temp.type = IPSET_NET_NET; + if (vrf_id == VRF_UNKNOWN) /* XXX case BGP destroy */ + temp.vrf_id = 0; + else + temp.vrf_id = vrf_id; + bpme = &temp2; + bpm = &temp; + bpme->backpointer = bpm; + /* right now, a previous entry may already exist + * flush previous entry if necessary + */ + bpmer.bpme_to_match = bpme; + bpmer.bpme_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_get_remaining_entry, &bpmer); + if (bpmer.bpme_found) { + static struct bgp_pbr_match *local_bpm; + static struct bgp_pbr_action *local_bpa; + + local_bpm = bpmer.bpme_found->backpointer; + local_bpa = local_bpm->action; + bgp_pbr_flush_entry(bgp, local_bpa, + local_bpm, bpmer.bpme_found); + } +} + +static void bgp_pbr_policyroute_add_to_zebra(struct bgp *bgp, + struct bgp_info *binfo, + vrf_id_t vrf_id, + struct prefix *src, + struct prefix *dst, + struct nexthop *nh, + float *rate) +{ + struct bgp_pbr_match temp; + struct bgp_pbr_match_entry temp2; + struct bgp_pbr_match *bpm; + struct bgp_pbr_match_entry *bpme = NULL; + struct bgp_pbr_action temp3; + struct bgp_pbr_action *bpa = NULL; + struct bgp_pbr_match_entry_remain bpmer; + + /* look for bpa first */ + memset(&temp3, 0, sizeof(temp3)); + if (rate) + temp3.rate = *rate; + if (nh) + memcpy(&temp3.nh, nh, sizeof(struct nexthop)); + temp3.vrf_id = vrf_id; + bpa = hash_get(bgp->pbr_action_hash, &temp3, + bgp_pbr_action_alloc_intern); + + if (bpa->fwmark == 0) { + /* drop is handled by iptable */ + if (nh && nh->type == NEXTHOP_TYPE_BLACKHOLE) { + bpa->table_id = 0; + bpa->installed = true; + } else { + bpa->fwmark = bgp_zebra_tm_get_id(); + bpa->table_id = bpa->fwmark; + bpa->installed = false; + } + bpa->bgp = bgp; + bpa->unique = ++bgp_pbr_action_counter_unique; + /* 0 value is forbidden */ + bpa->install_in_progress = false; + } + + /* then look for bpm */ + memset(&temp, 0, sizeof(temp)); + if (src == NULL || dst == NULL) + temp.type = IPSET_NET; + else + temp.type = IPSET_NET_NET; + temp.vrf_id = vrf_id; + if (src) + temp.flags |= MATCH_IP_SRC_SET; + if (dst) + temp.flags |= MATCH_IP_DST_SET; + temp.action = bpa; + bpm = hash_get(bgp->pbr_match_hash, &temp, + bgp_pbr_match_alloc_intern); + + /* new, then self allocate ipset_name and unique */ + if (bpm && bpm->unique == 0) { + bpm->unique = ++bgp_pbr_match_counter_unique; + /* 0 value is forbidden */ + sprintf(bpm->ipset_name, "match%p", bpm); + bpm->entry_hash = hash_create_size(8, + bgp_pbr_match_entry_hash_key, + bgp_pbr_match_entry_hash_equal, + "Match Entry Hash"); + bpm->installed = false; + + /* unique2 should be updated too */ + bpm->unique2 = ++bgp_pbr_match_iptable_counter_unique; + bpm->installed_in_iptable = false; + bpm->install_in_progress = false; + bpm->install_iptable_in_progress = false; + } + + memset(&temp2, 0, sizeof(temp2)); + if (src) + prefix_copy(&temp2.src, src); + else + temp2.src.family = AF_INET; + if (dst) + prefix_copy(&temp2.dst, dst); + else + temp2.dst.family = AF_INET; + if (bpm) + bpme = hash_get(bpm->entry_hash, &temp2, + bgp_pbr_match_entry_alloc_intern); + if (bpme && bpme->unique == 0) { + bpme->unique = ++bgp_pbr_match_entry_counter_unique; + /* 0 value is forbidden */ + bpme->backpointer = bpm; + bpme->installed = false; + bpme->install_in_progress = false; + } + + /* BGP FS: append entry to zebra + * - policies are not routing entries and as such + * route replace semantics don't necessarily follow + * through to policy entries + * - because of that, not all policing information will be stored + * into zebra. and non selected policies will be suppressed from zebra + * - as consequence, in order to bring consistency + * a policy will be added, then ifan ecmp policy exists, + * it will be suppressed subsequently + */ + /* ip rule add */ + if (!bpa->installed) { + bgp_send_pbr_rule_action(bpa, true); + bgp_zebra_announce_default(bgp, nh, + AFI_IP, bpa->table_id, true); + } + + /* ipset create */ + if (bpm && !bpm->installed) + bgp_send_pbr_ipset_match(bpm, true); + /* ipset add */ + if (bpme && !bpme->installed) + bgp_send_pbr_ipset_entry_match(bpme, true); + + /* iptables */ + if (bpm && !bpm->installed_in_iptable) + bgp_send_pbr_iptable(bpa, bpm, true); + + /* A previous entry may already exist + * flush previous entry if necessary + */ + bpmer.bpme_to_match = bpme; + bpmer.bpme_found = NULL; + hash_walk(bgp->pbr_match_hash, bgp_pbr_get_remaining_entry, &bpmer); + if (bpmer.bpme_found) { + static struct bgp_pbr_match *local_bpm; + static struct bgp_pbr_action *local_bpa; + + local_bpm = bpmer.bpme_found->backpointer; + local_bpa = local_bpm->action; + bgp_pbr_flush_entry(bgp, local_bpa, + local_bpm, bpmer.bpme_found); + } + + +} + +static void bgp_pbr_handle_entry(struct bgp *bgp, + struct bgp_info *binfo, + struct bgp_pbr_entry_main *api, + bool add) +{ + struct nexthop nh; + int i = 0; + int continue_loop = 1; + float rate = 0; + struct prefix *src = NULL, *dst = NULL; + + if (api->match_bitmask & PREFIX_SRC_PRESENT) + src = &api->src_prefix; + if (api->match_bitmask & PREFIX_DST_PRESENT) + dst = &api->dst_prefix; + memset(&nh, 0, sizeof(struct nexthop)); + nh.vrf_id = VRF_UNKNOWN; + + if (!add) + return bgp_pbr_policyroute_remove_from_zebra(bgp, binfo, + api->vrf_id, src, dst); + /* no action for add = true */ + for (i = 0; i < api->action_num; i++) { + switch (api->actions[i].action) { + case ACTION_TRAFFICRATE: + /* drop packet */ + if (api->actions[i].u.r.rate == 0) { + nh.vrf_id = api->vrf_id; + nh.type = NEXTHOP_TYPE_BLACKHOLE; + bgp_pbr_policyroute_add_to_zebra(bgp, binfo, + api->vrf_id, src, dst, + &nh, &rate); + } else { + /* update rate. can be reentrant */ + rate = api->actions[i].u.r.rate; + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_warn("PBR: ignoring Set action rate %f", + api->actions[i].u.r.rate); + } + } + break; + case ACTION_TRAFFIC_ACTION: + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_SAMPLE) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_warn("PBR: Sample action Ignored"); + } + } +#if 0 + if (api->actions[i].u.za.filter + & TRAFFIC_ACTION_DISTRIBUTE) { + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_warn("PBR: Distribute action Applies"); + } + continue_loop = 0; + /* continue forwarding entry as before + * no action + */ + } +#endif /* XXX to confirm behaviour of traffic action. for now , ignore */ + /* terminate action: run other filters + */ + break; + case ACTION_REDIRECT_IP: + nh.type = NEXTHOP_TYPE_IPV4; + nh.gate.ipv4.s_addr = + api->actions[i].u.zr.redirect_ip_v4.s_addr; + nh.vrf_id = api->vrf_id; + bgp_pbr_policyroute_add_to_zebra(bgp, binfo, + api->vrf_id, + src, dst, + &nh, &rate); + /* XXX combination with REDIRECT_VRF + * + REDIRECT_NH_IP not done + */ + continue_loop = 0; + break; + case ACTION_REDIRECT: + nh.vrf_id = api->actions[i].u.redirect_vrf; + nh.type = NEXTHOP_TYPE_IPV4; + bgp_pbr_policyroute_add_to_zebra(bgp, binfo, + api->vrf_id, + src, dst, + &nh, &rate); + continue_loop = 0; + break; + case ACTION_MARKING: + if (BGP_DEBUG(pbr, PBR)) { + bgp_pbr_print_policy_route(api); + zlog_warn("PBR: Set DSCP %u Ignored", + api->actions[i].u.marking_dscp); + } + break; + default: + break; + } + if (continue_loop == 0) + break; + } +} + +void bgp_pbr_update_entry(struct bgp *bgp, struct prefix *p, + struct bgp_info *info, afi_t afi, safi_t safi, + bool nlri_update) +{ + struct bgp_pbr_entry_main api; + + if (afi == AFI_IP6) + return; /* IPv6 not supported */ + if (safi != SAFI_FLOWSPEC) + return; /* not supported */ + /* Make Zebra API structure. */ + memset(&api, 0, sizeof(api)); + api.vrf_id = bgp->vrf_id; + api.afi = afi; + + if (bgp_pbr_build_and_validate_entry(p, info, &api) < 0) { + if (BGP_DEBUG(pbr, PBR_ERROR)) + zlog_err("%s: cancel updating entry in bgp pbr", + __func__); + return; + } + bgp_pbr_handle_entry(bgp, info, &api, nlri_update); +} diff --git a/bgpd/bgp_pbr.h b/bgpd/bgp_pbr.h new file mode 100644 index 0000000000..5129ada37b --- /dev/null +++ b/bgpd/bgp_pbr.h @@ -0,0 +1,256 @@ +/* + * BGP pbr + * Copyright (C) 6WIND + * + * FRR is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * FRR is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __BGP_PBR_H__ +#define __BGP_PBR_H__ + +#include "nexthop.h" +#include "zclient.h" + +/* flowspec case: 0 to 3 actions maximum: + * 1 redirect + * 1 set dscp + * 1 set traffic rate + */ +#define ACTIONS_MAX_NUM 4 +enum bgp_pbr_action_enum { + ACTION_TRAFFICRATE = 1, + ACTION_TRAFFIC_ACTION = 2, + ACTION_REDIRECT = 3, + ACTION_MARKING = 4, + ACTION_REDIRECT_IP = 5 +}; + +#define TRAFFIC_ACTION_SAMPLE (1 << 0) +#define TRAFFIC_ACTION_TERMINATE (1 << 1) +#define TRAFFIC_ACTION_DISTRIBUTE (1 << 2) + +#define OPERATOR_COMPARE_LESS_THAN (1<<1) +#define OPERATOR_COMPARE_GREATER_THAN (1<<2) +#define OPERATOR_COMPARE_EQUAL_TO (1<<3) +#define OPERATOR_COMPARE_EXACT_MATCH (1<<4) + +#define OPERATOR_UNARY_OR (1<<1) +#define OPERATOR_UNARY_AND (1<<2) + +/* struct used to store values [0;65535] + * this can be used for port number of protocol + */ +#define BGP_PBR_MATCH_VAL_MAX 5 + +struct bgp_pbr_match_val { + uint16_t value; + uint8_t compare_operator; + uint8_t unary_operator; +} bgp_pbr_value_t; + +#define FRAGMENT_DONT 1 +#define FRAGMENT_IS 2 +#define FRAGMENT_FIRST 4 +#define FRAGMENT_LAST 8 + +struct bgp_pbr_fragment_val { + uint8_t bitmask; +}; + +struct bgp_pbr_entry_action { + /* used to store enum bgp_pbr_action_enum enumerate */ + uint8_t action; + union { + union { + uint8_t rate_info[4]; /* IEEE.754.1985 */ + float rate; + } r __attribute__((aligned(8))); + struct _pbr_action { + uint8_t do_sample; + uint8_t filter; + } za; + vrf_id_t redirect_vrf; + struct _pbr_redirect_ip { + struct in_addr redirect_ip_v4; + uint8_t duplicate; + } zr; + uint8_t marking_dscp; + } u __attribute__((aligned(8))); +}; + +/* BGP Policy Route structure */ +struct bgp_pbr_entry_main { + uint8_t type; + uint16_t instance; + + uint32_t flags; + + uint8_t message; + + /* + * This is an enum but we are going to treat it as a uint8_t + * for purpose of encoding/decoding + */ + afi_t afi; + safi_t safi; + +#define PREFIX_SRC_PRESENT (1 << 0) +#define PREFIX_DST_PRESENT (1 << 1) +#define FRAGMENT_PRESENT (1 << 2) + uint8_t match_bitmask; + + uint8_t match_src_port_num; + uint8_t match_dst_port_num; + uint8_t match_port_num; + uint8_t match_protocol_num; + uint8_t match_icmp_type_num; + uint8_t match_icmp_code_num; + uint8_t match_packet_length_num; + uint8_t match_dscp_num; + uint8_t match_tcpflags_num; + + struct prefix src_prefix; + struct prefix dst_prefix; + + struct bgp_pbr_match_val protocol[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val src_port[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val dst_port[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val port[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val icmp_type[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val icmp_code[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val packet_length[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val dscp[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_match_val tcpflags[BGP_PBR_MATCH_VAL_MAX]; + struct bgp_pbr_fragment_val fragment; + + uint16_t action_num; + struct bgp_pbr_entry_action actions[ACTIONS_MAX_NUM]; + + uint8_t distance; + + uint32_t metric; + + route_tag_t tag; + + uint32_t mtu; + + vrf_id_t vrf_id; +}; + +struct bgp_pbr_match { + char ipset_name[ZEBRA_IPSET_NAME_SIZE]; + + /* mapped on enum ipset_type + */ + uint32_t type; + +#define MATCH_IP_SRC_SET (1 << 0) +#define MATCH_IP_DST_SET (1 << 1) + uint32_t flags; + + vrf_id_t vrf_id; + + /* unique identifier for ipset create transaction + */ + uint32_t unique; + + /* unique identifier for iptable add transaction + */ + uint32_t unique2; + + bool installed; + bool install_in_progress; + + bool installed_in_iptable; + bool install_iptable_in_progress; + + struct hash *entry_hash; + + struct bgp_pbr_action *action; + +}; + +struct bgp_pbr_match_entry { + struct bgp_pbr_match *backpointer; + + uint32_t unique; + + struct prefix src; + struct prefix dst; + + bool installed; + bool install_in_progress; +}; + +struct bgp_pbr_action { + + /* + * The Unique identifier of this specific pbrms + */ + uint32_t unique; + + uint32_t fwmark; + + uint32_t table_id; + + float rate; + + /* + * nexthop information, or drop information + * contains src vrf_id and nh contains dest vrf_id + */ + vrf_id_t vrf_id; + struct nexthop nh; + + bool installed; + bool install_in_progress; + uint32_t refcnt; + struct bgp *bgp; +}; + +extern struct bgp_pbr_action *bgp_pbr_action_rule_lookup(vrf_id_t vrf_id, + uint32_t unique); + +extern struct bgp_pbr_match *bgp_pbr_match_ipset_lookup(vrf_id_t vrf_id, + uint32_t unique); + +extern struct bgp_pbr_match_entry *bgp_pbr_match_ipset_entry_lookup( + vrf_id_t vrf_id, char *name, + uint32_t unique); +extern struct bgp_pbr_match *bgp_pbr_match_iptable_lookup(vrf_id_t vrf_id, + uint32_t unique); + +extern void bgp_pbr_cleanup(struct bgp *bgp); +extern void bgp_pbr_init(struct bgp *bgp); + +extern uint32_t bgp_pbr_action_hash_key(void *arg); +extern int bgp_pbr_action_hash_equal(const void *arg1, + const void *arg2); +extern uint32_t bgp_pbr_match_entry_hash_key(void *arg); +extern int bgp_pbr_match_entry_hash_equal(const void *arg1, + const void *arg2); +extern uint32_t bgp_pbr_match_hash_key(void *arg); +extern int bgp_pbr_match_hash_equal(const void *arg1, + const void *arg2); + +void bgp_pbr_print_policy_route(struct bgp_pbr_entry_main *api); + +struct bgp_node; +struct bgp_info; +extern void bgp_pbr_update_entry(struct bgp *bgp, struct prefix *p, + struct bgp_info *new_select, + afi_t afi, safi_t safi, + bool nlri_update); + +#endif /* __BGP_PBR_H__ */ diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 299486128c..4802a4caa5 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -75,6 +75,7 @@ #include "bgpd/bgp_evpn_vty.h" #include "bgpd/bgp_flowspec.h" #include "bgpd/bgp_flowspec_util.h" +#include "bgpd/bgp_pbr.h" #ifndef VTYSH_EXTRACT_PL #include "bgpd/bgp_route_clippy.c" @@ -2231,7 +2232,6 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_node *rn, /* If best route remains the same and this is not due to user-initiated * clear, see exactly what needs to be done. */ - if (old_select && old_select == new_select && !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR) && !CHECK_FLAG(old_select->flags, BGP_INFO_ATTR_CHANGED) diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 89b8eb70cd..00e5677fe0 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -320,7 +320,8 @@ static inline void bgp_bump_version(struct bgp_node *node) static inline int bgp_fibupd_safi(safi_t safi) { if (safi == SAFI_UNICAST || safi == SAFI_MULTICAST - || safi == SAFI_LABELED_UNICAST) + || safi == SAFI_LABELED_UNICAST + || safi == SAFI_FLOWSPEC) return 1; return 0; } diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 581b4e6f5b..93a509c219 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -56,6 +56,7 @@ #include "bgpd/bgp_evpn.h" #include "bgpd/bgp_mplsvpn.h" #include "bgpd/bgp_labelpool.h" +#include "bgpd/bgp_pbr.h" /* All information about zebra. */ struct zclient *zclient = NULL; @@ -997,6 +998,9 @@ static int bgp_table_map_apply(struct route_map *map, struct prefix *p, static struct thread *bgp_tm_thread_connect; static bool bgp_tm_status_connected; +static bool bgp_tm_chunk_obtained; +#define BGP_FLOWSPEC_TABLE_CHUNK 100000 +static uint32_t bgp_tm_min, bgp_tm_max, bgp_tm_chunk_size; static int bgp_zebra_tm_connect(struct thread *t) { @@ -1017,12 +1021,27 @@ static int bgp_zebra_tm_connect(struct thread *t) if (!bgp_tm_status_connected) zlog_debug("Connecting to table manager. Success"); bgp_tm_status_connected = true; + if (!bgp_tm_chunk_obtained) { + if (bgp_zebra_get_table_range(bgp_tm_chunk_size, + &bgp_tm_min, + &bgp_tm_max) >= 0) + bgp_tm_chunk_obtained = true; + } } thread_add_timer(bm->master, bgp_zebra_tm_connect, zclient, delay, &bgp_tm_thread_connect); return 0; } +uint32_t bgp_zebra_tm_get_id(void) +{ + static int table_id; + + if (!bgp_tm_chunk_obtained) + return ++table_id; + return bgp_tm_min++; +} + void bgp_zebra_init_tm_connect(void) { int delay = 1; @@ -1032,6 +1051,9 @@ void bgp_zebra_init_tm_connect(void) if (bgp_tm_thread_connect != NULL) return; bgp_tm_status_connected = false; + bgp_tm_chunk_obtained = false; + bgp_tm_min = bgp_tm_max = 0; + bgp_tm_chunk_size = BGP_FLOWSPEC_TABLE_CHUNK; thread_add_timer(bm->master, bgp_zebra_tm_connect, zclient, delay, &bgp_tm_thread_connect); } @@ -1173,6 +1195,10 @@ void bgp_zebra_announce(struct bgp_node *rn, struct prefix *p, if (bgp_debug_zebra(p)) prefix2str(&api.prefix, buf_prefix, sizeof(buf_prefix)); + if (safi == SAFI_FLOWSPEC) + return bgp_pbr_update_entry(bgp, &rn->p, + info, afi, safi, true); + /* * vrf leaking support (will have only one nexthop) */ @@ -1459,6 +1485,7 @@ void bgp_zebra_withdraw(struct prefix *p, struct bgp_info *info, struct bgp *bgp, safi_t safi) { struct zapi_route api; + struct peer *peer; /* Don't try to install if we're not connected to Zebra or Zebra doesn't * know of this instance. @@ -1466,6 +1493,12 @@ void bgp_zebra_withdraw(struct prefix *p, struct bgp_info *info, if (!bgp_install_info_to_zebra(bgp)) return; + if (safi == SAFI_FLOWSPEC) { + peer = info->peer; + return bgp_pbr_update_entry(peer->bgp, p, + info, AFI_IP, safi, false); + } + memset(&api, 0, sizeof(api)); memcpy(&api.rmac, &(info->attr->rmac), sizeof(struct ethaddr)); api.vrf_id = bgp->vrf_id; @@ -1908,6 +1941,271 @@ int bgp_zebra_advertise_all_vni(struct bgp *bgp, int advertise) return zclient_send_message(zclient); } +static int rule_notify_owner(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + uint32_t seqno, priority, unique; + enum zapi_rule_notify_owner note; + struct bgp_pbr_action *bgp_pbra; + ifindex_t ifi; + + if (!zapi_rule_notify_decode(zclient->ibuf, &seqno, &priority, &unique, + &ifi, ¬e)) + return -1; + + bgp_pbra = bgp_pbr_action_rule_lookup(vrf_id, unique); + if (!bgp_pbra) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Fail to look BGP rule (%u)", + __PRETTY_FUNCTION__, unique); + return 0; + } + + switch (note) { + case ZAPI_RULE_FAIL_INSTALL: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received RULE_FAIL_INSTALL", + __PRETTY_FUNCTION__); + bgp_pbra->installed = false; + bgp_pbra->install_in_progress = false; + break; + case ZAPI_RULE_INSTALLED: + bgp_pbra->installed = true; + bgp_pbra->install_in_progress = false; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received RULE_INSTALLED", + __PRETTY_FUNCTION__); + break; + case ZAPI_RULE_REMOVED: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received RULE REMOVED", + __PRETTY_FUNCTION__); + break; + } + + return 0; +} + +static int ipset_notify_owner(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + uint32_t unique; + enum zapi_ipset_notify_owner note; + struct bgp_pbr_match *bgp_pbim; + + if (!zapi_ipset_notify_decode(zclient->ibuf, + &unique, + ¬e)) + return -1; + + bgp_pbim = bgp_pbr_match_ipset_lookup(vrf_id, unique); + if (!bgp_pbim) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Fail to look BGP match (%u)", + __PRETTY_FUNCTION__, unique); + return 0; + } + + switch (note) { + case ZAPI_IPSET_FAIL_INSTALL: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_FAIL_INSTALL", + __PRETTY_FUNCTION__); + bgp_pbim->installed = false; + bgp_pbim->install_in_progress = false; + break; + case ZAPI_IPSET_INSTALLED: + bgp_pbim->installed = true; + bgp_pbim->install_in_progress = false; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_INSTALLED", + __PRETTY_FUNCTION__); + break; + case ZAPI_IPSET_REMOVED: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET REMOVED", + __PRETTY_FUNCTION__); + break; + } + + return 0; +} + +static int ipset_entry_notify_owner(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + uint32_t unique; + char ipset_name[ZEBRA_IPSET_NAME_SIZE]; + enum zapi_ipset_entry_notify_owner note; + struct bgp_pbr_match_entry *bgp_pbime; + + if (!zapi_ipset_entry_notify_decode( + zclient->ibuf, + &unique, + ipset_name, + ¬e)) + return -1; + bgp_pbime = bgp_pbr_match_ipset_entry_lookup(vrf_id, + ipset_name, + unique); + if (!bgp_pbime) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Fail to look BGP match entry (%u)", + __PRETTY_FUNCTION__, unique); + return 0; + } + + switch (note) { + case ZAPI_IPSET_ENTRY_FAIL_INSTALL: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_ENTRY_FAIL_INSTALL", + __PRETTY_FUNCTION__); + bgp_pbime->installed = false; + bgp_pbime->install_in_progress = false; + break; + case ZAPI_IPSET_ENTRY_INSTALLED: + bgp_pbime->installed = true; + bgp_pbime->install_in_progress = false; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_ENTRY_INSTALLED", + __PRETTY_FUNCTION__); + break; + case ZAPI_IPSET_ENTRY_REMOVED: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPSET_ENTRY_REMOVED", + __PRETTY_FUNCTION__); + break; + } + return 0; +} + +static int iptable_notify_owner(int command, struct zclient *zclient, + zebra_size_t length, vrf_id_t vrf_id) +{ + uint32_t unique; + enum zapi_iptable_notify_owner note; + struct bgp_pbr_match *bgpm; + + if (!zapi_iptable_notify_decode( + zclient->ibuf, + &unique, + ¬e)) + return -1; + bgpm = bgp_pbr_match_iptable_lookup(vrf_id, unique); + if (!bgpm) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Fail to look BGP iptable (%u)", + __PRETTY_FUNCTION__, unique); + return 0; + } + switch (note) { + case ZAPI_IPTABLE_FAIL_INSTALL: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPTABLE_FAIL_INSTALL", + __PRETTY_FUNCTION__); + bgpm->installed_in_iptable = false; + bgpm->install_iptable_in_progress = false; + break; + case ZAPI_IPTABLE_INSTALLED: + bgpm->installed_in_iptable = true; + bgpm->install_iptable_in_progress = false; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPTABLE_INSTALLED", + __PRETTY_FUNCTION__); + bgpm->action->refcnt++; + break; + case ZAPI_IPTABLE_REMOVED: + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("%s: Received IPTABLE REMOVED", + __PRETTY_FUNCTION__); + break; + } + return 0; +} + +static void bgp_encode_pbr_rule_action(struct stream *s, + struct bgp_pbr_action *pbra) +{ + struct prefix any; + + stream_putl(s, 0); /* seqno unused */ + stream_putl(s, 0); /* ruleno unused */ + + stream_putl(s, pbra->unique); + + memset(&any, 0, sizeof(any)); + any.family = AF_INET; + stream_putc(s, any.family); + stream_putc(s, any.prefixlen); + stream_put(s, &any.u.prefix, prefix_blen(&any)); + + stream_putw(s, 0); /* src port */ + + stream_putc(s, any.family); + stream_putc(s, any.prefixlen); + stream_put(s, &any.u.prefix, prefix_blen(&any)); + + stream_putw(s, 0); /* dst port */ + + stream_putl(s, pbra->fwmark); /* fwmark */ + + stream_putl(s, pbra->table_id); + + stream_putl(s, 0); /* ifindex unused */ +} + +static void bgp_encode_pbr_ipset_match(struct stream *s, + struct bgp_pbr_match *pbim) +{ + stream_putl(s, pbim->unique); + stream_putl(s, pbim->type); + + stream_put(s, pbim->ipset_name, + ZEBRA_IPSET_NAME_SIZE); + + +} + +static void bgp_encode_pbr_ipset_entry_match(struct stream *s, + struct bgp_pbr_match_entry *pbime) +{ + stream_putl(s, pbime->unique); + /* check that back pointer is not null */ + stream_put(s, pbime->backpointer->ipset_name, + ZEBRA_IPSET_NAME_SIZE); + + stream_putc(s, pbime->src.family); + stream_putc(s, pbime->src.prefixlen); + stream_put(s, &pbime->src.u.prefix, prefix_blen(&pbime->src)); + + stream_putc(s, pbime->dst.family); + stream_putc(s, pbime->dst.prefixlen); + stream_put(s, &pbime->dst.u.prefix, prefix_blen(&pbime->dst)); +} + +static void bgp_encode_pbr_iptable_match(struct stream *s, + struct bgp_pbr_action *bpa, + struct bgp_pbr_match *pbm) +{ + stream_putl(s, pbm->unique2); + + stream_putl(s, pbm->type); + + stream_putl(s, pbm->flags); + + /* TODO: correlate with what is contained + * into bgp_pbr_action. + * currently only forward supported + */ + if (bpa->nh.type == NEXTHOP_TYPE_BLACKHOLE) + stream_putl(s, ZEBRA_IPTABLES_DROP); + else + stream_putl(s, ZEBRA_IPTABLES_FORWARD); + stream_putl(s, bpa->fwmark); + stream_put(s, pbm->ipset_name, + ZEBRA_IPSET_NAME_SIZE); +} + /* BGP has established connection with Zebra. */ static void bgp_zebra_connected(struct zclient *zclient) { @@ -2167,6 +2465,10 @@ void bgp_zebra_init(struct thread_master *master) zclient->local_ip_prefix_add = bgp_zebra_process_local_ip_prefix; zclient->local_ip_prefix_del = bgp_zebra_process_local_ip_prefix; zclient->label_chunk = bgp_zebra_process_label_chunk; + zclient->rule_notify_owner = rule_notify_owner; + zclient->ipset_notify_owner = ipset_notify_owner; + zclient->ipset_entry_notify_owner = ipset_entry_notify_owner; + zclient->iptable_notify_owner = iptable_notify_owner; } void bgp_zebra_destroy(void) @@ -2182,3 +2484,176 @@ int bgp_zebra_num_connects(void) { return zclient_num_connects; } + +void bgp_send_pbr_rule_action(struct bgp_pbr_action *pbra, bool install) +{ + struct stream *s; + + if (pbra->install_in_progress) + return; + zlog_debug("%s: table %d fwmark %d %d", __PRETTY_FUNCTION__, + pbra->table_id, pbra->fwmark, install); + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + install ? ZEBRA_RULE_ADD : ZEBRA_RULE_DELETE, + VRF_DEFAULT); + stream_putl(s, 1); /* send one pbr action */ + + bgp_encode_pbr_rule_action(s, pbra); + + stream_putw_at(s, 0, stream_get_endp(s)); + if (!zclient_send_message(zclient) && install) + pbra->install_in_progress = true; +} + +void bgp_send_pbr_ipset_match(struct bgp_pbr_match *pbrim, bool install) +{ + struct stream *s; + + if (pbrim->install_in_progress) + return; + zlog_debug("%s: name %s type %d %d", __PRETTY_FUNCTION__, + pbrim->ipset_name, pbrim->type, install); + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + install ? ZEBRA_IPSET_CREATE : + ZEBRA_IPSET_DESTROY, + VRF_DEFAULT); + + stream_putl(s, 1); /* send one pbr action */ + + bgp_encode_pbr_ipset_match(s, pbrim); + + stream_putw_at(s, 0, stream_get_endp(s)); + if (!zclient_send_message(zclient) && install) + pbrim->install_in_progress = true; +} + +void bgp_send_pbr_ipset_entry_match(struct bgp_pbr_match_entry *pbrime, + bool install) +{ + struct stream *s; + + if (pbrime->install_in_progress) + return; + zlog_debug("%s: name %s %d %d", __PRETTY_FUNCTION__, + pbrime->backpointer->ipset_name, + pbrime->unique, install); + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + install ? ZEBRA_IPSET_ENTRY_ADD : + ZEBRA_IPSET_ENTRY_DELETE, + VRF_DEFAULT); + + stream_putl(s, 1); /* send one pbr action */ + + bgp_encode_pbr_ipset_entry_match(s, pbrime); + + stream_putw_at(s, 0, stream_get_endp(s)); + if (!zclient_send_message(zclient) && install) + pbrime->install_in_progress = true; +} + +void bgp_send_pbr_iptable(struct bgp_pbr_action *pba, + struct bgp_pbr_match *pbm, + bool install) +{ + struct stream *s; + + if (pbm->install_iptable_in_progress) + return; + zlog_debug("%s: name %s type %d mark %d %d", __PRETTY_FUNCTION__, + pbm->ipset_name, pbm->type, pba->fwmark, install); + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + install ? ZEBRA_IPTABLE_ADD : + ZEBRA_IPTABLE_DELETE, + VRF_DEFAULT); + + bgp_encode_pbr_iptable_match(s, pba, pbm); + + stream_putw_at(s, 0, stream_get_endp(s)); + if (!zclient_send_message(zclient) && install) { + pbm->install_iptable_in_progress = true; + pba->refcnt++; + } +} + +/* inject in table a default route to: + * - if nexthop IP is present : to this nexthop + * - if vrf is different from local : to the matching VRF + */ +void bgp_zebra_announce_default(struct bgp *bgp, struct nexthop *nh, + afi_t afi, uint32_t table_id, bool announce) +{ + struct zapi_nexthop *api_nh; + struct zapi_route api; + struct prefix p; + + if (!nh || nh->type != NEXTHOP_TYPE_IPV4 + || nh->vrf_id == VRF_UNKNOWN) + return; + memset(&p, 0, sizeof(struct prefix)); + /* default route */ + if (afi != AFI_IP) + return; + p.family = AF_INET; + memset(&api, 0, sizeof(api)); + api.vrf_id = bgp->vrf_id; + api.type = ZEBRA_ROUTE_BGP; + api.safi = SAFI_UNICAST; + api.prefix = p; + api.tableid = table_id; + api.nexthop_num = 1; + SET_FLAG(api.message, ZAPI_MESSAGE_TABLEID); + SET_FLAG(api.message, ZAPI_MESSAGE_NEXTHOP); + api_nh = &api.nexthops[0]; + + /* redirect IP */ + if (nh->gate.ipv4.s_addr) { + char buff[PREFIX_STRLEN]; + + api_nh->vrf_id = nh->vrf_id; + api_nh->gate.ipv4 = nh->gate.ipv4; + api_nh->type = NEXTHOP_TYPE_IPV4; + + inet_ntop(AF_INET, &(nh->gate.ipv4), buff, INET_ADDRSTRLEN); + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_info("BGP: sending default route to %s table %d (redirect IP)", + buff, table_id); + zclient_route_send(announce ? ZEBRA_ROUTE_ADD + : ZEBRA_ROUTE_DELETE, + zclient, &api); + } else if (nh->vrf_id != bgp->vrf_id) { + struct vrf *vrf; + struct interface *ifp; + + vrf = vrf_lookup_by_id(nh->vrf_id); + if (!vrf) + return; + /* create default route with interface + * with nexthop-vrf + */ + ifp = if_lookup_by_name_all_vrf(vrf->name); + if (!ifp) + return; + api_nh->vrf_id = nh->vrf_id; + api_nh->type = NEXTHOP_TYPE_IFINDEX; + api_nh->ifindex = ifp->ifindex; + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_info("BGP: sending default route to %s table %d (redirect VRF)", + vrf->name, table_id); + zclient_route_send(announce ? ZEBRA_ROUTE_ADD + : ZEBRA_ROUTE_DELETE, + zclient, &api); + return; + } +} diff --git a/bgpd/bgp_zebra.h b/bgpd/bgp_zebra.h index 7263317b6f..7ac40fecff 100644 --- a/bgpd/bgp_zebra.h +++ b/bgpd/bgp_zebra.h @@ -25,6 +25,7 @@ extern void bgp_zebra_init(struct thread_master *master); extern void bgp_zebra_init_tm_connect(void); +extern uint32_t bgp_zebra_tm_get_id(void); extern void bgp_zebra_destroy(void); extern int bgp_zebra_get_table_range(uint32_t chunk_size, uint32_t *start, uint32_t *end); @@ -70,4 +71,20 @@ extern int bgp_zebra_advertise_all_vni(struct bgp *, int); extern int bgp_zebra_num_connects(void); +struct bgp_pbr_action; +struct bgp_pbr_match; +struct bgp_pbr_match_entry; +extern void bgp_send_pbr_rule_action(struct bgp_pbr_action *pbra, + bool install); +extern void bgp_send_pbr_ipset_match(struct bgp_pbr_match *pbrim, + bool install); +extern void bgp_send_pbr_ipset_entry_match(struct bgp_pbr_match_entry *pbrime, + bool install); +extern void bgp_send_pbr_iptable(struct bgp_pbr_action *pba, + struct bgp_pbr_match *pbm, + bool install); + +extern void bgp_zebra_announce_default(struct bgp *bgp, struct nexthop *nh, + afi_t afi, uint32_t table_id, bool announce); + #endif /* _QUAGGA_BGP_ZEBRA_H */ diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index ccfa90419b..a331fad5d4 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -83,6 +83,7 @@ #include "bgpd/bgp_ecommunity.h" #include "bgpd/bgp_flowspec.h" #include "bgpd/bgp_labelpool.h" +#include "bgpd/bgp_pbr.h" DEFINE_MTYPE_STATIC(BGPD, PEER_TX_SHUTDOWN_MSG, "Peer shutdown message (TX)"); DEFINE_QOBJ_TYPE(bgp_master) @@ -3006,6 +3007,7 @@ static struct bgp *bgp_create(as_t *as, const char *name, bf_assign_index(bm->rd_idspace, bgp->vrf_rd_id); bgp_evpn_init(bgp); + bgp_pbr_init(bgp); return bgp; } @@ -3401,7 +3403,7 @@ void bgp_free(struct bgp *bgp) bf_release_index(bm->rd_idspace, bgp->vrf_rd_id); bgp_evpn_cleanup(bgp); - + bgp_pbr_cleanup(bgp); if (bgp->name) XFREE(MTYPE_BGP, bgp->name); if (bgp->name_pretty) diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 576c89f25e..470fd10850 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -400,6 +400,25 @@ struct bgp { /* Allocate MPLS labels */ uint8_t allocate_mpls_labels[AFI_MAX][SAFI_MAX]; + /* Allocate hash entries to store policy routing information + * The hash are used to host pbr rules somewhere. + * Actually, pbr will only be used by flowspec + * those hash elements will have relationship together as + * illustrated in below diagram: + * + * pbr_action a <----- pbr_match i <--- pbr_match_entry 1..n + * <----- pbr_match j <--- pbr_match_entry 1..m + * + * - here in BGP structure, the list of match and actions will + * stand for the list of ipset sets, and table_ids in the kernel + * - the arrow above between pbr_match and pbr_action indicate + * that a backpointer permits match to find the action + * - the arrow betwen match_entry and match is a hash list + * contained in match, that lists the whole set of entries + */ + struct hash *pbr_match_hash; + struct hash *pbr_action_hash; + /* timer to re-evaluate neighbor default-originate route-maps */ struct thread *t_rmap_def_originate_eval; #define RMAP_DEFAULT_ORIGINATE_EVAL_TIMER 5 diff --git a/lib/zclient.c b/lib/zclient.c index dc27cbef70..cb39099fc2 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -1374,6 +1374,26 @@ stream_failure: return false; } +bool zapi_iptable_notify_decode(struct stream *s, + uint32_t *unique, + enum zapi_iptable_notify_owner *note) +{ + uint32_t uni; + + STREAM_GET(note, s, sizeof(*note)); + + STREAM_GETL(s, uni); + + if (zclient_debug) + zlog_debug("%s: %u", __PRETTY_FUNCTION__, uni); + *unique = uni; + + return true; + +stream_failure: + return false; +} + struct nexthop *nexthop_from_zapi_nexthop(struct zapi_nexthop *znh) { struct nexthop *n = nexthop_new(); @@ -2765,6 +2785,22 @@ static int zclient_read(struct thread *thread) (*zclient->label_chunk)(command, zclient, length, vrf_id); break; + case ZEBRA_IPSET_NOTIFY_OWNER: + if (zclient->ipset_notify_owner) + (*zclient->ipset_notify_owner)(command, zclient, length, + vrf_id); + break; + case ZEBRA_IPSET_ENTRY_NOTIFY_OWNER: + if (zclient->ipset_entry_notify_owner) + (*zclient->ipset_entry_notify_owner)(command, + zclient, length, + vrf_id); + break; + case ZEBRA_IPTABLE_NOTIFY_OWNER: + if (zclient->iptable_notify_owner) + (*zclient->iptable_notify_owner)(command, + zclient, length, + vrf_id); default: break; } diff --git a/lib/zclient.h b/lib/zclient.h index 71f5b38384..8d26b7fe59 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -258,6 +258,10 @@ struct zclient { struct zclient *zclient, uint16_t length, vrf_id_t vrf_id); + int (*iptable_notify_owner)(int command, + struct zclient *zclient, + uint16_t length, + vrf_id_t vrf_id); }; /* Zebra API message flag. */ @@ -680,6 +684,9 @@ bool zapi_ipset_entry_notify_decode(struct stream *s, uint32_t *unique, char *ipset_name, enum zapi_ipset_entry_notify_owner *note); +bool zapi_iptable_notify_decode(struct stream *s, + uint32_t *unique, + enum zapi_iptable_notify_owner *note); extern struct nexthop *nexthop_from_zapi_nexthop(struct zapi_nexthop *znh); extern bool zapi_nexthop_update_decode(struct stream *s, diff --git a/vtysh/Makefile.am b/vtysh/Makefile.am index 52641de72c..d82f9fd1b8 100644 --- a/vtysh/Makefile.am +++ b/vtysh/Makefile.am @@ -54,6 +54,7 @@ vtysh_scan += $(top_srcdir)/bgpd/bgp_nexthop.c vtysh_scan += $(top_srcdir)/bgpd/bgp_route.c vtysh_scan += $(top_srcdir)/bgpd/bgp_routemap.c vtysh_scan += $(top_srcdir)/bgpd/bgp_vty.c +vtysh_scan += $(top_srcdir)/bgpd/bgp_flowspec_vty.c endif if RPKI