mirror_iproute2/tc/tc_bpf.c
Daniel Borkmann 11c39b5e98 tc: add eBPF support to f_bpf
This work adds the tc frontend for kernel commit e2e9b6541dd4 ("cls_bpf:
add initial eBPF support for programmable classifiers").

A C-like classifier program (e.g. see e2e9b6541dd4) is compiled via
LLVM's eBPF backend into an ELF file, which is then passed to tc. tc
then loads eBPF maps (if any) and eBPF opcodes (with fixed-up eBPF map file
descriptors) out of their dedicated sections and via bpf(2) into the kernel,
and then passes the resulting fd via netlink down to cls_bpf. cls_bpf allows
for annotations; currently, I've used the file name for that, so that the
user can easily identify their filter when dumping configurations back.

Example usage:

  clang -O2 -emit-llvm -c cls.c -o - | llc -march=bpf -filetype=obj -o cls.o
  tc filter add dev em1 parent 1: bpf run object-file cls.o classid x:y

  tc filter show dev em1 [...]
  filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid x:y cls.o

I placed the parser bits derived from Alexei's kernel sample, into tc_bpf.c
as my next step is to also add the same support for BPF action, so we can
have a fully fledged eBPF classifier and action in tc.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
2015-03-24 15:45:23 -07:00

539 lines
12 KiB
C

/*
* tc_bpf.c BPF common code
*
* This program is free software; you can distribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Daniel Borkmann <dborkman@redhat.com>
* Jiri Pirko <jiri@resnulli.us>
* Alexei Starovoitov <ast@plumgrid.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <linux/filter.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#ifdef HAVE_ELF
#include <libelf.h>
#include <gelf.h>
#endif
#include "utils.h"
#include "tc_util.h"
#include "tc_bpf.h"
/*
 * Fetch the textual classic-BPF program and parse its leading length field.
 *
 * @arg:          either the program text itself or, if @from_file is set,
 *                the path of a file whose first line holds the program text
 * @from_file:    read the program text from the file named by @arg
 * @bpf_len:      receives the leading "<len><separator>" length value
 * @bpf_string:   receives the program text on success, NULL on failure
 * @need_release: set to true on success when *@bpf_string was allocated
 *                here and must be free()d by the caller; false otherwise
 * @separator:    character expected directly after the length value
 *
 * Returns 0 on success, or -ENOMEM, -ENOENT, -EIO or -EINVAL on failure.
 * The output pointer and ownership flag are only published on success, so
 * a caller can never end up freeing a buffer this function already freed.
 */
int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
		     char **bpf_string, bool *need_release,
		     const char separator)
{
	char *text = arg;
	bool owned = false;
	char sp;

	/* Give the out-parameters a defined state on every failure path. */
	*bpf_string = NULL;
	*need_release = false;

	if (from_file) {
		/* Worst-case size of one textual opcode, including the comma. */
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		FILE *fp;

		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		text = calloc(1, tmp_len);
		if (text == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(text);
			return -ENOENT;
		}

		/* Only the first line of the file is considered. */
		if (!fgets(text, tmp_len, fp)) {
			free(text);
			fclose(fp);
			return -EIO;
		}

		fclose(fp);
		owned = true;
	}

	if (sscanf(text, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (owned)
			free(text);
		return -EINVAL;
	}

	*bpf_string = text;
	*need_release = owned;
	return 0;
}
/*
 * Parse a textual classic-BPF program of the form
 * "<len>,<code> <jt> <jf> <k>,<code> <jt> <jf> <k>,..." into @bpf_ops.
 *
 * @argc/@argv: argument vector; only argv[0] is consumed
 * @bpf_ops:    output array; the encoded length is limited to BPF_MAXINSNS
 *              entries before anything is written
 * @from_file:  argv[0] names a file holding the program instead of being
 *              the program text itself
 *
 * Returns the number of parsed instructions on success, -EINVAL otherwise.
 */
int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
		  bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Each instruction starts right after a separator; the first
	 * separator is the one that follows the length field. A trailing
	 * separator with nothing behind it ends the loop.
	 */
	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded "
				"length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	if (i != bpf_len) {
		/* Note the trailing space: the two literals are concatenated. */
		fprintf(stderr, "Parsed program length is less than encoded "
			"length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}
/*
 * Print @len classic-BPF instructions stored in the payload of the netlink
 * attribute @bpf_ops to @f, in the form
 *   bytecode '<len>,<code> <jt> <jf> <k>,...,<code> <jt> <jf> <k>'
 * followed by a newline. Nothing is printed when @len is zero.
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	const struct sock_filter *insn = RTA_DATA(bpf_ops);
	const struct sock_filter *last;

	if (len == 0)
		return;

	last = insn + (len - 1);

	fprintf(f, "bytecode \'%u,", len);

	/* Every instruction but the final one is followed by a comma. */
	while (insn < last) {
		fprintf(f, "%hu %hhu %hhu %u,", insn->code, insn->jt,
			insn->jf, insn->k);
		insn++;
	}

	/* The final instruction closes the quote and ends the line. */
	fprintf(f, "%hu %hhu %hhu %u\'\n", last->code, last->jt,
		last->jf, last->k);
}
#ifdef HAVE_ELF
/*
 * Per-section view of an ELF object, filled in by bpf_fill_section_data():
 * a copy of the section header plus the section name and data descriptor
 * as returned by libelf.
 */
struct bpf_elf_sec_data {
/* Copy of the section header. */
GElf_Shdr sec_hdr;
/* Section name as returned by elf_strptr(); not copied or freed in this file. */
char *sec_name;
/* Section contents as returned by elf_getdata(). */
Elf_Data *sec_data;
};
/*
 * Log buffer handed to the kernel by bpf_prog_load(); bpf_dump_error()
 * prints it to stderr and then clears it.
 */
static char bpf_log_buf[8192];
/*
 * Map a BPF program type to the name of the ELF section expected to hold
 * its instructions. Only BPF_PROG_TYPE_SCHED_CLS is mapped (to
 * ELF_SECTION_CLASSIFIER); every other type yields NULL. An action mapping
 * (BPF_PROG_TYPE_SCHED_ACT -> ELF_SECTION_ACTION) is not enabled yet.
 */
static const char *prog_type_section(enum bpf_prog_type type)
{
	if (type == BPF_PROG_TYPE_SCHED_CLS)
		return ELF_SECTION_CLASSIFIER;

	return NULL;
}
static void bpf_dump_error(const char *format, ...) __check_format_string(1, 2);

/*
 * Write a printf-style message to stderr, followed by the current contents
 * of bpf_log_buf. The log buffer is cleared afterwards so that the same
 * text is not printed again by a later call.
 */
static void bpf_dump_error(const char *format, ...)
{
	va_list args;

	/* Caller-supplied message first ... */
	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);

	/* ... then whatever is in the log buffer, which is wiped afterwards. */
	fprintf(stderr, "%s", bpf_log_buf);
	memset(bpf_log_buf, 0, sizeof(bpf_log_buf));
}
/*
 * Issue BPF_MAP_CREATE through bpf() for a map of the given type, key size,
 * value size and maximum number of entries.
 *
 * Returns whatever bpf() returns for the command.
 */
static int bpf_create_map(enum bpf_map_type type, unsigned int size_key,
			  unsigned int size_value, unsigned int max_elem)
{
	union bpf_attr attr;

	/*
	 * Zero the whole union explicitly. A designated initializer only
	 * guarantees the named member is initialized; bytes of the union
	 * outside of it are not guaranteed to be zero, yet the full
	 * sizeof(attr) is handed to the kernel.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = type;
	attr.key_size = size_key;
	attr.value_size = size_value;
	attr.max_entries = max_elem;

	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
/*
 * Issue BPF_PROG_LOAD through bpf() for the given instructions.
 *
 * @type:    program type to load
 * @insns:   instruction array
 * @len:     size of @insns in bytes (converted to an instruction count here)
 * @license: license string passed along with the program
 *
 * bpf_log_buf is supplied as the log buffer with log level 1.
 * Returns whatever bpf() returns for the command.
 */
static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
			 unsigned int len, const char *license)
{
	union bpf_attr attr;

	/*
	 * Zero the whole union explicitly. A designated initializer only
	 * guarantees the named member is initialized; bytes of the union
	 * outside of it are not guaranteed to be zero, yet the full
	 * sizeof(attr) is handed to the kernel.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.prog_type = type;
	attr.insns = bpf_ptr_to_u64(insns);
	attr.insn_cnt = len / sizeof(struct bpf_insn);
	attr.license = bpf_ptr_to_u64(license);
	attr.log_buf = bpf_ptr_to_u64(bpf_log_buf);
	attr.log_size = sizeof(bpf_log_buf);
	attr.log_level = 1;

	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
/*
 * Load a program via bpf_prog_load() and, if that fails, report the errno
 * text together with the log buffer through bpf_dump_error().
 *
 * @size is the size of @insns in bytes. Returns the value returned by
 * bpf_prog_load(), i.e. a negative value on failure.
 */
static int bpf_prog_attach(enum bpf_prog_type type, const struct bpf_insn *insns,
			   unsigned int size, const char *license)
{
	int fd;

	fd = bpf_prog_load(type, insns, size, license);
	if (fd >= 0)
		return fd;

	bpf_dump_error("BPF program rejected: %s\n", strerror(errno));
	return fd;
}
/*
 * Create a map via bpf_create_map() and, if that fails, report the errno
 * text together with the log buffer through bpf_dump_error().
 *
 * Returns the value returned by bpf_create_map(), i.e. a negative value on
 * failure.
 */
static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key,
			  unsigned int size_value, unsigned int max_elem)
{
	int fd;

	fd = bpf_create_map(type, size_key, size_value, max_elem);
	if (fd >= 0)
		return fd;

	bpf_dump_error("BPF map rejected: %s\n", strerror(errno));
	return fd;
}
/*
 * Mark the first @max_fds slots of @map_fds as unused by storing -1 in
 * each of them.
 */
static void bpf_maps_init(int *map_fds, unsigned int max_fds)
{
	int *slot = map_fds;
	unsigned int remaining = max_fds;

	while (remaining-- > 0)
		*slot++ = -1;
}
/*
 * Close every non-negative descriptor among the first @max_fds slots of
 * @map_fds. The array itself is left untouched, so closed descriptors
 * remain recorded in it.
 */
static void bpf_maps_destroy(const int *map_fds, unsigned int max_fds)
{
	const int *cur = map_fds;
	unsigned int remaining = max_fds;

	for (; remaining > 0; remaining--, cur++) {
		/* Negative entries are unused slots. */
		if (*cur < 0)
			continue;
		close(*cur);
	}
}
/*
 * Create one BPF map per entry of @maps and record the resulting
 * descriptors in @map_fds[0..@num_maps-1].
 *
 * @maps:     map definitions
 * @num_maps: number of entries in @maps
 * @map_fds:  descriptor table to fill
 * @max_fds:  capacity of @map_fds
 *
 * Returns 0 on success. If more maps are requested than @map_fds can hold,
 * -EINVAL is returned instead of silently creating none of them. If a map
 * cannot be created, the maps created so far are closed, their slots are
 * reset to -1 and the negative result of bpf_map_attach() is returned.
 */
static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps,
			   int *map_fds, unsigned int max_fds)
{
	unsigned int i;
	int ret;

	if (num_maps > max_fds) {
		fprintf(stderr, "Too many BPF maps: %u (maximum is %u)!\n",
			num_maps, max_fds);
		return -EINVAL;
	}

	for (i = 0; i < num_maps; i++) {
		struct bpf_elf_map *map = &maps[i];

		ret = bpf_map_attach(map->type, map->size_key,
				     map->size_value, map->max_elem);
		if (ret < 0)
			goto err_unwind;

		map_fds[i] = ret;
	}

	return 0;

err_unwind:
	bpf_maps_destroy(map_fds, i);
	/*
	 * Forget the descriptors just closed; otherwise a later
	 * bpf_maps_destroy() over the whole table would close them again.
	 */
	bpf_maps_init(map_fds, i);
	return ret;
}
/*
 * Collect header, name and data of ELF section @sec_index into @sec_data.
 *
 * @sec_data is zeroed first and only filled in on success. Returns 0 on
 * success, -EINVAL if the section cannot be looked up, -EIO if its header
 * or data cannot be read or it has more than one data descriptor, and
 * -ENOENT if it has no name or a size of zero.
 */
static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
				 struct bpf_elf_sec_data *sec_data)
{
	GElf_Shdr shdr;
	Elf_Data *payload;
	Elf_Scn *scn;
	char *name;

	memset(sec_data, 0, sizeof(*sec_data));

	scn = elf_getscn(elf_fd, sec_index);
	if (scn == NULL)
		return -EINVAL;

	if (gelf_getshdr(scn, &shdr) != &shdr)
		return -EIO;

	name = elf_strptr(elf_fd, elf_hdr->e_shstrndx, shdr.sh_name);
	if (name == NULL)
		return -ENOENT;
	if (shdr.sh_size == 0)
		return -ENOENT;

	payload = elf_getdata(scn, NULL);
	if (payload == NULL)
		return -EIO;
	/* Asking for a descriptor after the first one must yield nothing. */
	if (elf_getdata(scn, payload) != NULL)
		return -EIO;

	sec_data->sec_hdr = shdr;
	sec_data->sec_name = name;
	sec_data->sec_data = payload;

	return 0;
}
/*
 * Apply the map relocations of @data_relo to the instructions held in
 * @data_insn.
 *
 * For every relocation entry, the instruction at the relocation offset
 * must be a BPF_LD | BPF_IMM | BPF_DW load. The referenced symbol's value
 * divided by sizeof(struct bpf_elf_map) selects a slot of @map_fds, and the
 * instruction is patched in place: src_reg becomes BPF_PSEUDO_MAP_FD and
 * imm becomes the map descriptor.
 *
 * Returns 0 on success, -EIO if a relocation or symbol cannot be read, and
 * -EINVAL for a relocation section with a zero entry size, an offset
 * outside the instruction array, an unexpected opcode, or a map slot that
 * is out of range or holds no descriptor.
 */
static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
			       struct bpf_elf_sec_data *data_insn,
			       Elf_Data *sym_tab, int *map_fds, int max_fds)
{
	Elf_Data *idata = data_insn->sec_data;
	GElf_Shdr *rhdr = &data_relo->sec_hdr;
	int relo_ent, relo_num;
	struct bpf_insn *insns = idata->d_buf;
	unsigned int num_insns = idata->d_size / sizeof(*insns);

	/* A malformed object may carry a zero entry size; do not divide by it. */
	if (rhdr->sh_entsize == 0)
		return -EINVAL;
	relo_num = rhdr->sh_size / rhdr->sh_entsize;

	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
		unsigned int ioff, fnum;
		GElf_Rel relo;
		GElf_Sym sym;

		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
			return -EIO;

		ioff = relo.r_offset / sizeof(struct bpf_insn);
		if (ioff >= num_insns)
			return -EINVAL;
		if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
			return -EINVAL;

		if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
			return -EIO;

		fnum = sym.st_value / sizeof(struct bpf_elf_map);
		if (max_fds <= 0 || fnum >= (unsigned int)max_fds)
			return -EINVAL;
		/* Slot was never populated, so there is no map to refer to. */
		if (map_fds[fnum] < 0)
			return -EINVAL;

		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
		insns[ioff].imm = map_fds[fnum];
	}

	return 0;
}
/*
 * Walk all sections (starting at index 1) and pick up everything that is
 * needed before a program can be loaded:
 *  - the ELF_SECTION_MAPS section: its maps are created via
 *    bpf_maps_attach() and their descriptors stored in @map_fds,
 *  - the ELF_SECTION_LICENSE section: copied into @license,
 *  - a section of type SHT_SYMTAB: its data is returned through @sym_tab.
 * Each section consumed this way is flagged in @sec_seen.
 *
 * Sections that bpf_fill_section_data() cannot describe are skipped and do
 * not influence the result. Returns 0 on success, the negative result of
 * bpf_maps_attach() if map creation fails, or -ENOMEM if the license
 * section is larger than @lic_len.
 */
static int bpf_fetch_ancillary(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
			       int *map_fds, unsigned int max_fds,
			       char *license, unsigned int lic_len,
			       Elf_Data **sym_tab)
{
	int sec_index;

	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
		struct bpf_elf_sec_data data_anc;
		int ret;

		/*
		 * The result of this probe is deliberately kept local: a
		 * section that cannot be described must not turn into the
		 * return value of the whole walk.
		 */
		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
					    &data_anc);
		if (ret < 0)
			continue;

		/* Extract and load eBPF map fds. */
		if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS)) {
			struct bpf_elf_map *maps = data_anc.sec_data->d_buf;
			unsigned int maps_num = data_anc.sec_data->d_size /
						sizeof(*maps);

			sec_seen[sec_index] = true;
			ret = bpf_maps_attach(maps, maps_num, map_fds,
					      max_fds);
			if (ret < 0)
				return ret;
		}
		/* Extract eBPF license. */
		else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) {
			if (data_anc.sec_data->d_size > lic_len)
				return -ENOMEM;

			sec_seen[sec_index] = true;
			memcpy(license, data_anc.sec_data->d_buf,
			       data_anc.sec_data->d_size);
		}
		/* Extract symbol table for relocations (map fd fixups). */
		else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) {
			sec_seen[sec_index] = true;
			*sym_tab = data_anc.sec_data;
		}
	}

	return 0;
}
/*
 * Load the program of type @type from a section that has relocation data.
 *
 * Every SHT_REL section is examined; the section it applies to (sh_info)
 * must carry the name returned by prog_type_section(@type). Both sections
 * are flagged in @sec_seen, the map relocations are applied and the
 * patched instructions are loaded. The walk stops at the first program
 * that loads successfully.
 *
 * Returns the program descriptor, or -1 if no program could be loaded or
 * @type has no associated section name.
 */
static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
			       enum bpf_prog_type type, char *license,
			       Elf_Data *sym_tab, int *map_fds, unsigned int max_fds)
{
	const char *sec_want = prog_type_section(type);
	int sec_index, prog_fd = -1;

	/* Unsupported program type: there is no name to compare against. */
	if (!sec_want)
		return prog_fd;

	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
		struct bpf_elf_sec_data data_relo, data_insn;
		int ins_index, ret;

		/* Attach eBPF programs with relocation data (maps). */
		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
					    &data_relo);
		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
			continue;

		/* sh_info of a relocation section names the section it patches. */
		ins_index = data_relo.sec_hdr.sh_info;

		ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index,
					    &data_insn);
		if (ret < 0)
			continue;
		if (strcmp(data_insn.sec_name, sec_want))
			continue;

		sec_seen[sec_index] = true;
		sec_seen[ins_index] = true;

		ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab,
					  map_fds, max_fds);
		if (ret < 0)
			continue;

		prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
					  data_insn.sec_data->d_size, license);
		if (prog_fd < 0)
			continue;

		break;
	}

	return prog_fd;
}
/*
 * Load the program of type @type from a section without relocation data.
 *
 * Sections already flagged in @sec_seen are skipped; among the remaining
 * ones, those carrying the name returned by prog_type_section(@type) are
 * loaded as-is. The walk stops at the first program that loads
 * successfully.
 *
 * Returns the program descriptor, or -1 if no program could be loaded or
 * @type has no associated section name.
 */
static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
			  enum bpf_prog_type type, char *license)
{
	const char *sec_want = prog_type_section(type);
	int sec_index, prog_fd = -1;

	/* Unsupported program type: there is no name to compare against. */
	if (!sec_want)
		return prog_fd;

	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
		struct bpf_elf_sec_data data_insn;
		int ret;

		/* Attach eBPF programs without relocation data. */
		if (sec_seen[sec_index])
			continue;

		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
					    &data_insn);
		if (ret < 0)
			continue;
		if (strcmp(data_insn.sec_name, sec_want))
			continue;

		prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
					  data_insn.sec_data->d_size, license);
		if (prog_fd < 0)
			continue;

		break;
	}

	return prog_fd;
}
/*
 * Open the ELF object at @path and load the program of type @type from it.
 *
 * Ancillary data (maps, license, symbol table) is collected first. If a
 * symbol table is present, a program with relocation data is tried, and a
 * program without relocation data serves as fallback.
 *
 * Returns the program descriptor on success; the descriptors of the maps
 * created for it are left open in that case. On failure a negative value
 * is returned (the specific error code where one is known) and all maps
 * created so far are closed again.
 */
int bpf_open_object(const char *path, enum bpf_prog_type type)
{
	int map_fds[ELF_MAX_MAPS], max_fds = ARRAY_SIZE(map_fds);
	char license[ELF_MAX_LICENSE_LEN];
	int file_fd, prog_fd = -1, ret;
	Elf_Data *sym_tab = NULL;
	GElf_Ehdr elf_hdr;
	bool *sec_seen;
	Elf *elf_fd;

	if (elf_version(EV_CURRENT) == EV_NONE)
		return -EINVAL;

	file_fd = open(path, O_RDONLY, 0);
	if (file_fd < 0)
		return -errno;

	/*
	 * From here on prog_fd doubles as the return value, so every error
	 * path stores its code there before jumping to the cleanup labels.
	 */
	elf_fd = elf_begin(file_fd, ELF_C_READ, NULL);
	if (!elf_fd) {
		prog_fd = -EINVAL;
		goto out;
	}

	if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) {
		prog_fd = -EIO;
		goto out_elf;
	}

	sec_seen = calloc(elf_hdr.e_shnum, sizeof(*sec_seen));
	if (!sec_seen) {
		prog_fd = -ENOMEM;
		goto out_elf;
	}

	memset(license, 0, sizeof(license));
	bpf_maps_init(map_fds, max_fds);

	ret = bpf_fetch_ancillary(elf_fd, &elf_hdr, sec_seen, map_fds, max_fds,
				  license, sizeof(license), &sym_tab);
	if (ret < 0) {
		prog_fd = ret;
		goto out_maps;
	}

	if (sym_tab)
		prog_fd = bpf_fetch_prog_relo(elf_fd, &elf_hdr, sec_seen, type,
					      license, sym_tab, map_fds, max_fds);
	if (prog_fd < 0)
		prog_fd = bpf_fetch_prog(elf_fd, &elf_hdr, sec_seen, type,
					 license);
	if (prog_fd < 0)
		goto out_maps;
out_sec:
	free(sec_seen);
out_elf:
	elf_end(elf_fd);
out:
	close(file_fd);
	return prog_fd;

out_maps:
	bpf_maps_destroy(map_fds, max_fds);
	goto out_sec;
}
#endif /* HAVE_ELF */