The control knobs set before loading BPF programs should be declared as
'const volatile' so that they can be optimized by the BPF core.
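For example (a minimal BPF-C sketch, not the actual func_latency.bpf.c
source; the map layout and section names here are illustrative), a
'const volatile' global lands in the .rodata section, so the skeleton
exposes it to user space and the verifier sees its final value as a
known constant at load time:

/*
 * Sketch only: a 'const volatile' knob in .rodata.  User space sets it
 * through the skeleton (skel->rodata->has_cpu = 1) between open and load,
 * as perf_ftrace__latency_prepare_bpf() does below.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

const volatile int has_cpu = 0;		/* control knob, fixed at load time */

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u8);
} cpu_filter SEC(".maps");

SEC("kprobe/schedule")
int func_begin(void *ctx)
{
	if (has_cpu) {			/* pruned entirely when the knob stays 0 */
		__u32 cpu = bpf_get_smp_processor_id();

		if (!bpf_map_lookup_elem(&cpu_filter, &cpu))
			return 0;	/* CPU not in the filter */
	}
	/* ... record the entry timestamp here ... */
	return 0;
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";

Because the value cannot change after load, the verifier can dead-code
eliminate the guarded filter when the knob is left at 0, instead of
keeping a runtime branch as it would for an ordinary .bss variable.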
Committer testing:
root@x1:~# perf ftrace latency --use-bpf -T schedule
^C#  DURATION     |      COUNT | GRAPH                                          |
     0 - 1    us |          0 |                                                |
     1 - 2    us |          0 |                                                |
     2 - 4    us |          0 |                                                |
     4 - 8    us |          0 |                                                |
     8 - 16   us |          1 |                                                |
    16 - 32   us |          5 |                                                |
    32 - 64   us |          2 |                                                |
    64 - 128  us |          6 |                                                |
   128 - 256  us |          7 |                                                |
   256 - 512  us |          5 |                                                |
   512 - 1024 us |         22 | #                                              |
     1 - 2    ms |         36 | ##                                             |
     2 - 4    ms |         68 | #####                                          |
     4 - 8    ms |         22 | #                                              |
     8 - 16   ms |         91 | #######                                        |
    16 - 32   ms |         11 |                                                |
    32 - 64   ms |         26 | ##                                             |
    64 - 128  ms |        213 | #################                              |
   128 - 256  ms |         19 | #                                              |
   256 - 512  ms |         14 | #                                              |
   512 - 1024 ms |          5 |                                                |
     1 - ...   s |          8 |                                                |
root@x1:~#
root@x1:~# perf trace -e bpf perf ftrace latency --use-bpf -T schedule
0.000 ( 0.015 ms): perf/2944525 bpf(cmd: 36, uattr: 0x7ffe80de7b40, size: 8) = -1 EOPNOTSUPP (Operation not supported)
0.025 ( 0.102 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7870, size: 148) = 8
0.136 ( 0.026 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7930, size: 148) = 8
0.174 ( 0.026 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de77e0, size: 148) = 8
0.205 ( 0.010 ms): perf/2944525 bpf(uattr: 0x7ffe80de7990, size: 80) = 8
0.227 ( 0.011 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7810, size: 40) = 8
0.244 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40) = 8
0.257 ( 0.006 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7660, size: 40) = 8
0.265 ( 0.058 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7730, size: 148) = 9
0.330 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78e0, size: 40) = 8
0.337 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7890, size: 40) = 8
0.343 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40) = 8
0.349 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78b0, size: 40) = 8
0.355 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7890, size: 40) = 8
0.361 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78b0, size: 40) = 8
0.367 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40) = 8
0.373 ( 0.014 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7a00, size: 40) = 8
0.390 ( 0.358 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9
0.763 ( 0.014 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9
0.783 ( 0.011 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9
0.798 ( 0.017 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9
0.819 ( 0.003 ms): perf/2944525 bpf(uattr: 0x7ffe80de7700, size: 80) = 9
0.824 ( 0.047 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de76c0, size: 148) = 10
0.878 ( 0.008 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80) = 9
0.891 ( 0.014 ms): perf/2944525 bpf(cmd: MAP_UPDATE_ELEM, uattr: 0x7ffe80de79e0, size: 32) = 0
0.910 ( 0.103 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7880, size: 148) = 9
1.016 ( 0.143 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7880, size: 148) = 10
3.777 ( 0.068 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7570, size: 148) = 12
3.848 ( 0.003 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de7550, size: 64) = -1 EBADF (Bad file descriptor)
3.859 ( 0.006 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de77c0, size: 64) = 12
6.504 ( 0.010 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de77c0, size: 64) = 14
^C#  DURATION     |      COUNT | GRAPH                                          |
     0 - 1    us |          0 |                                                |
     1 - 2    us |          0 |                                                |
     2 - 4    us |          1 |                                                |
     4 - 8    us |          3 |                                                |
     8 - 16   us |          3 |                                                |
    16 - 32   us |         11 |                                                |
    32 - 64   us |          9 |                                                |
    64 - 128  us |         17 |                                                |
   128 - 256  us |         30 | #                                              |
   256 - 512  us |         20 |                                                |
   512 - 1024 us |         42 | #                                              |
     1 - 2    ms |        151 | ######                                         |
     2 - 4    ms |        106 | ####                                           |
     4 - 8    ms |         18 |                                                |
     8 - 16   ms |        149 | ######                                         |
    16 - 32   ms |         30 | #                                              |
    32 - 64   ms |         17 |                                                |
    64 - 128  ms |        360 | ###############                                |
   128 - 256  ms |         52 | ##                                             |
   256 - 512  ms |         18 |                                                |
   512 - 1024 ms |         28 | #                                              |
     1 - ...   s |          5 |                                                |
root@x1:~#
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240902200515.2103769-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
#include <stdio.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>

#include <linux/err.h>

#include "util/ftrace.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"

#include "util/bpf_skel/func_latency.skel.h"

static struct func_latency_bpf *skel;

int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
	int fd, err;
	int i, ncpus = 1, ntasks = 1;
	struct filter_entry *func;

	if (!list_is_singular(&ftrace->filters)) {
		pr_err("ERROR: %s target function(s).\n",
		       list_empty(&ftrace->filters) ? "No" : "Too many");
		return -1;
	}

	func = list_first_entry(&ftrace->filters, struct filter_entry, list);

	skel = func_latency_bpf__open();
	if (!skel) {
		pr_err("Failed to open func latency skeleton\n");
		return -1;
	}

	/* don't need to set cpu filter for system-wide mode */
	if (ftrace->target.cpu_list) {
		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
		skel->rodata->has_cpu = 1;
	}

	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
		skel->rodata->has_task = 1;
	}

	skel->rodata->use_nsec = ftrace->use_nsec;

	set_max_rlimit();

	err = func_latency_bpf__load(skel);
	if (err) {
		pr_err("Failed to load func latency skeleton\n");
		goto out;
	}

	if (ftrace->target.cpu_list) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
							    false, func->name);
	if (IS_ERR(skel->links.func_begin)) {
		pr_err("Failed to attach fentry program\n");
		err = PTR_ERR(skel->links.func_begin);
		goto out;
	}

	skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
							  true, func->name);
	if (IS_ERR(skel->links.func_end)) {
		pr_err("Failed to attach fexit program\n");
		err = PTR_ERR(skel->links.func_end);
		goto out;
	}

	/* XXX: we don't actually use this fd - just for poll() */
	return open("/dev/null", O_RDONLY);

out:
	return err;
}

int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;
	return 0;
}

int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;
	return 0;
}

int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
				  int buckets[])
{
	int i, fd, err;
	u32 idx;
	u64 *hist;
	int ncpus = cpu__max_cpu().cpu;

	fd = bpf_map__fd(skel->maps.latency);

	hist = calloc(ncpus, sizeof(*hist));
	if (hist == NULL)
		return -ENOMEM;

	for (idx = 0; idx < NUM_BUCKET; idx++) {
		err = bpf_map_lookup_elem(fd, &idx, hist);
		if (err) {
			buckets[idx] = 0;
			continue;
		}

		for (i = 0; i < ncpus; i++)
			buckets[idx] += hist[i];
	}

	free(hist);
	return 0;
}

int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	return 0;
}
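For reference, a hypothetical caller would drive these entry points in
the order below. This is a sketch only: run_latency_bpf() is not a real
function (the actual driver is perf's ftrace command code), and it
assumes a populated struct perf_ftrace plus the declarations from
util/ftrace.h and the skeleton header.

/*
 * Hypothetical driver for the functions above: prepare (open, set the
 * rodata knobs, load, attach), start, collect, stop, read, clean up.
 */
static int run_latency_bpf(struct perf_ftrace *ftrace)
{
	int i, fd;
	int buckets[NUM_BUCKET] = { 0 };

	fd = perf_ftrace__latency_prepare_bpf(ftrace);
	if (fd < 0)
		return fd;

	perf_ftrace__latency_start_bpf(ftrace);		/* enabled = 1 */
	/* ... run the workload, poll(fd) until interrupted ... */
	perf_ftrace__latency_stop_bpf(ftrace);		/* enabled = 0 */

	perf_ftrace__latency_read_bpf(ftrace, buckets);
	for (i = 0; i < NUM_BUCKET; i++)
		printf("bucket %2d: %d\n", i, buckets[i]);

	perf_ftrace__latency_cleanup_bpf(ftrace);
	close(fd);
	return 0;
}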