diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index eaec8253be68..e8cc8208e29f 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -148,6 +148,9 @@ OPTIONS for 'perf ftrace latency' --use-nsec:: Use nano-second instead of micro-second as a base unit of the histogram. +--bucket-range=:: + Bucket range in ms or ns (according to -n/--use-nsec), default is log2() mode. + OPTIONS for 'perf ftrace profile' --------------------------------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 88b9f0597b92..e047e5dcda26 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -777,9 +777,17 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[], if (ftrace->use_nsec) num *= 1000; - i = log2(num); - if (i < 0) + if (!ftrace->bucket_range) { + i = log2(num); + if (i < 0) + i = 0; + } else { + // Less than 1 unit (ms or ns), or, in the future, + // than the min latency desired. i = 0; + if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ] + i = num / ftrace->bucket_range + 1; + } if (i >= NUM_BUCKET) i = NUM_BUCKET - 1; @@ -815,28 +823,58 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[]) " DURATION ", "COUNT", bar_total, "GRAPH"); bar_len = buckets[0] * bar_total / total; - printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", + + printf(" %4d - %4d %s | %10d | %.*s%*s |\n", 0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); for (i = 1; i < NUM_BUCKET - 1; i++) { - int start = (1 << (i - 1)); - int stop = 1 << i; + int start, stop; const char *unit = use_nsec ? "ns" : "us"; - if (start >= 1024) { - start >>= 10; - stop >>= 10; - unit = use_nsec ? "us" : "ms"; + if (!ftrace->bucket_range) { + start = (1 << (i - 1)); + stop = 1 << i; + + if (start >= 1024) { + start >>= 10; + stop >>= 10; + unit = use_nsec ? "us" : "ms"; + } + } else { + start = (i - 1) * ftrace->bucket_range + 1; + stop = i * ftrace->bucket_range + 1; + + if (start >= 1000) { + double dstart = start / 1000.0, + dstop = stop / 1000.0; + printf(" %4.2f - %-4.2f", dstart, dstop); + unit = use_nsec ? "us" : "ms"; + goto print_bucket_info; + } } + + printf(" %4d - %4d", start, stop); +print_bucket_info: bar_len = buckets[i] * bar_total / total; - printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", - start, stop, unit, buckets[i], bar_len, bar, + printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar, bar_total - bar_len, ""); } bar_len = buckets[NUM_BUCKET - 1] * bar_total / total; - printf(" %4d - %-4s %s | %10d | %.*s%*s |\n", - 1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1], + if (!ftrace->bucket_range) { + printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s "); + } else { + int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range; + + if (upper_outlier >= 1000) { + double dstart = upper_outlier / 1000.0; + + printf(" %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms"); + } else { + printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us"); + } + } + printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1], bar_len, bar, bar_total - bar_len, ""); } @@ -1558,6 +1596,8 @@ int cmd_ftrace(int argc, const char **argv) #endif OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec, "Use nano-second histogram"), + OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range, + "Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"), OPT_PARENT(common_options), }; const struct option profile_options[] = { diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 06d1c4018407..b3cb68295e56 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -36,6 +36,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) return -1; } + skel->rodata->bucket_range = ftrace->bucket_range; + /* don't need to set cpu filter for system-wide mode */ if (ftrace->target.cpu_list) { ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus); diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index f613dc9cb123..00a340ca1543 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -41,6 +41,7 @@ int enabled = 0; const volatile int has_cpu = 0; const volatile int has_task = 0; const volatile int use_nsec = 0; +const volatile unsigned int bucket_range; SEC("kprobe/func") int BPF_PROG(func_begin) @@ -100,12 +101,25 @@ int BPF_PROG(func_end) if (delta < 0) return 0; + if (bucket_range != 0) { + delta /= cmp_base; + // Less than 1 unit (ms or ns), or, in the future, + // than the min latency desired. + key = 0; + if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units ) + key = delta / bucket_range + 1; + if (key >= NUM_BUCKET) + key = NUM_BUCKET - 1; + } + goto do_lookup; + } // calculate index using delta for (key = 0; key < (NUM_BUCKET - 1); key++) { if (delta < (cmp_base << key)) break; } +do_lookup: hist = bpf_map_lookup_elem(&latency, &key); if (!hist) return 0; diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index bae649ef50e8..6ac136484349 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -20,6 +20,7 @@ struct perf_ftrace { unsigned long percpu_buffer_size; bool inherit; bool use_nsec; + unsigned int bucket_range; int graph_depth; int func_stack_trace; int func_irq_info;