mirror of
https://git.proxmox.com/git/mirror_ubuntu-kernels.git
synced 2026-01-26 23:30:15 +00:00
Calculate the runtime of the softirq events and subtract it from
the corresponding task runtime to improve the precision.
Example usage:
# perf kwork -k sched,irq,softirq record -- perf record -e cpu-clock -o perf_record.data -a sleep 10
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.467 MB perf_record.data (7154 samples) ]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 2.152 MB perf.data (22846 samples) ]
# perf kwork top
Total : 136601.588 ms, 8 cpus
%Cpu(s): 95.66% id, 0.04% hi, 0.05% si
%Cpu0 [ 0.02%]
%Cpu1 [ 0.01%]
%Cpu2 [| 4.61%]
%Cpu3 [ 0.04%]
%Cpu4 [ 0.01%]
%Cpu5 [||||| 17.31%]
%Cpu6 [ 0.51%]
%Cpu7 [||| 11.42%]
PID %CPU RUNTIME COMMMAND
----------------------------------------------------
0 99.98 17073.515 ms swapper/4
0 99.98 17072.173 ms swapper/1
0 99.93 17064.229 ms swapper/3
0 99.62 17011.013 ms swapper/0
0 99.47 16985.180 ms swapper/6
0 95.17 16250.874 ms swapper/2
0 88.51 15111.684 ms swapper/7
0 82.62 14108.577 ms swapper/5
4342 33.00 5644.045 ms perf
4344 0.43 74.351 ms perf
16 0.13 22.296 ms rcu_preempt
4345 0.05 10.093 ms perf
4343 0.05 8.769 ms perf
4341 0.02 4.882 ms perf
4095 0.02 4.605 ms kworker/7:1
75 0.02 4.261 ms kworker/2:1
120 0.01 1.909 ms systemd-journal
98 0.01 2.540 ms jbd2/sda-8
61 0.01 3.404 ms kcompactd0
667 0.01 2.542 ms kworker/u16:2
4340 0.00 1.052 ms kworker/7:2
97 0.00 0.489 ms kworker/7:1H
51 0.00 0.209 ms ksoftirqd/7
50 0.00 0.646 ms migration/7
76 0.00 0.753 ms kworker/6:1
45 0.00 0.572 ms migration/6
87 0.00 0.145 ms kworker/5:1H
73 0.00 0.596 ms kworker/5:1
41 0.00 0.041 ms ksoftirqd/5
40 0.00 0.718 ms migration/5
64 0.00 0.115 ms kworker/4:1
35 0.00 0.556 ms migration/4
353 0.00 2.600 ms sshd
74 0.00 0.205 ms kworker/3:1
33 0.00 1.576 ms kworker/3:0H
30 0.00 0.996 ms migration/3
26 0.00 1.665 ms ksoftirqd/2
25 0.00 0.662 ms migration/2
397 0.00 0.057 ms kworker/1:1
20 0.00 1.005 ms migration/1
2909 0.00 1.053 ms kworker/0:2
17 0.00 0.720 ms migration/0
15 0.00 0.039 ms ksoftirqd/0
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Link: https://lore.kernel.org/r/20230812084917.169338-13-yangjihong1@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
289 lines
8.1 KiB
C
289 lines
8.1 KiB
C
#ifndef PERF_UTIL_KWORK_H
|
|
#define PERF_UTIL_KWORK_H
|
|
|
|
#include "util/tool.h"
|
|
#include "util/time-utils.h"
|
|
|
|
#include <linux/bitmap.h>
|
|
#include <linux/list.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/types.h>
|
|
|
|
/*
 * Opaque types that this header only ever uses through pointers.
 * struct evsel and struct machine appear in the callback prototypes further
 * down; they are declared here as well so those prototypes do not depend on
 * some other header having declared them first.
 */
struct evsel;
struct machine;
struct perf_sample;
struct perf_session;
|
|
|
|
/*
 * Kinds of kernel work that perf kwork distinguishes.
 *
 * The enumerators rely on their implicit values (0, 1, 2, ...), so
 * KWORK_CLASS_MAX equals the number of real classes and must stay last.
 */
enum kwork_class_type {
	KWORK_CLASS_IRQ,
	KWORK_CLASS_SOFTIRQ,
	KWORK_CLASS_WORKQUEUE,
	KWORK_CLASS_SCHED,
	KWORK_CLASS_MAX,	/* count sentinel, not a real class */
};
|
|
|
|
/*
 * Report flavours; the selected one is stored in perf_kwork.report.
 * Values are implicit and start at 0.
 */
enum kwork_report_type {
	KWORK_REPORT_RUNTIME,
	KWORK_REPORT_LATENCY,
	KWORK_REPORT_TIMEHIST,
	KWORK_REPORT_TOP,
};
|
|
|
|
/*
 * Phases of a work item that are traced: raise, entry and exit.
 *
 * KWORK_TRACE_MAX is the number of phases; it sizes
 * kwork_work.atom_list[] and (plus one) perf_kwork.nr_skipped_events[],
 * so it must stay last and the values must stay contiguous from 0.
 */
enum kwork_trace_type {
	KWORK_TRACE_RAISE,
	KWORK_TRACE_ENTRY,
	KWORK_TRACE_EXIT,
	KWORK_TRACE_MAX,	/* count sentinel, not a real phase */
};
|
|
|
|
/*
 * data structure:
 *
 *                 +==================+ +============+ +======================+
 *                 |      class       | |    work    | |         atom         |
 *                 +==================+ +============+ +======================+
 * +------------+  |  +-----+         | |  +------+  | |  +-------+   +-----+ |
 * | perf_kwork | +-> | irq | --------|+-> | eth0 | --+-> | raise | - | ... | --+   +-----------+
 * +-----+------+ ||  +-----+         |||  +------+  |||  +-------+   +-----+ | |   |           |
 *       |        ||                  |||            |||                      | +-> | atom_page |
 *       |        ||                  |||            |||  +-------+   +-----+ |     |           |
 *       |  class_list                |||            |+-> | entry | - | ... | ----> |           |
 *       |        ||                  |||            |||  +-------+   +-----+ |     |           |
 *       |        ||                  |||            |||                      | +-> |           |
 *       |        ||                  |||            |||  +-------+   +-----+ | |   |           |
 *       |        ||                  |||            |+-> | exit  | - | ... | --+   +-----+-----+
 *       |        ||                  |||            | |  +-------+   +-----+ |           |
 *       |        ||                  |||            | |                      |           |
 *       |        ||                  |||  +-----+   | |                      |           |
 *       |        ||                  |+-> | ... |   | |                      |           |
 *       |        ||                  | |  +-----+   | |                      |           |
 *       |        ||                  | |            | |                      |           |
 *       |        ||  +---------+     | |  +-----+   | |  +-------+   +-----+ |           |
 *       |        +-> | softirq | -------> | RCU | ---+-> | raise | - | ... | --+   +-----+-----+
 *       |        ||  +---------+     | |  +-----+   |||  +-------+   +-----+ | |   |           |
 *       |        ||                  | |            |||                      | +-> | atom_page |
 *       |        ||                  | |            |||  +-------+   +-----+ |     |           |
 *       |        ||                  | |            |+-> | entry | - | ... | ----> |           |
 *       |        ||                  | |            |||  +-------+   +-----+ |     |           |
 *       |        ||                  | |            |||                      | +-> |           |
 *       |        ||                  | |            |||  +-------+   +-----+ | |   |           |
 *       |        ||                  | |            |+-> | exit  | - | ... | --+   +-----+-----+
 *       |        ||                  | |            | |  +-------+   +-----+ |           |
 *       |        ||                  | |            | |                      |           |
 *       |        ||  +-----------+   | |  +-----+   | |                      |           |
 *       |        +-> | workqueue | -----> | ... |   | |                      |           |
 *       |         |  +-----------+   | |  +-----+   | |                      |           |
 *       |         +==================+ +============+ +======================+           |
 *       |                                                                                |
 *       +---->  atom_page_list  ---------------------------------------------------------+
 *
 */
|
|
|
|
struct kwork_atom {
|
|
struct list_head list;
|
|
u64 time;
|
|
struct kwork_atom *prev;
|
|
|
|
void *page_addr;
|
|
unsigned long bit_inpage;
|
|
};
|
|
|
|
#define NR_ATOM_PER_PAGE 128
|
|
struct kwork_atom_page {
|
|
struct list_head list;
|
|
struct kwork_atom atoms[NR_ATOM_PER_PAGE];
|
|
DECLARE_BITMAP(bitmap, NR_ATOM_PER_PAGE);
|
|
};
|
|
|
|
struct perf_kwork;
|
|
struct kwork_class;
|
|
struct kwork_work {
|
|
/*
|
|
* class field
|
|
*/
|
|
struct rb_node node;
|
|
struct kwork_class *class;
|
|
|
|
/*
|
|
* work field
|
|
*/
|
|
u64 id;
|
|
int cpu;
|
|
char *name;
|
|
|
|
/*
|
|
* atom field
|
|
*/
|
|
u64 nr_atoms;
|
|
struct list_head atom_list[KWORK_TRACE_MAX];
|
|
|
|
/*
|
|
* runtime report
|
|
*/
|
|
u64 max_runtime;
|
|
u64 max_runtime_start;
|
|
u64 max_runtime_end;
|
|
u64 total_runtime;
|
|
|
|
/*
|
|
* latency report
|
|
*/
|
|
u64 max_latency;
|
|
u64 max_latency_start;
|
|
u64 max_latency_end;
|
|
u64 total_latency;
|
|
|
|
/*
|
|
* top report
|
|
*/
|
|
u32 cpu_usage;
|
|
};
|
|
|
|
struct kwork_class {
|
|
struct list_head list;
|
|
const char *name;
|
|
enum kwork_class_type type;
|
|
|
|
unsigned int nr_tracepoints;
|
|
const struct evsel_str_handler *tp_handlers;
|
|
|
|
struct rb_root_cached work_root;
|
|
|
|
int (*class_init)(struct kwork_class *class,
|
|
struct perf_session *session);
|
|
|
|
void (*work_init)(struct perf_kwork *kwork,
|
|
struct kwork_class *class,
|
|
struct kwork_work *work,
|
|
enum kwork_trace_type src_type,
|
|
struct evsel *evsel,
|
|
struct perf_sample *sample,
|
|
struct machine *machine);
|
|
|
|
void (*work_name)(struct kwork_work *work,
|
|
char *buf, int len);
|
|
};
|
|
|
|
/*
 * Per-report event callbacks.  All four share one signature and are
 * reached through perf_kwork.tp_handler; there is one hook for each trace
 * phase (raise / entry / exit) plus one for sched_switch events.
 */
struct trace_kwork_handler {
	int (*raise_event)(struct perf_kwork *kwork,
			   struct kwork_class *class, struct evsel *evsel,
			   struct perf_sample *sample, struct machine *machine);

	int (*entry_event)(struct perf_kwork *kwork,
			   struct kwork_class *class, struct evsel *evsel,
			   struct perf_sample *sample, struct machine *machine);

	int (*exit_event)(struct perf_kwork *kwork,
			  struct kwork_class *class, struct evsel *evsel,
			  struct perf_sample *sample, struct machine *machine);

	int (*sched_switch_event)(struct perf_kwork *kwork,
				  struct kwork_class *class, struct evsel *evsel,
				  struct perf_sample *sample, struct machine *machine);
};
|
|
|
|
struct __top_cpus_runtime {
|
|
u64 load;
|
|
u64 idle;
|
|
u64 irq;
|
|
u64 softirq;
|
|
u64 total;
|
|
};
|
|
|
|
struct kwork_top_stat {
|
|
DECLARE_BITMAP(all_cpus_bitmap, MAX_NR_CPUS);
|
|
struct __top_cpus_runtime *cpus_runtime;
|
|
};
|
|
|
|
struct perf_kwork {
|
|
/*
|
|
* metadata
|
|
*/
|
|
struct perf_tool tool;
|
|
struct list_head class_list;
|
|
struct list_head atom_page_list;
|
|
struct list_head sort_list, cmp_id;
|
|
struct rb_root_cached sorted_work_root;
|
|
const struct trace_kwork_handler *tp_handler;
|
|
|
|
/*
|
|
* profile filters
|
|
*/
|
|
const char *profile_name;
|
|
|
|
const char *cpu_list;
|
|
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
|
|
|
|
const char *time_str;
|
|
struct perf_time_interval ptime;
|
|
|
|
/*
|
|
* options for command
|
|
*/
|
|
bool force;
|
|
const char *event_list_str;
|
|
enum kwork_report_type report;
|
|
|
|
/*
|
|
* options for subcommand
|
|
*/
|
|
bool summary;
|
|
const char *sort_order;
|
|
bool show_callchain;
|
|
unsigned int max_stack;
|
|
bool use_bpf;
|
|
|
|
/*
|
|
* statistics
|
|
*/
|
|
u64 timestart;
|
|
u64 timeend;
|
|
|
|
unsigned long nr_events;
|
|
unsigned long nr_lost_chunks;
|
|
unsigned long nr_lost_events;
|
|
|
|
u64 all_runtime;
|
|
u64 all_count;
|
|
u64 nr_skipped_events[KWORK_TRACE_MAX + 1];
|
|
|
|
/*
|
|
* perf kwork top data
|
|
*/
|
|
struct kwork_top_stat top_stat;
|
|
};
|
|
|
|
/*
 * Add a work item described by @key to @class and return it.
 * NOTE(review): the definition is not part of this header; whether an
 * existing entry is reused and what is returned on failure must be checked
 * there.
 */
struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
				       struct kwork_class *class,
				       struct kwork_work *key);
|
|
|
|
#ifdef HAVE_BPF_SKEL
|
|
|
|
int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork);
|
|
int perf_kwork__report_read_bpf(struct perf_kwork *kwork);
|
|
void perf_kwork__report_cleanup_bpf(void);
|
|
|
|
void perf_kwork__trace_start(void);
|
|
void perf_kwork__trace_finish(void);
|
|
|
|
#else /* !HAVE_BPF_SKEL */
|
|
|
|
static inline int
|
|
perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
static inline int
|
|
perf_kwork__report_read_bpf(struct perf_kwork *kwork __maybe_unused)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
static inline void perf_kwork__report_cleanup_bpf(void) {}
|
|
|
|
static inline void perf_kwork__trace_start(void) {}
|
|
static inline void perf_kwork__trace_finish(void) {}
|
|
|
|
#endif /* HAVE_BPF_SKEL */
|
|
|
|
#endif /* PERF_UTIL_KWORK_H */
|