mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-09-02 16:44:59 +00:00

Add metrics taken from Section 2.1.15.2 "Performance Measurement" in the Processor Programming Reference (PPR) for AMD Family 19h Model 11h Revision B1 processors. The recommended metrics are sourced from Table 27 "Guidance for Common Performance Statistics with Complex Event Selects". The pipeline utilization metrics are sourced from Table 28 "Guidance for Pipeline Utilization Analysis Statistics". These are new to Zen 4 processors and useful for finding performance bottlenecks by analyzing activity at different stages of the pipeline. Metric groups have been added for Level 1 and Level 2 analysis. Signed-off-by: Sandipan Das <sandipan.das@amd.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ananth Narayan <ananth.narayan@amd.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Jirka Hladky <jhladky@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Stephane Eranian <eranian@google.com> Link: https://lore.kernel.org/r/20221214082652.419965-4-sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
99 lines
4.4 KiB
JSON
99 lines
4.4 KiB
JSON
[
|
|
{
|
|
"MetricName": "total_dispatch_slots",
|
|
"BriefDescription": "Total dispatch slots (up to 6 instructions can be dispatched in each cycle).",
|
|
"MetricExpr": "6 * ls_not_halted_cyc"
|
|
},
|
|
{
|
|
"MetricName": "frontend_bound",
|
|
"BriefDescription": "Fraction of dispatch slots that remained unused because the frontend did not supply enough instructions/ops.",
|
|
"MetricExpr": "d_ratio(de_no_dispatch_per_slot.no_ops_from_frontend, total_dispatch_slots)",
|
|
"MetricGroup": "PipelineL1",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "bad_speculation",
|
|
"BriefDescription": "Fraction of dispatched ops that did not retire.",
|
|
"MetricExpr": "d_ratio(de_src_op_disp.all - ex_ret_ops, total_dispatch_slots)",
|
|
"MetricGroup": "PipelineL1",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "backend_bound",
|
|
"BriefDescription": "Fraction of dispatch slots that remained unused because of backend stalls.",
|
|
"MetricExpr": "d_ratio(de_no_dispatch_per_slot.backend_stalls, total_dispatch_slots)",
|
|
"MetricGroup": "PipelineL1",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "smt_contention",
|
|
"BriefDescription": "Fraction of dispatch slots that remained unused because the other thread was selected.",
|
|
"MetricExpr": "d_ratio(de_no_dispatch_per_slot.smt_contention, total_dispatch_slots)",
|
|
"MetricGroup": "PipelineL1",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "retiring",
|
|
"BriefDescription": "Fraction of dispatch slots used by ops that retired.",
|
|
"MetricExpr": "d_ratio(ex_ret_ops, total_dispatch_slots)",
|
|
"MetricGroup": "PipelineL1",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "frontend_bound_latency",
|
|
"BriefDescription": "Fraction of dispatch slots that remained unused because of a latency bottleneck in the frontend (such as instruction cache or TLB misses).",
|
|
"MetricExpr": "d_ratio((6 * cpu@de_no_dispatch_per_slot.no_ops_from_frontend\\,cmask\\=0x6@), total_dispatch_slots)",
|
|
"MetricGroup": "PipelineL2;frontend_bound_group",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "frontend_bound_bandwidth",
|
|
"BriefDescription": "Fraction of dispatch slots that remained unused because of a bandwidth bottleneck in the frontend (such as decode or op cache fetch bandwidth).",
|
|
"MetricExpr": "d_ratio(de_no_dispatch_per_slot.no_ops_from_frontend - (6 * cpu@de_no_dispatch_per_slot.no_ops_from_frontend\\,cmask\\=0x6@), total_dispatch_slots)",
|
|
"MetricGroup": "PipelineL2;frontend_bound_group",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "bad_speculation_mispredicts",
|
|
"BriefDescription": "Fraction of dispatched ops that were flushed due to branch mispredicts.",
|
|
"MetricExpr": "d_ratio(bad_speculation * ex_ret_brn_misp, ex_ret_brn_misp + resyncs_or_nc_redirects)",
|
|
"MetricGroup": "PipelineL2;bad_speculation_group",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "bad_speculation_pipeline_restarts",
|
|
"BriefDescription": "Fraction of dispatched ops that were flushed due to pipeline restarts (resyncs).",
|
|
"MetricExpr": "d_ratio(bad_speculation * resyncs_or_nc_redirects, ex_ret_brn_misp + resyncs_or_nc_redirects)",
|
|
"MetricGroup": "PipelineL2;bad_speculation_group",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "backend_bound_memory",
|
|
"BriefDescription": "Fraction of dispatch slots that remained unused because of stalls due to the memory subsystem.",
|
|
"MetricExpr": "backend_bound * d_ratio(ex_no_retire.load_not_complete, ex_no_retire.not_complete)",
|
|
"MetricGroup": "PipelineL2;backend_bound_group",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "backend_bound_cpu",
|
|
"BriefDescription": "Fraction of dispatch slots that remained unused because of stalls not related to the memory subsystem.",
|
|
"MetricExpr": "backend_bound * (1 - d_ratio(ex_no_retire.load_not_complete, ex_no_retire.not_complete))",
|
|
"MetricGroup": "PipelineL2;backend_bound_group",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "retiring_fastpath",
|
|
"BriefDescription": "Fraction of dispatch slots used by fastpath ops that retired.",
|
|
"MetricExpr": "retiring * (1 - d_ratio(ex_ret_ucode_ops, ex_ret_ops))",
|
|
"MetricGroup": "PipelineL2;retiring_group",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"MetricName": "retiring_microcode",
|
|
"BriefDescription": "Fraction of dispatch slots used by microcode ops that retired.",
|
|
"MetricExpr": "retiring * d_ratio(ex_ret_ucode_ops, ex_ret_ops)",
|
|
"MetricGroup": "PipelineL2;retiring_group",
|
|
"ScaleUnit": "100%"
|
|
}
|
|
]
|