Line data Source code
1 : /*
2 : * Copyright (c) 2022 Intel and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 : #include <perfmon/perfmon.h>
16 : #include <perfmon/intel/core.h>
17 :
/*
 * CPU-support predicate for this bundle.
 *
 * Returns non-zero when clib_cpu_supports_avx512_bitalg () reports true
 * and clib_cpu_supports_movdir64b () reports false, and zero otherwise.
 * This feature combination is what the file uses to recognise Ice Lake.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype rather than an old-style unspecified-argument declarator.
 */
static int
is_icelake (void)
{
  if (!clib_cpu_supports_avx512_bitalg ())
    return 0;

  return !clib_cpu_supports_movdir64b ();
}
23 :
24 : static perfmon_cpu_supports_t topdown_lvl2_cpu_supports_icx[] = {
25 : { is_icelake, PERFMON_BUNDLE_TYPE_THREAD }
26 : };
27 :
/*
 * Extract the i-th 8-bit field of m (field 0 is the least significant byte)
 * and yield it as an f64.
 *
 * Both arguments and the whole expansion are parenthesised so that
 * expression arguments such as GET_METRIC (m, a + b) select field (a + b)
 * instead of shifting by a + b * 8.
 */
#define GET_METRIC(m, i) ((f64) (((m) >> ((i) * 8)) & 0xff))
29 :
/*
 * Indices into perfmon_reading_t::value[] used by the helpers in this file.
 * The numbering follows the order of the .events[] entries in the bundle
 * registration at the end of the file, so the two must be kept in step.
 */
enum
{
  /* raw counters */
  TD_SLOTS = 0,
  STALLS_MEM_ANY = 1,
  STALLS_TOTAL = 2,
  BOUND_ON_STORES = 3,
  RECOVERY_CYCLES = 4,
  UOP_DROPPING = 5,
  UOP_NOT_DELIVERED = 6,

  /* top-down level 1 metric events */
  TD_RETIRING = 7,
  TD_BAD_SPEC = 8,
  TD_FE_BOUND = 9,
  TD_BE_BOUND = 10,
};
44 :
45 : static_always_inline f64
46 0 : memory_bound_fraction (perfmon_reading_t *ss)
47 : {
48 0 : return (ss->value[STALLS_MEM_ANY] + ss->value[BOUND_ON_STORES]) /
49 0 : (f64) (ss->value[STALLS_TOTAL] + ss->value[BOUND_ON_STORES]);
50 : }
51 :
52 : static_always_inline f64
53 0 : perf_metrics_sum (perfmon_reading_t *ss)
54 : {
55 0 : return ss->value[TD_RETIRING] + ss->value[TD_BAD_SPEC] +
56 0 : ss->value[TD_FE_BOUND] + ss->value[TD_BE_BOUND];
57 : }
58 :
59 : static_always_inline f64
60 0 : retiring (perfmon_reading_t *ss)
61 : {
62 0 : return ss->value[TD_RETIRING] / perf_metrics_sum (ss);
63 : }
64 :
65 : static_always_inline f64
66 0 : bad_speculation (perfmon_reading_t *ss)
67 : {
68 0 : return ss->value[TD_BAD_SPEC] / perf_metrics_sum (ss);
69 : }
70 :
71 : static_always_inline f64
72 0 : frontend_bound (perfmon_reading_t *ss)
73 : {
74 0 : return (ss->value[TD_FE_BOUND] / perf_metrics_sum (ss)) -
75 0 : (ss->value[UOP_DROPPING] / perf_metrics_sum (ss));
76 : }
77 :
78 : static_always_inline f64
79 0 : backend_bound (perfmon_reading_t *ss)
80 : {
81 0 : return (ss->value[TD_BE_BOUND] / perf_metrics_sum (ss)) +
82 0 : ((5 * ss->value[RECOVERY_CYCLES]) / perf_metrics_sum (ss));
83 : }
84 :
85 : static_always_inline f64
86 0 : fetch_latency (perfmon_reading_t *ss)
87 : {
88 0 : f64 r = ((5 * ss->value[UOP_NOT_DELIVERED] - ss->value[UOP_DROPPING]) /
89 0 : (f64) ss->value[TD_SLOTS]);
90 0 : return r;
91 : }
92 :
93 : static_always_inline f64
94 0 : fetch_bandwidth (perfmon_reading_t *ss)
95 : {
96 0 : return clib_max (0, frontend_bound (ss) - fetch_latency (ss));
97 : }
98 :
99 : static_always_inline f64
100 0 : memory_bound (perfmon_reading_t *ss)
101 : {
102 0 : return backend_bound (ss) * memory_bound_fraction (ss);
103 : }
104 :
105 : static_always_inline f64
106 0 : core_bound (perfmon_reading_t *ss)
107 : {
108 0 : return backend_bound (ss) - memory_bound (ss);
109 : }
110 :
111 : static u8 *
112 0 : format_topdown_lvl2_icx (u8 *s, va_list *args)
113 : {
114 0 : perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *);
115 0 : u64 idx = va_arg (*args, int);
116 0 : f64 sv = 0;
117 :
118 0 : switch (idx)
119 : {
120 0 : case 0:
121 0 : sv = retiring (ss);
122 0 : break;
123 0 : case 1:
124 0 : sv = bad_speculation (ss);
125 0 : break;
126 0 : case 2:
127 0 : sv = frontend_bound (ss);
128 0 : break;
129 0 : case 3:
130 0 : sv = backend_bound (ss);
131 0 : break;
132 0 : case 4:
133 0 : sv = fetch_latency (ss);
134 0 : break;
135 0 : case 5:
136 0 : sv = fetch_bandwidth (ss);
137 0 : break;
138 0 : case 6:
139 0 : sv = memory_bound (ss);
140 0 : break;
141 0 : case 7:
142 0 : sv = core_bound (ss);
143 0 : break;
144 : }
145 :
146 0 : s = format (s, "%f", sv * 100);
147 :
148 0 : return s;
149 : }
150 :
151 559 : PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric_icx) = {
152 : .name = "topdown",
153 : .description = "Top-down Microarchitecture Analysis Level 1 & 2",
154 : .source = "intel-core",
155 : .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
156 : .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY,
157 : .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL,
158 : .events[3] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES,
159 : .events[4] = INTEL_CORE_E_INT_MISC_RECOVERY_CYCLES,
160 : .events[5] = INTEL_CORE_E_INT_MISC_UOP_DROPPING,
161 : .events[6] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CORE,
162 : .events[7] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
163 : .events[8] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
164 : .events[9] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
165 : .events[10] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
166 : .n_events = 11,
167 : .cpu_supports = topdown_lvl2_cpu_supports_icx,
168 : .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports_icx),
169 : .format_fn = format_topdown_lvl2_icx,
170 : .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% FE.FL",
171 : "% FE.FB", "% BE.MB", "% BE.CB"),
172 : .footer = "Retiring (RT), Bad Speculation (BS),\n"
173 : " FrontEnd bound (FE), BackEnd bound (BE),\n"
174 : " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
175 : " Memory Bound (MB), Core Bound (CB)",
176 : };
|