Line data Source code
1 : /*
2 : * Copyright (c) 2021 Intel and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : #include <vnet/vnet.h>
17 : #include <vppinfra/math.h>
18 : #include <perfmon/perfmon.h>
19 : #include <perfmon/intel/core.h>
20 :
/* Extract the i-th 8-bit field of m (field 0 is the least significant byte).
 * Both arguments are fully parenthesized so that expressions such as
 * GET_METRIC (x, a + b) shift by (a + b) * 8 and not a + (b * 8). */
#define GET_METRIC(m, i) (((m) >> ((i) * 8)) & 0xff)
/* Extract the i-th 32-bit field of m (field 0 is the low half) */
#define GET_RATIO(m, i)	 (((m) >> ((i) * 32)) & 0xffffffff)
#define RDPMC_SLOTS	 (1 << 30) /* fixed slots */
#define RDPMC_METRICS	 (1 << 29) /* l1 & l2 metric counters */

#define FIXED_COUNTER_SLOTS	     3
#define METRIC_COUNTER_TOPDOWN_L1_L2 0
28 :
/* Top-down metric identifiers. Values 0-3 are the level 1 metrics, values
 * 4-11 are the level 2 metrics, laid out in pairs that share a level 1
 * parent. The numeric values are used as table indices and the parity of a
 * level 2 value is tested by the parsers, so the order must not change. */
typedef enum
{
  TOPDOWN_E_RETIRING = 0, /* Retiring (RT) */
  TOPDOWN_E_BAD_SPEC = 1, /* Bad Speculation (BS) */
  TOPDOWN_E_FE_BOUND = 2, /* FrontEnd bound (FE) */
  TOPDOWN_E_BE_BOUND = 3, /* BackEnd bound (BE) */
  TOPDOWN_E_HEAVYOPS = 4, /* Heavy Operations (HO) */
  TOPDOWN_E_LIGHTOPS = 5, /* Light Operations (LO) */
  TOPDOWN_E_BMISPRED = 6, /* Branch Misprediction (BM) */
  TOPDOWN_E_MCHCLEAR = 7, /* Machine Clears (MC) */
  TOPDOWN_E_FETCHLAT = 8, /* Fetch Latency (FL) */
  TOPDOWN_E_FETCH_BW = 9, /* Fetch Bandwidth (FB) */
  TOPDOWN_E_MEMBOUND = 10, /* Memory Bound (MB) */
  TOPDOWN_E_CORBOUND = 11, /* Core Bound (CB) */
  TOPDOWN_E_MAX = 12,	   /* number of metrics, not a metric itself */
} topdown_e_t;
45 :
/* Positions inside a per-node value[] array as read by the rdpmc parser */
enum
{
  /* slots count */
  TOPDOWN_E_RDPMC_SLOTS = 0,
  /* word holding the byte-packed metrics */
  TOPDOWN_E_RDPMC_METRICS = 1,
};
51 :
52 : typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_e_t);
53 :
54 : /* Parse thread level states from perfmon_reading */
55 : static_always_inline f64
56 0 : topdown_lvl1_perf_reading (void *ps, topdown_e_t e)
57 : {
58 0 : perfmon_reading_t *ss = (perfmon_reading_t *) ps;
59 :
60 : /* slots are at value[0], everthing else follows at +1 */
61 0 : return ((f64) ss->value[e + 1] / ss->value[0]) * 100;
62 : }
63 :
64 : static_always_inline f64
65 0 : topdown_lvl1_rdpmc_metric (void *ps, topdown_e_t e)
66 : {
67 0 : perfmon_node_stats_t *ss = (perfmon_node_stats_t *) ps;
68 0 : f64 slots_t0 =
69 0 : ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS] *
70 0 : ((f64) GET_METRIC (ss->t[0].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
71 0 : f64 slots_t1 =
72 0 : ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] *
73 0 : ((f64) GET_METRIC (ss->t[1].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
74 0 : u64 slots_delta = ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] -
75 0 : ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS];
76 :
77 0 : slots_t1 = slots_t1 - slots_t0;
78 :
79 0 : return (slots_t1 / slots_delta) * 100;
80 : }
81 :
/* Convert the TopDown enum to the perf reading index.
 * Both members of a level 2 pair map to the same index; level 1 entries are
 * left at 0 because the callers handle level 1 metrics before using it. */
#define TO_LVL2_PERF_IDX(e)                                                   \
  ({                                                                          \
    const u8 lvl2_perf_idx[TOPDOWN_E_MAX] = {                                 \
      [TOPDOWN_E_HEAVYOPS] = 5, [TOPDOWN_E_LIGHTOPS] = 5,                     \
      [TOPDOWN_E_BMISPRED] = 6, [TOPDOWN_E_MCHCLEAR] = 6,                     \
      [TOPDOWN_E_FETCHLAT] = 7, [TOPDOWN_E_FETCH_BW] = 7,                     \
      [TOPDOWN_E_MEMBOUND] = 8, [TOPDOWN_E_CORBOUND] = 8,                     \
    };                                                                        \
    lvl2_perf_idx[e];                                                         \
  })
88 :
89 : /* Parse thread level stats from perfmon_reading */
90 : static_always_inline f64
91 0 : topdown_lvl2_perf_reading (void *ps, topdown_e_t e)
92 : {
93 0 : perfmon_reading_t *ss = (perfmon_reading_t *) ps;
94 0 : u64 value = ss->value[TO_LVL2_PERF_IDX (e)];
95 :
96 : /* If it is an L1 metric, call L1 format */
97 0 : if (TOPDOWN_E_BE_BOUND >= e)
98 : {
99 0 : return topdown_lvl1_perf_reading (ps, e);
100 : }
101 :
102 : /* all the odd metrics, are inferred from even and L1 metrics */
103 0 : if (e & 0x1)
104 : {
105 0 : topdown_e_t e1 = TO_LVL2_PERF_IDX (e) - 4;
106 0 : value = ss->value[e1] - value;
107 : }
108 :
109 0 : return (f64) value / ss->value[0] * 100;
110 : }
111 :
/* Convert the TopDown enum to the rdpmc metric byte position.
 * Both members of a level 2 pair map to the same byte; level 1 entries are
 * left at 0 because the callers handle level 1 metrics before using it. */
#define TO_LVL2_METRIC_BYTE(e)                                                \
  ({                                                                          \
    const u8 lvl2_metric_byte[TOPDOWN_E_MAX] = {                              \
      [TOPDOWN_E_HEAVYOPS] = 4, [TOPDOWN_E_LIGHTOPS] = 4,                     \
      [TOPDOWN_E_BMISPRED] = 5, [TOPDOWN_E_MCHCLEAR] = 5,                     \
      [TOPDOWN_E_FETCHLAT] = 6, [TOPDOWN_E_FETCH_BW] = 6,                     \
      [TOPDOWN_E_MEMBOUND] = 7, [TOPDOWN_E_CORBOUND] = 7,                     \
    };                                                                        \
    lvl2_metric_byte[e];                                                      \
  })
118 :
/* Convert the TopDown L2 enum to the reference TopDown L1 enum.
 * Level 1 entries have no parent and hold 0xff, which is the value the
 * original -1 initializer takes once stored in a u8. */
#define TO_LVL1_REF(e)                                                        \
  ({                                                                          \
    const u8 lvl1_parent[TOPDOWN_E_MAX] = {                                   \
      [TOPDOWN_E_RETIRING] = 0xff,                                            \
      [TOPDOWN_E_BAD_SPEC] = 0xff,                                            \
      [TOPDOWN_E_FE_BOUND] = 0xff,                                            \
      [TOPDOWN_E_BE_BOUND] = 0xff,                                            \
      [TOPDOWN_E_HEAVYOPS] = TOPDOWN_E_RETIRING,                              \
      [TOPDOWN_E_LIGHTOPS] = TOPDOWN_E_RETIRING,                              \
      [TOPDOWN_E_BMISPRED] = TOPDOWN_E_BAD_SPEC,                              \
      [TOPDOWN_E_MCHCLEAR] = TOPDOWN_E_BAD_SPEC,                              \
      [TOPDOWN_E_FETCHLAT] = TOPDOWN_E_FE_BOUND,                              \
      [TOPDOWN_E_FETCH_BW] = TOPDOWN_E_FE_BOUND,                              \
      [TOPDOWN_E_MEMBOUND] = TOPDOWN_E_BE_BOUND,                              \
      [TOPDOWN_E_CORBOUND] = TOPDOWN_E_BE_BOUND,                              \
    };                                                                        \
    lvl1_parent[e];                                                           \
  })
136 :
137 : static_always_inline f64
138 0 : topdown_lvl2_rdpmc_metric (void *ps, topdown_e_t e)
139 : {
140 0 : f64 r, l1_value = 0;
141 :
142 : /* If it is an L1 metric, call L1 format */
143 0 : if (TOPDOWN_E_BE_BOUND >= e)
144 : {
145 0 : return topdown_lvl1_rdpmc_metric (ps, e);
146 : }
147 :
148 : /* all the odd metrics, are inferred from even and L1 metrics */
149 0 : if (e & 0x1)
150 : {
151 : /* get the L1 reference metric */
152 0 : l1_value = topdown_lvl1_rdpmc_metric (ps, TO_LVL1_REF (e));
153 : }
154 :
155 : /* calculate the l2 metric */
156 : r =
157 0 : fabs (l1_value - topdown_lvl1_rdpmc_metric (ps, TO_LVL2_METRIC_BYTE (e)));
158 0 : return r;
159 : }
160 :
161 : static u8 *
162 0 : format_topdown_lvl2 (u8 *s, va_list *args)
163 : {
164 0 : void *ps = va_arg (*args, void *);
165 0 : u64 idx = va_arg (*args, int);
166 0 : perfmon_bundle_type_t type = va_arg (*args, perfmon_bundle_type_t);
167 0 : f64 sv = 0;
168 :
169 : topdown_lvl1_parse_fn_t *parse_fn,
170 0 : *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl2_rdpmc_metric,
171 : topdown_lvl2_perf_reading, 0 };
172 :
173 0 : parse_fn = parse_fns[type];
174 0 : ASSERT (parse_fn);
175 :
176 0 : sv = parse_fn (ps, (topdown_e_t) idx);
177 0 : s = format (s, "%f", sv);
178 :
179 0 : return s;
180 : }
181 :
182 : static perfmon_cpu_supports_t topdown_lvl2_cpu_supports[] = {
183 : /* Intel SPR supports papi/thread or rdpmc/node */
184 : { clib_cpu_supports_avx512_fp16, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD }
185 : };
186 :
187 559 : PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric) = {
188 : .name = "topdown",
189 : .description = "Top-down Microarchitecture Analysis Level 1 & 2",
190 : .source = "intel-core",
191 : .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
192 : .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
193 : .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
194 : .events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
195 : .events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
196 : .events[5] = INTEL_CORE_E_TOPDOWN_L2_HEAVYOPS_METRIC,
197 : .events[6] = INTEL_CORE_E_TOPDOWN_L2_BMISPRED_METRIC,
198 : .events[7] = INTEL_CORE_E_TOPDOWN_L2_FETCHLAT_METRIC,
199 : .events[8] = INTEL_CORE_E_TOPDOWN_L2_MEMBOUND_METRIC,
200 : .n_events = 9,
201 : .preserve_samples = 0x1FF,
202 : .cpu_supports = topdown_lvl2_cpu_supports,
203 : .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports),
204 : .format_fn = format_topdown_lvl2,
205 : .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% RT.HO",
206 : "% RT.LO", "% BS.BM", "% BS.MC",
207 : "% FE.FL", "% FE.FB", "% BE.MB",
208 : "% BE.CB"),
209 : .footer = "Retiring (RT), Bad Speculation (BS),\n"
210 : " FrontEnd bound (1FE), BackEnd bound (BE),\n"
211 : " Light Operations (LO), Heavy Operations (HO),\n"
212 : " Branch Misprediction (BM), Machine Clears (MC),\n"
213 : " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
214 : " Memory Bound (MB), Core Bound (CB)",
215 : };
|