Line data Source code
1 : #ifndef _FA_NODE_H_
2 : #define _FA_NODE_H_
3 :
4 : #include <stddef.h>
5 : #include <vppinfra/bihash_16_8.h>
6 : #include <vppinfra/bihash_40_8.h>
7 :
8 : #include <plugins/acl/exported_types.h>
9 :
10 : // #define FA_NODE_VERBOSE_DEBUG 3
11 :
/* Individual TCP header flag bits; each constant occupies a distinct bit. */
#define TCP_FLAG_FIN    0x01
#define TCP_FLAG_SYN    0x02
#define TCP_FLAG_RST    0x04
#define TCP_FLAG_PUSH   0x08
#define TCP_FLAG_ACK    0x10
#define TCP_FLAG_URG    0x20
#define TCP_FLAG_ECE    0x40
#define TCP_FLAG_CWR    0x80

/* Composite masks: the union of the named flag bits. */
#define TCP_FLAGS_RSTFINACKSYN (TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK)
#define TCP_FLAGS_ACKSYN       (TCP_FLAG_SYN | TCP_FLAG_ACK)
22 :
/* Default sizing of the connection table. */
#define ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS (1 << 16)    /* 64K buckets */
#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1ULL << 30) /* 2^30 bytes */
#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES 500000
26 :
/*
 * Per-packet metadata packed into exactly one u64 (a compile-time assertion
 * further down in this header checks the size). It is embedded in
 * fa_5tuple_t as the "pkt" member, where it overlays the value slot of the
 * bihash key/value pair.
 */
typedef union {
  u64 as_u64;                    /* whole record viewed as a single 64-bit word */
  struct {
    u32 lc_index;                /* NOTE(review): presumably a lookup-context index - confirm */
    u16 mask_type_index_lsb;
    u8 tcp_flags;
    /* The one-bit flags below plus the reserved bits fill the last byte. */
    u8 tcp_flags_valid:1;
    u8 l4_valid:1;
    u8 is_nonfirst_fragment:1;
    u8 is_ip6:1;
    u8 flags_reserved:4;
  };
} fa_packet_info_t;
40 :
/*
 * Bit values stored in the l4_flags byte of fa_session_l4_key_t.
 */
typedef enum
{
  /* reported as "l4_is_input" by format_fa_session_l4_key() */
  FA_SK_L4_FLAG_IS_INPUT = 0x1,
  /* tested by is_session_l4_key_u64_slowpath() */
  FA_SK_L4_FLAG_IS_SLOWPATH = 0x2,
} fa_session_l4_key_l4_flags_t;
45 :
/*
 * L4 part of the session key, packed into exactly one u64 (checked by a
 * compile-time assertion further down in this header).
 */
typedef union {
  u64 as_u64;                    /* whole key viewed as a single 64-bit word */
  struct {
    u16 port[2];                 /* printed as "port[0] -> port[1]" by format_fa_session_l4_key() */
    union {
      struct {
        u8 proto;
        u8 l4_flags;             /* holds FA_SK_L4_FLAG_* bits */
        u16 lsb_of_sw_if_index;
      };
      /* the three fields above viewed as one 32-bit word */
      u32 non_port_l4_data;
    };
  };
} fa_session_l4_key_t;
60 :
61 :
62 : static_always_inline
63 168 : int is_session_l4_key_u64_slowpath(u64 l4key) {
64 168 : fa_session_l4_key_t k = { .as_u64 = l4key };
65 168 : return (k.l4_flags & FA_SK_L4_FLAG_IS_SLOWPATH) ? 1 : 0;
66 : }
67 :
/*
 * Session lookup key plus per-packet info, laid out so that the same memory
 * can be handed to either bihash flavour:
 *   - kv_40_8 overlays the whole structure (addresses + l4 as key, pkt as value);
 *   - kv_16_8 starts after three u64 of padding, so the two IPv4 addresses
 *     plus l4 form its key and pkt is again its value.
 * The compile-time assertions further down in this header pin these offsets.
 */
typedef union {
  struct {
    union {
      struct {
        /* we put the IPv4 addresses
           after padding so we can still
           use them as (shorter) key together with
           L4 info */
        u32 l3_zero_pad[6];
        ip4_address_t ip4_addr[2];
      };
      ip6_address_t ip6_addr[2];
    };
    fa_session_l4_key_t l4;
    /* This field should align with u64 value in bihash_40_8 and bihash_16_8 keyvalue struct */
    fa_packet_info_t pkt;
  };
  /* Full-width view used with the 40-byte-key bihash. */
  clib_bihash_kv_40_8_t kv_40_8;
  struct {
    /* Skips the leading pad so kv_16_8 begins at the IPv4 addresses. */
    u64 padding_for_kv_16_8[3];
    clib_bihash_kv_16_8_t kv_16_8;
  };
} fa_5tuple_t;
91 :
92 : static_always_inline u8 *
93 8179 : format_fa_session_l4_key(u8 * s, va_list * args)
94 : {
95 8179 : fa_session_l4_key_t *l4 = va_arg (*args, fa_session_l4_key_t *);
96 8179 : int is_input = (l4->l4_flags & FA_SK_L4_FLAG_IS_INPUT) ? 1 : 0;
97 8179 : int is_slowpath = (l4->l4_flags & FA_SK_L4_FLAG_IS_SLOWPATH) ? 1 : 0;
98 :
99 16358 : return (format (s, "l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d l4_flags 0x%02x port %d -> %d",
100 8179 : l4->lsb_of_sw_if_index,
101 8179 : l4->proto, is_input, is_slowpath,
102 8179 : l4->l4_flags, l4->port[0], l4->port[1]));
103 : }
104 :
105 : typedef struct {
106 : fa_5tuple_t info; /* (5+1)*8 = 48 bytes */
107 : u64 last_active_time; /* +8 bytes = 56 */
108 : u32 sw_if_index; /* +4 bytes = 60 */
109 : union {
110 : u8 as_u8[2];
111 : u16 as_u16;
112 : } tcp_flags_seen; ; /* +2 bytes = 62 */
113 : u16 thread_index; /* +2 bytes = 64 */
114 : u64 link_enqueue_time; /* 8 byte = 8 */
115 : u32 link_prev_idx; /* +4 bytes = 12 */
116 : u32 link_next_idx; /* +4 bytes = 16 */
117 : u8 link_list_id; /* +1 bytes = 17 */
118 : u8 deleted; /* +1 bytes = 18 */
119 : u8 is_ip6; /* +1 bytes = 19 */
120 : u8 reserved1[5]; /* +5 bytes = 24 */
121 : u64 reserved2[5]; /* +5*8 bytes = 64 */
122 : } fa_session_t;
123 :
/* input policy epochs have the MSB (bit 15) set */
#define FA_POLICY_EPOCH_IS_INPUT 0x8000
/* selects the remaining low 15 bits */
#define FA_POLICY_EPOCH_MASK 0x7fff
127 :
128 :
/* This structure is used to fill in the u64 value
   in the per-sw-if-index hash table.
   Its three fields add up to exactly 64 bits; a compile-time assertion
   further down in this header checks that it stays the size of a u64. */
typedef struct {
  union {
    u64 as_u64;                  /* the whole id as a single hash value */
    struct {
      u32 session_index;
      u16 thread_index;
      /* NOTE(review): presumably combined with FA_POLICY_EPOCH_* - confirm */
      u16 intf_policy_epoch;
    };
  };
} fa_full_session_id_t;
141 :
/*
 * A few compile-time constraints on the size and the layout of the union, to ensure
 * it makes sense both for bihash and for us.
 */

/*
 * CT_ASSERT_EQUAL declares an array typedef whose length is ((x) == (y)) - 1:
 * 0 when the two values match, -1 (a compile error) when they do not. The
 * assertion name is embedded in the typedef name so that it shows up in the
 * compiler diagnostic. The passing case relies on the compiler accepting a
 * zero-length array type.
 */
#define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y))-1]
/* pkt must sit exactly where the bihash value lives, for both key widths. */
CT_ASSERT_EQUAL(fa_l3_key_size_is_40, offsetof(fa_5tuple_t, pkt), offsetof(clib_bihash_kv_40_8_t, value));
CT_ASSERT_EQUAL(fa_ip6_kv_val_at_pkt, offsetof(fa_5tuple_t, pkt), offsetof(fa_5tuple_t, kv_40_8.value));
CT_ASSERT_EQUAL(fa_ip4_kv_val_at_pkt, offsetof(fa_5tuple_t, pkt), offsetof(fa_5tuple_t, kv_16_8.value));
/* Both packed records must be exactly one u64 wide. */
CT_ASSERT_EQUAL(fa_l4_key_t_is_8, sizeof(fa_session_l4_key_t), sizeof(u64));
CT_ASSERT_EQUAL(fa_packet_info_t_is_8, sizeof(fa_packet_info_t), sizeof(u64));
CT_ASSERT_EQUAL(fa_l3_kv_size_is_48, sizeof(fa_5tuple_t), sizeof(clib_bihash_kv_40_8_t));
/* The 16-byte key view must start at the IPv4 addresses. */
CT_ASSERT_EQUAL(fa_ip4_starts_at_kv16_key, offsetof(fa_5tuple_t, ip4_addr), offsetof(fa_5tuple_t, kv_16_8));
CT_ASSERT_EQUAL(fa_ip4_and_ip6_kv_value_match, offsetof(fa_5tuple_t, kv_16_8.value), offsetof(fa_5tuple_t, kv_40_8.value));

/* Let's try to fit within two cachelines */
CT_ASSERT_EQUAL(fa_session_t_size_is_128, sizeof(fa_session_t), 128);

/* Session ID MUST be the same as u64 */
CT_ASSERT_EQUAL(fa_full_session_id_size_is_64, sizeof(fa_full_session_id_t), sizeof(u64));

/* NOTE(review): fa_5tuple_opaque_t is not defined in this header; presumably
   it comes from plugins/acl/exported_types.h - confirm. */
CT_ASSERT_EQUAL(fa_5tuple_opaque_t_must_match_5tuple, sizeof(fa_5tuple_opaque_t), sizeof(fa_5tuple_t));
#undef CT_ASSERT_EQUAL
165 :
/*
 * All-ones sentinel session index. The replacement list is parenthesized so
 * that the macro expands as a single operand wherever it is used.
 */
#define FA_SESSION_BOGUS_INDEX (~0)
167 :
/*
 * Per-worker state: the session pool, the cross-worker change-request
 * queues, the connection lists with their expiry bookkeeping, assorted
 * counters, interrupt coordination flags, and per-frame scratch arrays.
 */
typedef struct {
  /* The pool of sessions managed by this worker */
  fa_session_t *fa_sessions_pool;
  /* incoming session change requests from other workers */
  clib_spinlock_t pending_session_change_request_lock;
  u64 *pending_session_change_requests;
  u64 *wip_session_change_requests;
  u64 rcvd_session_change_requests;
  u64 sent_session_change_requests;
  /* per-worker ACL_N_TIMEOUTS of conn lists */
  u32 *fa_conn_list_head;
  u32 *fa_conn_list_tail;
  /* expiry time set whenever an element is enqueued */
  u64 *fa_conn_list_head_expiry_time;
  /* adds and deletes per-worker-per-interface */
  u64 *fa_session_dels_by_sw_if_index;
  u64 *fa_session_adds_by_sw_if_index;
  /* sessions deleted due to epoch change */
  u64 *fa_session_epoch_change_by_sw_if_index;
  /* Vector of expired connections retrieved from lists */
  u32 *expired;
  /* the earliest next expiry time */
  u64 next_expiry_time;
  /* if not zero, look at all the elements until their enqueue timestamp is after below one */
  u64 requeue_until_time;
  /* Current time between the checks */
  u64 current_time_wait_interval;
  /* Counter of how many sessions we did delete */
  u64 cnt_deleted_sessions;
  /* Counter of already deleted sessions being deleted - should not increment unless a bug */
  u64 cnt_already_deleted_sessions;
  /* Number of times we requeued a session to a head of the list */
  u64 cnt_session_timer_restarted;
  /* swipe up to this enqueue time, rather than following the timeouts */
  u64 swipe_end_time;
  /* bitmap of sw_if_index serviced by this worker */
  uword *serviced_sw_if_index_bitmap;
  /* bitmap of sw_if_indices to clear. set by main thread, cleared by worker */
  uword *pending_clear_sw_if_index_bitmap;
  /* atomic, indicates that the swipe-deletion of connections is in progress */
  u32 clear_in_process;
  /* Interrupt is pending from main thread */
  int interrupt_is_pending;
  /*
   * Interrupt node on the worker thread sets this if it knows there is
   * more work to do, but it has to finish to avoid hogging the
   * core for too long.
   */
  int interrupt_is_needed;
  /*
   * Set to indicate that the interrupt node wants to get fewer interrupts
   * because there is not enough work for the current rate.
   */
  int interrupt_is_unwanted;
  /*
   * Set to copy of a "generation" counter in main thread so we can sync the interrupts.
   */
  int interrupt_generation;
  /*
   * work in progress data for the pipelined node operation;
   * every array below has one slot per packet of a frame (VLIB_FRAME_SIZE).
   */
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
  u32 sw_if_indices[VLIB_FRAME_SIZE];
  fa_5tuple_t fa_5tuples[VLIB_FRAME_SIZE];
  u64 hashes[VLIB_FRAME_SIZE];
  u16 nexts[VLIB_FRAME_SIZE];

} acl_fa_per_worker_data_t;
236 :
237 :
/*
 * Next indices of the node. ACL_FA_N_NEXT is the trailing sentinel and
 * therefore equals the number of real entries.
 */
typedef enum
{
  ACL_FA_ERROR_DROP = 0,
  ACL_FA_N_NEXT,
} acl_fa_next_t;
242 :
243 :
/*
 * Event codes for the cleaner process. Numbering starts at 1, so 0 is not
 * a valid event.
 */
typedef enum {
  ACL_FA_CLEANER_RESCHEDULE = 1,
  ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX = 2,
} acl_fa_cleaner_process_event_e;
249 :
/*
 * NOTE(review): defined outside this header. Judging by the parameter names
 * alone, this switches processing on or off for one direction (is_input) of
 * the interface sw_if_index - confirm against the definition.
 */
void acl_fa_enable_disable(u32 sw_if_index, int is_input, int enable_disable);

/* NOTE(review): defined outside this header; presumably a debug dump of the
   session hash whose detail level is chosen by verbose - confirm. */
void show_fa_sessions_hash(vlib_main_t * vm, u32 verbose);

/* Has the same (u8 * s, va_list * args) signature as the format helper
   defined in this header; the expected varargs are not visible from here. */
u8 *format_acl_plugin_5tuple (u8 * s, va_list * args);
255 :
/*
 * Record an elog event carrying the current thread index plus one value,
 * but only when (am)->trace_sessions is non-zero.
 *
 *   am                          - pointer to a structure with a trace_sessions member
 *   acl_elog_trace_format_label - string literal, appended to the "(%02d) " prefix
 *   acl_elog_trace_format_args  - string literal, appended to the "i2" type of the
 *                                 thread index
 *   acl_elog_val1               - value to record; evaluated once at run time
 *
 * The static_check declaration is intended to break the build when the value
 * does not fit into 18 bytes (NOTE(review): presumably the elog payload left
 * after the u16 thread index - confirm).
 *
 * Macro-local variables carry an acl_elog_ prefix so that an argument
 * expression naming e.g. the caller's own thread_index is not captured.
 *
 * use like: elog_acl_maybe_trace_X1(am, "foobar: %d", "i4", int32_value);
 */

#define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1) \
do { \
  if ((am)->trace_sessions) { \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check); \
    u16 acl_elog_thread_index = os_get_thread_index (); \
    vlib_worker_thread_t * acl_elog_w = vlib_worker_threads + acl_elog_thread_index; \
    ELOG_TYPE_DECLARE (e) = \
      { \
        .format = "(%02d) " acl_elog_trace_format_label, \
        .format_args = "i2" acl_elog_trace_format_args, \
      }; \
    CLIB_PACKED(struct \
      { \
        u16 thread; \
        typeof(acl_elog_val1) val1; \
      }) *acl_elog_ed; \
    acl_elog_ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, acl_elog_w->elog_track); \
    acl_elog_ed->thread = acl_elog_thread_index; \
    acl_elog_ed->val1 = (acl_elog_val1); \
  } \
} while (0)
279 :
280 :
/*
 * Record an elog event carrying the current thread index plus two values,
 * but only when (am)->trace_sessions is non-zero.
 *
 *   am                          - pointer to a structure with a trace_sessions member
 *   acl_elog_trace_format_label - string literal, appended to the "(%02d) " prefix
 *   acl_elog_trace_format_args  - string literal, appended to the "i2" type of the
 *                                 thread index
 *   acl_elog_val1, _val2        - values to record; each evaluated once at run time
 *
 * The static_check declaration is intended to break the build when the values
 * together do not fit into 18 bytes (NOTE(review): presumably the elog payload
 * left after the u16 thread index - confirm).
 *
 * Macro-local variables carry an acl_elog_ prefix so that an argument
 * expression naming e.g. the caller's own thread_index is not captured.
 *
 * use like: elog_acl_maybe_trace_X2(am, "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value);
 */

#define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, acl_elog_trace_format_args, \
                                acl_elog_val1, acl_elog_val2) \
do { \
  if ((am)->trace_sessions) { \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)]; } *static_check); \
    u16 acl_elog_thread_index = os_get_thread_index (); \
    vlib_worker_thread_t * acl_elog_w = vlib_worker_threads + acl_elog_thread_index; \
    ELOG_TYPE_DECLARE (e) = \
      { \
        .format = "(%02d) " acl_elog_trace_format_label, \
        .format_args = "i2" acl_elog_trace_format_args, \
      }; \
    CLIB_PACKED(struct \
      { \
        u16 thread; \
        typeof(acl_elog_val1) val1; \
        typeof(acl_elog_val2) val2; \
      }) *acl_elog_ed; \
    acl_elog_ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, acl_elog_w->elog_track); \
    acl_elog_ed->thread = acl_elog_thread_index; \
    acl_elog_ed->val1 = (acl_elog_val1); \
    acl_elog_ed->val2 = (acl_elog_val2); \
  } \
} while (0)
307 :
308 :
/*
 * Record an elog event carrying the current thread index plus three values,
 * but only when (am)->trace_sessions is non-zero.
 *
 *   am                          - pointer to a structure with a trace_sessions member
 *   acl_elog_trace_format_label - string literal, appended to the "(%02d) " prefix
 *   acl_elog_trace_format_args  - string literal, appended to the "i2" type of the
 *                                 thread index
 *   acl_elog_val1 .. _val3      - values to record; each evaluated once at run time
 *
 * The static_check declaration is intended to break the build when the values
 * together do not fit into 18 bytes (NOTE(review): presumably the elog payload
 * left after the u16 thread index - confirm).
 *
 * Macro-local variables carry an acl_elog_ prefix so that an argument
 * expression naming e.g. the caller's own thread_index is not captured.
 *
 * use like: elog_acl_maybe_trace_X3(am, "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value);
 */

#define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \
                                acl_elog_val2, acl_elog_val3) \
do { \
  if ((am)->trace_sessions) { \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \
                                               - sizeof(acl_elog_val3)]; } *static_check); \
    u16 acl_elog_thread_index = os_get_thread_index (); \
    vlib_worker_thread_t * acl_elog_w = vlib_worker_threads + acl_elog_thread_index; \
    ELOG_TYPE_DECLARE (e) = \
      { \
        .format = "(%02d) " acl_elog_trace_format_label, \
        .format_args = "i2" acl_elog_trace_format_args, \
      }; \
    CLIB_PACKED(struct \
      { \
        u16 thread; \
        typeof(acl_elog_val1) val1; \
        typeof(acl_elog_val2) val2; \
        typeof(acl_elog_val3) val3; \
      }) *acl_elog_ed; \
    acl_elog_ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, acl_elog_w->elog_track); \
    acl_elog_ed->thread = acl_elog_thread_index; \
    acl_elog_ed->val1 = (acl_elog_val1); \
    acl_elog_ed->val2 = (acl_elog_val2); \
    acl_elog_ed->val3 = (acl_elog_val3); \
  } \
} while (0)
338 :
339 :
/*
 * Record an elog event carrying the current thread index plus four values,
 * but only when (am)->trace_sessions is non-zero.
 *
 *   am                          - pointer to a structure with a trace_sessions member
 *   acl_elog_trace_format_label - string literal, appended to the "(%02d) " prefix
 *   acl_elog_trace_format_args  - string literal, appended to the "i2" type of the
 *                                 thread index
 *   acl_elog_val1 .. _val4      - values to record; each evaluated once at run time
 *
 * The static_check declaration is intended to break the build when the values
 * together do not fit into 18 bytes (NOTE(review): presumably the elog payload
 * left after the u16 thread index - confirm).
 *
 * Macro-local variables carry an acl_elog_ prefix so that an argument
 * expression naming e.g. the caller's own thread_index is not captured.
 *
 * use like: elog_acl_maybe_trace_X4(am, "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value);
 */

#define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \
                                acl_elog_val2, acl_elog_val3, acl_elog_val4) \
do { \
  if ((am)->trace_sessions) { \
    CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \
                                               - sizeof(acl_elog_val3) -sizeof(acl_elog_val4)]; } *static_check); \
    u16 acl_elog_thread_index = os_get_thread_index (); \
    vlib_worker_thread_t * acl_elog_w = vlib_worker_threads + acl_elog_thread_index; \
    ELOG_TYPE_DECLARE (e) = \
      { \
        .format = "(%02d) " acl_elog_trace_format_label, \
        .format_args = "i2" acl_elog_trace_format_args, \
      }; \
    CLIB_PACKED(struct \
      { \
        u16 thread; \
        typeof(acl_elog_val1) val1; \
        typeof(acl_elog_val2) val2; \
        typeof(acl_elog_val3) val3; \
        typeof(acl_elog_val4) val4; \
      }) *acl_elog_ed; \
    acl_elog_ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, acl_elog_w->elog_track); \
    acl_elog_ed->thread = acl_elog_thread_index; \
    acl_elog_ed->val1 = (acl_elog_val1); \
    acl_elog_ed->val2 = (acl_elog_val2); \
    acl_elog_ed->val3 = (acl_elog_val3); \
    acl_elog_ed->val4 = (acl_elog_val4); \
  } \
} while (0)
371 :
372 :
373 : #endif
|