Line data Source code
1 : /*
2 : * Copyright (c) 2017 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : /**
17 : * @file
18 : * @brief IPv4 Full Reassembly.
19 : *
20 : * This file contains the source code for IPv4 full reassembly.
21 : */
22 :
23 : #include <vppinfra/vec.h>
24 : #include <vnet/vnet.h>
25 : #include <vnet/ip/ip.h>
26 : #include <vnet/ip/ip.api_enum.h>
27 : #include <vppinfra/fifo.h>
28 : #include <vppinfra/bihash_16_8.h>
29 : #include <vnet/ip/reass/ip4_full_reass.h>
30 : #include <stddef.h>
31 :
/* Milliseconds in one second. */
#define MSEC_PER_SEC 1000
/* Default reassembly timeout, in milliseconds. */
#define IP4_REASS_TIMEOUT_DEFAULT_MS 200

/* As there are only 1024 reassembly contexts per thread, either DDoS attacks
 * or a fraction of real timeouts could consume these contexts quickly,
 * leaving no context space and making reassembly impossible. */
#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 /* 50 ms default */
/* Default maximum number of reassemblies. */
#define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024
/* Default maximum number of fragments in one reassembly. */
#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
/* Hash table load factor. */
#define IP4_REASS_HT_LOAD_FACTOR (0.75)
42 :
/* Set to 1 to compile in the buffer-chain dump below; with 0 the macro
 * expands to nothing. */
#define IP4_REASS_DEBUG_BUFFERS 0
#if IP4_REASS_DEBUG_BUFFERS
/* Print buffer index `bi`, followed by the index of every buffer linked
 * behind it through next_buffer, to stdout. The stringified `what` argument
 * is used as a label. Relies on a variable named `vm` being visible at the
 * point of expansion. */
#define IP4_REASS_DEBUG_BUFFER(bi, what)                                      \
  do                                                                          \
    {                                                                         \
      u32 _bi = (bi);                                                         \
      /* separate the label from the word "buffer" */                         \
      printf (#what " buffer %u", _bi);                                       \
      vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);                          \
      while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)                            \
	{                                                                     \
	  _bi = _b->next_buffer;                                              \
	  printf ("[%u]", _bi);                                               \
	  _b = vlib_get_buffer (vm, _bi);                                     \
	}                                                                     \
      printf ("\n");                                                          \
      fflush (stdout);                                                        \
    }                                                                         \
  while (0)
#else
#define IP4_REASS_DEBUG_BUFFER(...)
#endif
64 :
/* Result codes returned by the reassembly helper functions in this file. */
typedef enum
{
  IP4_REASS_RC_OK = 0,
  IP4_REASS_RC_TOO_MANY_FRAGMENTS = 1,
  IP4_REASS_RC_INTERNAL_ERROR = 2,
  IP4_REASS_RC_NO_BUF = 3,
  IP4_REASS_RC_HANDOFF = 4,
} ip4_full_reass_rc_t;
73 :
74 : typedef struct
75 : {
76 : struct
77 : {
78 : u16 frag_id;
79 : u8 proto;
80 : u8 unused;
81 : u32 fib_index;
82 : ip4_address_t src;
83 : ip4_address_t dst;
84 : };
85 : } ip4_full_reass_key_t;
86 :
87 : STATIC_ASSERT_SIZEOF (ip4_full_reass_key_t, 16);
88 :
89 : typedef union
90 : {
91 : struct
92 : {
93 : u32 reass_index;
94 : u32 memory_owner_thread_index;
95 : };
96 : u64 as_u64;
97 : } ip4_full_reass_val_t;
98 :
99 : typedef union
100 : {
101 : struct
102 : {
103 : ip4_full_reass_key_t k;
104 : ip4_full_reass_val_t v;
105 : };
106 : clib_bihash_kv_16_8_t kv;
107 : } ip4_full_reass_kv_t;
108 :
109 : always_inline u32
110 70672 : ip4_full_reass_buffer_get_data_offset (vlib_buffer_t * b)
111 : {
112 70672 : vnet_buffer_opaque_t *vnb = vnet_buffer (b);
113 70672 : return vnb->ip.reass.range_first - vnb->ip.reass.fragment_first;
114 : }
115 :
116 : always_inline u16
117 41739 : ip4_full_reass_buffer_get_data_len (vlib_buffer_t * b)
118 : {
119 41739 : vnet_buffer_opaque_t *vnb = vnet_buffer (b);
120 83483 : return clib_min (vnb->ip.reass.range_last, vnb->ip.reass.fragment_last) -
121 83483 : (vnb->ip.reass.fragment_first +
122 41739 : ip4_full_reass_buffer_get_data_offset (b)) + 1;
123 : }
124 :
125 : typedef struct
126 : {
127 : // hash table key
128 : ip4_full_reass_key_t key;
129 : // time when last packet was received
130 : f64 last_heard;
131 : // internal id of this reassembly
132 : u64 id;
133 : // buffer index of first buffer in this reassembly context
134 : u32 first_bi;
135 : // last octet of packet, ~0 until fragment without more_fragments arrives
136 : u32 last_packet_octet;
137 : // length of data collected so far
138 : u32 data_len;
139 : // trace operation counter
140 : u32 trace_op_counter;
141 : // next index - used by non-feature node
142 : u32 next_index;
143 : // error next index - used by custom apps (~0 if not used)
144 : u32 error_next_index;
145 : // minimum fragment length for this reassembly - used to estimate MTU
146 : u16 min_fragment_length;
147 : // number of fragments in this reassembly
148 : u32 fragments_n;
149 : // thread owning memory for this context (whose pool contains this ctx)
150 : u32 memory_owner_thread_index;
151 : // thread which received fragment with offset 0 and which sends out the
152 : // completed reassembly
153 : u32 sendout_thread_index;
154 : } ip4_full_reass_t;
155 :
156 : typedef struct
157 : {
158 : ip4_full_reass_t *pool;
159 : u32 reass_n;
160 : u32 id_counter;
161 : // for pacing the main thread timeouts
162 : u32 last_id;
163 : clib_spinlock_t lock;
164 : } ip4_full_reass_per_thread_t;
165 :
166 : typedef struct
167 : {
168 : // IPv4 config
169 : u32 timeout_ms;
170 : f64 timeout;
171 : u32 expire_walk_interval_ms;
172 : // maximum number of fragments in one reassembly
173 : u32 max_reass_len;
174 : // maximum number of reassemblies
175 : u32 max_reass_n;
176 :
177 : // IPv4 runtime
178 : clib_bihash_16_8_t hash;
179 : // per-thread data
180 : ip4_full_reass_per_thread_t *per_thread_data;
181 :
182 : // convenience
183 : vlib_main_t *vlib_main;
184 :
185 : u32 ip4_full_reass_expire_node_idx;
186 :
187 : /** Worker handoff */
188 : u32 fq_index;
189 : u32 fq_local_index;
190 : u32 fq_feature_index;
191 : u32 fq_custom_index;
192 :
193 : // reference count for enabling/disabling feature - per interface
194 : u32 *feature_use_refcount_per_intf;
195 :
196 : // whether local fragmented packets are reassembled or not
197 : int is_local_reass_enabled;
198 : } ip4_full_reass_main_t;
199 :
200 : extern ip4_full_reass_main_t ip4_full_reass_main;
201 :
202 : #ifndef CLIB_MARCH_VARIANT
203 : ip4_full_reass_main_t ip4_full_reass_main;
204 : #endif /* CLIB_MARCH_VARIANT */
205 :
/* Next-node indices used by the reassembly nodes; IP4_FULL_REASS_N_NEXT is
 * the number of entries. */
typedef enum
{
  IP4_FULL_REASS_NEXT_INPUT = 0,
  IP4_FULL_REASS_NEXT_DROP = 1,
  IP4_FULL_REASS_NEXT_HANDOFF = 2,
  IP4_FULL_REASS_N_NEXT = 3,
} ip4_full_reass_next_t;
213 :
/* Flavour of reassembly node. */
typedef enum
{
  NORMAL = 0,
  FEATURE = 1,
  CUSTOM = 2
} ip4_full_reass_node_type_t;
220 :
/* Operation recorded in a reassembly trace entry. */
typedef enum
{
  RANGE_NEW = 0,
  RANGE_SHRINK = 1,
  RANGE_DISCARD = 2,
  RANGE_OVERLAP = 3,
  FINALIZE = 4,
  HANDOFF = 5,
  PASSTHROUGH = 6,
} ip4_full_reass_trace_operation_e;
231 :
232 : typedef struct
233 : {
234 : u16 range_first;
235 : u16 range_last;
236 : u32 range_bi;
237 : i32 data_offset;
238 : u32 data_len;
239 : u32 first_bi;
240 : } ip4_full_reass_range_trace_t;
241 :
242 : typedef struct
243 : {
244 : ip4_full_reass_trace_operation_e action;
245 : u32 reass_id;
246 : ip4_full_reass_range_trace_t trace_range;
247 : u32 size_diff;
248 : u32 op_id;
249 : u32 thread_id;
250 : u32 thread_id_to;
251 : u32 fragment_first;
252 : u32 fragment_last;
253 : u32 total_data_len;
254 : bool is_after_handoff;
255 : ip4_header_t ip4_header;
256 : } ip4_full_reass_trace_t;
257 :
258 : extern vlib_node_registration_t ip4_full_reass_node;
259 : extern vlib_node_registration_t ip4_full_reass_node_feature;
260 : extern vlib_node_registration_t ip4_full_reass_node_custom;
261 :
262 : static void
263 14528 : ip4_full_reass_trace_details (vlib_main_t * vm, u32 bi,
264 : ip4_full_reass_range_trace_t * trace)
265 : {
266 14528 : vlib_buffer_t *b = vlib_get_buffer (vm, bi);
267 14528 : vnet_buffer_opaque_t *vnb = vnet_buffer (b);
268 14528 : trace->range_first = vnb->ip.reass.range_first;
269 14528 : trace->range_last = vnb->ip.reass.range_last;
270 14528 : trace->data_offset = ip4_full_reass_buffer_get_data_offset (b);
271 14528 : trace->data_len = ip4_full_reass_buffer_get_data_len (b);
272 14528 : trace->range_bi = bi;
273 14528 : }
274 :
275 : static u8 *
276 6142 : format_ip4_full_reass_range_trace (u8 * s, va_list * args)
277 : {
278 6142 : ip4_full_reass_range_trace_t *trace =
279 : va_arg (*args, ip4_full_reass_range_trace_t *);
280 : s =
281 6142 : format (s, "range: [%u, %u], off %d, len %u, bi %u", trace->range_first,
282 6142 : trace->range_last, trace->data_offset, trace->data_len,
283 : trace->range_bi);
284 6142 : return s;
285 : }
286 :
287 : static u8 *
288 7012 : format_ip4_full_reass_trace (u8 * s, va_list * args)
289 : {
290 7012 : CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
291 7012 : CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
292 7012 : ip4_full_reass_trace_t *t = va_arg (*args, ip4_full_reass_trace_t *);
293 7012 : u32 indent = 0;
294 7012 : if (~0 != t->reass_id)
295 : {
296 6631 : if (t->is_after_handoff)
297 : {
298 : s =
299 142 : format (s, "%U\n", format_ip4_header, &t->ip4_header,
300 : sizeof (t->ip4_header));
301 142 : indent = 2;
302 : }
303 : s =
304 6631 : format (s, "%Ureass id: %u, op id: %u, ", format_white_space, indent,
305 : t->reass_id, t->op_id);
306 6631 : indent = format_get_indent (s);
307 : s =
308 6631 : format (s,
309 : "first bi: %u, data len: %u, ip/fragment[%u, %u]",
310 : t->trace_range.first_bi, t->total_data_len, t->fragment_first,
311 : t->fragment_last);
312 : }
313 7012 : switch (t->action)
314 : {
315 248 : case RANGE_SHRINK:
316 248 : s = format (s, "\n%Ushrink %U by %u", format_white_space, indent,
317 : format_ip4_full_reass_range_trace, &t->trace_range,
318 : t->size_diff);
319 248 : break;
320 82 : case RANGE_DISCARD:
321 82 : s = format (s, "\n%Udiscard %U", format_white_space, indent,
322 : format_ip4_full_reass_range_trace, &t->trace_range);
323 82 : break;
324 5210 : case RANGE_NEW:
325 5210 : s = format (s, "\n%Unew %U", format_white_space, indent,
326 : format_ip4_full_reass_range_trace, &t->trace_range);
327 5210 : break;
328 602 : case RANGE_OVERLAP:
329 602 : s = format (s, "\n%Uoverlapping/ignored %U", format_white_space, indent,
330 : format_ip4_full_reass_range_trace, &t->trace_range);
331 602 : break;
332 489 : case FINALIZE:
333 489 : s = format (s, "\n%Ufinalize reassembly", format_white_space, indent);
334 489 : break;
335 264 : case HANDOFF:
336 : s =
337 264 : format (s, "handoff from thread #%u to thread #%u", t->thread_id,
338 : t->thread_id_to);
339 264 : break;
340 117 : case PASSTHROUGH:
341 117 : s = format (s, "passthrough - not a fragment");
342 117 : break;
343 : }
344 7012 : return s;
345 : }
346 :
347 : static void
348 14632 : ip4_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
349 : ip4_full_reass_t * reass, u32 bi,
350 : ip4_full_reass_trace_operation_e action,
351 : u32 size_diff, u32 thread_id_to)
352 : {
353 14632 : vlib_buffer_t *b = vlib_get_buffer (vm, bi);
354 14635 : vnet_buffer_opaque_t *vnb = vnet_buffer (b);
355 14635 : if (pool_is_free_index
356 14635 : (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b)))
357 : {
358 : // this buffer's trace is gone
359 105 : b->flags &= ~VLIB_BUFFER_IS_TRACED;
360 105 : return;
361 : }
362 14530 : bool is_after_handoff = false;
363 14530 : if (vlib_buffer_get_trace_thread (b) != vm->thread_index)
364 : {
365 284 : is_after_handoff = true;
366 : }
367 14529 : ip4_full_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
368 14525 : t->is_after_handoff = is_after_handoff;
369 14525 : if (t->is_after_handoff)
370 : {
371 282 : clib_memcpy (&t->ip4_header, vlib_buffer_get_current (b),
372 : clib_min (sizeof (t->ip4_header), b->current_length));
373 : }
374 14528 : if (reass)
375 : {
376 13939 : t->reass_id = reass->id;
377 13939 : t->op_id = reass->trace_op_counter;
378 13939 : t->trace_range.first_bi = reass->first_bi;
379 13939 : t->total_data_len = reass->data_len;
380 13939 : ++reass->trace_op_counter;
381 : }
382 : else
383 : {
384 589 : t->reass_id = ~0;
385 589 : t->op_id = 0;
386 589 : t->trace_range.first_bi = 0;
387 589 : t->total_data_len = 0;
388 : }
389 14528 : t->action = action;
390 14528 : ip4_full_reass_trace_details (vm, bi, &t->trace_range);
391 14528 : t->size_diff = size_diff;
392 14528 : t->thread_id = vm->thread_index;
393 14528 : t->thread_id_to = thread_id_to;
394 14528 : t->fragment_first = vnb->ip.reass.fragment_first;
395 14528 : t->fragment_last = vnb->ip.reass.fragment_last;
396 : #if 0
397 : static u8 *s = NULL;
398 : s = format (s, "%U", format_ip4_full_reass_trace, NULL, NULL, t);
399 : printf ("%.*s\n", vec_len (s), s);
400 : fflush (stdout);
401 : vec_reset_length (s);
402 : #endif
403 : }
404 :
405 : always_inline void
406 2116 : ip4_full_reass_free_ctx (ip4_full_reass_per_thread_t * rt,
407 : ip4_full_reass_t * reass)
408 : {
409 2116 : pool_put (rt->pool, reass);
410 2116 : --rt->reass_n;
411 2116 : }
412 :
413 : always_inline void
414 2116 : ip4_full_reass_free (ip4_full_reass_main_t * rm,
415 : ip4_full_reass_per_thread_t * rt,
416 : ip4_full_reass_t * reass)
417 : {
418 2116 : clib_bihash_kv_16_8_t kv = {};
419 2116 : clib_memcpy_fast (&kv, &reass->key, sizeof (kv.key));
420 2116 : clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
421 2116 : return ip4_full_reass_free_ctx (rt, reass);
422 : }
423 :
424 : /* n_left_to_next, and to_next are taken as input params, as this function
425 : * could be called from a graphnode, where its managing local copy of these
426 : * variables, and ignoring those and still trying to enqueue the buffers
427 : * with local variables would cause either buffer leak or corruption */
428 : always_inline void
429 335 : ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
430 : ip4_full_reass_t *reass)
431 : {
432 335 : u32 range_bi = reass->first_bi;
433 : vlib_buffer_t *range_b;
434 : vnet_buffer_opaque_t *range_vnb;
435 335 : u32 *to_free = NULL;
436 :
437 883 : while (~0 != range_bi)
438 : {
439 548 : range_b = vlib_get_buffer (vm, range_bi);
440 548 : range_vnb = vnet_buffer (range_b);
441 :
442 548 : if (~0 != range_bi)
443 : {
444 548 : vec_add1 (to_free, range_bi);
445 : }
446 :
447 548 : range_bi = range_vnb->ip.reass.next_range_bi;
448 : }
449 :
450 : /* send to next_error_index */
451 335 : if (~0 != reass->error_next_index &&
452 0 : reass->error_next_index < node->n_next_nodes)
453 0 : {
454 0 : u32 n_free = vec_len (to_free);
455 :
456 : /* record number of packets sent to custom app */
457 0 : vlib_node_increment_counter (vm, node->node_index,
458 : IP4_ERROR_REASS_TO_CUSTOM_APP, n_free);
459 :
460 0 : if (node->flags & VLIB_NODE_FLAG_TRACE)
461 0 : for (u32 i = 0; i < n_free; i++)
462 : {
463 0 : vlib_buffer_t *b = vlib_get_buffer (vm, to_free[i]);
464 0 : if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
465 0 : ip4_full_reass_add_trace (vm, node, reass, to_free[i],
466 : RANGE_DISCARD, 0, ~0);
467 : }
468 :
469 0 : vlib_buffer_enqueue_to_single_next (vm, node, to_free,
470 0 : reass->error_next_index, n_free);
471 : }
472 : else
473 : {
474 335 : vlib_buffer_free (vm, to_free, vec_len (to_free));
475 : }
476 335 : vec_free (to_free);
477 335 : }
478 :
479 : always_inline void
480 0 : sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip4_full_reass_t *reass,
481 : u32 *bi0)
482 : {
483 0 : u32 range_bi = reass->first_bi;
484 : vlib_buffer_t *range_b;
485 : vnet_buffer_opaque_t *range_vnb;
486 :
487 0 : while (~0 != range_bi)
488 : {
489 0 : range_b = vlib_get_buffer (vm, range_bi);
490 0 : range_vnb = vnet_buffer (range_b);
491 0 : u32 bi = range_bi;
492 0 : if (~0 != bi)
493 : {
494 0 : if (bi == *bi0)
495 0 : *bi0 = ~0;
496 0 : if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT)
497 : {
498 0 : u32 _bi = bi;
499 0 : vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);
500 0 : while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)
501 : {
502 0 : if (_b->next_buffer != range_vnb->ip.reass.next_range_bi)
503 : {
504 0 : _bi = _b->next_buffer;
505 0 : _b = vlib_get_buffer (vm, _bi);
506 : }
507 : else
508 : {
509 0 : _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
510 0 : break;
511 : }
512 : }
513 : }
514 0 : range_bi = range_vnb->ip.reass.next_range_bi;
515 : }
516 : }
517 0 : if (*bi0 != ~0)
518 : {
519 0 : vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
520 0 : vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
521 0 : if (~0 != reass->first_bi)
522 : {
523 0 : fvnb->ip.reass.next_range_bi = reass->first_bi;
524 0 : reass->first_bi = *bi0;
525 : }
526 : else
527 : {
528 0 : reass->first_bi = *bi0;
529 0 : fvnb->ip.reass.next_range_bi = ~0;
530 : }
531 0 : *bi0 = ~0;
532 : }
533 0 : }
534 :
535 : always_inline void
536 2114 : ip4_full_reass_init (ip4_full_reass_t * reass)
537 : {
538 2114 : reass->first_bi = ~0;
539 2114 : reass->last_packet_octet = ~0;
540 2114 : reass->data_len = 0;
541 2114 : reass->next_index = ~0;
542 2114 : reass->error_next_index = ~0;
543 2114 : }
544 :
545 : always_inline ip4_full_reass_t *
546 14897 : ip4_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
547 : ip4_full_reass_main_t *rm,
548 : ip4_full_reass_per_thread_t *rt,
549 : ip4_full_reass_kv_t *kv, u8 *do_handoff)
550 : {
551 : ip4_full_reass_t *reass;
552 : f64 now;
553 :
554 14897 : again:
555 :
556 14897 : reass = NULL;
557 14897 : now = vlib_time_now (vm);
558 14902 : if (!clib_bihash_search_16_8 (&rm->hash, &kv->kv, &kv->kv))
559 : {
560 12759 : if (vm->thread_index != kv->v.memory_owner_thread_index)
561 : {
562 377 : *do_handoff = 1;
563 377 : return NULL;
564 : }
565 12379 : reass =
566 12382 : pool_elt_at_index (rm->per_thread_data
567 : [kv->v.memory_owner_thread_index].pool,
568 : kv->v.reass_index);
569 :
570 12379 : if (now > reass->last_heard + rm->timeout)
571 : {
572 233 : vlib_node_increment_counter (vm, node->node_index,
573 : IP4_ERROR_REASS_TIMEOUT, 1);
574 233 : ip4_full_reass_drop_all (vm, node, reass);
575 233 : ip4_full_reass_free (rm, rt, reass);
576 233 : reass = NULL;
577 : }
578 : }
579 :
580 14519 : if (reass)
581 : {
582 12146 : reass->last_heard = now;
583 12146 : return reass;
584 : }
585 :
586 2373 : if (rt->reass_n >= rm->max_reass_n)
587 : {
588 258 : reass = NULL;
589 258 : return reass;
590 : }
591 : else
592 : {
593 2115 : pool_get (rt->pool, reass);
594 2114 : clib_memset (reass, 0, sizeof (*reass));
595 2114 : reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
596 2114 : reass->memory_owner_thread_index = vm->thread_index;
597 2114 : ++rt->id_counter;
598 2114 : ip4_full_reass_init (reass);
599 2114 : ++rt->reass_n;
600 : }
601 :
602 2114 : clib_memcpy_fast (&reass->key, &kv->kv.key, sizeof (reass->key));
603 2115 : kv->v.reass_index = (reass - rt->pool);
604 2115 : kv->v.memory_owner_thread_index = vm->thread_index;
605 2115 : reass->last_heard = now;
606 :
607 2115 : int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2);
608 2115 : if (rv)
609 : {
610 0 : ip4_full_reass_free_ctx (rt, reass);
611 0 : reass = NULL;
612 : // if other worker created a context already work with the other copy
613 0 : if (-2 == rv)
614 0 : goto again;
615 : }
616 :
617 2115 : return reass;
618 : }
619 :
620 : always_inline ip4_full_reass_rc_t
621 1781 : ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
622 : ip4_full_reass_main_t * rm,
623 : ip4_full_reass_per_thread_t * rt,
624 : ip4_full_reass_t * reass, u32 * bi0,
625 : u32 * next0, u32 * error0, bool is_custom)
626 : {
627 1781 : vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi);
628 1781 : vlib_buffer_t *last_b = NULL;
629 1781 : u32 sub_chain_bi = reass->first_bi;
630 1781 : u32 total_length = 0;
631 : do
632 : {
633 12582 : u32 tmp_bi = sub_chain_bi;
634 12582 : vlib_buffer_t *tmp = vlib_get_buffer (vm, tmp_bi);
635 12582 : ip4_header_t *ip = vlib_buffer_get_current (tmp);
636 12582 : vnet_buffer_opaque_t *vnb = vnet_buffer (tmp);
637 12582 : if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
638 0 : !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
639 : {
640 0 : return IP4_REASS_RC_INTERNAL_ERROR;
641 : }
642 :
643 12582 : u32 data_len = ip4_full_reass_buffer_get_data_len (tmp);
644 12583 : u32 trim_front =
645 12582 : ip4_header_bytes (ip) + ip4_full_reass_buffer_get_data_offset (tmp);
646 12583 : u32 trim_end =
647 12583 : vlib_buffer_length_in_chain (vm, tmp) - trim_front - data_len;
648 12583 : if (tmp_bi == reass->first_bi)
649 : {
650 : /* first buffer - keep ip4 header */
651 1781 : if (0 != ip4_full_reass_buffer_get_data_offset (tmp))
652 : {
653 0 : return IP4_REASS_RC_INTERNAL_ERROR;
654 : }
655 1781 : trim_front = 0;
656 1781 : trim_end = vlib_buffer_length_in_chain (vm, tmp) - data_len -
657 1781 : ip4_header_bytes (ip);
658 1781 : if (!(vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0))
659 : {
660 0 : return IP4_REASS_RC_INTERNAL_ERROR;
661 : }
662 : }
663 12583 : u32 keep_data =
664 12583 : vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
665 : while (1)
666 : {
667 12582 : if (trim_front)
668 : {
669 10803 : if (trim_front > tmp->current_length)
670 : {
671 : /* drop whole buffer */
672 0 : u32 to_be_freed_bi = tmp_bi;
673 0 : trim_front -= tmp->current_length;
674 0 : if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
675 : {
676 0 : return IP4_REASS_RC_INTERNAL_ERROR;
677 : }
678 0 : tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
679 0 : tmp_bi = tmp->next_buffer;
680 0 : tmp->next_buffer = 0;
681 0 : tmp = vlib_get_buffer (vm, tmp_bi);
682 0 : vlib_buffer_free_one (vm, to_be_freed_bi);
683 0 : continue;
684 : }
685 : else
686 : {
687 10803 : vlib_buffer_advance (tmp, trim_front);
688 10804 : trim_front = 0;
689 : }
690 : }
691 12583 : if (keep_data)
692 : {
693 12583 : if (last_b)
694 : {
695 10804 : last_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
696 10804 : last_b->next_buffer = tmp_bi;
697 : }
698 12583 : last_b = tmp;
699 12583 : if (keep_data <= tmp->current_length)
700 : {
701 12583 : tmp->current_length = keep_data;
702 12583 : keep_data = 0;
703 : }
704 : else
705 : {
706 0 : keep_data -= tmp->current_length;
707 0 : if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
708 : {
709 0 : return IP4_REASS_RC_INTERNAL_ERROR;
710 : }
711 : }
712 12583 : total_length += tmp->current_length;
713 12583 : if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
714 : {
715 0 : tmp_bi = tmp->next_buffer;
716 0 : tmp = vlib_get_buffer (vm, tmp->next_buffer);
717 : }
718 : else
719 : {
720 12583 : break;
721 : }
722 : }
723 : else
724 : {
725 0 : u32 to_be_freed_bi = tmp_bi;
726 0 : if (reass->first_bi == tmp_bi)
727 : {
728 0 : return IP4_REASS_RC_INTERNAL_ERROR;
729 : }
730 0 : if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
731 : {
732 0 : tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
733 0 : tmp_bi = tmp->next_buffer;
734 0 : tmp->next_buffer = 0;
735 0 : tmp = vlib_get_buffer (vm, tmp_bi);
736 0 : vlib_buffer_free_one (vm, to_be_freed_bi);
737 : }
738 : else
739 : {
740 0 : tmp->next_buffer = 0;
741 0 : vlib_buffer_free_one (vm, to_be_freed_bi);
742 0 : break;
743 : }
744 : }
745 : }
746 12582 : sub_chain_bi =
747 12583 : vnet_buffer (vlib_get_buffer (vm, sub_chain_bi))->ip.
748 : reass.next_range_bi;
749 : }
750 12582 : while (~0 != sub_chain_bi);
751 :
752 1781 : if (!last_b)
753 : {
754 0 : return IP4_REASS_RC_INTERNAL_ERROR;
755 : }
756 1781 : last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
757 :
758 1781 : if (total_length < first_b->current_length)
759 : {
760 0 : return IP4_REASS_RC_INTERNAL_ERROR;
761 : }
762 1781 : total_length -= first_b->current_length;
763 1781 : first_b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
764 1781 : first_b->total_length_not_including_first_buffer = total_length;
765 1781 : ip4_header_t *ip = vlib_buffer_get_current (first_b);
766 1781 : ip->flags_and_fragment_offset = 0;
767 1781 : ip->length = clib_host_to_net_u16 (first_b->current_length + total_length);
768 1781 : ip->checksum = ip4_header_checksum (ip);
769 1781 : if (!vlib_buffer_chain_linearize (vm, first_b))
770 : {
771 0 : return IP4_REASS_RC_NO_BUF;
772 : }
773 : // reset to reconstruct the mbuf linking
774 1781 : first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
775 1781 : if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
776 : {
777 1115 : ip4_full_reass_add_trace (vm, node, reass, reass->first_bi, FINALIZE, 0,
778 : ~0);
779 : #if 0
780 : // following code does a hexdump of packet fragments to stdout ...
781 : do
782 : {
783 : u32 bi = reass->first_bi;
784 : u8 *s = NULL;
785 : while (~0 != bi)
786 : {
787 : vlib_buffer_t *b = vlib_get_buffer (vm, bi);
788 : s = format (s, "%u: %U\n", bi, format_hexdump,
789 : vlib_buffer_get_current (b), b->current_length);
790 : if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
791 : {
792 : bi = b->next_buffer;
793 : }
794 : else
795 : {
796 : break;
797 : }
798 : }
799 : printf ("%.*s\n", vec_len (s), s);
800 : fflush (stdout);
801 : vec_free (s);
802 : }
803 : while (0);
804 : #endif
805 : }
806 1781 : *bi0 = reass->first_bi;
807 1781 : if (!is_custom)
808 : {
809 1781 : *next0 = IP4_FULL_REASS_NEXT_INPUT;
810 : }
811 : else
812 : {
813 0 : *next0 = reass->next_index;
814 : }
815 1781 : vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
816 :
817 : /* Keep track of number of successfully reassembled packets and number of
818 : * fragments reassembled */
819 1781 : vlib_node_increment_counter (vm, node->node_index, IP4_ERROR_REASS_SUCCESS,
820 : 1);
821 :
822 1781 : vlib_node_increment_counter (vm, node->node_index,
823 : IP4_ERROR_REASS_FRAGMENTS_REASSEMBLED,
824 1781 : reass->fragments_n);
825 :
826 1781 : *error0 = IP4_ERROR_NONE;
827 1781 : ip4_full_reass_free (rm, rt, reass);
828 1781 : reass = NULL;
829 1781 : return IP4_REASS_RC_OK;
830 : }
831 :
832 : always_inline ip4_full_reass_rc_t
833 13299 : ip4_full_reass_insert_range_in_chain (vlib_main_t * vm,
834 : ip4_full_reass_t * reass,
835 : u32 prev_range_bi, u32 new_next_bi)
836 : {
837 13299 : vlib_buffer_t *new_next_b = vlib_get_buffer (vm, new_next_bi);
838 13298 : vnet_buffer_opaque_t *new_next_vnb = vnet_buffer (new_next_b);
839 13298 : if (~0 != prev_range_bi)
840 : {
841 9531 : vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_range_bi);
842 9530 : vnet_buffer_opaque_t *prev_vnb = vnet_buffer (prev_b);
843 9530 : new_next_vnb->ip.reass.next_range_bi = prev_vnb->ip.reass.next_range_bi;
844 9530 : prev_vnb->ip.reass.next_range_bi = new_next_bi;
845 : }
846 : else
847 : {
848 3767 : if (~0 != reass->first_bi)
849 : {
850 1602 : new_next_vnb->ip.reass.next_range_bi = reass->first_bi;
851 : }
852 3767 : reass->first_bi = new_next_bi;
853 : }
854 13297 : vnet_buffer_opaque_t *vnb = vnet_buffer (new_next_b);
855 13297 : if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
856 0 : !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
857 : {
858 0 : return IP4_REASS_RC_INTERNAL_ERROR;
859 : }
860 13297 : reass->data_len += ip4_full_reass_buffer_get_data_len (new_next_b);
861 13299 : return IP4_REASS_RC_OK;
862 : }
863 :
864 : always_inline ip4_full_reass_rc_t
865 164 : ip4_full_reass_remove_range_from_chain (vlib_main_t * vm,
866 : vlib_node_runtime_t * node,
867 : ip4_full_reass_t * reass,
868 : u32 prev_range_bi, u32 discard_bi)
869 : {
870 164 : vlib_buffer_t *discard_b = vlib_get_buffer (vm, discard_bi);
871 164 : vnet_buffer_opaque_t *discard_vnb = vnet_buffer (discard_b);
872 164 : if (~0 != prev_range_bi)
873 : {
874 114 : vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_range_bi);
875 114 : vnet_buffer_opaque_t *prev_vnb = vnet_buffer (prev_b);
876 114 : if (!(prev_vnb->ip.reass.next_range_bi == discard_bi))
877 : {
878 0 : return IP4_REASS_RC_INTERNAL_ERROR;
879 : }
880 114 : prev_vnb->ip.reass.next_range_bi = discard_vnb->ip.reass.next_range_bi;
881 : }
882 : else
883 : {
884 50 : reass->first_bi = discard_vnb->ip.reass.next_range_bi;
885 : }
886 164 : vnet_buffer_opaque_t *vnb = vnet_buffer (discard_b);
887 164 : if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
888 0 : !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
889 : {
890 0 : return IP4_REASS_RC_INTERNAL_ERROR;
891 : }
892 164 : reass->data_len -= ip4_full_reass_buffer_get_data_len (discard_b);
893 : while (1)
894 0 : {
895 164 : u32 to_be_freed_bi = discard_bi;
896 164 : if (PREDICT_FALSE (discard_b->flags & VLIB_BUFFER_IS_TRACED))
897 : {
898 164 : ip4_full_reass_add_trace (vm, node, reass, discard_bi, RANGE_DISCARD,
899 : 0, ~0);
900 : }
901 164 : if (discard_b->flags & VLIB_BUFFER_NEXT_PRESENT)
902 : {
903 0 : discard_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
904 0 : discard_bi = discard_b->next_buffer;
905 0 : discard_b->next_buffer = 0;
906 0 : discard_b = vlib_get_buffer (vm, discard_bi);
907 0 : vlib_buffer_free_one (vm, to_be_freed_bi);
908 : }
909 : else
910 : {
911 164 : discard_b->next_buffer = 0;
912 164 : vlib_buffer_free_one (vm, to_be_freed_bi);
913 164 : break;
914 : }
915 : }
916 164 : return IP4_REASS_RC_OK;
917 : }
918 :
919 : always_inline ip4_full_reass_rc_t
920 14259 : ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
921 : ip4_full_reass_main_t * rm,
922 : ip4_full_reass_per_thread_t * rt,
923 : ip4_full_reass_t * reass, u32 * bi0, u32 * next0,
924 : u32 * error0, bool is_custom, u32 * handoff_thread_idx)
925 : {
926 14259 : vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
927 14269 : vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
928 14269 : if (is_custom)
929 : {
930 : // store (error_)next_index before it's overwritten
931 0 : reass->next_index = fvnb->ip.reass.next_index;
932 0 : reass->error_next_index = fvnb->ip.reass.error_next_index;
933 : }
934 14269 : ip4_full_reass_rc_t rc = IP4_REASS_RC_OK;
935 14269 : int consumed = 0;
936 14269 : ip4_header_t *fip = vlib_buffer_get_current (fb);
937 14267 : const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
938 14262 : const u32 fragment_length =
939 14266 : clib_net_to_host_u16 (fip->length) - ip4_header_bytes (fip);
940 14262 : const u32 fragment_last = fragment_first + fragment_length - 1;
941 14262 : fvnb->ip.reass.fragment_first = fragment_first;
942 14262 : fvnb->ip.reass.fragment_last = fragment_last;
943 14262 : int more_fragments = ip4_get_fragment_more (fip);
944 14259 : u32 candidate_range_bi = reass->first_bi;
945 14259 : u32 prev_range_bi = ~0;
946 14259 : fvnb->ip.reass.range_first = fragment_first;
947 14259 : fvnb->ip.reass.range_last = fragment_last;
948 14259 : fvnb->ip.reass.next_range_bi = ~0;
949 14259 : if (!more_fragments)
950 : {
951 1857 : reass->last_packet_octet = fragment_last;
952 : }
953 14259 : if (~0 == reass->first_bi)
954 : {
955 : // starting a new reassembly
956 : rc =
957 2115 : ip4_full_reass_insert_range_in_chain (vm, reass, prev_range_bi, *bi0);
958 2115 : if (IP4_REASS_RC_OK != rc)
959 : {
960 0 : return rc;
961 : }
962 2115 : if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
963 : {
964 1449 : ip4_full_reass_add_trace (vm, node, reass, *bi0, RANGE_NEW, 0, ~0);
965 : }
966 2114 : *bi0 = ~0;
967 2114 : reass->min_fragment_length = clib_net_to_host_u16 (fip->length);
968 2114 : reass->fragments_n = 1;
969 2114 : return IP4_REASS_RC_OK;
970 : }
971 12145 : reass->min_fragment_length =
972 12144 : clib_min (clib_net_to_host_u16 (fip->length),
973 : fvnb->ip.reass.estimated_mtu);
974 156464 : while (~0 != candidate_range_bi)
975 : {
976 156450 : vlib_buffer_t *candidate_b = vlib_get_buffer (vm, candidate_range_bi);
977 156447 : vnet_buffer_opaque_t *candidate_vnb = vnet_buffer (candidate_b);
978 156447 : if (fragment_first > candidate_vnb->ip.reass.range_last)
979 : {
980 : // this fragments starts after candidate range
981 151561 : prev_range_bi = candidate_range_bi;
982 151561 : candidate_range_bi = candidate_vnb->ip.reass.next_range_bi;
983 151561 : if (candidate_vnb->ip.reass.range_last < fragment_last &&
984 : ~0 == candidate_range_bi)
985 : {
986 : // special case - this fragment falls beyond all known ranges
987 7738 : rc = ip4_full_reass_insert_range_in_chain (vm, reass,
988 : prev_range_bi, *bi0);
989 7739 : if (IP4_REASS_RC_OK != rc)
990 : {
991 0 : return rc;
992 : }
993 7739 : consumed = 1;
994 7739 : break;
995 : }
996 143823 : continue;
997 : }
998 4886 : if (fragment_last < candidate_vnb->ip.reass.range_first)
999 : {
1000 : // this fragment ends before candidate range without any overlap
1001 2798 : rc = ip4_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
1002 : *bi0);
1003 2798 : if (IP4_REASS_RC_OK != rc)
1004 : {
1005 0 : return rc;
1006 : }
1007 2798 : consumed = 1;
1008 : }
1009 : else
1010 : {
1011 2088 : if (fragment_first >= candidate_vnb->ip.reass.range_first &&
1012 1618 : fragment_last <= candidate_vnb->ip.reass.range_last)
1013 : {
1014 : // this fragment is a (sub)part of existing range, ignore it
1015 971 : if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
1016 : {
1017 971 : ip4_full_reass_add_trace (vm, node, reass, *bi0,
1018 : RANGE_OVERLAP, 0, ~0);
1019 : }
1020 971 : break;
1021 : }
1022 1117 : int discard_candidate = 0;
1023 1117 : if (fragment_first < candidate_vnb->ip.reass.range_first)
1024 : {
1025 496 : u32 overlap =
1026 496 : fragment_last - candidate_vnb->ip.reass.range_first + 1;
1027 496 : if (overlap < ip4_full_reass_buffer_get_data_len (candidate_b))
1028 : {
1029 496 : candidate_vnb->ip.reass.range_first += overlap;
1030 496 : if (reass->data_len < overlap)
1031 : {
1032 0 : return IP4_REASS_RC_INTERNAL_ERROR;
1033 : }
1034 496 : reass->data_len -= overlap;
1035 496 : if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
1036 : {
1037 496 : ip4_full_reass_add_trace (vm, node, reass,
1038 : candidate_range_bi,
1039 : RANGE_SHRINK, 0, ~0);
1040 : }
1041 496 : rc = ip4_full_reass_insert_range_in_chain (
1042 : vm, reass, prev_range_bi, *bi0);
1043 496 : if (IP4_REASS_RC_OK != rc)
1044 : {
1045 0 : return rc;
1046 : }
1047 496 : consumed = 1;
1048 : }
1049 : else
1050 : {
1051 0 : discard_candidate = 1;
1052 : }
1053 : }
1054 621 : else if (fragment_last > candidate_vnb->ip.reass.range_last)
1055 : {
1056 647 : u32 overlap =
1057 647 : candidate_vnb->ip.reass.range_last - fragment_first + 1;
1058 647 : if (overlap < ip4_full_reass_buffer_get_data_len (candidate_b))
1059 : {
1060 483 : fvnb->ip.reass.range_first += overlap;
1061 483 : if (~0 != candidate_vnb->ip.reass.next_range_bi)
1062 : {
1063 432 : prev_range_bi = candidate_range_bi;
1064 432 : candidate_range_bi =
1065 : candidate_vnb->ip.reass.next_range_bi;
1066 432 : continue;
1067 : }
1068 : else
1069 : {
1070 : // special case - last range discarded
1071 51 : rc = ip4_full_reass_insert_range_in_chain (
1072 : vm, reass, candidate_range_bi, *bi0);
1073 51 : if (IP4_REASS_RC_OK != rc)
1074 : {
1075 0 : return rc;
1076 : }
1077 51 : consumed = 1;
1078 : }
1079 : }
1080 : else
1081 : {
1082 164 : discard_candidate = 1;
1083 : }
1084 : }
1085 : else
1086 : {
1087 0 : discard_candidate = 1;
1088 : }
1089 685 : if (discard_candidate)
1090 : {
1091 164 : u32 next_range_bi = candidate_vnb->ip.reass.next_range_bi;
1092 : // discard candidate range, probe next range
1093 164 : rc = ip4_full_reass_remove_range_from_chain (
1094 : vm, node, reass, prev_range_bi, candidate_range_bi);
1095 164 : if (IP4_REASS_RC_OK != rc)
1096 : {
1097 0 : return rc;
1098 : }
1099 164 : if (~0 != next_range_bi)
1100 : {
1101 64 : candidate_range_bi = next_range_bi;
1102 64 : continue;
1103 : }
1104 : else
1105 : {
1106 : // special case - last range discarded
1107 100 : rc = ip4_full_reass_insert_range_in_chain (
1108 : vm, reass, prev_range_bi, *bi0);
1109 100 : if (IP4_REASS_RC_OK != rc)
1110 : {
1111 0 : return rc;
1112 : }
1113 100 : consumed = 1;
1114 : }
1115 : }
1116 : }
1117 3419 : break;
1118 : }
1119 12143 : ++reass->fragments_n;
1120 12143 : if (consumed)
1121 : {
1122 11184 : if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
1123 : {
1124 9850 : ip4_full_reass_add_trace (vm, node, reass, *bi0, RANGE_NEW, 0, ~0);
1125 : }
1126 : }
1127 12153 : if (~0 != reass->last_packet_octet &&
1128 4054 : reass->data_len == reass->last_packet_octet + 1)
1129 1781 : {
1130 1781 : *handoff_thread_idx = reass->sendout_thread_index;
1131 1781 : int handoff =
1132 1781 : reass->memory_owner_thread_index != reass->sendout_thread_index;
1133 : rc =
1134 1781 : ip4_full_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
1135 : is_custom);
1136 1781 : if (IP4_REASS_RC_OK == rc && handoff)
1137 : {
1138 0 : rc = IP4_REASS_RC_HANDOFF;
1139 : }
1140 : }
1141 : else
1142 : {
1143 10372 : if (consumed)
1144 : {
1145 9401 : *bi0 = ~0;
1146 9401 : if (reass->fragments_n > rm->max_reass_len)
1147 : {
1148 1 : rc = IP4_REASS_RC_TOO_MANY_FRAGMENTS;
1149 : }
1150 : }
1151 : else
1152 : {
1153 971 : *next0 = IP4_FULL_REASS_NEXT_DROP;
1154 971 : *error0 = IP4_ERROR_REASS_DUPLICATE_FRAGMENT;
1155 : }
1156 : }
1157 12153 : return rc;
1158 : }
1159 :
1160 : always_inline uword
1161 128 : ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1162 : vlib_frame_t *frame, ip4_full_reass_node_type_t type,
1163 : bool is_local)
1164 : {
1165 128 : u32 *from = vlib_frame_vector_args (frame);
1166 128 : u32 n_left, n_next = 0, to_next[VLIB_FRAME_SIZE];
1167 128 : ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1168 128 : ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
1169 : u16 nexts[VLIB_FRAME_SIZE];
1170 :
1171 128 : clib_spinlock_lock (&rt->lock);
1172 :
1173 128 : n_left = frame->n_vectors;
1174 15246 : while (n_left > 0)
1175 : {
1176 : u32 bi0;
1177 : vlib_buffer_t *b0;
1178 : u32 next0;
1179 15118 : u32 error0 = IP4_ERROR_NONE;
1180 :
1181 15118 : bi0 = from[0];
1182 15118 : b0 = vlib_get_buffer (vm, bi0);
1183 :
1184 15120 : ip4_header_t *ip0 = vlib_buffer_get_current (b0);
1185 15119 : if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
1186 : {
1187 : // this is a whole packet - no fragmentation
1188 213 : if (CUSTOM != type)
1189 : {
1190 213 : next0 = IP4_FULL_REASS_NEXT_INPUT;
1191 : }
1192 : else
1193 : {
1194 0 : next0 = vnet_buffer (b0)->ip.reass.next_index;
1195 : }
1196 213 : ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0, ~0);
1197 213 : goto packet_enqueue;
1198 : }
1199 :
1200 14906 : if (is_local && !rm->is_local_reass_enabled)
1201 : {
1202 3 : next0 = IP4_FULL_REASS_NEXT_DROP;
1203 3 : goto packet_enqueue;
1204 : }
1205 :
1206 14903 : const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
1207 14901 : const u32 fragment_length =
1208 14900 : clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
1209 14901 : const u32 fragment_last = fragment_first + fragment_length - 1;
1210 :
1211 : /* Keep track of received fragments */
1212 14901 : vlib_node_increment_counter (vm, node->node_index,
1213 : IP4_ERROR_REASS_FRAGMENTS_RCVD, 1);
1214 :
1215 14901 : if (fragment_first > fragment_last ||
1216 14901 : fragment_first + fragment_length > UINT16_MAX - 20 ||
1217 33 : (fragment_length < 8 && // 8 is minimum frag length per RFC 791
1218 33 : ip4_get_fragment_more (ip0)))
1219 : {
1220 2 : next0 = IP4_FULL_REASS_NEXT_DROP;
1221 2 : error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
1222 2 : goto packet_enqueue;
1223 : }
1224 :
1225 29797 : u32 fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
1226 14899 : vec_elt (ip4_main.fib_index_by_sw_if_index,
1227 29797 : vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
1228 0 : vnet_buffer (b0)->sw_if_index[VLIB_TX];
1229 :
1230 14898 : ip4_full_reass_kv_t kv = { .k.fib_index = fib_index,
1231 14898 : .k.src.as_u32 = ip0->src_address.as_u32,
1232 14898 : .k.dst.as_u32 = ip0->dst_address.as_u32,
1233 14898 : .k.frag_id = ip0->fragment_id,
1234 14898 : .k.proto = ip0->protocol
1235 :
1236 : };
1237 14898 : u8 do_handoff = 0;
1238 :
1239 : ip4_full_reass_t *reass =
1240 14898 : ip4_full_reass_find_or_create (vm, node, rm, rt, &kv, &do_handoff);
1241 :
1242 14896 : if (reass)
1243 : {
1244 14261 : const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
1245 14260 : if (0 == fragment_first)
1246 : {
1247 1957 : reass->sendout_thread_index = vm->thread_index;
1248 : }
1249 : }
1250 :
1251 14895 : if (PREDICT_FALSE (do_handoff))
1252 : {
1253 377 : next0 = IP4_FULL_REASS_NEXT_HANDOFF;
1254 377 : vnet_buffer (b0)->ip.reass.owner_thread_index =
1255 377 : kv.v.memory_owner_thread_index;
1256 : }
1257 14518 : else if (reass)
1258 : {
1259 : u32 handoff_thread_idx;
1260 14260 : u32 counter = ~0;
1261 14260 : switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0,
1262 : &error0, CUSTOM == type,
1263 : &handoff_thread_idx))
1264 : {
1265 14265 : case IP4_REASS_RC_OK:
1266 : /* nothing to do here */
1267 14265 : break;
1268 0 : case IP4_REASS_RC_HANDOFF:
1269 0 : next0 = IP4_FULL_REASS_NEXT_HANDOFF;
1270 0 : b0 = vlib_get_buffer (vm, bi0);
1271 0 : vnet_buffer (b0)->ip.reass.owner_thread_index =
1272 : handoff_thread_idx;
1273 0 : break;
1274 1 : case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
1275 1 : counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
1276 1 : break;
1277 0 : case IP4_REASS_RC_NO_BUF:
1278 0 : counter = IP4_ERROR_REASS_NO_BUF;
1279 0 : break;
1280 0 : case IP4_REASS_RC_INTERNAL_ERROR:
1281 0 : counter = IP4_ERROR_REASS_INTERNAL_ERROR;
1282 : /* Sanitization is needed in internal error cases only, as
1283 : * the incoming packet is already dropped in other cases,
1284 : * also adding bi0 back to the reassembly list, fixes the
1285 : * leaking of buffers during internal errors.
1286 : *
1287 : * Also it doesnt make sense to send these buffers custom
1288 : * app, these fragments are with internal errors */
1289 0 : sanitize_reass_buffers_add_missing (vm, reass, &bi0);
1290 0 : reass->error_next_index = ~0;
1291 0 : break;
1292 : }
1293 :
1294 14266 : if (~0 != counter)
1295 : {
1296 1 : vlib_node_increment_counter (vm, node->node_index, counter, 1);
1297 1 : ip4_full_reass_drop_all (vm, node, reass);
1298 1 : ip4_full_reass_free (rm, rt, reass);
1299 1 : goto next_packet;
1300 : }
1301 : }
1302 : else
1303 : {
1304 258 : next0 = IP4_FULL_REASS_NEXT_DROP;
1305 258 : error0 = IP4_ERROR_REASS_LIMIT_REACHED;
1306 : }
1307 :
1308 15118 : packet_enqueue:
1309 :
1310 15118 : if (bi0 != ~0)
1311 : {
1312 : /* bi0 might have been updated by reass_finalize, reload */
1313 3605 : b0 = vlib_get_buffer (vm, bi0);
1314 3605 : if (IP4_ERROR_NONE != error0)
1315 : {
1316 1231 : b0->error = node->errors[error0];
1317 : }
1318 :
1319 3605 : if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
1320 : {
1321 376 : if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1322 : {
1323 376 : ip4_full_reass_add_trace (
1324 : vm, node, NULL, bi0, HANDOFF, 0,
1325 376 : vnet_buffer (b0)->ip.reass.owner_thread_index);
1326 : }
1327 : }
1328 3229 : else if (FEATURE == type && IP4_ERROR_NONE == error0)
1329 : {
1330 1924 : vnet_feature_next (&next0, b0);
1331 : }
1332 :
1333 : /* Increment the counter to-custom-app also as this fragment is
1334 : * also going to application */
1335 3604 : if (CUSTOM == type)
1336 : {
1337 0 : vlib_node_increment_counter (vm, node->node_index,
1338 : IP4_ERROR_REASS_TO_CUSTOM_APP, 1);
1339 : }
1340 :
1341 3604 : to_next[n_next] = bi0;
1342 3604 : nexts[n_next] = next0;
1343 3604 : n_next++;
1344 : IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
1345 : }
1346 :
1347 11513 : next_packet:
1348 15118 : from += 1;
1349 15118 : n_left -= 1;
1350 : }
1351 :
1352 128 : clib_spinlock_unlock (&rt->lock);
1353 :
1354 128 : vlib_buffer_enqueue_to_next (vm, node, to_next, nexts, n_next);
1355 128 : return frame->n_vectors;
1356 : }
1357 :
1358 2300 : VLIB_NODE_FN (ip4_full_reass_node) (vlib_main_t * vm,
1359 : vlib_node_runtime_t * node,
1360 : vlib_frame_t * frame)
1361 : {
1362 0 : return ip4_full_reass_inline (vm, node, frame, NORMAL, false /* is_local */);
1363 : }
1364 :
1365 183788 : VLIB_REGISTER_NODE (ip4_full_reass_node) = {
1366 : .name = "ip4-full-reassembly",
1367 : .vector_size = sizeof (u32),
1368 : .format_trace = format_ip4_full_reass_trace,
1369 : .n_errors = IP4_N_ERROR,
1370 : .error_counters = ip4_error_counters,
1371 : .n_next_nodes = IP4_FULL_REASS_N_NEXT,
1372 : .next_nodes =
1373 : {
1374 : [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
1375 : [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
1376 : [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-full-reassembly-handoff",
1377 :
1378 : },
1379 : };
1380 :
1381 2304 : VLIB_NODE_FN (ip4_local_full_reass_node)
1382 : (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1383 : {
1384 4 : return ip4_full_reass_inline (vm, node, frame, NORMAL, true /* is_local */);
1385 : }
1386 :
1387 183788 : VLIB_REGISTER_NODE (ip4_local_full_reass_node) = {
1388 : .name = "ip4-local-full-reassembly",
1389 : .vector_size = sizeof (u32),
1390 : .format_trace = format_ip4_full_reass_trace,
1391 : .n_errors = IP4_N_ERROR,
1392 : .error_counters = ip4_error_counters,
1393 : .n_next_nodes = IP4_FULL_REASS_N_NEXT,
1394 : .next_nodes =
1395 : {
1396 : [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
1397 : [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
1398 : [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-local-full-reassembly-handoff",
1399 :
1400 : },
1401 : };
1402 :
1403 2424 : VLIB_NODE_FN (ip4_full_reass_node_feature) (vlib_main_t * vm,
1404 : vlib_node_runtime_t * node,
1405 : vlib_frame_t * frame)
1406 : {
1407 124 : return ip4_full_reass_inline (vm, node, frame, FEATURE,
1408 : false /* is_local */);
1409 : }
1410 :
1411 183788 : VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
1412 : .name = "ip4-full-reassembly-feature",
1413 : .vector_size = sizeof (u32),
1414 : .format_trace = format_ip4_full_reass_trace,
1415 : .n_errors = IP4_N_ERROR,
1416 : .error_counters = ip4_error_counters,
1417 : .n_next_nodes = IP4_FULL_REASS_N_NEXT,
1418 : .next_nodes =
1419 : {
1420 : [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
1421 : [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
1422 : [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-full-reass-feature-hoff",
1423 : },
1424 : };
1425 :
1426 76635 : VNET_FEATURE_INIT (ip4_full_reass_feature, static) = {
1427 : .arc_name = "ip4-unicast",
1428 : .node_name = "ip4-full-reassembly-feature",
1429 : .runs_before = VNET_FEATURES ("ip4-lookup",
1430 : "ipsec4-input-feature"),
1431 : .runs_after = 0,
1432 : };
1433 :
1434 2300 : VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm,
1435 : vlib_node_runtime_t * node,
1436 : vlib_frame_t * frame)
1437 : {
1438 0 : return ip4_full_reass_inline (vm, node, frame, CUSTOM, false /* is_local */);
1439 : }
1440 :
1441 183788 : VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
1442 : .name = "ip4-full-reassembly-custom",
1443 : .vector_size = sizeof (u32),
1444 : .format_trace = format_ip4_full_reass_trace,
1445 : .n_errors = IP4_N_ERROR,
1446 : .error_counters = ip4_error_counters,
1447 : .n_next_nodes = IP4_FULL_REASS_N_NEXT,
1448 : .next_nodes =
1449 : {
1450 : [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
1451 : [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
1452 : [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-full-reass-custom-hoff",
1453 : },
1454 : };
1455 :
1456 76635 : VNET_FEATURE_INIT (ip4_full_reass_custom, static) = {
1457 : .arc_name = "ip4-unicast",
1458 : .node_name = "ip4-full-reassembly-feature",
1459 : .runs_before = VNET_FEATURES ("ip4-lookup",
1460 : "ipsec4-input-feature"),
1461 : .runs_after = 0,
1462 : };
1463 :
1464 :
1465 : #ifndef CLIB_MARCH_VARIANT
1466 : uword
1467 0 : ip4_full_reass_custom_register_next_node (uword node_index)
1468 : {
1469 0 : return vlib_node_add_next (vlib_get_main (),
1470 0 : ip4_full_reass_node_custom.index, node_index);
1471 : }
1472 :
1473 : always_inline u32
1474 663 : ip4_full_reass_get_nbuckets ()
1475 : {
1476 663 : ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1477 : u32 nbuckets;
1478 : u8 i;
1479 :
1480 : /* need more mem with more workers */
1481 663 : nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) /
1482 : IP4_REASS_HT_LOAD_FACTOR);
1483 :
1484 7982 : for (i = 0; i < 31; i++)
1485 7982 : if ((1 << i) >= nbuckets)
1486 663 : break;
1487 663 : nbuckets = 1 << i;
1488 :
1489 663 : return nbuckets;
1490 : }
1491 : #endif /* CLIB_MARCH_VARIANT */
1492 :
/* Events signalled to the expire-walk process node. */
typedef enum
{
  /* sent by ip4_full_reass_set () after the parameters were updated */
  IP4_EVENT_CONFIG_CHANGED = 1,
} ip4_full_reass_event_t;
1497 :
1498 : typedef struct
1499 : {
1500 : int failure;
1501 : clib_bihash_16_8_t *new_hash;
1502 : } ip4_rehash_cb_ctx;
1503 :
1504 : #ifndef CLIB_MARCH_VARIANT
1505 : static int
1506 0 : ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
1507 : {
1508 0 : ip4_rehash_cb_ctx *ctx = _ctx;
1509 0 : if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
1510 : {
1511 0 : ctx->failure = 1;
1512 : }
1513 0 : return (BIHASH_WALK_CONTINUE);
1514 : }
1515 :
1516 : static void
1517 619 : ip4_full_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
1518 : u32 max_reassembly_length,
1519 : u32 expire_walk_interval_ms)
1520 : {
1521 619 : ip4_full_reass_main.timeout_ms = timeout_ms;
1522 619 : ip4_full_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
1523 619 : ip4_full_reass_main.max_reass_n = max_reassemblies;
1524 619 : ip4_full_reass_main.max_reass_len = max_reassembly_length;
1525 619 : ip4_full_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
1526 619 : }
1527 :
1528 : vnet_api_error_t
1529 44 : ip4_full_reass_set (u32 timeout_ms, u32 max_reassemblies,
1530 : u32 max_reassembly_length, u32 expire_walk_interval_ms)
1531 : {
1532 44 : u32 old_nbuckets = ip4_full_reass_get_nbuckets ();
1533 44 : ip4_full_reass_set_params (timeout_ms, max_reassemblies,
1534 : max_reassembly_length, expire_walk_interval_ms);
1535 44 : vlib_process_signal_event (ip4_full_reass_main.vlib_main,
1536 44 : ip4_full_reass_main.ip4_full_reass_expire_node_idx,
1537 : IP4_EVENT_CONFIG_CHANGED, 0);
1538 44 : u32 new_nbuckets = ip4_full_reass_get_nbuckets ();
1539 44 : if (ip4_full_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
1540 : {
1541 : clib_bihash_16_8_t new_hash;
1542 1 : clib_memset (&new_hash, 0, sizeof (new_hash));
1543 : ip4_rehash_cb_ctx ctx;
1544 1 : ctx.failure = 0;
1545 1 : ctx.new_hash = &new_hash;
1546 1 : clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
1547 1 : new_nbuckets * 1024);
1548 1 : clib_bihash_foreach_key_value_pair_16_8 (&ip4_full_reass_main.hash,
1549 : ip4_rehash_cb, &ctx);
1550 1 : if (ctx.failure)
1551 : {
1552 0 : clib_bihash_free_16_8 (&new_hash);
1553 0 : return -1;
1554 : }
1555 : else
1556 : {
1557 1 : clib_bihash_free_16_8 (&ip4_full_reass_main.hash);
1558 1 : clib_memcpy_fast (&ip4_full_reass_main.hash, &new_hash,
1559 : sizeof (ip4_full_reass_main.hash));
1560 1 : clib_bihash_copied (&ip4_full_reass_main.hash, &new_hash);
1561 : }
1562 : }
1563 44 : return 0;
1564 : }
1565 :
1566 : vnet_api_error_t
1567 0 : ip4_full_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
1568 : u32 * max_reassembly_length,
1569 : u32 * expire_walk_interval_ms)
1570 : {
1571 0 : *timeout_ms = ip4_full_reass_main.timeout_ms;
1572 0 : *max_reassemblies = ip4_full_reass_main.max_reass_n;
1573 0 : *max_reassembly_length = ip4_full_reass_main.max_reass_len;
1574 0 : *expire_walk_interval_ms = ip4_full_reass_main.expire_walk_interval_ms;
1575 0 : return 0;
1576 : }
1577 :
1578 : static clib_error_t *
1579 575 : ip4_full_reass_init_function (vlib_main_t * vm)
1580 : {
1581 575 : ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1582 575 : clib_error_t *error = 0;
1583 : u32 nbuckets;
1584 : vlib_node_t *node;
1585 :
1586 575 : rm->vlib_main = vm;
1587 :
1588 575 : vec_validate (rm->per_thread_data, vlib_num_workers ());
1589 : ip4_full_reass_per_thread_t *rt;
1590 1205 : vec_foreach (rt, rm->per_thread_data)
1591 : {
1592 630 : clib_spinlock_init (&rt->lock);
1593 630 : pool_alloc (rt->pool, rm->max_reass_n);
1594 : }
1595 :
1596 575 : node = vlib_get_node_by_name (vm, (u8 *) "ip4-full-reassembly-expire-walk");
1597 575 : ASSERT (node);
1598 575 : rm->ip4_full_reass_expire_node_idx = node->index;
1599 :
1600 575 : ip4_full_reass_set_params (IP4_REASS_TIMEOUT_DEFAULT_MS,
1601 : IP4_REASS_MAX_REASSEMBLIES_DEFAULT,
1602 : IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
1603 : IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
1604 :
1605 575 : nbuckets = ip4_full_reass_get_nbuckets ();
1606 575 : clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
1607 :
1608 575 : rm->fq_index = vlib_frame_queue_main_init (ip4_full_reass_node.index, 0);
1609 575 : rm->fq_local_index =
1610 575 : vlib_frame_queue_main_init (ip4_local_full_reass_node.index, 0);
1611 575 : rm->fq_feature_index =
1612 575 : vlib_frame_queue_main_init (ip4_full_reass_node_feature.index, 0);
1613 575 : rm->fq_custom_index =
1614 575 : vlib_frame_queue_main_init (ip4_full_reass_node_custom.index, 0);
1615 :
1616 575 : rm->feature_use_refcount_per_intf = NULL;
1617 575 : rm->is_local_reass_enabled = 1;
1618 :
1619 575 : return error;
1620 : }
1621 :
1622 38591 : VLIB_INIT_FUNCTION (ip4_full_reass_init_function);
1623 : #endif /* CLIB_MARCH_VARIANT */
1624 :
1625 : static uword
1626 575 : ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
1627 : CLIB_UNUSED (vlib_frame_t *f))
1628 : {
1629 575 : ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1630 575 : uword event_type, *event_data = 0;
1631 :
1632 : while (true)
1633 148224 : {
1634 148799 : vlib_process_wait_for_event_or_clock (vm,
1635 148799 : (f64)
1636 148799 : rm->expire_walk_interval_ms /
1637 : (f64) MSEC_PER_SEC);
1638 148224 : event_type = vlib_process_get_events (vm, &event_data);
1639 :
1640 148224 : switch (event_type)
1641 : {
1642 148224 : case ~0:
1643 : /* no events => timeout */
1644 : /* fallthrough */
1645 : case IP4_EVENT_CONFIG_CHANGED:
1646 : /* nothing to do here */
1647 148224 : break;
1648 0 : default:
1649 0 : clib_warning ("BUG: event type 0x%wx", event_type);
1650 0 : break;
1651 : }
1652 148224 : f64 now = vlib_time_now (vm);
1653 :
1654 : ip4_full_reass_t *reass;
1655 148224 : int *pool_indexes_to_free = NULL;
1656 :
1657 148224 : uword thread_index = 0;
1658 : int index;
1659 148224 : const uword nthreads = vlib_num_workers () + 1;
1660 :
1661 338102 : for (thread_index = 0; thread_index < nthreads; ++thread_index)
1662 : {
1663 189878 : ip4_full_reass_per_thread_t *rt =
1664 189878 : &rm->per_thread_data[thread_index];
1665 189878 : clib_spinlock_lock (&rt->lock);
1666 :
1667 189878 : vec_reset_length (pool_indexes_to_free);
1668 :
1669 : /* Pace the number of timeouts handled per thread,to avoid barrier
1670 : * sync issues in real world scenarios */
1671 :
1672 189878 : u32 beg = rt->last_id;
1673 : /* to ensure we walk at least once per sec per context */
1674 189878 : u32 end =
1675 : beg + (IP4_REASS_MAX_REASSEMBLIES_DEFAULT *
1676 : IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS / MSEC_PER_SEC +
1677 : 1);
1678 189878 : if (end > vec_len (rt->pool))
1679 : {
1680 189878 : end = vec_len (rt->pool);
1681 189878 : rt->last_id = 0;
1682 : }
1683 : else
1684 : {
1685 0 : rt->last_id = end;
1686 : }
1687 :
1688 190081 : pool_foreach_stepping_index (index, beg, end, rt->pool)
1689 : {
1690 203 : reass = pool_elt_at_index (rt->pool, index);
1691 203 : if (now > reass->last_heard + rm->timeout)
1692 : {
1693 101 : vec_add1 (pool_indexes_to_free, index);
1694 : }
1695 : }
1696 :
1697 189878 : if (vec_len (pool_indexes_to_free))
1698 5 : vlib_node_increment_counter (vm, node->node_index,
1699 : IP4_ERROR_REASS_TIMEOUT,
1700 5 : vec_len (pool_indexes_to_free));
1701 : int *i;
1702 189979 : vec_foreach (i, pool_indexes_to_free)
1703 : {
1704 101 : ip4_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
1705 101 : ip4_full_reass_drop_all (vm, node, reass);
1706 101 : ip4_full_reass_free (rm, rt, reass);
1707 : }
1708 :
1709 189878 : clib_spinlock_unlock (&rt->lock);
1710 : }
1711 :
1712 148224 : vec_free (pool_indexes_to_free);
1713 148224 : if (event_data)
1714 : {
1715 707 : vec_set_len (event_data, 0);
1716 : }
1717 : }
1718 :
1719 : return 0;
1720 : }
1721 :
1722 183788 : VLIB_REGISTER_NODE (ip4_full_reass_expire_node) = {
1723 : .function = ip4_full_reass_walk_expired,
1724 : .type = VLIB_NODE_TYPE_PROCESS,
1725 : .name = "ip4-full-reassembly-expire-walk",
1726 : .format_trace = format_ip4_full_reass_trace,
1727 : .n_errors = IP4_N_ERROR,
1728 : .error_counters = ip4_error_counters,
1729 : };
1730 :
1731 : static u8 *
1732 50 : format_ip4_full_reass_key (u8 * s, va_list * args)
1733 : {
1734 50 : ip4_full_reass_key_t *key = va_arg (*args, ip4_full_reass_key_t *);
1735 : s =
1736 50 : format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
1737 : key->fib_index, format_ip4_address, &key->src, format_ip4_address,
1738 50 : &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
1739 50 : return s;
1740 : }
1741 :
1742 : static u8 *
1743 50 : format_ip4_reass (u8 * s, va_list * args)
1744 : {
1745 50 : vlib_main_t *vm = va_arg (*args, vlib_main_t *);
1746 50 : ip4_full_reass_t *reass = va_arg (*args, ip4_full_reass_t *);
1747 :
1748 50 : s = format (s, "ID: %lu, key: %U\n first_bi: %u, data_len: %u, "
1749 : "last_packet_octet: %u, trace_op_counter: %u\n",
1750 : reass->id, format_ip4_full_reass_key, &reass->key,
1751 : reass->first_bi, reass->data_len,
1752 : reass->last_packet_octet, reass->trace_op_counter);
1753 :
1754 50 : u32 bi = reass->first_bi;
1755 50 : u32 counter = 0;
1756 100 : while (~0 != bi)
1757 : {
1758 50 : vlib_buffer_t *b = vlib_get_buffer (vm, bi);
1759 50 : vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1760 : s =
1761 50 : format (s,
1762 : " #%03u: range: [%u, %u], bi: %u, off: %d, len: %u, "
1763 50 : "fragment[%u, %u]\n", counter, vnb->ip.reass.range_first,
1764 50 : vnb->ip.reass.range_last, bi,
1765 : ip4_full_reass_buffer_get_data_offset (b),
1766 50 : ip4_full_reass_buffer_get_data_len (b),
1767 50 : vnb->ip.reass.fragment_first, vnb->ip.reass.fragment_last);
1768 50 : if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
1769 : {
1770 0 : bi = b->next_buffer;
1771 : }
1772 : else
1773 : {
1774 50 : bi = ~0;
1775 : }
1776 : }
1777 50 : return s;
1778 : }
1779 :
1780 : static clib_error_t *
1781 20 : show_ip4_reass (vlib_main_t * vm,
1782 : unformat_input_t * input,
1783 : CLIB_UNUSED (vlib_cli_command_t * lmd))
1784 : {
1785 20 : ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1786 :
1787 20 : vlib_cli_output (vm, "---------------------");
1788 20 : vlib_cli_output (vm, "IP4 reassembly status");
1789 20 : vlib_cli_output (vm, "---------------------");
1790 20 : bool details = false;
1791 20 : if (unformat (input, "details"))
1792 : {
1793 20 : details = true;
1794 : }
1795 :
1796 20 : u32 sum_reass_n = 0;
1797 : ip4_full_reass_t *reass;
1798 : uword thread_index;
1799 20 : const uword nthreads = vlib_num_workers () + 1;
1800 46 : for (thread_index = 0; thread_index < nthreads; ++thread_index)
1801 : {
1802 26 : ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
1803 26 : clib_spinlock_lock (&rt->lock);
1804 26 : if (details)
1805 : {
1806 76 : pool_foreach (reass, rt->pool) {
1807 50 : vlib_cli_output (vm, "%U", format_ip4_reass, vm, reass);
1808 : }
1809 : }
1810 26 : sum_reass_n += rt->reass_n;
1811 26 : clib_spinlock_unlock (&rt->lock);
1812 : }
1813 20 : vlib_cli_output (vm, "---------------------");
1814 20 : vlib_cli_output (vm, "Current full IP4 reassemblies count: %lu\n",
1815 : (long unsigned) sum_reass_n);
1816 20 : vlib_cli_output (vm,
1817 : "Maximum configured concurrent full IP4 reassemblies per worker-thread: %lu\n",
1818 20 : (long unsigned) rm->max_reass_n);
1819 20 : vlib_cli_output (vm,
1820 : "Maximum configured amount of fragments "
1821 : "per full IP4 reassembly: %lu\n",
1822 20 : (long unsigned) rm->max_reass_len);
1823 20 : vlib_cli_output (vm,
1824 : "Maximum configured full IP4 reassembly timeout: %lums\n",
1825 20 : (long unsigned) rm->timeout_ms);
1826 20 : vlib_cli_output (vm,
1827 : "Maximum configured full IP4 reassembly expire walk interval: %lums\n",
1828 20 : (long unsigned) rm->expire_walk_interval_ms);
1829 20 : return 0;
1830 : }
1831 :
1832 285289 : VLIB_CLI_COMMAND (show_ip4_full_reass_cmd, static) = {
1833 : .path = "show ip4-full-reassembly",
1834 : .short_help = "show ip4-full-reassembly [details]",
1835 : .function = show_ip4_reass,
1836 : };
1837 :
1838 : #ifndef CLIB_MARCH_VARIANT
1839 : vnet_api_error_t
1840 144 : ip4_full_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
1841 : {
1842 144 : return vnet_feature_enable_disable ("ip4-unicast",
1843 : "ip4-full-reassembly-feature",
1844 : sw_if_index, enable_disable, 0, 0);
1845 : }
1846 : #endif /* CLIB_MARCH_VARIANT */
1847 :
1848 :
/* X-macro listing the handoff node errors as (symbol, description). */
#define foreach_ip4_full_reass_handoff_error _ (CONGESTION_DROP, "congestion drop")
1851 :
1852 :
1853 : typedef enum
1854 : {
1855 : #define _(sym,str) IP4_FULL_REASS_HANDOFF_ERROR_##sym,
1856 : foreach_ip4_full_reass_handoff_error
1857 : #undef _
1858 : IP4_FULL_REASS_HANDOFF_N_ERROR,
1859 : } ip4_full_reass_handoff_error_t;
1860 :
1861 : static char *ip4_full_reass_handoff_error_strings[] = {
1862 : #define _(sym,string) string,
1863 : foreach_ip4_full_reass_handoff_error
1864 : #undef _
1865 : };
1866 :
1867 : typedef struct
1868 : {
1869 : u32 next_worker_index;
1870 : } ip4_full_reass_handoff_trace_t;
1871 :
1872 : static u8 *
1873 264 : format_ip4_full_reass_handoff_trace (u8 * s, va_list * args)
1874 : {
1875 264 : CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1876 264 : CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1877 264 : ip4_full_reass_handoff_trace_t *t =
1878 : va_arg (*args, ip4_full_reass_handoff_trace_t *);
1879 :
1880 : s =
1881 264 : format (s, "ip4-full-reassembly-handoff: next-worker %d",
1882 : t->next_worker_index);
1883 :
1884 264 : return s;
1885 : }
1886 :
1887 : always_inline uword
1888 12 : ip4_full_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1889 : vlib_frame_t *frame,
1890 : ip4_full_reass_node_type_t type,
1891 : bool is_local)
1892 : {
1893 12 : ip4_full_reass_main_t *rm = &ip4_full_reass_main;
1894 :
1895 : vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1896 : u32 n_enq, n_left_from, *from;
1897 : u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1898 : u32 fq_index;
1899 :
1900 12 : from = vlib_frame_vector_args (frame);
1901 12 : n_left_from = frame->n_vectors;
1902 12 : vlib_get_buffers (vm, from, bufs, n_left_from);
1903 :
1904 12 : b = bufs;
1905 12 : ti = thread_indices;
1906 :
1907 12 : switch (type)
1908 : {
1909 0 : case NORMAL:
1910 0 : if (is_local)
1911 : {
1912 0 : fq_index = rm->fq_local_index;
1913 : }
1914 : else
1915 : {
1916 0 : fq_index = rm->fq_index;
1917 : }
1918 0 : break;
1919 12 : case FEATURE:
1920 12 : fq_index = rm->fq_feature_index;
1921 12 : break;
1922 0 : case CUSTOM:
1923 0 : fq_index = rm->fq_custom_index;
1924 0 : break;
1925 0 : default:
1926 0 : clib_warning ("Unexpected `type' (%d)!", type);
1927 : }
1928 :
1929 390 : while (n_left_from > 0)
1930 : {
1931 378 : ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
1932 :
1933 378 : if (PREDICT_FALSE
1934 : ((node->flags & VLIB_NODE_FLAG_TRACE)
1935 : && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1936 : {
1937 : ip4_full_reass_handoff_trace_t *t =
1938 378 : vlib_add_trace (vm, node, b[0], sizeof (*t));
1939 378 : t->next_worker_index = ti[0];
1940 : }
1941 :
1942 378 : n_left_from -= 1;
1943 378 : ti += 1;
1944 378 : b += 1;
1945 : }
1946 12 : n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
1947 12 : thread_indices, frame->n_vectors, 1);
1948 :
1949 12 : if (n_enq < frame->n_vectors)
1950 0 : vlib_node_increment_counter (vm, node->node_index,
1951 : IP4_FULL_REASS_HANDOFF_ERROR_CONGESTION_DROP,
1952 0 : frame->n_vectors - n_enq);
1953 12 : return frame->n_vectors;
1954 : }
1955 :
1956 2300 : VLIB_NODE_FN (ip4_full_reass_handoff_node) (vlib_main_t * vm,
1957 : vlib_node_runtime_t * node,
1958 : vlib_frame_t * frame)
1959 : {
1960 0 : return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
1961 : false /* is_local */);
1962 : }
1963 :
1964 :
1965 183788 : VLIB_REGISTER_NODE (ip4_full_reass_handoff_node) = {
1966 : .name = "ip4-full-reassembly-handoff",
1967 : .vector_size = sizeof (u32),
1968 : .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
1969 : .error_strings = ip4_full_reass_handoff_error_strings,
1970 : .format_trace = format_ip4_full_reass_handoff_trace,
1971 :
1972 : .n_next_nodes = 1,
1973 :
1974 : .next_nodes = {
1975 : [0] = "error-drop",
1976 : },
1977 : };
1978 :
1979 2300 : VLIB_NODE_FN (ip4_local_full_reass_handoff_node)
1980 : (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1981 : {
1982 0 : return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
1983 : true /* is_local */);
1984 : }
1985 :
1986 183788 : VLIB_REGISTER_NODE (ip4_local_full_reass_handoff_node) = {
1987 : .name = "ip4-local-full-reassembly-handoff",
1988 : .vector_size = sizeof (u32),
1989 : .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
1990 : .error_strings = ip4_full_reass_handoff_error_strings,
1991 : .format_trace = format_ip4_full_reass_handoff_trace,
1992 :
1993 : .n_next_nodes = 1,
1994 :
1995 : .next_nodes = {
1996 : [0] = "error-drop",
1997 : },
1998 : };
1999 :
2000 2312 : VLIB_NODE_FN (ip4_full_reass_feature_handoff_node) (vlib_main_t * vm,
2001 : vlib_node_runtime_t *
2002 : node,
2003 : vlib_frame_t * frame)
2004 : {
2005 12 : return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE,
2006 : false /* is_local */);
2007 : }
2008 :
2009 183788 : VLIB_REGISTER_NODE (ip4_full_reass_feature_handoff_node) = {
2010 : .name = "ip4-full-reass-feature-hoff",
2011 : .vector_size = sizeof (u32),
2012 : .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
2013 : .error_strings = ip4_full_reass_handoff_error_strings,
2014 : .format_trace = format_ip4_full_reass_handoff_trace,
2015 :
2016 : .n_next_nodes = 1,
2017 :
2018 : .next_nodes = {
2019 : [0] = "error-drop",
2020 : },
2021 : };
2022 :
2023 2300 : VLIB_NODE_FN (ip4_full_reass_custom_handoff_node) (vlib_main_t * vm,
2024 : vlib_node_runtime_t *
2025 : node,
2026 : vlib_frame_t * frame)
2027 : {
2028 0 : return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM,
2029 : false /* is_local */);
2030 : }
2031 :
2032 183788 : VLIB_REGISTER_NODE (ip4_full_reass_custom_handoff_node) = {
2033 : .name = "ip4-full-reass-custom-hoff",
2034 : .vector_size = sizeof (u32),
2035 : .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
2036 : .error_strings = ip4_full_reass_handoff_error_strings,
2037 : .format_trace = format_ip4_full_reass_handoff_trace,
2038 :
2039 : .n_next_nodes = 1,
2040 :
2041 : .next_nodes = {
2042 : [0] = "error-drop",
2043 : },
2044 : };
2045 :
2046 : #ifndef CLIB_MARCH_VARIANT
2047 : int
2048 0 : ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
2049 : {
2050 0 : ip4_full_reass_main_t *rm = &ip4_full_reass_main;
2051 0 : vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);
2052 0 : if (is_enable)
2053 : {
2054 0 : if (!rm->feature_use_refcount_per_intf[sw_if_index])
2055 : {
2056 0 : ++rm->feature_use_refcount_per_intf[sw_if_index];
2057 0 : return vnet_feature_enable_disable ("ip4-unicast",
2058 : "ip4-full-reassembly-feature",
2059 : sw_if_index, 1, 0, 0);
2060 : }
2061 0 : ++rm->feature_use_refcount_per_intf[sw_if_index];
2062 : }
2063 : else
2064 : {
2065 0 : --rm->feature_use_refcount_per_intf[sw_if_index];
2066 0 : if (!rm->feature_use_refcount_per_intf[sw_if_index])
2067 0 : return vnet_feature_enable_disable ("ip4-unicast",
2068 : "ip4-full-reassembly-feature",
2069 : sw_if_index, 0, 0, 0);
2070 : }
2071 0 : return -1;
2072 : }
2073 :
2074 : void
2075 6 : ip4_local_full_reass_enable_disable (int enable)
2076 : {
2077 6 : if (enable)
2078 : {
2079 2 : ip4_full_reass_main.is_local_reass_enabled = 1;
2080 : }
2081 : else
2082 : {
2083 4 : ip4_full_reass_main.is_local_reass_enabled = 0;
2084 : }
2085 6 : }
2086 :
2087 : int
2088 0 : ip4_local_full_reass_enabled ()
2089 : {
2090 0 : return ip4_full_reass_main.is_local_reass_enabled;
2091 : }
2092 :
2093 : #endif
2094 :
2095 : /*
2096 : * fd.io coding-style-patch-verification: ON
2097 : *
2098 : * Local Variables:
2099 : * eval: (c-set-style "gnu")
2100 : * End:
2101 : */
|