Line data Source code
1 : /*
2 : * Copyright (c) 2015 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 : /*
16 : * buffer.c: allocate/free network buffers.
17 : *
18 : * Copyright (c) 2008 Eliot Dresselhaus
19 : *
20 : * Permission is hereby granted, free of charge, to any person obtaining
21 : * a copy of this software and associated documentation files (the
22 : * "Software"), to deal in the Software without restriction, including
23 : * without limitation the rights to use, copy, modify, merge, publish,
24 : * distribute, sublicense, and/or sell copies of the Software, and to
25 : * permit persons to whom the Software is furnished to do so, subject to
26 : * the following conditions:
27 : *
28 : * The above copyright notice and this permission notice shall be
29 : * included in all copies or substantial portions of the Software.
30 : *
31 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 : * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 : * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 : * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 : * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 : * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 : */
39 :
40 : /**
41 : * @file
42 : *
43 : * Allocate/free network buffers.
44 : */
45 :
46 : #include <vppinfra/linux/sysfs.h>
47 : #include <vlib/vlib.h>
48 : #include <vlib/unix/unix.h>
49 : #include <vlib/stats/stats.h>
50 :
51 : #define VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA 16384
52 : #define VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV 8192
53 :
54 : #ifdef CLIB_HAVE_VEC128
55 : /* Assumptions by vlib_buffer_free_inline: */
56 : STATIC_ASSERT_FITS_IN (vlib_buffer_t, flags, 16);
57 : STATIC_ASSERT_FITS_IN (vlib_buffer_t, ref_count, 16);
58 : STATIC_ASSERT_FITS_IN (vlib_buffer_t, buffer_pool_index, 16);
59 : #endif
60 :
61 : /* Make sure that buffer template size is not accidentally changed */
62 : STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64);
63 :
64 : u16 __vlib_buffer_external_hdr_size = 0;
65 :
66 : uword
67 225387 : vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm,
68 : vlib_buffer_t * b_first)
69 : {
70 225387 : vlib_buffer_t *b = b_first;
71 225387 : uword l_first = b_first->current_length;
72 225387 : uword l = 0;
73 545569 : while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
74 : {
75 320182 : b = vlib_get_buffer (vm, b->next_buffer);
76 320182 : l += b->current_length;
77 : }
78 225387 : b_first->total_length_not_including_first_buffer = l;
79 225387 : b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
80 225387 : return l + l_first;
81 : }
82 :
83 : u8 *
84 648390 : format_vlib_buffer_no_chain (u8 * s, va_list * args)
85 : {
86 648390 : vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
87 648390 : u32 indent = format_get_indent (s);
88 648390 : u8 *a = 0;
89 :
90 : #define _(bit, name, v) \
91 : if (v && (b->flags & VLIB_BUFFER_##name)) \
92 : a = format (a, "%s ", v);
93 648390 : foreach_vlib_buffer_flag
94 : #undef _
95 648390 : s = format (s, "current data %d, length %d, buffer-pool %d, "
96 648390 : "ref-count %u", b->current_data, b->current_length,
97 648390 : b->buffer_pool_index, b->ref_count);
98 :
99 648390 : if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
100 140757 : s = format (s, ", totlen-nifb %d",
101 : b->total_length_not_including_first_buffer);
102 :
103 648390 : if (b->flags & VLIB_BUFFER_IS_TRACED)
104 648390 : s = format (s, ", trace handle 0x%x", b->trace_handle);
105 :
106 648390 : if (a)
107 140757 : s = format (s, "\n%U%v", format_white_space, indent, a);
108 648390 : vec_free (a);
109 :
110 648390 : return s;
111 : }
112 :
113 : u8 *
114 0 : format_vlib_buffer (u8 * s, va_list * args)
115 : {
116 0 : vlib_main_t *vm = vlib_get_main ();
117 0 : vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
118 0 : u32 indent = format_get_indent (s);
119 :
120 0 : s = format (s, "%U", format_vlib_buffer_no_chain, b);
121 :
122 0 : while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
123 : {
124 0 : u32 next_buffer = b->next_buffer;
125 0 : b = vlib_get_buffer (vm, next_buffer);
126 :
127 : s =
128 0 : format (s, "\n%Unext-buffer 0x%x, segment length %d, ref-count %u",
129 0 : format_white_space, indent, next_buffer, b->current_length,
130 0 : b->ref_count);
131 : }
132 :
133 0 : return s;
134 : }
135 :
136 : u8 *
137 0 : format_vlib_buffer_and_data (u8 * s, va_list * args)
138 : {
139 0 : vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
140 :
141 0 : s = format (s, "%U, %U",
142 : format_vlib_buffer, b,
143 : format_hex_bytes, vlib_buffer_get_current (b), 64);
144 :
145 0 : return s;
146 : }
147 :
148 : static u8 *
149 0 : format_vlib_buffer_known_state (u8 * s, va_list * args)
150 : {
151 0 : vlib_buffer_known_state_t state = va_arg (*args, vlib_buffer_known_state_t);
152 : char *t;
153 :
154 0 : switch (state)
155 : {
156 0 : case VLIB_BUFFER_UNKNOWN:
157 0 : t = "unknown";
158 0 : break;
159 :
160 0 : case VLIB_BUFFER_KNOWN_ALLOCATED:
161 0 : t = "known-allocated";
162 0 : break;
163 :
164 0 : case VLIB_BUFFER_KNOWN_FREE:
165 0 : t = "known-free";
166 0 : break;
167 :
168 0 : default:
169 0 : t = "invalid";
170 0 : break;
171 : }
172 :
173 0 : return format (s, "%s", t);
174 : }
175 :
176 : u8 *
177 0 : format_vlib_buffer_contents (u8 * s, va_list * va)
178 : {
179 0 : vlib_main_t *vm = va_arg (*va, vlib_main_t *);
180 0 : vlib_buffer_t *b = va_arg (*va, vlib_buffer_t *);
181 :
182 : while (1)
183 : {
184 0 : vec_add (s, vlib_buffer_get_current (b), b->current_length);
185 0 : if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
186 0 : break;
187 0 : b = vlib_get_buffer (vm, b->next_buffer);
188 : }
189 :
190 0 : return s;
191 : }
192 :
193 : static u8 *
194 0 : vlib_validate_buffer_helper (vlib_main_t * vm,
195 : u32 bi,
196 : uword follow_buffer_next, uword ** unique_hash)
197 : {
198 0 : vlib_buffer_main_t *bm = vm->buffer_main;
199 0 : vlib_buffer_t *b = vlib_get_buffer (vm, bi);
200 :
201 0 : if (vec_len (bm->buffer_pools) <= b->buffer_pool_index)
202 0 : return format (0, "unknown buffer pool 0x%x", b->buffer_pool_index);
203 :
204 0 : if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE)
205 0 : return format (0, "current data %d before pre-data", b->current_data);
206 :
207 0 : if (b->current_data + b->current_length >
208 0 : vlib_buffer_get_default_data_size (vm))
209 0 : return format (0, "%d-%d beyond end of buffer %d", b->current_data,
210 0 : b->current_length, vlib_buffer_get_default_data_size (vm));
211 :
212 0 : if (follow_buffer_next && (b->flags & VLIB_BUFFER_NEXT_PRESENT))
213 : {
214 : vlib_buffer_known_state_t k;
215 : u8 *msg, *result;
216 :
217 0 : k = vlib_buffer_is_known (vm, b->next_buffer);
218 0 : if (k != VLIB_BUFFER_KNOWN_ALLOCATED)
219 0 : return format (0, "next 0x%x: %U",
220 : b->next_buffer, format_vlib_buffer_known_state, k);
221 :
222 0 : if (unique_hash)
223 : {
224 0 : if (hash_get (*unique_hash, b->next_buffer))
225 0 : return format (0, "duplicate buffer 0x%x", b->next_buffer);
226 :
227 0 : hash_set1 (*unique_hash, b->next_buffer);
228 : }
229 :
230 0 : msg = vlib_validate_buffer (vm, b->next_buffer, follow_buffer_next);
231 0 : if (msg)
232 : {
233 0 : result = format (0, "next 0x%x: %v", b->next_buffer, msg);
234 0 : vec_free (msg);
235 0 : return result;
236 : }
237 : }
238 :
239 0 : return 0;
240 : }
241 :
242 : u8 *
243 0 : vlib_validate_buffer (vlib_main_t * vm, u32 bi, uword follow_buffer_next)
244 : {
245 0 : return vlib_validate_buffer_helper (vm, bi, follow_buffer_next,
246 : /* unique_hash */ 0);
247 : }
248 :
249 : u8 *
250 0 : vlib_validate_buffers (vlib_main_t * vm,
251 : u32 * buffers,
252 : uword next_buffer_stride,
253 : uword n_buffers,
254 : vlib_buffer_known_state_t known_state,
255 : uword follow_buffer_next)
256 : {
257 : uword i, *hash;
258 0 : u32 bi, *b = buffers;
259 : vlib_buffer_known_state_t k;
260 0 : u8 *msg = 0, *result = 0;
261 :
262 0 : hash = hash_create (0, 0);
263 0 : for (i = 0; i < n_buffers; i++)
264 : {
265 0 : bi = b[0];
266 0 : b += next_buffer_stride;
267 :
268 : /* Buffer is not unique. */
269 0 : if (hash_get (hash, bi))
270 : {
271 0 : msg = format (0, "not unique");
272 0 : goto done;
273 : }
274 :
275 0 : k = vlib_buffer_is_known (vm, bi);
276 0 : if (k != known_state)
277 : {
278 0 : msg = format (0, "is %U; expected %U",
279 : format_vlib_buffer_known_state, k,
280 : format_vlib_buffer_known_state, known_state);
281 0 : goto done;
282 : }
283 :
284 0 : msg = vlib_validate_buffer_helper (vm, bi, follow_buffer_next, &hash);
285 0 : if (msg)
286 0 : goto done;
287 :
288 0 : hash_set1 (hash, bi);
289 : }
290 :
291 0 : done:
292 0 : if (msg)
293 : {
294 0 : result = format (0, "0x%x: %v", bi, msg);
295 0 : vec_free (msg);
296 : }
297 0 : hash_free (hash);
298 0 : return result;
299 : }
300 :
301 : /* When debugging validate that given buffers are either known allocated
302 : or known free. */
303 : void
304 24585800 : vlib_buffer_validate_alloc_free (vlib_main_t * vm,
305 : u32 * buffers,
306 : uword n_buffers,
307 : vlib_buffer_known_state_t expected_state)
308 : {
309 24585800 : vlib_buffer_main_t *bm = vm->buffer_main;
310 : u32 *b;
311 : uword i, bi, is_free;
312 :
313 : if (CLIB_DEBUG == 0)
314 : return;
315 :
316 24585800 : is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED;
317 24585800 : b = buffers;
318 397422000 : for (i = 0; i < n_buffers; i++)
319 : {
320 : vlib_buffer_known_state_t known;
321 :
322 372836000 : bi = b[0];
323 372836000 : b += 1;
324 372836000 : known = vlib_buffer_is_known (vm, bi);
325 :
326 372836000 : if (known == VLIB_BUFFER_UNKNOWN &&
327 : expected_state == VLIB_BUFFER_KNOWN_FREE)
328 76899 : known = VLIB_BUFFER_KNOWN_FREE;
329 :
330 372836000 : if (known != expected_state)
331 : {
332 0 : clib_panic ("%s %U buffer 0x%x", is_free ? "freeing" : "allocating",
333 : format_vlib_buffer_known_state, known, bi);
334 : }
335 :
336 372836000 : clib_spinlock_lock (&bm->buffer_known_hash_lockp);
337 372836000 : hash_set (bm->buffer_known_hash, bi, is_free ? VLIB_BUFFER_KNOWN_FREE :
338 : VLIB_BUFFER_KNOWN_ALLOCATED);
339 372836000 : clib_spinlock_unlock (&bm->buffer_known_hash_lockp);
340 : }
341 : }
342 :
343 : void
344 3354 : vlib_packet_template_init (vlib_main_t * vm,
345 : vlib_packet_template_t * t,
346 : void *packet_data,
347 : uword n_packet_data_bytes,
348 : uword min_n_buffers_each_alloc, char *fmt, ...)
349 : {
350 : va_list va;
351 :
352 3354 : va_start (va, fmt);
353 3354 : t->name = va_format (0, fmt, &va);
354 3354 : va_end (va);
355 :
356 3354 : vlib_worker_thread_barrier_sync (vm);
357 :
358 3354 : clib_memset (t, 0, sizeof (t[0]));
359 :
360 3354 : vec_add (t->packet_data, packet_data, n_packet_data_bytes);
361 3354 : t->min_n_buffers_each_alloc = min_n_buffers_each_alloc;
362 3354 : vlib_worker_thread_barrier_release (vm);
363 3354 : }
364 :
365 : void *
366 984 : vlib_packet_template_get_packet (vlib_main_t * vm,
367 : vlib_packet_template_t * t, u32 * bi_result)
368 : {
369 : u32 bi;
370 : vlib_buffer_t *b;
371 :
372 984 : if (vlib_buffer_alloc (vm, &bi, 1) != 1)
373 0 : return 0;
374 :
375 984 : *bi_result = bi;
376 :
377 984 : b = vlib_get_buffer (vm, bi);
378 1968 : clib_memcpy_fast (vlib_buffer_get_current (b),
379 1968 : t->packet_data, vec_len (t->packet_data));
380 984 : b->current_length = vec_len (t->packet_data);
381 :
382 984 : return b->data;
383 : }
384 :
385 : /* Append given data to end of buffer, possibly allocating new buffers. */
386 : int
387 7032 : vlib_buffer_add_data (vlib_main_t * vm, u32 * buffer_index, void *data,
388 : u32 n_data_bytes)
389 : {
390 : u32 n_buffer_bytes, n_left, n_left_this_buffer, bi;
391 : vlib_buffer_t *b;
392 : void *d;
393 :
394 7032 : bi = *buffer_index;
395 7032 : if (bi == ~0 && 1 != vlib_buffer_alloc (vm, &bi, 1))
396 0 : goto out_of_buffers;
397 :
398 7032 : d = data;
399 7032 : n_left = n_data_bytes;
400 7032 : n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
401 :
402 7032 : b = vlib_get_buffer (vm, bi);
403 7032 : b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
404 :
405 : /* Get to the end of the chain before we try to append data... */
406 7032 : while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
407 0 : b = vlib_get_buffer (vm, b->next_buffer);
408 :
409 : while (1)
410 0 : {
411 : u32 n;
412 :
413 7032 : ASSERT (n_buffer_bytes >= b->current_length);
414 7032 : n_left_this_buffer =
415 7032 : n_buffer_bytes - (b->current_data + b->current_length);
416 7032 : n = clib_min (n_left_this_buffer, n_left);
417 7032 : clib_memcpy_fast (vlib_buffer_get_current (b) + b->current_length, d,
418 : n);
419 7032 : b->current_length += n;
420 7032 : n_left -= n;
421 7032 : if (n_left == 0)
422 7032 : break;
423 :
424 0 : d += n;
425 0 : if (1 != vlib_buffer_alloc (vm, &b->next_buffer, 1))
426 0 : goto out_of_buffers;
427 :
428 0 : b->flags |= VLIB_BUFFER_NEXT_PRESENT;
429 :
430 0 : b = vlib_get_buffer (vm, b->next_buffer);
431 : }
432 :
433 7032 : *buffer_index = bi;
434 7032 : return 0;
435 :
436 0 : out_of_buffers:
437 0 : clib_warning ("out of buffers");
438 0 : return 1;
439 : }
440 :
441 : u16
442 0 : vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
443 : vlib_buffer_t * first,
444 : vlib_buffer_t ** last, void *data,
445 : u16 data_len)
446 : {
447 0 : vlib_buffer_t *l = *last;
448 0 : u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
449 0 : u16 copied = 0;
450 0 : ASSERT (n_buffer_bytes >= l->current_length + l->current_data);
451 0 : while (data_len)
452 : {
453 0 : u16 max = n_buffer_bytes - l->current_length - l->current_data;
454 0 : if (max == 0)
455 : {
456 0 : if (1 != vlib_buffer_alloc_from_pool (vm, &l->next_buffer, 1,
457 0 : first->buffer_pool_index))
458 0 : return copied;
459 0 : *last = l = vlib_buffer_chain_buffer (vm, l, l->next_buffer);
460 0 : max = n_buffer_bytes - l->current_length - l->current_data;
461 : }
462 :
463 0 : u16 len = (data_len > max) ? max : data_len;
464 0 : clib_memcpy_fast (vlib_buffer_get_current (l) + l->current_length,
465 0 : data + copied, len);
466 0 : vlib_buffer_chain_increase_length (first, l, len);
467 0 : data_len -= len;
468 0 : copied += len;
469 : }
470 0 : return copied;
471 : }
472 :
473 : static uword
474 2795 : vlib_buffer_alloc_size (uword ext_hdr_size, uword data_size)
475 : {
476 2795 : uword alloc_size = ext_hdr_size + sizeof (vlib_buffer_t) + data_size;
477 2795 : alloc_size = CLIB_CACHE_LINE_ROUND (alloc_size);
478 :
479 : /* in case when we have even number of cachelines, we add one more for
480 : * better cache occupancy */
481 2795 : alloc_size |= CLIB_CACHE_LINE_BYTES;
482 :
483 2795 : return alloc_size;
484 : }
485 :
486 : u8
487 559 : vlib_buffer_pool_create (vlib_main_t * vm, char *name, u32 data_size,
488 : u32 physmem_map_index)
489 : {
490 559 : vlib_buffer_main_t *bm = vm->buffer_main;
491 : vlib_buffer_pool_t *bp;
492 559 : vlib_physmem_map_t *m = vlib_physmem_get_map (vm, physmem_map_index);
493 559 : uword start = pointer_to_uword (m->base);
494 559 : uword size = (uword) m->n_pages << m->log2_page_size;
495 : uword i, j;
496 : u32 alloc_size, n_alloc_per_page;
497 :
498 559 : if (vec_len (bm->buffer_pools) >= 255)
499 0 : return ~0;
500 :
501 559 : vec_add2_aligned (bm->buffer_pools, bp, 1, CLIB_CACHE_LINE_BYTES);
502 :
503 559 : if (bm->buffer_mem_size == 0)
504 : {
505 559 : bm->buffer_mem_start = start;
506 559 : bm->buffer_mem_size = size;
507 : }
508 0 : else if (start < bm->buffer_mem_start)
509 : {
510 0 : bm->buffer_mem_size += bm->buffer_mem_start - start;
511 0 : bm->buffer_mem_start = start;
512 0 : if (size > bm->buffer_mem_size)
513 0 : bm->buffer_mem_size = size;
514 : }
515 0 : else if (start > bm->buffer_mem_start)
516 : {
517 0 : uword new_size = start - bm->buffer_mem_start + size;
518 0 : if (new_size > bm->buffer_mem_size)
519 0 : bm->buffer_mem_size = new_size;
520 : }
521 :
522 559 : if ((u64) bm->buffer_mem_size >
523 : ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES)))
524 : {
525 0 : clib_panic ("buffer memory size out of range!");
526 : }
527 :
528 559 : bp->start = start;
529 559 : bp->size = size;
530 559 : bp->index = bp - bm->buffer_pools;
531 559 : bp->buffer_template.buffer_pool_index = bp->index;
532 559 : bp->buffer_template.ref_count = 1;
533 559 : bp->physmem_map_index = physmem_map_index;
534 559 : bp->name = format (0, "%s%c", name, 0);
535 559 : bp->data_size = data_size;
536 559 : bp->numa_node = m->numa_node;
537 :
538 559 : vec_validate_aligned (bp->threads, vlib_get_n_threads () - 1,
539 : CLIB_CACHE_LINE_BYTES);
540 :
541 559 : alloc_size = vlib_buffer_alloc_size (bm->ext_hdr_size, data_size);
542 559 : n_alloc_per_page = (1ULL << m->log2_page_size) / alloc_size;
543 :
544 : /* preallocate buffer indices memory */
545 559 : bp->n_buffers = m->n_pages * n_alloc_per_page;
546 559 : bp->buffers = clib_mem_alloc_aligned (bp->n_buffers * sizeof (u32),
547 : CLIB_CACHE_LINE_BYTES);
548 :
549 559 : clib_spinlock_init (&bp->lock);
550 :
551 4579890 : for (j = 0; j < m->n_pages; j++)
552 9158660 : for (i = 0; i < n_alloc_per_page; i++)
553 : {
554 : u8 *p;
555 : u32 bi;
556 :
557 4579330 : p = m->base + (j << m->log2_page_size) + i * alloc_size;
558 4579330 : p += bm->ext_hdr_size;
559 :
560 : /*
561 : * Waste 1 buffer (maximum) so that 0 is never a valid buffer index.
562 : * Allows various places to ASSERT (bi != 0). Much easier
563 : * than debugging downstream crashes in successor nodes.
564 : */
565 4579330 : if (p == m->base)
566 559 : continue;
567 :
568 4578770 : vlib_buffer_copy_template ((vlib_buffer_t *) p, &bp->buffer_template);
569 :
570 4578770 : bi = vlib_get_buffer_index (vm, (vlib_buffer_t *) p);
571 :
572 4578770 : bp->buffers[bp->n_avail++] = bi;
573 :
574 4578770 : vlib_get_buffer (vm, bi);
575 : }
576 :
577 559 : return bp->index;
578 : }
579 :
580 : static u8 *
581 200 : format_vlib_buffer_pool (u8 * s, va_list * va)
582 : {
583 200 : vlib_main_t *vm = va_arg (*va, vlib_main_t *);
584 200 : vlib_buffer_pool_t *bp = va_arg (*va, vlib_buffer_pool_t *);
585 : vlib_buffer_pool_thread_t *bpt;
586 200 : u32 cached = 0;
587 :
588 200 : if (!bp)
589 100 : return format (s, "%-20s%=6s%=6s%=6s%=11s%=6s%=8s%=8s%=8s",
590 : "Pool Name", "Index", "NUMA", "Size", "Data Size",
591 : "Total", "Avail", "Cached", "Used");
592 :
593 : /* *INDENT-OFF* */
594 212 : vec_foreach (bpt, bp->threads)
595 112 : cached += bpt->n_cached;
596 : /* *INDENT-ON* */
597 :
598 100 : s = format (s, "%-20s%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u",
599 100 : bp->name, bp->index, bp->numa_node, bp->data_size +
600 100 : sizeof (vlib_buffer_t) + vm->buffer_main->ext_hdr_size,
601 : bp->data_size, bp->n_buffers, bp->n_avail, cached,
602 100 : bp->n_buffers - bp->n_avail - cached);
603 :
604 100 : return s;
605 : }
606 :
607 : u8 *
608 100 : format_vlib_buffer_pool_all (u8 *s, va_list *va)
609 : {
610 100 : vlib_main_t *vm = va_arg (*va, vlib_main_t *);
611 100 : vlib_buffer_main_t *bm = vm->buffer_main;
612 : vlib_buffer_pool_t *bp;
613 :
614 100 : s = format (s, "%U", format_vlib_buffer_pool, vm, 0);
615 :
616 200 : vec_foreach (bp, bm->buffer_pools)
617 100 : s = format (s, "\n%U", format_vlib_buffer_pool, vm, bp);
618 :
619 100 : return s;
620 : }
621 :
622 : static clib_error_t *
623 100 : show_buffers (vlib_main_t *vm, unformat_input_t *input,
624 : vlib_cli_command_t *cmd)
625 : {
626 100 : vlib_cli_output (vm, "%U", format_vlib_buffer_pool_all, vm);
627 100 : return 0;
628 : }
629 :
630 : /* *INDENT-OFF* */
631 272887 : VLIB_CLI_COMMAND (show_buffers_command, static) = {
632 : .path = "show buffers",
633 : .short_help = "Show packet buffer allocation",
634 : .function = show_buffers,
635 : };
636 : /* *INDENT-ON* */
637 :
638 : clib_error_t *
639 559 : vlib_buffer_num_workers_change (vlib_main_t *vm)
640 : {
641 559 : vlib_buffer_main_t *bm = vm->buffer_main;
642 : vlib_buffer_pool_t *bp;
643 :
644 1118 : vec_foreach (bp, bm->buffer_pools)
645 559 : vec_validate_aligned (bp->threads, vlib_get_n_threads () - 1,
646 : CLIB_CACHE_LINE_BYTES);
647 :
648 559 : return 0;
649 : }
650 :
651 1119 : VLIB_NUM_WORKERS_CHANGE_FN (vlib_buffer_num_workers_change);
652 :
653 : static clib_error_t *
654 2236 : vlib_buffer_main_init_numa_alloc (struct vlib_main_t *vm, u32 numa_node,
655 : u32 * physmem_map_index,
656 : clib_mem_page_sz_t log2_page_size,
657 : u8 unpriv)
658 : {
659 2236 : vlib_buffer_main_t *bm = vm->buffer_main;
660 2236 : u32 buffers_per_numa = bm->buffers_per_numa;
661 : clib_error_t *error;
662 : u32 buffer_size;
663 : uword n_pages, pagesize;
664 2236 : u8 *name = 0;
665 :
666 2236 : ASSERT (log2_page_size != CLIB_MEM_PAGE_SZ_UNKNOWN);
667 :
668 2236 : pagesize = clib_mem_page_bytes (log2_page_size);
669 2236 : buffer_size = vlib_buffer_alloc_size (bm->ext_hdr_size,
670 2236 : vlib_buffer_get_default_data_size
671 : (vm));
672 2236 : if (buffer_size > pagesize)
673 0 : return clib_error_return (0, "buffer size (%llu) is greater than page "
674 : "size (%llu)", buffer_size, pagesize);
675 :
676 2236 : if (buffers_per_numa == 0)
677 2236 : buffers_per_numa = unpriv ? VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV :
678 : VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA;
679 :
680 2236 : name = format (0, "buffers-numa-%d%c", numa_node, 0);
681 2236 : n_pages = (buffers_per_numa - 1) / (pagesize / buffer_size) + 1;
682 2236 : error = vlib_physmem_shared_map_create (vm, (char *) name,
683 : n_pages * pagesize,
684 2236 : min_log2 (pagesize), numa_node,
685 : physmem_map_index);
686 2236 : vec_free (name);
687 2236 : return error;
688 : }
689 :
690 : static clib_error_t *
691 1118 : vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node,
692 : u8 * index)
693 : {
694 1118 : vlib_buffer_main_t *bm = vm->buffer_main;
695 : u32 physmem_map_index;
696 : clib_error_t *error;
697 1118 : u8 *name = 0;
698 :
699 1118 : if (bm->log2_page_size == CLIB_MEM_PAGE_SZ_UNKNOWN)
700 : {
701 1118 : error = vlib_buffer_main_init_numa_alloc (vm, numa_node,
702 : &physmem_map_index,
703 : CLIB_MEM_PAGE_SZ_DEFAULT_HUGE,
704 : 0 /* unpriv */ );
705 1118 : if (!error)
706 0 : goto buffer_pool_create;
707 :
708 : /* If alloc failed, retry without hugepages */
709 1118 : vlib_log_warn (bm->log_default,
710 : "numa[%u] falling back to non-hugepage backed "
711 : "buffer pool (%U)", numa_node, format_clib_error, error);
712 1118 : clib_error_free (error);
713 :
714 1118 : error = vlib_buffer_main_init_numa_alloc (vm, numa_node,
715 : &physmem_map_index,
716 : CLIB_MEM_PAGE_SZ_DEFAULT,
717 : 1 /* unpriv */ );
718 : }
719 : else
720 0 : error = vlib_buffer_main_init_numa_alloc (vm, numa_node,
721 : &physmem_map_index,
722 : bm->log2_page_size,
723 : 0 /* unpriv */ );
724 1118 : if (error)
725 559 : return error;
726 :
727 559 : buffer_pool_create:
728 559 : name = format (name, "default-numa-%d%c", numa_node, 0);
729 559 : *index = vlib_buffer_pool_create (vm, (char *) name,
730 : vlib_buffer_get_default_data_size (vm),
731 : physmem_map_index);
732 :
733 559 : if (*index == (u8) ~ 0)
734 0 : error = clib_error_return (0, "maximum number of buffer pools reached");
735 559 : vec_free (name);
736 :
737 :
738 559 : return error;
739 : }
740 :
741 : void
742 1118 : vlib_buffer_main_alloc (vlib_main_t * vm)
743 : {
744 : vlib_buffer_main_t *bm;
745 :
746 1118 : if (vm->buffer_main)
747 559 : return;
748 :
749 559 : vm->buffer_main = bm = clib_mem_alloc (sizeof (bm[0]));
750 559 : clib_memset (vm->buffer_main, 0, sizeof (bm[0]));
751 559 : bm->default_data_size = VLIB_BUFFER_DEFAULT_DATA_SIZE;
752 : }
753 :
754 : static u32
755 2406 : buffer_get_cached (vlib_buffer_pool_t * bp)
756 : {
757 2406 : u32 cached = 0;
758 : vlib_buffer_pool_thread_t *bpt;
759 :
760 2406 : clib_spinlock_lock (&bp->lock);
761 :
762 : /* *INDENT-OFF* */
763 5284 : vec_foreach (bpt, bp->threads)
764 2878 : cached += bpt->n_cached;
765 : /* *INDENT-ON* */
766 :
767 2406 : clib_spinlock_unlock (&bp->lock);
768 :
769 2406 : return cached;
770 : }
771 :
772 : static vlib_buffer_pool_t *
773 3609 : buffer_get_by_index (vlib_buffer_main_t * bm, u32 index)
774 : {
775 : vlib_buffer_pool_t *bp;
776 3609 : if (!bm->buffer_pools || vec_len (bm->buffer_pools) < index)
777 0 : return 0;
778 3609 : bp = vec_elt_at_index (bm->buffer_pools, index);
779 :
780 3609 : return bp;
781 : }
782 :
783 : static void
784 1203 : buffer_gauges_collect_used_fn (vlib_stats_collector_data_t *d)
785 : {
786 1203 : vlib_main_t *vm = vlib_get_main ();
787 : vlib_buffer_pool_t *bp =
788 1203 : buffer_get_by_index (vm->buffer_main, d->private_data);
789 1203 : if (!bp)
790 0 : return;
791 :
792 1203 : d->entry->value = bp->n_buffers - bp->n_avail - buffer_get_cached (bp);
793 : }
794 :
795 : static void
796 1203 : buffer_gauges_collect_available_fn (vlib_stats_collector_data_t *d)
797 : {
798 1203 : vlib_main_t *vm = vlib_get_main ();
799 : vlib_buffer_pool_t *bp =
800 1203 : buffer_get_by_index (vm->buffer_main, d->private_data);
801 1203 : if (!bp)
802 0 : return;
803 :
804 1203 : d->entry->value = bp->n_avail;
805 : }
806 :
807 : static void
808 1203 : buffer_gauges_collect_cached_fn (vlib_stats_collector_data_t *d)
809 : {
810 1203 : vlib_main_t *vm = vlib_get_main ();
811 : vlib_buffer_pool_t *bp =
812 1203 : buffer_get_by_index (vm->buffer_main, d->private_data);
813 1203 : if (!bp)
814 0 : return;
815 :
816 1203 : d->entry->value = buffer_get_cached (bp);
817 : }
818 :
819 : clib_error_t *
820 559 : vlib_buffer_main_init (struct vlib_main_t * vm)
821 : {
822 : vlib_buffer_main_t *bm;
823 : clib_error_t *err;
824 559 : clib_bitmap_t *bmp = 0, *bmp_has_memory = 0;
825 : u32 numa_node;
826 : vlib_buffer_pool_t *bp;
827 559 : u8 *name = 0, first_valid_buffer_pool_index = ~0;
828 :
829 559 : vlib_buffer_main_alloc (vm);
830 :
831 559 : bm = vm->buffer_main;
832 559 : bm->log_default = vlib_log_register_class ("buffer", 0);
833 559 : bm->ext_hdr_size = __vlib_buffer_external_hdr_size;
834 :
835 559 : clib_spinlock_init (&bm->buffer_known_hash_lockp);
836 :
837 559 : if ((err = clib_sysfs_read ("/sys/devices/system/node/online", "%U",
838 : unformat_bitmap_list, &bmp)))
839 0 : clib_error_free (err);
840 :
841 559 : if ((err = clib_sysfs_read ("/sys/devices/system/node/has_memory", "%U",
842 : unformat_bitmap_list, &bmp_has_memory)))
843 0 : clib_error_free (err);
844 :
845 559 : if (bmp && bmp_has_memory)
846 559 : bmp = clib_bitmap_and (bmp, bmp_has_memory);
847 :
848 : /* no info from sysfs, assuming that only numa 0 exists */
849 559 : if (bmp == 0)
850 0 : bmp = clib_bitmap_set (bmp, 0, 1);
851 :
852 559 : if (clib_bitmap_last_set (bmp) >= VLIB_BUFFER_MAX_NUMA_NODES)
853 0 : clib_panic ("system have more than %u NUMA nodes",
854 : VLIB_BUFFER_MAX_NUMA_NODES);
855 :
856 : /* *INDENT-OFF* */
857 1677 : clib_bitmap_foreach (numa_node, bmp)
858 : {
859 1118 : u8 *index = bm->default_buffer_pool_index_for_numa + numa_node;
860 1118 : index[0] = ~0;
861 1118 : if ((err = vlib_buffer_main_init_numa_node (vm, numa_node, index)))
862 : {
863 559 : clib_error_report (err);
864 559 : clib_error_free (err);
865 559 : continue;
866 : }
867 :
868 559 : if (first_valid_buffer_pool_index == 0xff)
869 559 : first_valid_buffer_pool_index = index[0];
870 : }
871 : /* *INDENT-ON* */
872 :
873 559 : if (first_valid_buffer_pool_index == (u8) ~ 0)
874 : {
875 0 : err = clib_error_return (0, "failed to allocate buffer pool(s)");
876 0 : goto done;
877 : }
878 :
879 : /* *INDENT-OFF* */
880 1677 : clib_bitmap_foreach (numa_node, bmp)
881 : {
882 1118 : if (bm->default_buffer_pool_index_for_numa[numa_node] == (u8) ~0)
883 559 : bm->default_buffer_pool_index_for_numa[numa_node] =
884 : first_valid_buffer_pool_index;
885 : }
886 : /* *INDENT-ON* */
887 :
888 1118 : vec_foreach (bp, bm->buffer_pools)
889 : {
890 559 : vlib_stats_collector_reg_t reg = { .private_data = bp - bm->buffer_pools };
891 559 : if (bp->n_buffers == 0)
892 0 : continue;
893 :
894 559 : reg.entry_index =
895 559 : vlib_stats_add_gauge ("/buffer-pools/%s/cached", bp->name);
896 559 : reg.collect_fn = buffer_gauges_collect_cached_fn;
897 559 : vlib_stats_register_collector_fn (®);
898 :
899 559 : reg.entry_index = vlib_stats_add_gauge ("/buffer-pools/%s/used", bp->name);
900 559 : reg.collect_fn = buffer_gauges_collect_used_fn;
901 559 : vlib_stats_register_collector_fn (®);
902 :
903 559 : reg.entry_index =
904 559 : vlib_stats_add_gauge ("/buffer-pools/%s/available", bp->name);
905 559 : reg.collect_fn = buffer_gauges_collect_available_fn;
906 559 : vlib_stats_register_collector_fn (®);
907 : }
908 :
909 559 : done:
910 559 : vec_free (bmp);
911 559 : vec_free (bmp_has_memory);
912 559 : vec_free (name);
913 559 : return err;
914 : }
915 :
916 : static clib_error_t *
917 559 : vlib_buffers_configure (vlib_main_t * vm, unformat_input_t * input)
918 : {
919 : vlib_buffer_main_t *bm;
920 :
921 559 : vlib_buffer_main_alloc (vm);
922 :
923 559 : bm = vm->buffer_main;
924 559 : bm->log2_page_size = CLIB_MEM_PAGE_SZ_UNKNOWN;
925 :
926 559 : while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
927 : {
928 0 : if (unformat (input, "buffers-per-numa %u", &bm->buffers_per_numa))
929 : ;
930 0 : else if (unformat (input, "page-size %U", unformat_log2_page_size,
931 : &bm->log2_page_size))
932 : ;
933 0 : else if (unformat (input, "default data-size %u",
934 : &bm->default_data_size))
935 : ;
936 : else
937 0 : return unformat_parse_error (input);
938 : }
939 :
940 559 : unformat_free (input);
941 559 : return 0;
942 : }
943 :
944 7306 : VLIB_EARLY_CONFIG_FUNCTION (vlib_buffers_configure, "buffers");
945 :
#if VLIB_BUFFER_ALLOC_FAULT_INJECTOR > 0
/*
 * Fault injector for buffer allocation.
 *
 * Draws a random number r from vm->buffer_alloc_success_seed.  When r
 * exceeds vm->buffer_alloc_success_rate the request is shortened by one
 * buffer, or reduced to nothing when r also exceeds 0.95.
 *
 * Returns the number of buffers the caller should actually allocate; this
 * is never more than n_buffers.
 */
u32
vlib_buffer_alloc_may_fail (vlib_main_t * vm, u32 n_buffers)
{
  f64 r;

  r = random_f64 (&vm->buffer_alloc_success_seed);

  /* Fail this request? */
  if (r > vm->buffer_alloc_success_rate)
    {
      /* 5% chance of returning nothing at all */
      if (r > 0.95)
	n_buffers = 0;
      /* Guard the decrement: a request for 0 buffers must not wrap around. */
      else if (n_buffers > 0)
	n_buffers--;
    }

  return n_buffers;
}
#endif
964 :
965 : __clib_export int
966 0 : vlib_buffer_set_alloc_free_callback (
967 : vlib_main_t *vm, vlib_buffer_alloc_free_callback_t *alloc_callback_fn,
968 : vlib_buffer_alloc_free_callback_t *free_callback_fn)
969 : {
970 0 : vlib_buffer_main_t *bm = vm->buffer_main;
971 0 : if ((alloc_callback_fn && bm->alloc_callback_fn) ||
972 0 : (free_callback_fn && bm->free_callback_fn))
973 0 : return 1;
974 0 : bm->alloc_callback_fn = alloc_callback_fn;
975 0 : bm->free_callback_fn = free_callback_fn;
976 0 : return 0;
977 : }
978 :
979 : /** @endcond */
980 : /*
981 : * fd.io coding-style-patch-verification: ON
982 : *
983 : * Local Variables:
984 : * eval: (c-set-style "gnu")
985 : * End:
986 : */
|