Line data Source code
1 : /*
2 : * Copyright (c) 2017 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : #define _GNU_SOURCE
17 : #include <stdlib.h>
18 : #include <sys/types.h>
19 : #include <sys/stat.h>
20 : #include <unistd.h>
21 : #include <sys/mount.h>
22 : #include <sys/mman.h>
23 : #include <fcntl.h>
24 : #include <linux/mempolicy.h>
25 : #include <linux/memfd.h>
26 :
27 : #include <vppinfra/clib.h>
28 : #include <vppinfra/mem.h>
29 : #include <vppinfra/lock.h>
30 : #include <vppinfra/time.h>
31 : #include <vppinfra/bitmap.h>
32 : #include <vppinfra/format.h>
33 : #include <vppinfra/clib_error.h>
34 : #include <vppinfra/linux/sysfs.h>
35 :
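     : /* Fallback definitions for builds against older kernel headers that lack
     :  * these constants; the values below match the Linux UAPI definitions
     :  * (fcntl seals, memfd/mmap hugepage flags, MAP_FIXED_NOREPLACE). */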
36 : #ifndef F_LINUX_SPECIFIC_BASE
37 : #define F_LINUX_SPECIFIC_BASE 1024
38 : #endif
39 :
40 : #ifndef F_ADD_SEALS
41 : #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
42 : #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
43 :
44 : #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
45 : #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
46 : #define F_SEAL_GROW 0x0004 /* prevent file from growing */
47 : #define F_SEAL_WRITE 0x0008 /* prevent writes */
48 : #endif
49 :
50 : #ifndef MFD_HUGETLB
51 : #define MFD_HUGETLB 0x0004U
52 : #endif
53 :
54 : #ifndef MAP_HUGE_SHIFT
55 : #define MAP_HUGE_SHIFT 26
56 : #endif
57 :
58 : #ifndef MFD_HUGE_SHIFT
59 : #define MFD_HUGE_SHIFT 26
60 : #endif
61 :
62 : #ifndef MAP_FIXED_NOREPLACE
63 : #define MAP_FIXED_NOREPLACE 0x100000
64 : #endif
65 :
66 : static void
67 32801 : map_lock ()
68 : {
69 32801 : while (clib_atomic_test_and_set (&clib_mem_main.map_lock))
70 0 : CLIB_PAUSE ();
71 32801 : }
72 :
73 : static void
74 32801 : map_unlock ()
75 : {
76 32801 : clib_atomic_release (&clib_mem_main.map_lock);
77 32801 : }
78 :
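     : /* Fallback used when memfd_create (MFD_HUGETLB) is unavailable (kernels
     :  * older than 4.14): parse the "Hugepagesize:" line of /proc/meminfo.
     :  * The value is reported in kB, hence the 10 + min_log2 (size) below
     :  * (e.g. "Hugepagesize: 2048 kB" yields log2 page size 21, i.e. 2 MB). */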
79 : static clib_mem_page_sz_t
80 0 : legacy_get_log2_default_hugepage_size (void)
81 : {
82 0 : clib_mem_page_sz_t log2_page_size = CLIB_MEM_PAGE_SZ_UNKNOWN;
83 : FILE *fp;
84 0 : char tmp[33] = { };
85 :
86 0 : if ((fp = fopen ("/proc/meminfo", "r")) == NULL)
87 0 : return CLIB_MEM_PAGE_SZ_UNKNOWN;
88 :
89 0 : while (fscanf (fp, "%32s", tmp) > 0)
90 0 : if (strncmp ("Hugepagesize:", tmp, 13) == 0)
91 : {
92 : u32 size;
93 0 : if (fscanf (fp, "%u", &size) > 0)
94 0 : log2_page_size = 10 + min_log2 (size);
95 0 : break;
96 : }
97 :
98 0 : fclose (fp);
99 0 : return log2_page_size;
100 : }
101 :
102 : void
103 1221 : clib_mem_main_init (void)
104 : {
105 1221 : clib_mem_main_t *mm = &clib_mem_main;
106 : long sysconf_page_size;
107 : uword page_size;
108 : void *va;
109 : int fd;
110 :
111 1221 : if (mm->log2_page_sz != CLIB_MEM_PAGE_SZ_UNKNOWN)
112 587 : return;
113 :
114 : /* system page size */
115 634 : sysconf_page_size = sysconf (_SC_PAGESIZE);
116 634 : if (sysconf_page_size < 0)
117 : {
118 0 : clib_panic ("Could not determine the page size");
119 : }
120 634 : page_size = sysconf_page_size;
121 634 : mm->log2_page_sz = min_log2 (page_size);
122 :
123 : /* default system hugepage size */
124 634 : if ((fd = syscall (__NR_memfd_create, "test", MFD_HUGETLB)) != -1)
125 : {
126 634 : mm->log2_default_hugepage_sz = clib_mem_get_fd_log2_page_size (fd);
127 634 : close (fd);
128 : }
129 : else /* likely kernel older than 4.14 */
130 0 : mm->log2_default_hugepage_sz = legacy_get_log2_default_hugepage_size ();
131 :
132 634 : mm->log2_sys_default_hugepage_sz = mm->log2_default_hugepage_sz;
133 :
134 : /* numa nodes */
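     : /* Probe which NUMA nodes are online by asking move_pages() to move a
     :  * single locked page to each candidate node; the syscall fails for
     :  * nodes that do not exist, so only present nodes get recorded in
     :  * numa_node_bitmap. */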
135 634 : va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
136 : MAP_ANONYMOUS, -1, 0);
137 634 : if (va == MAP_FAILED)
138 0 : return;
139 :
140 634 : if (mlock (va, page_size))
141 0 : goto done;
142 :
143 10778 : for (int i = 0; i < CLIB_MAX_NUMAS; i++)
144 : {
145 : int status;
146 10144 : if (syscall (__NR_move_pages, 0, 1, &va, &i, &status, 0) == 0)
147 1268 : mm->numa_node_bitmap |= 1ULL << i;
148 : }
149 :
150 634 : done:
151 634 : munmap (va, page_size);
152 : }
153 :
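     : /* For memfd / hugetlbfs backed descriptors, fstat() reports the page
     :  * size of the backing file in st_blksize, which is what is returned
     :  * here. */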
154 : __clib_export u64
155 1768 : clib_mem_get_fd_page_size (int fd)
156 : {
157 1768 : struct stat st = { 0 };
158 1768 : if (fstat (fd, &st) == -1)
159 0 : return 0;
160 1768 : return st.st_blksize;
161 : }
162 :
163 : __clib_export clib_mem_page_sz_t
164 1603 : clib_mem_get_fd_log2_page_size (int fd)
165 : {
166 1603 : uword page_size = clib_mem_get_fd_page_size (fd);
167 1603 : return page_size ? min_log2 (page_size) : CLIB_MEM_PAGE_SZ_UNKNOWN;
168 : }
169 :
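     : /* Add a pseudo-random, page-aligned offset to the requested address:
     :  * up to 15 pages for 4K (and smaller) pages, up to 3 pages for page
     :  * sizes between 8K and 64K, and no offset for larger (huge) pages. */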
170 : __clib_export void
171 0 : clib_mem_vm_randomize_va (uword * requested_va,
172 : clib_mem_page_sz_t log2_page_size)
173 : {
174 0 : u8 bit_mask = 15;
175 :
176 0 : if (log2_page_size <= 12)
177 0 : bit_mask = 15;
178 0 : else if (log2_page_size > 12 && log2_page_size <= 16)
179 0 : bit_mask = 3;
180 : else
181 0 : bit_mask = 0;
182 :
183 0 : *requested_va +=
184 0 : (clib_cpu_time_now () & bit_mask) * (1ull << log2_page_size);
185 0 : }
186 :
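     : /* Pre-4.14 fallback for hugepage memfds: mount a private hugetlbfs
     :  * instance in a temporary directory, open a file on it to obtain the
     :  * fd, then lazily unmount and remove the directory so only the open fd
     :  * keeps the backing file alive. */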
187 : static int
188 0 : legacy_memfd_create (u8 * name)
189 : {
190 0 : clib_mem_main_t *mm = &clib_mem_main;
191 0 : int fd = -1;
192 : char *mount_dir;
193 : u8 *temp;
194 : u8 *filename;
195 :
196 : /*
197 : * Since mkdtemp will modify template string "/tmp/hugepage_mount.XXXXXX",
198 : * it must not be a string constant, but should be declared as
199 : * a character array.
200 : */
201 0 : temp = format (0, "/tmp/hugepage_mount.XXXXXX%c", 0);
202 :
203 : /* create mount directory */
204 0 : if ((mount_dir = mkdtemp ((char *) temp)) == 0)
205 : {
206 0 : vec_free (temp);
207 0 : vec_reset_length (mm->error);
208 0 : mm->error = clib_error_return_unix (mm->error, "mkdtemp");
209 0 : return CLIB_MEM_ERROR;
210 : }
211 :
212 0 : if (mount ("none", mount_dir, "hugetlbfs", 0, NULL))
213 : {
214 0 : vec_free (temp);
215 0 : rmdir ((char *) mount_dir);
216 0 : vec_reset_length (mm->error);
217 0 : mm->error = clib_error_return_unix (mm->error, "mount");
218 0 : return CLIB_MEM_ERROR;
219 : }
220 :
221 0 : filename = format (0, "%s/%s%c", mount_dir, name, 0);
222 :
223 0 : if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
224 : {
225 0 : vec_reset_length (mm->error);
226 0 : mm->error = clib_error_return_unix (mm->error, "mkdtemp");
227 : }
228 :
229 0 : umount2 ((char *) mount_dir, MNT_DETACH);
230 0 : rmdir ((char *) mount_dir);
231 0 : vec_free (filename);
232 0 : vec_free (temp);
233 :
234 0 : return fd;
235 : }
236 :
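     : /* Create a memfd backed by pages of the requested size. For explicit
     :  * hugepage sizes the log2 page size is encoded in the upper bits of the
     :  * memfd_create() flags (log2_page_size << MFD_HUGE_SHIFT), e.g. 21 for
     :  * 2 MB pages. Illustrative call (segment name is hypothetical):
     :  *   int fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT, "my-seg");
     :  */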
237 : __clib_export int
238 1344 : clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
239 : {
240 1344 : clib_mem_main_t *mm = &clib_mem_main;
241 : int fd;
242 : unsigned int memfd_flags;
243 : va_list va;
244 1344 : u8 *s = 0;
245 :
246 1344 : if (log2_page_size == mm->log2_page_sz)
247 575 : log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
248 769 : else if (log2_page_size == mm->log2_sys_default_hugepage_sz)
249 0 : log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE;
250 :
251 1344 : switch (log2_page_size)
252 : {
253 0 : case CLIB_MEM_PAGE_SZ_UNKNOWN:
254 0 : return CLIB_MEM_ERROR;
255 1344 : case CLIB_MEM_PAGE_SZ_DEFAULT:
256 1344 : memfd_flags = MFD_ALLOW_SEALING;
257 1344 : break;
258 0 : case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
259 0 : memfd_flags = MFD_HUGETLB;
260 0 : break;
261 0 : default:
262 0 : memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
263 : }
264 :
265 1344 : va_start (va, fmt);
266 1344 : s = va_format (0, fmt, &va);
267 1344 : va_end (va);
268 :
269 : /* memfd_create maximum string size is 249 chars without trailing zero */
270 1344 : if (vec_len (s) > 249)
271 0 : vec_set_len (s, 249);
272 1344 : vec_add1 (s, 0);
273 :
274 : /* memfd_create was introduced in kernel 3.17; we don't support older kernels */
275 1344 : fd = syscall (__NR_memfd_create, (char *) s, memfd_flags);
276 :
277 : /* kernel versions < 4.14 do not support memfd_create for huge pages */
278 1344 : if (fd == -1 && errno == EINVAL &&
279 : log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE)
280 : {
281 0 : fd = legacy_memfd_create (s);
282 : }
283 1344 : else if (fd == -1)
284 : {
285 0 : vec_reset_length (mm->error);
286 0 : mm->error = clib_error_return_unix (mm->error, "memfd_create");
287 0 : vec_free (s);
288 0 : return CLIB_MEM_ERROR;
289 : }
290 :
291 1344 : vec_free (s);
292 :
293 2688 : if ((memfd_flags & MFD_ALLOW_SEALING) &&
294 1344 : ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1))
295 : {
296 0 : vec_reset_length (mm->error);
297 0 : mm->error = clib_error_return_unix (mm->error, "fcntl (F_ADD_SEALS)");
298 0 : close (fd);
299 0 : return CLIB_MEM_ERROR;
300 : }
301 :
302 1344 : return fd;
303 : }
304 :
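     : /* Reserve (PROT_NONE) a page-size aligned VA region plus one system page
     :  * directly below it for the map header. Illustrative numbers, assuming
     :  * 4 kB system pages and a 2 MB (log2 21) aligned request of 2 MB:
     :  *   mmap of size + 2 MB returns e.g. base = 0x7f0000101000
     :  *   tail trim: (base + 0x400000) & 0x1fffff = 0x101000 bytes unmapped
     :  *   head trim: 0x200000 - 0x1000 - 0x101000 = 0xfe000 bytes unmapped
     :  *   result: base = 0x7f00001ff000, returned VA = 0x7f0000200000
     :  *   (2 MB aligned), with the 4 kB page below it still reserved. */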
305 : uword
306 31853 : clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz)
307 : {
308 31853 : clib_mem_main_t *mm = &clib_mem_main;
309 31853 : uword pagesize = 1ULL << log2_page_sz;
310 31853 : uword sys_page_sz = 1ULL << mm->log2_page_sz;
311 : uword n_bytes;
312 31853 : void *base = 0, *p;
313 :
314 31853 : size = round_pow2 (size, pagesize);
315 :
316 : /* in addition to the requested reservation, we also reserve one system page
317 : * (typically 4K) adjacent to the start of the reservation */
318 :
319 31853 : if (start)
320 : {
321 : /* start address is provided, so we just need to make sure we are not
322 : * replacing an existing map */
323 575 : if (start & pow2_mask (log2_page_sz))
324 0 : return ~0;
325 :
326 575 : base = (void *) start - sys_page_sz;
327 575 : base = mmap (base, size + sys_page_sz, PROT_NONE,
328 : MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
329 575 : return (base == MAP_FAILED) ? ~0 : start;
330 : }
331 :
332 : /* to make sure that the reservation is aligned to page_size, we need to
333 : * request one additional page, as mmap will return an address that is
334 : * aligned only to the system page size */
335 31278 : base = mmap (0, size + pagesize, PROT_NONE,
336 : MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
337 :
338 31278 : if (base == MAP_FAILED)
339 0 : return ~0;
340 :
341 : /* give back the extra space at the end of the allocation */
342 31278 : p = base + size + pagesize;
343 31278 : n_bytes = (uword) p & pow2_mask (log2_page_sz);
344 31278 : if (n_bytes)
345 : {
346 0 : p -= n_bytes;
347 0 : munmap (p, n_bytes);
348 : }
349 :
350 : /* give back the extra space at the start of the allocation */
351 31278 : n_bytes = pagesize - sys_page_sz - n_bytes;
352 31278 : if (n_bytes)
353 : {
354 0 : munmap (base, n_bytes);
355 0 : base += n_bytes;
356 : }
357 :
358 31278 : return (uword) base + sys_page_sz;
359 : }
360 :
361 : __clib_export clib_mem_vm_map_hdr_t *
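     : /* Walk the linked list of map headers. Header pages are normally kept
     :  * PROT_NONE; this iterator makes the current header readable and
     :  * re-protects it when advancing. Illustrative usage:
     :  *   clib_mem_vm_map_hdr_t *h = 0;
     :  *   while ((h = clib_mem_vm_get_next_map_hdr (h)))
     :  *     ... inspect h->name, h->base_addr, h->num_pages ...
     :  */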
362 0 : clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t * hdr)
363 : {
364 0 : clib_mem_main_t *mm = &clib_mem_main;
365 0 : uword sys_page_sz = 1ULL << mm->log2_page_sz;
366 : clib_mem_vm_map_hdr_t *next;
367 0 : if (hdr == 0)
368 : {
369 0 : hdr = mm->first_map;
370 0 : if (hdr)
371 0 : mprotect (hdr, sys_page_sz, PROT_READ);
372 0 : return hdr;
373 : }
374 0 : next = hdr->next;
375 0 : mprotect (hdr, sys_page_sz, PROT_NONE);
376 0 : if (next)
377 0 : mprotect (next, sys_page_sz, PROT_READ);
378 0 : return next;
379 : }
380 :
381 : void *
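     : /* Map memory at a page-size aligned address. The system page immediately
     :  * below the returned base holds a clib_mem_vm_map_hdr_t describing the
     :  * mapping; headers form a doubly linked list protected by map_lock ()
     :  * and are kept PROT_NONE except while being updated. */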
382 31278 : clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
383 : uword size, int fd, uword offset, char *name)
384 : {
385 31278 : clib_mem_main_t *mm = &clib_mem_main;
386 : clib_mem_vm_map_hdr_t *hdr;
387 31278 : uword sys_page_sz = 1ULL << mm->log2_page_sz;
388 31278 : int mmap_flags = MAP_FIXED, is_huge = 0;
389 :
390 31278 : if (fd != -1)
391 : {
392 784 : mmap_flags |= MAP_SHARED;
393 784 : log2_page_sz = clib_mem_get_fd_log2_page_size (fd);
394 784 : if (log2_page_sz > mm->log2_page_sz)
395 0 : is_huge = 1;
396 : }
397 : else
398 : {
399 30494 : mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
400 :
401 30494 : if (log2_page_sz == mm->log2_page_sz)
402 2252 : log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
403 :
404 30494 : switch (log2_page_sz)
405 : {
406 0 : case CLIB_MEM_PAGE_SZ_UNKNOWN:
407 : /* will fail later */
408 0 : break;
409 30494 : case CLIB_MEM_PAGE_SZ_DEFAULT:
410 30494 : log2_page_sz = mm->log2_page_sz;
411 30494 : break;
412 0 : case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
413 0 : mmap_flags |= MAP_HUGETLB;
414 0 : log2_page_sz = mm->log2_default_hugepage_sz;
415 0 : is_huge = 1;
416 0 : break;
417 0 : default:
418 0 : mmap_flags |= MAP_HUGETLB;
419 0 : mmap_flags |= log2_page_sz << MAP_HUGE_SHIFT;
420 0 : is_huge = 1;
421 : }
422 : }
423 :
424 31278 : if (log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN)
425 0 : return CLIB_MEM_VM_MAP_FAILED;
426 :
427 31278 : size = round_pow2 (size, 1ULL << log2_page_sz);
428 :
429 31278 : base = (void *) clib_mem_vm_reserve ((uword) base, size, log2_page_sz);
430 :
431 31278 : if (base == (void *) ~0)
432 0 : return CLIB_MEM_VM_MAP_FAILED;
433 :
434 31278 : base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset);
435 :
436 31278 : if (base == MAP_FAILED)
437 0 : return CLIB_MEM_VM_MAP_FAILED;
438 :
439 31278 : if (is_huge && (mlock (base, size) != 0))
440 : {
441 0 : munmap (base, size);
442 0 : return CLIB_MEM_VM_MAP_FAILED;
443 : }
444 :
445 31278 : hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
446 : MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
447 :
448 31278 : if (hdr != base - sys_page_sz)
449 : {
450 0 : munmap (base, size);
451 0 : return CLIB_MEM_VM_MAP_FAILED;
452 : }
453 :
454 31278 : map_lock ();
455 :
456 31278 : if (mm->last_map)
457 : {
458 30094 : mprotect (mm->last_map, sys_page_sz, PROT_READ | PROT_WRITE);
459 30094 : mm->last_map->next = hdr;
460 30094 : mprotect (mm->last_map, sys_page_sz, PROT_NONE);
461 : }
462 : else
463 1184 : mm->first_map = hdr;
464 :
465 31278 : clib_mem_unpoison (hdr, sys_page_sz);
466 31278 : hdr->next = 0;
467 31278 : hdr->prev = mm->last_map;
468 31278 : snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
469 31278 : mm->last_map = hdr;
470 :
471 31278 : hdr->base_addr = (uword) base;
472 31278 : hdr->log2_page_sz = log2_page_sz;
473 31278 : hdr->num_pages = size >> log2_page_sz;
474 31278 : hdr->fd = fd;
475 31278 : hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
476 31278 : mprotect (hdr, sys_page_sz, PROT_NONE);
477 :
478 31278 : map_unlock ();
479 :
480 31278 : clib_mem_unpoison (base, size);
481 31278 : return base;
482 : }
483 :
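     : /* Unmap a region previously returned by clib_mem_vm_map_internal ():
     :  * recover the header one system page below 'base', unmap the region,
     :  * unlink the header from the list and finally unmap the header page. */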
484 : __clib_export int
485 1523 : clib_mem_vm_unmap (void *base)
486 : {
487 1523 : clib_mem_main_t *mm = &clib_mem_main;
488 1523 : uword size, sys_page_sz = 1ULL << mm->log2_page_sz;
489 1523 : clib_mem_vm_map_hdr_t *hdr = base - sys_page_sz;
490 :
491 1523 : map_lock ();
492 1523 : if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
493 0 : goto out;
494 :
495 1523 : size = hdr->num_pages << hdr->log2_page_sz;
496 1523 : if (munmap ((void *) hdr->base_addr, size) != 0)
497 0 : goto out;
498 :
499 1523 : if (hdr->next)
500 : {
501 737 : mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
502 737 : hdr->next->prev = hdr->prev;
503 737 : mprotect (hdr->next, sys_page_sz, PROT_NONE);
504 : }
505 : else
506 786 : mm->last_map = hdr->prev;
507 :
508 1523 : if (hdr->prev)
509 : {
510 948 : mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
511 948 : hdr->prev->next = hdr->next;
512 948 : mprotect (hdr->prev, sys_page_sz, PROT_NONE);
513 : }
514 : else
515 575 : mm->first_map = hdr->next;
516 :
517 1523 : map_unlock ();
518 :
519 1523 : if (munmap (hdr, sys_page_sz) != 0)
520 0 : return CLIB_MEM_ERROR;
521 :
522 1523 : return 0;
523 0 : out:
524 0 : map_unlock ();
525 0 : return CLIB_MEM_ERROR;
526 : }
527 :
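     : /* Query page residency with move_pages() and a NULL nodes array, which
     :  * only reports status: each status[i] is the NUMA node of a mapped page
     :  * or a negative errno (-EFAULT for pages that are not mapped). */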
528 : __clib_export void
529 1425 : clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
530 : uword n_pages, clib_mem_page_stats_t * stats)
531 : {
532 1425 : int i, *status = 0;
533 1425 : void **ptr = 0;
534 :
535 1425 : log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
536 :
537 1425 : vec_validate (status, n_pages - 1);
538 1425 : vec_validate (ptr, n_pages - 1);
539 :
540 3160530 : for (i = 0; i < n_pages; i++)
541 3159100 : ptr[i] = start + (i << log2_page_size);
542 :
543 1425 : clib_memset (stats, 0, sizeof (clib_mem_page_stats_t));
544 1425 : stats->total = n_pages;
545 1425 : stats->log2_page_sz = log2_page_size;
546 :
547 1425 : if (syscall (__NR_move_pages, 0, n_pages, ptr, 0, status, 0) != 0)
548 : {
549 0 : stats->unknown = n_pages;
550 0 : goto done;
551 : }
552 :
553 3160530 : for (i = 0; i < n_pages; i++)
554 : {
555 3159100 : if (status[i] >= 0 && status[i] < CLIB_MAX_NUMAS)
556 : {
557 184663 : stats->mapped++;
558 184663 : stats->per_numa[status[i]]++;
559 : }
560 2974440 : else if (status[i] == -EFAULT)
561 2974440 : stats->not_mapped++;
562 : else
563 0 : stats->unknown++;
564 : }
565 :
566 1425 : done:
567 1425 : vec_free (status);
568 1425 : vec_free (ptr);
569 1425 : }
570 :
571 :
572 : __clib_export u64 *
573 1150 : clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size,
574 : int n_pages)
575 : {
576 1150 : int pagesize = sysconf (_SC_PAGESIZE);
577 : int fd;
578 : int i;
579 1150 : u64 *r = 0;
580 :
581 1150 : log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
582 :
583 1150 : if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
584 0 : return 0;
585 :
586 2300 : for (i = 0; i < n_pages; i++)
587 : {
588 1150 : u64 seek, pagemap = 0;
589 1150 : uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
590 1150 : seek = ((u64) vaddr / pagesize) * sizeof (u64);
591 1150 : if (lseek (fd, seek, SEEK_SET) != seek)
592 0 : goto done;
593 :
594 1150 : if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap)))
595 0 : goto done;
596 :
597 1150 : if ((pagemap & (1ULL << 63)) == 0)
598 0 : goto done;
599 :
600 1150 : pagemap &= pow2_mask (55);
601 1150 : vec_add1 (r, pagemap * pagesize);
602 : }
603 :
604 1150 : done:
605 1150 : close (fd);
606 1150 : if (vec_len (r) != n_pages)
607 : {
608 0 : vec_free (r);
609 0 : return 0;
610 : }
611 1150 : return r;
612 : }
613 :
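     : /* Restrict (force != 0, MPOL_BIND) or prefer (MPOL_PREFERRED) allocations
     :  * from the given NUMA node by passing a single-bit nodemask to
     :  * set_mempolicy(); the last argument is the nodemask size in bits
     :  * plus one. */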
614 : __clib_export int
615 575 : clib_mem_set_numa_affinity (u8 numa_node, int force)
616 : {
617 575 : clib_mem_main_t *mm = &clib_mem_main;
618 575 : clib_bitmap_t *bmp = 0;
619 : int rv;
620 :
621 : /* no numa support */
622 575 : if (mm->numa_node_bitmap == 0)
623 : {
624 0 : if (numa_node)
625 : {
626 0 : vec_reset_length (mm->error);
627 0 : mm->error = clib_error_return (mm->error, "%s: numa not supported",
628 : (char *) __func__);
629 0 : return CLIB_MEM_ERROR;
630 : }
631 : else
632 0 : return 0;
633 : }
634 :
635 575 : bmp = clib_bitmap_set (bmp, numa_node, 1);
636 :
637 575 : rv = syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, bmp,
638 575 : vec_len (bmp) * sizeof (bmp[0]) * 8 + 1);
639 :
640 575 : clib_bitmap_free (bmp);
641 575 : vec_reset_length (mm->error);
642 :
643 575 : if (rv)
644 : {
645 0 : mm->error = clib_error_return_unix (mm->error, (char *) __func__);
646 0 : return CLIB_MEM_ERROR;
647 : }
648 :
649 575 : return 0;
650 : }
651 :
652 : __clib_export int
653 575 : clib_mem_set_default_numa_affinity ()
654 : {
655 575 : clib_mem_main_t *mm = &clib_mem_main;
656 :
657 575 : if (syscall (__NR_set_mempolicy, MPOL_DEFAULT, 0, 0))
658 : {
659 0 : vec_reset_length (mm->error);
660 0 : mm->error = clib_error_return_unix (mm->error, (char *) __func__);
661 0 : return CLIB_MEM_ERROR;
662 : }
663 575 : return 0;
664 : }
665 :
666 : /*
667 : * fd.io coding-style-patch-verification: ON
668 : *
669 : * Local Variables:
670 : * eval: (c-set-style "gnu")
671 : * End:
672 : */