Line data Source code
1 : /*
2 : * Copyright (c) 2017 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : #define _GNU_SOURCE
17 : #include <stdlib.h>
18 : #include <sys/types.h>
19 : #include <sys/stat.h>
20 : #include <unistd.h>
21 : #include <sys/mount.h>
22 : #include <sys/mman.h>
23 : #include <fcntl.h>
24 : #include <linux/mempolicy.h>
25 : #include <linux/memfd.h>
26 :
27 : #include <vppinfra/clib.h>
28 : #include <vppinfra/mem.h>
29 : #include <vppinfra/lock.h>
30 : #include <vppinfra/time.h>
31 : #include <vppinfra/bitmap.h>
32 : #include <vppinfra/format.h>
33 : #include <vppinfra/clib_error.h>
34 : #include <vppinfra/linux/sysfs.h>
35 :
36 : #ifndef F_LINUX_SPECIFIC_BASE
37 : #define F_LINUX_SPECIFIC_BASE 1024
38 : #endif
39 :
40 : #ifndef F_ADD_SEALS
41 : #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
42 : #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
43 :
44 : #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
45 : #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
46 : #define F_SEAL_GROW 0x0004 /* prevent file from growing */
47 : #define F_SEAL_WRITE 0x0008 /* prevent writes */
48 : #endif
49 :
50 : #ifndef MFD_HUGETLB
51 : #define MFD_HUGETLB 0x0004U
52 : #endif
53 :
54 : #ifndef MAP_HUGE_SHIFT
55 : #define MAP_HUGE_SHIFT 26
56 : #endif
57 :
58 : #ifndef MFD_HUGE_SHIFT
59 : #define MFD_HUGE_SHIFT 26
60 : #endif
61 :
62 : #ifndef MAP_FIXED_NOREPLACE
63 : #define MAP_FIXED_NOREPLACE 0x100000
64 : #endif
65 :
66 : static void
67 31386 : map_lock ()
68 : {
69 31386 : while (clib_atomic_test_and_set (&clib_mem_main.map_lock))
70 0 : CLIB_PAUSE ();
71 31386 : }
72 :
73 : static void
74 31386 : map_unlock ()
75 : {
76 31386 : clib_atomic_release (&clib_mem_main.map_lock);
77 31386 : }
78 :
79 : static clib_mem_page_sz_t
80 0 : legacy_get_log2_default_hugepage_size (void)
81 : {
82 0 : clib_mem_page_sz_t log2_page_size = CLIB_MEM_PAGE_SZ_UNKNOWN;
83 : FILE *fp;
84 0 : char tmp[33] = { };
85 :
86 0 : if ((fp = fopen ("/proc/meminfo", "r")) == NULL)
87 0 : return CLIB_MEM_PAGE_SZ_UNKNOWN;
88 :
89 0 : while (fscanf (fp, "%32s", tmp) > 0)
90 0 : if (strncmp ("Hugepagesize:", tmp, 13) == 0)
91 : {
92 : u32 size;
93 0 : if (fscanf (fp, "%u", &size) > 0)
94 0 : log2_page_size = 10 + min_log2 (size);
95 0 : break;
96 : }
97 :
98 0 : fclose (fp);
99 0 : return log2_page_size;
100 : }
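/* Worked example of the arithmetic above (illustrative, not part of the
 * original file): /proc/meminfo reports the default hugepage size in
 * kilobytes, e.g. "Hugepagesize: 2048 kB".  The parser reads 2048 and
 * computes 10 + min_log2 (2048) = 10 + 11 = 21, i.e. log2 of 2 MiB.
 * A 1 GiB default ("Hugepagesize: 1048576 kB") would yield 10 + 20 = 30. */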
101 :
102 : void
103 1189 : clib_mem_main_init (void)
104 : {
105 1189 : clib_mem_main_t *mm = &clib_mem_main;
106 : long sysconf_page_size;
107 : uword page_size;
108 : void *va;
109 : int fd;
110 :
111 1189 : if (mm->log2_page_sz != CLIB_MEM_PAGE_SZ_UNKNOWN)
112 571 : return;
113 :
114 : /* system page size */
115 618 : sysconf_page_size = sysconf (_SC_PAGESIZE);
116 618 : if (sysconf_page_size < 0)
117 : {
118 0 : clib_panic ("Could not determine the page size");
119 : }
120 618 : page_size = sysconf_page_size;
121 618 : mm->log2_page_sz = min_log2 (page_size);
122 :
123 : /* default system hugepage size */
124 618 : if ((fd = syscall (__NR_memfd_create, "test", MFD_HUGETLB)) != -1)
125 : {
126 618 : mm->log2_default_hugepage_sz = clib_mem_get_fd_log2_page_size (fd);
127 618 : close (fd);
128 : }
129 : else /* likely kernel older than 4.14 */
130 0 : mm->log2_default_hugepage_sz = legacy_get_log2_default_hugepage_size ();
131 :
132 618 : mm->log2_sys_default_hugepage_sz = mm->log2_default_hugepage_sz;
133 :
134 : /* numa nodes */
135 618 : va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
136 : MAP_ANONYMOUS, -1, 0);
137 618 : if (va == MAP_FAILED)
138 0 : return;
139 :
140 618 : if (mlock (va, page_size))
141 0 : goto done;
142 :
143 10506 : for (int i = 0; i < CLIB_MAX_NUMAS; i++)
144 : {
145 : int status;
146 9888 : if (syscall (__NR_move_pages, 0, 1, &va, &i, &status, 0) == 0)
147 1236 : mm->numa_node_bitmap |= 1ULL << i;
148 : }
149 :
150 618 : done:
151 618 : munmap (va, page_size);
152 : }
153 :
154 : __clib_export u64
155 1732 : clib_mem_get_fd_page_size (int fd)
156 : {
157 1732 : struct stat st = { 0 };
158 1732 : if (fstat (fd, &st) == -1)
159 0 : return 0;
160 1732 : return st.st_blksize;
161 : }
162 :
163 : __clib_export clib_mem_page_sz_t
164 1567 : clib_mem_get_fd_log2_page_size (int fd)
165 : {
166 1567 : uword page_size = clib_mem_get_fd_page_size (fd);
167 1567 : return page_size ? min_log2 (page_size) : CLIB_MEM_PAGE_SZ_UNKNOWN;
168 : }
169 :
170 : __clib_export void
171 0 : clib_mem_vm_randomize_va (uword * requested_va,
172 : clib_mem_page_sz_t log2_page_size)
173 : {
174 0 : u8 bit_mask = 15;
175 :
176 0 : if (log2_page_size <= 12)
177 0 : bit_mask = 15;
178 0 : else if (log2_page_size > 12 && log2_page_size <= 16)
179 0 : bit_mask = 3;
180 : else
181 0 : bit_mask = 0;
182 :
183 0 : *requested_va +=
184 0 : (clib_cpu_time_now () & bit_mask) * (1ull << log2_page_size);
185 0 : }
186 :
187 : static int
188 0 : legacy_memfd_create (u8 * name)
189 : {
190 0 : clib_mem_main_t *mm = &clib_mem_main;
191 0 : int fd = -1;
192 : char *mount_dir;
193 : u8 *temp;
194 : u8 *filename;
195 :
196 : /*
197 : * Since mkdtemp will modify template string "/tmp/hugepage_mount.XXXXXX",
198 : * it must not be a string constant, but should be declared as
199 : * a character array.
200 : */
201 0 : temp = format (0, "/tmp/hugepage_mount.XXXXXX%c", 0);
202 :
203 : /* create mount directory */
204 0 : if ((mount_dir = mkdtemp ((char *) temp)) == 0)
205 : {
206 0 : vec_free (temp);
207 0 : vec_reset_length (mm->error);
208 0 : mm->error = clib_error_return_unix (mm->error, "mkdtemp");
209 0 : return CLIB_MEM_ERROR;
210 : }
211 :
212 0 : if (mount ("none", mount_dir, "hugetlbfs", 0, NULL))
213 : {
214 0 : vec_free (temp);
215 0 : rmdir ((char *) mount_dir);
216 0 : vec_reset_length (mm->error);
217 0 : mm->error = clib_error_return_unix (mm->error, "mount");
218 0 : return CLIB_MEM_ERROR;
219 : }
220 :
221 0 : filename = format (0, "%s/%s%c", mount_dir, name, 0);
222 :
223 0 : if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
224 : {
225 0 : vec_reset_length (mm->error);
226 0 : mm->error = clib_error_return_unix (mm->error, "mkdtemp");
227 : }
228 :
229 0 : umount2 ((char *) mount_dir, MNT_DETACH);
230 0 : rmdir ((char *) mount_dir);
231 0 : vec_free (filename);
232 0 : vec_free (temp);
233 :
234 0 : return fd;
235 : }
236 :
237 : __clib_export int
238 1309 : clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
239 : {
240 1309 : clib_mem_main_t *mm = &clib_mem_main;
241 : int fd;
242 : unsigned int memfd_flags;
243 : va_list va;
244 1309 : u8 *s = 0;
245 :
246 1309 : if (log2_page_size == mm->log2_page_sz)
247 559 : log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
248 750 : else if (log2_page_size == mm->log2_sys_default_hugepage_sz)
249 0 : log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE;
250 :
251 1309 : switch (log2_page_size)
252 : {
253 0 : case CLIB_MEM_PAGE_SZ_UNKNOWN:
254 0 : return CLIB_MEM_ERROR;
255 1309 : case CLIB_MEM_PAGE_SZ_DEFAULT:
256 1309 : memfd_flags = MFD_ALLOW_SEALING;
257 1309 : break;
258 0 : case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
259 0 : memfd_flags = MFD_HUGETLB;
260 0 : break;
261 0 : default:
262 0 : memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
263 : }
264 :
265 1309 : va_start (va, fmt);
266 1309 : s = va_format (0, fmt, &va);
267 1309 : va_end (va);
268 :
269 : /* memfd_create maximum string size is 249 chars without trailing zero */
270 1309 : if (vec_len (s) > 249)
271 0 : vec_set_len (s, 249);
272 1309 : vec_add1 (s, 0);
273 :
274 : /* memfd_create was introduced in kernel 3.17; we don't support older kernels */
275 1309 : fd = syscall (__NR_memfd_create, (char *) s, memfd_flags);
276 :
277 : /* kernel versions < 4.14 do not support memfd_create for huge pages */
278 1309 : if (fd == -1 && errno == EINVAL &&
279 : log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE)
280 : {
281 0 : fd = legacy_memfd_create (s);
282 : }
283 1309 : else if (fd == -1)
284 : {
285 0 : vec_reset_length (mm->error);
286 0 : mm->error = clib_error_return_unix (mm->error, "memfd_create");
287 0 : vec_free (s);
288 0 : return CLIB_MEM_ERROR;
289 : }
290 :
291 1309 : vec_free (s);
292 :
293 2618 : if ((memfd_flags & MFD_ALLOW_SEALING) &&
294 1309 : ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1))
295 : {
296 0 : vec_reset_length (mm->error);
297 0 : mm->error = clib_error_return_unix (mm->error, "fcntl (F_ADD_SEALS)");
298 0 : close (fd);
299 0 : return CLIB_MEM_ERROR;
300 : }
301 :
302 1309 : return fd;
303 : }
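/* Illustrative usage sketch (not part of the original file): create a
 * default-hugepage-backed descriptor, size it to two hugepages and map it.
 * Real callers normally go through the higher-level clib_mem_vm_map_*
 * wrappers rather than calling clib_mem_vm_map_internal () directly. */
static void *
example_create_and_map_hugepages (void)
{
  clib_mem_main_t *mm = &clib_mem_main;
  uword size = 2ULL << mm->log2_default_hugepage_sz;
  void *va;
  int fd;

  fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT_HUGE, "example");
  if (fd < 0)
    return 0;

  /* hugetlb-backed memfds must be sized before they can be mapped */
  if (ftruncate (fd, size) != 0)
    {
      close (fd);
      return 0;
    }

  va = clib_mem_vm_map_internal (0, CLIB_MEM_PAGE_SZ_DEFAULT_HUGE, size, fd,
				 0, "example");
  return (va == CLIB_MEM_VM_MAP_FAILED) ? 0 : va;
}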
304 :
305 : uword
306 30440 : clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz)
307 : {
308 30440 : clib_mem_main_t *mm = &clib_mem_main;
309 30440 : uword pagesize = 1ULL << log2_page_sz;
310 30440 : uword sys_page_sz = 1ULL << mm->log2_page_sz;
311 : uword n_bytes;
312 30440 : void *base = 0, *p;
313 :
314 30440 : size = round_pow2 (size, pagesize);
315 :
316 : /* in addition to the requested reservation, we also reserve one system
317 : * page (typically 4K) adjacent to the start of the reservation */
318 :
319 30440 : if (start)
320 : {
321 : /* start address is provided, so we just need to make sure we are not
322 : * replacing an existing map */
323 559 : if (start & pow2_mask (log2_page_sz))
324 0 : return ~0;
325 :
326 559 : base = (void *) start - sys_page_sz;
327 559 : base = mmap (base, size + sys_page_sz, PROT_NONE,
328 : MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
329 559 : return (base == MAP_FAILED) ? ~0 : start;
330 : }
331 :
332 : /* to make sure that we get a reservation aligned to page_size, we need
333 : * to request one additional page, as mmap will return an address which
334 : * is aligned only to the system page size */
335 29881 : base = mmap (0, size + pagesize, PROT_NONE,
336 : MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
337 :
338 29881 : if (base == MAP_FAILED)
339 0 : return ~0;
340 :
341 : /* give back the extra (unaligned) space at the end of the allocation */
342 29881 : p = base + size + pagesize;
343 29881 : n_bytes = (uword) p & pow2_mask (log2_page_sz);
344 29881 : if (n_bytes)
345 : {
346 0 : p -= n_bytes;
347 0 : munmap (p, n_bytes);
348 : }
349 :
350 : /* give back the extra space at the start, keeping one system page below */
351 29881 : n_bytes = pagesize - sys_page_sz - n_bytes;
352 29881 : if (n_bytes)
353 : {
354 0 : munmap (base, n_bytes);
355 0 : base += n_bytes;
356 : }
357 :
358 29881 : return (uword) base + sys_page_sz;
359 : }
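/* Worked example of the trimming arithmetic above (addresses are purely
 * illustrative): with a 4 KiB system page, a 2 MiB requested page size
 * (log2_page_sz = 21) and size = 2 MiB, mmap () may return
 * base = 0x7f1200003000.  The end of the over-sized reservation is
 * base + size + pagesize = 0x7f1200403000, so n_bytes = 0x3000 is unmapped
 * from the tail, leaving a 2 MiB-aligned end at 0x7f1200400000.  The head
 * trim is pagesize - sys_page_sz - 0x3000 = 0x1fc000, moving base to
 * 0x7f12001ff000.  The function returns base + sys_page_sz =
 * 0x7f1200200000, which is 2 MiB aligned, with exactly one system page
 * kept below it for the map header written by clib_mem_vm_map_internal (). */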
360 :
361 : __clib_export clib_mem_vm_map_hdr_t *
362 0 : clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t * hdr)
363 : {
364 0 : clib_mem_main_t *mm = &clib_mem_main;
365 0 : uword sys_page_sz = 1ULL << mm->log2_page_sz;
366 : clib_mem_vm_map_hdr_t *next;
367 0 : if (hdr == 0)
368 : {
369 0 : hdr = mm->first_map;
370 0 : if (hdr)
371 0 : mprotect (hdr, sys_page_sz, PROT_READ);
372 0 : return hdr;
373 : }
374 0 : next = hdr->next;
375 0 : mprotect (hdr, sys_page_sz, PROT_NONE);
376 0 : if (next)
377 0 : mprotect (next, sys_page_sz, PROT_READ);
378 0 : return next;
379 : }
380 :
381 : void *
382 29881 : clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
383 : uword size, int fd, uword offset, char *name)
384 : {
385 29881 : clib_mem_main_t *mm = &clib_mem_main;
386 : clib_mem_vm_map_hdr_t *hdr;
387 29881 : uword sys_page_sz = 1ULL << mm->log2_page_sz;
388 29881 : int mmap_flags = MAP_FIXED, is_huge = 0;
389 :
390 29881 : if (fd != -1)
391 : {
392 765 : mmap_flags |= MAP_SHARED;
393 765 : log2_page_sz = clib_mem_get_fd_log2_page_size (fd);
394 765 : if (log2_page_sz > mm->log2_page_sz)
395 0 : is_huge = 1;
396 : }
397 : else
398 : {
399 29116 : mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
400 :
401 29116 : if (log2_page_sz == mm->log2_page_sz)
402 2218 : log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
403 :
404 29116 : switch (log2_page_sz)
405 : {
406 0 : case CLIB_MEM_PAGE_SZ_UNKNOWN:
407 : /* will fail later */
408 0 : break;
409 29116 : case CLIB_MEM_PAGE_SZ_DEFAULT:
410 29116 : log2_page_sz = mm->log2_page_sz;
411 29116 : break;
412 0 : case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
413 0 : mmap_flags |= MAP_HUGETLB;
414 0 : log2_page_sz = mm->log2_default_hugepage_sz;
415 0 : is_huge = 1;
416 0 : break;
417 0 : default:
418 0 : mmap_flags |= MAP_HUGETLB;
419 0 : mmap_flags |= log2_page_sz << MAP_HUGE_SHIFT;
420 0 : is_huge = 1;
421 : }
422 : }
423 :
424 29881 : if (log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN)
425 0 : return CLIB_MEM_VM_MAP_FAILED;
426 :
427 29881 : size = round_pow2 (size, 1ULL << log2_page_sz);
428 :
429 29881 : base = (void *) clib_mem_vm_reserve ((uword) base, size, log2_page_sz);
430 :
431 29881 : if (base == (void *) ~0)
432 0 : return CLIB_MEM_VM_MAP_FAILED;
433 :
434 29881 : base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset);
435 :
436 29881 : if (base == MAP_FAILED)
437 0 : return CLIB_MEM_VM_MAP_FAILED;
438 :
439 29881 : if (is_huge && (mlock (base, size) != 0))
440 : {
441 0 : munmap (base, size);
442 0 : return CLIB_MEM_VM_MAP_FAILED;
443 : }
444 :
445 29881 : hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
446 : MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
447 :
448 29881 : if (hdr != base - sys_page_sz)
449 : {
450 0 : munmap (base, size);
451 0 : return CLIB_MEM_VM_MAP_FAILED;
452 : }
453 :
454 29881 : map_lock ();
455 :
456 29881 : if (mm->last_map)
457 : {
458 28729 : mprotect (mm->last_map, sys_page_sz, PROT_READ | PROT_WRITE);
459 28729 : mm->last_map->next = hdr;
460 28729 : mprotect (mm->last_map, sys_page_sz, PROT_NONE);
461 : }
462 : else
463 1152 : mm->first_map = hdr;
464 :
465 29881 : clib_mem_unpoison (hdr, sys_page_sz);
466 29881 : hdr->next = 0;
467 29881 : hdr->prev = mm->last_map;
468 29881 : snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
469 29881 : mm->last_map = hdr;
470 :
471 29881 : hdr->base_addr = (uword) base;
472 29881 : hdr->log2_page_sz = log2_page_sz;
473 29881 : hdr->num_pages = size >> log2_page_sz;
474 29881 : hdr->fd = fd;
475 29881 : hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
476 29881 : mprotect (hdr, sys_page_sz, PROT_NONE);
477 :
478 29881 : map_unlock ();
479 :
480 29881 : clib_mem_unpoison (base, size);
481 29881 : return base;
482 : }
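/* Resulting layout of a single mapping (illustrative sketch, not part of
 * the original file):
 *
 *   base - sys_page_sz  +----------------------------------------+
 *                       | clib_mem_vm_map_hdr_t, kept PROT_NONE  |
 *                       | except while the map list is updated   |
 *   base                +----------------------------------------+
 *                       | requested pages, PROT_READ | PROT_WRITE|
 *   base + size         +----------------------------------------+
 *
 * The headers form a doubly linked list (mm->first_map / mm->last_map)
 * which clib_mem_vm_get_next_map_hdr () walks and clib_mem_vm_unmap ()
 * unlinks. */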
483 :
484 : __clib_export int
485 1505 : clib_mem_vm_unmap (void *base)
486 : {
487 1505 : clib_mem_main_t *mm = &clib_mem_main;
488 1505 : uword size, sys_page_sz = 1ULL << mm->log2_page_sz;
489 1505 : clib_mem_vm_map_hdr_t *hdr = base - sys_page_sz;
490 :
491 1505 : map_lock ();
492 1505 : if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
493 0 : goto out;
494 :
495 1505 : size = hdr->num_pages << hdr->log2_page_sz;
496 1505 : if (munmap ((void *) hdr->base_addr, size) != 0)
497 0 : goto out;
498 :
499 1505 : if (hdr->next)
500 : {
501 738 : mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
502 738 : hdr->next->prev = hdr->prev;
503 738 : mprotect (hdr->next, sys_page_sz, PROT_NONE);
504 : }
505 : else
506 767 : mm->last_map = hdr->prev;
507 :
508 1505 : if (hdr->prev)
509 : {
510 946 : mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
511 946 : hdr->prev->next = hdr->next;
512 946 : mprotect (hdr->prev, sys_page_sz, PROT_NONE);
513 : }
514 : else
515 559 : mm->first_map = hdr->next;
516 :
517 1505 : map_unlock ();
518 :
519 1505 : if (munmap (hdr, sys_page_sz) != 0)
520 0 : return CLIB_MEM_ERROR;
521 :
522 1505 : return 0;
523 0 : out:
524 0 : map_unlock ();
525 0 : return CLIB_MEM_ERROR;
526 : }
527 :
528 : __clib_export void
529 1400 : clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
530 : uword n_pages, clib_mem_page_stats_t * stats)
531 : {
532 1400 : int i, *status = 0;
533 1400 : void **ptr = 0;
534 :
535 1400 : log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
536 :
537 1400 : vec_validate (status, n_pages - 1);
538 1400 : vec_validate (ptr, n_pages - 1);
539 :
540 1587530 : for (i = 0; i < n_pages; i++)
541 1586130 : ptr[i] = start + (i << log2_page_size);
542 :
543 1400 : clib_memset (stats, 0, sizeof (clib_mem_page_stats_t));
544 1400 : stats->total = n_pages;
545 1400 : stats->log2_page_sz = log2_page_size;
546 :
547 1400 : if (syscall (__NR_move_pages, 0, n_pages, ptr, 0, status, 0) != 0)
548 : {
549 0 : stats->unknown = n_pages;
550 0 : goto done;
551 : }
552 :
553 1587530 : for (i = 0; i < n_pages; i++)
554 : {
555 1586130 : if (status[i] >= 0 && status[i] < CLIB_MAX_NUMAS)
556 : {
557 7772 : stats->mapped++;
558 7772 : stats->per_numa[status[i]]++;
559 : }
560 1578360 : else if (status[i] == -EFAULT)
561 1578360 : stats->not_mapped++;
562 : else
563 0 : stats->unknown++;
564 : }
565 :
566 1400 : done:
567 1400 : vec_free (status);
568 1400 : vec_free (ptr);
569 1400 : }
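/* Illustrative usage sketch (not part of the original file): check whether
 * the first n_pages system-size pages of a mapping are all backed by
 * physical memory. */
static int
example_all_pages_mapped (void *va, uword n_pages)
{
  clib_mem_page_stats_t stats;

  clib_mem_get_page_stats (va, CLIB_MEM_PAGE_SZ_DEFAULT, n_pages, &stats);
  return stats.mapped == stats.total;
}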
570 :
571 :
572 : __clib_export u64 *
573 1118 : clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size,
574 : int n_pages)
575 : {
576 1118 : int pagesize = sysconf (_SC_PAGESIZE);
577 : int fd;
578 : int i;
579 1118 : u64 *r = 0;
580 :
581 1118 : log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
582 :
583 1118 : if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
584 0 : return 0;
585 :
586 2236 : for (i = 0; i < n_pages; i++)
587 : {
588 1118 : u64 seek, pagemap = 0;
589 1118 : uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
590 1118 : seek = ((u64) vaddr / pagesize) * sizeof (u64);
591 1118 : if (lseek (fd, seek, SEEK_SET) != seek)
592 0 : goto done;
593 :
594 1118 : if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap)))
595 0 : goto done;
596 :
597 1118 : if ((pagemap & (1ULL << 63)) == 0)
598 0 : goto done;
599 :
600 1118 : pagemap &= pow2_mask (55);
601 1118 : vec_add1 (r, pagemap * pagesize);
602 : }
603 :
604 1118 : done:
605 1118 : close (fd);
606 1118 : if (vec_len (r) != n_pages)
607 : {
608 0 : vec_free (r);
609 0 : return 0;
610 : }
611 1118 : return r;
612 : }
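/* Worked example of the pagemap arithmetic above: each /proc/self/pagemap
 * entry is 64 bits, with bit 63 set when the page is present in RAM and
 * bits 0..54 holding the page frame number (hence pow2_mask (55)).  For a
 * 4 KiB system page and an entry value of 0x8000000000012345, the present
 * bit is set, the PFN is 0x12345, and the physical address of the page is
 * 0x12345 * 0x1000 = 0x12345000. */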
613 :
614 : __clib_export int
615 559 : clib_mem_set_numa_affinity (u8 numa_node, int force)
616 : {
617 559 : clib_mem_main_t *mm = &clib_mem_main;
618 559 : clib_bitmap_t *bmp = 0;
619 : int rv;
620 :
621 : /* no numa support */
622 559 : if (mm->numa_node_bitmap == 0)
623 : {
624 0 : if (numa_node)
625 : {
626 0 : vec_reset_length (mm->error);
627 0 : mm->error = clib_error_return (mm->error, "%s: numa not supported",
628 : (char *) __func__);
629 0 : return CLIB_MEM_ERROR;
630 : }
631 : else
632 0 : return 0;
633 : }
634 :
635 559 : bmp = clib_bitmap_set (bmp, numa_node, 1);
636 :
637 559 : rv = syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, bmp,
638 559 : vec_len (bmp) * sizeof (bmp[0]) * 8 + 1);
639 :
640 559 : clib_bitmap_free (bmp);
641 559 : vec_reset_length (mm->error);
642 :
643 559 : if (rv)
644 : {
645 0 : mm->error = clib_error_return_unix (mm->error, (char *) __func__);
646 0 : return CLIB_MEM_ERROR;
647 : }
648 :
649 559 : return 0;
650 : }
651 :
652 : __clib_export int
653 559 : clib_mem_set_default_numa_affinity ()
654 : {
655 559 : clib_mem_main_t *mm = &clib_mem_main;
656 :
657 559 : if (syscall (__NR_set_mempolicy, MPOL_DEFAULT, 0, 0))
658 : {
659 0 : vec_reset_length (mm->error);
660 0 : mm->error = clib_error_return_unix (mm->error, (char *) __func__);
661 0 : return CLIB_MEM_ERROR;
662 : }
663 559 : return 0;
664 : }
665 :
666 : /*
667 : * fd.io coding-style-patch-verification: ON
668 : *
669 : * Local Variables:
670 : * eval: (c-set-style "gnu")
671 : * End:
672 : */