LCOV - code coverage report
Current view: top level - vppinfra/linux - mem.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 194 327 59.3 %
Date: 2023-07-05 22:20:52 Functions: 13 17 76.5 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2017 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : 
      16             : #define _GNU_SOURCE
      17             : #include <stdlib.h>
      18             : #include <sys/types.h>
      19             : #include <sys/stat.h>
      20             : #include <unistd.h>
      21             : #include <sys/mount.h>
      22             : #include <sys/mman.h>
      23             : #include <fcntl.h>
      24             : #include <linux/mempolicy.h>
      25             : #include <linux/memfd.h>
      26             : 
      27             : #include <vppinfra/clib.h>
      28             : #include <vppinfra/mem.h>
      29             : #include <vppinfra/lock.h>
      30             : #include <vppinfra/time.h>
      31             : #include <vppinfra/bitmap.h>
      32             : #include <vppinfra/format.h>
      33             : #include <vppinfra/clib_error.h>
      34             : #include <vppinfra/linux/sysfs.h>
      35             : 
      36             : #ifndef F_LINUX_SPECIFIC_BASE
      37             : #define F_LINUX_SPECIFIC_BASE 1024
      38             : #endif
      39             : 
      40             : #ifndef F_ADD_SEALS
      41             : #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
      42             : #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
      43             : 
      44             : #define F_SEAL_SEAL     0x0001  /* prevent further seals from being set */
      45             : #define F_SEAL_SHRINK   0x0002  /* prevent file from shrinking */
      46             : #define F_SEAL_GROW     0x0004  /* prevent file from growing */
      47             : #define F_SEAL_WRITE    0x0008  /* prevent writes */
      48             : #endif
      49             : 
      50             : #ifndef MFD_HUGETLB
      51             : #define MFD_HUGETLB 0x0004U
      52             : #endif
      53             : 
      54             : #ifndef MAP_HUGE_SHIFT
      55             : #define MAP_HUGE_SHIFT 26
      56             : #endif
      57             : 
      58             : #ifndef MFD_HUGE_SHIFT
      59             : #define MFD_HUGE_SHIFT 26
      60             : #endif
      61             : 
      62             : #ifndef MAP_FIXED_NOREPLACE
      63             : #define MAP_FIXED_NOREPLACE 0x100000
      64             : #endif
      65             : 
      66             : static void
      67       31386 : map_lock ()
      68             : {
      69       31386 :   while (clib_atomic_test_and_set (&clib_mem_main.map_lock))
      70           0 :     CLIB_PAUSE ();
      71       31386 : }
      72             : 
      73             : static void
      74       31386 : map_unlock ()
      75             : {
      76       31386 :   clib_atomic_release (&clib_mem_main.map_lock);
      77       31386 : }
      78             : 
      79             : static clib_mem_page_sz_t
      80           0 : legacy_get_log2_default_hugepage_size (void)
      81             : {
      82           0 :   clib_mem_page_sz_t log2_page_size = CLIB_MEM_PAGE_SZ_UNKNOWN;
      83             :   FILE *fp;
      84           0 :   char tmp[33] = { };
      85             : 
      86           0 :   if ((fp = fopen ("/proc/meminfo", "r")) == NULL)
      87           0 :     return CLIB_MEM_PAGE_SZ_UNKNOWN;
      88             : 
      89           0 :   while (fscanf (fp, "%32s", tmp) > 0)
      90           0 :     if (strncmp ("Hugepagesize:", tmp, 13) == 0)
      91             :       {
      92             :         u32 size;
      93           0 :         if (fscanf (fp, "%u", &size) > 0)
      94           0 :           log2_page_size = 10 + min_log2 (size);
      95           0 :         break;
      96             :       }
      97             : 
      98           0 :   fclose (fp);
      99           0 :   return log2_page_size;
     100             : }
     101             : 
     102             : void
     103        1189 : clib_mem_main_init (void)
     104             : {
     105        1189 :   clib_mem_main_t *mm = &clib_mem_main;
     106             :   long sysconf_page_size;
     107             :   uword page_size;
     108             :   void *va;
     109             :   int fd;
     110             : 
     111        1189 :   if (mm->log2_page_sz != CLIB_MEM_PAGE_SZ_UNKNOWN)
     112         571 :     return;
     113             : 
     114             :   /* system page size */
     115         618 :   sysconf_page_size = sysconf (_SC_PAGESIZE);
     116         618 :   if (sysconf_page_size < 0)
     117             :     {
     118           0 :       clib_panic ("Could not determine the page size");
     119             :     }
     120         618 :   page_size = sysconf_page_size;
     121         618 :   mm->log2_page_sz = min_log2 (page_size);
     122             : 
     123             :   /* default system hugeppage size */
     124         618 :   if ((fd = syscall (__NR_memfd_create, "test", MFD_HUGETLB)) != -1)
     125             :     {
     126         618 :       mm->log2_default_hugepage_sz = clib_mem_get_fd_log2_page_size (fd);
     127         618 :       close (fd);
     128             :     }
     129             :   else                          /* likely kernel older than 4.14 */
     130           0 :     mm->log2_default_hugepage_sz = legacy_get_log2_default_hugepage_size ();
     131             : 
     132         618 :   mm->log2_sys_default_hugepage_sz = mm->log2_default_hugepage_sz;
     133             : 
     134             :   /* numa nodes */
     135         618 :   va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
     136             :              MAP_ANONYMOUS, -1, 0);
     137         618 :   if (va == MAP_FAILED)
     138           0 :     return;
     139             : 
     140         618 :   if (mlock (va, page_size))
     141           0 :     goto done;
     142             : 
     143       10506 :   for (int i = 0; i < CLIB_MAX_NUMAS; i++)
     144             :     {
     145             :       int status;
     146        9888 :       if (syscall (__NR_move_pages, 0, 1, &va, &i, &status, 0) == 0)
     147        1236 :         mm->numa_node_bitmap |= 1ULL << i;
     148             :     }
     149             : 
     150         618 : done:
     151         618 :   munmap (va, page_size);
     152             : }
     153             : 
     154             : __clib_export u64
     155        1732 : clib_mem_get_fd_page_size (int fd)
     156             : {
     157        1732 :   struct stat st = { 0 };
     158        1732 :   if (fstat (fd, &st) == -1)
     159           0 :     return 0;
     160        1732 :   return st.st_blksize;
     161             : }
     162             : 
     163             : __clib_export clib_mem_page_sz_t
     164        1567 : clib_mem_get_fd_log2_page_size (int fd)
     165             : {
     166        1567 :   uword page_size = clib_mem_get_fd_page_size (fd);
     167        1567 :   return page_size ? min_log2 (page_size) : CLIB_MEM_PAGE_SZ_UNKNOWN;
     168             : }
     169             : 
     170             : __clib_export void
     171           0 : clib_mem_vm_randomize_va (uword * requested_va,
     172             :                           clib_mem_page_sz_t log2_page_size)
     173             : {
     174           0 :   u8 bit_mask = 15;
     175             : 
     176           0 :   if (log2_page_size <= 12)
     177           0 :     bit_mask = 15;
     178           0 :   else if (log2_page_size > 12 && log2_page_size <= 16)
     179           0 :     bit_mask = 3;
     180             :   else
     181           0 :     bit_mask = 0;
     182             : 
     183           0 :   *requested_va +=
     184           0 :     (clib_cpu_time_now () & bit_mask) * (1ull << log2_page_size);
     185           0 : }
     186             : 
     187             : static int
     188           0 : legacy_memfd_create (u8 * name)
     189             : {
     190           0 :   clib_mem_main_t *mm = &clib_mem_main;
     191           0 :   int fd = -1;
     192             :   char *mount_dir;
     193             :   u8 *temp;
     194             :   u8 *filename;
     195             : 
     196             :   /*
     197             :    * Since mkdtemp will modify template string "/tmp/hugepage_mount.XXXXXX",
     198             :    * it must not be a string constant, but should be declared as
     199             :    * a character array.
     200             :    */
     201           0 :   temp = format (0, "/tmp/hugepage_mount.XXXXXX%c", 0);
     202             : 
     203             :   /* create mount directory */
     204           0 :   if ((mount_dir = mkdtemp ((char *) temp)) == 0)
     205             :     {
     206           0 :       vec_free (temp);
     207           0 :       vec_reset_length (mm->error);
     208           0 :       mm->error = clib_error_return_unix (mm->error, "mkdtemp");
     209           0 :       return CLIB_MEM_ERROR;
     210             :     }
     211             : 
     212           0 :   if (mount ("none", mount_dir, "hugetlbfs", 0, NULL))
     213             :     {
     214           0 :       vec_free (temp);
     215           0 :       rmdir ((char *) mount_dir);
     216           0 :       vec_reset_length (mm->error);
     217           0 :       mm->error = clib_error_return_unix (mm->error, "mount");
     218           0 :       return CLIB_MEM_ERROR;
     219             :     }
     220             : 
     221           0 :   filename = format (0, "%s/%s%c", mount_dir, name, 0);
     222             : 
     223           0 :   if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
     224             :     {
     225           0 :       vec_reset_length (mm->error);
     226           0 :       mm->error = clib_error_return_unix (mm->error, "mkdtemp");
     227             :     }
     228             : 
     229           0 :   umount2 ((char *) mount_dir, MNT_DETACH);
     230           0 :   rmdir ((char *) mount_dir);
     231           0 :   vec_free (filename);
     232           0 :   vec_free (temp);
     233             : 
     234           0 :   return fd;
     235             : }
     236             : 
     237             : __clib_export int
     238        1309 : clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
     239             : {
     240        1309 :   clib_mem_main_t *mm = &clib_mem_main;
     241             :   int fd;
     242             :   unsigned int memfd_flags;
     243             :   va_list va;
     244        1309 :   u8 *s = 0;
     245             : 
     246        1309 :   if (log2_page_size == mm->log2_page_sz)
     247         559 :     log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
     248         750 :   else if (log2_page_size == mm->log2_sys_default_hugepage_sz)
     249           0 :     log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE;
     250             : 
     251        1309 :   switch (log2_page_size)
     252             :     {
     253           0 :     case CLIB_MEM_PAGE_SZ_UNKNOWN:
     254           0 :       return CLIB_MEM_ERROR;
     255        1309 :     case CLIB_MEM_PAGE_SZ_DEFAULT:
     256        1309 :       memfd_flags = MFD_ALLOW_SEALING;
     257        1309 :       break;
     258           0 :     case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
     259           0 :       memfd_flags = MFD_HUGETLB;
     260           0 :       break;
     261           0 :     default:
     262           0 :       memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
     263             :     }
     264             : 
     265        1309 :   va_start (va, fmt);
     266        1309 :   s = va_format (0, fmt, &va);
     267        1309 :   va_end (va);
     268             : 
     269             :   /* memfd_create maximum string size is 249 chars without trailing zero */
     270        1309 :   if (vec_len (s) > 249)
     271           0 :     vec_set_len (s, 249);
     272        1309 :   vec_add1 (s, 0);
     273             : 
     274             :   /* memfd_create introduced in kernel 3.17, we don't support older kernels */
     275        1309 :   fd = syscall (__NR_memfd_create, (char *) s, memfd_flags);
     276             : 
     277             :   /* kernel versions < 4.14 does not support memfd_create for huge pages */
     278        1309 :   if (fd == -1 && errno == EINVAL &&
     279             :       log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE)
     280             :     {
     281           0 :       fd = legacy_memfd_create (s);
     282             :     }
     283        1309 :   else if (fd == -1)
     284             :     {
     285           0 :       vec_reset_length (mm->error);
     286           0 :       mm->error = clib_error_return_unix (mm->error, "memfd_create");
     287           0 :       vec_free (s);
     288           0 :       return CLIB_MEM_ERROR;
     289             :     }
     290             : 
     291        1309 :   vec_free (s);
     292             : 
     293        2618 :   if ((memfd_flags & MFD_ALLOW_SEALING) &&
     294        1309 :       ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1))
     295             :     {
     296           0 :       vec_reset_length (mm->error);
     297           0 :       mm->error = clib_error_return_unix (mm->error, "fcntl (F_ADD_SEALS)");
     298           0 :       close (fd);
     299           0 :       return CLIB_MEM_ERROR;
     300             :     }
     301             : 
     302        1309 :   return fd;
     303             : }
     304             : 
     305             : uword
     306       30440 : clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz)
     307             : {
     308       30440 :   clib_mem_main_t *mm = &clib_mem_main;
     309       30440 :   uword pagesize = 1ULL << log2_page_sz;
     310       30440 :   uword sys_page_sz = 1ULL << mm->log2_page_sz;
     311             :   uword n_bytes;
     312       30440 :   void *base = 0, *p;
     313             : 
     314       30440 :   size = round_pow2 (size, pagesize);
     315             : 
     316             :   /* in adition of requested reservation, we also rserve one system page
     317             :    * (typically 4K) adjacent to the start off reservation */
     318             : 
     319       30440 :   if (start)
     320             :     {
     321             :       /* start address is provided, so we just need to make sure we are not
     322             :        * replacing existing map */
     323         559 :       if (start & pow2_mask (log2_page_sz))
     324           0 :         return ~0;
     325             : 
     326         559 :       base = (void *) start - sys_page_sz;
     327         559 :       base = mmap (base, size + sys_page_sz, PROT_NONE,
     328             :                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
     329         559 :       return (base == MAP_FAILED) ? ~0 : start;
     330             :     }
     331             : 
     332             :   /* to make sure that we get reservation aligned to page_size we need to
     333             :    * request one additional page as mmap will return us address which is
     334             :    * aligned only to system page size */
     335       29881 :   base = mmap (0, size + pagesize, PROT_NONE,
     336             :                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     337             : 
     338       29881 :   if (base == MAP_FAILED)
     339           0 :     return ~0;
     340             : 
     341             :   /* return additional space at the end of allocation */
     342       29881 :   p = base + size + pagesize;
     343       29881 :   n_bytes = (uword) p & pow2_mask (log2_page_sz);
     344       29881 :   if (n_bytes)
     345             :     {
     346           0 :       p -= n_bytes;
     347           0 :       munmap (p, n_bytes);
     348             :     }
     349             : 
     350             :   /* return additional space at the start of allocation */
     351       29881 :   n_bytes = pagesize - sys_page_sz - n_bytes;
     352       29881 :   if (n_bytes)
     353             :     {
     354           0 :       munmap (base, n_bytes);
     355           0 :       base += n_bytes;
     356             :     }
     357             : 
     358       29881 :   return (uword) base + sys_page_sz;
     359             : }
     360             : 
     361             : __clib_export clib_mem_vm_map_hdr_t *
     362           0 : clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t * hdr)
     363             : {
     364           0 :   clib_mem_main_t *mm = &clib_mem_main;
     365           0 :   uword sys_page_sz = 1ULL << mm->log2_page_sz;
     366             :   clib_mem_vm_map_hdr_t *next;
     367           0 :   if (hdr == 0)
     368             :     {
     369           0 :       hdr = mm->first_map;
     370           0 :       if (hdr)
     371           0 :         mprotect (hdr, sys_page_sz, PROT_READ);
     372           0 :       return hdr;
     373             :     }
     374           0 :   next = hdr->next;
     375           0 :   mprotect (hdr, sys_page_sz, PROT_NONE);
     376           0 :   if (next)
     377           0 :     mprotect (next, sys_page_sz, PROT_READ);
     378           0 :   return next;
     379             : }
     380             : 
     381             : void *
     382       29881 : clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
     383             :                           uword size, int fd, uword offset, char *name)
     384             : {
     385       29881 :   clib_mem_main_t *mm = &clib_mem_main;
     386             :   clib_mem_vm_map_hdr_t *hdr;
     387       29881 :   uword sys_page_sz = 1ULL << mm->log2_page_sz;
     388       29881 :   int mmap_flags = MAP_FIXED, is_huge = 0;
     389             : 
     390       29881 :   if (fd != -1)
     391             :     {
     392         765 :       mmap_flags |= MAP_SHARED;
     393         765 :       log2_page_sz = clib_mem_get_fd_log2_page_size (fd);
     394         765 :       if (log2_page_sz > mm->log2_page_sz)
     395           0 :         is_huge = 1;
     396             :     }
     397             :   else
     398             :     {
     399       29116 :       mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
     400             : 
     401       29116 :       if (log2_page_sz == mm->log2_page_sz)
     402        2218 :         log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
     403             : 
     404       29116 :       switch (log2_page_sz)
     405             :         {
     406           0 :         case CLIB_MEM_PAGE_SZ_UNKNOWN:
     407             :           /* will fail later */
     408           0 :           break;
     409       29116 :         case CLIB_MEM_PAGE_SZ_DEFAULT:
     410       29116 :           log2_page_sz = mm->log2_page_sz;
     411       29116 :           break;
     412           0 :         case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
     413           0 :           mmap_flags |= MAP_HUGETLB;
     414           0 :           log2_page_sz = mm->log2_default_hugepage_sz;
     415           0 :           is_huge = 1;
     416           0 :           break;
     417           0 :         default:
     418           0 :           mmap_flags |= MAP_HUGETLB;
     419           0 :           mmap_flags |= log2_page_sz << MAP_HUGE_SHIFT;
     420           0 :           is_huge = 1;
     421             :         }
     422             :     }
     423             : 
     424       29881 :   if (log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN)
     425           0 :     return CLIB_MEM_VM_MAP_FAILED;
     426             : 
     427       29881 :   size = round_pow2 (size, 1ULL << log2_page_sz);
     428             : 
     429       29881 :   base = (void *) clib_mem_vm_reserve ((uword) base, size, log2_page_sz);
     430             : 
     431       29881 :   if (base == (void *) ~0)
     432           0 :     return CLIB_MEM_VM_MAP_FAILED;
     433             : 
     434       29881 :   base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset);
     435             : 
     436       29881 :   if (base == MAP_FAILED)
     437           0 :     return CLIB_MEM_VM_MAP_FAILED;
     438             : 
     439       29881 :   if (is_huge && (mlock (base, size) != 0))
     440             :     {
     441           0 :       munmap (base, size);
     442           0 :       return CLIB_MEM_VM_MAP_FAILED;
     443             :     }
     444             : 
     445       29881 :   hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
     446             :               MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
     447             : 
     448       29881 :   if (hdr != base - sys_page_sz)
     449             :     {
     450           0 :       munmap (base, size);
     451           0 :       return CLIB_MEM_VM_MAP_FAILED;
     452             :     }
     453             : 
     454       29881 :   map_lock ();
     455             : 
     456       29881 :   if (mm->last_map)
     457             :     {
     458       28729 :       mprotect (mm->last_map, sys_page_sz, PROT_READ | PROT_WRITE);
     459       28729 :       mm->last_map->next = hdr;
     460       28729 :       mprotect (mm->last_map, sys_page_sz, PROT_NONE);
     461             :     }
     462             :   else
     463        1152 :     mm->first_map = hdr;
     464             : 
     465       29881 :   clib_mem_unpoison (hdr, sys_page_sz);
     466       29881 :   hdr->next = 0;
     467       29881 :   hdr->prev = mm->last_map;
     468       29881 :   snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
     469       29881 :   mm->last_map = hdr;
     470             : 
     471       29881 :   hdr->base_addr = (uword) base;
     472       29881 :   hdr->log2_page_sz = log2_page_sz;
     473       29881 :   hdr->num_pages = size >> log2_page_sz;
     474       29881 :   hdr->fd = fd;
     475       29881 :   hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
     476       29881 :   mprotect (hdr, sys_page_sz, PROT_NONE);
     477             : 
     478       29881 :   map_unlock ();
     479             : 
     480       29881 :   clib_mem_unpoison (base, size);
     481       29881 :   return base;
     482             : }
     483             : 
     484             : __clib_export int
     485        1505 : clib_mem_vm_unmap (void *base)
     486             : {
     487        1505 :   clib_mem_main_t *mm = &clib_mem_main;
     488        1505 :   uword size, sys_page_sz = 1ULL << mm->log2_page_sz;
     489        1505 :   clib_mem_vm_map_hdr_t *hdr = base - sys_page_sz;;
     490             : 
     491        1505 :   map_lock ();
     492        1505 :   if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
     493           0 :     goto out;
     494             : 
     495        1505 :   size = hdr->num_pages << hdr->log2_page_sz;
     496        1505 :   if (munmap ((void *) hdr->base_addr, size) != 0)
     497           0 :     goto out;
     498             : 
     499        1505 :   if (hdr->next)
     500             :     {
     501         738 :       mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
     502         738 :       hdr->next->prev = hdr->prev;
     503         738 :       mprotect (hdr->next, sys_page_sz, PROT_NONE);
     504             :     }
     505             :   else
     506         767 :     mm->last_map = hdr->prev;
     507             : 
     508        1505 :   if (hdr->prev)
     509             :     {
     510         946 :       mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
     511         946 :       hdr->prev->next = hdr->next;
     512         946 :       mprotect (hdr->prev, sys_page_sz, PROT_NONE);
     513             :     }
     514             :   else
     515         559 :     mm->first_map = hdr->next;
     516             : 
     517        1505 :   map_unlock ();
     518             : 
     519        1505 :   if (munmap (hdr, sys_page_sz) != 0)
     520           0 :     return CLIB_MEM_ERROR;
     521             : 
     522        1505 :   return 0;
     523           0 : out:
     524           0 :   map_unlock ();
     525           0 :   return CLIB_MEM_ERROR;
     526             : }
     527             : 
     528             : __clib_export void
     529        1400 : clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
     530             :                          uword n_pages, clib_mem_page_stats_t * stats)
     531             : {
     532        1400 :   int i, *status = 0;
     533        1400 :   void **ptr = 0;
     534             : 
     535        1400 :   log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
     536             : 
     537        1400 :   vec_validate (status, n_pages - 1);
     538        1400 :   vec_validate (ptr, n_pages - 1);
     539             : 
     540     1587530 :   for (i = 0; i < n_pages; i++)
     541     1586130 :     ptr[i] = start + (i << log2_page_size);
     542             : 
     543        1400 :   clib_memset (stats, 0, sizeof (clib_mem_page_stats_t));
     544        1400 :   stats->total = n_pages;
     545        1400 :   stats->log2_page_sz = log2_page_size;
     546             : 
     547        1400 :   if (syscall (__NR_move_pages, 0, n_pages, ptr, 0, status, 0) != 0)
     548             :     {
     549           0 :       stats->unknown = n_pages;
     550           0 :       goto done;
     551             :     }
     552             : 
     553     1587530 :   for (i = 0; i < n_pages; i++)
     554             :     {
     555     1586130 :       if (status[i] >= 0 && status[i] < CLIB_MAX_NUMAS)
     556             :         {
     557        7772 :           stats->mapped++;
     558        7772 :           stats->per_numa[status[i]]++;
     559             :         }
     560     1578360 :       else if (status[i] == -EFAULT)
     561     1578360 :         stats->not_mapped++;
     562             :       else
     563           0 :         stats->unknown++;
     564             :     }
     565             : 
     566        1400 : done:
     567        1400 :   vec_free (status);
     568        1400 :   vec_free (ptr);
     569        1400 : }
     570             : 
     571             : 
     572             : __clib_export u64 *
     573        1118 : clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size,
     574             :                        int n_pages)
     575             : {
     576        1118 :   int pagesize = sysconf (_SC_PAGESIZE);
     577             :   int fd;
     578             :   int i;
     579        1118 :   u64 *r = 0;
     580             : 
     581        1118 :   log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
     582             : 
     583        1118 :   if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
     584           0 :     return 0;
     585             : 
     586        2236 :   for (i = 0; i < n_pages; i++)
     587             :     {
     588        1118 :       u64 seek, pagemap = 0;
     589        1118 :       uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
     590        1118 :       seek = ((u64) vaddr / pagesize) * sizeof (u64);
     591        1118 :       if (lseek (fd, seek, SEEK_SET) != seek)
     592           0 :         goto done;
     593             : 
     594        1118 :       if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap)))
     595           0 :         goto done;
     596             : 
     597        1118 :       if ((pagemap & (1ULL << 63)) == 0)
     598           0 :         goto done;
     599             : 
     600        1118 :       pagemap &= pow2_mask (55);
     601        1118 :       vec_add1 (r, pagemap * pagesize);
     602             :     }
     603             : 
     604        1118 : done:
     605        1118 :   close (fd);
     606        1118 :   if (vec_len (r) != n_pages)
     607             :     {
     608           0 :       vec_free (r);
     609           0 :       return 0;
     610             :     }
     611        1118 :   return r;
     612             : }
     613             : 
     614             : __clib_export int
     615         559 : clib_mem_set_numa_affinity (u8 numa_node, int force)
     616             : {
     617         559 :   clib_mem_main_t *mm = &clib_mem_main;
     618         559 :   clib_bitmap_t *bmp = 0;
     619             :   int rv;
     620             : 
     621             :   /* no numa support */
     622         559 :   if (mm->numa_node_bitmap == 0)
     623             :     {
     624           0 :       if (numa_node)
     625             :         {
     626           0 :           vec_reset_length (mm->error);
     627           0 :           mm->error = clib_error_return (mm->error, "%s: numa not supported",
     628             :                                          (char *) __func__);
     629           0 :           return CLIB_MEM_ERROR;
     630             :         }
     631             :       else
     632           0 :         return 0;
     633             :     }
     634             : 
     635         559 :   bmp = clib_bitmap_set (bmp, numa_node, 1);
     636             : 
     637         559 :   rv = syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, bmp,
     638         559 :                 vec_len (bmp) * sizeof (bmp[0]) * 8 + 1);
     639             : 
     640         559 :   clib_bitmap_free (bmp);
     641         559 :   vec_reset_length (mm->error);
     642             : 
     643         559 :   if (rv)
     644             :     {
     645           0 :       mm->error = clib_error_return_unix (mm->error, (char *) __func__);
     646           0 :       return CLIB_MEM_ERROR;
     647             :     }
     648             : 
     649         559 :   return 0;
     650             : }
     651             : 
     652             : __clib_export int
     653         559 : clib_mem_set_default_numa_affinity ()
     654             : {
     655         559 :   clib_mem_main_t *mm = &clib_mem_main;
     656             : 
     657         559 :   if (syscall (__NR_set_mempolicy, MPOL_DEFAULT, 0, 0))
     658             :     {
     659           0 :       vec_reset_length (mm->error);
     660           0 :       mm->error = clib_error_return_unix (mm->error, (char *) __func__);
     661           0 :       return CLIB_MEM_ERROR;
     662             :     }
     663         559 :   return 0;
     664             : }
     665             : 
     666             : /*
     667             :  * fd.io coding-style-patch-verification: ON
     668             :  *
     669             :  * Local Variables:
     670             :  * eval: (c-set-style "gnu")
     671             :  * End:
     672             :  */

Generated by: LCOV version 1.14