LCOV - coverage-filtered.info - vppinfra/vector/index_to_ptr.h

LCOV - code coverage report

Current view:	top level - vppinfra/vector - index_to_ptr.h (source / functions)		Hit	Total	Coverage
Test:	coverage-filtered.info	Lines:	107	165	64.8 %
Date:	2023-10-26 01:39:38	Functions:	2	2	100.0 %

          Line data    Source code

       1             : /* SPDX-License-Identifier: Apache-2.0
       2             :  * Copyright(c) 2021 Cisco Systems, Inc.
       3             :  */
       4             : 
       5             : #ifndef included_vector_index_to_ptr_h
       6             : #define included_vector_index_to_ptr_h
       7             : #include <vppinfra/clib.h>
       8             : 
       9             : #ifdef CLIB_HAVE_VEC128
      10             : static_always_inline void
      11    10928870 : clib_index_to_ptr_u32x4 (u32 *indices, void **ptrs, i32 i, u64x2 ov, u8 shift)
      12             : {
      13    10928870 :   u32x4 iv4 = u32x4_load_unaligned (indices + i);
      14             :   u64x2 pv2;
      15    10928870 :   pv2 = u64x2_from_u32x4 (iv4);
      16    10928870 :   u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i);
      17             : #ifdef __aarch64__
      18             :   pv2 = u64x2_from_u32x4_high (iv4);
      19             : #else
      20    10928870 :   pv2 = u64x2_from_u32x4 ((u32x4) u8x16_word_shift_right (iv4, 8));
      21             : #endif
      22    10928870 :   u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i + 2);
      23    10928870 : }
      24             : #endif
      25             : 
      26             : /** \brief Convert array of indices to pointers with base and shift
      27             : 
      28             :     @param indices source array of u32 indices
      29             :     @param base base pointer
      30             :     @param shift number of bits each index is shifted left
      31             :     @param ptrs destination array of pointers
      32             :     @param n_elts number of elements in the source array
      33             : */
      34             : 
      35             : static_always_inline void
      36    73526231 : clib_index_to_ptr_u32 (u32 *indices, void *base, u8 shift, void **ptrs,
      37             :                        u32 n_elts)
      38             : {
      39             : #if defined CLIB_HAVE_VEC512
      40           0 :   if (n_elts >= 8)
      41             :     {
      42           0 :       u64x8 off = u64x8_splat ((u64) base);
      43             :       u64x8 b0, b1, b2, b3, b4, b5, b6, b7;
      44             : 
      45           0 :       while (n_elts >= 64)
      46             :         {
      47           0 :           b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
      48           0 :           b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
      49           0 :           b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
      50           0 :           b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
      51           0 :           b4 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 32));
      52           0 :           b5 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 40));
      53           0 :           b6 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 48));
      54           0 :           b7 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 56));
      55           0 :           u64x8_store_unaligned ((b0 << shift) + off, ptrs);
      56           0 :           u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
      57           0 :           u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
      58           0 :           u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
      59           0 :           u64x8_store_unaligned ((b4 << shift) + off, ptrs + 32);
      60           0 :           u64x8_store_unaligned ((b5 << shift) + off, ptrs + 40);
      61           0 :           u64x8_store_unaligned ((b6 << shift) + off, ptrs + 48);
      62           0 :           u64x8_store_unaligned ((b7 << shift) + off, ptrs + 56);
      63           0 :           ptrs += 64;
      64           0 :           indices += 64;
      65           0 :           n_elts -= 64;
      66             :         }
      67             : 
      68           0 :       if (n_elts == 0)
      69           0 :         return;
      70             : 
      71           0 :       if (n_elts >= 32)
      72             :         {
      73           0 :           b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
      74           0 :           b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
      75           0 :           b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
      76           0 :           b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
      77           0 :           u64x8_store_unaligned ((b0 << shift) + off, ptrs);
      78           0 :           u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
      79           0 :           u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
      80           0 :           u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
      81           0 :           ptrs += 32;
      82           0 :           indices += 32;
      83           0 :           n_elts -= 32;
      84             :         }
      85           0 :       if (n_elts >= 16)
      86             :         {
      87           0 :           b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
      88           0 :           b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
      89           0 :           u64x8_store_unaligned ((b0 << shift) + off, ptrs);
      90           0 :           u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
      91           0 :           ptrs += 16;
      92           0 :           indices += 16;
      93           0 :           n_elts -= 16;
      94             :         }
      95           0 :       if (n_elts >= 8)
      96             :         {
      97           0 :           b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
      98           0 :           u64x8_store_unaligned ((b0 << shift) + off, ptrs);
      99           0 :           ptrs += 8;
     100           0 :           indices += 8;
     101           0 :           n_elts -= 8;
     102             :         }
     103             : 
     104           0 :       if (n_elts == 0)
     105           0 :         return;
     106             : 
     107           0 :       b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + n_elts - 8));
     108           0 :       u64x8_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 8);
     109             :     }
     110             :   else
     111             :     {
     112           0 :       u32 mask = pow2_mask (n_elts);
     113           0 :       u64x8 r = u64x8_from_u32x8 (u32x8_mask_load_zero (indices, mask));
     114           0 :       u64x8_mask_store ((r << shift) + u64x8_splat ((u64) base), ptrs, mask);
     115           0 :       return;
     116             :     }
     117             : #elif defined CLIB_HAVE_VEC256
     118    72011786 :   if (n_elts >= 4)
     119             :     {
     120    70054272 :       u64x4 off = u64x4_splat ((u64) base);
     121             :       u64x4 b0, b1, b2, b3, b4, b5, b6, b7;
     122             : 
     123    81892072 :       while (n_elts >= 32)
     124             :         {
     125    11837817 :           b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
     126    11837817 :           b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
     127    11837817 :           b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
     128    11837817 :           b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
     129    11837817 :           b4 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 16));
     130    11837817 :           b5 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 20));
     131    11837817 :           b6 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 24));
     132    11837817 :           b7 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 28));
     133    11837817 :           u64x4_store_unaligned ((b0 << shift) + off, ptrs);
     134    11837817 :           u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
     135    11837817 :           u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
     136    11837817 :           u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
     137    11837817 :           u64x4_store_unaligned ((b4 << shift) + off, ptrs + 16);
     138    11837817 :           u64x4_store_unaligned ((b5 << shift) + off, ptrs + 20);
     139    11837817 :           u64x4_store_unaligned ((b6 << shift) + off, ptrs + 24);
     140    11837817 :           u64x4_store_unaligned ((b7 << shift) + off, ptrs + 28);
     141    11837816 :           ptrs += 32;
     142    11837816 :           indices += 32;
     143    11837816 :           n_elts -= 32;
     144             :         }
     145             : 
     146    70054274 :       if (n_elts == 0)
     147      674428 :         return;
     148             : 
     149    69379831 :       if (n_elts >= 16)
     150             :         {
     151     2714077 :           b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
     152     2714075 :           b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
     153     2714074 :           b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
     154     2714073 :           b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
     155     2714073 :           u64x4_store_unaligned ((b0 << shift) + off, ptrs);
     156     2714073 :           u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
     157     2714073 :           u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
     158     2714073 :           u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
     159     2714073 :           ptrs += 16;
     160     2714073 :           indices += 16;
     161     2714073 :           n_elts -= 16;
     162             :         }
     163    69379827 :       if (n_elts >= 8)
     164             :         {
     165     3673853 :           b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
     166     3673853 :           b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
     167     3673853 :           u64x4_store_unaligned ((b0 << shift) + off, ptrs);
     168     3673853 :           u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
     169     3673853 :           ptrs += 8;
     170     3673853 :           indices += 8;
     171     3673853 :           n_elts -= 8;
     172             :         }
     173    69379827 :       if (n_elts > 4)
     174             :         {
     175     3060349 :           b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
     176     3060349 :           u64x4_store_unaligned ((b0 << shift) + off, ptrs);
     177     3060349 :           ptrs += 4;
     178     3060349 :           indices += 4;
     179     3060349 :           n_elts -= 4;
     180             :         }
     181             : 
     182    69379827 :       b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + n_elts - 4));
     183    69379827 :       u64x4_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 4);
     184    69379827 :       return;
     185             :     }
     186             : #ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
     187             :   else
     188             :     {
     189     1957517 :       u32 mask = pow2_mask (n_elts);
     190     1957517 :       u64x4 r = u64x4_from_u32x4 (u32x4_mask_load_zero (indices, mask));
     191     1957517 :       u64x4_mask_store ((r << shift) + u64x4_splat ((u64) base), ptrs, mask);
     192     1957517 :       return;
     193             :     }
     194             : #endif
     195             : #elif defined(CLIB_HAVE_VEC128)
     196     1514445 :   if (n_elts >= 4)
     197             :     {
     198     1333896 :       u64x2 ov = u64x2_splat ((u64) base);
     199     1333896 :       u32 *i = (u32 *) indices;
     200     1333896 :       void **p = (void **) ptrs;
     201     1333896 :       u32 n = n_elts;
     202             : 
     203     2300963 :       while (n >= 32)
     204             :         {
     205      967068 :           clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
     206      967068 :           clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
     207      967068 :           clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
     208      967068 :           clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
     209      967068 :           clib_index_to_ptr_u32x4 (indices, ptrs, 16, ov, shift);
     210      967068 :           clib_index_to_ptr_u32x4 (indices, ptrs, 20, ov, shift);
     211      967068 :           clib_index_to_ptr_u32x4 (indices, ptrs, 24, ov, shift);
     212      967068 :           clib_index_to_ptr_u32x4 (indices, ptrs, 28, ov, shift);
     213      967068 :           indices += 32;
     214      967068 :           ptrs += 32;
     215      967068 :           n -= 32;
     216             :         }
     217             : 
     218     1333896 :       if (n == 0)
     219       94343 :         return;
     220             : 
     221     1239553 :       if (n >= 16)
     222             :         {
     223      217055 :           clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
     224      217055 :           clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
     225      217055 :           clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
     226      217055 :           clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
     227      217055 :           indices += 16;
     228      217055 :           ptrs += 16;
     229      217055 :           n -= 16;
     230             :         }
     231             : 
     232     1239553 :       if (n >= 8)
     233             :         {
     234      365533 :           clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
     235      365533 :           clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
     236      365533 :           indices += 8;
     237      365533 :           ptrs += 8;
     238      365533 :           n -= 8;
     239             :         }
     240             : 
     241     1239553 :       if (n > 4)
     242      353504 :         clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
     243             : 
     244     1239553 :       clib_index_to_ptr_u32x4 (i, p, n_elts - 4, ov, shift);
     245     1239553 :       return;
     246             :     }
     247             : #endif
     248      487590 :   while (n_elts)
     249             :     {
     250      307044 :       ptrs[0] = base + ((u64) indices[0] << shift);
     251      307044 :       ptrs += 1;
     252      307044 :       indices += 1;
     253      307044 :       n_elts -= 1;
     254             :     }
     255             : }
     256             : 
     257             : #endif

Generated by: LCOV version 1.14