LCOV - coverage-filtered.info - vppinfra/vector/index_to

LCOV - code coverage report

Current view:	top level - vppinfra/vector - index_to_ptr.h (source / functions)		Hit	Total	Coverage
Test:	coverage-filtered.info	Lines:	107	165	64.8 %
Date:	2023-07-05 22:20:52	Functions:	2	2	100.0 %

          Line data    Source code

       1             : /* SPDX-License-Identifier: Apache-2.0
       2             :  * Copyright(c) 2021 Cisco Systems, Inc.
       3             :  */
       4             : 
       5             : #ifndef included_vector_index_to_ptr_h
       6             : #define included_vector_index_to_ptr_h
       7             : #include <vppinfra/clib.h>
       8             : 
       9             : #ifdef CLIB_HAVE_VEC128
      10             : static_always_inline void
      11     8136531 : clib_index_to_ptr_u32x4 (u32 *indices, void **ptrs, i32 i, u64x2 ov, u8 shift)
      12             : {
      13     8136531 :   u32x4 iv4 = u32x4_load_unaligned (indices + i);
      14             :   u64x2 pv2;
      15     8136531 :   pv2 = u64x2_from_u32x4 (iv4);
      16     8136531 :   u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i);
      17             : #ifdef __aarch64__
      18             :   pv2 = u64x2_from_u32x4_high (iv4);
      19             : #else
      20     8136531 :   pv2 = u64x2_from_u32x4 ((u32x4) u8x16_word_shift_right (iv4, 8));
      21             : #endif
      22     8136531 :   u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i + 2);
      23     8136531 : }
      24             : #endif
      25             : 
      26             : /** \brief Convert array of indices to pointers with base and shift
      27             : 
      28             :     @param indices source array of u32 indices
      29             :     @param base base pointer
      30             :     @param shift number of bits to shift each index left by
      31             :     @param ptrs destination array of pointers
      32             :     @param n_elts number of elements in the source array
      33             : */
      34             : 
      35             : static_always_inline void
      36    59499122 : clib_index_to_ptr_u32 (u32 *indices, void *base, u8 shift, void **ptrs,
      37             :                        u32 n_elts)
      38             : {
      39             : #if defined CLIB_HAVE_VEC512
      40           0 :   if (n_elts >= 8)
      41             :     {
      42           0 :       u64x8 off = u64x8_splat ((u64) base);
      43             :       u64x8 b0, b1, b2, b3, b4, b5, b6, b7;
      44             : 
      45           0 :       while (n_elts >= 64)
      46             :         {
      47           0 :           b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
      48           0 :           b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
      49           0 :           b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
      50           0 :           b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
      51           0 :           b4 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 32));
      52           0 :           b5 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 40));
      53           0 :           b6 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 48));
      54           0 :           b7 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 56));
      55           0 :           u64x8_store_unaligned ((b0 << shift) + off, ptrs);
      56           0 :           u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
      57           0 :           u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
      58           0 :           u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
      59           0 :           u64x8_store_unaligned ((b4 << shift) + off, ptrs + 32);
      60           0 :           u64x8_store_unaligned ((b5 << shift) + off, ptrs + 40);
      61           0 :           u64x8_store_unaligned ((b6 << shift) + off, ptrs + 48);
      62           0 :           u64x8_store_unaligned ((b7 << shift) + off, ptrs + 56);
      63           0 :           ptrs += 64;
      64           0 :           indices += 64;
      65           0 :           n_elts -= 64;
      66             :         }
      67             : 
      68           0 :       if (n_elts == 0)
      69           0 :         return;
      70             : 
      71           0 :       if (n_elts >= 32)
      72             :         {
      73           0 :           b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
      74           0 :           b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
      75           0 :           b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
      76           0 :           b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
      77           0 :           u64x8_store_unaligned ((b0 << shift) + off, ptrs);
      78           0 :           u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
      79           0 :           u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
      80           0 :           u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
      81           0 :           ptrs += 32;
      82           0 :           indices += 32;
      83           0 :           n_elts -= 32;
      84             :         }
      85           0 :       if (n_elts >= 16)
      86             :         {
      87           0 :           b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
      88           0 :           b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
      89           0 :           u64x8_store_unaligned ((b0 << shift) + off, ptrs);
      90           0 :           u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
      91           0 :           ptrs += 16;
      92           0 :           indices += 16;
      93           0 :           n_elts -= 16;
      94             :         }
      95           0 :       if (n_elts >= 8)
      96             :         {
      97           0 :           b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
      98           0 :           u64x8_store_unaligned ((b0 << shift) + off, ptrs);
      99           0 :           ptrs += 8;
     100           0 :           indices += 8;
     101           0 :           n_elts -= 8;
     102             :         }
     103             : 
     104           0 :       if (n_elts == 0)
     105           0 :         return;
     106             : 
     107           0 :       b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + n_elts - 8));
     108           0 :       u64x8_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 8);
     109             :     }
     110             :   else
     111             :     {
     112           0 :       u32 mask = pow2_mask (n_elts);
     113           0 :       u64x8 r = u64x8_from_u32x8 (u32x8_mask_load_zero (indices, mask));
     114           0 :       u64x8_mask_store ((r << shift) + u64x8_splat ((u64) base), ptrs, mask);
     115           0 :       return;
     116             :     }
     117             : #elif defined CLIB_HAVE_VEC256
     118    58170186 :   if (n_elts >= 4)
     119             :     {
     120    56233582 :       u64x4 off = u64x4_splat ((u64) base);
     121             :       u64x4 b0, b1, b2, b3, b4, b5, b6, b7;
     122             : 
     123    66812206 :       while (n_elts >= 32)
     124             :         {
     125    10578629 :           b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
     126    10578629 :           b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
     127    10578628 :           b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
     128    10578628 :           b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
     129    10578629 :           b4 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 16));
     130    10578629 :           b5 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 20));
     131    10578629 :           b6 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 24));
     132    10578629 :           b7 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 28));
     133    10578629 :           u64x4_store_unaligned ((b0 << shift) + off, ptrs);
     134    10578629 :           u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
     135    10578629 :           u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
     136    10578629 :           u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
     137    10578629 :           u64x4_store_unaligned ((b4 << shift) + off, ptrs + 16);
     138    10578629 :           u64x4_store_unaligned ((b5 << shift) + off, ptrs + 20);
     139    10578629 :           u64x4_store_unaligned ((b6 << shift) + off, ptrs + 24);
     140    10578629 :           u64x4_store_unaligned ((b7 << shift) + off, ptrs + 28);
     141    10578629 :           ptrs += 32;
     142    10578629 :           indices += 32;
     143    10578629 :           n_elts -= 32;
     144             :         }
     145             : 
     146    56233582 :       if (n_elts == 0)
     147      655828 :         return;
     148             : 
     149    55577756 :       if (n_elts >= 16)
     150             :         {
     151     2202039 :           b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
     152     2202039 :           b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
     153     2202039 :           b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
     154     2202038 :           b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
     155     2202038 :           u64x4_store_unaligned ((b0 << shift) + off, ptrs);
     156     2202038 :           u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
     157     2202038 :           u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
     158     2202038 :           u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
     159     2202038 :           ptrs += 16;
     160     2202038 :           indices += 16;
     161     2202038 :           n_elts -= 16;
     162             :         }
     163    55577755 :       if (n_elts >= 8)
     164             :         {
     165     3202276 :           b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
     166     3202277 :           b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
     167     3202277 :           u64x4_store_unaligned ((b0 << shift) + off, ptrs);
     168     3202277 :           u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
     169     3202277 :           ptrs += 8;
     170     3202277 :           indices += 8;
     171     3202277 :           n_elts -= 8;
     172             :         }
     173    55577756 :       if (n_elts > 4)
     174             :         {
     175     2563811 :           b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
     176     2563810 :           u64x4_store_unaligned ((b0 << shift) + off, ptrs);
     177     2563811 :           ptrs += 4;
     178     2563811 :           indices += 4;
     179     2563811 :           n_elts -= 4;
     180             :         }
     181             : 
     182    55577756 :       b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + n_elts - 4));
     183    55577755 :       u64x4_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 4);
     184    55577755 :       return;
     185             :     }
     186             : #ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
     187             :   else
     188             :     {
     189     1936605 :       u32 mask = pow2_mask (n_elts);
     190     1936605 :       u64x4 r = u64x4_from_u32x4 (u32x4_mask_load_zero (indices, mask));
     191     1936605 :       u64x4_mask_store ((r << shift) + u64x4_splat ((u64) base), ptrs, mask);
     192     1936605 :       return;
     193             :     }
     194             : #endif
     195             : #elif defined(CLIB_HAVE_VEC128)
     196     1328936 :   if (n_elts >= 4)
     197             :     {
     198     1145569 :       u64x2 ov = u64x2_splat ((u64) base);
     199     1145569 :       u32 *i = (u32 *) indices;
     200     1145569 :       void **p = (void **) ptrs;
     201     1145569 :       u32 n = n_elts;
     202             : 
     203     1829588 :       while (n >= 32)
     204             :         {
     205      684017 :           clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
     206      684017 :           clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
     207      684017 :           clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
     208      684017 :           clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
     209      684017 :           clib_index_to_ptr_u32x4 (indices, ptrs, 16, ov, shift);
     210      684017 :           clib_index_to_ptr_u32x4 (indices, ptrs, 20, ov, shift);
     211      684017 :           clib_index_to_ptr_u32x4 (indices, ptrs, 24, ov, shift);
     212      684017 :           clib_index_to_ptr_u32x4 (indices, ptrs, 28, ov, shift);
     213      684017 :           indices += 32;
     214      684017 :           ptrs += 32;
     215      684017 :           n -= 32;
     216             :         }
     217             : 
     218     1145569 :       if (n == 0)
     219       98494 :         return;
     220             : 
     221     1047075 :       if (n >= 16)
     222             :         {
     223      180230 :           clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
     224      180230 :           clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
     225      180230 :           clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
     226      180230 :           clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
     227      180230 :           indices += 16;
     228      180230 :           ptrs += 16;
     229      180230 :           n -= 16;
     230             :         }
     231             : 
     232     1047075 :       if (n >= 8)
     233             :         {
     234      297180 :           clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
     235      297180 :           clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
     236      297180 :           indices += 8;
     237      297180 :           ptrs += 8;
     238      297180 :           n -= 8;
     239             :         }
     240             : 
     241     1047075 :       if (n > 4)
     242      302035 :         clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
     243             : 
     244     1047075 :       clib_index_to_ptr_u32x4 (i, p, n_elts - 4, ov, shift);
     245     1047075 :       return;
     246             :     }
     247             : #endif
     248      493919 :   while (n_elts)
     249             :     {
     250      310552 :       ptrs[0] = base + ((u64) indices[0] << shift);
     251      310552 :       ptrs += 1;
     252      310552 :       indices += 1;
     253      310552 :       n_elts -= 1;
     254             :     }
     255             : }
     256             : 
     257             : #endif

Generated by: LCOV version 1.14