LCOV - coverage-filtered.info - vppinfra/vector.h

LCOV - code coverage report

Current view:	top level - vppinfra - vector.h (source / functions)		Hit	Total	Coverage
Test:	coverage-filtered.info	Lines:	1	1	100.0 %
Date:	2023-10-26 01:39:38	Functions:	3	5	60.0 %

          Line data    Source code

       1             : /*
       2             :  * Copyright (c) 2015 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : /*
      16             :   Copyright (c) 2005 Eliot Dresselhaus
      17             : 
      18             :   Permission is hereby granted, free of charge, to any person obtaining
      19             :   a copy of this software and associated documentation files (the
      20             :   "Software"), to deal in the Software without restriction, including
      21             :   without limitation the rights to use, copy, modify, merge, publish,
      22             :   distribute, sublicense, and/or sell copies of the Software, and to
      23             :   permit persons to whom the Software is furnished to do so, subject to
      24             :   the following conditions:
      25             : 
      26             :   The above copyright notice and this permission notice shall be
      27             :   included in all copies or substantial portions of the Software.
      28             : 
      29             :   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      30             :   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      31             :   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
      32             :   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
      33             :   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
      34             :   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
      35             :   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      36             : */
      37             : 
      38             : #ifndef included_clib_vector_h
      39             : #define included_clib_vector_h
      40             : 
      41             : #include <vppinfra/clib.h>
      42             : 
      43             : /* Vector types. */
      44             : 
      45             : #if defined (__aarch64__) && defined(__ARM_NEON) || defined (__i686__)
      46             : #define CLIB_HAVE_VEC128
      47             : #endif
      48             : 
      49             : #if defined (__SSE4_2__) && __GNUC__ >= 4
      50             : #define CLIB_HAVE_VEC128
      51             : #endif
      52             : 
      53             : #if defined (__ALTIVEC__)
      54             : #define CLIB_HAVE_VEC128
      55             : #endif
      56             : 
      57             : #if defined (__AVX2__)
      58             : #define CLIB_HAVE_VEC256
      59             : #if defined (__clang__)  && __clang_major__ < 4
      60             : #undef CLIB_HAVE_VEC256
      61             : #endif
      62             : #endif
      63             : 
      64             : #if defined (__AVX512BITALG__)
      65             : #define CLIB_HAVE_VEC512
      66             : #endif
      67             : 
      68             : #define _vector_size(n) __attribute__ ((vector_size (n), __may_alias__))
      69             : #define _vector_size_unaligned(n)                                             \
      70             :   __attribute__ ((vector_size (n), __aligned__ (1), __may_alias__))
      71             : 
      72             : #define foreach_vec64i  _(i,8,8)  _(i,16,4)  _(i,32,2)
      73             : #define foreach_vec64u  _(u,8,8)  _(u,16,4)  _(u,32,2)
      74             : #define foreach_vec64f  _(f,32,2)
      75             : #define foreach_vec128i _(i,8,16) _(i,16,8)  _(i,32,4)  _(i,64,2)
      76             : #define foreach_vec128u _(u,8,16) _(u,16,8)  _(u,32,4)  _(u,64,2)
      77             : #define foreach_vec128f _(f,32,4) _(f,64,2)
      78             : #define foreach_vec256i _(i,8,32) _(i,16,16) _(i,32,8)  _(i,64,4)
      79             : #define foreach_vec256u _(u,8,32) _(u,16,16) _(u,32,8)  _(u,64,4)
      80             : #define foreach_vec256f _(f,32,8) _(f,64,4)
      81             : #define foreach_vec512i _(i,8,64) _(i,16,32) _(i,32,16) _(i,64,8)
      82             : #define foreach_vec512u _(u,8,64) _(u,16,32) _(u,32,16) _(u,64,8)
      83             : #define foreach_vec512f _(f,32,16) _(f,64,8)
      84             : 
      85             : #if defined (CLIB_HAVE_VEC512)
      86             : #define foreach_int_vec foreach_vec64i foreach_vec128i foreach_vec256i foreach_vec512i
      87             : #define foreach_uint_vec foreach_vec64u foreach_vec128u foreach_vec256u foreach_vec512u
      88             : #define foreach_float_vec foreach_vec64f foreach_vec128f foreach_vec256f foreach_vec512f
      89             : #elif defined (CLIB_HAVE_VEC256)
      90             : #define foreach_int_vec foreach_vec64i foreach_vec128i foreach_vec256i
      91             : #define foreach_uint_vec foreach_vec64u foreach_vec128u foreach_vec256u
      92             : #define foreach_float_vec foreach_vec64f foreach_vec128f foreach_vec256f
      93             : #else
      94             : #define foreach_int_vec foreach_vec64i foreach_vec128i
      95             : #define foreach_uint_vec foreach_vec64u foreach_vec128u
      96             : #define foreach_float_vec foreach_vec64f foreach_vec128f
      97             : #endif
      98             : 
      99             : #define foreach_vec foreach_int_vec foreach_uint_vec foreach_float_vec
     100             : 
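     101             : /* Type definitions: for each (type, bits, count) tuple declare the vector type, its unaligned "u" variant and a vector/array union */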
     102             : #define _(t, s, c)                                                            \
     103             :   typedef t##s t##s##x##c _vector_size (s / 8 * c);                           \
     104             :   typedef t##s t##s##x##c##u _vector_size_unaligned (s / 8 * c);              \
     105             :   typedef union                                                               \
     106             :   {                                                                           \
     107             :     t##s##x##c as_##t##s##x##c;                                               \
     108             :     t##s as_##t##s[c];                                                        \
     109             :   } t##s##x##c##_union_t;
     110             : 
     111             : /* clang-format off */
     112             :   foreach_vec64i foreach_vec64u foreach_vec64f
     113             :   foreach_vec128i foreach_vec128u foreach_vec128f
     114             :   foreach_vec256i foreach_vec256u foreach_vec256f
     115             :   foreach_vec512i foreach_vec512u foreach_vec512f
     116             : /* clang-format on */
     117             : #undef _
     118             : 
     119             :   typedef union
     120             : {
     121             : #define _(t, s, c) t##s##x##c as_##t##s##x##c;
     122             :   foreach_vec128i foreach_vec128u foreach_vec128f
     123             : #undef _
     124             : } vec128_t;
     125             : 
     126             : typedef union
     127             : {
     128             : #define _(t, s, c) t##s##x##c as_##t##s##x##c;
     129             :   foreach_vec256i foreach_vec256u foreach_vec256f
     130             : #undef _
     131             : #define _(t, s, c) t##s##x##c as_##t##s##x##c[2];
     132             :     foreach_vec128i foreach_vec128u foreach_vec128f
     133             : #undef _
     134             : } vec256_t;
     135             : 
     136             : typedef union
     137             : {
     138             : #define _(t, s, c) t##s##x##c as_##t##s##x##c;
     139             :   foreach_vec512i foreach_vec512u foreach_vec512f
     140             : #undef _
     141             : #define _(t, s, c) t##s##x##c as_##t##s##x##c[2];
     142             :     foreach_vec256i foreach_vec256u foreach_vec256f
     143             : #undef _
     144             : #define _(t, s, c) t##s##x##c as_##t##s##x##c[4];
     145             :       foreach_vec128i foreach_vec128u foreach_vec128f
     146             : #undef _
     147             : } vec512_t;
     148             : 
     149             : /* universal inlines: a _zero () helper returning an all-zero vector for every type listed in foreach_vec */
     150             : #define _(t, s, c) \
     151             : static_always_inline t##s##x##c                                         \
     152             : t##s##x##c##_zero ()                                                    \
     153             : { return (t##s##x##c) {}; }                                             \
     154             : 
     155  1181427668 : foreach_vec
     156             : #undef _
     157             : 
     158             : #undef _vector_size
     159             : 
     160             :   /* _shuffle and _shuffle2; on GCC (non-clang) __builtin_shufflevector is mapped onto __builtin_shuffle */
     161             : #if defined(__GNUC__) && !defined(__clang__)
     162             : #define __builtin_shufflevector(v1, v2, ...)                                  \
     163             :   __builtin_shuffle ((v1), (v2), (__typeof__ (v1)){ __VA_ARGS__ })
     164             : #endif
     165             : 
     166             : #define u8x16_shuffle(v1, ...)                                                \
     167             :   (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v1), __VA_ARGS__)
     168             : #define u8x32_shuffle(v1, ...)                                                \
     169             :   (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v1), __VA_ARGS__)
     170             : #define u8x64_shuffle(v1, ...)                                                \
     171             :   (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v1), __VA_ARGS__)
     172             : 
     173             : #define u16x8_shuffle(v1, ...)                                                \
     174             :   (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v1), __VA_ARGS__)
     175             : #define u16x16_shuffle(v1, ...)                                               \
     176             :   (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v1), __VA_ARGS__)
     177             : #define u16x32_shuffle(v1, ...)                                               \
     178             :   (u16x32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v1), __VA_ARGS__)
     179             : 
     180             : #define u32x4_shuffle(v1, ...)                                                \
     181             :   (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v1), __VA_ARGS__)
     182             : #define u32x8_shuffle(v1, ...)                                                \
     183             :   (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v1), __VA_ARGS__)
     184             : #define u32x16_shuffle(v1, ...)                                               \
     185             :   (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v1), __VA_ARGS__)
     186             : 
     187             : #define u64x2_shuffle(v1, ...)                                                \
     188             :   (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v1), __VA_ARGS__)
     189             : #define u64x4_shuffle(v1, ...)                                                \
     190             :   (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v1), __VA_ARGS__)
     191             : #define u64x8_shuffle(v1, ...)                                                \
     192             :   (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v1), __VA_ARGS__)
     193             : 
     194             : #define u8x16_shuffle2(v1, v2, ...)                                           \
     195             :   (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v2), __VA_ARGS__)
     196             : #define u8x32_shuffle2(v1, v2, ...)                                           \
     197             :   (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v2), __VA_ARGS__)
     198             : #define u8x64_shuffle2(v1, v2, ...)                                           \
     199             :   (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v2), __VA_ARGS__)
     200             : 
     201             : #define u16x8_shuffle2(v1, v2, ...)                                           \
     202             :   (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v2), __VA_ARGS__)
     203             : #define u16x16_shuffle2(v1, v2, ...)                                          \
     204             :   (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v2), __VA_ARGS__)
     205             : #define u16x32_shuffle2(v1, v2, ...)                                          \
     206             :   (u16x32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v2), __VA_ARGS__)
     207             : 
     208             : #define u32x4_shuffle2(v1, v2, ...)                                           \
     209             :   (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v2), __VA_ARGS__)
     210             : #define u32x8_shuffle2(v1, v2, ...)                                           \
     211             :   (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v2), __VA_ARGS__)
     212             : #define u32x16_shuffle2(v1, v2, ...)                                          \
     213             :   (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v2), __VA_ARGS__)
     214             : 
     215             : #define u64x2_shuffle2(v1, v2, ...)                                           \
     216             :   (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v2), __VA_ARGS__)
     217             : #define u64x4_shuffle2(v1, v2, ...)                                           \
     218             :   (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v2), __VA_ARGS__)
     219             : #define u64x8_shuffle2(v1, v2, ...)                                           \
     220             :   (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v2), __VA_ARGS__)
     221             : 
     222             : #define VECTOR_WORD_TYPE(t) t##x
     223             : #define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t))
     224             : 
     225             : #if defined (__SSE4_2__) && __GNUC__ >= 4
     226             : #include <vppinfra/vector_sse42.h>
     227             : #endif
     228             : 
     229             : #if defined (__AVX2__)
     230             : #include <vppinfra/vector_avx2.h>
     231             : #endif
     232             : 
     233             : #if defined(__AVX512F__)
     234             : #include <vppinfra/vector_avx512.h>
     235             : #endif
     236             : 
     237             : #if defined (__ALTIVEC__)
     238             : #include <vppinfra/vector_altivec.h>
     239             : #endif
     240             : 
     241             : #if defined (__aarch64__)
     242             : #include <vppinfra/vector_neon.h>
     243             : #endif
     244             : 
     245             : /* this macro generates _splat inline functions for each 128-bit integer vector type, unless CLIB_VEC128_SPLAT_DEFINED is already set */
     246             : #ifndef CLIB_VEC128_SPLAT_DEFINED
     247             : #define _(t, s, c) \
     248             :   static_always_inline t##s##x##c                       \
     249             : t##s##x##c##_splat (t##s x)                             \
     250             : {                                                       \
     251             :     t##s##x##c r;                                       \
     252             :     int i;                                              \
     253             :                                                         \
     254             :     for (i = 0; i < c; i++)                          \
     255             :       r[i] = x;                                         \
     256             :                                                         \
     257             :     return r;                                           \
     258             : }
     259             :   foreach_vec128i foreach_vec128u
     260             : #undef _
     261             : #endif
     262             : 
     263             : #endif /* included_clib_vector_h */

Generated by: LCOV version 1.14