LCOV - code coverage report
Current view: top level - vppinfra/crypto - aes.h (source / functions)
Test: coverage-filtered.info
Date: 2023-07-05 22:20:52
Coverage:    Lines: 125 of 133 (94.0 %)    Functions: 16 of 20 (80.0 %)

          Line data    Source code
       1             : /*
       2             :  *------------------------------------------------------------------
       3             :  * Copyright (c) 2020 Cisco and/or its affiliates.
       4             :  * Licensed under the Apache License, Version 2.0 (the "License");
       5             :  * you may not use this file except in compliance with the License.
       6             :  * You may obtain a copy of the License at:
       7             :  *
       8             :  *     http://www.apache.org/licenses/LICENSE-2.0
       9             :  *
      10             :  * Unless required by applicable law or agreed to in writing, software
      11             :  * distributed under the License is distributed on an "AS IS" BASIS,
      12             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      13             :  * See the License for the specific language governing permissions and
      14             :  * limitations under the License.
      15             :  *------------------------------------------------------------------
      16             :  */
      17             : 
      18             : #ifndef __aesni_h__
      19             : #define __aesni_h__
      20             : 
      21             : typedef enum
      22             : {
      23             :   AES_KEY_128 = 0,
      24             :   AES_KEY_192 = 1,
      25             :   AES_KEY_256 = 2,
      26             : } aes_key_size_t;
      27             : 
       28             : #define AES_KEY_ROUNDS(x)               (10 + (x) * 2)
       29             : #define AES_KEY_BYTES(x)                (16 + (x) * 8)
      30             : 
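
The two macros above map the aes_key_size_t enum values onto the FIPS-197
parameters (AES-128: 10 rounds, 16-byte key; AES-192: 12 and 24; AES-256: 14
and 32). A compile-time check of that arithmetic, shown for illustration only
and not part of the original header, could read:

  _Static_assert (AES_KEY_ROUNDS (AES_KEY_128) == 10, "AES-128 has 10 rounds");
  _Static_assert (AES_KEY_BYTES (AES_KEY_128) == 16, "AES-128 key is 16 bytes");
  _Static_assert (AES_KEY_ROUNDS (AES_KEY_192) == 12, "AES-192 has 12 rounds");
  _Static_assert (AES_KEY_BYTES (AES_KEY_192) == 24, "AES-192 key is 24 bytes");
  _Static_assert (AES_KEY_ROUNDS (AES_KEY_256) == 14, "AES-256 has 14 rounds");
  _Static_assert (AES_KEY_BYTES (AES_KEY_256) == 32, "AES-256 key is 32 bytes");
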
      31             : static_always_inline u8x16
      32      500908 : aes_block_load (u8 * p)
      33             : {
      34      500908 :   return *(u8x16u *) p;
      35             : }
      36             : 
      37             : static_always_inline u8x16
      38   106312880 : aes_enc_round (u8x16 a, u8x16 k)
      39             : {
      40             : #if defined (__AES__)
      41   212624770 :   return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
      42             : #elif defined (__ARM_FEATURE_CRYPTO)
      43             :   return vaesmcq_u8 (vaeseq_u8 (a, u8x16_splat (0))) ^ k;
      44             : #endif
      45             : }
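
On x86, _mm_aesenc_si128 applies ShiftRows, SubBytes and MixColumns and then
XORs the round key. ARMv8 splits the round differently: AESE (vaeseq_u8)
performs AddRoundKey followed by SubBytes and ShiftRows, so passing an
all-zero key makes the AddRoundKey a no-op; AESMC (vaesmcq_u8) then applies
MixColumns and the trailing "^ k" adds the round key. Both paths therefore
compute the same single AES encryption round.
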
      46             : 
      47             : #if defined(__VAES__) && defined(__AVX512F__)
      48             : static_always_inline u8x64
      49           0 : aes_enc_round_x4 (u8x64 a, u8x64 k)
      50             : {
      51           0 :   return (u8x64) _mm512_aesenc_epi128 ((__m512i) a, (__m512i) k);
      52             : }
      53             : 
      54             : static_always_inline u8x64
      55           0 : aes_enc_last_round_x4 (u8x64 a, u8x64 k)
      56             : {
      57           0 :   return (u8x64) _mm512_aesenclast_epi128 ((__m512i) a, (__m512i) k);
      58             : }
      59             : 
      60             : static_always_inline u8x64
      61           0 : aes_dec_round_x4 (u8x64 a, u8x64 k)
      62             : {
      63           0 :   return (u8x64) _mm512_aesdec_epi128 ((__m512i) a, (__m512i) k);
      64             : }
      65             : 
      66             : static_always_inline u8x64
      67           0 : aes_dec_last_round_x4 (u8x64 a, u8x64 k)
      68             : {
      69           0 :   return (u8x64) _mm512_aesdeclast_epi128 ((__m512i) a, (__m512i) k);
      70             : }
      71             : #endif
      72             : 
      73             : #ifdef __VAES__
      74             : static_always_inline u8x32
      75             : aes_enc_round_x2 (u8x32 a, u8x32 k)
      76             : {
      77             :   return (u8x32) _mm256_aesenc_epi128 ((__m256i) a, (__m256i) k);
      78             : }
      79             : 
      80             : static_always_inline u8x32
      81             : aes_enc_last_round_x2 (u8x32 a, u8x32 k)
      82             : {
      83             :   return (u8x32) _mm256_aesenclast_epi128 ((__m256i) a, (__m256i) k);
      84             : }
      85             : 
      86             : static_always_inline u8x32
      87             : aes_dec_round_x2 (u8x32 a, u8x32 k)
      88             : {
      89             :   return (u8x32) _mm256_aesdec_epi128 ((__m256i) a, (__m256i) k);
      90             : }
      91             : 
      92             : static_always_inline u8x32
      93             : aes_dec_last_round_x2 (u8x32 a, u8x32 k)
      94             : {
      95             :   return (u8x32) _mm256_aesdeclast_epi128 ((__m256i) a, (__m256i) k);
      96             : }
      97             : #endif
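
The _x2 and _x4 variants above apply the same round primitive to two or four
independent 128-bit blocks packed into one 256-bit or 512-bit register. As a
sketch only (the helper name is illustrative and not part of this header, and
it assumes _mm512_broadcast_i32x4 to replicate each round key into all four
lanes), a four-block encryption with a shared key schedule could look like:

  #if defined(__VAES__) && defined(__AVX512F__)
  static_always_inline u8x64
  example_aes_encrypt_x4 (u8x64 blocks, const u8x16 *round_keys, aes_key_size_t ks)
  {
    int rounds = AES_KEY_ROUNDS (ks);
    /* replicate each 128-bit round key into all four lanes of the ZMM register */
    blocks ^= (u8x64) _mm512_broadcast_i32x4 ((__m128i) round_keys[0]);
    for (int i = 1; i < rounds; i++)
      blocks = aes_enc_round_x4 (blocks, (u8x64) _mm512_broadcast_i32x4 ((__m128i) round_keys[i]));
    return aes_enc_last_round_x4 (blocks, (u8x64) _mm512_broadcast_i32x4 ((__m128i) round_keys[rounds]));
  }
  #endif
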
      98             : 
      99             : static_always_inline u8x16
     100     8350384 : aes_enc_last_round (u8x16 a, u8x16 k)
     101             : {
     102             : #if defined (__AES__)
     103    16700728 :   return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
     104             : #elif defined (__ARM_FEATURE_CRYPTO)
     105             :   return vaeseq_u8 (a, u8x16_splat (0)) ^ k;
     106             : #endif
     107             : }
     108             : 
     109             : #ifdef __x86_64__
     110             : 
     111             : static_always_inline u8x16
     112     2450970 : aes_dec_round (u8x16 a, u8x16 k)
     113             : {
     114     4901940 :   return (u8x16) _mm_aesdec_si128 ((__m128i) a, (__m128i) k);
     115             : }
     116             : 
     117             : static_always_inline u8x16
     118      238768 : aes_dec_last_round (u8x16 a, u8x16 k)
     119             : {
     120      477536 :   return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
     121             : }
     122             : #endif
     123             : 
     124             : static_always_inline void
     125      461134 : aes_block_store (u8 * p, u8x16 r)
     126             : {
     127      461134 :   *(u8x16u *) p = r;
     128      461134 : }
     129             : 
     130             : static_always_inline u8x16
     131      100936 : aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
     132             : {
     133      100936 :   int rounds = AES_KEY_ROUNDS (ks);
     134      100936 :   block ^= round_keys[0];
     135     1409440 :   for (int i = 1; i < rounds; i += 1)
     136     1308500 :     block = aes_enc_round (block, round_keys[i]);
     137      100936 :   return aes_enc_last_round (block, round_keys[rounds]);
     138             : }
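
The loop above handles rounds 1 through rounds - 1 and the final
aes_enc_last_round consumes round_keys[rounds], so the caller's key schedule
must hold AES_KEY_ROUNDS (ks) + 1 entries: 11, 13 or 15 u8x16 values for
AES-128, AES-192 and AES-256 respectively.
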
     139             : 
     140             : static_always_inline u8x16
     141       27932 : aes_inv_mix_column (u8x16 a)
     142             : {
     143             : #if defined (__AES__)
     144       55864 :   return (u8x16) _mm_aesimc_si128 ((__m128i) a);
     145             : #elif defined (__ARM_FEATURE_CRYPTO)
     146             :   return vaesimcq_u8 (a);
     147             : #endif
     148             : }
     149             : 
     150             : #ifdef __x86_64__
     151             : #define aes_keygen_assist(a, b) \
     152             :   (u8x16) _mm_aeskeygenassist_si128((__m128i) a, b)
     153             : 
      154             : /* AES-NI based AES key expansion, following the code samples in the
      155             :    Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper
      156             :    (323641-001) */
     157             : 
     158             : static_always_inline void
     159       20860 : aes128_key_assist (u8x16 * rk, u8x16 r)
     160             : {
     161       20860 :   u8x16 t = rk[-1];
     162       20860 :   t ^= u8x16_word_shift_left (t, 4);
     163       20860 :   t ^= u8x16_word_shift_left (t, 4);
     164       20860 :   t ^= u8x16_word_shift_left (t, 4);
     165       20860 :   rk[0] = t ^ (u8x16) u32x4_shuffle ((u32x4) r, 3, 3, 3, 3);
     166       20860 : }
     167             : 
     168             : static_always_inline void
     169        2086 : aes128_key_expand (u8x16 *rk, u8x16u const *k)
     170             : {
     171        2086 :   rk[0] = k[0];
     172        2086 :   aes128_key_assist (rk + 1, aes_keygen_assist (rk[0], 0x01));
     173        2086 :   aes128_key_assist (rk + 2, aes_keygen_assist (rk[1], 0x02));
     174        2086 :   aes128_key_assist (rk + 3, aes_keygen_assist (rk[2], 0x04));
     175        2086 :   aes128_key_assist (rk + 4, aes_keygen_assist (rk[3], 0x08));
     176        2086 :   aes128_key_assist (rk + 5, aes_keygen_assist (rk[4], 0x10));
     177        2086 :   aes128_key_assist (rk + 6, aes_keygen_assist (rk[5], 0x20));
     178        2086 :   aes128_key_assist (rk + 7, aes_keygen_assist (rk[6], 0x40));
     179        2086 :   aes128_key_assist (rk + 8, aes_keygen_assist (rk[7], 0x80));
     180        2086 :   aes128_key_assist (rk + 9, aes_keygen_assist (rk[8], 0x1b));
     181        2086 :   aes128_key_assist (rk + 10, aes_keygen_assist (rk[9], 0x36));
     182        2086 : }
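
Each aes_keygen_assist call returns a vector whose word 3 is
RotWord (SubWord (w3)) XOR rcon, which is why aes128_key_assist broadcasts
lane 3 with u32x4_shuffle; the three u8x16_word_shift_left/XOR steps reproduce
the cascading XOR of the schedule words. With the round-constant sequence
0x01 ... 0x80, 0x1b, 0x36 this yields the 11 AES-128 round keys rk[0]..rk[10].
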
     183             : 
     184             : static_always_inline void
     185        9344 : aes192_key_assist (u8x16 * r1, u8x16 * r2, u8x16 key_assist)
     186             : {
     187             :   u8x16 t;
     188        9344 :   r1[0] ^= t = u8x16_word_shift_left (r1[0], 4);
     189        9344 :   r1[0] ^= t = u8x16_word_shift_left (t, 4);
     190        9344 :   r1[0] ^= u8x16_word_shift_left (t, 4);
     191        9344 :   r1[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) key_assist, 0x55);
     192        9344 :   r2[0] ^= u8x16_word_shift_left (r2[0], 4);
     193        9344 :   r2[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) r1[0], 0xff);
     194        9344 : }
     195             : 
     196             : static_always_inline void
     197        1168 : aes192_key_expand (u8x16 * rk, u8x16u const *k)
     198             : {
     199             :   u8x16 r1, r2;
     200             : 
     201        1168 :   rk[0] = r1 = k[0];
     202             :   /* *INDENT-OFF* */
     203        1168 :   rk[1] = r2 = (u8x16) (u64x2) { *(u64 *) (k + 1), 0 };
     204             :   /* *INDENT-ON* */
     205             : 
     206        1168 :   aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x1));
     207        1168 :   rk[1] = (u8x16) _mm_shuffle_pd ((__m128d) rk[1], (__m128d) r1, 0);
     208        1168 :   rk[2] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
     209             : 
     210        1168 :   aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x2));
     211        1168 :   rk[3] = r1;
     212        1168 :   rk[4] = r2;
     213             : 
     214        1168 :   aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x4));
     215        1168 :   rk[4] = (u8x16) _mm_shuffle_pd ((__m128d) rk[4], (__m128d) r1, 0);
     216        1168 :   rk[5] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
     217             : 
     218        1168 :   aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x8));
     219        1168 :   rk[6] = r1;
     220        1168 :   rk[7] = r2;
     221             : 
     222        1168 :   aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x10));
     223        1168 :   rk[7] = (u8x16) _mm_shuffle_pd ((__m128d) rk[7], (__m128d) r1, 0);
     224        1168 :   rk[8] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
     225             : 
     226        1168 :   aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x20));
     227        1168 :   rk[9] = r1;
     228        1168 :   rk[10] = r2;
     229             : 
     230        1168 :   aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x40));
     231        1168 :   rk[10] = (u8x16) _mm_shuffle_pd ((__m128d) rk[10], (__m128d) r1, 0);
     232        1168 :   rk[11] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
     233             : 
     234        1168 :   aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x80));
     235        1168 :   rk[12] = r1;
     236        1168 : }
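
AES-192 uses 12 rounds and therefore 13 round keys, but its schedule advances
in 192-bit (one and a half register) steps; the _mm_shuffle_pd calls pack each
freshly generated half into the upper lane of the previous 128-bit round key,
which is why rk[1], rk[4], rk[7] and rk[10] are each written twice.
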
     237             : 
     238             : static_always_inline void
     239      702590 : aes256_key_assist (u8x16 * rk, int i, u8x16 key_assist)
     240             : {
     241             :   u8x16 r, t;
     242      702590 :   rk += i;
     243      702590 :   r = rk[-2];
     244      702590 :   r ^= t = u8x16_word_shift_left (r, 4);
     245      702590 :   r ^= t = u8x16_word_shift_left (t, 4);
     246      702590 :   r ^= u8x16_word_shift_left (t, 4);
     247      702590 :   r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 3, 3, 3, 3);
     248      702590 :   rk[0] = r;
     249             : 
     250      702590 :   if (i >= 14)
     251      100370 :     return;
     252             : 
     253      602220 :   key_assist = aes_keygen_assist (rk[0], 0x0);
     254      602220 :   r = rk[-1];
     255      602220 :   r ^= t = u8x16_word_shift_left (r, 4);
     256      602220 :   r ^= t = u8x16_word_shift_left (t, 4);
     257      602220 :   r ^= u8x16_word_shift_left (t, 4);
     258      602220 :   r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 2, 2, 2, 2);
     259      602220 :   rk[1] = r;
     260             : }
     261             : 
     262             : static_always_inline void
     263      100370 : aes256_key_expand (u8x16 * rk, u8x16u const *k)
     264             : {
     265      100370 :   rk[0] = k[0];
     266      100370 :   rk[1] = k[1];
     267      100370 :   aes256_key_assist (rk, 2, aes_keygen_assist (rk[1], 0x01));
     268      100370 :   aes256_key_assist (rk, 4, aes_keygen_assist (rk[3], 0x02));
     269      100370 :   aes256_key_assist (rk, 6, aes_keygen_assist (rk[5], 0x04));
     270      100370 :   aes256_key_assist (rk, 8, aes_keygen_assist (rk[7], 0x08));
     271      100370 :   aes256_key_assist (rk, 10, aes_keygen_assist (rk[9], 0x10));
     272      100370 :   aes256_key_assist (rk, 12, aes_keygen_assist (rk[11], 0x20));
     273      100370 :   aes256_key_assist (rk, 14, aes_keygen_assist (rk[13], 0x40));
     274      100370 : }
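
AES-256 uses 14 rounds and 15 round keys, and each aes256_key_assist call
produces two of them: the first takes RotWord (SubWord (...)) XOR rcon from
lane 3 of the assist result, while the second needs SubWord only, so the
helper re-invokes aes_keygen_assist with an rcon of 0 and broadcasts lane 2
instead. The i >= 14 early return stops the schedule after rk[14].
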
     275             : #endif
     276             : 
     277             : #ifdef __aarch64__
     278             : 
     279             : static const u8x16 aese_prep_mask1 =
     280             :   { 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
     281             : static const u8x16 aese_prep_mask2 =
     282             :   { 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };
     283             : 
     284             : static_always_inline void
     285             : aes128_key_expand_round_neon (u8x16 * rk, u32 rcon)
     286             : {
     287             :   u8x16 r, t, last_round = rk[-1], z = { };
     288             :   r = vqtbl1q_u8 (last_round, aese_prep_mask1);
     289             :   r = vaeseq_u8 (r, z);
     290             :   r ^= (u8x16) vdupq_n_u32 (rcon);
     291             :   r ^= last_round;
     292             :   r ^= t = vextq_u8 (z, last_round, 12);
     293             :   r ^= t = vextq_u8 (z, t, 12);
     294             :   r ^= vextq_u8 (z, t, 12);
     295             :   rk[0] = r;
     296             : }
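
AArch64 has no key-generation-assist instruction, so this helper synthesizes
it: the vqtbl1q_u8 lookup with aese_prep_mask1 replicates the previous round
key's last word in rotated form (RotWord), AESE against an all-zero key
applies SubBytes to it (SubWord), vdupq_n_u32 XORs the round constant into
every word, and the chain of vextq_u8 shifts recreates the cascading XOR of
the four schedule words.
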
     297             : 
     298             : static_always_inline void
     299             : aes128_key_expand (u8x16 *rk, u8x16u const *k)
     300             : {
     301             :   rk[0] = k[0];
     302             :   aes128_key_expand_round_neon (rk + 1, 0x01);
     303             :   aes128_key_expand_round_neon (rk + 2, 0x02);
     304             :   aes128_key_expand_round_neon (rk + 3, 0x04);
     305             :   aes128_key_expand_round_neon (rk + 4, 0x08);
     306             :   aes128_key_expand_round_neon (rk + 5, 0x10);
     307             :   aes128_key_expand_round_neon (rk + 6, 0x20);
     308             :   aes128_key_expand_round_neon (rk + 7, 0x40);
     309             :   aes128_key_expand_round_neon (rk + 8, 0x80);
     310             :   aes128_key_expand_round_neon (rk + 9, 0x1b);
     311             :   aes128_key_expand_round_neon (rk + 10, 0x36);
     312             : }
     313             : 
     314             : static_always_inline void
     315             : aes192_key_expand_round_neon (u8x8 * rk, u32 rcon)
     316             : {
     317             :   u8x8 r, last_round = rk[-1], z = { };
     318             :   u8x16 r2, z2 = { };
     319             : 
     320             :   r2 = (u8x16) vdupq_lane_u64 ((uint64x1_t) last_round, 0);
     321             :   r2 = vqtbl1q_u8 (r2, aese_prep_mask1);
     322             :   r2 = vaeseq_u8 (r2, z2);
     323             :   r2 ^= (u8x16) vdupq_n_u32 (rcon);
     324             : 
     325             :   r = (u8x8) vdup_laneq_u64 ((u64x2) r2, 0);
     326             :   r ^= rk[-3];
     327             :   r ^= vext_u8 (z, rk[-3], 4);
     328             :   rk[0] = r;
     329             : 
     330             :   r = rk[-2] ^ vext_u8 (r, z, 4);
     331             :   r ^= vext_u8 (z, r, 4);
     332             :   rk[1] = r;
     333             : 
     334             :   if (rcon == 0x80)
     335             :     return;
     336             : 
     337             :   r = rk[-1] ^ vext_u8 (r, z, 4);
     338             :   r ^= vext_u8 (z, r, 4);
     339             :   rk[2] = r;
     340             : }
     341             : 
     342             : static_always_inline void
     343             : aes192_key_expand (u8x16 * ek, const u8x16u * k)
     344             : {
     345             :   u8x8 *rk = (u8x8 *) ek;
     346             :   ek[0] = k[0];
     347             :   rk[2] = *(u8x8u *) (k + 1);
     348             :   aes192_key_expand_round_neon (rk + 3, 0x01);
     349             :   aes192_key_expand_round_neon (rk + 6, 0x02);
     350             :   aes192_key_expand_round_neon (rk + 9, 0x04);
     351             :   aes192_key_expand_round_neon (rk + 12, 0x08);
     352             :   aes192_key_expand_round_neon (rk + 15, 0x10);
     353             :   aes192_key_expand_round_neon (rk + 18, 0x20);
     354             :   aes192_key_expand_round_neon (rk + 21, 0x40);
     355             :   aes192_key_expand_round_neon (rk + 24, 0x80);
     356             : }
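
Because the AES-192 schedule advances 24 bytes at a time, the round-key array
is addressed here as 64-bit halves (u8x8): the expansion starts from three
halves of key material and each aes192_key_expand_round_neon call emits three
more, except the final call (rcon 0x80), which only needs two halves to
complete the 13th round key.
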
     357             : 
     358             : 
     359             : static_always_inline void
     360             : aes256_key_expand_round_neon (u8x16 * rk, u32 rcon)
     361             : {
     362             :   u8x16 r, t, z = { };
     363             : 
     364             :   r = vqtbl1q_u8 (rk[-1], rcon ? aese_prep_mask1 : aese_prep_mask2);
     365             :   r = vaeseq_u8 (r, z);
     366             :   if (rcon)
     367             :     r ^= (u8x16) vdupq_n_u32 (rcon);
     368             :   r ^= rk[-2];
     369             :   r ^= t = vextq_u8 (z, rk[-2], 12);
     370             :   r ^= t = vextq_u8 (z, t, 12);
     371             :   r ^= vextq_u8 (z, t, 12);
     372             :   rk[0] = r;
     373             : }
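
Half of the AES-256 schedule steps apply SubWord without RotWord and without a
round constant; for those the caller below passes an rcon of 0, and the helper
then selects aese_prep_mask2 (which broadcasts the previous key's last word
unrotated) and skips the rcon XOR.
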
     374             : 
     375             : static_always_inline void
     376             : aes256_key_expand (u8x16 *rk, u8x16u const *k)
     377             : {
     378             :   rk[0] = k[0];
     379             :   rk[1] = k[1];
     380             :   aes256_key_expand_round_neon (rk + 2, 0x01);
     381             :   aes256_key_expand_round_neon (rk + 3, 0);
     382             :   aes256_key_expand_round_neon (rk + 4, 0x02);
     383             :   aes256_key_expand_round_neon (rk + 5, 0);
     384             :   aes256_key_expand_round_neon (rk + 6, 0x04);
     385             :   aes256_key_expand_round_neon (rk + 7, 0);
     386             :   aes256_key_expand_round_neon (rk + 8, 0x08);
     387             :   aes256_key_expand_round_neon (rk + 9, 0);
     388             :   aes256_key_expand_round_neon (rk + 10, 0x10);
     389             :   aes256_key_expand_round_neon (rk + 11, 0);
     390             :   aes256_key_expand_round_neon (rk + 12, 0x20);
     391             :   aes256_key_expand_round_neon (rk + 13, 0);
     392             :   aes256_key_expand_round_neon (rk + 14, 0x40);
     393             : }
     394             : 
     395             : #endif
     396             : 
     397             : static_always_inline void
     398      103624 : aes_key_expand (u8x16 * key_schedule, u8 const *key, aes_key_size_t ks)
     399             : {
     400      103624 :   switch (ks)
     401             :     {
     402        2086 :     case AES_KEY_128:
     403        2086 :       aes128_key_expand (key_schedule, (u8x16u const *) key);
     404        2086 :       break;
     405        1168 :     case AES_KEY_192:
     406        1168 :       aes192_key_expand (key_schedule, (u8x16u const *) key);
     407        1168 :       break;
     408      100370 :     case AES_KEY_256:
     409      100370 :       aes256_key_expand (key_schedule, (u8x16u const *) key);
     410      100370 :       break;
     411             :     }
     412      103624 : }
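
A minimal single-block usage sketch tying the pieces together; the function
name is illustrative and not part of this header, and the round-key array is
simply sized for the largest schedule:

  static_always_inline void
  example_aes_encrypt_one_block (u8 *dst, u8 *src, u8 const *key, aes_key_size_t ks)
  {
    u8x16 rk[AES_KEY_ROUNDS (AES_KEY_256) + 1]; /* 15 entries covers every key size */
    aes_key_expand (rk, key, ks);
    aes_block_store (dst, aes_encrypt_block (aes_block_load (src), rk, ks));
  }
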
     413             : 
     414             : static_always_inline void
     415        2688 : aes_key_enc_to_dec (u8x16 * ke, u8x16 * kd, aes_key_size_t ks)
     416             : {
     417        2688 :   int rounds = AES_KEY_ROUNDS (ks);
     418             : 
     419        2688 :   kd[rounds] = ke[0];
     420        2688 :   kd[0] = ke[rounds];
     421             : 
     422       15310 :   for (int i = 1; i < (rounds / 2); i++)
     423             :     {
     424       12622 :       kd[rounds - i] = aes_inv_mix_column (ke[i]);
     425       12622 :       kd[i] = aes_inv_mix_column (ke[rounds - i]);
     426             :     }
     427             : 
     428        2688 :   kd[rounds / 2] = aes_inv_mix_column (ke[rounds / 2]);
     429        2688 : }
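
aes_key_enc_to_dec builds the schedule for the equivalent inverse cipher:
AESDEC expects every middle round key to be passed through InvMixColumns, so
the encryption keys are reversed and the inner ones run through
aes_inv_mix_column. For illustration only (the function name is not part of
this header, and the sketch is x86-only because aes_dec_round is only defined
there), the decryption counterpart of aes_encrypt_block would be:

  #ifdef __x86_64__
  static_always_inline u8x16
  example_aes_decrypt_block (u8x16 block, const u8x16 *kd, aes_key_size_t ks)
  {
    int rounds = AES_KEY_ROUNDS (ks);
    block ^= kd[0]; /* kd[0] is the last encryption round key */
    for (int i = 1; i < rounds; i++)
      block = aes_dec_round (block, kd[i]);
    return aes_dec_last_round (block, kd[rounds]);
  }
  #endif
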
     430             : 
     431             : #endif /* __aesni_h__ */
     432             : 
     433             : /*
     434             :  * fd.io coding-style-patch-verification: ON
     435             :  *
     436             :  * Local Variables:
     437             :  * eval: (c-set-style "gnu")
     438             :  * End:
     439             :  */

Generated by: LCOV version 1.14