/*
 *------------------------------------------------------------------
 * Copyright (c) 2020 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#ifndef __aesni_h__
#define __aesni_h__

typedef enum
{
  AES_KEY_128 = 0,
  AES_KEY_192 = 1,
  AES_KEY_256 = 2,
} aes_key_size_t;

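/* Round count and key length per aes_key_size_t value:
   AES_KEY_128 -> 10 rounds, 16-byte key; AES_KEY_192 -> 12 rounds, 24-byte
   key; AES_KEY_256 -> 14 rounds, 32-byte key. */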
#define AES_KEY_ROUNDS(x) (10 + x * 2)
#define AES_KEY_BYTES(x)  (16 + x * 8)

static_always_inline u8x16
aes_block_load (u8 * p)
{
  return *(u8x16u *) p;
}

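/* Single AES encryption round. x86 AESENC performs ShiftRows, SubBytes,
   MixColumns and the round-key XOR in one instruction. ARM AESE includes the
   round-key XOR up front, so it is called with an all-zero key and the real
   round key is XORed in after AESMC to match the x86 ordering. */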
static_always_inline u8x16
aes_enc_round (u8x16 a, u8x16 k)
{
#if defined (__AES__)
  return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
#elif defined (__ARM_FEATURE_CRYPTO)
  return vaesmcq_u8 (vaeseq_u8 (a, u8x16_splat (0))) ^ k;
#endif
}

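/* VAES variants processing four (AVX-512) or two (AVX2) independent AES
   blocks per instruction. */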
#if defined(__VAES__) && defined(__AVX512F__)
static_always_inline u8x64
aes_enc_round_x4 (u8x64 a, u8x64 k)
{
  return (u8x64) _mm512_aesenc_epi128 ((__m512i) a, (__m512i) k);
}

static_always_inline u8x64
aes_enc_last_round_x4 (u8x64 a, u8x64 k)
{
  return (u8x64) _mm512_aesenclast_epi128 ((__m512i) a, (__m512i) k);
}

static_always_inline u8x64
aes_dec_round_x4 (u8x64 a, u8x64 k)
{
  return (u8x64) _mm512_aesdec_epi128 ((__m512i) a, (__m512i) k);
}

static_always_inline u8x64
aes_dec_last_round_x4 (u8x64 a, u8x64 k)
{
  return (u8x64) _mm512_aesdeclast_epi128 ((__m512i) a, (__m512i) k);
}
#endif

#ifdef __VAES__
static_always_inline u8x32
aes_enc_round_x2 (u8x32 a, u8x32 k)
{
  return (u8x32) _mm256_aesenc_epi128 ((__m256i) a, (__m256i) k);
}

static_always_inline u8x32
aes_enc_last_round_x2 (u8x32 a, u8x32 k)
{
  return (u8x32) _mm256_aesenclast_epi128 ((__m256i) a, (__m256i) k);
}

static_always_inline u8x32
aes_dec_round_x2 (u8x32 a, u8x32 k)
{
  return (u8x32) _mm256_aesdec_epi128 ((__m256i) a, (__m256i) k);
}

static_always_inline u8x32
aes_dec_last_round_x2 (u8x32 a, u8x32 k)
{
  return (u8x32) _mm256_aesdeclast_epi128 ((__m256i) a, (__m256i) k);
}
#endif

static_always_inline u8x16
aes_enc_last_round (u8x16 a, u8x16 k)
{
#if defined (__AES__)
  return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
#elif defined (__ARM_FEATURE_CRYPTO)
  return vaeseq_u8 (a, u8x16_splat (0)) ^ k;
#endif
}

#ifdef __x86_64__

static_always_inline u8x16
aes_dec_round (u8x16 a, u8x16 k)
{
  return (u8x16) _mm_aesdec_si128 ((__m128i) a, (__m128i) k);
}

static_always_inline u8x16
aes_dec_last_round (u8x16 a, u8x16 k)
{
  return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
}
#endif

static_always_inline void
aes_block_store (u8 * p, u8x16 r)
{
  *(u8x16u *) p = r;
}

static_always_inline u8x16
aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
{
  int rounds = AES_KEY_ROUNDS (ks);
  block ^= round_keys[0];
  for (int i = 1; i < rounds; i += 1)
    block = aes_enc_round (block, round_keys[i]);
  return aes_enc_last_round (block, round_keys[rounds]);
}

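/* InvMixColumns applied to a single round key, used when turning an
   encryption key schedule into a decryption one (see aes_key_enc_to_dec). */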
static_always_inline u8x16
aes_inv_mix_column (u8x16 a)
{
#if defined (__AES__)
  return (u8x16) _mm_aesimc_si128 ((__m128i) a);
#elif defined (__ARM_FEATURE_CRYPTO)
  return vaesimcq_u8 (a);
#endif
}

#ifdef __x86_64__
#define aes_keygen_assist(a, b) \
  (u8x16) _mm_aeskeygenassist_si128((__m128i) a, b)

/* AES-NI based AES key expansion based on code samples from
   Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper
   (323641-001) */

static_always_inline void
aes128_key_assist (u8x16 * rk, u8x16 r)
{
  u8x16 t = rk[-1];
  t ^= u8x16_word_shift_left (t, 4);
  t ^= u8x16_word_shift_left (t, 4);
  t ^= u8x16_word_shift_left (t, 4);
  rk[0] = t ^ (u8x16) u32x4_shuffle ((u32x4) r, 3, 3, 3, 3);
}

static_always_inline void
aes128_key_expand (u8x16 *rk, u8x16u const *k)
{
  rk[0] = k[0];
  aes128_key_assist (rk + 1, aes_keygen_assist (rk[0], 0x01));
  aes128_key_assist (rk + 2, aes_keygen_assist (rk[1], 0x02));
  aes128_key_assist (rk + 3, aes_keygen_assist (rk[2], 0x04));
  aes128_key_assist (rk + 4, aes_keygen_assist (rk[3], 0x08));
  aes128_key_assist (rk + 5, aes_keygen_assist (rk[4], 0x10));
  aes128_key_assist (rk + 6, aes_keygen_assist (rk[5], 0x20));
  aes128_key_assist (rk + 7, aes_keygen_assist (rk[6], 0x40));
  aes128_key_assist (rk + 8, aes_keygen_assist (rk[7], 0x80));
  aes128_key_assist (rk + 9, aes_keygen_assist (rk[8], 0x1b));
  aes128_key_assist (rk + 10, aes_keygen_assist (rk[9], 0x36));
}

static_always_inline void
aes192_key_assist (u8x16 * r1, u8x16 * r2, u8x16 key_assist)
{
  u8x16 t;
  r1[0] ^= t = u8x16_word_shift_left (r1[0], 4);
  r1[0] ^= t = u8x16_word_shift_left (t, 4);
  r1[0] ^= u8x16_word_shift_left (t, 4);
  r1[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) key_assist, 0x55);
  r2[0] ^= u8x16_word_shift_left (r2[0], 4);
  r2[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) r1[0], 0xff);
}

static_always_inline void
aes192_key_expand (u8x16 * rk, u8x16u const *k)
{
  u8x16 r1, r2;

  rk[0] = r1 = k[0];
  /* *INDENT-OFF* */
  rk[1] = r2 = (u8x16) (u64x2) { *(u64 *) (k + 1), 0 };
  /* *INDENT-ON* */

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x1));
  rk[1] = (u8x16) _mm_shuffle_pd ((__m128d) rk[1], (__m128d) r1, 0);
  rk[2] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x2));
  rk[3] = r1;
  rk[4] = r2;

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x4));
  rk[4] = (u8x16) _mm_shuffle_pd ((__m128d) rk[4], (__m128d) r1, 0);
  rk[5] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x8));
  rk[6] = r1;
  rk[7] = r2;

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x10));
  rk[7] = (u8x16) _mm_shuffle_pd ((__m128d) rk[7], (__m128d) r1, 0);
  rk[8] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x20));
  rk[9] = r1;
  rk[10] = r2;

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x40));
  rk[10] = (u8x16) _mm_shuffle_pd ((__m128d) rk[10], (__m128d) r1, 0);
  rk[11] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x80));
  rk[12] = r1;
}

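/* Each aes256_key_assist call derives two consecutive round keys: the first
   from RotWord+SubWord of the previous round key (with rcon applied), the
   second from SubWord only (rcon 0), as the AES-256 key schedule requires.
   The final call (i == 14) emits just the last round key. */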
static_always_inline void
aes256_key_assist (u8x16 * rk, int i, u8x16 key_assist)
{
  u8x16 r, t;
  rk += i;
  r = rk[-2];
  r ^= t = u8x16_word_shift_left (r, 4);
  r ^= t = u8x16_word_shift_left (t, 4);
  r ^= u8x16_word_shift_left (t, 4);
  r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 3, 3, 3, 3);
  rk[0] = r;

  if (i >= 14)
    return;

  key_assist = aes_keygen_assist (rk[0], 0x0);
  r = rk[-1];
  r ^= t = u8x16_word_shift_left (r, 4);
  r ^= t = u8x16_word_shift_left (t, 4);
  r ^= u8x16_word_shift_left (t, 4);
  r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 2, 2, 2, 2);
  rk[1] = r;
}

static_always_inline void
aes256_key_expand (u8x16 * rk, u8x16u const *k)
{
  rk[0] = k[0];
  rk[1] = k[1];
  aes256_key_assist (rk, 2, aes_keygen_assist (rk[1], 0x01));
  aes256_key_assist (rk, 4, aes_keygen_assist (rk[3], 0x02));
  aes256_key_assist (rk, 6, aes_keygen_assist (rk[5], 0x04));
  aes256_key_assist (rk, 8, aes_keygen_assist (rk[7], 0x08));
  aes256_key_assist (rk, 10, aes_keygen_assist (rk[9], 0x10));
  aes256_key_assist (rk, 12, aes_keygen_assist (rk[11], 0x20));
  aes256_key_assist (rk, 14, aes_keygen_assist (rk[13], 0x40));
}
#endif

#ifdef __aarch64__

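/* TBL masks that broadcast the last 32-bit word of the previous round key to
   all four lanes: aese_prep_mask1 also rotates it by one byte (RotWord),
   aese_prep_mask2 leaves it unrotated (used for the AES-256 SubWord-only
   step). */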
static const u8x16 aese_prep_mask1 =
  { 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
static const u8x16 aese_prep_mask2 =
  { 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };

static_always_inline void
aes128_key_expand_round_neon (u8x16 * rk, u32 rcon)
{
  u8x16 r, t, last_round = rk[-1], z = { };
  r = vqtbl1q_u8 (last_round, aese_prep_mask1);
  r = vaeseq_u8 (r, z);
  r ^= (u8x16) vdupq_n_u32 (rcon);
  r ^= last_round;
  r ^= t = vextq_u8 (z, last_round, 12);
  r ^= t = vextq_u8 (z, t, 12);
  r ^= vextq_u8 (z, t, 12);
  rk[0] = r;
}

static_always_inline void
aes128_key_expand (u8x16 *rk, u8x16u const *k)
{
  rk[0] = k[0];
  aes128_key_expand_round_neon (rk + 1, 0x01);
  aes128_key_expand_round_neon (rk + 2, 0x02);
  aes128_key_expand_round_neon (rk + 3, 0x04);
  aes128_key_expand_round_neon (rk + 4, 0x08);
  aes128_key_expand_round_neon (rk + 5, 0x10);
  aes128_key_expand_round_neon (rk + 6, 0x20);
  aes128_key_expand_round_neon (rk + 7, 0x40);
  aes128_key_expand_round_neon (rk + 8, 0x80);
  aes128_key_expand_round_neon (rk + 9, 0x1b);
  aes128_key_expand_round_neon (rk + 10, 0x36);
}

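/* AES-192 round keys do not fall on 128-bit boundaries in the expanded
   schedule, so the NEON expansion below works on 64-bit (u8x8) halves; each
   call emits three halves, except the final (rcon 0x80) call which emits
   two. */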
static_always_inline void
aes192_key_expand_round_neon (u8x8 * rk, u32 rcon)
{
  u8x8 r, last_round = rk[-1], z = { };
  u8x16 r2, z2 = { };

  r2 = (u8x16) vdupq_lane_u64 ((uint64x1_t) last_round, 0);
  r2 = vqtbl1q_u8 (r2, aese_prep_mask1);
  r2 = vaeseq_u8 (r2, z2);
  r2 ^= (u8x16) vdupq_n_u32 (rcon);

  r = (u8x8) vdup_laneq_u64 ((u64x2) r2, 0);
  r ^= rk[-3];
  r ^= vext_u8 (z, rk[-3], 4);
  rk[0] = r;

  r = rk[-2] ^ vext_u8 (r, z, 4);
  r ^= vext_u8 (z, r, 4);
  rk[1] = r;

  if (rcon == 0x80)
    return;

  r = rk[-1] ^ vext_u8 (r, z, 4);
  r ^= vext_u8 (z, r, 4);
  rk[2] = r;
}

static_always_inline void
aes192_key_expand (u8x16 * ek, const u8x16u * k)
{
  u8x8 *rk = (u8x8 *) ek;
  ek[0] = k[0];
  rk[2] = *(u8x8u *) (k + 1);
  aes192_key_expand_round_neon (rk + 3, 0x01);
  aes192_key_expand_round_neon (rk + 6, 0x02);
  aes192_key_expand_round_neon (rk + 9, 0x04);
  aes192_key_expand_round_neon (rk + 12, 0x08);
  aes192_key_expand_round_neon (rk + 15, 0x10);
  aes192_key_expand_round_neon (rk + 18, 0x20);
  aes192_key_expand_round_neon (rk + 21, 0x40);
  aes192_key_expand_round_neon (rk + 24, 0x80);
}

static_always_inline void
aes256_key_expand_round_neon (u8x16 * rk, u32 rcon)
{
  u8x16 r, t, z = { };

  r = vqtbl1q_u8 (rk[-1], rcon ? aese_prep_mask1 : aese_prep_mask2);
  r = vaeseq_u8 (r, z);
  if (rcon)
    r ^= (u8x16) vdupq_n_u32 (rcon);
  r ^= rk[-2];
  r ^= t = vextq_u8 (z, rk[-2], 12);
  r ^= t = vextq_u8 (z, t, 12);
  r ^= vextq_u8 (z, t, 12);
  rk[0] = r;
}

static_always_inline void
aes256_key_expand (u8x16 *rk, u8x16u const *k)
{
  rk[0] = k[0];
  rk[1] = k[1];
  aes256_key_expand_round_neon (rk + 2, 0x01);
  aes256_key_expand_round_neon (rk + 3, 0);
  aes256_key_expand_round_neon (rk + 4, 0x02);
  aes256_key_expand_round_neon (rk + 5, 0);
  aes256_key_expand_round_neon (rk + 6, 0x04);
  aes256_key_expand_round_neon (rk + 7, 0);
  aes256_key_expand_round_neon (rk + 8, 0x08);
  aes256_key_expand_round_neon (rk + 9, 0);
  aes256_key_expand_round_neon (rk + 10, 0x10);
  aes256_key_expand_round_neon (rk + 11, 0);
  aes256_key_expand_round_neon (rk + 12, 0x20);
  aes256_key_expand_round_neon (rk + 13, 0);
  aes256_key_expand_round_neon (rk + 14, 0x40);
}

#endif

static_always_inline void
aes_key_expand (u8x16 * key_schedule, u8 const *key, aes_key_size_t ks)
{
  switch (ks)
    {
    case AES_KEY_128:
      aes128_key_expand (key_schedule, (u8x16u const *) key);
      break;
    case AES_KEY_192:
      aes192_key_expand (key_schedule, (u8x16u const *) key);
      break;
    case AES_KEY_256:
      aes256_key_expand (key_schedule, (u8x16u const *) key);
      break;
    }
}

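/* Build the decryption key schedule (Equivalent Inverse Cipher) from an
   expanded encryption schedule: round keys are taken in reverse order and
   InvMixColumns is applied to every key except the first and the last. */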
static_always_inline void
aes_key_enc_to_dec (u8x16 * ke, u8x16 * kd, aes_key_size_t ks)
{
  int rounds = AES_KEY_ROUNDS (ks);

  kd[rounds] = ke[0];
  kd[0] = ke[rounds];

  for (int i = 1; i < (rounds / 2); i++)
    {
      kd[rounds - i] = aes_inv_mix_column (ke[i]);
      kd[i] = aes_inv_mix_column (ke[rounds - i]);
    }

  kd[rounds / 2] = aes_inv_mix_column (ke[rounds / 2]);
}
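
/* Illustrative usage sketch (not part of the original header); `key', `src'
 * and `dst' are hypothetical caller-provided buffers, and kd is the schedule
 * to pass to the aes_dec_round / aes_dec_last_round helpers:
 *
 *   u8x16 ke[AES_KEY_ROUNDS (AES_KEY_256) + 1], kd[15];
 *   aes_key_expand (ke, key, AES_KEY_256);
 *   aes_block_store (dst, aes_encrypt_block (aes_block_load (src), ke,
 *                                            AES_KEY_256));
 *   aes_key_enc_to_dec (ke, kd, AES_KEY_256);
 */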

#endif /* __aesni_h__ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */