/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2023 Cisco Systems, Inc.
 */

#ifndef __crypto_aes_gcm_h__
#define __crypto_aes_gcm_h__

#include <vppinfra/clib.h>
#include <vppinfra/vector.h>
#include <vppinfra/cache.h>
#include <vppinfra/string.h>
#include <vppinfra/crypto/aes.h>
#include <vppinfra/crypto/ghash.h>

#define NUM_HI 36
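/* Three code paths share this file: with VAES and AVX512 each vector register
 * carries four 16-byte AES blocks (N = 64), with VAES alone it carries two
 * (N = 32), and otherwise one block per register (N = 16). */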
#if defined(__VAES__) && defined(__AVX512F__)
typedef u8x64 aes_data_t;
typedef u8x64u aes_ghash_t;
typedef u8x64u aes_mem_t;
typedef u32x16 aes_gcm_counter_t;
#define N 64
#define aes_gcm_load_partial(p, n) u8x64_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x64_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x64_splat (v)
#define aes_gcm_reflect(r) u8x64_reflect_u8x16 (r)
#define aes_gcm_ghash_reduce(c) ghash4_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash4_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash4_final (&(c)->gd)
#elif defined(__VAES__)
typedef u8x32 aes_data_t;
typedef u8x32u aes_ghash_t;
typedef u8x32u aes_mem_t;
typedef u32x8 aes_gcm_counter_t;
#define N 32
#define aes_gcm_load_partial(p, n) u8x32_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x32_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x32_splat (v)
#define aes_gcm_reflect(r) u8x32_reflect_u8x16 (r)
#define aes_gcm_ghash_reduce(c) ghash2_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash2_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash2_final (&(c)->gd)
#else
typedef u8x16 aes_data_t;
typedef u8x16 aes_ghash_t;
typedef u8x16u aes_mem_t;
typedef u32x4 aes_gcm_counter_t;
#define N 16
#define aes_gcm_load_partial(p, n) u8x16_load_partial ((u8 *) (p), n)
#define aes_gcm_store_partial(v, p, n) u8x16_store_partial (v, (u8 *) (p), n)
#define aes_gcm_splat(v) u8x16_splat (v)
#define aes_gcm_reflect(r) u8x16_reflect (r)
#define aes_gcm_ghash_reduce(c) ghash_reduce (&(c)->gd)
#define aes_gcm_ghash_reduce2(c) ghash_reduce2 (&(c)->gd)
#define aes_gcm_ghash_final(c) (c)->T = ghash_final (&(c)->gd)
#endif
#define N_LANES (N / 16)

typedef enum
{
  AES_GCM_OP_UNKNONW = 0,
  AES_GCM_OP_ENCRYPT,
  AES_GCM_OP_DECRYPT,
  AES_GCM_OP_GMAC
} aes_gcm_op_t;

typedef union
{
  u8x16 x1;
  u8x32 x2;
  u8x64 x4;
  u8x16 lanes[4];
} __clib_aligned (64)
aes_gcm_expaned_key_t;

typedef struct
{
  /* pre-calculated hash key values */
  const u8x16 Hi[NUM_HI];
  /* extracted AES key */
  const aes_gcm_expaned_key_t Ke[AES_KEY_ROUNDS (AES_KEY_256) + 1];
} aes_gcm_key_data_t;

typedef struct
{
  aes_gcm_op_t operation;
  int last;
  u8 rounds;
  uword data_bytes;
  uword aad_bytes;

  u8x16 T;

  /* hash */
  const u8x16 *Hi;
  const aes_ghash_t *next_Hi;

  /* expanded keys */
  const aes_gcm_expaned_key_t *Ke;

  /* counter */
  u32 counter;
  u8x16 EY0;
  aes_gcm_counter_t Y;

  /* ghash */
  ghash_ctx_t gd;
} aes_gcm_ctx_t;

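/* GCM length block: the bit lengths of the encrypted data and of the AAD,
 * mixed into GHASH as the final input block. */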
static_always_inline u8x16
aes_gcm_final_block (aes_gcm_ctx_t *ctx)
{
  return (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
}

static_always_inline void
aes_gcm_ghash_mul_first (aes_gcm_ctx_t *ctx, aes_data_t data, u32 n_lanes)
{
  uword hash_offset = NUM_HI - n_lanes;
  ctx->next_Hi = (aes_ghash_t *) (ctx->Hi + hash_offset);
#if N_LANES == 4
  u8x64 tag4 = {};
  tag4 = u8x64_insert_u8x16 (tag4, ctx->T, 0);
  ghash4_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag4, *ctx->next_Hi++);
#elif N_LANES == 2
  u8x32 tag2 = {};
  tag2 = u8x32_insert_lo (tag2, ctx->T);
  ghash2_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag2, *ctx->next_Hi++);
#else
  ghash_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ ctx->T, *ctx->next_Hi++);
#endif
}

static_always_inline void
aes_gcm_ghash_mul_next (aes_gcm_ctx_t *ctx, aes_data_t data)
{
#if N_LANES == 4
  ghash4_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
#elif N_LANES == 2
  ghash2_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
#else
  ghash_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
#endif
}

static_always_inline void
aes_gcm_ghash_mul_final_block (aes_gcm_ctx_t *ctx)
{
#if N_LANES == 4
  u8x64 h = u8x64_insert_u8x16 (u8x64_zero (), ctx->Hi[NUM_HI - 1], 0);
  u8x64 r4 = u8x64_insert_u8x16 (u8x64_zero (), aes_gcm_final_block (ctx), 0);
  ghash4_mul_next (&ctx->gd, r4, h);
#elif N_LANES == 2
  u8x32 h = u8x32_insert_lo (u8x32_zero (), ctx->Hi[NUM_HI - 1]);
  u8x32 r2 = u8x32_insert_lo (u8x32_zero (), aes_gcm_final_block (ctx));
  ghash2_mul_next (&ctx->gd, r2, h);
#else
  ghash_mul_next (&ctx->gd, aes_gcm_final_block (ctx), ctx->Hi[NUM_HI - 1]);
#endif
}

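/* One AES round of the counter-0 block E(Y0, k); callers spread these rounds
 * between GHASH steps so that computing the tag mask overlaps with hashing. */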
static_always_inline void
aes_gcm_enc_ctr0_round (aes_gcm_ctx_t *ctx, int aes_round)
{
  if (aes_round == 0)
    ctx->EY0 ^= ctx->Ke[0].x1;
  else if (aes_round == ctx->rounds)
    ctx->EY0 = aes_enc_last_round (ctx->EY0, ctx->Ke[aes_round].x1);
  else
    ctx->EY0 = aes_enc_round (ctx->EY0, ctx->Ke[aes_round].x1);
}

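/* GHASH the AAD (or, for GMAC, the whole message): full batches of 8 vector
 * blocks first, then the remainder, with a trailing partial block copied into
 * a zero-padded register. In GMAC mode the length block is folded in and
 * E(Y0, k) is computed here as well. */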
static_always_inline void
aes_gcm_ghash (aes_gcm_ctx_t *ctx, u8 *data, u32 n_left)
{
  uword i;
  aes_data_t r = {};
  const aes_mem_t *d = (aes_mem_t *) data;

  for (; n_left >= 8 * N; n_left -= 8 * N, d += 8)
    {
      if (ctx->operation == AES_GCM_OP_GMAC && n_left == N * 8)
        {
          aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES + 1);
          for (i = 1; i < 8; i++)
            aes_gcm_ghash_mul_next (ctx, d[i]);
          aes_gcm_ghash_mul_final_block (ctx);
          aes_gcm_ghash_reduce (ctx);
          aes_gcm_ghash_reduce2 (ctx);
          aes_gcm_ghash_final (ctx);
          goto done;
        }

      aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES);
      for (i = 1; i < 8; i++)
        aes_gcm_ghash_mul_next (ctx, d[i]);
      aes_gcm_ghash_reduce (ctx);
      aes_gcm_ghash_reduce2 (ctx);
      aes_gcm_ghash_final (ctx);
    }

  if (n_left > 0)
    {
      int n_lanes = (n_left + 15) / 16;

      if (ctx->operation == AES_GCM_OP_GMAC)
        n_lanes++;

      if (n_left < N)
        {
          clib_memcpy_fast (&r, d, n_left);
          aes_gcm_ghash_mul_first (ctx, r, n_lanes);
        }
      else
        {
          aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
          n_left -= N;
          i = 1;

          if (n_left >= 4 * N)
            {
              aes_gcm_ghash_mul_next (ctx, d[i]);
              aes_gcm_ghash_mul_next (ctx, d[i + 1]);
              aes_gcm_ghash_mul_next (ctx, d[i + 2]);
              aes_gcm_ghash_mul_next (ctx, d[i + 3]);
              n_left -= 4 * N;
              i += 4;
            }
          if (n_left >= 2 * N)
            {
              aes_gcm_ghash_mul_next (ctx, d[i]);
              aes_gcm_ghash_mul_next (ctx, d[i + 1]);
              n_left -= 2 * N;
              i += 2;
            }

          if (n_left >= N)
            {
              aes_gcm_ghash_mul_next (ctx, d[i]);
              n_left -= N;
              i += 1;
            }

          if (n_left)
            {
              clib_memcpy_fast (&r, d + i, n_left);
              aes_gcm_ghash_mul_next (ctx, r);
            }
        }

      if (ctx->operation == AES_GCM_OP_GMAC)
        aes_gcm_ghash_mul_final_block (ctx);
      aes_gcm_ghash_reduce (ctx);
      aes_gcm_ghash_reduce2 (ctx);
      aes_gcm_ghash_final (ctx);
    }
  else if (ctx->operation == AES_GCM_OP_GMAC)
    ctx->T =
      ghash_mul (aes_gcm_final_block (ctx) ^ ctx->T, ctx->Hi[NUM_HI - 1]);

done:
  /* encrypt counter 0 E(Y0, k) */
  if (ctx->operation == AES_GCM_OP_GMAC)
    for (int i = 0; i < ctx->rounds + 1; i += 1)
      aes_gcm_enc_ctr0_round (ctx, i);
}

static_always_inline void
aes_gcm_enc_first_round (aes_gcm_ctx_t *ctx, aes_data_t *r, uword n_blocks)
{
  const aes_gcm_expaned_key_t Ke0 = ctx->Ke[0];
  uword i = 0;

  /* The counter is stored in network byte order for performance reasons, so
     we normally increment only its least significant byte and handle the
     overflow case separately. Because four 128-, 256- or 512-bit blocks are
     processed in parallel in all but the last round, overflow only needs to
     be handled when n_blocks == 4. */

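  /* The ctr_inv_* constants below are added to the last 32-bit lane of each
     stored counter block; adding 1 << 24 to that host-order lane increments
     the byte which is least significant in the big-endian counter. */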
#if N_LANES == 4
  const u32x16 ctr_inv_4444 = { 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24,
                                0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24 };

  const u32x16 ctr_4444 = {
    4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0,
  };

  if (n_blocks == 4)
    for (; i < 2; i++)
      {
        r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
        ctx->Y += ctr_inv_4444;
      }

  if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 242))
    {
      u32x16 Yr = (u32x16) aes_gcm_reflect ((u8x64) ctx->Y);

      for (; i < n_blocks; i++)
        {
          r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
          Yr += ctr_4444;
          ctx->Y = (u32x16) aes_gcm_reflect ((u8x64) Yr);
        }
    }
  else
    {
      for (; i < n_blocks; i++)
        {
          r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
          ctx->Y += ctr_inv_4444;
        }
    }
  ctx->counter += n_blocks * 4;
#elif N_LANES == 2
  const u32x8 ctr_inv_22 = { 0, 0, 0, 2 << 24, 0, 0, 0, 2 << 24 };
  const u32x8 ctr_22 = { 2, 0, 0, 0, 2, 0, 0, 0 };

  if (n_blocks == 4)
    for (; i < 2; i++)
      {
        r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
        ctx->Y += ctr_inv_22;
      }

  if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 250))
    {
      u32x8 Yr = (u32x8) aes_gcm_reflect ((u8x32) ctx->Y);

      for (; i < n_blocks; i++)
        {
          r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
          Yr += ctr_22;
          ctx->Y = (u32x8) aes_gcm_reflect ((u8x32) Yr);
        }
    }
  else
    {
      for (; i < n_blocks; i++)
        {
          r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
          ctx->Y += ctr_inv_22;
        }
    }
  ctx->counter += n_blocks * 2;
#else
  const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };

  if (PREDICT_TRUE ((u8) ctx->counter < 0xfe) || n_blocks < 3)
    {
      for (; i < n_blocks; i++)
        {
          r[i] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
          ctx->Y += ctr_inv_1;
        }
      ctx->counter += n_blocks;
    }
  else
    {
      r[i++] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
      ctx->Y += ctr_inv_1;
      ctx->counter += 1;

      for (; i < n_blocks; i++)
        {
          r[i] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
          ctx->counter++;
          ctx->Y[3] = clib_host_to_net_u32 (ctx->counter);
        }
    }
#endif
}

static_always_inline void
aes_gcm_enc_round (aes_data_t *r, const aes_gcm_expaned_key_t *Ke,
                   uword n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
#if N_LANES == 4
    r[i] = aes_enc_round_x4 (r[i], Ke->x4);
#elif N_LANES == 2
    r[i] = aes_enc_round_x2 (r[i], Ke->x2);
#else
    r[i] = aes_enc_round (r[i], Ke->x1);
#endif
}

static_always_inline void
aes_gcm_enc_last_round (aes_gcm_ctx_t *ctx, aes_data_t *r, aes_data_t *d,
                        const aes_gcm_expaned_key_t *Ke, uword n_blocks)
{
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < ctx->rounds; i++)
    aes_gcm_enc_round (r, Ke + i, n_blocks);

  for (int i = 0; i < n_blocks; i++)
#if N_LANES == 4
    d[i] ^= aes_enc_last_round_x4 (r[i], Ke[ctx->rounds].x4);
#elif N_LANES == 2
    d[i] ^= aes_enc_last_round_x2 (r[i], Ke[ctx->rounds].x2);
#else
    d[i] ^= aes_enc_last_round (r[i], Ke[ctx->rounds].x1);
#endif
}

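/* Process up to 4 vector blocks: run the AES-CTR rounds on n counter blocks
 * while folding data blocks into GHASH between rounds. On encryption the
 * first chunk of a message is processed using with_ghash == 0, since the
 * ciphertext to be hashed does not exist yet. */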
static_always_inline void
aes_gcm_calc (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst, u32 n,
              u32 n_bytes, int with_ghash)
{
  const aes_gcm_expaned_key_t *k = ctx->Ke;
  const aes_mem_t *sv = (aes_mem_t *) src;
  aes_mem_t *dv = (aes_mem_t *) dst;
  uword ghash_blocks, gc = 1;
  aes_data_t r[4];
  u32 i, n_lanes;

  if (ctx->operation == AES_GCM_OP_ENCRYPT)
    {
      ghash_blocks = 4;
      n_lanes = N_LANES * 4;
    }
  else
    {
      ghash_blocks = n;
      n_lanes = n * N_LANES;
#if N_LANES != 1
      if (ctx->last)
        n_lanes = (n_bytes + 15) / 16;
#endif
    }

  n_bytes -= (n - 1) * N;

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (ctx, r, n);
  aes_gcm_enc_round (r, k + 1, n);

  /* load data - decrypt round */
  if (ctx->operation == AES_GCM_OP_DECRYPT)
    {
      for (i = 0; i < n - ctx->last; i++)
        d[i] = sv[i];

      if (ctx->last)
        d[n - 1] = aes_gcm_load_partial ((u8 *) (sv + n - 1), n_bytes);
    }

  /* GHASH multiply block 0 */
  if (with_ghash)
    aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, k + 2, n);
  aes_gcm_enc_round (r, k + 3, n);

  /* GHASH multiply block 1 */
  if (with_ghash && gc++ < ghash_blocks)
    aes_gcm_ghash_mul_next (ctx, (d[1]));

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, k + 4, n);
  aes_gcm_enc_round (r, k + 5, n);

  /* GHASH multiply block 2 */
  if (with_ghash && gc++ < ghash_blocks)
    aes_gcm_ghash_mul_next (ctx, (d[2]));

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, k + 6, n);
  aes_gcm_enc_round (r, k + 7, n);

  /* GHASH multiply block 3 */
  if (with_ghash && gc++ < ghash_blocks)
    aes_gcm_ghash_mul_next (ctx, (d[3]));

  /* load data - encrypt round */
  if (ctx->operation == AES_GCM_OP_ENCRYPT)
    {
      for (i = 0; i < n - ctx->last; i++)
        d[i] = sv[i];

      if (ctx->last)
        d[n - 1] = aes_gcm_load_partial (sv + n - 1, n_bytes);
    }

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, k + 8, n);
  aes_gcm_enc_round (r, k + 9, n);

  /* AES last round(s) */
  aes_gcm_enc_last_round (ctx, r, d, k, n);

  /* store data */
  for (i = 0; i < n - ctx->last; i++)
    dv[i] = d[i];

  if (ctx->last)
    aes_gcm_store_partial (d[n - 1], dv + n - 1, n_bytes);

  /* GHASH reduce 1st step */
  aes_gcm_ghash_reduce (ctx);

  /* GHASH reduce 2nd step */
  if (with_ghash)
    aes_gcm_ghash_reduce2 (ctx);

  /* GHASH final step */
  if (with_ghash)
    aes_gcm_ghash_final (ctx);
}

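/* Process 8 vector blocks per call (two batches of 4), fully interleaving the
 * AES rounds with GHASH multiplications. For encryption the GHASH input lags
 * one 4-block batch behind, so ciphertext is hashed as soon as it has been
 * produced. */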
static_always_inline void
aes_gcm_calc_double (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst)
{
  const aes_gcm_expaned_key_t *k = ctx->Ke;
  const aes_mem_t *sv = (aes_mem_t *) src;
  aes_mem_t *dv = (aes_mem_t *) dst;
  aes_data_t r[4];

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (ctx, r, 4);
  aes_gcm_enc_round (r, k + 1, 4);

  /* load 4 blocks of data - decrypt round */
  if (ctx->operation == AES_GCM_OP_DECRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = sv[i];

  /* GHASH multiply block 0 */
  aes_gcm_ghash_mul_first (ctx, d[0], N_LANES * 8);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, k + 2, 4);
  aes_gcm_enc_round (r, k + 3, 4);

  /* GHASH multiply block 1 */
  aes_gcm_ghash_mul_next (ctx, (d[1]));

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, k + 4, 4);
  aes_gcm_enc_round (r, k + 5, 4);

  /* GHASH multiply block 2 */
  aes_gcm_ghash_mul_next (ctx, (d[2]));

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, k + 6, 4);
  aes_gcm_enc_round (r, k + 7, 4);

  /* GHASH multiply block 3 */
  aes_gcm_ghash_mul_next (ctx, (d[3]));

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, k + 8, 4);
  aes_gcm_enc_round (r, k + 9, 4);

  /* load 4 blocks of data - encrypt round */
  if (ctx->operation == AES_GCM_OP_ENCRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = sv[i];

  /* AES last round(s) */
  aes_gcm_enc_last_round (ctx, r, d, k, 4);

  /* store 4 blocks of data */
  for (int i = 0; i < 4; i++)
    dv[i] = d[i];

  /* load next 4 blocks of data - decrypt round */
  if (ctx->operation == AES_GCM_OP_DECRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = sv[i + 4];

  /* GHASH multiply block 4 */
  aes_gcm_ghash_mul_next (ctx, (d[0]));

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (ctx, r, 4);
  aes_gcm_enc_round (r, k + 1, 4);

  /* GHASH multiply block 5 */
  aes_gcm_ghash_mul_next (ctx, (d[1]));

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, k + 2, 4);
  aes_gcm_enc_round (r, k + 3, 4);

  /* GHASH multiply block 6 */
  aes_gcm_ghash_mul_next (ctx, (d[2]));

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, k + 4, 4);
  aes_gcm_enc_round (r, k + 5, 4);

  /* GHASH multiply block 7 */
  aes_gcm_ghash_mul_next (ctx, (d[3]));

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, k + 6, 4);
  aes_gcm_enc_round (r, k + 7, 4);

  /* GHASH reduce 1st step */
  aes_gcm_ghash_reduce (ctx);

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, k + 8, 4);
  aes_gcm_enc_round (r, k + 9, 4);

  /* GHASH reduce 2nd step */
  aes_gcm_ghash_reduce2 (ctx);

  /* load 4 blocks of data - encrypt round */
  if (ctx->operation == AES_GCM_OP_ENCRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = sv[i + 4];

  /* AES last round(s) */
  aes_gcm_enc_last_round (ctx, r, d, k, 4);

  /* store data */
  for (int i = 0; i < 4; i++)
    dv[i + 4] = d[i];

  /* GHASH final step */
  aes_gcm_ghash_final (ctx);
}

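/* Zero every byte of the last vector block beyond n_bytes by comparing a
 * 0..63 byte-index vector against the splatted length. */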
static_always_inline void
aes_gcm_mask_bytes (aes_data_t *d, uword n_bytes)
{
  const union
  {
    u8 b[64];
    aes_data_t r;
  } scale = {
    .b = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
           16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
           32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
           48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
  };

  d[0] &= (aes_gcm_splat (n_bytes) > scale.r);
}

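/* Finish the message: GHASH the last (possibly partial) batch of blocks plus
 * the length block, interleaved with the AES rounds that compute E(Y0, k)
 * for the tag. */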
static_always_inline void
aes_gcm_calc_last (aes_gcm_ctx_t *ctx, aes_data_t *d, int n_blocks,
                   u32 n_bytes)
{
  int n_lanes = (N_LANES == 1 ? n_blocks : (n_bytes + 15) / 16) + 1;
  n_bytes -= (n_blocks - 1) * N;
  int i;

  aes_gcm_enc_ctr0_round (ctx, 0);
  aes_gcm_enc_ctr0_round (ctx, 1);

  if (n_bytes != N)
    aes_gcm_mask_bytes (d + n_blocks - 1, n_bytes);

  aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);

  aes_gcm_enc_ctr0_round (ctx, 2);
  aes_gcm_enc_ctr0_round (ctx, 3);

  if (n_blocks > 1)
    aes_gcm_ghash_mul_next (ctx, d[1]);

  aes_gcm_enc_ctr0_round (ctx, 4);
  aes_gcm_enc_ctr0_round (ctx, 5);

  if (n_blocks > 2)
    aes_gcm_ghash_mul_next (ctx, d[2]);

  aes_gcm_enc_ctr0_round (ctx, 6);
  aes_gcm_enc_ctr0_round (ctx, 7);

  if (n_blocks > 3)
    aes_gcm_ghash_mul_next (ctx, d[3]);

  aes_gcm_enc_ctr0_round (ctx, 8);
  aes_gcm_enc_ctr0_round (ctx, 9);

  aes_gcm_ghash_mul_final_block (ctx);
  aes_gcm_ghash_reduce (ctx);

  for (i = 10; i < ctx->rounds; i++)
    aes_gcm_enc_ctr0_round (ctx, i);

  aes_gcm_ghash_reduce2 (ctx);

  aes_gcm_ghash_final (ctx);

  aes_gcm_enc_ctr0_round (ctx, i);
}

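/* Encrypt and authenticate: one 4-block chunk first (no ciphertext to hash
 * yet), then 8-block chunks with GHASH running one batch behind, then the
 * tail and the final length block. */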
static_always_inline void
aes_gcm_enc (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, u32 n_left)
{
  aes_data_t d[4];

  if (PREDICT_FALSE (n_left == 0))
    {
      int i;
      for (i = 0; i < ctx->rounds + 1; i++)
        aes_gcm_enc_ctr0_round (ctx, i);
      return;
    }

  if (n_left < 4 * N)
    {
      ctx->last = 1;
      if (n_left > 3 * N)
        {
          aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 0);
          aes_gcm_calc_last (ctx, d, 4, n_left);
        }
      else if (n_left > 2 * N)
        {
          aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 0);
          aes_gcm_calc_last (ctx, d, 3, n_left);
        }
      else if (n_left > N)
        {
          aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 0);
          aes_gcm_calc_last (ctx, d, 2, n_left);
        }
      else
        {
          aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 0);
          aes_gcm_calc_last (ctx, d, 1, n_left);
        }
      return;
    }

  aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 0);

  /* next */
  n_left -= 4 * N;
  dst += 4 * N;
  src += 4 * N;

  for (; n_left >= 8 * N; n_left -= 8 * N, src += 8 * N, dst += 8 * N)
    aes_gcm_calc_double (ctx, d, src, dst);

  if (n_left >= 4 * N)
    {
      aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1);

      /* next */
      n_left -= 4 * N;
      dst += 4 * N;
      src += 4 * N;
    }

  if (n_left == 0)
    {
      aes_gcm_calc_last (ctx, d, 4, 4 * N);
      return;
    }

  ctx->last = 1;

  if (n_left > 3 * N)
    {
      aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
      aes_gcm_calc_last (ctx, d, 4, n_left);
    }
  else if (n_left > 2 * N)
    {
      aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
      aes_gcm_calc_last (ctx, d, 3, n_left);
    }
  else if (n_left > N)
    {
      aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
      aes_gcm_calc_last (ctx, d, 2, n_left);
    }
  else
    {
      aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
      aes_gcm_calc_last (ctx, d, 1, n_left);
    }
}

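/* Decrypt and authenticate: the ciphertext is hashed in the same pass as it
 * is decrypted; E(Y0, k) is computed at the end, interleaved with the GHASH
 * of the length block. */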
static_always_inline void
aes_gcm_dec (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, uword n_left)
{
  aes_data_t d[4] = {};
  ghash_ctx_t gd;

  /* main decryption loop */
  for (; n_left >= 8 * N; n_left -= 8 * N, dst += 8 * N, src += 8 * N)
    aes_gcm_calc_double (ctx, d, src, dst);

  if (n_left >= 4 * N)
    {
      aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1);

      /* next */
      n_left -= 4 * N;
      dst += N * 4;
      src += N * 4;
    }

  if (n_left)
    {
      ctx->last = 1;

      if (n_left > 3 * N)
        aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
      else if (n_left > 2 * N)
        aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
      else if (n_left > N)
        aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
      else
        aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
    }

  /* interleaved counter 0 encryption E(Y0, k) and ghash of final GCM
   * (bit length) block */

  aes_gcm_enc_ctr0_round (ctx, 0);
  aes_gcm_enc_ctr0_round (ctx, 1);

  ghash_mul_first (&gd, aes_gcm_final_block (ctx) ^ ctx->T,
                   ctx->Hi[NUM_HI - 1]);

  aes_gcm_enc_ctr0_round (ctx, 2);
  aes_gcm_enc_ctr0_round (ctx, 3);

  ghash_reduce (&gd);

  aes_gcm_enc_ctr0_round (ctx, 4);
  aes_gcm_enc_ctr0_round (ctx, 5);

  ghash_reduce2 (&gd);

  aes_gcm_enc_ctr0_round (ctx, 6);
  aes_gcm_enc_ctr0_round (ctx, 7);

  ctx->T = ghash_final (&gd);

  aes_gcm_enc_ctr0_round (ctx, 8);
  aes_gcm_enc_ctr0_round (ctx, 9);

  for (int i = 10; i < ctx->rounds + 1; i += 1)
    aes_gcm_enc_ctr0_round (ctx, i);
}

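/* One-shot AES-GCM / GMAC over a contiguous buffer. Returns 1 when a decrypt
 * operation verified the tag, 0 otherwise (encrypt and GMAC always return 0
 * and write the computed tag). */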
static_always_inline int
aes_gcm (const u8 *src, u8 *dst, const u8 *aad, u8 *ivp, u8 *tag,
         u32 data_bytes, u32 aad_bytes, u8 tag_len,
         const aes_gcm_key_data_t *kd, int aes_rounds, aes_gcm_op_t op)
{
  u8 *addt = (u8 *) aad;
  u32x4 Y0;

  aes_gcm_ctx_t _ctx = { .counter = 2,
                         .rounds = aes_rounds,
                         .operation = op,
                         .data_bytes = data_bytes,
                         .aad_bytes = aad_bytes,
                         .Ke = kd->Ke,
                         .Hi = kd->Hi },
                *ctx = &_ctx;

  /* initialize counter */
  Y0 = (u32x4) (u64x2){ *(u64u *) ivp, 0 };
  Y0[2] = *(u32u *) (ivp + 8);
  Y0[3] = 1 << 24;
  ctx->EY0 = (u8x16) Y0;

#if N_LANES == 4
  ctx->Y = u32x16_splat_u32x4 (Y0) + (u32x16){
    0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
  };
#elif N_LANES == 2
  ctx->Y =
    u32x8_splat_u32x4 (Y0) + (u32x8){ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24 };
#else
  ctx->Y = Y0 + (u32x4){ 0, 0, 0, 1 << 24 };
#endif

  /* calculate ghash for AAD */
  aes_gcm_ghash (ctx, addt, aad_bytes);

  /* ghash and encrypt/decrypt */
  if (op == AES_GCM_OP_ENCRYPT)
    aes_gcm_enc (ctx, src, dst, data_bytes);
  else if (op == AES_GCM_OP_DECRYPT)
    aes_gcm_dec (ctx, src, dst, data_bytes);

  /* final tag is the byte-reflected GHASH value xored with E(Y0, k) */
  ctx->T = u8x16_reflect (ctx->T) ^ ctx->EY0;

  /* tag_len 16 -> 0 */
  tag_len &= 0xf;

  if (op == AES_GCM_OP_ENCRYPT || op == AES_GCM_OP_GMAC)
    {
      /* store tag */
      if (tag_len)
        u8x16_store_partial (ctx->T, tag, tag_len);
      else
        ((u8x16u *) tag)[0] = ctx->T;
    }
  else
    {
      /* check tag */
      if (tag_len)
        {
          u16 mask = pow2_mask (tag_len);
          u8x16 expected = u8x16_load_partial (tag, tag_len);
          if ((u8x16_msb_mask (expected == ctx->T) & mask) == mask)
            return 1;
        }
      else
        {
          if (u8x16_is_equal (ctx->T, *(u8x16u *) tag))
            return 1;
        }
    }
  return 0;
}

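/* Expand the AES key into round keys replicated across all vector lanes and
 * precompute the NUM_HI hash key values (powers of H) used by the wide GHASH
 * paths. */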
static_always_inline void
clib_aes_gcm_key_expand (aes_gcm_key_data_t *kd, const u8 *key,
                         aes_key_size_t ks)
{
  u8x16 H;
  u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1];
  aes_gcm_expaned_key_t *Ke = (aes_gcm_expaned_key_t *) kd->Ke;

  /* expand AES key */
  aes_key_expand (ek, key, ks);
  for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
    Ke[i].lanes[0] = Ke[i].lanes[1] = Ke[i].lanes[2] = Ke[i].lanes[3] = ek[i];

  /* pre-calculate H */
  H = aes_encrypt_block (u8x16_zero (), ek, ks);
  H = u8x16_reflect (H);
  ghash_precompute (H, (u8x16 *) kd->Hi, ARRAY_LEN (kd->Hi));
}

static_always_inline void
clib_aes128_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
                     u32 data_bytes, const u8 *aad, u32 aad_bytes,
                     const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
{
  aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
           tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_ENCRYPT);
}

static_always_inline void
clib_aes256_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
                     u32 data_bytes, const u8 *aad, u32 aad_bytes,
                     const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
{
  aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
           tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_ENCRYPT);
}

static_always_inline int
clib_aes128_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
                     u32 data_bytes, const u8 *aad, u32 aad_bytes,
                     const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
{
  return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
                  data_bytes, aad_bytes, tag_bytes, kd,
                  AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_DECRYPT);
}

static_always_inline int
clib_aes256_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
                     u32 data_bytes, const u8 *aad, u32 aad_bytes,
                     const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
{
  return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
                  data_bytes, aad_bytes, tag_bytes, kd,
                  AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_DECRYPT);
}

static_always_inline void
clib_aes128_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
                  const u8 *iv, u32 tag_bytes, u8 *tag)
{
  aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
           AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_GMAC);
}

static_always_inline void
clib_aes256_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
                  const u8 *iv, u32 tag_bytes, u8 *tag)
{
  aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
           AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_GMAC);
}

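/*
 * Illustrative usage sketch (not part of this header): one possible way a
 * caller could combine key expansion with the one-shot helpers above. The
 * buffer sizes, the example key/IV variables and the 16-byte tag length are
 * assumptions made for the example only.
 *
 *   u8 key[16], iv[12];               // filled in by the caller
 *   u8 pt[64], ct[64], tag[16];
 *   aes_gcm_key_data_t kd;
 *
 *   clib_aes_gcm_key_expand (&kd, key, AES_KEY_128);
 *   clib_aes128_gcm_enc (&kd, pt, sizeof (pt), 0, 0, iv, sizeof (tag), ct,
 *                        tag);
 *   if (!clib_aes128_gcm_dec (&kd, ct, sizeof (ct), 0, 0, iv, tag,
 *                             sizeof (tag), pt))
 *     ; // authentication failed
 */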
#endif /* __crypto_aes_gcm_h__ */