luajitos

AES-128-GCM.c (13571B)


/*
 * AES-128-GCM Implementation with Hardware Acceleration
 * Supports AES-NI and PCLMULQDQ for high performance
 * Conforms to NIST SP 800-38D
 */
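/*
 * Build note: the intrinsics used below require the AES, PCLMUL, SSSE3 and
 * SSE4.1 instruction-set extensions to be enabled at compile time, e.g. with
 * GCC/Clang something like -maes -mpclmul -mssse3 -msse4.1 (or -march=native).
 */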

#include "AES-128-GCM.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <immintrin.h>
#include <wmmintrin.h>

// CPU feature detection
#ifdef __GNUC__
#include <cpuid.h>
#endif

// Feature flags (shared with AES-256-GCM)
static int has_aesni = 0;
static int has_pclmulqdq = 0;
static int features_detected = 0;

// Check CPU capabilities
static void detect_cpu_features(void) {
    if (features_detected) return;
    features_detected = 1;
#ifdef __GNUC__
    unsigned int eax, ebx, ecx, edx;
    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        has_aesni = (ecx & bit_AES) != 0;
        has_pclmulqdq = (ecx & bit_PCLMUL) != 0;
    }
#endif
}
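// Note: __get_cpuid() is a GCC/Clang interface; on other compilers the feature
// flags stay 0 and aes128_gcm_init() will refuse to run, so a port would need
// its own CPUID query (for example __cpuid() on MSVC).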

// Utility: Reverse bytes in __m128i (for GCM)
static inline __m128i reverse_bytes(__m128i x) {
    const __m128i mask = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
    return _mm_shuffle_epi8(x, mask);
}
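// GHASH is defined on big-endian 128-bit blocks, while the PCLMULQDQ-based
// multiplication below works on byte-reversed values; every block is therefore
// reversed on load and the final hash is reversed back before it is used.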

// AES-128 key expansion using AES-NI
static void aes128_key_expansion(const uint8_t *key, aes128_key_schedule *ks) {
    __m128i temp1, temp2;
    __m128i *round_key = ks->round_keys;

    ks->nr = 10;  // AES-128 has 10 rounds

    // Load the key (16 bytes for AES-128)
    round_key[0] = _mm_loadu_si128((const __m128i*)key);

    // Helper macro for key expansion
    #define AES_128_KEY_EXP(round_idx, temp1, temp2, rcon) do { \
        temp2 = _mm_aeskeygenassist_si128(temp1, rcon); \
        temp2 = _mm_shuffle_epi32(temp2, 0xff); \
        temp1 = _mm_xor_si128(temp1, _mm_slli_si128(temp1, 4)); \
        temp1 = _mm_xor_si128(temp1, _mm_slli_si128(temp1, 4)); \
        temp1 = _mm_xor_si128(temp1, _mm_slli_si128(temp1, 4)); \
        temp1 = _mm_xor_si128(temp1, temp2); \
        round_key[round_idx] = temp1; \
    } while(0)

    temp1 = round_key[0];
    AES_128_KEY_EXP(1, temp1, temp2, 0x01);
    AES_128_KEY_EXP(2, temp1, temp2, 0x02);
    AES_128_KEY_EXP(3, temp1, temp2, 0x04);
    AES_128_KEY_EXP(4, temp1, temp2, 0x08);
    AES_128_KEY_EXP(5, temp1, temp2, 0x10);
    AES_128_KEY_EXP(6, temp1, temp2, 0x20);
    AES_128_KEY_EXP(7, temp1, temp2, 0x40);
    AES_128_KEY_EXP(8, temp1, temp2, 0x80);
    AES_128_KEY_EXP(9, temp1, temp2, 0x1b);
    AES_128_KEY_EXP(10, temp1, temp2, 0x36);
}
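// The schedule now holds 11 round keys (round_keys[0..10]); round_keys[0] is the
// raw cipher key and round_keys[10] feeds the final AESENCLAST round.
#undef AES_128_KEY_EXP  // the helper macro is only needed during expansion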

// AES-128 single block encryption using AES-NI
static inline __m128i aes128_encrypt_block(__m128i plaintext, const aes128_key_schedule *ks) {
    __m128i tmp = _mm_xor_si128(plaintext, ks->round_keys[0]);

    for (int i = 1; i < 10; i++) {
        tmp = _mm_aesenc_si128(tmp, ks->round_keys[i]);
    }

    tmp = _mm_aesenclast_si128(tmp, ks->round_keys[10]);
    return tmp;
}

// GHASH multiplication using PCLMULQDQ (same as AES-256-GCM)
static inline __m128i gf_mul(__m128i a, __m128i b) {
    __m128i tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;

    tmp3 = _mm_clmulepi64_si128(a, b, 0x00);
    tmp6 = _mm_clmulepi64_si128(a, b, 0x11);

    tmp4 = _mm_shuffle_epi32(a, 78);
    tmp5 = _mm_shuffle_epi32(b, 78);
    tmp4 = _mm_xor_si128(tmp4, a);
    tmp5 = _mm_xor_si128(tmp5, b);

    tmp4 = _mm_clmulepi64_si128(tmp4, tmp5, 0x00);
    tmp4 = _mm_xor_si128(tmp4, tmp3);
    tmp4 = _mm_xor_si128(tmp4, tmp6);

    tmp5 = _mm_slli_si128(tmp4, 8);
    tmp4 = _mm_srli_si128(tmp4, 8);
    tmp3 = _mm_xor_si128(tmp3, tmp5);
    tmp6 = _mm_xor_si128(tmp6, tmp4);

    // Reduction
    tmp7 = _mm_srli_epi32(tmp3, 31);
    tmp8 = _mm_srli_epi32(tmp6, 31);
    tmp3 = _mm_slli_epi32(tmp3, 1);
    tmp6 = _mm_slli_epi32(tmp6, 1);

    tmp9 = _mm_srli_si128(tmp7, 12);
    tmp8 = _mm_slli_si128(tmp8, 4);
    tmp7 = _mm_slli_si128(tmp7, 4);
    tmp3 = _mm_or_si128(tmp3, tmp7);
    tmp6 = _mm_or_si128(tmp6, tmp8);
    tmp6 = _mm_or_si128(tmp6, tmp9);

    tmp7 = _mm_slli_epi32(tmp3, 31);
    tmp8 = _mm_slli_epi32(tmp3, 30);
    tmp9 = _mm_slli_epi32(tmp3, 25);

    tmp7 = _mm_xor_si128(tmp7, tmp8);
    tmp7 = _mm_xor_si128(tmp7, tmp9);
    tmp8 = _mm_srli_si128(tmp7, 4);
    tmp7 = _mm_slli_si128(tmp7, 12);
    tmp3 = _mm_xor_si128(tmp3, tmp7);

    tmp2 = _mm_srli_epi32(tmp3, 1);
    tmp4 = _mm_srli_epi32(tmp3, 2);
    tmp5 = _mm_srli_epi32(tmp3, 7);
    tmp2 = _mm_xor_si128(tmp2, tmp4);
    tmp2 = _mm_xor_si128(tmp2, tmp5);
    tmp2 = _mm_xor_si128(tmp2, tmp8);
    tmp3 = _mm_xor_si128(tmp3, tmp2);
    tmp6 = _mm_xor_si128(tmp6, tmp3);

    return tmp6;
}
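// The routine above is a Karatsuba-style carry-less multiply (three PCLMULQDQs)
// followed by a shift and reduction modulo the GCM polynomial
// x^128 + x^7 + x^2 + x + 1, performed on byte-reversed operands.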

// Initialize GCM context
int aes128_gcm_init(aes128_gcm_context *ctx, const uint8_t *key) {
    detect_cpu_features();

    if (!has_aesni || !has_pclmulqdq) {
        fprintf(stderr, "Error: CPU does not support AES-NI and/or PCLMULQDQ\n");
        return -1;
    }

    // Expand the key
    aes128_key_expansion(key, &ctx->key_schedule);

    // Compute H = E(K, 0^128), kept in the byte-reversed GHASH representation
    __m128i zero = _mm_setzero_si128();
    ctx->H = aes128_encrypt_block(zero, &ctx->key_schedule);
    ctx->H = reverse_bytes(ctx->H);

    // Precompute H^1..H^8 for aggregated GHASH (the serial ghash() below only uses H)
    ctx->H_powers[0] = ctx->H;
    for (int i = 1; i < 8; i++) {
        ctx->H_powers[i] = gf_mul(ctx->H_powers[i-1], ctx->H);
    }

    return 0;
}
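/*
 * Typical call sequence (a sketch, assuming the declarations in AES-128-GCM.h):
 *
 *     aes128_gcm_context ctx;
 *     if (aes128_gcm_init(&ctx, key16) != 0) { ... }   // key16: 16-byte key
 *     aes128_gcm_encrypt(&ctx, iv, 12, aad, aad_len,
 *                        pt, pt_len, ct, tag, 16);
 *     ...
 *     aes128_gcm_cleanup(&ctx);                        // wipe key material
 */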

// GHASH computation (same as AES-256-GCM)
static void ghash(const aes128_gcm_context *ctx, const uint8_t *aad, size_t aad_len,
                  const uint8_t *ciphertext, size_t ct_len, __m128i *tag) {
    __m128i hash = _mm_setzero_si128();
    size_t i;

    // Process AAD
    for (i = 0; i + 16 <= aad_len; i += 16) {
        __m128i block = _mm_loadu_si128((const __m128i*)(aad + i));
        block = reverse_bytes(block);
        hash = _mm_xor_si128(hash, block);
        hash = gf_mul(hash, ctx->H);
    }

    // Handle remaining AAD bytes (zero-padded to a full block)
    if (i < aad_len) {
        uint8_t temp[16] = {0};
        memcpy(temp, aad + i, aad_len - i);
        __m128i block = _mm_loadu_si128((const __m128i*)temp);
        block = reverse_bytes(block);
        hash = _mm_xor_si128(hash, block);
        hash = gf_mul(hash, ctx->H);
    }

    // Process ciphertext
    for (i = 0; i + 16 <= ct_len; i += 16) {
        __m128i block = _mm_loadu_si128((const __m128i*)(ciphertext + i));
        block = reverse_bytes(block);
        hash = _mm_xor_si128(hash, block);
        hash = gf_mul(hash, ctx->H);
    }

    // Handle remaining ciphertext bytes (zero-padded to a full block)
    if (i < ct_len) {
        uint8_t temp[16] = {0};
        memcpy(temp, ciphertext + i, ct_len - i);
        __m128i block = _mm_loadu_si128((const __m128i*)temp);
        block = reverse_bytes(block);
        hash = _mm_xor_si128(hash, block);
        hash = gf_mul(hash, ctx->H);
    }

    // Process length block: 64-bit bit lengths of AAD and ciphertext.
    // In the byte-reversed representation used here, the block is built directly
    // with the ciphertext length in the low lane and the AAD length in the high lane.
    uint64_t aad_bits = (uint64_t)aad_len * 8;
    uint64_t ct_bits = (uint64_t)ct_len * 8;
    __m128i len_block = _mm_set_epi64x((long long)aad_bits, (long long)ct_bits);
    hash = _mm_xor_si128(hash, len_block);
    hash = gf_mul(hash, ctx->H);

    *tag = reverse_bytes(hash);
}

// Increment the rightmost 32 bits of the counter block as a big-endian integer
// (the inc32() function from NIST SP 800-38D)
static inline __m128i inc_counter(__m128i counter) {
    uint32_t ctr = (uint32_t)_mm_extract_epi32(counter, 3);  // last 4 bytes of the block
    ctr = __builtin_bswap32(__builtin_bswap32(ctr) + 1);     // big-endian increment
    return _mm_insert_epi32(counter, (int)ctr, 3);
}

// AES-128-GCM Encryption
int aes128_gcm_encrypt(aes128_gcm_context *ctx,
                       const uint8_t *iv, size_t iv_len,
                       const uint8_t *aad, size_t aad_len,
                       const uint8_t *plaintext, size_t pt_len,
                       uint8_t *ciphertext,
                       uint8_t *tag, size_t tag_len) {
    if (tag_len > 16) {
        return -1;
    }

    // Prepare initial counter block J0
    __m128i counter;
    if (iv_len == 12) {
        // Standard case: 96-bit IV, J0 = IV || 0^31 || 1 (big-endian counter in the last 4 bytes)
        uint8_t j0_bytes[16];
        memcpy(j0_bytes, iv, 12);
        j0_bytes[12] = 0;
        j0_bytes[13] = 0;
        j0_bytes[14] = 0;
        j0_bytes[15] = 1;
        counter = _mm_loadu_si128((const __m128i*)j0_bytes);
    } else {
        // Non-standard IV length: J0 = GHASH(IV)
        __m128i hash = _mm_setzero_si128();
        size_t i;
        for (i = 0; i + 16 <= iv_len; i += 16) {
            __m128i block = _mm_loadu_si128((const __m128i*)(iv + i));
            block = reverse_bytes(block);
            hash = _mm_xor_si128(hash, block);
            hash = gf_mul(hash, ctx->H);
        }
        if (i < iv_len) {
            uint8_t temp[16] = {0};
            memcpy(temp, iv + i, iv_len - i);
            __m128i block = _mm_loadu_si128((const __m128i*)temp);
            block = reverse_bytes(block);
            hash = _mm_xor_si128(hash, block);
            hash = gf_mul(hash, ctx->H);
        }
        // Length block: 0^64 || 64-bit IV length in bits
        uint64_t iv_bits = (uint64_t)iv_len * 8;
        __m128i len_block = _mm_set_epi64x(0, (long long)iv_bits);
        hash = _mm_xor_si128(hash, len_block);
        hash = gf_mul(hash, ctx->H);
        counter = reverse_bytes(hash);
    }

    // Save J0 for tag computation
    __m128i J0 = counter;

    // Encrypt plaintext in CTR mode, starting from inc32(J0)
    size_t i;
    for (i = 0; i + 16 <= pt_len; i += 16) {
        counter = inc_counter(counter);
        __m128i keystream = aes128_encrypt_block(counter, &ctx->key_schedule);
        __m128i pt_block = _mm_loadu_si128((const __m128i*)(plaintext + i));
        __m128i ct_block = _mm_xor_si128(pt_block, keystream);
        _mm_storeu_si128((__m128i*)(ciphertext + i), ct_block);
    }

    // Handle remaining bytes
    if (i < pt_len) {
        counter = inc_counter(counter);
        __m128i keystream = aes128_encrypt_block(counter, &ctx->key_schedule);
        uint8_t ks_bytes[16];
        _mm_storeu_si128((__m128i*)ks_bytes, keystream);
        for (size_t j = 0; j < pt_len - i; j++) {
            ciphertext[i + j] = plaintext[i + j] ^ ks_bytes[j];
        }
    }

    // Compute authentication tag
    __m128i auth_tag;
    ghash(ctx, aad, aad_len, ciphertext, pt_len, &auth_tag);

    // Encrypt the tag with J0
    __m128i J0_keystream = aes128_encrypt_block(J0, &ctx->key_schedule);
    auth_tag = _mm_xor_si128(auth_tag, J0_keystream);

    // Store the (possibly truncated) tag without writing past tag_len bytes
    uint8_t tag_bytes[16];
    _mm_storeu_si128((__m128i*)tag_bytes, auth_tag);
    memcpy(tag, tag_bytes, tag_len);

    return 0;
}

// AES-128-GCM Decryption
int aes128_gcm_decrypt(aes128_gcm_context *ctx,
                       const uint8_t *iv, size_t iv_len,
                       const uint8_t *aad, size_t aad_len,
                       const uint8_t *ciphertext, size_t ct_len,
                       const uint8_t *tag, size_t tag_len,
                       uint8_t *plaintext) {
    if (tag_len > 16) {
        return -1;
    }

    // Prepare initial counter block J0 (same as encryption)
    __m128i counter;
    if (iv_len == 12) {
        uint8_t j0_bytes[16];
        memcpy(j0_bytes, iv, 12);
        j0_bytes[12] = 0;
        j0_bytes[13] = 0;
        j0_bytes[14] = 0;
        j0_bytes[15] = 1;
        counter = _mm_loadu_si128((const __m128i*)j0_bytes);
    } else {
        __m128i hash = _mm_setzero_si128();
        size_t i;
        for (i = 0; i + 16 <= iv_len; i += 16) {
            __m128i block = _mm_loadu_si128((const __m128i*)(iv + i));
            block = reverse_bytes(block);
            hash = _mm_xor_si128(hash, block);
            hash = gf_mul(hash, ctx->H);
        }
        if (i < iv_len) {
            uint8_t temp[16] = {0};
            memcpy(temp, iv + i, iv_len - i);
            __m128i block = _mm_loadu_si128((const __m128i*)temp);
            block = reverse_bytes(block);
            hash = _mm_xor_si128(hash, block);
            hash = gf_mul(hash, ctx->H);
        }
        uint64_t iv_bits = (uint64_t)iv_len * 8;
        __m128i len_block = _mm_set_epi64x(0, (long long)iv_bits);
        hash = _mm_xor_si128(hash, len_block);
        hash = gf_mul(hash, ctx->H);
        counter = reverse_bytes(hash);
    }

    __m128i J0 = counter;

    // Verify authentication tag first
    __m128i computed_tag;
    ghash(ctx, aad, aad_len, ciphertext, ct_len, &computed_tag);
    __m128i J0_keystream = aes128_encrypt_block(J0, &ctx->key_schedule);
    computed_tag = _mm_xor_si128(computed_tag, J0_keystream);

    // Compare tags in constant time (accumulate differences, no early exit)
    uint8_t computed_tag_bytes[16];
    _mm_storeu_si128((__m128i*)computed_tag_bytes, computed_tag);

    uint8_t diff = 0;
    for (size_t t = 0; t < tag_len; t++) {
        diff |= (uint8_t)(computed_tag_bytes[t] ^ tag[t]);
    }

    if (diff != 0) {
        return -1;  // Authentication failed: release no plaintext
    }

    // Decrypt ciphertext
    size_t i;
    for (i = 0; i + 16 <= ct_len; i += 16) {
        counter = inc_counter(counter);
        __m128i keystream = aes128_encrypt_block(counter, &ctx->key_schedule);
        __m128i ct_block = _mm_loadu_si128((const __m128i*)(ciphertext + i));
        __m128i pt_block = _mm_xor_si128(ct_block, keystream);
        _mm_storeu_si128((__m128i*)(plaintext + i), pt_block);
    }

    // Handle remaining bytes
    if (i < ct_len) {
        counter = inc_counter(counter);
        __m128i keystream = aes128_encrypt_block(counter, &ctx->key_schedule);
        uint8_t ks_bytes[16];
        _mm_storeu_si128((__m128i*)ks_bytes, keystream);
        for (size_t j = 0; j < ct_len - i; j++) {
            plaintext[i + j] = ciphertext[i + j] ^ ks_bytes[j];
        }
    }

    return 0;
}

/**
 * Clean up AES-128-GCM context
 * Zeros all sensitive key material
 */
void aes128_gcm_cleanup(aes128_gcm_context *ctx) {
    if (ctx == NULL) return;

    // Zero all sensitive data using volatile to prevent compiler optimization
    volatile uint8_t *p = (volatile uint8_t *)ctx;
    size_t n = sizeof(aes128_gcm_context);
    while (n--) {
        *p++ = 0;
    }
}
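
/*
 * Minimal round-trip self-test (a sketch, not part of the original API).
 * The AES128_GCM_SELF_TEST guard and the example key/IV/message bytes are
 * illustrative only; build with -DAES128_GCM_SELF_TEST to try it.
 */
#ifdef AES128_GCM_SELF_TEST
int main(void) {
    static const uint8_t key[16] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
    };
    static const uint8_t iv[12] = {
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
        0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b
    };
    static const uint8_t aad[] = "example header";
    static const uint8_t msg[] = "The quick brown fox jumps over the lazy dog";

    uint8_t ct[sizeof(msg)], pt[sizeof(msg)], tag[16];
    aes128_gcm_context ctx;

    if (aes128_gcm_init(&ctx, key) != 0) {
        fprintf(stderr, "AES-NI/PCLMULQDQ not available\n");
        return 1;
    }

    aes128_gcm_encrypt(&ctx, iv, sizeof(iv), aad, sizeof(aad) - 1,
                       msg, sizeof(msg), ct, tag, sizeof(tag));

    // Decrypt and check that the plaintext round-trips
    int ok = aes128_gcm_decrypt(&ctx, iv, sizeof(iv), aad, sizeof(aad) - 1,
                                ct, sizeof(msg), tag, sizeof(tag), pt) == 0
             && memcmp(pt, msg, sizeof(msg)) == 0;

    // Flipping one ciphertext bit must make authentication fail
    ct[0] ^= 0x01;
    int reject = aes128_gcm_decrypt(&ctx, iv, sizeof(iv), aad, sizeof(aad) - 1,
                                    ct, sizeof(msg), tag, sizeof(tag), pt) != 0;

    aes128_gcm_cleanup(&ctx);
    printf("round-trip: %s, tamper detection: %s\n",
           ok ? "PASS" : "FAIL", reject ? "PASS" : "FAIL");
    return (ok && reject) ? 0 : 1;
}
#endif /* AES128_GCM_SELF_TEST */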