luajitos

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

ChaCha20-Poly1305.c (20019B)


      1 /*
      2  * ChaCha20-Poly1305 AEAD Implementation
      3  * RFC 8439 compliant
      4  * Fast software-only implementation
      5  */
      6 
      7 #include "ChaCha20-Poly1305.h"
      8 #include <stdio.h>
      9 #include <string.h>
     10 
     11 // Utility: Read 32-bit little-endian
     12 static inline uint32_t read_le32(const uint8_t *p) {
     13     return ((uint32_t)p[0]) |
     14            ((uint32_t)p[1] << 8) |
     15            ((uint32_t)p[2] << 16) |
     16            ((uint32_t)p[3] << 24);
     17 }
     18 
     19 // Utility: Write 32-bit little-endian
     20 static inline void write_le32(uint8_t *p, uint32_t v) {
     21     p[0] = v & 0xff;
     22     p[1] = (v >> 8) & 0xff;
     23     p[2] = (v >> 16) & 0xff;
     24     p[3] = (v >> 24) & 0xff;
     25 }
     26 
     27 // Utility: Write 64-bit little-endian
     28 static inline void write_le64(uint8_t *p, uint64_t v) {
     29     p[0] = v & 0xff;
     30     p[1] = (v >> 8) & 0xff;
     31     p[2] = (v >> 16) & 0xff;
     32     p[3] = (v >> 24) & 0xff;
     33     p[4] = (v >> 32) & 0xff;
     34     p[5] = (v >> 40) & 0xff;
     35     p[6] = (v >> 48) & 0xff;
     36     p[7] = (v >> 56) & 0xff;
     37 }
     38 
     39 // ChaCha20 quarter round
     40 #define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
     41 
     42 #define QUARTERROUND(a, b, c, d) \
     43     a += b; d ^= a; d = ROTL32(d, 16); \
     44     c += d; b ^= c; b = ROTL32(b, 12); \
     45     a += b; d ^= a; d = ROTL32(d, 8); \
     46     c += d; b ^= c; b = ROTL32(b, 7)
     47 
     48 // ChaCha20 block function
     49 static void chacha20_block(uint32_t out[16], const uint32_t in[16]) {
     50     int i;
     51     uint32_t x[16];
     52 
     53     // Copy input to working state
     54     for (i = 0; i < 16; i++) {
     55         x[i] = in[i];
     56     }
     57 
     58     // 20 rounds (10 double rounds)
     59     for (i = 0; i < 10; i++) {
     60         // Column rounds
     61         QUARTERROUND(x[0], x[4], x[8], x[12]);
     62         QUARTERROUND(x[1], x[5], x[9], x[13]);
     63         QUARTERROUND(x[2], x[6], x[10], x[14]);
     64         QUARTERROUND(x[3], x[7], x[11], x[15]);
     65 
     66         // Diagonal rounds
     67         QUARTERROUND(x[0], x[5], x[10], x[15]);
     68         QUARTERROUND(x[1], x[6], x[11], x[12]);
     69         QUARTERROUND(x[2], x[7], x[8], x[13]);
     70         QUARTERROUND(x[3], x[4], x[9], x[14]);
     71     }
     72 
     73     // Add original state
     74     for (i = 0; i < 16; i++) {
     75         out[i] = x[i] + in[i];
     76     }
     77 }
     78 
     79 // Initialize ChaCha20 state
     80 static void chacha20_init_state(chacha20_context *ctx,
     81                                 const uint8_t *key,
     82                                 const uint8_t *nonce,
     83                                 uint32_t counter) {
     84     // Constants "expand 32-byte k"
     85     ctx->state[0] = 0x61707865;
     86     ctx->state[1] = 0x3320646e;
     87     ctx->state[2] = 0x79622d32;
     88     ctx->state[3] = 0x6b206574;
     89 
     90     // Key (256 bits = 8 words)
     91     ctx->state[4] = read_le32(key + 0);
     92     ctx->state[5] = read_le32(key + 4);
     93     ctx->state[6] = read_le32(key + 8);
     94     ctx->state[7] = read_le32(key + 12);
     95     ctx->state[8] = read_le32(key + 16);
     96     ctx->state[9] = read_le32(key + 20);
     97     ctx->state[10] = read_le32(key + 24);
     98     ctx->state[11] = read_le32(key + 28);
     99 
    100     // Counter (32 bits)
    101     ctx->state[12] = counter;
    102 
    103     // Nonce (96 bits = 3 words)
    104     ctx->state[13] = read_le32(nonce + 0);
    105     ctx->state[14] = read_le32(nonce + 4);
    106     ctx->state[15] = read_le32(nonce + 8);
    107 
    108     ctx->keystream_pos = 64; // Force generation on first use
    109     ctx->counter = counter;
    110 }
    111 
    112 // Generate next ChaCha20 keystream block
    113 static void chacha20_generate_block(chacha20_context *ctx) {
    114     uint32_t block[16];
    115 
    116     chacha20_block(block, ctx->state);
    117 
    118     // Convert to bytes
    119     for (int i = 0; i < 16; i++) {
    120         write_le32(ctx->keystream + i * 4, block[i]);
    121     }
    122 
    123     // Increment counter
    124     ctx->state[12]++;
    125     ctx->keystream_pos = 0;
    126 }
    127 
    128 // Standalone ChaCha20 encryption/decryption
    129 int chacha20_crypt(const uint8_t *key, const uint8_t *nonce,
    130                    uint32_t counter, const uint8_t *input,
    131                    uint8_t *output, size_t len) {
    132     chacha20_context ctx;
    133 
    134     chacha20_init_state(&ctx, key, nonce, counter);
    135 
    136     for (size_t i = 0; i < len; i++) {
    137         if (ctx.keystream_pos >= 64) {
    138             chacha20_generate_block(&ctx);
    139         }
    140         output[i] = input[i] ^ ctx.keystream[ctx.keystream_pos++];
    141     }
    142 
    143     // Cleanup
    144     memset(&ctx, 0, sizeof(ctx));
    145     return 0;
    146 }
    147 
    148 // Poly1305 clamp function
    149 static void poly1305_clamp(uint32_t r[5], const uint8_t key[16]) {
    150     r[0] = (read_le32(key + 0)) & 0x0fffffff;
    151     r[1] = (read_le32(key + 4)) & 0x0ffffffc;
    152     r[2] = (read_le32(key + 8)) & 0x0ffffffc;
    153     r[3] = (read_le32(key + 12)) & 0x0ffffffc;
    154     r[4] = 0;
    155 }
    156 
    157 // Poly1305 initialization
    158 static int poly1305_init_internal(poly1305_context *ctx, const uint8_t *key) {
    159     // Clamp r
    160     poly1305_clamp(ctx->r, key);
    161 
    162     // Load pad
    163     ctx->pad[0] = read_le32(key + 16);
    164     ctx->pad[1] = read_le32(key + 20);
    165     ctx->pad[2] = read_le32(key + 24);
    166     ctx->pad[3] = read_le32(key + 28);
    167 
    168     // Initialize accumulator
    169     ctx->h[0] = 0;
    170     ctx->h[1] = 0;
    171     ctx->h[2] = 0;
    172     ctx->h[3] = 0;
    173     ctx->h[4] = 0;
    174 
    175     ctx->buffer_len = 0;
    176     ctx->total_len = 0;
    177 
    178     return 0;
    179 }
    180 
    181 // Poly1305 block processing
    182 static void poly1305_block(poly1305_context *ctx, const uint8_t *block, int final) {
    183     uint32_t h0, h1, h2, h3, h4;
    184     uint32_t r0, r1, r2, r3, r4;
    185     uint64_t d0, d1, d2, d3, d4;
    186     uint32_t c;
    187 
    188     h0 = ctx->h[0];
    189     h1 = ctx->h[1];
    190     h2 = ctx->h[2];
    191     h3 = ctx->h[3];
    192     h4 = ctx->h[4];
    193 
    194     r0 = ctx->r[0];
    195     r1 = ctx->r[1];
    196     r2 = ctx->r[2];
    197     r3 = ctx->r[3];
    198     r4 = ctx->r[4];
    199 
    200     // Add block to accumulator
    201     h0 += (read_le32(block + 0)) & 0x03ffffff;
    202     h1 += ((read_le32(block + 3) >> 2) & 0x03ffffff);
    203     h2 += ((read_le32(block + 6) >> 4) & 0x03ffffff);
    204     h3 += ((read_le32(block + 9) >> 6) & 0x03ffffff);
    205     h4 += ((read_le32(block + 12) >> 8) | (final ? 0 : (1 << 24)));
    206 
    207     // Multiply by r
    208     d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * (5 * r4)) +
    209          ((uint64_t)h2 * (5 * r3)) + ((uint64_t)h3 * (5 * r2)) +
    210          ((uint64_t)h4 * (5 * r1));
    211 
    212     d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) +
    213          ((uint64_t)h2 * (5 * r4)) + ((uint64_t)h3 * (5 * r3)) +
    214          ((uint64_t)h4 * (5 * r2));
    215 
    216     d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) +
    217          ((uint64_t)h2 * r0) + ((uint64_t)h3 * (5 * r4)) +
    218          ((uint64_t)h4 * (5 * r3));
    219 
    220     d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) +
    221          ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) +
    222          ((uint64_t)h4 * (5 * r4));
    223 
    224     d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) +
    225          ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) +
    226          ((uint64_t)h4 * r0);
    227 
    228     // Reduce modulo 2^130-5
    229     c = (uint32_t)(d0 >> 26); d1 += c; h0 = (uint32_t)d0 & 0x3ffffff;
    230     c = (uint32_t)(d1 >> 26); d2 += c; h1 = (uint32_t)d1 & 0x3ffffff;
    231     c = (uint32_t)(d2 >> 26); d3 += c; h2 = (uint32_t)d2 & 0x3ffffff;
    232     c = (uint32_t)(d3 >> 26); d4 += c; h3 = (uint32_t)d3 & 0x3ffffff;
    233     c = (uint32_t)(d4 >> 26); h0 += c * 5; h4 = (uint32_t)d4 & 0x3ffffff;
    234     c = h0 >> 26; h1 += c; h0 &= 0x3ffffff;
    235 
    236     ctx->h[0] = h0;
    237     ctx->h[1] = h1;
    238     ctx->h[2] = h2;
    239     ctx->h[3] = h3;
    240     ctx->h[4] = h4;
    241 }
    242 
    243 // Poly1305 update
    244 static void poly1305_update_internal(poly1305_context *ctx,
    245                                      const uint8_t *data, size_t len) {
    246     // Process buffered data first
    247     if (ctx->buffer_len > 0) {
    248         size_t to_copy = 16 - ctx->buffer_len;
    249         if (to_copy > len) to_copy = len;
    250 
    251         memcpy(ctx->buffer + ctx->buffer_len, data, to_copy);
    252         ctx->buffer_len += to_copy;
    253         data += to_copy;
    254         len -= to_copy;
    255         ctx->total_len += to_copy;
    256 
    257         if (ctx->buffer_len == 16) {
    258             poly1305_block(ctx, ctx->buffer, 0);
    259             ctx->buffer_len = 0;
    260         }
    261     }
    262 
    263     // Process full blocks
    264     while (len >= 16) {
    265         poly1305_block(ctx, data, 0);
    266         data += 16;
    267         len -= 16;
    268         ctx->total_len += 16;
    269     }
    270 
    271     // Buffer remaining data
    272     if (len > 0) {
    273         memcpy(ctx->buffer, data, len);
    274         ctx->buffer_len = len;
    275         ctx->total_len += len;
    276     }
    277 }
    278 
    279 // Poly1305 finalization
    280 static void poly1305_finish_internal(poly1305_context *ctx, uint8_t *tag) {
    281     uint32_t h0, h1, h2, h3, h4;
    282     uint32_t g0, g1, g2, g3, g4;
    283     uint32_t mask;
    284     uint64_t f;
    285 
    286     // Process final block if any
    287     if (ctx->buffer_len > 0) {
    288         // Pad with zeros and 0x01
    289         ctx->buffer[ctx->buffer_len++] = 1;
    290         while (ctx->buffer_len < 16) {
    291             ctx->buffer[ctx->buffer_len++] = 0;
    292         }
    293         poly1305_block(ctx, ctx->buffer, 1);
    294     }
    295 
    296     h0 = ctx->h[0];
    297     h1 = ctx->h[1];
    298     h2 = ctx->h[2];
    299     h3 = ctx->h[3];
    300     h4 = ctx->h[4];
    301 
    302     // Fully reduce h mod 2^130-5
    303     h1 += h0 >> 26; h0 &= 0x3ffffff;
    304     h2 += h1 >> 26; h1 &= 0x3ffffff;
    305     h3 += h2 >> 26; h2 &= 0x3ffffff;
    306     h4 += h3 >> 26; h3 &= 0x3ffffff;
    307     h0 += (h4 >> 26) * 5; h4 &= 0x3ffffff;
    308     h1 += h0 >> 26; h0 &= 0x3ffffff;
    309 
    310     // Compute h + (-p)
    311     g0 = h0 + 5;
    312     g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
    313     g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
    314     g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
    315     g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
    316 
    317     // Select h if h < p, else h + (-p)
    318     mask = (g4 >> 31) - 1;
    319     g0 &= mask;
    320     g1 &= mask;
    321     g2 &= mask;
    322     g3 &= mask;
    323     g4 &= mask;
    324     mask = ~mask;
    325     h0 = (h0 & mask) | g0;
    326     h1 = (h1 & mask) | g1;
    327     h2 = (h2 & mask) | g2;
    328     h3 = (h3 & mask) | g3;
    329     h4 = (h4 & mask) | g4;
    330 
    331     // Compute h % (2^128)
    332     h0 = (h0 | (h1 << 26)) & 0xffffffff;
    333     h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
    334     h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
    335     h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
    336 
    337     // Add pad
    338     f = (uint64_t)h0 + ctx->pad[0]; h0 = (uint32_t)f;
    339     f = (uint64_t)h1 + ctx->pad[1] + (f >> 32); h1 = (uint32_t)f;
    340     f = (uint64_t)h2 + ctx->pad[2] + (f >> 32); h2 = (uint32_t)f;
    341     f = (uint64_t)h3 + ctx->pad[3] + (f >> 32); h3 = (uint32_t)f;
    342 
    343     // Write tag
    344     write_le32(tag + 0, h0);
    345     write_le32(tag + 4, h1);
    346     write_le32(tag + 8, h2);
    347     write_le32(tag + 12, h3);
    348 }
    349 
    350 // Initialize ChaCha20-Poly1305
    351 int chacha20_poly1305_init(chacha20_poly1305_context *ctx,
    352                            const uint8_t *key,
    353                            const uint8_t *nonce) {
    354     if (!ctx || !key || !nonce) return -1;
    355 
    356     // Initialize ChaCha20 with counter=0
    357     chacha20_init_state(&ctx->cipher, key, nonce, 0);
    358 
    359     // Generate Poly1305 key (first block of ChaCha20 keystream)
    360     uint8_t poly_key[32];
    361     chacha20_generate_block(&ctx->cipher);
    362     memcpy(poly_key, ctx->cipher.keystream, 32);
    363     ctx->cipher.keystream_pos = 64; // Mark block as used
    364 
    365     // Initialize ChaCha20 for data encryption (counter=1)
    366     ctx->cipher.state[12] = 1;
    367 
    368     // Initialize Poly1305
    369     poly1305_init_internal(&ctx->mac, poly_key);
    370 
    371     // Zero sensitive data
    372     memset(poly_key, 0, sizeof(poly_key));
    373 
    374     ctx->aad_len = 0;
    375     ctx->data_len = 0;
    376 
    377     return 0;
    378 }
    379 
    380 // ChaCha20-Poly1305 encryption
    381 int chacha20_poly1305_encrypt(chacha20_poly1305_context *ctx,
    382                                const uint8_t *aad, size_t aad_len,
    383                                const uint8_t *plaintext, size_t pt_len,
    384                                uint8_t *ciphertext,
    385                                uint8_t *tag) {
    386     if (!ctx || !ciphertext || !tag) return -1;
    387     if (pt_len > 0 && !plaintext) return -1;
    388 
    389     // Authenticate AAD
    390     if (aad_len > 0 && aad) {
    391         poly1305_update_internal(&ctx->mac, aad, aad_len);
    392 
    393         // Pad to 16 bytes
    394         if (aad_len % 16 != 0) {
    395             uint8_t zeros[16] = {0};
    396             poly1305_update_internal(&ctx->mac, zeros, 16 - (aad_len % 16));
    397         }
    398     }
    399 
    400     // Encrypt plaintext
    401     for (size_t i = 0; i < pt_len; i++) {
    402         if (ctx->cipher.keystream_pos >= 64) {
    403             chacha20_generate_block(&ctx->cipher);
    404         }
    405         ciphertext[i] = plaintext[i] ^ ctx->cipher.keystream[ctx->cipher.keystream_pos++];
    406     }
    407 
    408     // Authenticate ciphertext
    409     if (pt_len > 0) {
    410         poly1305_update_internal(&ctx->mac, ciphertext, pt_len);
    411 
    412         // Pad to 16 bytes
    413         if (pt_len % 16 != 0) {
    414             uint8_t zeros[16] = {0};
    415             poly1305_update_internal(&ctx->mac, zeros, 16 - (pt_len % 16));
    416         }
    417     }
    418 
    419     // Authenticate lengths
    420     uint8_t len_block[16];
    421     write_le64(len_block, aad_len);
    422     write_le64(len_block + 8, pt_len);
    423     poly1305_update_internal(&ctx->mac, len_block, 16);
    424 
    425     // Generate tag
    426     poly1305_finish_internal(&ctx->mac, tag);
    427 
    428     return 0;
    429 }
    430 
    431 // ChaCha20-Poly1305 decryption
    432 int chacha20_poly1305_decrypt(chacha20_poly1305_context *ctx,
    433                                const uint8_t *aad, size_t aad_len,
    434                                const uint8_t *ciphertext, size_t ct_len,
    435                                const uint8_t *tag,
    436                                uint8_t *plaintext) {
    437     if (!ctx || !ciphertext || !tag || !plaintext) return -1;
    438 
    439     uint8_t computed_tag[16];
    440 
    441     // Authenticate AAD
    442     if (aad_len > 0 && aad) {
    443         poly1305_update_internal(&ctx->mac, aad, aad_len);
    444 
    445         // Pad to 16 bytes
    446         if (aad_len % 16 != 0) {
    447             uint8_t zeros[16] = {0};
    448             poly1305_update_internal(&ctx->mac, zeros, 16 - (aad_len % 16));
    449         }
    450     }
    451 
    452     // Authenticate ciphertext
    453     if (ct_len > 0) {
    454         poly1305_update_internal(&ctx->mac, ciphertext, ct_len);
    455 
    456         // Pad to 16 bytes
    457         if (ct_len % 16 != 0) {
    458             uint8_t zeros[16] = {0};
    459             poly1305_update_internal(&ctx->mac, zeros, 16 - (ct_len % 16));
    460         }
    461     }
    462 
    463     // Authenticate lengths
    464     uint8_t len_block[16];
    465     write_le64(len_block, aad_len);
    466     write_le64(len_block + 8, ct_len);
    467     poly1305_update_internal(&ctx->mac, len_block, 16);
    468 
    469     // Generate tag
    470     poly1305_finish_internal(&ctx->mac, computed_tag);
    471 
    472     // Constant-time tag comparison
    473     int result = 0;
    474     for (int i = 0; i < 16; i++) {
    475         result |= (computed_tag[i] ^ tag[i]);
    476     }
    477 
    478     if (result != 0) {
    479         // Authentication failed
    480         memset(plaintext, 0, ct_len);
    481         return -1;
    482     }
    483 
    484     // Decrypt ciphertext
    485     for (size_t i = 0; i < ct_len; i++) {
    486         if (ctx->cipher.keystream_pos >= 64) {
    487             chacha20_generate_block(&ctx->cipher);
    488         }
    489         plaintext[i] = ciphertext[i] ^ ctx->cipher.keystream[ctx->cipher.keystream_pos++];
    490     }
    491 
    492     return 0;
    493 }
    494 
    495 // Cleanup
    496 void chacha20_poly1305_cleanup(chacha20_poly1305_context *ctx) {
    497     if (!ctx) return;
    498 
    499     volatile uint8_t *p = (volatile uint8_t *)ctx;
    500     size_t n = sizeof(chacha20_poly1305_context);
    501     while (n--) {
    502         *p++ = 0;
    503     }
    504 }
    505 
    506 // Test program
    507 #ifdef INCLUDE_MAIN
    508 int main(void) {
    509     printf("╔════════════════════════════════════════════════════╗\n");
    510     printf("║   ChaCha20-Poly1305 AEAD Implementation           ║\n");
    511     printf("║   RFC 8439 Compliant                               ║\n");
    512     printf("╚════════════════════════════════════════════════════╝\n\n");
    513 
    514     printf("ChaCha20-Poly1305 Features:\n");
    515     printf("• 256-bit keys\n");
    516     printf("• 96-bit nonces\n");
    517     printf("• Authenticated encryption (AEAD)\n");
    518     printf("• Used in TLS 1.3, WireGuard, SSH\n");
    519     printf("• Fast software implementation (~1-2 GB/s)\n");
    520     printf("• No hardware acceleration needed\n\n");
    521 
    522     // Test vectors from RFC 8439
    523     const uint8_t key[32] = {
    524         0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    525         0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    526         0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    527         0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
    528     };
    529 
    530     const uint8_t nonce[12] = {
    531         0x07, 0x00, 0x00, 0x00,
    532         0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
    533     };
    534 
    535     const char *plaintext = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it.";
    536     const char *aad = "Additional authenticated data";
    537 
    538     size_t pt_len = strlen(plaintext);
    539     size_t aad_len = strlen(aad);
    540 
    541     uint8_t ciphertext[256];
    542     uint8_t tag[16];
    543     uint8_t decrypted[256];
    544 
    545     printf("════════════════════════════════════════════════════\n");
    546     printf("Test: Encryption and Decryption\n");
    547     printf("════════════════════════════════════════════════════\n\n");
    548 
    549     printf("Plaintext: \"%s\"\n", plaintext);
    550     printf("AAD: \"%s\"\n\n", aad);
    551 
    552     // Encrypt
    553     chacha20_poly1305_context ctx;
    554     if (chacha20_poly1305_init(&ctx, key, nonce) != 0) {
    555         fprintf(stderr, "✗ Initialization failed\n");
    556         return 1;
    557     }
    558 
    559     if (chacha20_poly1305_encrypt(&ctx, (uint8_t*)aad, aad_len,
    560                                   (uint8_t*)plaintext, pt_len,
    561                                   ciphertext, tag) != 0) {
    562         fprintf(stderr, "✗ Encryption failed\n");
    563         return 1;
    564     }
    565 
    566     printf("✓ Encryption successful\n");
    567     printf("Ciphertext (%zu bytes): ", pt_len);
    568     for (size_t i = 0; i < (pt_len < 32 ? pt_len : 32); i++) {
    569         printf("%02x", ciphertext[i]);
    570     }
    571     if (pt_len > 32) printf("...");
    572     printf("\n");
    573 
    574     printf("Tag (16 bytes): ");
    575     for (int i = 0; i < 16; i++) {
    576         printf("%02x", tag[i]);
    577     }
    578     printf("\n\n");
    579 
    580     // Decrypt
    581     chacha20_poly1305_cleanup(&ctx);
    582     if (chacha20_poly1305_init(&ctx, key, nonce) != 0) {
    583         fprintf(stderr, "✗ Initialization failed\n");
    584         return 1;
    585     }
    586 
    587     if (chacha20_poly1305_decrypt(&ctx, (uint8_t*)aad, aad_len,
    588                                   ciphertext, pt_len, tag,
    589                                   decrypted) != 0) {
    590         fprintf(stderr, "✗ Decryption/Authentication failed\n");
    591         return 1;
    592     }
    593 
    594     decrypted[pt_len] = '\0';
    595     printf("✓ Decryption successful\n");
    596     printf("Decrypted: \"%s\"\n\n", decrypted);
    597 
    598     if (memcmp(plaintext, decrypted, pt_len) == 0) {
    599         printf("✓ Encryption/Decryption verified!\n\n");
    600     } else {
    601         printf("✗ Decryption mismatch!\n\n");
    602         return 1;
    603     }
    604 
    605     // Test authentication failure
    606     printf("════════════════════════════════════════════════════\n");
    607     printf("Test: Authentication Failure (corrupted tag)\n");
    608     printf("════════════════════════════════════════════════════\n\n");
    609 
    610     tag[0] ^= 0x01; // Corrupt tag
    611 
    612     chacha20_poly1305_cleanup(&ctx);
    613     if (chacha20_poly1305_init(&ctx, key, nonce) != 0) {
    614         fprintf(stderr, "✗ Initialization failed\n");
    615         return 1;
    616     }
    617 
    618     if (chacha20_poly1305_decrypt(&ctx, (uint8_t*)aad, aad_len,
    619                                   ciphertext, pt_len, tag,
    620                                   decrypted) == 0) {
    621         printf("✗ Should have rejected corrupted tag!\n");
    622         return 1;
    623     } else {
    624         printf("✓ Correctly rejected corrupted tag\n\n");
    625     }
    626 
    627     // Cleanup
    628     chacha20_poly1305_cleanup(&ctx);
    629 
    630     printf("════════════════════════════════════════════════════\n");
    631     printf("Security Notes:\n");
    632     printf("════════════════════════════════════════════════════\n");
    633     printf("• NEVER reuse (key, nonce) pairs\n");
    634     printf("• Generate random nonce for each message\n");
    635     printf("• Always verify tag before using plaintext\n");
    636     printf("• Suitable for production use\n");
    637     printf("• Alternative to AES-GCM (faster on non-AES-NI CPUs)\n");
    638     printf("• Used in modern protocols (TLS 1.3, WireGuard)\n");
    639 
    640     return 0;
    641 }
    642 #endif