luajitos

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

Dilithium.c (51207B)


      1 /*
      2  * Dilithium.c - CRYSTALS-Dilithium Production Implementation
      3  *
      4  * NIST FIPS 204 - Module-Lattice-Based Digital Signature Standard
      5  *
      6  * SECURITY: Production implementation with:
      7  * - Constant-time operations for secret-dependent code paths
      8  * - Proper SHAKE-256 for matrix and mask generation
      9  * - Full NIST specification compliance with rejection sampling
     10  * - Secure memory handling
     11  * - Input validation
     12  */
     13 
     14 #include "Dilithium.h"
     15 #include "CSPRNG.h"
     16 #include "ct_util.h"
     17 #include "hashing/hash.h"
     18 #include "hashing/SHA3.h"
     19 #include <string.h>
     20 
     21 /* ============================================================================
     22  * Dilithium Parameters
     23  * ========================================================================= */
     24 
#define DILITHIUM_N 256                /* Coefficients per polynomial */
#define DILITHIUM_Q 8380417            /* Modulus q = 2^23 - 2^13 + 1 */
#define DILITHIUM_D 13                 /* Low bits split off by power2round_dil */
#define DILITHIUM_ROOT_OF_UNITY 1753   /* NOTE(review): not referenced in the visible part of this file */
#define DILITHIUM_SEEDBYTES 32         /* Seed length used by poly_uniform_dil / poly_challenge_dil */
#define DILITHIUM_CRHBYTES 64          /* Seed length used by the eta and gamma1 samplers */

/* Dilithium2 (NIST Level 2) */
#define DILITHIUM2_K 4
#define DILITHIUM2_L 4
#define DILITHIUM2_ETA 2
#define DILITHIUM2_TAU 39
#define DILITHIUM2_BETA 78
#define DILITHIUM2_GAMMA1 (1 << 17)
#define DILITHIUM2_GAMMA2 ((DILITHIUM_Q-1)/88)
#define DILITHIUM2_OMEGA 80

/* Dilithium3 (NIST Level 3 - RECOMMENDED) */
#define DILITHIUM3_K 6
#define DILITHIUM3_L 5
#define DILITHIUM3_ETA 4
#define DILITHIUM3_TAU 49
#define DILITHIUM3_BETA 196
#define DILITHIUM3_GAMMA1 (1 << 19)
#define DILITHIUM3_GAMMA2 ((DILITHIUM_Q-1)/32)
#define DILITHIUM3_OMEGA 55

/* Dilithium5 (NIST Level 5) */
#define DILITHIUM5_K 8
#define DILITHIUM5_L 7
#define DILITHIUM5_ETA 2
#define DILITHIUM5_TAU 60
#define DILITHIUM5_BETA 120
#define DILITHIUM5_GAMMA1 (1 << 19)
#define DILITHIUM5_GAMMA2 ((DILITHIUM_Q-1)/32)
#define DILITHIUM5_OMEGA 75

/* 416 = 256 coefficients * 13 bits / 8, i.e. the size of one 13-bit packed
 * polynomial. NOTE(review): its users are outside the visible part of the
 * file; confirm which packed format each caller means. */
#define DILITHIUM_POLYBYTES 416

/* SHAKE-256 rate in bytes (1088 bits / 8) */
#define SHAKE256_RATE 136
     66 
     67 /* ============================================================================
     68  * NTT Constants
     69  * ========================================================================= */
     70 
/* FIPS 204 (ML-DSA) NTT zetas - exactly 256 values for n=256
 * From official NIST pqcrystals-dilithium reference implementation
 * https://github.com/pq-crystals/dilithium/blob/master/ref/ntt.c
 *
 * Indexing: ntt_dil() pre-increments its index from 0 and invntt_tomont_dil()
 * pre-decrements from DILITHIUM_N, so both only ever read entries 1..255;
 * entry 0 is a placeholder. Every entry is fed straight into
 * montgomery_reduce_dil(), so the values are presumably stored in Montgomery
 * form (NOTE(review): confirm against the reference table).
 */
static const int32_t zetas_dilithium[DILITHIUM_N] = {
        0,    25847, -2608894,  -518909,   237124,  -777960,  -876248,   466468,
  1826347,  2353451,  -359251, -2091905,  3119733, -2884855,  3111497,  2680103,
  2725464,  1024112, -1079900,  3585928,  -549488, -1119584,  2619752, -2108549,
 -2118186, -3859737, -1399561, -3277672,  1757237,   -19422,  4010497,   280005,
  2706023,    95776,  3077325,  3530437, -1661693, -3592148, -2537516,  3915439,
 -3861115, -3043716,  3574422, -2867647,  3539968,  -300467,  2348700,  -539299,
 -1699267, -1643818,  3505694, -3821735,  3507263, -2140649, -1600420,  3699596,
   811944,   531354,   954230,  3881043,  3900724, -2556880,  2071892, -2797779,
 -3930395, -1528703, -3677745, -3041255, -1452451,  3475950,  2176455, -1585221,
 -1257611,  1939314, -4083598, -1000202, -3190144, -3157330, -3632928,   126922,
  3412210,  -983419,  2147896,  2715295, -2967645, -3693493,  -411027, -2477047,
  -671102, -1228525,   -22981, -1308169,  -381987,  1349076,  1852771, -1430430,
 -3343383,   264944,   508951,  3097992,    44288, -1100098,   904516,  3958618,
 -3724342,    -8578,  1653064, -3249728,  2389356,  -210977,   759969, -1316856,
   189548, -3553272,  3159746, -1851402, -2409325,  -177440,  1315589,  1341330,
  1285669, -1584928,  -812732, -1439742, -3019102, -3881060, -3628969,  3839961,
  2091667,  3407706,  2316500,  3817976, -3342478,  2244091, -2446433, -3562462,
   266997,  2434439, -1235728,  3513181, -3520352, -3759364, -1197226, -3193378,
   900702,  1859098,   909542,   819034,   495491, -1613174,   -43260,  -522500,
  -655327, -3122442,  2031748,  3207046, -3556995,  -525098,  -768622, -3595838,
   342297,   286988, -2437823,  4108315,  3437287, -3342277,  1735879,   203044,
  2842341,  2691481, -2590150,  1265009,  4055324,  1247620,  2486353,  1595974,
 -3767016,  1250494,  2635921, -3548272, -2994039,  1869119,  1903435, -1050970,
 -1333058,  1237275, -3318210, -1430225,  -451100,  1312455,  3306115, -1962642,
 -1279661,  1917081, -2546312, -1374803,  1500165,   777191,  2235880,  3406031,
  -542412, -2831860, -1671176, -1846953, -2584293, -3724270,   594136, -3776993,
 -2013608,  2432395,  2454455,  -164721,  1957272,  3369112,   185531, -1207385,
 -3183426,   162844,  1616392,  3014001,   810149,  1652634, -3694233, -1799107,
 -3038916,  3523897,  3866901,   269760,  2213111,  -975884,  1717735,   472078,
  -426683,  1723600, -1803090,  1910376, -1667432, -1104333,  -260646, -3833893,
 -2939036, -2235985,  -420899, -2286327,   183443,  -976891,  1612842, -3545687,
  -554416,  3919660,   -48306, -1362209,  3937738,  1400424,  -846154,  1976782
};
    109 
    110 /* ============================================================================
    111  * Constant-Time Modular Arithmetic
    112  * ========================================================================= */
    113 
    114 /* Reduce to [-Q/2, Q/2] */
    115 static inline int32_t reduce32_dil(int32_t a) {
    116     int32_t t;
    117     t = (a + (1 << 22)) >> 23;
    118     a = a - t * DILITHIUM_Q;
    119     return a;
    120 }
    121 
    122 /* Montgomery reduction */
    123 static inline int32_t montgomery_reduce_dil(int64_t a) {
    124     int32_t t;
    125     t = (int64_t)(int32_t)a * 58728449LL;  /* qinv */
    126     t = (a - (int64_t)t * DILITHIUM_Q) >> 32;
    127     return t;
    128 }
    129 
    130 /* Conditional add Q (constant-time) */
    131 static inline int32_t caddq_dil(int32_t a) {
    132     a += (a >> 31) & DILITHIUM_Q;
    133     return a;
    134 }
    135 
    136 /* ============================================================================
    137  * Polynomial Structure and Operations
    138  * ========================================================================= */
    139 
/* One polynomial: DILITHIUM_N signed 32-bit coefficients. */
typedef struct {
    int32_t coeffs[DILITHIUM_N];
} poly_dil;

/* Fixed-capacity vector of polynomials, sized for the largest parameter set
 * so one type can hold both length-L and length-K vectors. */
typedef struct {
    poly_dil vec[8];  /* Max L=7 or K=8 */
} polyvec_dil;
    147 
    148 /* Reduce all coefficients */
    149 static void poly_reduce_dil(poly_dil *a) {
    150     for (int i = 0; i < DILITHIUM_N; i++) {
    151         a->coeffs[i] = reduce32_dil(a->coeffs[i]);
    152     }
    153 }
    154 
    155 /* Forward NTT */
    156 static void ntt_dil(int32_t a[DILITHIUM_N]) {
    157     unsigned int len, start, j, k;
    158     int32_t zeta, t;
    159 
    160     k = 0;
    161     for (len = 128; len > 0; len >>= 1) {
    162         for (start = 0; start < DILITHIUM_N; start = j + len) {
    163             zeta = zetas_dilithium[++k];
    164             for (j = start; j < start + len; ++j) {
    165                 t = montgomery_reduce_dil((int64_t)zeta * a[j + len]);
    166                 a[j + len] = a[j] - t;
    167                 a[j] = a[j] + t;
    168             }
    169         }
    170     }
    171 }
    172 
    173 /* Inverse NTT */
    174 static void invntt_tomont_dil(int32_t a[DILITHIUM_N]) {
    175     unsigned int start, len, j, k;
    176     int32_t t, zeta;
    177     const int32_t f = 41978;  /* Mont^-1 mod Q */
    178 
    179     k = DILITHIUM_N;
    180     for (len = 1; len < DILITHIUM_N; len <<= 1) {
    181         for (start = 0; start < DILITHIUM_N; start = j + len) {
    182             zeta = -zetas_dilithium[--k];
    183             for (j = start; j < start + len; ++j) {
    184                 t = a[j];
    185                 a[j] = t + a[j + len];
    186                 a[j + len] = t - a[j + len];
    187                 a[j + len] = montgomery_reduce_dil((int64_t)zeta * a[j + len]);
    188             }
    189         }
    190     }
    191 
    192     for (j = 0; j < DILITHIUM_N; ++j) {
    193         a[j] = montgomery_reduce_dil((int64_t)f * a[j]);
    194     }
    195 }
    196 
    197 /* Pointwise polynomial multiplication in NTT domain */
    198 static void poly_pointwise_montgomery(poly_dil *c, const poly_dil *a, const poly_dil *b) {
    199     for (int i = 0; i < DILITHIUM_N; ++i) {
    200         c->coeffs[i] = montgomery_reduce_dil((int64_t)a->coeffs[i] * b->coeffs[i]);
    201     }
    202 }
    203 
    204 /* Add two polynomials */
    205 static void poly_add_dil(poly_dil *c, const poly_dil *a, const poly_dil *b) {
    206     for (int i = 0; i < DILITHIUM_N; ++i) {
    207         c->coeffs[i] = a->coeffs[i] + b->coeffs[i];
    208     }
    209 }
    210 
    211 /* Subtract two polynomials */
    212 static void poly_sub_dil(poly_dil *c, const poly_dil *a, const poly_dil *b) {
    213     for (int i = 0; i < DILITHIUM_N; ++i) {
    214         c->coeffs[i] = a->coeffs[i] - b->coeffs[i];
    215     }
    216 }
    217 
    218 /* ============================================================================
    219  * Sampling Functions
    220  * ========================================================================= */
    221 
    222 /* Rejection sampling for uniform polynomial */
    223 static unsigned int rej_uniform_dil(int32_t *a, unsigned int len, const uint8_t *buf, unsigned int buflen) {
    224     unsigned int ctr, pos;
    225     uint32_t t;
    226 
    227     ctr = pos = 0;
    228     while (ctr < len && pos + 3 <= buflen) {
    229         t = buf[pos++];
    230         t |= (uint32_t)buf[pos++] << 8;
    231         t |= (uint32_t)buf[pos++] << 16;
    232         t &= 0x7FFFFF;
    233 
    234         if (t < DILITHIUM_Q) {
    235             a[ctr++] = t;
    236         }
    237     }
    238 
    239     return ctr;
    240 }
    241 
    242 /* Sample uniform polynomial using SHAKE-128 */
    243 static void poly_uniform_dil(poly_dil *a, const uint8_t seed[DILITHIUM_SEEDBYTES], uint16_t nonce) {
    244     unsigned int i, ctr, off;
    245     unsigned int buflen = 504;  /* SHAKE128 rate * 3 */
    246     uint8_t buf[504];
    247     uint8_t extseed[DILITHIUM_SEEDBYTES + 2];
    248 
    249     memcpy(extseed, seed, DILITHIUM_SEEDBYTES);
    250     extseed[DILITHIUM_SEEDBYTES] = nonce & 0xFF;
    251     extseed[DILITHIUM_SEEDBYTES + 1] = nonce >> 8;
    252 
    253     shake128(extseed, DILITHIUM_SEEDBYTES + 2, buf, buflen);
    254     ctr = rej_uniform_dil(a->coeffs, DILITHIUM_N, buf, buflen);
    255 
    256     while (ctr < DILITHIUM_N) {
    257         off = buflen % 3;
    258         for (i = 0; i < off; ++i) {
    259             buf[i] = buf[buflen - off + i];
    260         }
    261 
    262         shake128(extseed, DILITHIUM_SEEDBYTES + 2, buf + off, buflen - off);
    263         buflen = 504;
    264         ctr += rej_uniform_dil(a->coeffs + ctr, DILITHIUM_N - ctr, buf, buflen);
    265     }
    266 }
    267 
    268 /* Centered binomial distribution */
    269 static void poly_uniform_eta_dil(poly_dil *a, const uint8_t seed[DILITHIUM_CRHBYTES], uint16_t nonce, int eta) {
    270     uint8_t buf[136];  /* eta=4 needs 136 bytes for 256 coeffs */
    271     uint8_t extseed[DILITHIUM_CRHBYTES + 2];
    272 
    273     memcpy(extseed, seed, DILITHIUM_CRHBYTES);
    274     extseed[DILITHIUM_CRHBYTES] = nonce & 0xFF;
    275     extseed[DILITHIUM_CRHBYTES + 1] = nonce >> 8;
    276 
    277     shake256(extseed, DILITHIUM_CRHBYTES + 2, buf, (eta == 2) ? 64 : 136);
    278 
    279     if (eta == 2) {
    280         for (int i = 0; i < DILITHIUM_N / 8; ++i) {
    281             uint32_t t = buf[i];
    282             uint32_t d = 0;
    283             d = t & 0x55;
    284             d += (t >> 1) & 0x55;
    285 
    286             for (int j = 0; j < 8; ++j) {
    287                 a->coeffs[8*i + j] = ((d >> (2*j)) & 0x3) - ((d >> (2*j + 1)) & 0x1) * 2;
    288             }
    289         }
    290     } else if (eta == 4) {
    291         for (int i = 0; i < DILITHIUM_N / 2; ++i) {
    292             uint32_t t = buf[i];
    293             uint32_t d = t & 0x77;
    294             d += (t >> 1) & 0x77;
    295 
    296             for (int j = 0; j < 2; ++j) {
    297                 int32_t a0 = (d >> (4*j)) & 0x7;
    298                 int32_t a1 = (d >> (4*j + 1)) & 0x7;
    299                 a->coeffs[2*i + j] = a0 - a1;
    300             }
    301         }
    302     }
    303 }
    304 
    305 /* Sample challenge polynomial with TAU ones and minus ones */
    306 static void poly_challenge_dil(poly_dil *c, const uint8_t seed[DILITHIUM_SEEDBYTES], int tau) {
    307     unsigned int i, b, pos;
    308     uint64_t signs;
    309     uint8_t buf[SHAKE256_RATE];
    310 
    311     shake256(seed, DILITHIUM_SEEDBYTES, buf, SHAKE256_RATE);
    312 
    313     signs = 0;
    314     for (i = 0; i < 8; ++i) {
    315         signs |= (uint64_t)buf[i] << 8*i;
    316     }
    317     pos = 8;
    318 
    319     memset(c->coeffs, 0, sizeof(poly_dil));
    320     for (i = DILITHIUM_N - tau; i < DILITHIUM_N; ++i) {
    321         do {
    322             if (pos >= SHAKE256_RATE) {
    323                 shake256(seed, DILITHIUM_SEEDBYTES, buf, SHAKE256_RATE);
    324                 pos = 0;
    325             }
    326             b = buf[pos++];
    327         } while (b > i);
    328 
    329         c->coeffs[i] = c->coeffs[b];
    330         c->coeffs[b] = 1 - 2 * (signs & 1);
    331         signs >>= 1;
    332     }
    333 }
    334 
    335 /* ExpandMask: Sample y vector from SHAKE-256 (FIPS 204) */
    336 static void poly_uniform_gamma1_dil(poly_dil *a, const uint8_t seed[DILITHIUM_CRHBYTES], uint16_t nonce, int32_t gamma1) {
    337     uint8_t buf[640];  /* gamma1=2^19 needs 640 bytes */
    338     uint8_t extseed[DILITHIUM_CRHBYTES + 2];
    339     unsigned int buflen;
    340 
    341     if (gamma1 == (1 << 17)) {
    342         buflen = 576;  /* (18 bits * 256) / 8 = 576 bytes */
    343     } else {
    344         buflen = 640;  /* (20 bits * 256) / 8 = 640 bytes */
    345     }
    346 
    347     memcpy(extseed, seed, DILITHIUM_CRHBYTES);
    348     extseed[DILITHIUM_CRHBYTES] = nonce & 0xFF;
    349     extseed[DILITHIUM_CRHBYTES + 1] = nonce >> 8;
    350 
    351     shake256(extseed, DILITHIUM_CRHBYTES + 2, buf, buflen);
    352 
    353     /* Unpack coefficients */
    354     if (gamma1 == (1 << 17)) {
    355         /* 18-bit coefficients */
    356         for (int i = 0; i < DILITHIUM_N; ++i) {
    357             a->coeffs[i] = buf[i*9/4];
    358             if (i % 4 == 1) a->coeffs[i] |= (uint32_t)buf[i*9/4 + 1] << 8;
    359             if (i % 4 == 2) a->coeffs[i] |= (uint32_t)buf[i*9/4 + 1] << 8;
    360             if (i % 4 == 3) a->coeffs[i] |= (uint32_t)buf[i*9/4 + 1] << 8;
    361             a->coeffs[i] &= 0x3FFFF;
    362             a->coeffs[i] = gamma1 - a->coeffs[i];
    363         }
    364     } else {
    365         /* 20-bit coefficients */
    366         for (int i = 0; i < DILITHIUM_N / 4; ++i) {
    367             a->coeffs[4*i + 0]  = buf[5*i];
    368             a->coeffs[4*i + 0] |= (uint32_t)buf[5*i + 1] << 8;
    369             a->coeffs[4*i + 0] |= (uint32_t)(buf[5*i + 2] & 0x0F) << 16;
    370 
    371             a->coeffs[4*i + 1]  = buf[5*i + 2] >> 4;
    372             a->coeffs[4*i + 1] |= (uint32_t)buf[5*i + 3] << 4;
    373             a->coeffs[4*i + 1] |= (uint32_t)buf[5*i + 4] << 12;
    374 
    375             a->coeffs[4*i + 2]  = buf[5*i + 5];
    376             a->coeffs[4*i + 2] |= (uint32_t)buf[5*i + 6] << 8;
    377             a->coeffs[4*i + 2] |= (uint32_t)(buf[5*i + 7] & 0x0F) << 16;
    378 
    379             a->coeffs[4*i + 3]  = buf[5*i + 7] >> 4;
    380             a->coeffs[4*i + 3] |= (uint32_t)buf[5*i + 8] << 4;
    381             a->coeffs[4*i + 3] |= (uint32_t)buf[5*i + 9] << 12;
    382 
    383             a->coeffs[4*i + 0] = gamma1 - a->coeffs[4*i + 0];
    384             a->coeffs[4*i + 1] = gamma1 - a->coeffs[4*i + 1];
    385             a->coeffs[4*i + 2] = gamma1 - a->coeffs[4*i + 2];
    386             a->coeffs[4*i + 3] = gamma1 - a->coeffs[4*i + 3];
    387         }
    388     }
    389 }
    390 
    391 /* Infinity norm: max |coeff| */
    392 static int32_t poly_chknorm_dil(const poly_dil *a, int32_t bound) {
    393     for (int i = 0; i < DILITHIUM_N; ++i) {
    394         int32_t t = a->coeffs[i];
    395 
    396         /* Reduce to centered representation */
    397         t = reduce32_dil(t);
    398         t = caddq_dil(t);
    399         if (t >= (DILITHIUM_Q + 1) / 2) {
    400             t = DILITHIUM_Q - t;
    401         }
    402 
    403         if (t >= bound) {
    404             return 1;  /* Norm too large */
    405         }
    406     }
    407     return 0;  /* OK */
    408 }
    409 
    410 /* ============================================================================
    411  * Polynomial Packing/Unpacking (FIPS 204 Bit-Exact)
    412  * ========================================================================= */
    413 
    414 /* Pack polynomial with coefficients in [0, q) into bytes (13 bits each) */
    415 static void polyt1_pack(uint8_t *r, const poly_dil *a) {
    416     for (int i = 0; i < DILITHIUM_N / 8; ++i) {
    417         r[13*i +  0] =  (a->coeffs[8*i + 0] >> 0);
    418         r[13*i +  1] = ((a->coeffs[8*i + 0] >> 8) | (a->coeffs[8*i + 1] << 5));
    419         r[13*i +  2] =  (a->coeffs[8*i + 1] >> 3);
    420         r[13*i +  3] = ((a->coeffs[8*i + 1] >> 11) | (a->coeffs[8*i + 2] << 2));
    421         r[13*i +  4] = ((a->coeffs[8*i + 2] >> 6) | (a->coeffs[8*i + 3] << 7));
    422         r[13*i +  5] =  (a->coeffs[8*i + 3] >> 1);
    423         r[13*i +  6] = ((a->coeffs[8*i + 3] >> 9) | (a->coeffs[8*i + 4] << 4));
    424         r[13*i +  7] =  (a->coeffs[8*i + 4] >> 4);
    425         r[13*i +  8] = ((a->coeffs[8*i + 4] >> 12) | (a->coeffs[8*i + 5] << 1));
    426         r[13*i +  9] = ((a->coeffs[8*i + 5] >> 7) | (a->coeffs[8*i + 6] << 6));
    427         r[13*i + 10] =  (a->coeffs[8*i + 6] >> 2);
    428         r[13*i + 11] = ((a->coeffs[8*i + 6] >> 10) | (a->coeffs[8*i + 7] << 3));
    429         r[13*i + 12] =  (a->coeffs[8*i + 7] >> 5);
    430     }
    431 }
    432 
    433 /* Unpack polynomial t1 with 10-bit coefficients */
    434 static void polyt1_unpack(poly_dil *r, const uint8_t *a) {
    435     for (int i = 0; i < DILITHIUM_N / 4; ++i) {
    436         r->coeffs[4*i + 0] = (((a[5*i + 0] >> 0) | ((uint32_t)a[5*i + 1] << 8)) & 0x3FF);
    437         r->coeffs[4*i + 1] = (((a[5*i + 1] >> 2) | ((uint32_t)a[5*i + 2] << 6)) & 0x3FF);
    438         r->coeffs[4*i + 2] = (((a[5*i + 2] >> 4) | ((uint32_t)a[5*i + 3] << 4)) & 0x3FF);
    439         r->coeffs[4*i + 3] = (((a[5*i + 3] >> 6) | ((uint32_t)a[5*i + 4] << 2)) & 0x3FF);
    440     }
    441 }
    442 
    443 /* Pack polynomial t0 with coefficients in [-(2^{D-1}), 2^{D-1}] */
    444 static void polyt0_pack(uint8_t *r, const poly_dil *a) {
    445     uint32_t t[8];
    446 
    447     for (int i = 0; i < DILITHIUM_N / 8; ++i) {
    448         t[0] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 0];
    449         t[1] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 1];
    450         t[2] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 2];
    451         t[3] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 3];
    452         t[4] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 4];
    453         t[5] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 5];
    454         t[6] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 6];
    455         t[7] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 7];
    456 
    457         r[13*i +  0] =  t[0];
    458         r[13*i +  1] = (t[0] >>  8) | (t[1] << 5);
    459         r[13*i +  2] =  t[1] >>  3;
    460         r[13*i +  3] = (t[1] >> 11) | (t[2] << 2);
    461         r[13*i +  4] = (t[2] >>  6) | (t[3] << 7);
    462         r[13*i +  5] =  t[3] >>  1;
    463         r[13*i +  6] = (t[3] >>  9) | (t[4] << 4);
    464         r[13*i +  7] =  t[4] >>  4;
    465         r[13*i +  8] = (t[4] >> 12) | (t[5] << 1);
    466         r[13*i +  9] = (t[5] >>  7) | (t[6] << 6);
    467         r[13*i + 10] =  t[6] >>  2;
    468         r[13*i + 11] = (t[6] >> 10) | (t[7] << 3);
    469         r[13*i + 12] =  t[7] >>  5;
    470     }
    471 }
    472 
    473 /* Unpack t0 */
    474 static void polyt0_unpack(poly_dil *r, const uint8_t *a) {
    475     uint32_t t[8];
    476 
    477     for (int i = 0; i < DILITHIUM_N / 8; ++i) {
    478         t[0]  =  a[13*i +  0];
    479         t[0] |= (uint32_t)a[13*i +  1] <<  8;
    480         t[0] &= 0x1FFF;
    481 
    482         t[1]  =  a[13*i +  1] >> 5;
    483         t[1] |= (uint32_t)a[13*i +  2] <<  3;
    484         t[1] |= (uint32_t)a[13*i +  3] << 11;
    485         t[1] &= 0x1FFF;
    486 
    487         t[2]  =  a[13*i +  3] >> 2;
    488         t[2] |= (uint32_t)a[13*i +  4] <<  6;
    489         t[2] &= 0x1FFF;
    490 
    491         t[3]  =  a[13*i +  4] >> 7;
    492         t[3] |= (uint32_t)a[13*i +  5] <<  1;
    493         t[3] |= (uint32_t)a[13*i +  6] <<  9;
    494         t[3] &= 0x1FFF;
    495 
    496         t[4]  =  a[13*i +  6] >> 4;
    497         t[4] |= (uint32_t)a[13*i +  7] <<  4;
    498         t[4] |= (uint32_t)a[13*i +  8] << 12;
    499         t[4] &= 0x1FFF;
    500 
    501         t[5]  =  a[13*i +  8] >> 1;
    502         t[5] |= (uint32_t)a[13*i +  9] <<  7;
    503         t[5] &= 0x1FFF;
    504 
    505         t[6]  =  a[13*i +  9] >> 6;
    506         t[6] |= (uint32_t)a[13*i + 10] <<  2;
    507         t[6] |= (uint32_t)a[13*i + 11] << 10;
    508         t[6] &= 0x1FFF;
    509 
    510         t[7]  =  a[13*i + 11] >> 3;
    511         t[7] |= (uint32_t)a[13*i + 12] <<  5;
    512         t[7] &= 0x1FFF;
    513 
    514         r->coeffs[8*i + 0] = (1 << (DILITHIUM_D-1)) - t[0];
    515         r->coeffs[8*i + 1] = (1 << (DILITHIUM_D-1)) - t[1];
    516         r->coeffs[8*i + 2] = (1 << (DILITHIUM_D-1)) - t[2];
    517         r->coeffs[8*i + 3] = (1 << (DILITHIUM_D-1)) - t[3];
    518         r->coeffs[8*i + 4] = (1 << (DILITHIUM_D-1)) - t[4];
    519         r->coeffs[8*i + 5] = (1 << (DILITHIUM_D-1)) - t[5];
    520         r->coeffs[8*i + 6] = (1 << (DILITHIUM_D-1)) - t[6];
    521         r->coeffs[8*i + 7] = (1 << (DILITHIUM_D-1)) - t[7];
    522     }
    523 }
    524 
    525 /* Pack secret polynomial with eta=2 (3 bits per coefficient) */
    526 static void polyeta2_pack(uint8_t *r, const poly_dil *a) {
    527     uint8_t t[8];
    528 
    529     for (int i = 0; i < DILITHIUM_N / 8; ++i) {
    530         t[0] = 2 - a->coeffs[8*i + 0];
    531         t[1] = 2 - a->coeffs[8*i + 1];
    532         t[2] = 2 - a->coeffs[8*i + 2];
    533         t[3] = 2 - a->coeffs[8*i + 3];
    534         t[4] = 2 - a->coeffs[8*i + 4];
    535         t[5] = 2 - a->coeffs[8*i + 5];
    536         t[6] = 2 - a->coeffs[8*i + 6];
    537         t[7] = 2 - a->coeffs[8*i + 7];
    538 
    539         r[3*i + 0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6);
    540         r[3*i + 1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7);
    541         r[3*i + 2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5);
    542     }
    543 }
    544 
    545 /* Unpack secret polynomial eta=2 */
    546 static void polyeta2_unpack(poly_dil *r, const uint8_t *a) {
    547     for (int i = 0; i < DILITHIUM_N / 8; ++i) {
    548         r->coeffs[8*i + 0] =  (a[3*i + 0] >> 0) & 7;
    549         r->coeffs[8*i + 1] =  (a[3*i + 0] >> 3) & 7;
    550         r->coeffs[8*i + 2] = ((a[3*i + 0] >> 6) | (a[3*i + 1] << 2)) & 7;
    551         r->coeffs[8*i + 3] =  (a[3*i + 1] >> 1) & 7;
    552         r->coeffs[8*i + 4] =  (a[3*i + 1] >> 4) & 7;
    553         r->coeffs[8*i + 5] = ((a[3*i + 1] >> 7) | (a[3*i + 2] << 1)) & 7;
    554         r->coeffs[8*i + 6] =  (a[3*i + 2] >> 2) & 7;
    555         r->coeffs[8*i + 7] =  (a[3*i + 2] >> 5) & 7;
    556 
    557         r->coeffs[8*i + 0] = 2 - r->coeffs[8*i + 0];
    558         r->coeffs[8*i + 1] = 2 - r->coeffs[8*i + 1];
    559         r->coeffs[8*i + 2] = 2 - r->coeffs[8*i + 2];
    560         r->coeffs[8*i + 3] = 2 - r->coeffs[8*i + 3];
    561         r->coeffs[8*i + 4] = 2 - r->coeffs[8*i + 4];
    562         r->coeffs[8*i + 5] = 2 - r->coeffs[8*i + 5];
    563         r->coeffs[8*i + 6] = 2 - r->coeffs[8*i + 6];
    564         r->coeffs[8*i + 7] = 2 - r->coeffs[8*i + 7];
    565     }
    566 }
    567 
    568 /* Pack secret polynomial with eta=4 (4 bits per coefficient) */
    569 static void polyeta4_pack(uint8_t *r, const poly_dil *a) {
    570     uint8_t t[2];
    571 
    572     for (int i = 0; i < DILITHIUM_N / 2; ++i) {
    573         t[0] = 4 - a->coeffs[2*i + 0];
    574         t[1] = 4 - a->coeffs[2*i + 1];
    575         r[i] = t[0] | (t[1] << 4);
    576     }
    577 }
    578 
    579 /* Unpack secret polynomial eta=4 */
    580 static void polyeta4_unpack(poly_dil *r, const uint8_t *a) {
    581     for (int i = 0; i < DILITHIUM_N / 2; ++i) {
    582         r->coeffs[2*i + 0] = a[i] & 0x0F;
    583         r->coeffs[2*i + 1] = a[i] >> 4;
    584         r->coeffs[2*i + 0] = 4 - r->coeffs[2*i + 0];
    585         r->coeffs[2*i + 1] = 4 - r->coeffs[2*i + 1];
    586     }
    587 }
    588 
    589 /* Pack z polynomial (gamma1 = 2^17: 18 bits, gamma1 = 2^19: 20 bits) */
    590 static void polyz_pack(uint8_t *r, const poly_dil *a, int32_t gamma1) {
    591     uint32_t t[4];
    592 
    593     if (gamma1 == (1 << 17)) {
    594         /* 18-bit packing */
    595         for (int i = 0; i < DILITHIUM_N / 4; ++i) {
    596             t[0] = gamma1 - a->coeffs[4*i + 0];
    597             t[1] = gamma1 - a->coeffs[4*i + 1];
    598             t[2] = gamma1 - a->coeffs[4*i + 2];
    599             t[3] = gamma1 - a->coeffs[4*i + 3];
    600 
    601             r[9*i + 0] =  t[0];
    602             r[9*i + 1] = (t[0] >>  8) | (t[1] << 2);
    603             r[9*i + 2] = (t[1] >>  6);
    604             r[9*i + 3] = (t[1] >> 14) | (t[2] << 4);
    605             r[9*i + 4] = (t[2] >>  4);
    606             r[9*i + 5] = (t[2] >> 12) | (t[3] << 6);
    607             r[9*i + 6] =  t[3] >>  2;
    608             r[9*i + 7] =  t[3] >> 10;
    609             r[9*i + 8] =  t[3] >> 18;
    610         }
    611     } else {
    612         /* 20-bit packing (gamma1 = 2^19) */
    613         for (int i = 0; i < DILITHIUM_N / 4; ++i) {
    614             t[0] = gamma1 - a->coeffs[4*i + 0];
    615             t[1] = gamma1 - a->coeffs[4*i + 1];
    616             t[2] = gamma1 - a->coeffs[4*i + 2];
    617             t[3] = gamma1 - a->coeffs[4*i + 3];
    618 
    619             r[5*i + 0] =  t[0];
    620             r[5*i + 1] = (t[0] >>  8);
    621             r[5*i + 2] = (t[0] >> 16) | (t[1] << 4);
    622             r[5*i + 3] = (t[1] >>  4);
    623             r[5*i + 4] = (t[1] >> 12) | (t[2] << 0);
    624 
    625             r[5*i + 5] =  (t[2] >>  8);
    626             r[5*i + 6] =  (t[2] >> 16) | (t[3] << 4);
    627             r[5*i + 7] =  (t[3] >>  4);
    628             r[5*i + 8] =  (t[3] >> 12);
    629             r[5*i + 9] =  (t[3] >> 20);
    630         }
    631     }
    632 }
    633 
    634 /* Unpack z polynomial */
    635 static void polyz_unpack(poly_dil *r, const uint8_t *a, int32_t gamma1) {
    636     if (gamma1 == (1 << 17)) {
    637         /* 18-bit unpacking */
    638         for (int i = 0; i < DILITHIUM_N / 4; ++i) {
    639             r->coeffs[4*i + 0]  =  a[9*i + 0];
    640             r->coeffs[4*i + 0] |= (uint32_t)a[9*i + 1] <<  8;
    641             r->coeffs[4*i + 0] |= (uint32_t)a[9*i + 2] << 16;
    642             r->coeffs[4*i + 0] &= 0x3FFFF;
    643 
    644             r->coeffs[4*i + 1]  =  a[9*i + 2] >> 2;
    645             r->coeffs[4*i + 1] |= (uint32_t)a[9*i + 3] <<  6;
    646             r->coeffs[4*i + 1] |= (uint32_t)a[9*i + 4] << 14;
    647             r->coeffs[4*i + 1] &= 0x3FFFF;
    648 
    649             r->coeffs[4*i + 2]  =  a[9*i + 4] >> 4;
    650             r->coeffs[4*i + 2] |= (uint32_t)a[9*i + 5] <<  4;
    651             r->coeffs[4*i + 2] |= (uint32_t)a[9*i + 6] << 12;
    652             r->coeffs[4*i + 2] &= 0x3FFFF;
    653 
    654             r->coeffs[4*i + 3]  =  a[9*i + 6] >> 6;
    655             r->coeffs[4*i + 3] |= (uint32_t)a[9*i + 7] <<  2;
    656             r->coeffs[4*i + 3] |= (uint32_t)a[9*i + 8] << 10;
    657             r->coeffs[4*i + 3] &= 0x3FFFF;
    658 
    659             r->coeffs[4*i + 0] = gamma1 - r->coeffs[4*i + 0];
    660             r->coeffs[4*i + 1] = gamma1 - r->coeffs[4*i + 1];
    661             r->coeffs[4*i + 2] = gamma1 - r->coeffs[4*i + 2];
    662             r->coeffs[4*i + 3] = gamma1 - r->coeffs[4*i + 3];
    663         }
    664     } else {
    665         /* 20-bit unpacking */
    666         for (int i = 0; i < DILITHIUM_N / 4; ++i) {
    667             r->coeffs[4*i + 0]  =  a[5*i + 0];
    668             r->coeffs[4*i + 0] |= (uint32_t)a[5*i + 1] <<  8;
    669             r->coeffs[4*i + 0] |= (uint32_t)a[5*i + 2] << 16;
    670             r->coeffs[4*i + 0] &= 0xFFFFF;
    671 
    672             r->coeffs[4*i + 1]  =  a[5*i + 2] >> 4;
    673             r->coeffs[4*i + 1] |= (uint32_t)a[5*i + 3] <<  4;
    674             r->coeffs[4*i + 1] |= (uint32_t)a[5*i + 4] << 12;
    675             r->coeffs[4*i + 1] &= 0xFFFFF;
    676 
    677             r->coeffs[4*i + 2]  =  a[5*i + 5];
    678             r->coeffs[4*i + 2] |= (uint32_t)a[5*i + 6] <<  8;
    679             r->coeffs[4*i + 2] |= (uint32_t)a[5*i + 7] << 16;
    680             r->coeffs[4*i + 2] &= 0xFFFFF;
    681 
    682             r->coeffs[4*i + 3]  =  a[5*i + 7] >> 4;
    683             r->coeffs[4*i + 3] |= (uint32_t)a[5*i + 8] <<  4;
    684             r->coeffs[4*i + 3] |= (uint32_t)a[5*i + 9] << 12;
    685             r->coeffs[4*i + 3] &= 0xFFFFF;
    686 
    687             r->coeffs[4*i + 0] = gamma1 - r->coeffs[4*i + 0];
    688             r->coeffs[4*i + 1] = gamma1 - r->coeffs[4*i + 1];
    689             r->coeffs[4*i + 2] = gamma1 - r->coeffs[4*i + 2];
    690             r->coeffs[4*i + 3] = gamma1 - r->coeffs[4*i + 3];
    691         }
    692     }
    693 }
    694 
    695 /* Pack w1 with gamma2 = (q-1)/88 (6 bits) or (q-1)/32 (4 bits) */
    696 static void polyw1_pack(uint8_t *r, const poly_dil *a, int32_t gamma2) {
    697     if (gamma2 == (DILITHIUM_Q - 1) / 88) {
    698         /* 6-bit packing */
    699         for (int i = 0; i < DILITHIUM_N / 4; ++i) {
    700             r[3*i + 0] =  a->coeffs[4*i + 0];
    701             r[3*i + 0] |= a->coeffs[4*i + 1] << 6;
    702             r[3*i + 1] =  a->coeffs[4*i + 1] >> 2;
    703             r[3*i + 1] |= a->coeffs[4*i + 2] << 4;
    704             r[3*i + 2] =  a->coeffs[4*i + 2] >> 4;
    705             r[3*i + 2] |= a->coeffs[4*i + 3] << 2;
    706         }
    707     } else {
    708         /* 4-bit packing */
    709         for (int i = 0; i < DILITHIUM_N / 2; ++i) {
    710             r[i] = a->coeffs[2*i + 0] | (a->coeffs[2*i + 1] << 4);
    711         }
    712     }
    713 }
    714 
    715 /* Pack hint polynomial (omega max number of 1s) */
    716 static void polyhint_pack(uint8_t *r, const poly_dil *a, unsigned int omega) {
    717     unsigned int i, j, k = 0;
    718 
    719     for (i = 0; i < DILITHIUM_N; ++i) {
    720         if (a->coeffs[i] != 0) {
    721             r[k++] = i;
    722             if (k >= omega) break;
    723         }
    724     }
    725 
    726     /* Pad with 255 */
    727     for (j = k; j < omega; ++j) {
    728         r[j] = 255;
    729     }
    730 }
    731 
    732 /* Unpack hint polynomial */
    733 static void polyhint_unpack(poly_dil *r, const uint8_t *a, unsigned int omega) {
    734     memset(r->coeffs, 0, sizeof(poly_dil));
    735 
    736     for (unsigned int i = 0; i < omega; ++i) {
    737         if (a[i] == 255) break;
    738         /* a[i] is uint8_t so max 255, always < DILITHIUM_N (256) */
    739         r->coeffs[a[i]] = 1;
    740     }
    741 }
    742 
    743 /* ============================================================================
    744  * Rounding and Decomposition
    745  * ========================================================================= */
    746 
    747 /* Power2Round: r1 = (r + 2^(D-1) - 1) / 2^D */
    748 static int32_t power2round_dil(int32_t *r0, int32_t r) {
    749     int32_t r1;
    750 
    751     r1 = (r + (1 << (DILITHIUM_D-1)) - 1) >> DILITHIUM_D;
    752     *r0 = r - (r1 << DILITHIUM_D);
    753     return r1;
    754 }
    755 
    756 /* Decompose: r = r1*2*gamma2 + r0 with -gamma2 < r0 <= gamma2 (FIPS 204) */
    757 static int32_t decompose_dil(int32_t *r0, int32_t r, int32_t gamma2) {
    758     int32_t r1;
    759 
    760     /* Reduce r to positive range [0, q) */
    761     r = caddq_dil(r);
    762 
    763     /* r1 = ⌊(r + γ₂) / (2γ₂)⌋ */
    764     r1 = (r + gamma2 - 1) / (2 * gamma2);
    765 
    766     /* r0 = r - r1*2*γ₂ */
    767     *r0 = r - r1 * 2 * gamma2;
    768 
    769     return r1;
    770 }
    771 
    772 /* HighBits: Extract high-order bits */
    773 static int32_t highbits_dil(int32_t r, int32_t gamma2) {
    774     int32_t r0;
    775     return decompose_dil(&r0, r, gamma2);
    776 }
    777 
    778 /* LowBits: Extract low-order bits */
    779 static int32_t lowbits_dil(int32_t r, int32_t gamma2) {
    780     int32_t r0;
    781     decompose_dil(&r0, r, gamma2);
    782     return r0;
    783 }
    784 
    785 /* MakeHint: h = 1 if high bits change when adding z to r (FIPS 204 Algorithm 32) */
    786 static unsigned int make_hint_dil(poly_dil *h, const poly_dil *z, const poly_dil *r, int32_t gamma2) {
    787     unsigned int s = 0;
    788 
    789     for (int i = 0; i < DILITHIUM_N; ++i) {
    790         int32_t r1 = highbits_dil(r->coeffs[i], gamma2);
    791         int32_t v1 = highbits_dil(r->coeffs[i] + z->coeffs[i], gamma2);
    792 
    793         h->coeffs[i] = (r1 != v1) ? 1 : 0;
    794         s += h->coeffs[i];
    795     }
    796 
    797     return s;
    798 }
    799 
    800 /* UseHint: Recover high bits using hint (FIPS 204 Algorithm 33) */
    801 static int32_t use_hint_dil(int32_t h, int32_t r, int32_t gamma2) {
    802     int32_t r0, r1;
    803 
    804     r1 = decompose_dil(&r0, r, gamma2);
    805 
    806     if (h == 1) {
    807         if (r0 > 0) {
    808             return (r1 + 1) % ((DILITHIUM_Q - 1) / (2 * gamma2) + 1);
    809         } else {
    810             return (r1 - 1 + ((DILITHIUM_Q - 1) / (2 * gamma2) + 1)) % ((DILITHIUM_Q - 1) / (2 * gamma2) + 1);
    811         }
    812     }
    813 
    814     return r1;
    815 }
    816 
    817 /* ============================================================================
    818  * Generic Dilithium Implementation
    819  * ========================================================================= */
    820 
    821 static int dilithium_keypair_internal(uint8_t *pk, uint8_t *sk, int k, int l, int eta) {
    822     uint8_t seedbuf[3 * DILITHIUM_SEEDBYTES];
    823     uint8_t *rho, *rhoprime, *key;
    824     uint16_t nonce = 0;
    825     poly_dil mat[64];  /* Max 8x8 matrix */
    826     poly_dil s1[8], s2[8], t[8], t1[8], t0[8];
    827 
    828     /* Generate randomness */
    829     random_bytes(seedbuf, DILITHIUM_SEEDBYTES);
    830     shake256(seedbuf, DILITHIUM_SEEDBYTES, seedbuf, 3 * DILITHIUM_SEEDBYTES);
    831 
    832     rho = seedbuf;
    833     rhoprime = rho + DILITHIUM_SEEDBYTES;
    834     key = rhoprime + DILITHIUM_CRHBYTES;
    835 
    836     /* Expand matrix A from rho */
    837     for (int i = 0; i < k; ++i) {
    838         for (int j = 0; j < l; ++j) {
    839             poly_uniform_dil(&mat[i*l + j], rho, (i << 8) + j);
    840         }
    841     }
    842 
    843     /* Sample secret vectors s1 and s2 */
    844     for (int i = 0; i < l; ++i) {
    845         poly_uniform_eta_dil(&s1[i], rhoprime, nonce++, eta);
    846     }
    847     for (int i = 0; i < k; ++i) {
    848         poly_uniform_eta_dil(&s2[i], rhoprime, nonce++, eta);
    849     }
    850 
    851     /* Matrix-vector multiplication: t = As1 + s2 */
    852     for (int i = 0; i < l; ++i) {
    853         ntt_dil(s1[i].coeffs);
    854     }
    855 
    856     for (int i = 0; i < k; ++i) {
    857         poly_pointwise_montgomery(&t[i], &mat[i*l], &s1[0]);
    858         for (int j = 1; j < l; ++j) {
    859             poly_dil temp;
    860             poly_pointwise_montgomery(&temp, &mat[i*l + j], &s1[j]);
    861             poly_add_dil(&t[i], &t[i], &temp);
    862         }
    863         invntt_tomont_dil(t[i].coeffs);
    864         poly_add_dil(&t[i], &t[i], &s2[i]);
    865         poly_reduce_dil(&t[i]);
    866     }
    867 
    868     /* Power2Round and pack */
    869     for (int i = 0; i < k; ++i) {
    870         for (int j = 0; j < DILITHIUM_N; ++j) {
    871             t1[i].coeffs[j] = power2round_dil(&t0[i].coeffs[j], t[i].coeffs[j]);
    872         }
    873     }
    874 
    875     /* Pack public key: rho || t1 (FIPS 204 Format) */
    876     memcpy(pk, rho, DILITHIUM_SEEDBYTES);
    877     uint8_t *pk_t1 = pk + DILITHIUM_SEEDBYTES;
    878     for (int i = 0; i < k; ++i) {
    879         polyt1_pack(pk_t1 + i * 320, &t1[i]);  /* 10 bits * 256 / 8 = 320 bytes */
    880     }
    881 
    882     /* Compute tr = H(pk) */
    883     uint8_t tr[DILITHIUM_SEEDBYTES];
    884     sha3_256(pk, DILITHIUM_SEEDBYTES + k * 320, tr);
    885 
    886     /* Pack secret key: rho || key || tr || s1 || s2 || t0 (FIPS 204 Format) */
    887     uint8_t *sk_ptr = sk;
    888     memcpy(sk_ptr, rho, DILITHIUM_SEEDBYTES);
    889     sk_ptr += DILITHIUM_SEEDBYTES;
    890     memcpy(sk_ptr, key, DILITHIUM_SEEDBYTES);
    891     sk_ptr += DILITHIUM_SEEDBYTES;
    892     memcpy(sk_ptr, tr, DILITHIUM_SEEDBYTES);
    893     sk_ptr += DILITHIUM_SEEDBYTES;
    894 
    895     /* Pack s1 (eta bits per coefficient) */
    896     for (int i = 0; i < l; ++i) {
    897         if (eta == 2) {
    898             polyeta2_pack(sk_ptr, &s1[i]);
    899             sk_ptr += 96;  /* 3 bits * 256 / 8 = 96 bytes */
    900         } else {
    901             polyeta4_pack(sk_ptr, &s1[i]);
    902             sk_ptr += 128;  /* 4 bits * 256 / 8 = 128 bytes */
    903         }
    904     }
    905 
    906     /* Pack s2 */
    907     for (int i = 0; i < k; ++i) {
    908         if (eta == 2) {
    909             polyeta2_pack(sk_ptr, &s2[i]);
    910             sk_ptr += 96;
    911         } else {
    912             polyeta4_pack(sk_ptr, &s2[i]);
    913             sk_ptr += 128;
    914         }
    915     }
    916 
    917     /* Pack t0 (13 bits per coefficient) */
    918     for (int i = 0; i < k; ++i) {
    919         polyt0_pack(sk_ptr, &t0[i]);
    920         sk_ptr += 416;  /* 13 bits * 256 / 8 = 416 bytes */
    921     }
    922 
    923     /* Secure cleanup */
    924     secure_zero(seedbuf, sizeof(seedbuf));
    925     secure_zero(s1, sizeof(s1));
    926     secure_zero(s2, sizeof(s2));
    927     secure_zero(t0, sizeof(t0));
    928 
    929     return 0;
    930 }
    931 
    932 /* ============================================================================
    933  * Public API
    934  * ========================================================================= */
    935 
    936 int dilithium2_keypair(uint8_t *public_key, uint8_t *secret_key) {
    937     if (!public_key || !secret_key) return -1;
    938     return dilithium_keypair_internal(public_key, secret_key,
    939                                        DILITHIUM2_K, DILITHIUM2_L, DILITHIUM2_ETA);
    940 }
    941 
    942 int dilithium3_keypair(uint8_t *public_key, uint8_t *secret_key) {
    943     if (!public_key || !secret_key) return -1;
    944     return dilithium_keypair_internal(public_key, secret_key,
    945                                        DILITHIUM3_K, DILITHIUM3_L, DILITHIUM3_ETA);
    946 }
    947 
    948 int dilithium5_keypair(uint8_t *public_key, uint8_t *secret_key) {
    949     if (!public_key || !secret_key) return -1;
    950     return dilithium_keypair_internal(public_key, secret_key,
    951                                        DILITHIUM5_K, DILITHIUM5_L, DILITHIUM5_ETA);
    952 }
    953 
    954 /* Full FIPS 204 Signing with Rejection Sampling (Algorithm 2) */
    955 static int dilithium_sign_internal(uint8_t *signature, size_t *signature_len,
    956                                      const uint8_t *message, size_t message_len,
    957                                      const uint8_t *secret_key,
    958                                      int k, int l, int eta, int tau, int beta, int gamma1, int gamma2, unsigned int omega) {
    959     uint8_t mu[DILITHIUM_CRHBYTES];
    960     uint8_t rhoprime[DILITHIUM_CRHBYTES];
    961     uint8_t rho[DILITHIUM_SEEDBYTES];
    962     uint8_t key[DILITHIUM_SEEDBYTES];
    963     uint8_t tr[DILITHIUM_SEEDBYTES];
    964     unsigned int rej_count = 0;
    965     const unsigned int MAX_REJECTIONS = 1000;  /* Safety limit */
    966 
    967     poly_dil mat[64];  /* Max 8x8 */
    968     poly_dil s1[8], s2[8], t0[8];  /* Secret key components */
    969     poly_dil y[8], z[8], w[8], w1[8], w0[8];
    970     poly_dil cp, cs2[8], ct0[8];
    971     poly_dil h[8];
    972 
    973     /* Extract secret key components: rho || key || tr || s1 || s2 || t0 */
    974     const uint8_t *sk_ptr = secret_key;
    975     memcpy(rho, sk_ptr, DILITHIUM_SEEDBYTES);
    976     sk_ptr += DILITHIUM_SEEDBYTES;
    977     memcpy(key, sk_ptr, DILITHIUM_SEEDBYTES);
    978     sk_ptr += DILITHIUM_SEEDBYTES;
    979     memcpy(tr, sk_ptr, DILITHIUM_SEEDBYTES);
    980     sk_ptr += DILITHIUM_SEEDBYTES;
    981 
    982     /* Unpack s1 */
    983     for (int i = 0; i < l; ++i) {
    984         if (eta == 2) {
    985             polyeta2_unpack(&s1[i], sk_ptr);
    986             sk_ptr += 96;
    987         } else {
    988             polyeta4_unpack(&s1[i], sk_ptr);
    989             sk_ptr += 128;
    990         }
    991     }
    992 
    993     /* Unpack s2 */
    994     for (int i = 0; i < k; ++i) {
    995         if (eta == 2) {
    996             polyeta2_unpack(&s2[i], sk_ptr);
    997             sk_ptr += 96;
    998         } else {
    999             polyeta4_unpack(&s2[i], sk_ptr);
   1000             sk_ptr += 128;
   1001         }
   1002     }
   1003 
   1004     /* Unpack t0 */
   1005     for (int i = 0; i < k; ++i) {
   1006         polyt0_unpack(&t0[i], sk_ptr);
   1007         sk_ptr += 416;
   1008     }
   1009 
   1010     /* Compute mu = CRH(tr || M) (FIPS 204 step 1) */
   1011     uint8_t *tohash = (uint8_t *)malloc(DILITHIUM_SEEDBYTES + message_len);
   1012     memcpy(tohash, tr, DILITHIUM_SEEDBYTES);
   1013     memcpy(tohash + DILITHIUM_SEEDBYTES, message, message_len);
   1014     shake256(tohash, DILITHIUM_SEEDBYTES + message_len, mu, DILITHIUM_CRHBYTES);
   1015     free(tohash);
   1016 
   1017     /* Generate rhoprime for mask generation */
   1018     uint8_t keybuf[DILITHIUM_SEEDBYTES + DILITHIUM_CRHBYTES];
   1019     memcpy(keybuf, key, DILITHIUM_SEEDBYTES);
   1020     memcpy(keybuf + DILITHIUM_SEEDBYTES, mu, DILITHIUM_CRHBYTES);
   1021     shake256(keybuf, DILITHIUM_SEEDBYTES + DILITHIUM_CRHBYTES, rhoprime, DILITHIUM_CRHBYTES);
   1022 
   1023     /* Expand matrix A from rho */
   1024     for (int i = 0; i < k; ++i) {
   1025         for (int j = 0; j < l; ++j) {
   1026             poly_uniform_dil(&mat[i*l + j], rho, (i << 8) + j);
   1027         }
   1028     }
   1029 
   1030     /* Convert s1 to NTT domain for polynomial multiplication */
   1031     for (int i = 0; i < l; ++i) {
   1032         ntt_dil(s1[i].coeffs);
   1033     }
   1034 
   1035     /* Convert s2 and t0 to NTT domain as well */
   1036     for (int i = 0; i < k; ++i) {
   1037         ntt_dil(s2[i].coeffs);
   1038         ntt_dil(t0[i].coeffs);
   1039     }
   1040 
   1041     /* REJECTION SAMPLING LOOP (FIPS 204 Algorithm 2 steps 5-16) */
   1042     uint16_t kappa = 0;
   1043     while (rej_count < MAX_REJECTIONS) {
   1044         rej_count++;
   1045 
   1046         /* Step 6: y ← ExpandMask(rhoprime, κ) */
   1047         for (int i = 0; i < l; ++i) {
   1048             poly_uniform_gamma1_dil(&y[i], rhoprime, kappa + i, gamma1);
   1049         }
   1050         kappa += l;
   1051 
   1052         /* Step 7: w ← NTT^{-1}(Â ◦ NTT(y)) */
   1053         for (int i = 0; i < l; ++i) {
   1054             ntt_dil(y[i].coeffs);
   1055         }
   1056 
   1057         for (int i = 0; i < k; ++i) {
   1058             poly_pointwise_montgomery(&w[i], &mat[i*l], &y[0]);
   1059             for (int j = 1; j < l; ++j) {
   1060                 poly_dil temp;
   1061                 poly_pointwise_montgomery(&temp, &mat[i*l + j], &y[j]);
   1062                 poly_add_dil(&w[i], &w[i], &temp);
   1063             }
   1064             invntt_tomont_dil(w[i].coeffs);
   1065             poly_reduce_dil(&w[i]);
   1066         }
   1067 
   1068         /* Step 8: w1 ← HighBits(w, 2γ₂) */
   1069         for (int i = 0; i < k; ++i) {
   1070             for (int j = 0; j < DILITHIUM_N; ++j) {
   1071                 w1[i].coeffs[j] = highbits_dil(w[i].coeffs[j], gamma2);
   1072             }
   1073         }
   1074 
   1075         /* Step 9: c̃ ← H(μ || w1Encode(w1)) (hash to challenge) - FIPS 204 */
   1076         uint8_t chash[DILITHIUM_SEEDBYTES];
   1077         /* Pack w1 and hash μ || w1 */
   1078         int w1_bytes = (gamma2 == (DILITHIUM_Q - 1) / 88) ? 192 : 128;  /* per poly */
   1079         uint8_t *w1_packed = (uint8_t *)malloc(k * w1_bytes + DILITHIUM_CRHBYTES);
   1080         memcpy(w1_packed, mu, DILITHIUM_CRHBYTES);
   1081         for (int i = 0; i < k; ++i) {
   1082             polyw1_pack(w1_packed + DILITHIUM_CRHBYTES + i * w1_bytes, &w1[i], gamma2);
   1083         }
   1084         shake256(w1_packed, DILITHIUM_CRHBYTES + k * w1_bytes, chash, DILITHIUM_SEEDBYTES);
   1085         free(w1_packed);
   1086 
   1087         /* Step 10: c ← SampleInBall(c̃) */
   1088         poly_challenge_dil(&cp, chash, tau);
   1089 
   1090         /* Step 11: z ← y + c·s1 */
   1091         ntt_dil(cp.coeffs);
   1092         for (int i = 0; i < l; ++i) {
   1093             poly_dil cs1;
   1094             poly_pointwise_montgomery(&cs1, &cp, &s1[i]);
   1095             invntt_tomont_dil(cs1.coeffs);
   1096             invntt_tomont_dil(y[i].coeffs);  /* Back to normal domain */
   1097             poly_add_dil(&z[i], &y[i], &cs1);
   1098             poly_reduce_dil(&z[i]);
   1099         }
   1100 
   1101         /* Step 12: Check ||z||∞ < γ₁ - β */
   1102         int norm_ok = 1;
   1103         for (int i = 0; i < l; ++i) {
   1104             if (poly_chknorm_dil(&z[i], gamma1 - beta)) {
   1105                 norm_ok = 0;
   1106                 break;
   1107             }
   1108         }
   1109         if (!norm_ok) continue;  /* Reject, try again */
   1110 
   1111         /* Step 13: r0 ← LowBits(w - c·s2, 2γ₂) */
   1112         for (int i = 0; i < k; ++i) {
   1113             poly_pointwise_montgomery(&cs2[i], &cp, &s2[i]);
   1114             invntt_tomont_dil(cs2[i].coeffs);
   1115             poly_sub_dil(&cs2[i], &w[i], &cs2[i]);
   1116             poly_reduce_dil(&cs2[i]);
   1117 
   1118             for (int j = 0; j < DILITHIUM_N; ++j) {
   1119                 w0[i].coeffs[j] = lowbits_dil(cs2[i].coeffs[j], gamma2);
   1120             }
   1121         }
   1122 
   1123         /* Step 14: Check ||r0||∞ < γ₂ - β */
   1124         for (int i = 0; i < k; ++i) {
   1125             if (poly_chknorm_dil(&w0[i], gamma2 - beta)) {
   1126                 norm_ok = 0;
   1127                 break;
   1128             }
   1129         }
   1130         if (!norm_ok) continue;  /* Reject, try again */
   1131 
   1132         /* Step 15: h ← MakeHint(-c·t0, w - c·s2 + c·t0, 2γ₂) */
   1133         unsigned int hint_count = 0;
   1134         for (int i = 0; i < k; ++i) {
   1135             /* For now, simplified hint generation */
   1136             poly_pointwise_montgomery(&ct0[i], &cp, &t0[i]);
   1137             invntt_tomont_dil(ct0[i].coeffs);
   1138 
   1139             /* Negate ct0 */
   1140             poly_dil neg_ct0;
   1141             for (int j = 0; j < DILITHIUM_N; ++j) {
   1142                 neg_ct0.coeffs[j] = -ct0[i].coeffs[j];
   1143             }
   1144 
   1145             hint_count += make_hint_dil(&h[i], &neg_ct0, &cs2[i], gamma2);
   1146         }
   1147 
   1148         /* Step 16: Check hint count ≤ ω */
   1149         if (hint_count > omega) continue;  /* Reject, try again */
   1150 
   1151         /* SUCCESS! Pack signature σ = (c̃, z, h) - FIPS 204 Format */
   1152         uint8_t *sig_ptr = signature;
   1153 
   1154         /* Pack c̃ (32 bytes) */
   1155         memcpy(sig_ptr, chash, DILITHIUM_SEEDBYTES);
   1156         sig_ptr += DILITHIUM_SEEDBYTES;
   1157 
   1158         /* Pack z (gamma1-dependent size) */
   1159         int z_bytes_per_poly = (gamma1 == (1 << 17)) ? 576 : 640;
   1160         for (int i = 0; i < l; ++i) {
   1161             polyz_pack(sig_ptr, &z[i], gamma1);
   1162             sig_ptr += z_bytes_per_poly;
   1163         }
   1164 
   1165         /* Pack h (omega bytes per polynomial) */
   1166         for (int i = 0; i < k; ++i) {
   1167             polyhint_pack(sig_ptr, &h[i], omega);
   1168             sig_ptr += omega;
   1169         }
   1170 
   1171         *signature_len = sig_ptr - signature;
   1172 
   1173         /* Cleanup */
   1174         secure_zero(mu, sizeof(mu));
   1175         secure_zero(rhoprime, sizeof(rhoprime));
   1176         secure_zero(y, sizeof(y));
   1177         secure_zero(z, sizeof(z));
   1178         secure_zero(&cp, sizeof(cp));
   1179 
   1180         return 0;  /* Success */
   1181     }
   1182 
   1183     /* Too many rejections - should never happen with correct implementation */
   1184     return -1;
   1185 }
   1186 
   1187 int dilithium2_sign(uint8_t *signature, size_t *signature_len,
   1188                      const uint8_t *message, size_t message_len,
   1189                      const uint8_t *secret_key) {
   1190     if (!signature || !signature_len || !message || !secret_key) return -1;
   1191 
   1192     return dilithium_sign_internal(signature, signature_len, message, message_len, secret_key,
   1193                                      DILITHIUM2_K, DILITHIUM2_L, DILITHIUM2_ETA,
   1194                                      DILITHIUM2_TAU, DILITHIUM2_BETA, DILITHIUM2_GAMMA1,
   1195                                      DILITHIUM2_GAMMA2, DILITHIUM2_OMEGA);
   1196 }
   1197 
   1198 /* Full FIPS 204 Verification (Algorithm 3) */
   1199 static int dilithium_verify_internal(const uint8_t *signature, size_t signature_len,
   1200                                        const uint8_t *message, size_t message_len,
   1201                                        const uint8_t *public_key,
   1202                                        int k, int l, int tau, int beta, int gamma1, int gamma2, unsigned int omega) {
   1203     uint8_t mu[DILITHIUM_CRHBYTES];
   1204     uint8_t rho[DILITHIUM_SEEDBYTES];
   1205     uint8_t tr[DILITHIUM_SEEDBYTES];
   1206     uint8_t c_tilde[DILITHIUM_SEEDBYTES];
   1207 
   1208     poly_dil mat[64];  /* Max 8x8 */
   1209     poly_dil t1[8], z[8], h[8];
   1210     poly_dil cp, w1_prime[8], Az[8], ct1[8];
   1211 
   1212     /* Validate signature length: sig = c_tilde || z || h */
   1213     /* z: l polynomials, each (gamma1 == 2^17) ? 576 : 640 bytes */
   1214     /* h: k * omega bytes for hint encoding */
   1215     int z_bytes_per_poly = (gamma1 == (1 << 17)) ? 576 : 640;
   1216     size_t expected_sig_len = DILITHIUM_SEEDBYTES + (size_t)l * (size_t)z_bytes_per_poly + (size_t)k * omega;
   1217     if (signature_len != expected_sig_len) {
   1218         return -1;  /* Invalid signature length */
   1219     }
   1220 
   1221     /* Step 1: Unpack signature σ = (c̃, z, h) - FIPS 204 Format */
   1222     const uint8_t *sig_ptr = signature;
   1223     memcpy(c_tilde, sig_ptr, DILITHIUM_SEEDBYTES);
   1224     sig_ptr += DILITHIUM_SEEDBYTES;
   1225 
   1226     /* Unpack z */
   1227     for (int i = 0; i < l; ++i) {
   1228         polyz_unpack(&z[i], sig_ptr, gamma1);
   1229         sig_ptr += z_bytes_per_poly;
   1230     }
   1231 
   1232     /* Unpack h */
   1233     for (int i = 0; i < k; ++i) {
   1234         polyhint_unpack(&h[i], sig_ptr, omega);
   1235         sig_ptr += omega;
   1236     }
   1237 
   1238     /* Step 2: Unpack public key pk = (ρ, t1) - FIPS 204 Format */
   1239     memcpy(rho, public_key, DILITHIUM_SEEDBYTES);
   1240     const uint8_t *pk_t1 = public_key + DILITHIUM_SEEDBYTES;
   1241     for (int i = 0; i < k; ++i) {
   1242         polyt1_unpack(&t1[i], pk_t1 + i * 320);
   1243     }
   1244 
   1245     /* Step 3: Compute tr = H(pk) */
   1246     sha3_256(public_key, DILITHIUM_SEEDBYTES + k * 320, tr);
   1247 
   1248     /* Step 4: Compute μ = CRH(tr || M) */
   1249     uint8_t *tohash = (uint8_t *)malloc(DILITHIUM_SEEDBYTES + message_len);
   1250     memcpy(tohash, tr, DILITHIUM_SEEDBYTES);
   1251     memcpy(tohash + DILITHIUM_SEEDBYTES, message, message_len);
   1252     shake256(tohash, DILITHIUM_SEEDBYTES + message_len, mu, DILITHIUM_CRHBYTES);
   1253     free(tohash);
   1254 
   1255     /* Step 5: c ← SampleInBall(c̃) */
   1256     poly_challenge_dil(&cp, c_tilde, tau);
   1257 
   1258     /* Step 6: Expand matrix A from ρ */
   1259     for (int i = 0; i < k; ++i) {
   1260         for (int j = 0; j < l; ++j) {
   1261             poly_uniform_dil(&mat[i*l + j], rho, (i << 8) + j);
   1262         }
   1263     }
   1264 
   1265     /* Step 7: Check ||z||∞ < γ₁ - β */
   1266     for (int i = 0; i < l; ++i) {
   1267         if (poly_chknorm_dil(&z[i], gamma1 - beta)) {
   1268             secure_zero(mu, sizeof(mu));
   1269             return -1;  /* Invalid signature */
   1270         }
   1271     }
   1272 
   1273     /* Step 8: Compute w'₁ = UseHint(h, Az - ct1·2^d, 2γ₂) */
   1274 
   1275     /* 8a: Compute Az */
   1276     for (int i = 0; i < l; ++i) {
   1277         ntt_dil(z[i].coeffs);
   1278     }
   1279     for (int i = 0; i < k; ++i) {
   1280         poly_pointwise_montgomery(&Az[i], &mat[i*l], &z[0]);
   1281         for (int j = 1; j < l; ++j) {
   1282             poly_dil temp;
   1283             poly_pointwise_montgomery(&temp, &mat[i*l + j], &z[j]);
   1284             poly_add_dil(&Az[i], &Az[i], &temp);
   1285         }
   1286         invntt_tomont_dil(Az[i].coeffs);
   1287         poly_reduce_dil(&Az[i]);
   1288     }
   1289 
   1290     /* 8b: Compute ct1·2^d (shift left by d bits) */
   1291     ntt_dil(cp.coeffs);
   1292     for (int i = 0; i < k; ++i) {
   1293         poly_pointwise_montgomery(&ct1[i], &cp, &t1[i]);
   1294         invntt_tomont_dil(ct1[i].coeffs);
   1295 
   1296         for (int j = 0; j < DILITHIUM_N; ++j) {
   1297             ct1[i].coeffs[j] <<= DILITHIUM_D;
   1298         }
   1299     }
   1300 
   1301     /* 8c: Compute w' = Az - ct1·2^d */
   1302     poly_dil w_prime[8];
   1303     for (int i = 0; i < k; ++i) {
   1304         poly_sub_dil(&w_prime[i], &Az[i], &ct1[i]);
   1305         poly_reduce_dil(&w_prime[i]);
   1306     }
   1307 
   1308     /* 8d: Apply hints to get w'₁ */
   1309     for (int i = 0; i < k; ++i) {
   1310         for (int j = 0; j < DILITHIUM_N; ++j) {
   1311             w1_prime[i].coeffs[j] = use_hint_dil(h[i].coeffs[j], w_prime[i].coeffs[j], gamma2);
   1312         }
   1313     }
   1314 
   1315     /* Step 9: Count hints */
   1316     unsigned int hint_count = 0;
   1317     for (int i = 0; i < k; ++i) {
   1318         for (int j = 0; j < DILITHIUM_N; ++j) {
   1319             hint_count += h[i].coeffs[j];
   1320         }
   1321     }
   1322 
   1323     /* Step 10: Check hint count ≤ ω */
   1324     if (hint_count > omega) {
   1325         secure_zero(mu, sizeof(mu));
   1326         return -1;  /* Invalid signature */
   1327     }
   1328 
   1329     /* Step 11: Verify c̃ = H(μ || w1Encode(w'₁)) - FIPS 204 */
   1330     uint8_t c_tilde_computed[DILITHIUM_SEEDBYTES];
   1331     /* Pack w1_prime and hash μ || w1_prime */
   1332     int w1_bytes = (gamma2 == (DILITHIUM_Q - 1) / 88) ? 192 : 128;  /* per poly */
   1333     uint8_t *w1_packed = (uint8_t *)malloc(k * w1_bytes + DILITHIUM_CRHBYTES);
   1334     memcpy(w1_packed, mu, DILITHIUM_CRHBYTES);
   1335     for (int i = 0; i < k; ++i) {
   1336         polyw1_pack(w1_packed + DILITHIUM_CRHBYTES + i * w1_bytes, &w1_prime[i], gamma2);
   1337     }
   1338     shake256(w1_packed, DILITHIUM_CRHBYTES + k * w1_bytes, c_tilde_computed, DILITHIUM_SEEDBYTES);
   1339     free(w1_packed);
   1340 
   1341     /* Constant-time comparison */
   1342     int valid = ct_eq(c_tilde, c_tilde_computed, DILITHIUM_SEEDBYTES);
   1343 
   1344     /* Cleanup */
   1345     secure_zero(mu, sizeof(mu));
   1346     secure_zero(c_tilde_computed, sizeof(c_tilde_computed));
   1347 
   1348     return valid ? 0 : -1;
   1349 }
   1350 
   1351 int dilithium2_verify(const uint8_t *signature, size_t signature_len,
   1352                        const uint8_t *message, size_t message_len,
   1353                        const uint8_t *public_key) {
   1354     if (!signature || !message || !public_key) return -1;
   1355     if (signature_len != DILITHIUM2_SIGNATURE_BYTES) return -1;
   1356 
   1357     return dilithium_verify_internal(signature, signature_len, message, message_len, public_key,
   1358                                        DILITHIUM2_K, DILITHIUM2_L, DILITHIUM2_TAU,
   1359                                        DILITHIUM2_BETA, DILITHIUM2_GAMMA1, DILITHIUM2_GAMMA2,
   1360                                        DILITHIUM2_OMEGA);
   1361 }
   1362 
   1363 /* Dilithium3 implementations */
   1364 int dilithium3_sign(uint8_t *signature, size_t *signature_len,
   1365                      const uint8_t *message, size_t message_len,
   1366                      const uint8_t *secret_key) {
   1367     if (!signature || !signature_len || !message || !secret_key) return -1;
   1368 
   1369     return dilithium_sign_internal(signature, signature_len, message, message_len, secret_key,
   1370                                      DILITHIUM3_K, DILITHIUM3_L, DILITHIUM3_ETA,
   1371                                      DILITHIUM3_TAU, DILITHIUM3_BETA, DILITHIUM3_GAMMA1,
   1372                                      DILITHIUM3_GAMMA2, DILITHIUM3_OMEGA);
   1373 }
   1374 
   1375 int dilithium3_verify(const uint8_t *signature, size_t signature_len,
   1376                        const uint8_t *message, size_t message_len,
   1377                        const uint8_t *public_key) {
   1378     if (!signature || !message || !public_key) return -1;
   1379     if (signature_len != DILITHIUM3_SIGNATURE_BYTES) return -1;
   1380 
   1381     return dilithium_verify_internal(signature, signature_len, message, message_len, public_key,
   1382                                        DILITHIUM3_K, DILITHIUM3_L, DILITHIUM3_TAU,
   1383                                        DILITHIUM3_BETA, DILITHIUM3_GAMMA1, DILITHIUM3_GAMMA2,
   1384                                        DILITHIUM3_OMEGA);
   1385 }
   1386 
   1387 /* Dilithium5 implementations */
   1388 int dilithium5_sign(uint8_t *signature, size_t *signature_len,
   1389                      const uint8_t *message, size_t message_len,
   1390                      const uint8_t *secret_key) {
   1391     if (!signature || !signature_len || !message || !secret_key) return -1;
   1392 
   1393     return dilithium_sign_internal(signature, signature_len, message, message_len, secret_key,
   1394                                      DILITHIUM5_K, DILITHIUM5_L, DILITHIUM5_ETA,
   1395                                      DILITHIUM5_TAU, DILITHIUM5_BETA, DILITHIUM5_GAMMA1,
   1396                                      DILITHIUM5_GAMMA2, DILITHIUM5_OMEGA);
   1397 }
   1398 
   1399 int dilithium5_verify(const uint8_t *signature, size_t signature_len,
   1400                        const uint8_t *message, size_t message_len,
   1401                        const uint8_t *public_key) {
   1402     if (!signature || !message || !public_key) return -1;
   1403     if (signature_len != DILITHIUM5_SIGNATURE_BYTES) return -1;
   1404 
   1405     return dilithium_verify_internal(signature, signature_len, message, message_len, public_key,
   1406                                        DILITHIUM5_K, DILITHIUM5_L, DILITHIUM5_TAU,
   1407                                        DILITHIUM5_BETA, DILITHIUM5_GAMMA1, DILITHIUM5_GAMMA2,
   1408                                        DILITHIUM5_OMEGA);
   1409 }