Ed25519.c (24140B)
1 /* 2 * Ed25519 Implementation 3 * RFC 8032 - Edwards-curve Digital Signature Algorithm (EdDSA) 4 * 5 * Based on Curve25519: y^2 = x^3 + 486662x^2 + x (mod 2^255-19) 6 * Using twisted Edwards curve: -x^2 + y^2 = 1 + dx^2y^2 7 */ 8 9 #include "Ed25519.h" 10 #include "hashing/SHA512.c" /* We'll need SHA-512 */ 11 #include <string.h> 12 13 /* Field element: value mod 2^255-19 represented as 10 x 32-bit limbs */ 14 typedef int32_t fe[10]; 15 16 /* Point on Edwards curve */ 17 typedef struct { 18 fe X, Y, Z, T; /* Extended coordinates */ 19 } ge_p3; 20 21 typedef struct { 22 fe X, Y, Z; /* Projective coordinates */ 23 } ge_p2; 24 25 typedef struct { 26 fe YplusX, YminusX, Z, T2d; /* Precomputed for doubling */ 27 } ge_precomp; 28 29 typedef struct { 30 fe YplusX, YminusX, Z; /* Cached */ 31 } ge_cached; 32 33 /* Constants */ 34 static const fe fe_zero = {0}; 35 static const fe fe_one = {1}; 36 static const fe fe_d = { 37 -10913610, 13857413, -15372611, 6949391, 114729, 38 -8787816, -6275908, -3247719, -18696448, -12055116 39 }; 40 41 static const fe fe_sqrtm1 = { 42 -32595792, -7943725, 9377950, 3500415, 12389472, 43 -272473, -25146209, -2005654, 326686, 11406482 44 }; 45 46 /* Load 32-bit little-endian */ 47 static uint32_t load_3(const uint8_t *in) { 48 return (uint32_t)in[0] | ((uint32_t)in[1] << 8) | ((uint32_t)in[2] << 16); 49 } 50 51 static uint32_t load_4(const uint8_t *in) { 52 return (uint32_t)in[0] | ((uint32_t)in[1] << 8) | 53 ((uint32_t)in[2] << 16) | ((uint32_t)in[3] << 24); 54 } 55 56 static uint64_t load_4_64(const uint8_t *in) { 57 return (uint64_t)load_4(in); 58 } 59 60 /* Field element operations */ 61 62 static void fe_copy(fe h, const fe f) { 63 memcpy(h, f, sizeof(fe)); 64 } 65 66 static void fe_0(fe h) { 67 memset(h, 0, sizeof(fe)); 68 } 69 70 static void fe_1(fe h) { 71 h[0] = 1; 72 h[1] = 0; h[2] = 0; h[3] = 0; h[4] = 0; 73 h[5] = 0; h[6] = 0; h[7] = 0; h[8] = 0; h[9] = 0; 74 } 75 76 static void fe_add(fe h, const fe f, const fe g) { 77 for (int i = 0; i < 10; i++) { 78 h[i] = f[i] + g[i]; 79 } 80 } 81 82 static void fe_sub(fe h, const fe f, const fe g) { 83 for (int i = 0; i < 10; i++) { 84 h[i] = f[i] - g[i]; 85 } 86 } 87 88 static void fe_neg(fe h, const fe f) { 89 for (int i = 0; i < 10; i++) { 90 h[i] = -f[i]; 91 } 92 } 93 94 static void fe_mul(fe h, const fe f, const fe g) { 95 int32_t f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4]; 96 int32_t f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9]; 97 int32_t g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4]; 98 int32_t g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9]; 99 100 int64_t h0 = (int64_t)f0 * g0 + (int64_t)f1 * (2*g9) + (int64_t)f2 * (2*g8) + 101 (int64_t)f3 * (2*g7) + (int64_t)f4 * (2*g6) + (int64_t)f5 * (2*g5) + 102 (int64_t)f6 * (2*g4) + (int64_t)f7 * (2*g3) + (int64_t)f8 * (2*g2) + (int64_t)f9 * (2*g1); 103 int64_t h1 = (int64_t)f0 * g1 + (int64_t)f1 * g0 + (int64_t)f2 * (2*g9) + 104 (int64_t)f3 * (2*g8) + (int64_t)f4 * (2*g7) + (int64_t)f5 * (2*g6) + 105 (int64_t)f6 * (2*g5) + (int64_t)f7 * (2*g4) + (int64_t)f8 * (2*g3) + (int64_t)f9 * (2*g2); 106 int64_t h2 = (int64_t)f0 * g2 + (int64_t)f1 * g1 + (int64_t)f2 * g0 + 107 (int64_t)f3 * (2*g9) + (int64_t)f4 * (2*g8) + (int64_t)f5 * (2*g7) + 108 (int64_t)f6 * (2*g6) + (int64_t)f7 * (2*g5) + (int64_t)f8 * (2*g4) + (int64_t)f9 * (2*g3); 109 int64_t h3 = (int64_t)f0 * g3 + (int64_t)f1 * g2 + (int64_t)f2 * g1 + (int64_t)f3 * g0 + 110 (int64_t)f4 * (2*g9) + (int64_t)f5 * (2*g8) + (int64_t)f6 * (2*g7) + 111 (int64_t)f7 * (2*g6) + (int64_t)f8 * (2*g5) + (int64_t)f9 * (2*g4); 112 int64_t h4 = (int64_t)f0 * g4 + (int64_t)f1 * g3 + (int64_t)f2 * g2 + (int64_t)f3 * g1 + (int64_t)f4 * g0 + 113 (int64_t)f5 * (2*g9) + (int64_t)f6 * (2*g8) + (int64_t)f7 * (2*g7) + 114 (int64_t)f8 * (2*g6) + (int64_t)f9 * (2*g5); 115 int64_t h5 = (int64_t)f0 * g5 + (int64_t)f1 * g4 + (int64_t)f2 * g3 + (int64_t)f3 * g2 + 116 (int64_t)f4 * g1 + (int64_t)f5 * g0 + (int64_t)f6 * (2*g9) + 117 (int64_t)f7 * (2*g8) + (int64_t)f8 * (2*g7) + (int64_t)f9 * (2*g6); 118 int64_t h6 = (int64_t)f0 * g6 + (int64_t)f1 * g5 + (int64_t)f2 * g4 + (int64_t)f3 * g3 + 119 (int64_t)f4 * g2 + (int64_t)f5 * g1 + (int64_t)f6 * g0 + 120 (int64_t)f7 * (2*g9) + (int64_t)f8 * (2*g8) + (int64_t)f9 * (2*g7); 121 int64_t h7 = (int64_t)f0 * g7 + (int64_t)f1 * g6 + (int64_t)f2 * g5 + (int64_t)f3 * g4 + 122 (int64_t)f4 * g3 + (int64_t)f5 * g2 + (int64_t)f6 * g1 + (int64_t)f7 * g0 + 123 (int64_t)f8 * (2*g9) + (int64_t)f9 * (2*g8); 124 int64_t h8 = (int64_t)f0 * g8 + (int64_t)f1 * g7 + (int64_t)f2 * g6 + (int64_t)f3 * g5 + 125 (int64_t)f4 * g4 + (int64_t)f5 * g3 + (int64_t)f6 * g2 + (int64_t)f7 * g1 + 126 (int64_t)f8 * g0 + (int64_t)f9 * (2*g9); 127 int64_t h9 = (int64_t)f0 * g9 + (int64_t)f1 * g8 + (int64_t)f2 * g7 + (int64_t)f3 * g6 + 128 (int64_t)f4 * g5 + (int64_t)f5 * g4 + (int64_t)f6 * g3 + (int64_t)f7 * g2 + 129 (int64_t)f8 * g1 + (int64_t)f9 * g0; 130 131 /* Reduce mod 2^255-19 */ 132 int64_t carry; 133 carry = (h0 + (1<<25)) >> 26; h1 += carry; h0 -= carry << 26; 134 carry = (h4 + (1<<25)) >> 26; h5 += carry; h4 -= carry << 26; 135 carry = (h1 + (1<<24)) >> 25; h2 += carry; h1 -= carry << 25; 136 carry = (h5 + (1<<24)) >> 25; h6 += carry; h5 -= carry << 25; 137 carry = (h2 + (1<<25)) >> 26; h3 += carry; h2 -= carry << 26; 138 carry = (h6 + (1<<25)) >> 26; h7 += carry; h6 -= carry << 26; 139 carry = (h3 + (1<<24)) >> 25; h4 += carry; h3 -= carry << 25; 140 carry = (h7 + (1<<24)) >> 25; h8 += carry; h7 -= carry << 25; 141 carry = (h4 + (1<<25)) >> 26; h5 += carry; h4 -= carry << 26; 142 carry = (h8 + (1<<25)) >> 26; h9 += carry; h8 -= carry << 26; 143 carry = (h9 + (1<<24)) >> 25; h0 += carry * 19; h9 -= carry << 25; 144 carry = (h0 + (1<<25)) >> 26; h1 += carry; h0 -= carry << 26; 145 146 h[0] = (int32_t)h0; h[1] = (int32_t)h1; h[2] = (int32_t)h2; h[3] = (int32_t)h3; h[4] = (int32_t)h4; 147 h[5] = (int32_t)h5; h[6] = (int32_t)h6; h[7] = (int32_t)h7; h[8] = (int32_t)h8; h[9] = (int32_t)h9; 148 } 149 150 static void fe_sq(fe h, const fe f) { 151 fe_mul(h, f, f); 152 } 153 154 static void fe_sq2(fe h, const fe f) { 155 fe_sq(h, f); 156 fe_add(h, h, h); 157 } 158 159 static void fe_invert(fe out, const fe z) { 160 fe t0, t1, t2, t3; 161 fe_sq(t0, z); 162 fe_sq(t1, t0); 163 fe_sq(t1, t1); 164 fe_mul(t1, z, t1); 165 fe_mul(t0, t0, t1); 166 fe_sq(t2, t0); 167 fe_mul(t1, t1, t2); 168 fe_sq(t2, t1); 169 for (int i = 1; i < 5; i++) fe_sq(t2, t2); 170 fe_mul(t1, t2, t1); 171 fe_sq(t2, t1); 172 for (int i = 1; i < 10; i++) fe_sq(t2, t2); 173 fe_mul(t2, t2, t1); 174 fe_sq(t3, t2); 175 for (int i = 1; i < 20; i++) fe_sq(t3, t3); 176 fe_mul(t2, t3, t2); 177 fe_sq(t2, t2); 178 for (int i = 1; i < 10; i++) fe_sq(t2, t2); 179 fe_mul(t1, t2, t1); 180 fe_sq(t2, t1); 181 for (int i = 1; i < 50; i++) fe_sq(t2, t2); 182 fe_mul(t2, t2, t1); 183 fe_sq(t3, t2); 184 for (int i = 1; i < 100; i++) fe_sq(t3, t3); 185 fe_mul(t2, t3, t2); 186 fe_sq(t2, t2); 187 for (int i = 1; i < 50; i++) fe_sq(t2, t2); 188 fe_mul(t1, t2, t1); 189 fe_sq(t1, t1); 190 for (int i = 1; i < 5; i++) fe_sq(t1, t1); 191 fe_mul(out, t1, t0); 192 } 193 194 static void fe_tobytes(uint8_t *s, const fe h) { 195 int32_t h0 = h[0], h1 = h[1], h2 = h[2], h3 = h[3], h4 = h[4]; 196 int32_t h5 = h[5], h6 = h[6], h7 = h[7], h8 = h[8], h9 = h[9]; 197 int32_t q = (19 * h9 + (1 << 24)) >> 25; 198 q = (h0 + q) >> 26; q = (h1 + q) >> 25; q = (h2 + q) >> 26; 199 q = (h3 + q) >> 25; q = (h4 + q) >> 26; q = (h5 + q) >> 25; 200 q = (h6 + q) >> 26; q = (h7 + q) >> 25; q = (h8 + q) >> 26; 201 q = (h9 + q) >> 25; 202 h0 += 19 * q; 203 int32_t carry = h0 >> 26; h1 += carry; h0 -= carry << 26; 204 carry = h1 >> 25; h2 += carry; h1 -= carry << 25; 205 carry = h2 >> 26; h3 += carry; h2 -= carry << 26; 206 carry = h3 >> 25; h4 += carry; h3 -= carry << 25; 207 carry = h4 >> 26; h5 += carry; h4 -= carry << 26; 208 carry = h5 >> 25; h6 += carry; h5 -= carry << 25; 209 carry = h6 >> 26; h7 += carry; h6 -= carry << 26; 210 carry = h7 >> 25; h8 += carry; h7 -= carry << 25; 211 carry = h8 >> 26; h9 += carry; h8 -= carry << 26; 212 s[0] = h0; s[1] = h0 >> 8; s[2] = h0 >> 16; s[3] = (h0 >> 24) | (h1 << 2); 213 s[4] = h1 >> 6; s[5] = h1 >> 14; s[6] = (h1 >> 22) | (h2 << 3); 214 s[7] = h2 >> 5; s[8] = h2 >> 13; s[9] = (h2 >> 21) | (h3 << 5); 215 s[10] = h3 >> 3; s[11] = h3 >> 11; s[12] = (h3 >> 19) | (h4 << 6); 216 s[13] = h4 >> 2; s[14] = h4 >> 10; s[15] = h4 >> 18; 217 s[16] = h5; s[17] = h5 >> 8; s[18] = h5 >> 16; s[19] = (h5 >> 24) | (h6 << 1); 218 s[20] = h6 >> 7; s[21] = h6 >> 15; s[22] = (h6 >> 23) | (h7 << 3); 219 s[23] = h7 >> 5; s[24] = h7 >> 13; s[25] = (h7 >> 21) | (h8 << 4); 220 s[26] = h8 >> 4; s[27] = h8 >> 12; s[28] = (h8 >> 20) | (h9 << 6); 221 s[29] = h9 >> 2; s[30] = h9 >> 10; s[31] = h9 >> 18; 222 } 223 224 static void fe_frombytes(fe h, const uint8_t *s) { 225 int64_t h0 = load_4_64(s); 226 int64_t h1 = load_3(s + 4) << 6; 227 int64_t h2 = load_3(s + 7) << 5; 228 int64_t h3 = load_3(s + 10) << 3; 229 int64_t h4 = load_3(s + 13) << 2; 230 int64_t h5 = load_4_64(s + 16); 231 int64_t h6 = load_3(s + 20) << 7; 232 int64_t h7 = load_3(s + 23) << 5; 233 int64_t h8 = load_3(s + 26) << 4; 234 int64_t h9 = (load_3(s + 29) & 8388607) << 2; 235 int64_t carry; 236 carry = (h9 + (1<<24)) >> 25; h0 += carry * 19; h9 -= carry << 25; 237 carry = (h1 + (1<<24)) >> 25; h2 += carry; h1 -= carry << 25; 238 carry = (h3 + (1<<24)) >> 25; h4 += carry; h3 -= carry << 25; 239 carry = (h5 + (1<<24)) >> 25; h6 += carry; h5 -= carry << 25; 240 carry = (h7 + (1<<24)) >> 25; h8 += carry; h7 -= carry << 25; 241 carry = (h0 + (1<<25)) >> 26; h1 += carry; h0 -= carry << 26; 242 carry = (h2 + (1<<25)) >> 26; h3 += carry; h2 -= carry << 26; 243 carry = (h4 + (1<<25)) >> 26; h5 += carry; h4 -= carry << 26; 244 carry = (h6 + (1<<25)) >> 26; h7 += carry; h6 -= carry << 26; 245 carry = (h8 + (1<<25)) >> 26; h9 += carry; h8 -= carry << 26; 246 h[0] = (int32_t)h0; h[1] = (int32_t)h1; h[2] = (int32_t)h2; h[3] = (int32_t)h3; h[4] = (int32_t)h4; 247 h[5] = (int32_t)h5; h[6] = (int32_t)h6; h[7] = (int32_t)h7; h[8] = (int32_t)h8; h[9] = (int32_t)h9; 248 } 249 250 /* Check if field element is negative */ 251 static int fe_isnegative(const fe f) { 252 uint8_t s[32]; 253 fe_tobytes(s, f); 254 return s[0] & 1; 255 } 256 257 /* Check if field element is nonzero */ 258 static int fe_isnonzero(const fe f) { 259 uint8_t s[32]; 260 fe_tobytes(s, f); 261 uint8_t result = 0; 262 for (int i = 0; i < 32; i++) { 263 result |= s[i]; 264 } 265 return result != 0; 266 } 267 268 /* Conditional move: if b == 1, set f = g; if b == 0, do nothing */ 269 static void fe_cmov(fe f, const fe g, int b) { 270 int32_t mask = -b; /* All 1s if b=1, all 0s if b=0 */ 271 for (int i = 0; i < 10; i++) { 272 f[i] ^= mask & (f[i] ^ g[i]); 273 } 274 } 275 276 /* Compute sqrt(u/v) = u * v^3 * (u * v^7)^((p-5)/8) */ 277 static int fe_sqrt_ratio_m1(fe r, const fe u, const fe v) { 278 fe v3, vxx, check, x, vx2; 279 280 fe_sq(v3, v); 281 fe_mul(v3, v3, v); /* v3 = v^3 */ 282 fe_sq(x, v3); 283 fe_mul(x, x, v); 284 fe_mul(x, x, u); /* x = uv^7 */ 285 286 /* Compute x^((p-5)/8) using addition chain */ 287 fe t0, t1, t2; 288 fe_sq(t0, x); 289 fe_sq(t1, t0); 290 fe_sq(t1, t1); 291 fe_mul(t1, x, t1); 292 fe_mul(t0, t0, t1); 293 fe_sq(t2, t0); 294 fe_mul(t1, t1, t2); 295 fe_sq(t2, t1); 296 for (int i = 1; i < 5; i++) fe_sq(t2, t2); 297 fe_mul(t1, t2, t1); 298 fe_sq(t2, t1); 299 for (int i = 1; i < 10; i++) fe_sq(t2, t2); 300 fe_mul(t2, t2, t1); 301 fe_sq(r, t2); 302 for (int i = 1; i < 20; i++) fe_sq(r, r); 303 fe_mul(r, r, t2); 304 fe_sq(r, r); 305 for (int i = 1; i < 10; i++) fe_sq(r, r); 306 fe_mul(r, r, t1); 307 fe_sq(t2, r); 308 for (int i = 1; i < 50; i++) fe_sq(t2, t2); 309 fe_mul(t2, t2, r); 310 fe_sq(r, t2); 311 for (int i = 1; i < 100; i++) fe_sq(r, r); 312 fe_mul(r, r, t2); 313 fe_sq(r, r); 314 for (int i = 1; i < 50; i++) fe_sq(r, r); 315 fe_mul(r, r, t1); 316 fe_sq(r, r); 317 fe_sq(r, r); 318 fe_mul(r, r, x); 319 320 /* r = u * v^3 * (uv^7)^((p-5)/8) */ 321 fe_mul(vx2, r, r); 322 fe_mul(vx2, vx2, v); 323 fe_sub(check, vx2, u); 324 if (fe_isnonzero(check)) { 325 fe_add(check, vx2, u); 326 if (fe_isnonzero(check)) { 327 return -1; /* Not a square */ 328 } 329 fe_mul(r, r, fe_sqrtm1); 330 } 331 332 if (fe_isnegative(r)) { 333 fe_neg(r, r); 334 } 335 return 0; 336 } 337 338 /* Group operations */ 339 static void ge_p3_0(ge_p3 *h) { 340 fe_0(h->X); 341 fe_1(h->Y); 342 fe_1(h->Z); 343 fe_0(h->T); 344 } 345 346 static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h) { 347 fe recip, x, y; 348 fe_invert(recip, h->Z); 349 fe_mul(x, h->X, recip); 350 fe_mul(y, h->Y, recip); 351 fe_tobytes(s, y); 352 s[31] ^= (fe_isnegative(x) << 7); 353 } 354 355 /* Decode point from bytes (decompress) */ 356 static int ge_frombytes_negate_vartime(ge_p3 *h, const uint8_t *s) { 357 fe u, v, v3, vxx, check; 358 359 fe_frombytes(h->Y, s); 360 fe_1(h->Z); 361 fe_sq(u, h->Y); /* u = y^2 */ 362 fe_mul(v, u, fe_d); /* v = dy^2 */ 363 fe_sub(u, u, h->Z); /* u = y^2-1 */ 364 fe_add(v, v, h->Z); /* v = dy^2+1 */ 365 366 /* Compute x = sqrt(u/v) */ 367 if (fe_sqrt_ratio_m1(h->X, u, v) != 0) { 368 return -1; /* Not on curve */ 369 } 370 371 if (fe_isnegative(h->X) != ((s[31] >> 7) & 1)) { 372 fe_neg(h->X, h->X); 373 } 374 375 fe_mul(h->T, h->X, h->Y); 376 return 0; 377 } 378 379 /* Point addition: r = p + q */ 380 static void ge_add(ge_p3 *r, const ge_p3 *p, const ge_cached *q) { 381 fe t0, A, B, C, D, E, F, G, H; 382 383 fe_sub(A, p->Y, p->X); 384 fe_add(B, p->Y, p->X); 385 fe_mul(A, A, q->YminusX); 386 fe_mul(B, B, q->YplusX); 387 fe_mul(C, q->Z, p->T); 388 fe_add(C, C, C); 389 fe_sub(D, B, A); 390 fe_add(E, B, A); 391 fe_add(F, p->Z, p->Z); 392 fe_sub(F, F, C); 393 fe_add(G, F, D); 394 fe_sub(H, F, D); 395 396 fe_mul(r->X, G, H); 397 fe_mul(r->Y, E, F); 398 fe_mul(r->Z, F, G); 399 fe_mul(r->T, E, H); 400 } 401 402 /* Point doubling */ 403 static void ge_p3_dbl(ge_p2 *r, const ge_p3 *p) { 404 fe A, B, C, D, E, G, F, H; 405 406 fe_sq(A, p->X); 407 fe_sq(B, p->Y); 408 fe_sq2(C, p->Z); 409 fe_add(D, A, B); 410 fe_add(E, p->X, p->Y); 411 fe_sq(E, E); 412 fe_sub(E, E, D); 413 fe_sub(G, B, A); 414 fe_sub(F, C, G); 415 fe_sub(H, B, A); 416 417 fe_mul(r->X, E, F); 418 fe_mul(r->Y, G, H); 419 fe_mul(r->Z, F, G); 420 } 421 422 /* Convert p2 to p3 */ 423 static void ge_p2_to_p3(ge_p3 *r, const ge_p2 *p) { 424 fe_copy(r->X, p->X); 425 fe_copy(r->Y, p->Y); 426 fe_copy(r->Z, p->Z); 427 fe_mul(r->T, p->X, p->Y); 428 } 429 430 /* Convert p3 to cached */ 431 static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) { 432 fe_add(r->YplusX, p->Y, p->X); 433 fe_sub(r->YminusX, p->Y, p->X); 434 fe_copy(r->Z, p->Z); 435 } 436 437 /* Ed25519 base point (generator) */ 438 static const fe ed25519_base_x = { 439 0x1ad5258f, 0x1ceec73, 0x2c7f5e6, 0x35a6e8b, 0x21c2cbd, 440 0x3406648, 0x3e7a32d, 0x1e6e4bd, 0x15fb5d5, 0x2484f2a 441 }; 442 443 static const fe ed25519_base_y = { 444 0x2666666, 0x1999999, 0x0, 0x1333333, 0x1999999, 445 0x0, 0x1333333, 0x1999999, 0x0, 0x1333333 446 }; 447 448 /* Scalar multiplication with base point using double-and-add */ 449 static void ge_scalarmult_base(ge_p3 *h, const uint8_t *a) { 450 ge_p3 result, temp; 451 ge_p2 doubled; 452 ge_cached cached_base; 453 454 /* Start with identity (neutral element) */ 455 ge_p3_0(&result); 456 457 /* Initialize base point */ 458 ge_p3 base; 459 fe_copy(base.X, ed25519_base_x); 460 fe_copy(base.Y, ed25519_base_y); 461 fe_1(base.Z); 462 fe_mul(base.T, base.X, base.Y); 463 464 /* Double-and-add algorithm */ 465 for (int i = 0; i < 256; i++) { 466 /* Get bit i of scalar */ 467 int bit = (a[i / 8] >> (i % 8)) & 1; 468 469 if (bit) { 470 /* result = result + base */ 471 ge_p3_to_cached(&cached_base, &base); 472 ge_add(&temp, &result, &cached_base); 473 fe_copy(result.X, temp.X); 474 fe_copy(result.Y, temp.Y); 475 fe_copy(result.Z, temp.Z); 476 fe_copy(result.T, temp.T); 477 } 478 479 /* base = base + base (double) */ 480 if (i < 255) { 481 ge_p3_to_cached(&cached_base, &base); 482 ge_add(&temp, &base, &cached_base); 483 fe_copy(base.X, temp.X); 484 fe_copy(base.Y, temp.Y); 485 fe_copy(base.Z, temp.Z); 486 fe_copy(base.T, temp.T); 487 } 488 } 489 490 fe_copy(h->X, result.X); 491 fe_copy(h->Y, result.Y); 492 fe_copy(h->Z, result.Z); 493 fe_copy(h->T, result.T); 494 } 495 496 /* Scalar multiplication: r = a * P (variable point) */ 497 static void ge_scalarmult(ge_p3 *r, const uint8_t *a, const ge_p3 *P) { 498 ge_p3 result, temp; 499 ge_cached cached_point; 500 ge_p3 point; 501 502 /* Start with identity */ 503 ge_p3_0(&result); 504 505 /* Copy input point */ 506 fe_copy(point.X, P->X); 507 fe_copy(point.Y, P->Y); 508 fe_copy(point.Z, P->Z); 509 fe_copy(point.T, P->T); 510 511 /* Double-and-add */ 512 for (int i = 0; i < 256; i++) { 513 int bit = (a[i / 8] >> (i % 8)) & 1; 514 515 if (bit) { 516 ge_p3_to_cached(&cached_point, &point); 517 ge_add(&temp, &result, &cached_point); 518 fe_copy(result.X, temp.X); 519 fe_copy(result.Y, temp.Y); 520 fe_copy(result.Z, temp.Z); 521 fe_copy(result.T, temp.T); 522 } 523 524 if (i < 255) { 525 ge_p3_to_cached(&cached_point, &point); 526 ge_add(&temp, &point, &cached_point); 527 fe_copy(point.X, temp.X); 528 fe_copy(point.Y, temp.Y); 529 fe_copy(point.Z, temp.Z); 530 fe_copy(point.T, temp.T); 531 } 532 } 533 534 fe_copy(r->X, result.X); 535 fe_copy(r->Y, result.Y); 536 fe_copy(r->Z, result.Z); 537 fe_copy(r->T, result.T); 538 } 539 540 /* Scalar operations (mod L where L is the group order) */ 541 static const uint8_t L[32] = { 542 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 543 0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14, 544 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 545 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 546 }; 547 548 /* Reduce scalar mod L */ 549 static void sc_reduce(uint8_t *s) { 550 int64_t carry; 551 int64_t s0 = 0x1FFFFF & load_3(s); 552 int64_t s1 = 0x1FFFFF & (load_4(s + 2) >> 5); 553 int64_t s2 = 0x1FFFFF & (load_3(s + 5) >> 2); 554 int64_t s3 = 0x1FFFFF & (load_4(s + 7) >> 7); 555 int64_t s4 = 0x1FFFFF & (load_4(s + 10) >> 4); 556 int64_t s5 = 0x1FFFFF & (load_3(s + 13) >> 1); 557 int64_t s6 = 0x1FFFFF & (load_4(s + 15) >> 6); 558 int64_t s7 = 0x1FFFFF & (load_3(s + 18) >> 3); 559 int64_t s8 = 0x1FFFFF & load_3(s + 21); 560 int64_t s9 = 0x1FFFFF & (load_4(s + 23) >> 5); 561 int64_t s10 = 0x1FFFFF & (load_3(s + 26) >> 2); 562 int64_t s11 = (load_4(s + 28) >> 7); 563 564 /* Repeated reduction steps (simplified) */ 565 for (int i = 0; i < 3; i++) { 566 carry = (s0 + (1<<20)) >> 21; s1 += carry; s0 -= carry << 21; 567 carry = (s2 + (1<<20)) >> 21; s3 += carry; s2 -= carry << 21; 568 carry = (s4 + (1<<20)) >> 21; s5 += carry; s4 -= carry << 21; 569 carry = (s6 + (1<<20)) >> 21; s7 += carry; s6 -= carry << 21; 570 carry = (s8 + (1<<20)) >> 21; s9 += carry; s8 -= carry << 21; 571 carry = (s10 + (1<<20)) >> 21; s11 += carry; s10 -= carry << 21; 572 573 carry = (s1 + (1<<20)) >> 21; s2 += carry; s1 -= carry << 21; 574 carry = (s3 + (1<<20)) >> 21; s4 += carry; s3 -= carry << 21; 575 carry = (s5 + (1<<20)) >> 21; s6 += carry; s5 -= carry << 21; 576 carry = (s7 + (1<<20)) >> 21; s8 += carry; s7 -= carry << 21; 577 carry = (s9 + (1<<20)) >> 21; s10 += carry; s9 -= carry << 21; 578 carry = (s11 + (1<<20)) >> 21; s0 += carry * 666643; s11 -= carry << 21; 579 } 580 581 s[0] = s0 >> 0; s[1] = s0 >> 8; s[2] = (s0 >> 16) | (s1 << 5); 582 s[3] = s1 >> 3; s[4] = s1 >> 11; s[5] = (s1 >> 19) | (s2 << 2); 583 s[6] = s2 >> 6; s[7] = (s2 >> 14) | (s3 << 7); 584 s[8] = s3 >> 1; s[9] = s3 >> 9; s[10] = (s3 >> 17) | (s4 << 4); 585 s[11] = s4 >> 4; s[12] = s4 >> 12; s[13] = (s4 >> 20) | (s5 << 1); 586 s[14] = s5 >> 7; s[15] = (s5 >> 15) | (s6 << 6); 587 s[16] = s6 >> 2; s[17] = s6 >> 10; s[18] = (s6 >> 18) | (s7 << 3); 588 s[19] = s7 >> 5; s[20] = s7 >> 13; s[21] = s8 >> 0; 589 s[22] = s8 >> 8; s[23] = (s8 >> 16) | (s9 << 5); 590 s[24] = s9 >> 3; s[25] = s9 >> 11; s[26] = (s9 >> 19) | (s10 << 2); 591 s[27] = s10 >> 6; s[28] = (s10 >> 14) | (s11 << 7); 592 s[29] = s11 >> 1; s[30] = s11 >> 9; s[31] = s11 >> 17; 593 } 594 595 /* Ed25519 public API implementations */ 596 597 void ed25519_create_keypair(uint8_t public_key[32], uint8_t secret_key[64], 598 const uint8_t seed[32]) { 599 uint8_t hash[64]; 600 ge_p3 A; 601 602 /* Hash the seed to get secret scalar */ 603 sha512(seed, 32, hash); 604 605 /* Clamp the secret scalar */ 606 hash[0] &= 248; 607 hash[31] &= 127; 608 hash[31] |= 64; 609 610 /* Compute public key A = a*B where B is the base point */ 611 ge_scalarmult_base(&A, hash); 612 ge_p3_tobytes(public_key, &A); 613 614 /* Store seed and public key in secret key */ 615 memcpy(secret_key, seed, 32); 616 memcpy(secret_key + 32, public_key, 32); 617 618 /* Zero sensitive data */ 619 memset(hash, 0, 64); 620 } 621 622 void ed25519_sign(uint8_t signature[64], 623 const uint8_t *message, size_t message_len, 624 const uint8_t secret_key[64]) { 625 uint8_t hash[64], hram[64], r[64]; 626 ge_p3 R; 627 628 /* Hash the secret key */ 629 sha512(secret_key, 32, hash); 630 hash[0] &= 248; 631 hash[31] &= 127; 632 hash[31] |= 64; 633 634 /* Compute r = SHA-512(hash[32..64] || message) */ 635 uint8_t *r_input = malloc(32 + message_len); 636 memcpy(r_input, hash + 32, 32); 637 memcpy(r_input + 32, message, message_len); 638 sha512(r_input, 32 + message_len, r); 639 free(r_input); 640 sc_reduce(r); 641 642 /* Compute R = r * B */ 643 ge_scalarmult_base(&R, r); 644 ge_p3_tobytes(signature, &R); 645 646 /* Compute hram = SHA-512(R || A || message) where A is public key */ 647 uint8_t *hram_input = malloc(64 + message_len); 648 memcpy(hram_input, signature, 32); /* R */ 649 memcpy(hram_input + 32, secret_key + 32, 32); /* A (public key) */ 650 memcpy(hram_input + 64, message, message_len); 651 sha512(hram_input, 64 + message_len, hram); 652 free(hram_input); 653 sc_reduce(hram); 654 655 /* Compute S = (r + hram * a) mod L */ 656 uint8_t S[32]; 657 memset(S, 0, 32); 658 for (int i = 0; i < 32; i++) { 659 int64_t carry = 0; 660 for (int j = 0; j <= i; j++) { 661 carry += (int64_t)r[j] + (int64_t)hram[i - j] * (int64_t)hash[j]; 662 } 663 S[i] = carry & 0xFF; 664 } 665 sc_reduce(S); 666 memcpy(signature + 32, S, 32); 667 668 /* Zero sensitive data */ 669 memset(hash, 0, 64); 670 memset(r, 0, 64); 671 memset(hram, 0, 64); 672 memset(S, 0, 32); 673 } 674 675 int ed25519_verify(const uint8_t signature[64], 676 const uint8_t *message, size_t message_len, 677 const uint8_t public_key[32]) { 678 ge_p3 A, R; 679 uint8_t hram[64]; 680 const uint8_t *sig_R = signature; 681 const uint8_t *sig_S = signature + 32; 682 683 /* Check S < L */ 684 for (int i = 31; i >= 0; i--) { 685 if (sig_S[i] < L[i]) break; 686 if (sig_S[i] > L[i]) return -1; 687 } 688 689 /* Decode public key A */ 690 if (ge_frombytes_negate_vartime(&A, public_key) != 0) { 691 return -1; /* Invalid public key */ 692 } 693 694 /* Decode R from signature */ 695 if (ge_frombytes_negate_vartime(&R, sig_R) != 0) { 696 return -1; /* Invalid R */ 697 } 698 699 /* Compute hram = SHA-512(R || A || message) */ 700 uint8_t *hram_input = malloc(64 + message_len); 701 memcpy(hram_input, sig_R, 32); 702 memcpy(hram_input + 32, public_key, 32); 703 memcpy(hram_input + 64, message, message_len); 704 sha512(hram_input, 64 + message_len, hram); 705 free(hram_input); 706 sc_reduce(hram); 707 708 /* Verify: [S]B == R + [hram]A */ 709 /* Compute lhs = [S]B */ 710 ge_p3 lhs; 711 ge_scalarmult_base(&lhs, sig_S); 712 713 /* Compute rhs = R + [hram]A */ 714 ge_p3 hram_A; 715 ge_scalarmult(&hram_A, hram, &A); 716 717 ge_cached cached; 718 ge_p3_to_cached(&cached, &hram_A); 719 ge_p3 rhs; 720 ge_add(&rhs, &R, &cached); 721 722 /* Compare lhs and rhs */ 723 uint8_t lhs_bytes[32], rhs_bytes[32]; 724 ge_p3_tobytes(lhs_bytes, &lhs); 725 ge_p3_tobytes(rhs_bytes, &rhs); 726 727 int result = 0; 728 for (int i = 0; i < 32; i++) { 729 result |= (lhs_bytes[i] ^ rhs_bytes[i]); 730 } 731 732 memset(hram, 0, 64); 733 return (result == 0) ? 0 : -1; 734 }