Curve25519.c (13452B)
1 /* 2 * Curve25519 Implementation 3 * Full field and scalar arithmetic for Ed25519 and X25519 4 * 5 * Curve equation: y^2 = x^3 + 486662x^2 + x (mod 2^255-19) 6 * Edwards form: -x^2 + y^2 = 1 - (121665/121666)x^2y^2 7 */ 8 9 #include <stdint.h> 10 #include <string.h> 11 12 /* Field element: integers mod 2^255-19 */ 13 typedef int64_t fe25519[10]; 14 15 /* Constants */ 16 static const fe25519 fe25519_zero = {0}; 17 static const fe25519 fe25519_one = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 18 19 /* Reduce coefficients */ 20 static void fe25519_reduce(fe25519 h) { 21 int64_t carry; 22 23 carry = (h[0] + (1LL << 25)) >> 26; h[1] += carry; h[0] -= carry * (1LL << 26); 24 carry = (h[4] + (1LL << 25)) >> 26; h[5] += carry; h[4] -= carry * (1LL << 26); 25 carry = (h[1] + (1LL << 24)) >> 25; h[2] += carry; h[1] -= carry * (1LL << 25); 26 carry = (h[5] + (1LL << 24)) >> 25; h[6] += carry; h[5] -= carry * (1LL << 25); 27 carry = (h[2] + (1LL << 25)) >> 26; h[3] += carry; h[2] -= carry * (1LL << 26); 28 carry = (h[6] + (1LL << 25)) >> 26; h[7] += carry; h[6] -= carry * (1LL << 26); 29 carry = (h[3] + (1LL << 24)) >> 25; h[4] += carry; h[3] -= carry * (1LL << 25); 30 carry = (h[7] + (1LL << 24)) >> 25; h[8] += carry; h[7] -= carry * (1LL << 25); 31 carry = (h[4] + (1LL << 25)) >> 26; h[5] += carry; h[4] -= carry * (1LL << 26); 32 carry = (h[8] + (1LL << 25)) >> 26; h[9] += carry; h[8] -= carry * (1LL << 26); 33 carry = (h[9] + (1LL << 24)) >> 25; h[0] += carry * 19; h[9] -= carry * (1LL << 25); 34 carry = (h[0] + (1LL << 25)) >> 26; h[1] += carry; h[0] -= carry * (1LL << 26); 35 } 36 37 /* Copy field element */ 38 static void fe25519_copy(fe25519 h, const fe25519 f) { 39 for (int i = 0; i < 10; i++) h[i] = f[i]; 40 } 41 42 /* Zero */ 43 static void fe25519_0(fe25519 h) { 44 for (int i = 0; i < 10; i++) h[i] = 0; 45 } 46 47 /* One */ 48 static void fe25519_1(fe25519 h) { 49 h[0] = 1; 50 for (int i = 1; i < 10; i++) h[i] = 0; 51 } 52 53 /* Addition */ 54 static void fe25519_add(fe25519 h, const fe25519 f, const fe25519 g) { 55 for (int i = 0; i < 10; i++) h[i] = f[i] + g[i]; 56 } 57 58 /* Subtraction */ 59 static void fe25519_sub(fe25519 h, const fe25519 f, const fe25519 g) { 60 for (int i = 0; i < 10; i++) h[i] = f[i] - g[i]; 61 } 62 63 /* Negation */ 64 static void fe25519_neg(fe25519 h, const fe25519 f) { 65 for (int i = 0; i < 10; i++) h[i] = -f[i]; 66 } 67 68 /* Multiplication */ 69 static void fe25519_mul(fe25519 h, const fe25519 f, const fe25519 g) { 70 int64_t f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4]; 71 int64_t f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9]; 72 int64_t g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4]; 73 int64_t g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9]; 74 75 int64_t g1_19 = 19 * g1, g2_19 = 19 * g2, g3_19 = 19 * g3; 76 int64_t g4_19 = 19 * g4, g5_19 = 19 * g5, g6_19 = 19 * g6; 77 int64_t g7_19 = 19 * g7, g8_19 = 19 * g8, g9_19 = 19 * g9; 78 79 int64_t f1_2 = 2 * f1, f3_2 = 2 * f3, f5_2 = 2 * f5; 80 int64_t f7_2 = 2 * f7, f9_2 = 2 * f9; 81 82 int64_t h0 = f0*g0 + f1_2*g9_19 + f2*g8_19 + f3_2*g7_19 + f4*g6_19 + f5_2*g5_19 + f6*g4_19 + f7_2*g3_19 + f8*g2_19 + f9_2*g1_19; 83 int64_t h1 = f0*g1 + f1*g0 + f2*g9_19 + f3*g8_19 + f4*g7_19 + f5*g6_19 + f6*g5_19 + f7*g4_19 + f8*g3_19 + f9*g2_19; 84 int64_t h2 = f0*g2 + f1_2*g1 + f2*g0 + f3_2*g9_19 + f4*g8_19 + f5_2*g7_19 + f6*g6_19 + f7_2*g5_19 + f8*g4_19 + f9_2*g3_19; 85 int64_t h3 = f0*g3 + f1*g2 + f2*g1 + f3*g0 + f4*g9_19 + f5*g8_19 + f6*g7_19 + f7*g6_19 + f8*g5_19 + f9*g4_19; 86 int64_t h4 = f0*g4 + f1_2*g3 + f2*g2 + f3_2*g1 + f4*g0 + f5_2*g9_19 + f6*g8_19 + f7_2*g7_19 + f8*g6_19 + f9_2*g5_19; 87 int64_t h5 = f0*g5 + f1*g4 + f2*g3 + f3*g2 + f4*g1 + f5*g0 + f6*g9_19 + f7*g8_19 + f8*g7_19 + f9*g6_19; 88 int64_t h6 = f0*g6 + f1_2*g5 + f2*g4 + f3_2*g3 + f4*g2 + f5_2*g1 + f6*g0 + f7_2*g9_19 + f8*g8_19 + f9_2*g7_19; 89 int64_t h7 = f0*g7 + f1*g6 + f2*g5 + f3*g4 + f4*g3 + f5*g2 + f6*g1 + f7*g0 + f8*g9_19 + f9*g8_19; 90 int64_t h8 = f0*g8 + f1_2*g7 + f2*g6 + f3_2*g5 + f4*g4 + f5_2*g3 + f6*g2 + f7_2*g1 + f8*g0 + f9_2*g9_19; 91 int64_t h9 = f0*g9 + f1*g8 + f2*g7 + f3*g6 + f4*g5 + f5*g4 + f6*g3 + f7*g2 + f8*g1 + f9*g0; 92 93 h[0] = h0; h[1] = h1; h[2] = h2; h[3] = h3; h[4] = h4; 94 h[5] = h5; h[6] = h6; h[7] = h7; h[8] = h8; h[9] = h9; 95 96 fe25519_reduce(h); 97 } 98 99 /* Squaring */ 100 static void fe25519_sq(fe25519 h, const fe25519 f) { 101 int64_t f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4]; 102 int64_t f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9]; 103 104 int64_t f0_2 = 2 * f0, f1_2 = 2 * f1, f2_2 = 2 * f2, f3_2 = 2 * f3; 105 int64_t f4_2 = 2 * f4, f5_2 = 2 * f5, f6_2 = 2 * f6, f7_2 = 2 * f7; 106 int64_t f5_38 = 38 * f5, f6_19 = 19 * f6, f7_38 = 38 * f7; 107 int64_t f8_19 = 19 * f8, f9_38 = 38 * f9; 108 109 int64_t h0 = f0*f0 + f1_2*f9_38 + f2_2*f8_19 + f3_2*f7_38 + f4_2*f6_19 + f5*f5_38; 110 int64_t h1 = f0_2*f1 + f2*f9_38 + f3_2*f8_19 + f4*f7_38 + f5_2*f6_19; 111 int64_t h2 = f0_2*f2 + f1_2*f1 + f3_2*f9_38 + f4_2*f8_19 + f5_2*f7_38 + f6*f6_19; 112 int64_t h3 = f0_2*f3 + f1_2*f2 + f4*f9_38 + f5_2*f8_19 + f6*f7_38; 113 int64_t h4 = f0_2*f4 + f1_2*f3_2 + f2*f2 + f5_2*f9_38 + f6_2*f8_19 + f7*f7_38; 114 int64_t h5 = f0_2*f5 + f1_2*f4 + f2_2*f3 + f6*f9_38 + f7_2*f8_19; 115 int64_t h6 = f0_2*f6 + f1_2*f5_2 + f2_2*f4 + f3_2*f3 + f7_2*f9_38 + f8*f8_19; 116 int64_t h7 = f0_2*f7 + f1_2*f6 + f2_2*f5 + f3_2*f4 + f8*f9_38; 117 int64_t h8 = f0_2*f8 + f1_2*f7_2 + f2_2*f6 + f3_2*f5_2 + f4*f4 + f9*f9_38; 118 int64_t h9 = f0_2*f9 + f1_2*f8 + f2_2*f7 + f3_2*f6 + f4_2*f5; 119 120 h[0] = h0; h[1] = h1; h[2] = h2; h[3] = h3; h[4] = h4; 121 h[5] = h5; h[6] = h6; h[7] = h7; h[8] = h8; h[9] = h9; 122 123 fe25519_reduce(h); 124 } 125 126 /* Square twice */ 127 static void fe25519_sq2(fe25519 h, const fe25519 f) { 128 fe25519_sq(h, f); 129 fe25519_sq(h, h); 130 } 131 132 /* Compute f^(2^252-3) for inversion */ 133 static void fe25519_pow22523(fe25519 out, const fe25519 z) { 134 fe25519 t0, t1, t2; 135 136 fe25519_sq(t0, z); 137 fe25519_sq(t1, t0); fe25519_sq(t1, t1); 138 fe25519_mul(t1, z, t1); 139 fe25519_mul(t0, t0, t1); 140 fe25519_sq(t0, t0); 141 fe25519_mul(t0, t1, t0); 142 fe25519_sq(t1, t0); for (int i = 1; i < 5; i++) fe25519_sq(t1, t1); 143 fe25519_mul(t0, t1, t0); 144 fe25519_sq(t1, t0); for (int i = 1; i < 10; i++) fe25519_sq(t1, t1); 145 fe25519_mul(t1, t1, t0); 146 fe25519_sq(t2, t1); for (int i = 1; i < 20; i++) fe25519_sq(t2, t2); 147 fe25519_mul(t1, t2, t1); 148 fe25519_sq(t1, t1); for (int i = 1; i < 10; i++) fe25519_sq(t1, t1); 149 fe25519_mul(t0, t1, t0); 150 fe25519_sq(t1, t0); for (int i = 1; i < 50; i++) fe25519_sq(t1, t1); 151 fe25519_mul(t1, t1, t0); 152 fe25519_sq(t2, t1); for (int i = 1; i < 100; i++) fe25519_sq(t2, t2); 153 fe25519_mul(t1, t2, t1); 154 fe25519_sq(t1, t1); for (int i = 1; i < 50; i++) fe25519_sq(t1, t1); 155 fe25519_mul(t0, t1, t0); 156 fe25519_sq(t0, t0); fe25519_sq(t0, t0); 157 fe25519_mul(out, t0, z); 158 } 159 160 /* Inversion */ 161 static void fe25519_invert(fe25519 out, const fe25519 z) { 162 fe25519 t0, t1, t2, t3; 163 164 fe25519_sq(t0, z); 165 fe25519_sq(t1, t0); fe25519_sq(t1, t1); 166 fe25519_mul(t1, z, t1); 167 fe25519_mul(t0, t0, t1); 168 fe25519_sq(t2, t0); 169 fe25519_mul(t1, t1, t2); 170 fe25519_sq(t2, t1); for (int i = 1; i < 5; i++) fe25519_sq(t2, t2); 171 fe25519_mul(t1, t2, t1); 172 fe25519_sq(t2, t1); for (int i = 1; i < 10; i++) fe25519_sq(t2, t2); 173 fe25519_mul(t2, t2, t1); 174 fe25519_sq(t3, t2); for (int i = 1; i < 20; i++) fe25519_sq(t3, t3); 175 fe25519_mul(t2, t3, t2); 176 fe25519_sq(t2, t2); for (int i = 1; i < 10; i++) fe25519_sq(t2, t2); 177 fe25519_mul(t1, t2, t1); 178 fe25519_sq(t2, t1); for (int i = 1; i < 50; i++) fe25519_sq(t2, t2); 179 fe25519_mul(t2, t2, t1); 180 fe25519_sq(t3, t2); for (int i = 1; i < 100; i++) fe25519_sq(t3, t3); 181 fe25519_mul(t2, t3, t2); 182 fe25519_sq(t2, t2); for (int i = 1; i < 50; i++) fe25519_sq(t2, t2); 183 fe25519_mul(t1, t2, t1); 184 fe25519_sq(t1, t1); for (int i = 1; i < 5; i++) fe25519_sq(t1, t1); 185 fe25519_mul(out, t1, t0); 186 } 187 188 /* Check if negative (LSB = 1) */ 189 static int fe25519_isnegative(const fe25519 f) { 190 uint8_t s[32]; 191 fe25519 h; 192 fe25519_copy(h, f); 193 fe25519_reduce(h); 194 195 /* Convert to bytes */ 196 int64_t h0 = h[0], h1 = h[1], h2 = h[2], h3 = h[3], h4 = h[4]; 197 int64_t h5 = h[5], h6 = h[6], h7 = h[7], h8 = h[8], h9 = h[9]; 198 199 int64_t q = (19 * h9 + (1LL << 24)) >> 25; 200 q = (h0 + q) >> 26; q = (h1 + q) >> 25; q = (h2 + q) >> 26; 201 q = (h3 + q) >> 25; q = (h4 + q) >> 26; q = (h5 + q) >> 25; 202 q = (h6 + q) >> 26; q = (h7 + q) >> 25; q = (h8 + q) >> 26; q = (h9 + q) >> 25; 203 204 h0 += 19 * q; 205 int64_t carry = h0 >> 26; h1 += carry; h0 -= carry * (1LL << 26); 206 207 s[0] = h0; 208 return s[0] & 1; 209 } 210 211 /* Convert to bytes */ 212 void fe25519_tobytes(uint8_t *s, const fe25519 h) { 213 fe25519 t; 214 fe25519_copy(t, h); 215 fe25519_reduce(t); 216 217 int64_t h0 = t[0], h1 = t[1], h2 = t[2], h3 = t[3], h4 = t[4]; 218 int64_t h5 = t[5], h6 = t[6], h7 = t[7], h8 = t[8], h9 = t[9]; 219 220 int64_t q = (19 * h9 + (1LL << 24)) >> 25; 221 q = (h0 + q) >> 26; q = (h1 + q) >> 25; q = (h2 + q) >> 26; 222 q = (h3 + q) >> 25; q = (h4 + q) >> 26; q = (h5 + q) >> 25; 223 q = (h6 + q) >> 26; q = (h7 + q) >> 25; q = (h8 + q) >> 26; q = (h9 + q) >> 25; 224 225 h0 += 19 * q; 226 int64_t carry = h0 >> 26; h1 += carry; h0 -= carry * (1LL << 26); 227 carry = h1 >> 25; h2 += carry; h1 -= carry * (1LL << 25); 228 carry = h2 >> 26; h3 += carry; h2 -= carry * (1LL << 26); 229 carry = h3 >> 25; h4 += carry; h3 -= carry * (1LL << 25); 230 carry = h4 >> 26; h5 += carry; h4 -= carry * (1LL << 26); 231 carry = h5 >> 25; h6 += carry; h5 -= carry * (1LL << 25); 232 carry = h6 >> 26; h7 += carry; h6 -= carry * (1LL << 26); 233 carry = h7 >> 25; h8 += carry; h7 -= carry * (1LL << 25); 234 carry = h8 >> 26; h9 += carry; h8 -= carry * (1LL << 26); 235 236 s[0] = h0; s[1] = h0 >> 8; s[2] = h0 >> 16; s[3] = (h0 >> 24) | (h1 << 2); 237 s[4] = h1 >> 6; s[5] = h1 >> 14; s[6] = (h1 >> 22) | (h2 << 3); 238 s[7] = h2 >> 5; s[8] = h2 >> 13; s[9] = (h2 >> 21) | (h3 << 5); 239 s[10] = h3 >> 3; s[11] = h3 >> 11; s[12] = (h3 >> 19) | (h4 << 6); 240 s[13] = h4 >> 2; s[14] = h4 >> 10; s[15] = h4 >> 18; 241 s[16] = h5; s[17] = h5 >> 8; s[18] = h5 >> 16; s[19] = (h5 >> 24) | (h6 << 1); 242 s[20] = h6 >> 7; s[21] = h6 >> 15; s[22] = (h6 >> 23) | (h7 << 3); 243 s[23] = h7 >> 5; s[24] = h7 >> 13; s[25] = (h7 >> 21) | (h8 << 4); 244 s[26] = h8 >> 4; s[27] = h8 >> 12; s[28] = (h8 >> 20) | (h9 << 6); 245 s[29] = h9 >> 2; s[30] = h9 >> 10; s[31] = h9 >> 18; 246 } 247 248 /* Convert from bytes */ 249 void fe25519_frombytes(fe25519 h, const uint8_t *s) { 250 int64_t h0 = s[0] | (s[1] << 8) | (s[2] << 16) | ((int64_t)s[3] << 24); 251 int64_t h1 = s[4] | (s[5] << 8) | (s[6] << 16) | ((int64_t)s[7] << 24); 252 int64_t h2 = s[8] | (s[9] << 8) | (s[10] << 16) | ((int64_t)s[11] << 24); 253 int64_t h3 = s[12] | (s[13] << 8) | (s[14] << 16) | ((int64_t)s[15] << 24); 254 int64_t h4 = s[16] | (s[17] << 8) | (s[18] << 16) | ((int64_t)s[19] << 24); 255 int64_t h5 = s[20] | (s[21] << 8) | (s[22] << 16) | ((int64_t)s[23] << 24); 256 int64_t h6 = s[24] | (s[25] << 8) | (s[26] << 16) | ((int64_t)s[27] << 24); 257 int64_t h7 = s[28] | (s[29] << 8) | (s[30] << 16) | ((int64_t)s[31] << 24); 258 259 h[0] = h0 & 0x3ffffff; 260 h[1] = (h0 >> 26) | ((h1 & 0x1ffffff) << 6); 261 h[2] = (h1 >> 19) | ((h2 & 0xffffff) << 13); 262 h[3] = (h2 >> 13) | ((h3 & 0x1fff) << 19); 263 h[4] = h3 >> 6; 264 h[5] = h4 & 0x1ffffff; 265 h[6] = (h4 >> 25) | ((h5 & 0x7fffff) << 7); 266 h[7] = (h5 >> 18) | ((h6 & 0x3ffff) << 14); 267 h[8] = (h6 >> 12) | ((h7 & 0x7f) << 20); 268 h[9] = (h7 >> 7) & 0x1ffffff; 269 270 fe25519_reduce(h); 271 } 272 273 /* Scalar multiplication (X25519) */ 274 void curve25519_scalarmult(uint8_t *q, const uint8_t *n, const uint8_t *p) { 275 fe25519 x1, x2, z2, x3, z3, tmp0, tmp1; 276 uint8_t e[32]; 277 278 memcpy(e, n, 32); 279 e[0] &= 248; 280 e[31] &= 127; 281 e[31] |= 64; 282 283 fe25519_frombytes(x1, p); 284 fe25519_1(x2); 285 fe25519_0(z2); 286 fe25519_copy(x3, x1); 287 fe25519_1(z3); 288 289 int swap = 0; 290 for (int pos = 254; pos >= 0; pos--) { 291 int b = (e[pos / 8] >> (pos & 7)) & 1; 292 swap ^= b; 293 294 /* Conditional swap */ 295 if (swap) { 296 fe25519 t; 297 fe25519_copy(t, x2); fe25519_copy(x2, x3); fe25519_copy(x3, t); 298 fe25519_copy(t, z2); fe25519_copy(z2, z3); fe25519_copy(z3, t); 299 } 300 swap = b; 301 302 /* Montgomery ladder step */ 303 fe25519_add(tmp0, x2, z2); 304 fe25519_sub(tmp1, x2, z2); 305 fe25519_add(x2, x3, z3); 306 fe25519_sub(z2, x3, z3); 307 fe25519_mul(z3, x2, tmp1); 308 fe25519_mul(z2, z2, tmp0); 309 fe25519_sq(tmp0, tmp1); 310 fe25519_sq(tmp1, tmp0); 311 fe25519_add(x3, z3, z2); 312 fe25519_sub(z2, z3, z2); 313 fe25519_mul(x2, tmp1, tmp0); 314 fe25519_sub(tmp1, tmp1, tmp0); 315 fe25519_sq(z2, z2); 316 fe25519_mul(z3, tmp1, (fe25519){121666}); 317 fe25519_sq(x3, x3); 318 fe25519_add(tmp0, tmp0, z3); 319 fe25519_mul(z3, x1, z2); 320 fe25519_mul(z2, tmp1, tmp0); 321 } 322 323 if (swap) { 324 fe25519 t; 325 fe25519_copy(t, x2); fe25519_copy(x2, x3); fe25519_copy(x3, t); 326 fe25519_copy(t, z2); fe25519_copy(z2, z3); fe25519_copy(z3, t); 327 } 328 329 fe25519_invert(z2, z2); 330 fe25519_mul(x2, x2, z2); 331 fe25519_tobytes(q, x2); 332 } 333 334 /* Base point scalar mult */ 335 void curve25519_scalarmult_base(uint8_t *q, const uint8_t *n) { 336 static const uint8_t basepoint[32] = {9}; 337 curve25519_scalarmult(q, n, basepoint); 338 }