  1. /* Copyright (c) 2015 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. /**
  5. * @file decaf.c
  6. * @author Mike Hamburg
  7. * @brief Decaf high-level functions.
  8. */
  9. #define _XOPEN_SOURCE 600 /* for posix_memalign */
  10. #define __STDC_WANT_LIB_EXT1__ 1 /* for memset_s */
  11. #include <decaf.h>
  12. #include <string.h>
  13. #include "word.h"
  14. #include "field.h"
  15. #include "decaf_config.h"
  16. /* Include the curve data here */
  17. #include "curve_data.inc.c"
  18. #if (COFACTOR == 8) && !IMAGINE_TWIST
  19. /* FUTURE: Curve41417 doesn't have these properties. */
  20. #error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
  21. #endif
  22. #if IMAGINE_TWIST && (P_MOD_8 != 5)
  23. #error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
  24. #endif
  25. #if (COFACTOR != 8) && (COFACTOR != 4)
  26. #error "COFACTOR must be 4 or 8"
  27. #endif
  28. #if IMAGINE_TWIST
  29. extern const gf SQRT_MINUS_ONE;
  30. #endif
  31. #if COFACTOR == 8
  32. extern const gf SQRT_ONE_MINUS_D; /* TODO: Intern this? */
  33. #endif
  34. #define sv static void
  35. #define snv static void __attribute__((noinline))
  36. #define siv static inline void __attribute__((always_inline))
  37. const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};
  38. extern const scalar_t API_NS(sc_r2);
  39. extern const decaf_word_t API_NS(MONTGOMERY_FACTOR);
  40. extern const point_t API_NS(point_base);
  41. /* These are externally exposed (but private) instead of static so that
  42. * f_arithmetic.c can use them
  43. */
  44. #define ONE API_NS(ONE)
  45. #define ZERO API_NS(ZERO)
  46. #define gf_eq API_NS(gf_eq)
  47. const gf ZERO = {{{0}}}, ONE = {{{1}}};
  48. /* Projective Niels coordinates */
  49. typedef struct { gf a, b, c; } niels_s, niels_t[1];
  50. typedef struct { niels_t n; gf z; } __attribute__((aligned(32))) pniels_s, pniels_t[1]; /* MAGIC alignment */
  51. /* Precomputed base */
  52. struct precomputed_s { niels_t table [DECAF_COMBS_N<<(DECAF_COMBS_T-1)]; };
  53. extern const gf API_NS(precomputed_base_as_fe)[];
  54. const precomputed_s *API_NS(precomputed_base) =
  55. (const precomputed_s *) &API_NS(precomputed_base_as_fe);
  56. const size_t API_NS2(sizeof,precomputed_s) = sizeof(precomputed_s);
  57. const size_t API_NS2(alignof,precomputed_s) = 32;
  58. /* TODO PERF: Vectorize vs unroll */
  59. #ifdef __clang__
  60. #if 100*__clang_major__ + __clang_minor__ > 305
  61. #define UNROLL _Pragma("clang loop unroll(full)") // PERF TODO: vectorize?
  62. #endif
  63. #endif
  64. #ifndef UNROLL
  65. #define UNROLL
  66. #endif
  67. #define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }}
  68. #define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }}
  69. /** Copy x = y */
  70. siv gf_cpy(gf x, const gf y) { x[0] = y[0]; }
  71. /** Constant time, x = is_z ? z : y */
  72. siv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) {
  73. constant_time_select(x,z,y,sizeof(gf),is_z);
  74. }
  75. /** Constant time, if (neg) x=-x; */
  76. sv cond_neg(gf x, decaf_bool_t neg) {
  77. gf y;
  78. gf_sub(y,ZERO,x);
  79. cond_sel(x,x,y,neg);
  80. }
  81. /** Constant time, if (swap) (x,y) = (y,x); */
  82. siv cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
  83. FOR_LIMB_U(i, {
  84. decaf_word_t s = (x->limb[i] ^ y->limb[i]) & swap;
  85. x->limb[i] ^= s;
  86. y->limb[i] ^= s;
  87. });
  88. }
  89. /** Compare a==b */
  90. decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) {
  91. gf c;
  92. gf_sub(c,a,b);
  93. gf_strong_reduce(c);
  94. decaf_word_t ret=0;
  95. FOR_LIMB(i, ret |= c->limb[i] );
  96. /* Hope the compiler is too dumb to optimize this, thus noinline */
  97. return ((decaf_dword_t)ret - 1) >> WBITS;
  98. }
  99. /** Inverse square root using addition chain. */
  100. static decaf_bool_t gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) {
  101. gf tmp0, tmp1;
  102. gf_isr((gf_s *)y, (const gf_s *)x);
  103. gf_sqr(tmp0,y);
  104. gf_mul(tmp1,tmp0,x);
  105. return gf_eq(tmp1,ONE) | (allow_zero & gf_eq(tmp1,ZERO));
  106. }
  107. /** Inverse. */
  108. sv gf_invert(gf y, const gf x) {
  109. gf t1, t2;
  110. gf_sqr(t1, x); // o^2
  111. decaf_bool_t ret = gf_isqrt_chk(t2, t1, 0); // +-1/sqrt(o^2) = +-1/o
  112. (void)ret; assert(ret);
  113. gf_sqr(t1, t2);
  114. gf_mul(t2, t1, x); // not direct to y in case of alias.
  115. gf_cpy(y, t2);
  116. }
  117. /**
  118. * Mul by signed int. Not constant-time WRT the sign of that int.
  119. * Just uses a full mul (PERF)
  120. */
  121. static inline void gf_mulw_sgn(gf c, const gf a, int w) {
  122. if (w>0) {
  123. gf_mulw(c, a, w);
  124. } else {
  125. gf_mulw(c, a, -w);
  126. gf_sub(c,ZERO,c);
  127. }
  128. }
  129. /** Return high bit of x = low bit of 2x mod p */
  130. static decaf_word_t hibit(const gf x) {
  131. gf y;
  132. gf_add(y,x,x);
  133. gf_strong_reduce(y);
  134. return -(y->limb[0]&1);
  135. }
  136. #if COFACTOR==8
  137. /** Return low bit of x mod p */
  138. static decaf_word_t lobit(const gf x) {
  139. gf y;
  140. gf_cpy(y,x);
  141. gf_strong_reduce(y);
  142. return -(y->limb[0]&1);
  143. }
  144. #endif
  145. /** Compute {extra,accum} - sub, then add p back in if the subtraction borrowed.
  146. * Constant time; the caller must ensure extra <= 1.
  147. */
  148. snv sc_subx(
  149. scalar_t out,
  150. const decaf_word_t accum[SCALAR_LIMBS],
  151. const scalar_t sub,
  152. const scalar_t p,
  153. decaf_word_t extra
  154. ) {
  155. dsword_t chain = 0;
  156. unsigned int i;
  157. for (i=0; i<SCALAR_LIMBS; i++) {
  158. chain = (chain + accum[i]) - sub->limb[i];
  159. out->limb[i] = chain;
  160. chain >>= WBITS;
  161. }
  162. decaf_bool_t borrow = chain+extra; /* = 0 or -1 */
  163. chain = 0;
  164. for (i=0; i<SCALAR_LIMBS; i++) {
  165. chain = (chain + out->limb[i]) + (p->limb[i] & borrow);
  166. out->limb[i] = chain;
  167. chain >>= WBITS;
  168. }
  169. }
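/** Montgomery multiplication: out = a*b/R mod the scalar modulus sc_p,
 * where R = 2^(WBITS*SCALAR_LIMBS) is the Montgomery radix implied by the limb loop. */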
  170. snv sc_montmul (
  171. scalar_t out,
  172. const scalar_t a,
  173. const scalar_t b
  174. ) {
  175. unsigned int i,j;
  176. decaf_word_t accum[SCALAR_LIMBS+1] = {0};
  177. decaf_word_t hi_carry = 0;
  178. for (i=0; i<SCALAR_LIMBS; i++) {
  179. decaf_word_t mand = a->limb[i];
  180. const decaf_word_t *mier = b->limb;
  181. decaf_dword_t chain = 0;
  182. for (j=0; j<SCALAR_LIMBS; j++) {
  183. chain += ((decaf_dword_t)mand)*mier[j] + accum[j];
  184. accum[j] = chain;
  185. chain >>= WBITS;
  186. }
  187. accum[j] = chain;
  188. mand = accum[0] * API_NS(MONTGOMERY_FACTOR);
  189. chain = 0;
  190. mier = sc_p->limb;
  191. for (j=0; j<SCALAR_LIMBS; j++) {
  192. chain += (decaf_dword_t)mand*mier[j] + accum[j];
  193. if (j) accum[j-1] = chain;
  194. chain >>= WBITS;
  195. }
  196. chain += accum[j];
  197. chain += hi_carry;
  198. accum[j-1] = chain;
  199. hi_carry = chain >> WBITS;
  200. }
  201. sc_subx(out, accum, sc_p, sc_p, hi_carry);
  202. }
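/** out = a*b mod sc_p: the second Montgomery multiplication, by R^2, cancels the 1/R factor left by the first. */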
  203. void API_NS(scalar_mul) (
  204. scalar_t out,
  205. const scalar_t a,
  206. const scalar_t b
  207. ) {
  208. sc_montmul(out,a,b);
  209. sc_montmul(out,out,API_NS(sc_r2));
  210. }
  211. /* PERF: a dedicated Montgomery squaring routine could be implemented here */
  212. siv sc_montsqr (
  213. scalar_t out,
  214. const scalar_t a
  215. ) {
  216. sc_montmul(out,a,a);
  217. }
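/** Invert a scalar mod sc_p via Fermat's little theorem (exponentiation by sc_p - 2).
 * Returns DECAF_FALSE iff the result (and hence the input) is zero. */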
  218. decaf_bool_t API_NS(scalar_invert) (
  219. scalar_t out,
  220. const scalar_t a
  221. ) {
  222. #if 0
  223. /* FIELD MAGIC. TODO PERF: not updated for 25519 */
  224. scalar_t chain[7], tmp;
  225. sc_montmul(chain[0],a,API_NS(sc_r2));
  226. unsigned int i,j;
  227. /* Addition chain generated by a not-too-clever SAGE script. First part: compute a^(2^222-1) */
  228. const struct { uint8_t widx, sidx, sct, midx; } muls [] = {
  229. {2,0,1,0}, {3,2,1,0}, {4,3,1,0}, {5,4,1,0}, /* 0x3,7,f,1f */
  230. {1,5,1,0}, {1,1,3,3}, {6,1,9,1}, {1,6,1,0}, {6,1,18,6}, /* a^(2^37-1) */
  231. {1,6,37,6}, {1,1,37,6}, {1,1,111,1} /* a^(2^222-1) */
  232. };
  233. /* Second part: sliding window */
  234. const struct { uint8_t sct, midx; } muls1 [] = {
  235. {6, 5}, {4, 2}, {3, 0}, {2, 0}, {4, 0}, {8, 5},
  236. {2, 0}, {5, 3}, {4, 0}, {4, 0}, {5, 3}, {3, 2},
  237. {3, 2}, {3, 2}, {2, 0}, {3, 0}, {4, 2}, {2, 0},
  238. {4, 3}, {3, 2}, {2, 0}, {3, 2}, {5, 2}, {3, 2},
  239. {2, 0}, {3, 0}, {7, 0}, {5, 0}, {3, 2}, {3, 2},
  240. {4, 2}, {5, 0}, {5, 3}, {3, 0}, {2, 0}, {5, 2},
  241. {4, 3}, {4, 0}, {3, 2}, {7, 4}, {2, 0}, {2, 0},
  242. {2, 0}, {2, 0}, {3, 0}, {5, 2}, {5, 4}, {5, 2},
  243. {5, 0}, {2, 0}, {3, 0}, {3, 0}, {2, 0}, {2, 0},
  244. {2, 0}, {3, 2}, {2, 0}, {3, 2}, {5, 0}, {4, 0},
  245. {6, 4}, {4, 0}
  246. };
  247. for (i=0; i<sizeof(muls)/sizeof(muls[0]); i++) {
  248. sc_montsqr(tmp, chain[muls[i].sidx]);
  249. for (j=1; j<muls[i].sct; j++) {
  250. sc_montsqr(tmp, tmp);
  251. }
  252. sc_montmul(chain[muls[i].widx], tmp, chain[muls[i].midx]);
  253. }
  254. for (i=0; i<sizeof(muls1)/sizeof(muls1[0]); i++) {
  255. sc_montsqr(tmp, chain[1]);
  256. for (j=1; j<muls1[i].sct; j++) {
  257. sc_montsqr(tmp, tmp);
  258. }
  259. sc_montmul(chain[1], tmp, chain[muls1[i].midx]);
  260. }
  261. sc_montmul(out,chain[1],API_NS(scalar_one));
  262. for (i=0; i<sizeof(chain)/sizeof(chain[0]); i++) {
  263. API_NS(scalar_destroy)(chain[i]);
  264. }
  265. return ~API_NS(scalar_eq)(out,API_NS(scalar_zero));
  266. #else
  267. scalar_t b, ma;
  268. int i;
  269. sc_montmul(b,API_NS(scalar_one),API_NS(sc_r2));
  270. sc_montmul(ma,a,API_NS(sc_r2));
  271. for (i=SCALAR_BITS-1; i>=0; i--) {
  272. sc_montsqr(b,b);
  273. decaf_word_t w = sc_p->limb[i/WBITS];
  274. if (i<WBITS) {
  275. assert(w >= 2);
  276. w-=2;
  277. }
  278. if (1 & w>>(i%WBITS)) {
  279. sc_montmul(b,b,ma);
  280. }
  281. }
  282. sc_montmul(out,b,API_NS(scalar_one));
  283. API_NS(scalar_destroy)(b);
  284. API_NS(scalar_destroy)(ma);
  285. return ~API_NS(scalar_eq)(out,API_NS(scalar_zero));
  286. #endif
  287. }
  288. void API_NS(scalar_sub) (
  289. scalar_t out,
  290. const scalar_t a,
  291. const scalar_t b
  292. ) {
  293. sc_subx(out, a->limb, b, sc_p, 0);
  294. }
  295. void API_NS(scalar_add) (
  296. scalar_t out,
  297. const scalar_t a,
  298. const scalar_t b
  299. ) {
  300. decaf_dword_t chain = 0;
  301. unsigned int i;
  302. for (i=0; i<SCALAR_LIMBS; i++) {
  303. chain = (chain + a->limb[i]) + b->limb[i];
  304. out->limb[i] = chain;
  305. chain >>= WBITS;
  306. }
  307. sc_subx(out, out->limb, sc_p, sc_p, chain);
  308. }
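/** Halve a scalar mod p: add p first if the input is odd, then shift right by one bit. */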
  309. snv sc_halve (
  310. scalar_t out,
  311. const scalar_t a,
  312. const scalar_t p
  313. ) {
  314. decaf_word_t mask = -(a->limb[0] & 1);
  315. decaf_dword_t chain = 0;
  316. unsigned int i;
  317. for (i=0; i<SCALAR_LIMBS; i++) {
  318. chain = (chain + a->limb[i]) + (p->limb[i] & mask);
  319. out->limb[i] = chain;
  320. chain >>= WBITS;
  321. }
  322. for (i=0; i<SCALAR_LIMBS-1; i++) {
  323. out->limb[i] = out->limb[i]>>1 | out->limb[i+1]<<(WBITS-1);
  324. }
  325. out->limb[i] = out->limb[i]>>1 | chain<<(WBITS-1);
  326. }
  327. void API_NS(scalar_set_unsigned) (
  328. scalar_t out,
  329. decaf_word_t w
  330. ) {
  331. memset(out,0,sizeof(scalar_t));
  332. out->limb[0] = w;
  333. }
  334. decaf_bool_t API_NS(scalar_eq) (
  335. const scalar_t a,
  336. const scalar_t b
  337. ) {
  338. decaf_word_t diff = 0;
  339. unsigned int i;
  340. for (i=0; i<SCALAR_LIMBS; i++) {
  341. diff |= a->limb[i] ^ b->limb[i];
  342. }
  343. return (((decaf_dword_t)diff)-1)>>WBITS;
  344. }
  345. /* *** API begins here *** */
  346. /** identity = (0,1) */
  347. const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};
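/* Illustrative (non-normative) sketch of typical API usage, assuming the caller already
 * holds a wire-format point ser[SER_BYTES] and a scalar sk obtained via scalar_decode:
 *
 *   point_t P;
 *   if (API_NS(point_decode)(P, ser, DECAF_FALSE)) {
 *       API_NS(point_scalarmul)(P, P, sk);   // P = [sk]P, constant time
 *       API_NS(point_encode)(ser, P);        // re-serialize the result
 *   }
 */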
  348. static void gf_encode ( unsigned char ser[SER_BYTES], gf a ) {
  349. gf_serialize(ser, (gf_s *)a);
  350. }
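/* Produce the decaf encoding inputs s and -t/s of p on the isogenous curve.
 * The toggle flags flip the canonical sign/rotation choices: point_encode passes zeros,
 * while invert_elligator uses them to enumerate the different preimages. */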
  351. static void deisogenize (
  352. gf_s *__restrict__ s,
  353. gf_s *__restrict__ minus_t_over_s,
  354. const point_t p,
  355. decaf_bool_t toggle_hibit_s,
  356. decaf_bool_t toggle_hibit_t_over_s,
  357. decaf_bool_t toggle_rotation
  358. ) {
  359. #if COFACTOR == 4 && !IMAGINE_TWIST
  360. (void) toggle_rotation;
  361. /* TODO: Could shave off one mul here; not important, but it would make this consistent with the paper */
  362. gf b, d;
  363. gf_s *a = s, *c = minus_t_over_s;
  364. gf_mulw_sgn ( a, p->y, 1-EDWARDS_D );
  365. gf_mul ( c, a, p->t ); /* -dYT, with EDWARDS_D = d-1 */
  366. gf_mul ( a, p->x, p->z );
  367. gf_sub ( d, c, a ); /* aXZ-dYT with a=-1 */
  368. gf_add ( a, p->z, p->y );
  369. gf_sub ( b, p->z, p->y );
  370. gf_mul ( c, b, a );
  371. gf_mulw_sgn ( b, c, -EDWARDS_D ); /* (a-d)(Z+Y)(Z-Y) */
  372. decaf_bool_t ok = gf_isqrt_chk ( a, b, DECAF_TRUE ); /* r in the paper */
  373. (void)ok; assert(ok);
  374. gf_mulw_sgn ( b, a, -EDWARDS_D ); /* u in the paper */
  375. gf_mul ( c, b, a ); /* ur */
  376. gf_mul ( a, c, d ); /* ur (aZX-dYT) */
  377. gf_add ( d, b, b ); /* 2u = -2au since a=-1 */
  378. gf_mul ( c, d, p->z ); /* 2uZ */
  379. cond_neg ( b, toggle_hibit_t_over_s ^ ~hibit(c) ); /* u <- -u if negative. */
  380. cond_neg ( c, toggle_hibit_t_over_s ^ ~hibit(c) ); /* u <- -u if negative. */
  381. gf_mul ( d, b, p->y );
  382. gf_add ( s, a, d );
  383. cond_neg ( s, toggle_hibit_s ^ hibit(s) );
  384. #else
  385. /* More complicated because of rotation */
  386. /* FIXME This code is wrong for certain non-Curve25519 curves; check if it's because of Cofactor==8 or IMAGINE_ROTATION */
  387. gf c, d;
  388. gf_s *b = s, *a = minus_t_over_s;
  389. #if IMAGINE_TWIST
  390. gf x, t;
  391. gf_mul ( x, p->x, SQRT_MINUS_ONE);
  392. gf_mul ( t, p->t, SQRT_MINUS_ONE);
  393. gf_sub ( x, ZERO, x );
  394. gf_sub ( t, ZERO, t );
  395. gf_add ( a, p->z, x );
  396. gf_sub ( b, p->z, x );
  397. gf_mul ( c, a, b ); /* "zx" = Z^2 - aX^2 = Z^2 - X^2 */
  398. #else
  399. const gf_s *x = p->x, *t = p->t;
  400. /* Won't hit the cond_sel below because COFACTOR==8 requires IMAGINE_TWIST for now. */
  401. gf_sqr ( a, p->z );
  402. gf_sqr ( b, p->x );
  403. gf_add ( c, a, b ); /* "zx" = Z^2 - aX^2 = Z^2 + X^2 */
  404. #endif
  405. gf_mul ( a, p->z, t ); /* "tz" = T*Z */
  406. gf_sqr ( b, a );
  407. gf_mul ( d, b, c ); /* (TZ)^2 * (Z^2-aX^2) */
  408. decaf_bool_t ok = gf_isqrt_chk ( b, d, DECAF_TRUE );
  409. (void)ok; assert(ok);
  410. gf_mul ( d, b, a ); /* "osx" = 1 / sqrt(z^2-ax^2) */
  411. gf_mul ( a, b, c );
  412. gf_mul ( b, a, d ); /* 1/tz */
  413. decaf_bool_t rotate;
  414. #if (COFACTOR == 8)
  415. {
  416. gf e;
  417. gf_sqr(e, p->z);
  418. gf_mul(a, e, b); /* z^2 / tz = z/t = 1/xy */
  419. rotate = hibit(a) ^ toggle_rotation;
  420. /* Curve25519: cond select between zx * 1/tz or sqrt(1-d); y=-x */
  421. gf_mul ( a, b, c );
  422. cond_sel ( a, a, SQRT_ONE_MINUS_D, rotate );
  423. cond_sel ( x, p->y, x, rotate );
  424. }
  425. #else
  426. (void)toggle_rotation;
  427. rotate = 0;
  428. #endif
  429. gf_mul ( c, a, d ); // new "osx"
  430. gf_mul ( a, c, p->z );
  431. gf_add ( a, a, a ); // 2 * "osx" * Z
  432. decaf_bool_t tg1 = rotate ^ toggle_hibit_t_over_s ^~ hibit(a);
  433. cond_neg ( c, tg1 );
  434. cond_neg ( a, rotate ^ tg1 );
  435. gf_mul ( d, b, p->z );
  436. gf_add ( d, d, c );
  437. gf_mul ( b, d, x ); /* here "x" = y unless rotate */
  438. cond_neg ( b, toggle_hibit_s ^ hibit(b) );
  439. #endif
  440. }
  441. void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
  442. gf s, mtos;
  443. deisogenize(s,mtos,p,0,0,0);
  444. gf_encode ( ser, s );
  445. }
  446. /**
  447. * Deserialize a field element, return TRUE if < p.
  448. */
  449. static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) {
  450. return gf_deserialize((gf_s *)s, ser);
  451. }
  452. decaf_bool_t API_NS(point_decode) (
  453. point_t p,
  454. const unsigned char ser[SER_BYTES],
  455. decaf_bool_t allow_identity
  456. ) {
  457. gf s, a, b, c, d, e, f;
  458. decaf_bool_t succ = gf_deser(s, ser), zero = gf_eq(s, ZERO);
  459. succ &= allow_identity | ~zero;
  460. succ &= ~hibit(s);
  461. gf_sqr ( a, s );
  462. #if IMAGINE_TWIST
  463. gf_sub ( f, ONE, a ); /* f = 1-as^2 = 1-s^2*/
  464. #else
  465. gf_add ( f, ONE, a ); /* f = 1-as^2 = 1+s^2 */
  466. #endif
  467. succ &= ~ gf_eq( f, ZERO );
  468. gf_sqr ( b, f );
  469. gf_mulw_sgn ( c, a, 4*IMAGINE_TWIST-4*EDWARDS_D );
  470. gf_add ( c, c, b ); /* t^2 */
  471. gf_mul ( d, f, s ); /* s(1-as^2) for denoms */
  472. gf_sqr ( e, d );
  473. gf_mul ( b, c, e );
  474. succ &= gf_isqrt_chk ( e, b, DECAF_TRUE ); /* e = 1/(t s (1-as^2)) */
  475. gf_mul ( b, e, d ); /* 1/t */
  476. gf_mul ( d, e, c ); /* d = t / (s(1-as^2)) */
  477. gf_mul ( e, d, f ); /* t/s */
  478. decaf_bool_t negtos = hibit(e);
  479. cond_neg(b, negtos);
  480. cond_neg(d, negtos);
  481. #if IMAGINE_TWIST
  482. gf_add ( p->z, ONE, a); /* Z = 1+as^2 = 1-s^2 */
  483. #else
  484. gf_sub ( p->z, ONE, a); /* Z = 1+as^2 = 1-s^2 */
  485. #endif
  486. #if COFACTOR == 8
  487. gf_mul ( a, p->z, d); /* t(1+s^2) / s(1-s^2) = 2/xy */
  488. succ &= ~lobit(a); /* = ~hibit(a/2), since hibit(x) = lobit(2x) */
  489. #endif
  490. gf_mul ( a, f, b ); /* y = (1-s^2) / t */
  491. gf_mul ( p->y, p->z, a ); /* Y = yZ */
  492. #if IMAGINE_TWIST
  493. gf_add ( b, s, s );
  494. gf_mul(p->x, b, SQRT_MINUS_ONE); /* Curve25519 */
  495. #else
  496. gf_add ( p->x, s, s );
  497. #endif
  498. gf_mul ( p->t, p->x, a ); /* T = 2s (1-as^2)/t */
  499. p->y->limb[0] -= zero;
  500. assert(API_NS(point_valid)(p) | ~succ);
  501. return succ;
  502. }
  503. #if IMAGINE_TWIST
  504. #define TWISTED_D (-(EDWARDS_D))
  505. #else
  506. #define TWISTED_D ((EDWARDS_D)-1)
  507. #endif
  508. #if TWISTED_D < 0
  509. #define EFF_D (-(TWISTED_D))
  510. #define NEG_D 1
  511. #else
  512. #define EFF_D TWISTED_D
  513. #define NEG_D 0
  514. #endif
  515. void API_NS(point_sub) (
  516. point_t p,
  517. const point_t q,
  518. const point_t r
  519. ) {
  520. gf a, b, c, d;
  521. gf_sub_nr ( b, q->y, q->x );
  522. gf_sub_nr ( d, r->y, r->x );
  523. gf_add_nr ( c, r->y, r->x );
  524. gf_mul ( a, c, b );
  525. gf_add_nr ( b, q->y, q->x );
  526. gf_mul ( p->y, d, b );
  527. gf_mul ( b, r->t, q->t );
  528. gf_mulw_sgn ( p->x, b, 2*EFF_D );
  529. gf_add_nr ( b, a, p->y );
  530. gf_sub_nr ( c, p->y, a );
  531. gf_mul ( a, q->z, r->z );
  532. gf_add_nr ( a, a, a );
  533. #if NEG_D
  534. gf_sub_nr ( p->y, a, p->x );
  535. gf_add_nr ( a, a, p->x );
  536. #else
  537. gf_add_nr ( p->y, a, p->x );
  538. gf_sub_nr ( a, a, p->x );
  539. #endif
  540. gf_mul ( p->z, a, p->y );
  541. gf_mul ( p->x, p->y, c );
  542. gf_mul ( p->y, a, b );
  543. gf_mul ( p->t, b, c );
  544. }
  545. void API_NS(point_add) (
  546. point_t p,
  547. const point_t q,
  548. const point_t r
  549. ) {
  550. gf a, b, c, d;
  551. gf_sub_nr ( b, q->y, q->x );
  552. gf_sub_nr ( c, r->y, r->x );
  553. gf_add_nr ( d, r->y, r->x );
  554. gf_mul ( a, c, b );
  555. gf_add_nr ( b, q->y, q->x );
  556. gf_mul ( p->y, d, b );
  557. gf_mul ( b, r->t, q->t );
  558. gf_mulw_sgn ( p->x, b, 2*EFF_D );
  559. gf_add_nr ( b, a, p->y );
  560. gf_sub_nr ( c, p->y, a );
  561. gf_mul ( a, q->z, r->z );
  562. gf_add_nr ( a, a, a );
  563. #if NEG_D
  564. gf_add_nr ( p->y, a, p->x );
  565. gf_sub_nr ( a, a, p->x );
  566. #else
  567. gf_sub_nr ( p->y, a, p->x );
  568. gf_add_nr ( a, a, p->x );
  569. #endif
  570. gf_mul ( p->z, a, p->y );
  571. gf_mul ( p->x, p->y, c );
  572. gf_mul ( p->y, a, b );
  573. gf_mul ( p->t, b, c );
  574. }
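/* Double q into p. When before_double is nonzero the T coordinate is not computed,
 * because the next operation is another doubling and does not read it. */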
  575. snv point_double_internal (
  576. point_t p,
  577. const point_t q,
  578. decaf_bool_t before_double
  579. ) {
  580. gf a, b, c, d;
  581. gf_sqr ( c, q->x );
  582. gf_sqr ( a, q->y );
  583. gf_add_nr ( d, c, a );
  584. gf_add_nr ( p->t, q->y, q->x );
  585. gf_sqr ( b, p->t );
  586. gf_subx_nr ( b, b, d, 3 );
  587. gf_sub_nr ( p->t, a, c );
  588. gf_sqr ( p->x, q->z );
  589. gf_add_nr ( p->z, p->x, p->x );
  590. gf_subx_nr ( a, p->z, p->t, 4 );
  591. gf_mul ( p->x, a, b );
  592. gf_mul ( p->z, p->t, a );
  593. gf_mul ( p->y, p->t, d );
  594. if (!before_double) gf_mul ( p->t, b, d );
  595. }
  596. void API_NS(point_double)(point_t p, const point_t q) {
  597. point_double_internal(p,q,0);
  598. }
  599. void API_NS(point_negate) (
  600. point_t nega,
  601. const point_t a
  602. ) {
  603. gf_sub(nega->x, ZERO, a->x);
  604. gf_cpy(nega->y, a->y);
  605. gf_cpy(nega->z, a->z);
  606. gf_sub(nega->t, ZERO, a->t);
  607. }
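/* Little-endian decode of up to nbytes bytes into a scalar, with no reduction. */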
  608. siv scalar_decode_short (
  609. scalar_t s,
  610. const unsigned char ser[SER_BYTES],
  611. unsigned int nbytes
  612. ) {
  613. unsigned int i,j,k=0;
  614. for (i=0; i<SCALAR_LIMBS; i++) {
  615. decaf_word_t out = 0;
  616. for (j=0; j<sizeof(decaf_word_t) && k<nbytes; j++,k++) {
  617. out |= ((decaf_word_t)ser[k])<<(8*j);
  618. }
  619. s->limb[i] = out;
  620. }
  621. }
  622. decaf_bool_t API_NS(scalar_decode)(
  623. scalar_t s,
  624. const unsigned char ser[SER_BYTES]
  625. ) {
  626. unsigned int i;
  627. scalar_decode_short(s, ser, SER_BYTES);
  628. dsword_t accum = 0;
  629. for (i=0; i<SCALAR_LIMBS; i++) {
  630. accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
  631. }
  632. API_NS(scalar_mul)(s,s,API_NS(scalar_one)); /* ham-handed reduce */
  633. return accum;
  634. }
  635. void API_NS(scalar_destroy) (
  636. scalar_t scalar
  637. ) {
  638. decaf_bzero(scalar, sizeof(scalar_t));
  639. }
  640. static inline void ignore_result ( decaf_bool_t boo ) {
  641. (void)boo;
  642. }
  643. void API_NS(scalar_decode_long)(
  644. scalar_t s,
  645. const unsigned char *ser,
  646. size_t ser_len
  647. ) {
  648. if (ser_len == 0) {
  649. API_NS(scalar_copy)(s, API_NS(scalar_zero));
  650. return;
  651. }
  652. size_t i;
  653. scalar_t t1, t2;
  654. i = ser_len - (ser_len%SER_BYTES);
  655. if (i==ser_len) i -= SER_BYTES;
  656. scalar_decode_short(t1, &ser[i], ser_len-i);
  657. if (ser_len == sizeof(scalar_t)) {
  658. assert(i==0);
  659. /* ham-handed reduce */
  660. API_NS(scalar_mul)(s,t1,API_NS(scalar_one));
  661. API_NS(scalar_destroy)(t1);
  662. return;
  663. }
  664. while (i) {
  665. i -= SER_BYTES;
  666. sc_montmul(t1,t1,API_NS(sc_r2));
  667. ignore_result( API_NS(scalar_decode)(t2, ser+i) );
  668. API_NS(scalar_add)(t1, t1, t2);
  669. }
  670. API_NS(scalar_copy)(s, t1);
  671. API_NS(scalar_destroy)(t1);
  672. API_NS(scalar_destroy)(t2);
  673. }
  674. void API_NS(scalar_encode)(
  675. unsigned char ser[SER_BYTES],
  676. const scalar_t s
  677. ) {
  678. unsigned int i,j,k=0;
  679. for (i=0; i<SCALAR_LIMBS; i++) {
  680. for (j=0; j<sizeof(decaf_word_t); j++,k++) {
  681. ser[k] = s->limb[i] >> (8*j);
  682. }
  683. }
  684. }
  685. /* Operations on [p]niels */
  686. siv cond_neg_niels (
  687. niels_t n,
  688. decaf_bool_t neg
  689. ) {
  690. cond_swap(n->a, n->b, neg);
  691. cond_neg(n->c, neg);
  692. }
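/* Convert an extended point to projective Niels form: a = y-x, b = y+x, c = 2*TWISTED_D*t, z = 2z. */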
  693. static void pt_to_pniels (
  694. pniels_t b,
  695. const point_t a
  696. ) {
  697. gf_sub ( b->n->a, a->y, a->x );
  698. gf_add ( b->n->b, a->x, a->y );
  699. gf_mulw_sgn ( b->n->c, a->t, 2*TWISTED_D );
  700. gf_add ( b->z, a->z, a->z );
  701. }
  702. static void pniels_to_pt (
  703. point_t e,
  704. const pniels_t d
  705. ) {
  706. gf eu;
  707. gf_add ( eu, d->n->b, d->n->a );
  708. gf_sub ( e->y, d->n->b, d->n->a );
  709. gf_mul ( e->t, e->y, eu);
  710. gf_mul ( e->x, d->z, e->y );
  711. gf_mul ( e->y, d->z, eu );
  712. gf_sqr ( e->z, d->z );
  713. }
  714. snv niels_to_pt (
  715. point_t e,
  716. const niels_t n
  717. ) {
  718. gf_add ( e->y, n->b, n->a );
  719. gf_sub ( e->x, n->b, n->a );
  720. gf_mul ( e->t, e->y, e->x );
  721. gf_cpy ( e->z, ONE );
  722. }
  723. snv add_niels_to_pt (
  724. point_t d,
  725. const niels_t e,
  726. decaf_bool_t before_double
  727. ) {
  728. gf a, b, c;
  729. gf_sub_nr ( b, d->y, d->x );
  730. gf_mul ( a, e->a, b );
  731. gf_add_nr ( b, d->x, d->y );
  732. gf_mul ( d->y, e->b, b );
  733. gf_mul ( d->x, e->c, d->t );
  734. gf_add_nr ( c, a, d->y );
  735. gf_sub_nr ( b, d->y, a );
  736. gf_sub_nr ( d->y, d->z, d->x );
  737. gf_add_nr ( a, d->x, d->z );
  738. gf_mul ( d->z, a, d->y );
  739. gf_mul ( d->x, d->y, b );
  740. gf_mul ( d->y, a, c );
  741. if (!before_double) gf_mul ( d->t, b, c );
  742. }
  743. snv sub_niels_from_pt (
  744. point_t d,
  745. const niels_t e,
  746. decaf_bool_t before_double
  747. ) {
  748. gf a, b, c;
  749. gf_sub_nr ( b, d->y, d->x );
  750. gf_mul ( a, e->b, b );
  751. gf_add_nr ( b, d->x, d->y );
  752. gf_mul ( d->y, e->a, b );
  753. gf_mul ( d->x, e->c, d->t );
  754. gf_add_nr ( c, a, d->y );
  755. gf_sub_nr ( b, d->y, a );
  756. gf_add_nr ( d->y, d->z, d->x );
  757. gf_sub_nr ( a, d->z, d->x );
  758. gf_mul ( d->z, a, d->y );
  759. gf_mul ( d->x, d->y, b );
  760. gf_mul ( d->y, a, c );
  761. if (!before_double) gf_mul ( d->t, b, c );
  762. }
  763. sv add_pniels_to_pt (
  764. point_t p,
  765. const pniels_t pn,
  766. decaf_bool_t before_double
  767. ) {
  768. gf L0;
  769. gf_mul ( L0, p->z, pn->z );
  770. gf_cpy ( p->z, L0 );
  771. add_niels_to_pt( p, pn->n, before_double );
  772. }
  773. sv sub_pniels_from_pt (
  774. point_t p,
  775. const pniels_t pn,
  776. decaf_bool_t before_double
  777. ) {
  778. gf L0;
  779. gf_mul ( L0, p->z, pn->z );
  780. gf_cpy ( p->z, L0 );
  781. sub_niels_from_pt( p, pn->n, before_double );
  782. }
  783. extern const scalar_t API_NS(point_scalarmul_adjustment);
  784. siv constant_time_lookup_xx (
  785. void *__restrict__ out_,
  786. const void *table_,
  787. decaf_word_t elem_bytes,
  788. decaf_word_t n_table,
  789. decaf_word_t idx
  790. ) {
  791. constant_time_lookup(out_,table_,elem_bytes,n_table,idx);
  792. }
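/* Fill multiples[] with the odd multiples b, 3b, 5b, ..., (2*ntable-1)b in pniels form. */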
  793. snv prepare_fixed_window(
  794. pniels_t *multiples,
  795. const point_t b,
  796. int ntable
  797. ) {
  798. point_t tmp;
  799. pniels_t pn;
  800. int i;
  801. point_double_internal(tmp, b, 0);
  802. pt_to_pniels(pn, tmp);
  803. pt_to_pniels(multiples[0], b);
  804. API_NS(point_copy)(tmp, b);
  805. for (i=1; i<ntable; i++) {
  806. add_pniels_to_pt(tmp, pn, 0);
  807. pt_to_pniels(multiples[i], tmp);
  808. }
  809. decaf_bzero(pn,sizeof(pn));
  810. decaf_bzero(tmp,sizeof(tmp));
  811. }
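/* Constant-time a = [scalar]b, using a signed fixed-window recoding over the odd-multiple table above. */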
  812. void API_NS(point_scalarmul) (
  813. point_t a,
  814. const point_t b,
  815. const scalar_t scalar
  816. ) {
  817. const int WINDOW = DECAF_WINDOW_BITS,
  818. WINDOW_MASK = (1<<WINDOW)-1,
  819. WINDOW_T_MASK = WINDOW_MASK >> 1,
  820. NTABLE = 1<<(WINDOW-1);
  821. scalar_t scalar1x;
  822. API_NS(scalar_add)(scalar1x, scalar, API_NS(point_scalarmul_adjustment));
  823. sc_halve(scalar1x,scalar1x,sc_p);
  824. /* Set up a precomputed table with odd multiples of b. */
  825. pniels_t pn, multiples[NTABLE];
  826. point_t tmp;
  827. prepare_fixed_window(multiples, b, NTABLE);
  828. /* Initialize. */
  829. int i,j,first=1;
  830. i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;
  831. for (; i>=0; i-=WINDOW) {
  832. /* Fetch another block of bits */
  833. decaf_word_t bits = scalar1x->limb[i/WBITS] >> (i%WBITS);
  834. if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
  835. bits ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
  836. }
  837. bits &= WINDOW_MASK;
  838. decaf_word_t inv = (bits>>(WINDOW-1))-1;
  839. bits ^= inv;
  840. /* Add in from table. Compute t only on last iteration. */
  841. constant_time_lookup_xx(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK);
  842. cond_neg_niels(pn->n, inv);
  843. if (first) {
  844. pniels_to_pt(tmp, pn);
  845. first = 0;
  846. } else {
  847. /* Using Hisil et al's lookahead method instead of extensible here
  848. * for no particular reason. Double WINDOW times, but only compute t on
  849. * the last one.
  850. */
  851. for (j=0; j<WINDOW-1; j++)
  852. point_double_internal(tmp, tmp, -1);
  853. point_double_internal(tmp, tmp, 0);
  854. add_pniels_to_pt(tmp, pn, i ? -1 : 0);
  855. }
  856. }
  857. /* Write out the answer */
  858. API_NS(point_copy)(a,tmp);
  859. decaf_bzero(scalar1x,sizeof(scalar1x));
  860. decaf_bzero(pn,sizeof(pn));
  861. decaf_bzero(multiples,sizeof(multiples));
  862. decaf_bzero(tmp,sizeof(tmp));
  863. }
  864. void API_NS(point_double_scalarmul) (
  865. point_t a,
  866. const point_t b,
  867. const scalar_t scalarb,
  868. const point_t c,
  869. const scalar_t scalarc
  870. ) {
  871. const int WINDOW = DECAF_WINDOW_BITS,
  872. WINDOW_MASK = (1<<WINDOW)-1,
  873. WINDOW_T_MASK = WINDOW_MASK >> 1,
  874. NTABLE = 1<<(WINDOW-1);
  875. scalar_t scalar1x, scalar2x;
  876. API_NS(scalar_add)(scalar1x, scalarb, API_NS(point_scalarmul_adjustment));
  877. sc_halve(scalar1x,scalar1x,sc_p);
  878. API_NS(scalar_add)(scalar2x, scalarc, API_NS(point_scalarmul_adjustment));
  879. sc_halve(scalar2x,scalar2x,sc_p);
  880. /* Set up a precomputed table with odd multiples of b. */
  881. pniels_t pn, multiples1[NTABLE], multiples2[NTABLE];
  882. point_t tmp;
  883. prepare_fixed_window(multiples1, b, NTABLE);
  884. prepare_fixed_window(multiples2, c, NTABLE);
  885. /* Initialize. */
  886. int i,j,first=1;
  887. i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;
  888. for (; i>=0; i-=WINDOW) {
  889. /* Fetch another block of bits */
  890. decaf_word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
  891. bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
  892. if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
  893. bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
  894. bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
  895. }
  896. bits1 &= WINDOW_MASK;
  897. bits2 &= WINDOW_MASK;
  898. decaf_word_t inv1 = (bits1>>(WINDOW-1))-1;
  899. decaf_word_t inv2 = (bits2>>(WINDOW-1))-1;
  900. bits1 ^= inv1;
  901. bits2 ^= inv2;
  902. /* Add in from table. Compute t only on last iteration. */
  903. constant_time_lookup_xx(pn, multiples1, sizeof(pn), NTABLE, bits1 & WINDOW_T_MASK);
  904. cond_neg_niels(pn->n, inv1);
  905. if (first) {
  906. pniels_to_pt(tmp, pn);
  907. first = 0;
  908. } else {
  909. /* Using Hisil et al's lookahead method instead of extensible here
  910. * for no particular reason. Double WINDOW times, but only compute t on
  911. * the last one.
  912. */
  913. for (j=0; j<WINDOW-1; j++)
  914. point_double_internal(tmp, tmp, -1);
  915. point_double_internal(tmp, tmp, 0);
  916. add_pniels_to_pt(tmp, pn, 0);
  917. }
  918. constant_time_lookup_xx(pn, multiples2, sizeof(pn), NTABLE, bits2 & WINDOW_T_MASK);
  919. cond_neg_niels(pn->n, inv2);
  920. add_pniels_to_pt(tmp, pn, i?-1:0);
  921. }
  922. /* Write out the answer */
  923. API_NS(point_copy)(a,tmp);
  924. decaf_bzero(scalar1x,sizeof(scalar1x));
  925. decaf_bzero(scalar2x,sizeof(scalar2x));
  926. decaf_bzero(pn,sizeof(pn));
  927. decaf_bzero(multiples1,sizeof(multiples1));
  928. decaf_bzero(multiples2,sizeof(multiples2));
  929. decaf_bzero(tmp,sizeof(tmp));
  930. }
  931. decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
  932. /* equality mod 2-torsion compares x/y */
  933. gf a, b;
  934. gf_mul ( a, p->y, q->x );
  935. gf_mul ( b, q->y, p->x );
  936. decaf_bool_t succ = gf_eq(a,b);
  937. #if (COFACTOR == 8) && IMAGINE_TWIST
  938. gf_mul ( a, p->y, q->y );
  939. gf_mul ( b, q->x, p->x );
  940. #if !(IMAGINE_TWIST)
  941. gf_sub ( a, ZERO, a );
  942. #else
  943. /* Interesting note: the 4tor would normally be rotation.
  944. * But because of the *i twist, it's actually
  945. * (x,y) <-> (iy,ix)
  946. */
  947. /* No code, just a comment. */
  948. #endif
  949. succ |= gf_eq(a,b);
  950. #endif
  951. return succ;
  952. }
  953. void API_NS(point_from_hash_nonuniform) (
  954. point_t p,
  955. const unsigned char ser[SER_BYTES]
  956. ) {
  957. // TODO: simplify since we don't return a hint anymore
  958. // TODO: test pathological case ur0^2 = 1/(1-d)
  959. gf r0,r,a,b,c,dee,D,N,rN,e;
  960. gf_deser(r0,ser);
  961. gf_strong_reduce(r0);
  962. gf_sqr(a,r0);
  963. #if P_MOD_8 == 5
  964. /* r = QNR * a */
  965. gf_mul(r,a,SQRT_MINUS_ONE);
  966. #else
  967. gf_sub(r,ZERO,a);
  968. #endif
  969. gf_mulw_sgn(dee,ONE,EDWARDS_D);
  970. gf_mulw_sgn(c,r,EDWARDS_D);
  971. /* Compute D := (dr+a-d)(dr-ar-d) with a=1 */
  972. gf_sub(a,c,dee);
  973. gf_add(a,a,ONE);
  974. gf_sub(b,c,r);
  975. gf_sub(b,b,dee);
  976. gf_mul(D,a,b);
  977. /* compute N := (r+1)(a-2d) */
  978. gf_add(a,r,ONE);
  979. gf_mulw_sgn(N,a,1-2*EDWARDS_D);
  980. /* e = +-1/sqrt(+-ND) */
  981. gf_mul(rN,r,N);
  982. gf_mul(a,rN,D);
  983. decaf_bool_t square = gf_isqrt_chk(e,a,DECAF_FALSE);
  984. /* b <- t/s */
  985. cond_sel(c,r0,r,square); /* r? = sqr ? r : 1 */
  986. /* In two steps to avoid overflow on 32-bit arch */
  987. gf_mulw_sgn(a,c,1-2*EDWARDS_D);
  988. gf_mulw_sgn(b,a,1-2*EDWARDS_D);
  989. gf_sub(c,r,ONE);
  990. gf_mul(a,b,c); /* = r? * (r-1) * (a-2d)^2 with a=1 */
  991. gf_mul(b,a,e);
  992. cond_neg(b,~square);
  993. cond_sel(c,r0,ONE,square);
  994. gf_mul(a,e,c);
  995. gf_mul(c,a,D); /* 1/s except for sign. FUTURE: simplify using this. */
  996. gf_sub(b,b,c);
  997. /* a <- s = e * N * (sqr ? r : r0)
  998. * e^2 r N D = 1
  999. * 1/s = 1/(e * N * (sqr ? r : r0)) = e * D * (sqr ? 1 : r0)
  1000. */
  1001. gf_mul(a,N,r0);
  1002. cond_sel(rN,a,rN,square);
  1003. gf_mul(a,rN,e);
  1004. gf_mul(c,a,b);
  1005. /* Normalize/negate */
  1006. decaf_bool_t neg_s = hibit(a) ^ ~square;
  1007. cond_neg(a,neg_s); /* ends up negative if ~square */
  1008. /* b <- t */
  1009. cond_sel(b,c,ONE,gf_eq(c,ZERO)); /* 0,0 -> 1,0 */
  1010. /* isogenize */
  1011. #if IMAGINE_TWIST
  1012. gf_mul(c,a,SQRT_MINUS_ONE);
  1013. gf_cpy(a,c);
  1014. #endif
  1015. gf_sqr(c,a); /* s^2 */
  1016. gf_add(a,a,a); /* 2s */
  1017. gf_add(e,c,ONE);
  1018. gf_mul(p->t,a,e); /* 2s(1+s^2) */
  1019. gf_mul(p->x,a,b); /* 2st */
  1020. gf_sub(a,ONE,c);
  1021. gf_mul(p->y,e,a); /* (1+s^2)(1-s^2) */
  1022. gf_mul(p->z,a,b); /* (1-s^2)t */
  1023. assert(API_NS(point_valid)(p));
  1024. }
  1025. decaf_bool_t
  1026. API_NS(invert_elligator_nonuniform) (
  1027. unsigned char recovered_hash[SER_BYTES],
  1028. const point_t p,
  1029. uint16_t hint_
  1030. ) {
  1031. uint64_t hint = hint_;
  1032. decaf_bool_t sgn_s = -(hint & 1),
  1033. sgn_t_over_s = -(hint>>1 & 1),
  1034. sgn_r0 = -(hint>>2 & 1),
  1035. sgn_ed_T = -(hint>>3 & 1);
  1036. gf a, b, c, d;
  1037. deisogenize(a,c,p,sgn_s,sgn_t_over_s,sgn_ed_T);
  1038. /* ok, a = s; c = -t/s */
  1039. gf_mul(b,c,a);
  1040. gf_sub(b,ONE,b); /* t+1 */
  1041. gf_sqr(c,a); /* s^2 */
  1042. decaf_bool_t is_identity = gf_eq(p->t,ZERO);
  1043. { /* identity adjustments */
  1044. /* in case of identity, currently c=0, t=0, b=1, will encode to 1 */
  1045. /* if hint is 0, -> 0 */
  1046. /* if hint is to neg t/s, then go to infinity, effectively set s to 1 */
  1047. cond_sel(c,c,ONE,is_identity & sgn_t_over_s);
  1048. cond_sel(b,b,ZERO,is_identity & ~sgn_t_over_s & ~sgn_s); /* identity adjust */
  1049. }
  1050. gf_mulw_sgn(d,c,2*EDWARDS_D-1); /* d = (2d-a)s^2 */
  1051. gf_add(a,b,d); /* num? */
  1052. gf_sub(d,d,b); /* den? */
  1053. gf_mul(b,a,d); /* n*d */
  1054. cond_sel(a,d,a,sgn_s);
  1055. #if P_MOD_8 == 5
  1056. gf_mul(d,b,SQRT_MINUS_ONE);
  1057. #else
  1058. gf_sub(d,ZERO,b);
  1059. #endif
  1060. decaf_bool_t succ = gf_isqrt_chk(c,d,DECAF_TRUE);
  1061. gf_mul(b,a,c);
  1062. cond_neg(b, sgn_r0^hibit(b));
  1063. succ &= ~(gf_eq(b,ZERO) & sgn_r0);
  1064. #if COFACTOR == 8
  1065. succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */
  1066. #endif
  1067. gf_encode(recovered_hash, b);
  1068. /* TODO: deal with overflow flag */
  1069. return succ;
  1070. }
  1071. void API_NS(point_from_hash_uniform) (
  1072. point_t pt,
  1073. const unsigned char hashed_data[2*SER_BYTES]
  1074. ) {
  1075. point_t pt2;
  1076. API_NS(point_from_hash_nonuniform)(pt,hashed_data);
  1077. API_NS(point_from_hash_nonuniform)(pt2,&hashed_data[SER_BYTES]);
  1078. API_NS(point_add)(pt,pt,pt2);
  1079. }
  1080. decaf_bool_t
  1081. API_NS(invert_elligator_uniform) (
  1082. unsigned char partial_hash[2*SER_BYTES],
  1083. const point_t p,
  1084. uint16_t hint
  1085. ) {
  1086. point_t pt2;
  1087. API_NS(point_from_hash_nonuniform)(pt2,&partial_hash[SER_BYTES]);
  1088. API_NS(point_sub)(pt2,p,pt2);
  1089. return API_NS(invert_elligator_nonuniform)(partial_hash,pt2,hint);
  1090. }
  1091. decaf_bool_t API_NS(point_valid) (
  1092. const point_t p
  1093. ) {
  1094. gf a,b,c;
  1095. gf_mul(a,p->x,p->y);
  1096. gf_mul(b,p->z,p->t);
  1097. decaf_bool_t out = gf_eq(a,b);
  1098. gf_sqr(a,p->x);
  1099. gf_sqr(b,p->y);
  1100. gf_sub(a,b,a);
  1101. gf_sqr(b,p->t);
  1102. gf_mulw_sgn(c,b,TWISTED_D);
  1103. gf_sqr(b,p->z);
  1104. gf_add(b,b,c);
  1105. out &= gf_eq(a,b);
  1106. out &= ~gf_eq(p->z,ZERO);
  1107. return out;
  1108. }
  1109. void API_NS(point_debugging_torque) (
  1110. point_t q,
  1111. const point_t p
  1112. ) {
  1113. #if COFACTOR == 8
  1114. gf tmp;
  1115. gf_mul(tmp,p->x,SQRT_MINUS_ONE);
  1116. gf_mul(q->x,p->y,SQRT_MINUS_ONE);
  1117. gf_cpy(q->y,tmp);
  1118. gf_cpy(q->z,p->z);
  1119. gf_sub(q->t,ZERO,p->t);
  1120. #else
  1121. gf_sub(q->x,ZERO,p->x);
  1122. gf_sub(q->y,ZERO,p->y);
  1123. gf_cpy(q->z,p->z);
  1124. gf_cpy(q->t,p->t);
  1125. #endif
  1126. }
  1127. void API_NS(point_debugging_pscale) (
  1128. point_t q,
  1129. const point_t p,
  1130. const uint8_t factor[SER_BYTES]
  1131. ) {
  1132. gf gfac,tmp;
  1133. ignore_result(gf_deser(gfac,factor));
  1134. cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO));
  1135. gf_mul(tmp,p->x,gfac);
  1136. gf_cpy(q->x,tmp);
  1137. gf_mul(tmp,p->y,gfac);
  1138. gf_cpy(q->y,tmp);
  1139. gf_mul(tmp,p->z,gfac);
  1140. gf_cpy(q->z,tmp);
  1141. gf_mul(tmp,p->t,gfac);
  1142. gf_cpy(q->t,tmp);
  1143. }
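/* Simultaneous inversion (Montgomery's trick): inverts n > 1 field elements with a single
 * gf_invert plus O(n) multiplications. out must not alias in. */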
  1144. static void gf_batch_invert (
  1145. gf *__restrict__ out,
  1146. const gf *in,
  1147. unsigned int n
  1148. ) {
  1149. gf t1;
  1150. assert(n>1);
  1151. gf_cpy(out[1], in[0]);
  1152. int i;
  1153. for (i=1; i<(int) (n-1); i++) {
  1154. gf_mul(out[i+1], out[i], in[i]);
  1155. }
  1156. gf_mul(out[0], out[n-1], in[n-1]);
  1157. gf_invert(out[0], out[0]);
  1158. for (i=n-1; i>0; i--) {
  1159. gf_mul(t1, out[i], out[0]);
  1160. gf_cpy(out[i], t1);
  1161. gf_mul(t1, out[0], in[i]);
  1162. gf_cpy(out[0], t1);
  1163. }
  1164. }
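/* Scale each Niels table entry by the inverse of its Z (batch-inverted into zis), making the table affine. */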
  1165. static void batch_normalize_niels (
  1166. niels_t *table,
  1167. gf *zs,
  1168. gf *zis,
  1169. int n
  1170. ) {
  1171. int i;
  1172. gf product;
  1173. gf_batch_invert(zis, zs, n);
  1174. for (i=0; i<n; i++) {
  1175. gf_mul(product, table[i]->a, zis[i]);
  1176. gf_strong_reduce(product);
  1177. gf_cpy(table[i]->a, product);
  1178. gf_mul(product, table[i]->b, zis[i]);
  1179. gf_strong_reduce(product);
  1180. gf_cpy(table[i]->b, product);
  1181. gf_mul(product, table[i]->c, zis[i]);
  1182. gf_strong_reduce(product);
  1183. gf_cpy(table[i]->c, product);
  1184. }
  1185. decaf_bzero(product,sizeof(product));
  1186. }
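/* Fill the comb table for fixed-base scalar multiplication: DECAF_COMBS_N combs of
 * DECAF_COMBS_T teeth spaced DECAF_COMBS_S bits apart, normalized to affine Niels form at the end. */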
  1187. void API_NS(precompute) (
  1188. precomputed_s *table,
  1189. const point_t base
  1190. ) {
  1191. const unsigned int n = DECAF_COMBS_N, t = DECAF_COMBS_T, s = DECAF_COMBS_S;
  1192. assert(n*t*s >= SCALAR_BITS);
  1193. point_t working, start, doubles[t-1];
  1194. API_NS(point_copy)(working, base);
  1195. pniels_t pn_tmp;
  1196. gf zs[n<<(t-1)], zis[n<<(t-1)];
  1197. unsigned int i,j,k;
  1198. /* Compute n tables */
  1199. for (i=0; i<n; i++) {
  1200. /* Doubling phase */
  1201. for (j=0; j<t; j++) {
  1202. if (j) API_NS(point_add)(start, start, working);
  1203. else API_NS(point_copy)(start, working);
  1204. if (j==t-1 && i==n-1) break;
  1205. point_double_internal(working, working,0);
  1206. if (j<t-1) API_NS(point_copy)(doubles[j], working);
  1207. for (k=0; k<s-1; k++)
  1208. point_double_internal(working, working, k<s-2);
  1209. }
  1210. /* Gray-code phase */
  1211. for (j=0;; j++) {
  1212. int gray = j ^ (j>>1);
  1213. int idx = (((i+1)<<(t-1))-1) ^ gray;
  1214. pt_to_pniels(pn_tmp, start);
  1215. memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
  1216. gf_cpy(zs[idx], pn_tmp->z);
  1217. if (j >= (1u<<(t-1)) - 1) break;
  1218. int delta = (j+1) ^ ((j+1)>>1) ^ gray;
  1219. for (k=0; delta>1; k++)
  1220. delta >>=1;
  1221. if (gray & (1<<k)) {
  1222. API_NS(point_add)(start, start, doubles[k]);
  1223. } else {
  1224. API_NS(point_sub)(start, start, doubles[k]);
  1225. }
  1226. }
  1227. }
  1228. batch_normalize_niels(table->table,zs,zis,n<<(t-1));
  1229. decaf_bzero(zs,sizeof(zs));
  1230. decaf_bzero(zis,sizeof(zis));
  1231. decaf_bzero(pn_tmp,sizeof(pn_tmp));
  1232. decaf_bzero(working,sizeof(working));
  1233. decaf_bzero(start,sizeof(start));
  1234. decaf_bzero(doubles,sizeof(doubles));
  1235. }
  1236. extern const scalar_t API_NS(precomputed_scalarmul_adjustment);
  1237. siv constant_time_lookup_xx_niels (
  1238. niels_s *__restrict__ ni,
  1239. const niels_t *table,
  1240. int nelts,
  1241. int idx
  1242. ) {
  1243. constant_time_lookup_xx(ni, table, sizeof(niels_s), nelts, idx);
  1244. }
  1245. void API_NS(precomputed_scalarmul) (
  1246. point_t out,
  1247. const precomputed_s *table,
  1248. const scalar_t scalar
  1249. ) {
  1250. int i;
  1251. unsigned j,k;
  1252. const unsigned int n = DECAF_COMBS_N, t = DECAF_COMBS_T, s = DECAF_COMBS_S;
  1253. scalar_t scalar1x;
  1254. API_NS(scalar_add)(scalar1x, scalar, API_NS(precomputed_scalarmul_adjustment));
  1255. sc_halve(scalar1x,scalar1x,sc_p);
  1256. niels_t ni;
  1257. for (i=s-1; i>=0; i--) {
  1258. if (i != (int)s-1) point_double_internal(out,out,0);
  1259. for (j=0; j<n; j++) {
  1260. int tab = 0;
  1261. for (k=0; k<t; k++) {
  1262. unsigned int bit = i + s*(k + j*t);
  1263. if (bit < SCALAR_BITS) {
  1264. tab |= (scalar1x->limb[bit/WBITS] >> (bit%WBITS) & 1) << k;
  1265. }
  1266. }
  1267. decaf_bool_t invert = (tab>>(t-1))-1;
  1268. tab ^= invert;
  1269. tab &= (1<<(t-1)) - 1;
  1270. constant_time_lookup_xx_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab);
  1271. cond_neg_niels(ni, invert);
  1272. if ((i!=(int)s-1)||j) {
  1273. add_niels_to_pt(out, ni, j==n-1 && i);
  1274. } else {
  1275. niels_to_pt(out, ni);
  1276. }
  1277. }
  1278. }
  1279. decaf_bzero(ni,sizeof(ni));
  1280. decaf_bzero(scalar1x,sizeof(scalar1x));
  1281. }
  1282. void API_NS(point_cond_sel) (
  1283. point_t out,
  1284. const point_t a,
  1285. const point_t b,
  1286. decaf_bool_t pick_b
  1287. ) {
  1288. pick_b = ~(((decaf_dword_t)pick_b - 1) >> WBITS);
  1289. constant_time_select(out,b,a,sizeof(point_t),pick_b);
  1290. }
  1291. void API_NS(scalar_cond_sel) (
  1292. scalar_t out,
  1293. const scalar_t a,
  1294. const scalar_t b,
  1295. decaf_bool_t pick_b
  1296. ) {
  1297. pick_b = ~(((decaf_dword_t)pick_b - 1) >> WBITS);
  1298. constant_time_select(out,b,a,sizeof(scalar_t),pick_b);
  1299. }
  1300. /* TODO: restore Curve25519 Montgomery ladder? */
  1301. decaf_bool_t API_NS(direct_scalarmul) (
  1302. uint8_t scaled[SER_BYTES],
  1303. const uint8_t base[SER_BYTES],
  1304. const scalar_t scalar,
  1305. decaf_bool_t allow_identity,
  1306. decaf_bool_t short_circuit
  1307. ) {
  1308. point_t basep;
  1309. decaf_bool_t succ = API_NS(point_decode)(basep, base, allow_identity);
  1310. if (short_circuit & ~succ) return succ;
  1311. API_NS(point_cond_sel)(basep, API_NS(point_base), basep, succ);
  1312. API_NS(point_scalarmul)(basep, basep, scalar);
  1313. API_NS(point_encode)(scaled, basep);
  1314. API_NS(point_destroy)(basep);
  1315. return succ;
  1316. }
  1317. /**
  1318. * @cond internal
  1319. * Control for variable-time scalar multiply algorithms.
  1320. */
  1321. struct smvt_control {
  1322. int power, addend;
  1323. };
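/* Recode the scalar into signed odd digits of width tableBits+1. Returns the number of
 * control words written; the list is terminated by an entry with power = -1. */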
  1324. static int recode_wnaf (
  1325. struct smvt_control *control, /* [nbits/(tableBits+1) + 3] */
  1326. const scalar_t scalar,
  1327. unsigned int tableBits
  1328. ) {
  1329. int current = 0, i, j;
  1330. unsigned int position = 0;
  1331. /* PERF: negate scalar if it's large
  1332. * PERF: this is a pretty simplistic algorithm. I'm sure there's a faster one...
  1333. * PERF MINOR: not technically WNAF, since last digits can be adjacent. Could be rtl.
  1334. */
  1335. for (i=SCALAR_BITS-1; i >= 0; i--) {
  1336. int bit = (scalar->limb[i/WORD_BITS] >> (i%WORD_BITS)) & 1;
  1337. current = 2*current + bit;
  1338. /*
  1339. * Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0
  1340. * So current loses (tableBits+1) bits every time. It otherwise gains
  1341. * 1 bit per iteration. The number of iterations is
  1342. * (nbits + 2 + tableBits), and an additional control word is added at
  1343. * the end. So the total number of control words is at most
  1344. * ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2.
  1345. * There's also the stopper with power -1, for a total of +3.
  1346. */
  1347. if (current >= (2<<tableBits) || current <= -1 - (2<<tableBits)) {
  1348. int delta = (current + 1) >> 1; /* |delta| < 2^tablebits */
  1349. current = -(current & 1);
  1350. for (j=i; (delta & 1) == 0; j++) {
  1351. delta >>= 1;
  1352. }
  1353. control[position].power = j+1;
  1354. control[position].addend = delta;
  1355. position++;
  1356. assert(position <= SCALAR_BITS/(tableBits+1) + 2);
  1357. }
  1358. }
  1359. if (current) {
  1360. for (j=0; (current & 1) == 0; j++) {
  1361. current >>= 1;
  1362. }
  1363. control[position].power = j;
  1364. control[position].addend = current;
  1365. position++;
  1366. assert(position <= SCALAR_BITS/(tableBits+1) + 2);
  1367. }
  1368. control[position].power = -1;
  1369. control[position].addend = 0;
  1370. return position;
  1371. }
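/* Build output[i] = (2i+1)*working for i in [0, 2^tbits), in pniels form, for the variable-time ladder. */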
  1372. sv prepare_wnaf_table(
  1373. pniels_t *output,
  1374. const point_t working,
  1375. unsigned int tbits
  1376. ) {
  1377. point_t tmp;
  1378. int i;
  1379. pt_to_pniels(output[0], working);
  1380. if (tbits == 0) return;
  1381. API_NS(point_double)(tmp,working);
  1382. pniels_t twop;
  1383. pt_to_pniels(twop, tmp);
  1384. add_pniels_to_pt(tmp, output[0],0);
  1385. pt_to_pniels(output[1], tmp);
  1386. for (i=2; i < 1<<tbits; i++) {
  1387. add_pniels_to_pt(tmp, twop,0);
  1388. pt_to_pniels(output[i], tmp);
  1389. }
  1390. API_NS(point_destroy)(tmp);
  1391. }
  1392. extern const gf API_NS(precomputed_wnaf_as_fe)[];
  1393. static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe);
  1394. const size_t API_NS2(sizeof,precomputed_wnafs) __attribute((visibility("hidden")))
  1395. = sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS;
  1396. void API_NS(precompute_wnafs) (
  1397. niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
  1398. const point_t base
  1399. ) __attribute__ ((visibility ("hidden")));
  1400. void API_NS(precompute_wnafs) (
  1401. niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
  1402. const point_t base
  1403. ) {
  1404. pniels_t tmp[1<<DECAF_WNAF_FIXED_TABLE_BITS];
  1405. gf zs[1<<DECAF_WNAF_FIXED_TABLE_BITS], zis[1<<DECAF_WNAF_FIXED_TABLE_BITS];
  1406. int i;
  1407. prepare_wnaf_table(tmp,base,DECAF_WNAF_FIXED_TABLE_BITS);
  1408. for (i=0; i<1<<DECAF_WNAF_FIXED_TABLE_BITS; i++) {
  1409. memcpy(out[i], tmp[i]->n, sizeof(niels_t));
  1410. gf_cpy(zs[i], tmp[i]->z);
  1411. }
  1412. batch_normalize_niels(out, zs, zis, 1<<DECAF_WNAF_FIXED_TABLE_BITS);
  1413. decaf_bzero(tmp,sizeof(tmp));
  1414. decaf_bzero(zs,sizeof(zs));
  1415. decaf_bzero(zis,sizeof(zis));
  1416. }
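/* Variable-time combo = scalar1*(precomputed base) + scalar2*base2, mixing the fixed
 * wnaf table for the base point with a per-call table built for base2. */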
  1417. void API_NS(base_double_scalarmul_non_secret) (
  1418. point_t combo,
  1419. const scalar_t scalar1,
  1420. const point_t base2,
  1421. const scalar_t scalar2
  1422. ) {
  1423. const int table_bits_var = DECAF_WNAF_VAR_TABLE_BITS,
  1424. table_bits_pre = DECAF_WNAF_FIXED_TABLE_BITS;
  1425. struct smvt_control control_var[SCALAR_BITS/(table_bits_var+1)+3];
  1426. struct smvt_control control_pre[SCALAR_BITS/(table_bits_pre+1)+3];
  1427. int ncb_pre = recode_wnaf(control_pre, scalar1, table_bits_pre);
  1428. int ncb_var = recode_wnaf(control_var, scalar2, table_bits_var);
  1429. pniels_t precmp_var[1<<table_bits_var];
  1430. prepare_wnaf_table(precmp_var, base2, table_bits_var);
  1431. int contp=0, contv=0, i = control_var[0].power;
  1432. if (i < 0) {
  1433. API_NS(point_copy)(combo, API_NS(point_identity));
  1434. return;
  1435. } else if (i > control_pre[0].power) {
  1436. pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
  1437. contv++;
  1438. } else if (i == control_pre[0].power && i >= 0) {
  1439. pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
  1440. add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1], i);
  1441. contv++; contp++;
  1442. } else {
  1443. i = control_pre[0].power;
  1444. niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1]);
  1445. contp++;
  1446. }
  1447. for (i--; i >= 0; i--) {
  1448. int cv = (i==control_var[contv].power), cp = (i==control_pre[contp].power);
  1449. point_double_internal(combo,combo,i && !(cv||cp));
  1450. if (cv) {
  1451. assert(control_var[contv].addend);
  1452. if (control_var[contv].addend > 0) {
  1453. add_pniels_to_pt(combo, precmp_var[control_var[contv].addend >> 1], i&&!cp);
  1454. } else {
  1455. sub_pniels_from_pt(combo, precmp_var[(-control_var[contv].addend) >> 1], i&&!cp);
  1456. }
  1457. contv++;
  1458. }
  1459. if (cp) {
  1460. assert(control_pre[contp].addend);
  1461. if (control_pre[contp].addend > 0) {
  1462. add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[contp].addend >> 1], i);
  1463. } else {
  1464. sub_niels_from_pt(combo, API_NS(wnaf_base)[(-control_pre[contp].addend) >> 1], i);
  1465. }
  1466. contp++;
  1467. }
  1468. }
  1469. // Not secret, but zeroizing is cheap, so do it anyway.
  1470. decaf_bzero(control_var,sizeof(control_var));
  1471. decaf_bzero(control_pre,sizeof(control_pre));
  1472. decaf_bzero(precmp_var,sizeof(precmp_var));
  1473. assert(contv == ncb_var); (void)ncb_var;
  1474. assert(contp == ncb_pre); (void)ncb_pre;
  1475. }
  1476. void API_NS(point_destroy) (
  1477. point_t point
  1478. ) {
  1479. decaf_bzero(point, sizeof(point_t));
  1480. }
  1481. void API_NS(precomputed_destroy) (
  1482. precomputed_s *pre
  1483. ) {
  1484. decaf_bzero(pre, API_NS2(sizeof,precomputed_s));
  1485. }