You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

1607 lines
46 KiB

  1. /**
  2. * @file curve25519/decaf.c
  3. * @author Mike Hamburg
  4. *
  5. * @copyright
  6. * Copyright (c) 2015-2016 Cryptography Research, Inc. \n
  7. * Released under the MIT License. See LICENSE.txt for license information.
  8. *
  9. * @brief Decaf high-level functions.
  10. *
  11. * @warning This file was automatically generated in Python.
  12. * Please do not edit it.
  13. */
  14. #define _XOPEN_SOURCE 600 /* for posix_memalign */
  15. #include "word.h"
  16. #include "field.h"
  17. #include <decaf.h>
  18. #include <decaf/ed255.h>
  19. /* Template stuff */
  20. #define API_NS(_id) decaf_255_##_id
  21. #define SCALAR_BITS DECAF_255_SCALAR_BITS
  22. #define SCALAR_SER_BYTES DECAF_255_SCALAR_BYTES
  23. #define SCALAR_LIMBS DECAF_255_SCALAR_LIMBS
  24. #define scalar_t API_NS(scalar_t)
  25. #define point_t API_NS(point_t)
  26. #define precomputed_s API_NS(precomputed_s)
  27. #define IMAGINE_TWIST 1
  28. #define COFACTOR 8
  29. /* Comb config: number of combs, n, t, s. */
  30. #define COMBS_N 3
  31. #define COMBS_T 5
  32. #define COMBS_S 17
  33. #define DECAF_WINDOW_BITS 4
  34. #define DECAF_WNAF_FIXED_TABLE_BITS 5
  35. #define DECAF_WNAF_VAR_TABLE_BITS 3
  36. #define EDDSA_USE_SIGMA_ISOGENY 1
  37. static const int EDWARDS_D = -121665;
  38. static const scalar_t point_scalarmul_adjustment = {{{
  39. SC_LIMB(0xd6ec31748d98951c), SC_LIMB(0xc6ef5bf4737dcf70), SC_LIMB(0xfffffffffffffffe), SC_LIMB(0x0fffffffffffffff)
  40. }}}, precomputed_scalarmul_adjustment = {{{
  41. SC_LIMB(0x977f4a4775473484), SC_LIMB(0x6de72ae98b3ab623), SC_LIMB(0xffffffffffffffff), SC_LIMB(0x0fffffffffffffff)
  42. }}};
  43. const uint8_t decaf_x25519_base_point[DECAF_X25519_PUBLIC_BYTES] = { 0x09 };
  44. #define RISTRETTO_FACTOR DECAF_255_RISTRETTO_FACTOR
  45. const gf RISTRETTO_FACTOR = {{{
  46. 0x702557fa2bf03, 0x514b7d1a82cc6, 0x7f89efd8b43a7, 0x1aef49ec23700, 0x079376fa30500
  47. }}};
  48. #if IMAGINE_TWIST
  49. #define TWISTED_D (-(EDWARDS_D))
  50. #else
  51. #define TWISTED_D ((EDWARDS_D)-1)
  52. #endif
  53. #if TWISTED_D < 0
  54. #define EFF_D (-(TWISTED_D))
  55. #define NEG_D 1
  56. #else
  57. #define EFF_D TWISTED_D
  58. #define NEG_D 0
  59. #endif
  60. /* End of template stuff */
  61. /* Sanity */
  62. #if (COFACTOR == 8) && !IMAGINE_TWIST && !UNSAFE_CURVE_HAS_POINTS_AT_INFINITY
  63. /* FUTURE MAGIC: Curve41417 doesn't have these properties. */
  64. #error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
/* OK, but why?
 * Two reasons: #1: There are bugs when COFACTOR == 8 && !IMAGINE_TWIST
 * #2: (second reason not recorded -- TODO: document it)
 */
  69. #endif
  70. #if IMAGINE_TWIST && (P_MOD_8 != 5)
  71. #error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
  72. #endif
  73. #if (COFACTOR != 8) && (COFACTOR != 4)
  74. #error "COFACTOR must be 4 or 8"
  75. #endif
  76. #if IMAGINE_TWIST
  77. extern const gf SQRT_MINUS_ONE;
  78. #endif
  79. #define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
  80. extern const point_t API_NS(point_base);
  81. /* Projective Niels coordinates */
  82. typedef struct { gf a, b, c; } niels_s, niels_t[1];
  83. typedef struct { niels_t n; gf z; } VECTOR_ALIGNED pniels_s, pniels_t[1];
  84. /* Precomputed base */
  85. struct precomputed_s { niels_t table [COMBS_N<<(COMBS_T-1)]; };
  86. extern const gf API_NS(precomputed_base_as_fe)[];
  87. const precomputed_s *API_NS(precomputed_base) =
  88. (const precomputed_s *) &API_NS(precomputed_base_as_fe);
  89. const size_t API_NS(sizeof_precomputed_s) = sizeof(precomputed_s);
  90. const size_t API_NS(alignof_precomputed_s) = sizeof(big_register_t);
  91. /** Inverse. */
  92. static void
  93. gf_invert(gf y, const gf x, int assert_nonzero) {
  94. gf t1, t2;
  95. gf_sqr(t1, x); // o^2
  96. mask_t ret = gf_isr(t2, t1); // +-1/sqrt(o^2) = +-1/o
  97. (void)ret;
  98. if (assert_nonzero) assert(ret);
  99. gf_sqr(t1, t2);
  100. gf_mul(t2, t1, x); // not direct to y in case of alias.
  101. gf_copy(y, t2);
  102. }
/** The group identity: affine (0,1), i.e. (x,y,z,t) = (0,1,1,0). */
const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};
/* Predeclare because not static: called by elligator */
void API_NS(deisogenize) (
    gf_s *__restrict__ s,
    gf_s *__restrict__ inv_el_sum,
    gf_s *__restrict__ inv_el_m1,
    const point_t p,
    mask_t toggle_s,
    mask_t toggle_altx,
    mask_t toggle_rotation
);

/**
 * Map the internal (twisted-Edwards) point p down to its decaf/ristretto
 * representative.
 *
 * @param [out] s           The canonical (non-negative) field element that
 *                          point_encode serializes.
 * @param [out] inv_el_sum  Auxiliary output consumed by the elligator
 *                          inverse map.
 * @param [out] inv_el_m1   Second auxiliary output for elligator inversion.
 * @param [in]  p           The point to encode.
 * @param toggle_s          Mask selecting the alternate sign of s.
 * @param toggle_altx       Mask selecting the alternate sign of "altx".
 * @param toggle_rotation   Mask selecting the 4-torsion rotation
 *                          (cofactor-8 builds only; ignored for cofactor 4).
 *
 * The toggles let the elligator inverse enumerate all equivalent preimages.
 * Everything below is constant time: all sign/rotation choices are applied
 * with masked conditional operations, never branches.
 */
void API_NS(deisogenize) (
    gf_s *__restrict__ s,
    gf_s *__restrict__ inv_el_sum,
    gf_s *__restrict__ inv_el_m1,
    const point_t p,
    mask_t toggle_s,
    mask_t toggle_altx,
    mask_t toggle_rotation
) {
#if COFACTOR == 4 && !IMAGINE_TWIST
    (void)toggle_rotation; /* Only applies to cofactor 8 */
    gf t1;
    /* t2..t4 alias the output buffers to save stack; safe because each is
     * fully written before its output meaning is needed. */
    gf_s *t2 = s, *t3=inv_el_sum, *t4=inv_el_m1;
    gf_add(t1,p->x,p->t);
    gf_sub(t2,p->x,p->t);
    gf_mul(t3,t1,t2); /* t3 = num */
    gf_sqr(t2,p->x);
    gf_mul(t1,t2,t3);
    gf_mulw(t2,t1,-1-TWISTED_D); /* -x^2 * (a-d) * num */
    gf_isr(t1,t2); /* t1 = isr */
    gf_mul(t2,t1,t3); /* t2 = ratio */
    gf_mul(t4,t2,RISTRETTO_FACTOR);
    mask_t negx = gf_lobit(t4) ^ toggle_altx;
    gf_cond_neg(t2, negx);
    gf_mul(t3,t2,p->z);
    gf_sub(t3,t3,p->t);
    gf_mul(t2,t3,p->x);
    gf_mulw(t4,t2,-1-TWISTED_D);
    gf_mul(s,t4,t1);
    /* Canonicalize: force the low bit of s to zero ("non-negative"). */
    mask_t lobs = gf_lobit(s);
    gf_cond_neg(s,lobs);
    gf_copy(inv_el_m1,p->x);
    gf_cond_neg(inv_el_m1,~lobs^negx^toggle_s);
    gf_add(inv_el_m1,inv_el_m1,p->t);
#elif COFACTOR == 8 && IMAGINE_TWIST
    /* More complicated because of rotation */
    gf t1,t2,t3,t4,t5;
    gf_add(t1,p->z,p->y);
    gf_sub(t2,p->z,p->y);
    gf_mul(t3,t1,t2); /* t3 = num */
    gf_mul(t2,p->x,p->y); /* t2 = den */
    gf_sqr(t1,t2);
    gf_mul(t4,t1,t3);
    gf_mulw(t1,t4,-1-TWISTED_D);
    gf_isr(t4,t1); /* isqrt(num*(a-d)*den^2) */
    gf_mul(t1,t2,t4);
    gf_mul(t2,t1,RISTRETTO_FACTOR); /* t2 = "iden" in ristretto.sage */
    gf_mul(t1,t3,t4); /* t1 = "inum" in ristretto.sage */
    /* Calculate altxy = iden*inum*i*t^2*(d-a) */
    gf_mul(t3,t1,t2);
    gf_mul_i(t4,t3);
    gf_mul(t3,t4,p->t);
    gf_mul(t4,t3,p->t);
    gf_mulw(t3,t4,TWISTED_D+1); /* iden*inum*i*t^2*(d-1) */
    mask_t rotate = toggle_rotation ^ gf_lobit(t3);
    /* Rotate if altxy is negative */
    gf_cond_swap(t1,t2,rotate);
    gf_mul_i(t4,p->x);
    gf_cond_sel(t4,p->y,t4,rotate); /* t4 = "fac" = ix if rotate, else y */
    gf_mul_i(t5,RISTRETTO_FACTOR); /* t5 = imi */
    gf_mul(t3,t5,t2); /* iden * imi */
    gf_mul(t2,t5,t1);
    gf_mul(t5,t2,p->t); /* "altx" = iden*imi*t */
    mask_t negx = gf_lobit(t5) ^ toggle_altx;
    gf_cond_neg(t1,negx^rotate);
    gf_mul(t2,t1,p->z);
    gf_add(t2,t2,ONE);
    gf_mul(inv_el_sum,t2,t4);
    gf_mul(s,inv_el_sum,t3);
    /* Canonicalize: force the low bit of s to zero ("non-negative"). */
    mask_t negs = gf_lobit(s);
    gf_cond_neg(s,negs);
    mask_t negz = ~negs ^ toggle_s ^ negx;
    gf_copy(inv_el_m1,p->z);
    gf_cond_neg(inv_el_m1,negz);
    gf_sub(inv_el_m1,inv_el_m1,t4);
#else
#error "Cofactor must be 4 (with no IMAGINE_TWIST) or 8 (with IMAGINE_TWIST)"
#endif
}
  194. void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
  195. gf s,ie1,ie2;
  196. API_NS(deisogenize)(s,ie1,ie2,p,0,0,0);
  197. gf_serialize(ser,s,1);
  198. }
/**
 * Decode a serialized point into p.
 *
 * @param [out] p     The decoded point (valid on success).
 * @param [in]  ser   SER_BYTES of input.
 * @param allow_identity  If false, reject the encoding of the identity.
 * @return DECAF_SUCCESS iff ser is a canonical encoding of a valid point.
 *
 * Rejects non-canonical inputs: the field element must deserialize cleanly,
 * must be "non-negative" (low bit clear), and the curve equation must yield
 * a square.  Constant time except for the success/failure result itself.
 */
decaf_error_t API_NS(point_decode) (
    point_t p,
    const unsigned char ser[SER_BYTES],
    decaf_bool_t allow_identity
) {
    gf s, s2, num, tmp;
    /* Aliases into p's coordinates used as scratch; each field is
     * overwritten with its real value before the function returns. */
    gf_s *tmp2=s2, *ynum=p->z, *isr=p->x, *den=p->t;
    mask_t succ = gf_deserialize(s, ser, 1, 0);
    succ &= bool_to_mask(allow_identity) | ~gf_eq(s, ZERO);
    succ &= ~gf_lobit(s); /* reject "negative" s: encoding must be canonical */
    gf_sqr(s2,s); /* s^2 = -as^2 */
#if IMAGINE_TWIST
    gf_sub(s2,ZERO,s2); /* -as^2 */
#endif
    gf_sub(den,ONE,s2); /* 1+as^2 */
    gf_add(ynum,ONE,s2); /* 1-as^2 */
    gf_mulw(num,s2,-4*TWISTED_D);
    gf_sqr(tmp,den); /* tmp = den^2 */
    gf_add(num,tmp,num); /* num = den^2 - 4*d*s^2 */
    gf_mul(tmp2,num,tmp); /* tmp2 = num*den^2 */
    succ &= gf_isr(isr,tmp2); /* isr = 1/sqrt(num*den^2); fails if non-square */
    gf_mul(tmp,isr,den); /* isr*den */
    gf_mul(p->y,tmp,ynum); /* isr*den*(1-as^2) */
    gf_mul(tmp2,tmp,s); /* s*isr*den */
    gf_add(tmp2,tmp2,tmp2); /* 2*s*isr*den */
    gf_mul(tmp,tmp2,isr); /* 2*s*isr^2*den */
    gf_mul(p->x,tmp,num); /* 2*s*isr^2*den*num */
    gf_mul(tmp,tmp2,RISTRETTO_FACTOR); /* 2*s*isr*den*magic */
    gf_cond_neg(p->x,gf_lobit(tmp)); /* flip x */
#if COFACTOR==8
    /* Additionally check y != 0 and x*y*isomagic nonegative */
    succ &= ~gf_eq(p->y,ZERO);
    gf_mul(tmp,p->x,p->y);
    gf_mul(tmp2,tmp,RISTRETTO_FACTOR);
    succ &= ~gf_lobit(tmp2);
#endif
#if IMAGINE_TWIST
    /* Move the point onto the i-twisted internal curve. */
    gf_copy(tmp,p->x);
    gf_mul_i(p->x,tmp);
#endif
    /* Fill in z and t */
    gf_copy(p->z,ONE);
    gf_mul(p->t,p->x,p->y);
    assert(API_NS(point_valid)(p) | ~succ);
    return decaf_succeed_if(mask_to_bool(succ));
}
/**
 * Set p = q - r, in extended twisted Edwards coordinates.
 *
 * The "k+e" annotations track the lazy-reduction headroom of each value
 * (how far above fully-reduced it may be); the statement order is chosen
 * so no intermediate overflows.  Do not reorder.
 */
void API_NS(point_sub) (
    point_t p,
    const point_t q,
    const point_t r
) {
    gf a, b, c, d;
    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
    gf_sub_nr ( d, r->y, r->x ); /* 3+e */
    gf_add_nr ( c, r->y, r->x ); /* 2+e */
    gf_mul ( a, c, b );
    gf_add_nr ( b, q->y, q->x ); /* 2+e */
    gf_mul ( p->y, d, b );
    gf_mul ( b, r->t, q->t );
    gf_mulw ( p->x, b, 2*EFF_D );
    gf_add_nr ( b, a, p->y ); /* 2+e */
    gf_sub_nr ( c, p->y, a ); /* 3+e */
    gf_mul ( a, q->z, r->z );
    gf_add_nr ( a, a, a ); /* 2+e */
    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
    /* Sign of d determines which of add/sub pairs with the 2*EFF_D term. */
#if NEG_D
    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
    gf_add_nr ( a, a, p->x ); /* 3+e or 2+e */
#else
    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
    gf_sub_nr ( a, a, p->x ); /* 4+e or 3+e */
#endif
    gf_mul ( p->z, a, p->y );
    gf_mul ( p->x, p->y, c );
    gf_mul ( p->y, a, b );
    gf_mul ( p->t, b, c );
}
/**
 * Set p = q + r, in extended twisted Edwards coordinates.
 *
 * Mirror image of point_sub; the "k+e" annotations track lazy-reduction
 * headroom and the statement order is load-bearing.  Do not reorder.
 */
void API_NS(point_add) (
    point_t p,
    const point_t q,
    const point_t r
) {
    gf a, b, c, d;
    gf_sub_nr ( b, q->y, q->x ); /* 3+e */
    gf_sub_nr ( c, r->y, r->x ); /* 3+e */
    gf_add_nr ( d, r->y, r->x ); /* 2+e */
    gf_mul ( a, c, b );
    gf_add_nr ( b, q->y, q->x ); /* 2+e */
    gf_mul ( p->y, d, b );
    gf_mul ( b, r->t, q->t );
    gf_mulw ( p->x, b, 2*EFF_D );
    gf_add_nr ( b, a, p->y ); /* 2+e */
    gf_sub_nr ( c, p->y, a ); /* 3+e */
    gf_mul ( a, q->z, r->z );
    gf_add_nr ( a, a, a ); /* 2+e */
    if (GF_HEADROOM <= 3) gf_weak_reduce(a); /* or 1+e */
    /* Sign of d determines which of add/sub pairs with the 2*EFF_D term. */
#if NEG_D
    gf_add_nr ( p->y, a, p->x ); /* 3+e or 2+e */
    gf_sub_nr ( a, a, p->x ); /* 4+e or 3+e */
#else
    gf_sub_nr ( p->y, a, p->x ); /* 4+e or 3+e */
    gf_add_nr ( a, a, p->x ); /* 3+e or 2+e */
#endif
    gf_mul ( p->z, a, p->y );
    gf_mul ( p->x, p->y, c );
    gf_mul ( p->y, a, b );
    gf_mul ( p->t, b, c );
}
/**
 * Set p = 2*q.
 *
 * If before_double is nonzero, skip computing p->t: the caller promises the
 * next operation is another doubling, which does not read t.  The "k+e"
 * annotations track lazy-reduction headroom; do not reorder statements.
 */
static DECAF_NOINLINE void
point_double_internal (
    point_t p,
    const point_t q,
    int before_double
) {
    gf a, b, c, d;
    gf_sqr ( c, q->x );
    gf_sqr ( a, q->y );
    gf_add_nr ( d, c, a ); /* 2+e */
    gf_add_nr ( p->t, q->y, q->x ); /* 2+e */
    gf_sqr ( b, p->t );
    gf_subx_nr ( b, b, d, 3 ); /* 4+e */
    gf_sub_nr ( p->t, a, c ); /* 3+e */
    gf_sqr ( p->x, q->z );
    gf_add_nr ( p->z, p->x, p->x ); /* 2+e */
    gf_subx_nr ( a, p->z, p->t, 4 ); /* 6+e */
    if (GF_HEADROOM == 5) gf_weak_reduce(a); /* or 1+e */
    gf_mul ( p->x, a, b );
    gf_mul ( p->z, p->t, a );
    gf_mul ( p->y, p->t, d );
    if (!before_double) gf_mul ( p->t, b, d );
}
/** Set p = 2*q (public wrapper; always computes the full extended coords). */
void API_NS(point_double)(point_t p, const point_t q) {
    point_double_internal(p,q,0);
}
  333. void API_NS(point_negate) (
  334. point_t nega,
  335. const point_t a
  336. ) {
  337. gf_sub(nega->x, ZERO, a->x);
  338. gf_copy(nega->y, a->y);
  339. gf_copy(nega->z, a->z);
  340. gf_sub(nega->t, ZERO, a->t);
  341. }
/* Operations on [p]niels */

/**
 * Conditionally negate a Niels point, under mask neg (all-ones or all-zeros).
 * Negating a Niels point (a,b,c) = (y-x, y+x, 2d*t) means swapping a with b
 * and negating c; done with masked swap/neg so it is constant time.
 */
static DECAF_INLINE void
cond_neg_niels (
    niels_t n,
    mask_t neg
) {
    gf_cond_swap(n->a, n->b, neg);
    gf_cond_neg(n->c, neg);
}
/**
 * Convert an extended point a to projective Niels form b:
 * (a,b,c,z) = (y-x, y+x, 2d*t, 2z).
 */
static DECAF_NOINLINE void pt_to_pniels (
    pniels_t b,
    const point_t a
) {
    gf_sub ( b->n->a, a->y, a->x );
    gf_add ( b->n->b, a->x, a->y );
    gf_mulw ( b->n->c, a->t, 2*TWISTED_D );
    gf_add ( b->z, a->z, a->z );
}
/**
 * Convert a projective Niels point d back to extended coordinates e.
 * Recovers x = z*(b-a), y = z*(b+a), t = (b-a)*(b+a), new z = z^2.
 */
static DECAF_NOINLINE void pniels_to_pt (
    point_t e,
    const pniels_t d
) {
    gf eu;
    gf_add ( eu, d->n->b, d->n->a ); /* eu = 2y (projectively) */
    gf_sub ( e->y, d->n->b, d->n->a ); /* e->y = 2x for now */
    gf_mul ( e->t, e->y, eu);
    gf_mul ( e->x, d->z, e->y );
    gf_mul ( e->y, d->z, eu );
    gf_sqr ( e->z, d->z );
}
/**
 * Convert an affine Niels point n to extended coordinates e:
 * x = b-a, y = b+a, t = x*y, z = 1.
 */
static DECAF_NOINLINE void
niels_to_pt (
    point_t e,
    const niels_t n
) {
    gf_add ( e->y, n->b, n->a );
    gf_sub ( e->x, n->b, n->a );
    gf_mul ( e->t, e->y, e->x );
    gf_copy ( e->z, ONE );
}
/**
 * Mixed addition: d += e, where e is an affine Niels point.
 *
 * If before_double is nonzero, skip computing d->t (the next operation is a
 * doubling, which does not read t).  The "k+e" annotations track
 * lazy-reduction headroom; do not reorder statements.
 */
static DECAF_NOINLINE void
add_niels_to_pt (
    point_t d,
    const niels_t e,
    int before_double
) {
    gf a, b, c;
    gf_sub_nr ( b, d->y, d->x ); /* 3+e */
    gf_mul ( a, e->a, b );
    gf_add_nr ( b, d->x, d->y ); /* 2+e */
    gf_mul ( d->y, e->b, b );
    gf_mul ( d->x, e->c, d->t );
    gf_add_nr ( c, a, d->y ); /* 2+e */
    gf_sub_nr ( b, d->y, a ); /* 3+e */
    gf_sub_nr ( d->y, d->z, d->x ); /* 3+e */
    gf_add_nr ( a, d->x, d->z ); /* 2+e */
    gf_mul ( d->z, a, d->y );
    gf_mul ( d->x, d->y, b );
    gf_mul ( d->y, a, c );
    if (!before_double) gf_mul ( d->t, b, c );
}
/**
 * Mixed subtraction: d -= e, where e is an affine Niels point.
 *
 * Same structure as add_niels_to_pt but with e's a/b roles and the z +/- x
 * terms swapped (equivalent to adding the negated Niels point).  The "k+e"
 * annotations track lazy-reduction headroom; do not reorder statements.
 */
static DECAF_NOINLINE void
sub_niels_from_pt (
    point_t d,
    const niels_t e,
    int before_double
) {
    gf a, b, c;
    gf_sub_nr ( b, d->y, d->x ); /* 3+e */
    gf_mul ( a, e->b, b );
    gf_add_nr ( b, d->x, d->y ); /* 2+e */
    gf_mul ( d->y, e->a, b );
    gf_mul ( d->x, e->c, d->t );
    gf_add_nr ( c, a, d->y ); /* 2+e */
    gf_sub_nr ( b, d->y, a ); /* 3+e */
    gf_add_nr ( d->y, d->z, d->x ); /* 2+e */
    gf_sub_nr ( a, d->z, d->x ); /* 3+e */
    gf_mul ( d->z, a, d->y );
    gf_mul ( d->x, d->y, b );
    gf_mul ( d->y, a, c );
    if (!before_double) gf_mul ( d->t, b, c );
}
  424. static void
  425. add_pniels_to_pt (
  426. point_t p,
  427. const pniels_t pn,
  428. int before_double
  429. ) {
  430. gf L0;
  431. gf_mul ( L0, p->z, pn->z );
  432. gf_copy ( p->z, L0 );
  433. add_niels_to_pt( p, pn->n, before_double );
  434. }
  435. static void
  436. sub_pniels_from_pt (
  437. point_t p,
  438. const pniels_t pn,
  439. int before_double
  440. ) {
  441. gf L0;
  442. gf_mul ( L0, p->z, pn->z );
  443. gf_copy ( p->z, L0 );
  444. sub_niels_from_pt( p, pn->n, before_double );
  445. }
/**
 * Build a fixed-window table of odd multiples of b:
 * multiples[i] = (2*i+1)*b, for i in [0, ntable).
 * Scratch state is zeroized before returning (table entries are secret-
 * indexed later, but the intermediate point values must not leak).
 */
static DECAF_NOINLINE void
prepare_fixed_window(
    pniels_t *multiples,
    const point_t b,
    int ntable
) {
    point_t tmp;
    pniels_t pn;
    int i;
    point_double_internal(tmp, b, 0); /* pn = 2*b, the table stride */
    pt_to_pniels(pn, tmp);
    pt_to_pniels(multiples[0], b);
    API_NS(point_copy)(tmp, b);
    for (i=1; i<ntable; i++) {
        add_pniels_to_pt(tmp, pn, 0);
        pt_to_pniels(multiples[i], tmp);
    }
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(tmp,sizeof(tmp));
}
/**
 * Constant-time scalar multiplication: a = scalar * b.
 *
 * Uses a signed fixed-window method: the scalar is first mapped to
 * (scalar + adjustment)/2 so every window digit can be recoded as odd and
 * signed, letting the table hold only odd multiples of b.  Table lookups
 * and negations are constant time; all secret scratch is zeroized.
 */
void API_NS(point_scalarmul) (
    point_t a,
    const point_t b,
    const scalar_t scalar
) {
    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);
    scalar_t scalar1x;
    API_NS(scalar_add)(scalar1x, scalar, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    /* Set up a precomputed table with odd multiples of b. */
    pniels_t pn, multiples[NTABLE];
    point_t tmp;
    prepare_fixed_window(multiples, b, NTABLE);
    /* Initialize. */
    int i,j,first=1;
    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;
    for (; i>=0; i-=WINDOW) {
        /* Fetch another block of bits */
        word_t bits = scalar1x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            /* Window straddles a limb boundary: pull in the high part. */
            bits ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits &= WINDOW_MASK;
        /* Signed recoding: top bit of the window selects negation. */
        mask_t inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;
        /* Add in from table.  Compute t only on last iteration. */
        constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv);
        if (first) {
            pniels_to_pt(tmp, pn);
            first = 0;
        } else {
            /* Using Hisil et al's lookahead method instead of extensible here
             * for no particular reason.  Double WINDOW times, but only compute t on
             * the last one.
             */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(tmp, tmp, -1);
            point_double_internal(tmp, tmp, 0);
            add_pniels_to_pt(tmp, pn, i ? -1 : 0);
        }
    }
    /* Write out the answer */
    API_NS(point_copy)(a,tmp);
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples,sizeof(multiples));
    decaf_bzero(tmp,sizeof(tmp));
}
/**
 * Constant-time double scalar multiplication:
 * a = scalarb * b + scalarc * c.
 *
 * Same signed fixed-window recoding as point_scalarmul, run for two
 * scalars in lockstep with shared doublings (Shamir's trick).  All table
 * lookups are constant time; secret scratch is zeroized on exit.
 */
void API_NS(point_double_scalarmul) (
    point_t a,
    const point_t b,
    const scalar_t scalarb,
    const point_t c,
    const scalar_t scalarc
) {
    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);
    scalar_t scalar1x, scalar2x;
    API_NS(scalar_add)(scalar1x, scalarb, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    API_NS(scalar_add)(scalar2x, scalarc, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar2x,scalar2x);
    /* Set up a precomputed table with odd multiples of b. */
    pniels_t pn, multiples1[NTABLE], multiples2[NTABLE];
    point_t tmp;
    prepare_fixed_window(multiples1, b, NTABLE);
    prepare_fixed_window(multiples2, c, NTABLE);
    /* Initialize. */
    int i,j,first=1;
    i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1;
    for (; i>=0; i-=WINDOW) {
        /* Fetch another block of bits */
        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
               bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            /* Window straddles a limb boundary: pull in the high parts. */
            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits1 &= WINDOW_MASK;
        bits2 &= WINDOW_MASK;
        /* Signed recoding: top bit of each window selects negation. */
        mask_t inv1 = (bits1>>(WINDOW-1))-1;
        mask_t inv2 = (bits2>>(WINDOW-1))-1;
        bits1 ^= inv1;
        bits2 ^= inv2;
        /* Add in from table.  Compute t only on last iteration. */
        constant_time_lookup(pn, multiples1, sizeof(pn), NTABLE, bits1 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv1);
        if (first) {
            pniels_to_pt(tmp, pn);
            first = 0;
        } else {
            /* Using Hisil et al's lookahead method instead of extensible here
             * for no particular reason.  Double WINDOW times, but only compute t on
             * the last one.
             */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(tmp, tmp, -1);
            point_double_internal(tmp, tmp, 0);
            add_pniels_to_pt(tmp, pn, 0);
        }
        constant_time_lookup(pn, multiples2, sizeof(pn), NTABLE, bits2 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv2);
        add_pniels_to_pt(tmp, pn, i?-1:0);
    }
    /* Write out the answer */
    API_NS(point_copy)(a,tmp);
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(scalar2x,sizeof(scalar2x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples1,sizeof(multiples1));
    decaf_bzero(multiples2,sizeof(multiples2));
    decaf_bzero(tmp,sizeof(tmp));
}
/**
 * Constant-time dual scalar multiplication of one base:
 * a1 = scalar1 * b and a2 = scalar2 * b, sharing the doublings of b.
 *
 * Works "transposed" relative to point_scalarmul: instead of looking up
 * multiples of b, it accumulates the running power of b into per-digit
 * bucket points (multiples1/2), then combines the buckets at the end.
 * Bucket reads/writes use constant-time lookup/insert.
 */
void API_NS(point_dual_scalarmul) (
    point_t a1,
    point_t a2,
    const point_t b,
    const scalar_t scalar1,
    const scalar_t scalar2
) {
    const int WINDOW = DECAF_WINDOW_BITS,
        WINDOW_MASK = (1<<WINDOW)-1,
        WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1);
    scalar_t scalar1x, scalar2x;
    API_NS(scalar_add)(scalar1x, scalar1, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    API_NS(scalar_add)(scalar2x, scalar2, point_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar2x,scalar2x);
    /* Set up a precomputed table with odd multiples of b. */
    point_t multiples1[NTABLE], multiples2[NTABLE], working, tmp;
    pniels_t pn;
    API_NS(point_copy)(working, b);
    /* Initialize. */
    int i,j;
    for (i=0; i<NTABLE; i++) {
        API_NS(point_copy)(multiples1[i], API_NS(point_identity));
        API_NS(point_copy)(multiples2[i], API_NS(point_identity));
    }
    for (i=0; i<SCALAR_BITS; i+=WINDOW) {
        if (i) {
            /* working = 2^WINDOW * (previous working) */
            for (j=0; j<WINDOW-1; j++)
                point_double_internal(working, working, -1);
            point_double_internal(working, working, 0);
        }
        /* Fetch another block of bits */
        word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
               bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
        if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
            /* Window straddles a limb boundary: pull in the high parts. */
            bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
            bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
        }
        bits1 &= WINDOW_MASK;
        bits2 &= WINDOW_MASK;
        /* Signed recoding: top bit of each window selects negation. */
        mask_t inv1 = (bits1>>(WINDOW-1))-1;
        mask_t inv2 = (bits2>>(WINDOW-1))-1;
        bits1 ^= inv1;
        bits2 ^= inv2;
        pt_to_pniels(pn, working);
        constant_time_lookup(tmp, multiples1, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
        cond_neg_niels(pn->n, inv1);
        /* add_pniels_to_pt(multiples1[bits1 & WINDOW_T_MASK], pn, 0); */
        add_pniels_to_pt(tmp, pn, 0);
        constant_time_insert(multiples1, tmp, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
        constant_time_lookup(tmp, multiples2, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
        /* XOR of masks flips pn from inv1's sign to inv2's sign. */
        cond_neg_niels(pn->n, inv1^inv2);
        /* add_pniels_to_pt(multiples2[bits2 & WINDOW_T_MASK], pn, 0); */
        add_pniels_to_pt(tmp, pn, 0);
        constant_time_insert(multiples2, tmp, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
    }
    /* Combine buckets: sum_k (2k+1) * bucket[k], via suffix sums. */
    if (NTABLE > 1) {
        API_NS(point_copy)(working, multiples1[NTABLE-1]);
        API_NS(point_copy)(tmp , multiples2[NTABLE-1]);
        for (i=NTABLE-1; i>1; i--) {
            API_NS(point_add)(multiples1[i-1], multiples1[i-1], multiples1[i]);
            API_NS(point_add)(multiples2[i-1], multiples2[i-1], multiples2[i]);
            API_NS(point_add)(working, working, multiples1[i-1]);
            API_NS(point_add)(tmp, tmp, multiples2[i-1]);
        }
        API_NS(point_add)(multiples1[0], multiples1[0], multiples1[1]);
        API_NS(point_add)(multiples2[0], multiples2[0], multiples2[1]);
        point_double_internal(working, working, 0);
        point_double_internal(tmp, tmp, 0);
        API_NS(point_add)(a1, working, multiples1[0]);
        API_NS(point_add)(a2, tmp, multiples2[0]);
    } else {
        API_NS(point_copy)(a1, multiples1[0]);
        API_NS(point_copy)(a2, multiples2[0]);
    }
    decaf_bzero(scalar1x,sizeof(scalar1x));
    decaf_bzero(scalar2x,sizeof(scalar2x));
    decaf_bzero(pn,sizeof(pn));
    decaf_bzero(multiples1,sizeof(multiples1));
    decaf_bzero(multiples2,sizeof(multiples2));
    decaf_bzero(tmp,sizeof(tmp));
    decaf_bzero(working,sizeof(working));
}
/**
 * Test whether p and q represent the same decaf group element.
 * Equality is modulo the cofactor torsion: compares x/y cross-products
 * projectively, and for cofactor 8 also accepts the 4-torsion rotated
 * representative.  Constant time in the coordinate comparisons.
 */
decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
    /* equality mod 2-torsion compares x/y */
    gf a, b;
    gf_mul ( a, p->y, q->x );
    gf_mul ( b, q->y, p->x );
    mask_t succ = gf_eq(a,b);
#if (COFACTOR == 8) && IMAGINE_TWIST
    gf_mul ( a, p->y, q->y );
    gf_mul ( b, q->x, p->x );
#if !(IMAGINE_TWIST)
    /* Dead under the outer condition; kept for the generated template. */
    gf_sub ( a, ZERO, a );
#else
    /* Interesting note: the 4tor would normally be rotation.
     * But because of the *i twist, it's actually
     * (x,y) <-> (iy,ix)
     */
    /* No code, just a comment. */
#endif
    succ |= gf_eq(a,b);
#endif
    return mask_to_bool(succ);
}
/**
 * Check that p is a valid point on the internal curve:
 *   x*y == z*t            (extended-coordinate consistency),
 *   y^2 - x^2 == z^2 + d*t^2  (twisted Edwards curve equation, a = -1),
 *   z != 0.
 * Constant time.
 */
decaf_bool_t API_NS(point_valid) (
    const point_t p
) {
    gf a,b,c;
    gf_mul(a,p->x,p->y);
    gf_mul(b,p->z,p->t);
    mask_t out = gf_eq(a,b);
    gf_sqr(a,p->x);
    gf_sqr(b,p->y);
    gf_sub(a,b,a);
    gf_sqr(b,p->t);
    gf_mulw(c,b,TWISTED_D);
    gf_sqr(b,p->z);
    gf_add(b,b,c);
    out &= gf_eq(a,b);
    out &= ~gf_eq(p->z,ZERO);
    return mask_to_bool(out);
}
/**
 * Debugging helper: set q to a different coordinate representative of the
 * same decaf element as p, by applying a nontrivial torsion component
 * (a 4-torsion rotation for cofactor 8, a plain negation for cofactor 4).
 * Useful for testing that encoding/comparison are torsion-invariant.
 */
void API_NS(point_debugging_torque) (
    point_t q,
    const point_t p
) {
#if COFACTOR == 8 && IMAGINE_TWIST
    /* (x,y) -> (iy,ix), t -> -t; see the note in point_eq. */
    gf tmp;
    gf_mul(tmp,p->x,SQRT_MINUS_ONE);
    gf_mul(q->x,p->y,SQRT_MINUS_ONE);
    gf_copy(q->y,tmp);
    gf_copy(q->z,p->z);
    gf_sub(q->t,ZERO,p->t);
#else
    gf_sub(q->x,ZERO,p->x);
    gf_sub(q->y,ZERO,p->y);
    gf_copy(q->z,p->z);
    gf_copy(q->t,p->t);
#endif
}
/**
 * Debugging helper: set q = p with all projective coordinates multiplied by
 * a nonzero factor deserialized from the given bytes (zero is replaced by
 * one).  The result represents the same point; useful for testing that
 * operations are invariant under projective rescaling.
 */
void API_NS(point_debugging_pscale) (
    point_t q,
    const point_t p,
    const uint8_t factor[SER_BYTES]
) {
    gf gfac,tmp;
    /* NB this means you'll never pscale by negative numbers for p521 */
    ignore_result(gf_deserialize(gfac,factor,0,0));
    gf_cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO)); /* avoid scaling by zero */
    gf_mul(tmp,p->x,gfac);
    gf_copy(q->x,tmp);
    gf_mul(tmp,p->y,gfac);
    gf_copy(q->y,tmp);
    gf_mul(tmp,p->z,gfac);
    gf_copy(q->z,tmp);
    gf_mul(tmp,p->t,gfac);
    gf_copy(q->t,tmp);
}
/**
 * Batch field inversion (Montgomery's trick): out[i] = 1/in[i] for
 * i in [0,n), using a single gf_invert plus O(n) multiplications.
 * Requires n > 1; out and in must not alias.  All in[i] must be nonzero
 * (gf_invert asserts this on the combined product).
 */
static void gf_batch_invert (
    gf *__restrict__ out,
    const gf *in,
    unsigned int n
) {
    gf t1;
    assert(n>1);
    /* Forward pass: out[i] = in[0]*...*in[i-1] for i = 1..n-1. */
    gf_copy(out[1], in[0]);
    int i;
    for (i=1; i<(int) (n-1); i++) {
        gf_mul(out[i+1], out[i], in[i]);
    }
    gf_mul(out[0], out[n-1], in[n-1]); /* product of all inputs */
    gf_invert(out[0], out[0], 1);
    /* Backward pass: peel off one input at a time.
     * Invariant: out[0] == 1/(in[0]*...*in[i]) at the top of each iteration. */
    for (i=n-1; i>0; i--) {
        gf_mul(t1, out[i], out[0]);
        gf_copy(out[i], t1);
        gf_mul(t1, out[0], in[i]);
        gf_copy(out[0], t1);
    }
}
/**
 * Normalize a table of Niels points to z = 1: multiply each entry's a/b/c
 * by the inverse of its z (taken from zs), using one batch inversion.
 * zis is caller-provided scratch for the inverses.  Results are strongly
 * reduced so the table entries are canonical.
 */
static void batch_normalize_niels (
    niels_t *table,
    const gf *zs,
    gf *__restrict__ zis,
    int n
) {
    int i;
    gf product;
    gf_batch_invert(zis, zs, n);
    for (i=0; i<n; i++) {
        gf_mul(product, table[i]->a, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->a, product);
        gf_mul(product, table[i]->b, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->b, product);
        gf_mul(product, table[i]->c, zis[i]);
        gf_strong_reduce(product);
        gf_copy(table[i]->c, product);
    }
    decaf_bzero(product,sizeof(product));
}
/**
 * Fill in a precomputed comb table for fixed-base scalar multiplication
 * with base point `base`.
 *
 * Uses the comb parameters (COMBS_N combs of COMBS_T teeth spaced COMBS_S
 * bits apart), which must cover SCALAR_BITS.  Each comb's 2^(t-1) entries
 * are generated by walking a Gray code so each step is a single point
 * add/sub, then the whole table is batch-normalized to affine Niels form.
 * All intermediate secrets are zeroized before returning.
 */
void API_NS(precompute) (
    precomputed_s *table,
    const point_t base
) {
    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
    assert(n*t*s >= SCALAR_BITS);
    point_t working, start, doubles[t-1];
    API_NS(point_copy)(working, base);
    pniels_t pn_tmp;
    gf zs[n<<(t-1)], zis[n<<(t-1)];
    unsigned int i,j,k;
    /* Compute n tables */
    for (i=0; i<n; i++) {
        /* Doubling phase: start = sum of this comb's teeth of `working`;
         * doubles[j] = the tooth spacings needed by the Gray-code phase. */
        for (j=0; j<t; j++) {
            if (j) API_NS(point_add)(start, start, working);
            else API_NS(point_copy)(start, working);
            if (j==t-1 && i==n-1) break;
            point_double_internal(working, working,0);
            if (j<t-1) API_NS(point_copy)(doubles[j], working);
            for (k=0; k<s-1; k++)
                point_double_internal(working, working, k<s-2);
        }
        /* Gray-code phase: visit all 2^(t-1) sign patterns, one add/sub
         * per step, storing each entry in un-normalized Niels form. */
        for (j=0;; j++) {
            int gray = j ^ (j>>1);
            int idx = (((i+1)<<(t-1))-1) ^ gray;
            pt_to_pniels(pn_tmp, start);
            memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
            gf_copy(zs[idx], pn_tmp->z);
            if (j >= (1u<<(t-1)) - 1) break;
            int delta = (j+1) ^ ((j+1)>>1) ^ gray; /* single bit that flips */
            for (k=0; delta>1; k++)
                delta >>=1;
            if (gray & (1<<k)) {
                API_NS(point_add)(start, start, doubles[k]);
            } else {
                API_NS(point_sub)(start, start, doubles[k]);
            }
        }
    }
    batch_normalize_niels(table->table,(const gf *)zs,zis,n<<(t-1));
    decaf_bzero(zs,sizeof(zs));
    decaf_bzero(zis,sizeof(zis));
    decaf_bzero(pn_tmp,sizeof(pn_tmp));
    decaf_bzero(working,sizeof(working));
    decaf_bzero(start,sizeof(start));
    decaf_bzero(doubles,sizeof(doubles));
}
/** Constant-time lookup of table[idx] into ni (scans all nelts entries). */
static DECAF_INLINE void
constant_time_lookup_niels (
    niels_s *__restrict__ ni,
    const niels_t *table,
    int nelts,
    int idx
) {
    constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
}
/**
 * Constant-time fixed-base scalar multiplication using a comb table:
 * out = scalar * (the base the table was built for).
 *
 * The scalar is first mapped to (scalar + adjustment)/2 so each comb digit
 * can be recoded as signed (top tooth selects negation), matching the
 * signed table layout produced by API_NS(precompute).
 */
void API_NS(precomputed_scalarmul) (
    point_t out,
    const precomputed_s *table,
    const scalar_t scalar
) {
    int i;
    unsigned j,k;
    const unsigned int n = COMBS_N, t = COMBS_T, s = COMBS_S;
    scalar_t scalar1x;
    API_NS(scalar_add)(scalar1x, scalar, precomputed_scalarmul_adjustment);
    API_NS(scalar_halve)(scalar1x,scalar1x);
    niels_t ni;
    for (i=s-1; i>=0; i--) {
        if (i != (int)s-1) point_double_internal(out,out,0);
        for (j=0; j<n; j++) {
            /* Gather comb j's teeth for bit position i. */
            int tab = 0;
            for (k=0; k<t; k++) {
                unsigned int bit = i + s*(k + j*t);
                if (bit < SCALAR_BITS) {
                    tab |= (scalar1x->limb[bit/WBITS] >> (bit%WBITS) & 1) << k;
                }
            }
            /* Signed recoding: top tooth selects negation of the entry. */
            mask_t invert = (tab>>(t-1))-1;
            tab ^= invert;
            tab &= (1<<(t-1)) - 1;
            constant_time_lookup_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab);
            cond_neg_niels(ni, invert);
            if ((i!=(int)s-1)||j) {
                add_niels_to_pt(out, ni, j==n-1 && i);
            } else {
                /* Very first term: initialize out instead of accumulating. */
                niels_to_pt(out, ni);
            }
        }
    }
    decaf_bzero(ni,sizeof(ni));
    decaf_bzero(scalar1x,sizeof(scalar1x));
}
/** Constant-time select: out = pick_b ? b : a. */
void API_NS(point_cond_sel) (
    point_t out,
    const point_t a,
    const point_t b,
    decaf_bool_t pick_b
) {
    constant_time_select(out,a,b,sizeof(point_t),bool_to_mask(pick_b),0);
}
/* FUTURE: restore Curve25519 Montgomery ladder? */

/**
 * Decode `base`, multiply by `scalar`, and encode the result into `scaled`.
 *
 * @param allow_identity  Passed through to point_decode.
 * @param short_circuit   If true, return immediately on a bad base point
 *                        (variable time in the validity of `base`); if
 *                        false, continue with the standard base point so
 *                        timing does not depend on decode success, and
 *                        still report the failure.
 * @return DECAF_SUCCESS iff `base` decoded successfully.
 */
decaf_error_t API_NS(direct_scalarmul) (
    uint8_t scaled[SER_BYTES],
    const uint8_t base[SER_BYTES],
    const scalar_t scalar,
    decaf_bool_t allow_identity,
    decaf_bool_t short_circuit
) {
    point_t basep;
    decaf_error_t succ = API_NS(point_decode)(basep, base, allow_identity);
    if (short_circuit && succ != DECAF_SUCCESS) return succ;
    /* On failure (non-short-circuit), substitute the base point so the
     * scalarmul below runs on valid data; succ is still returned. */
    API_NS(point_cond_sel)(basep, API_NS(point_base), basep, succ);
    API_NS(point_scalarmul)(basep, basep, scalar);
    API_NS(point_encode)(scaled, basep);
    API_NS(point_destroy)(basep);
    return succ;
}
/**
 * Multiply a point by the cofactor and encode it in the EdDSA (Ed25519)
 * wire format: the y-coordinate, with the sign of x folded into the top
 * bit of the last byte.
 *
 * @param enc Output buffer, DECAF_EDDSA_25519_PUBLIC_BYTES long.
 * @param p   The input point; not modified.
 */
void API_NS(point_mul_by_cofactor_and_encode_like_eddsa) (
    uint8_t enc[DECAF_EDDSA_25519_PUBLIC_BYTES],
    const point_t p
) {
    /* The point is now on the twisted curve.  Move it to untwisted. */
    gf x, y, z, t;
    point_t q;
#if COFACTOR == 8
    /* Absorb the extra factor of 2 in the cofactor before the 4-isogeny. */
    API_NS(point_double)(q,p);
#else
    API_NS(point_copy)(q,p);
#endif
#if EDDSA_USE_SIGMA_ISOGENY
    {
        /* Use 4-isogeny like ed25519:
         *   2*x*y*sqrt(d/a-1)/(ax^2 + y^2 - 2)
         *   (y^2 - ax^2)/(y^2 + ax^2)
         * with a = -1, d = -EDWARDS_D:
         *   -2xysqrt(EDWARDS_D-1)/(2z^2-y^2+x^2)
         *   (y^2+x^2)/(y^2-x^2)
         */
        gf u;
        gf_sqr ( x, q->x ); // x^2
        gf_sqr ( t, q->y ); // y^2
        gf_add( u, x, t ); // x^2 + y^2
        gf_add( z, q->y, q->x );
        gf_sqr ( y, z);
        gf_sub ( y, u, y ); // -2xy
        gf_sub ( z, t, x ); // y^2 - x^2
        gf_sqr ( x, q->z );
        gf_add ( t, x, x);
        gf_sub ( t, t, z); // 2z^2 - y^2 + x^2
        gf_mul ( x, y, z ); // 2xy(y^2-x^2)
        gf_mul ( y, u, t ); // (x^2+y^2)(2z^2-y^2+x^2)
        gf_mul ( u, z, t );
        gf_copy( z, u );
        gf_mul ( u, x, RISTRETTO_FACTOR );
#if IMAGINE_TWIST
        /* Multiply by i to hop off the imaginary twist. */
        gf_mul_i( x, u );
#else
#error "... probably wrong"
        gf_copy( x, u );
#endif
        decaf_bzero(u,sizeof(u));
    }
#elif IMAGINE_TWIST
    {
        API_NS(point_double)(q,q);
        API_NS(point_double)(q,q);
        gf_mul_i(x, q->x);
        gf_copy(y, q->y);
        gf_copy(z, q->z);
    }
#else
    {
        /* 4-isogeny: 2xy/(y^+x^2), (y^2-x^2)/(2z^2-y^2+x^2) */
        gf u;
        gf_sqr ( x, q->x );
        gf_sqr ( t, q->y );
        gf_add( u, x, t );
        gf_add( z, q->y, q->x );
        gf_sqr ( y, z);
        gf_sub ( y, y, u );
        gf_sub ( z, t, x );
        gf_sqr ( x, q->z );
        gf_add ( t, x, x);
        gf_sub ( t, t, z);
        gf_mul ( x, t, y );
        gf_mul ( y, z, u );
        gf_mul ( z, u, t );
        decaf_bzero(u,sizeof(u));
    }
#endif
    /* Affinize */
    gf_invert(z,z,1);
    gf_mul(t,x,z);  /* affine x: carries the sign bit */
    gf_mul(x,y,z);  /* affine y: the serialized coordinate */
    /* Encode */
    /* NOTE(review): indexes with PRIVATE_BYTES into a PUBLIC_BYTES-sized
     * buffer; presumably the two constants are equal for this curve —
     * confirm against the headers. */
    enc[DECAF_EDDSA_25519_PRIVATE_BYTES-1] = 0;
    gf_serialize(enc, x, 1);
    /* Fold the low bit of x into the top bit of the encoding. */
    enc[DECAF_EDDSA_25519_PRIVATE_BYTES-1] |= 0x80 & gf_lobit(t);
    /* Erase all intermediates: they are derived from (possibly secret) p. */
    decaf_bzero(x,sizeof(x));
    decaf_bzero(y,sizeof(y));
    decaf_bzero(z,sizeof(z));
    decaf_bzero(t,sizeof(t));
    API_NS(point_destroy)(q);
}
/**
 * Decode an EdDSA (Ed25519) wire-format point onto the internal twisted
 * curve, applying the inverse isogeny.  The cofactor component is NOT
 * checked or cleared here (hence "ignore cofactor").
 *
 * @param p   Receives the decoded point.
 * @param enc The EdDSA-encoded point.
 * @return DECAF_SUCCESS iff enc was a valid encoding.
 */
decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
    point_t p,
    const uint8_t enc[DECAF_EDDSA_25519_PUBLIC_BYTES]
) {
    uint8_t enc2[DECAF_EDDSA_25519_PUBLIC_BYTES];
    memcpy(enc2,enc,sizeof(enc2));
    /* Extract and strip the sign-of-x bit before deserializing y. */
    mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] & 0x80);
    enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] &= ~0x80;
    mask_t succ = gf_deserialize(p->y, enc2, 1, 0);
    /* Generated condition: field bits mod 8 == 7 for this curve, so this
     * high-byte check is compiled out. */
#if 7 == 0
    succ &= word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]);
#endif
    /* Recover x from y via the curve equation. */
    gf_sqr(p->x,p->y);
    gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
#if EDDSA_USE_SIGMA_ISOGENY
    gf_mulw(p->t,p->z,EDWARDS_D); /* d-dy^2 */
    gf_mulw(p->x,p->z,EDWARDS_D-1); /* num = (1-y^2)(d-1) */
    gf_copy(p->z,p->x);
#else
    gf_mulw(p->t,p->x,EDWARDS_D); /* dy^2 */
#endif
    gf_sub(p->t,ONE,p->t); /* denom = 1-dy^2 or 1-d + dy^2 */
    gf_mul(p->x,p->z,p->t);
    succ &= gf_isr(p->t,p->x); /* 1/sqrt(num * denom) */
    gf_mul(p->x,p->t,p->z); /* sqrt(num / denom) */
    /* Choose the square root whose low bit matches the encoded sign. */
    gf_cond_neg(p->x,gf_lobit(p->x)^low);
    gf_copy(p->z,ONE);
#if EDDSA_USE_SIGMA_ISOGENY
    {
        /* Use 4-isogeny like ed25519:
         *   2*x*y/sqrt(1-d/a)/(ax^2 + y^2 - 2)
         *   (y^2 - ax^2)/(y^2 + ax^2)
         *   (MAGIC: above formula may be off by a factor of -a
         *   or something somewhere; check it for other a)
         *
         * with a = -1, d = -EDWARDS_D:
         *   -2xy/sqrt(1-EDWARDS_D)/(2z^2-y^2+x^2)
         *   (y^2+x^2)/(y^2-x^2)
         */
        gf a, b, c, d;
        gf_sqr ( c, p->x );
        gf_sqr ( a, p->y );
        gf_add ( d, c, a ); // x^2 + y^2
        gf_add ( p->t, p->y, p->x );
        gf_sqr ( b, p->t );
        gf_sub ( b, b, d ); // 2xy
        gf_sub ( p->t, a, c ); // y^2 - x^2
        gf_sqr ( p->x, p->z );
        gf_add ( p->z, p->x, p->x );
        gf_sub ( c, p->z, p->t ); // 2z^2 - y^2 + x^2
        gf_div_i ( a, c );
        gf_mul ( c, a, RISTRETTO_FACTOR );
        gf_mul ( p->x, b, p->t); // (2xy)(y^2-x^2)
        gf_mul ( p->z, p->t, c ); // (y^2-x^2)sd(2z^2 - y^2 + x^2)
        gf_mul ( p->y, d, c ); // (y^2+x^2)sd(2z^2 - y^2 + x^2)
        gf_mul ( p->t, d, b );
        decaf_bzero(a,sizeof(a));
        decaf_bzero(b,sizeof(b));
        decaf_bzero(c,sizeof(c));
        decaf_bzero(d,sizeof(d));
    }
#elif IMAGINE_TWIST
    {
        /* Move onto the imaginary twist: x *= sqrt(-1), then fill t = x*y. */
        gf_mul(p->t,p->x,SQRT_MINUS_ONE);
        gf_copy(p->x,p->t);
        gf_mul(p->t,p->x,p->y);
    }
#else
    {
        /* 4-isogeny 2xy/(y^2-ax^2), (y^2+ax^2)/(2-y^2-ax^2) */
        gf a, b, c, d;
        gf_sqr ( c, p->x );
        gf_sqr ( a, p->y );
        gf_add ( d, c, a );
        gf_add ( p->t, p->y, p->x );
        gf_sqr ( b, p->t );
        gf_sub ( b, b, d );
        gf_sub ( p->t, a, c );
        gf_sqr ( p->x, p->z );
        gf_add ( p->z, p->x, p->x );
        gf_sub ( a, p->z, d );
        gf_mul ( p->x, a, b );
        gf_mul ( p->z, p->t, a );
        gf_mul ( p->y, p->t, d );
        gf_mul ( p->t, b, d );
        decaf_bzero(a,sizeof(a));
        decaf_bzero(b,sizeof(b));
        decaf_bzero(c,sizeof(c));
        decaf_bzero(d,sizeof(d));
    }
#endif
    decaf_bzero(enc2,sizeof(enc2));
    /* On success the result must be a valid curve point. */
    assert(API_NS(point_valid)(p) || ~succ);
    return decaf_succeed_if(mask_to_bool(succ));
}
/**
 * X25519 Diffie-Hellman scalar multiplication, computed with a
 * constant-time Montgomery ladder on the u-coordinate.
 *
 * @param out    The scaled u-coordinate, serialized.
 * @param base   The input u-coordinate, serialized.
 * @param scalar The private scalar (clamped internally).
 * @return DECAF_SUCCESS unless the result is zero (low-order input).
 */
decaf_error_t decaf_x25519 (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t base[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    gf x1, x2, z2, x3, z3, t1, t2;
    /* Deserialization failure is ignored by design here. */
    ignore_result(gf_deserialize(x1,base,1,0));
    /* Ladder state: (x2:z2) = identity, (x3:z3) = base point. */
    gf_copy(x2,ONE);
    gf_copy(z2,ZERO);
    gf_copy(x3,x1);
    gf_copy(z3,ONE);
    int t;
    mask_t swap = 0;
    for (t = X_PRIVATE_BITS-1; t>=0; t--) {
        uint8_t sb = scalar[t/8];
        /* Scalar conditioning (clamping done on the fly):
         * clear the low cofactor bits of the bottom byte, and treat the
         * top byte as all-ones on the first iteration. */
        if (t/8==0) sb &= -(uint8_t)COFACTOR;
        else if (t == X_PRIVATE_BITS-1) sb = -1;
        mask_t k_t = (sb>>(t%8)) & 1;
        k_t = -k_t; /* set to all 0s or all 1s */
        /* Lazy conditional swap: only swap when the bit changes. */
        swap ^= k_t;
        gf_cond_swap(x2,x3,swap);
        gf_cond_swap(z2,z3,swap);
        swap = k_t;
        gf_add_nr(t1,x2,z2); /* A = x2 + z2 */ /* 2+e */
        gf_sub_nr(t2,x2,z2); /* B = x2 - z2 */ /* 3+e */
        gf_sub_nr(z2,x3,z3); /* D = x3 - z3 */ /* 3+e */
        gf_mul(x2,t1,z2); /* DA */
        gf_add_nr(z2,z3,x3); /* C = x3 + z3 */ /* 2+e */
        gf_mul(x3,t2,z2); /* CB */
        gf_sub_nr(z3,x2,x3); /* DA-CB */ /* 3+e */
        gf_sqr(z2,z3); /* (DA-CB)^2 */
        gf_mul(z3,x1,z2); /* z3 = x1(DA-CB)^2 */
        gf_add_nr(z2,x2,x3); /* (DA+CB) */ /* 2+e */
        gf_sqr(x3,z2); /* x3 = (DA+CB)^2 */
        gf_sqr(z2,t1); /* AA = A^2 */
        gf_sqr(t1,t2); /* BB = B^2 */
        gf_mul(x2,z2,t1); /* x2 = AA*BB */
        gf_sub_nr(t2,z2,t1); /* E = AA-BB */ /* 3+e */
        gf_mulw(t1,t2,-EDWARDS_D); /* E*-d = a24*E */
        gf_add_nr(t1,t1,z2); /* AA + a24*E */ /* 2+e */
        gf_mul(z2,t2,t1); /* z2 = E(AA+a24*E) */
    }
    /* Finish */
    gf_cond_swap(x2,x3,swap);
    gf_cond_swap(z2,z3,swap);
    gf_invert(z2,z2,0);
    gf_mul(x1,x2,z2);
    gf_serialize(out,x1,1);
    /* Reject the all-zero output (identity / low-order input). */
    mask_t nz = ~gf_eq(x1,ZERO);
    /* Erase the ladder state: it depends on the secret scalar. */
    decaf_bzero(x1,sizeof(x1));
    decaf_bzero(x2,sizeof(x2));
    decaf_bzero(z2,sizeof(z2));
    decaf_bzero(x3,sizeof(x3));
    decaf_bzero(z3,sizeof(z3));
    decaf_bzero(t1,sizeof(t1));
    decaf_bzero(t2,sizeof(t2));
    return decaf_succeed_if(mask_to_bool(nz));
}
/* Thanks Johan Pascal */
/**
 * Convert an Ed25519 public key (Edwards y-coordinate encoding) to the
 * corresponding X25519 public key (Montgomery u-coordinate encoding).
 *
 * @param x  Output X25519 public key.
 * @param ed Input Ed25519 public key.
 */
void decaf_ed25519_convert_public_key_to_x25519 (
    uint8_t x[DECAF_X25519_PUBLIC_BYTES],
    const uint8_t ed[DECAF_EDDSA_25519_PUBLIC_BYTES]
) {
    gf y;
    /* (uint8_t)(0xFE<<6) == 0x80: mask off the sign-of-x bit. */
    const uint8_t mask = (uint8_t)(0xFE<<(6));
    ignore_result(gf_deserialize(y, ed, 1, mask));
    {
        gf n,d;
#if EDDSA_USE_SIGMA_ISOGENY
        /* u = (1+y)/(1-y)*/
        gf_add(n, y, ONE); /* n = y+1 */
        gf_sub(d, ONE, y); /* d = 1-y */
        gf_invert(d, d, 0); /* d = 1/(1-y) */
        gf_mul(y, n, d); /* u = (y+1)/(1-y) */
        gf_serialize(x,y,1);
#else /* EDDSA_USE_SIGMA_ISOGENY */
        /* u = y^2 * (1-dy^2) / (1-y^2) */
        gf_sqr(n,y); /* y^2*/
        gf_sub(d,ONE,n); /* 1-y^2*/
        gf_invert(d,d,0); /* 1/(1-y^2)*/
        gf_mul(y,n,d); /* y^2 / (1-y^2) */
        gf_mulw(d,n,EDWARDS_D); /* dy^2*/
        gf_sub(d, ONE, d); /* 1-dy^2*/
        gf_mul(n, y, d); /* y^2 * (1-dy^2) / (1-y^2) */
        gf_serialize(x,n,1);
#endif /* EDDSA_USE_SIGMA_ISOGENY */
        /* Erase intermediates derived from the (public but tidy) key. */
        decaf_bzero(y,sizeof(y));
        decaf_bzero(n,sizeof(n));
        decaf_bzero(d,sizeof(d));
    }
}
/**
 * Generate an X25519 public key from a private scalar.
 * Thin wrapper forwarding to decaf_x25519_derive_public_key.
 */
void decaf_x25519_generate_key (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    decaf_x25519_derive_public_key(out,scalar);
}
  1188. void API_NS(point_mul_by_cofactor_and_encode_like_x25519) (
  1189. uint8_t out[X_PUBLIC_BYTES],
  1190. const point_t p
  1191. ) {
  1192. point_t q;
  1193. point_double_internal(q,p,1);
  1194. for (unsigned i=1; i<COFACTOR/4; i<<=1) point_double_internal(q,q,1);
  1195. gf_invert(q->t,q->x,0); /* 1/x */
  1196. gf_mul(q->z,q->t,q->y); /* y/x */
  1197. gf_sqr(q->y,q->z); /* (y/x)^2 */
  1198. #if IMAGINE_TWIST
  1199. gf_sub(q->y,ZERO,q->y);
  1200. #endif
  1201. gf_serialize(out,q->y,1);
  1202. API_NS(point_destroy(q));
  1203. }
/**
 * Derive the X25519 public key for a private scalar using the
 * precomputed fixed-base table (faster than a generic ladder).
 *
 * @param out    The serialized public key.
 * @param scalar The private scalar; clamped into scalar2 locally.
 */
void decaf_x25519_derive_public_key (
    uint8_t out[X_PUBLIC_BYTES],
    const uint8_t scalar[X_PRIVATE_BYTES]
) {
    /* Scalar conditioning: clear the low cofactor bits, clear bits above
     * X_PRIVATE_BITS, and set the top bit. */
    uint8_t scalar2[X_PRIVATE_BYTES];
    memcpy(scalar2,scalar,sizeof(scalar2));
    scalar2[0] &= -(uint8_t)COFACTOR;
    scalar2[X_PRIVATE_BYTES-1] &= ~(-1u<<((X_PRIVATE_BITS+7)%8));
    scalar2[X_PRIVATE_BYTES-1] |= 1<<((X_PRIVATE_BITS+7)%8);
    scalar_t the_scalar;
    API_NS(scalar_decode_long)(the_scalar,scalar2,sizeof(scalar2));
    /* We're gonna isogenize by 2, so divide by 2.
     *
     * Why by 2, even though it's a 4-isogeny?
     *
     * The isogeny map looks like
     * Montgomery <-2-> Jacobi <-2-> Edwards
     *
     * Since the Jacobi base point is the PREimage of the iso to
     * the Montgomery curve, and we're going
     * Jacobi -> Edwards -> Jacobi -> Montgomery,
     * we pick up only a factor of 2 over Jacobi -> Montgomery.
     */
    /* Halve once per factor of 2 in the cofactor (3 times for COFACTOR 8). */
    for (unsigned i=1; i<COFACTOR; i<<=1) {
        API_NS(scalar_halve)(the_scalar,the_scalar);
    }
    point_t p;
    API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),the_scalar);
    API_NS(point_mul_by_cofactor_and_encode_like_x25519)(out,p);
    API_NS(point_destroy)(p);
}
  1236. /**
  1237. * @cond internal
  1238. * Control for variable-time scalar multiply algorithms.
  1239. */
struct smvt_control {
    /* power:  bit position of this wNAF digit in the scalar.
     * addend: the signed odd digit itself (used as a table index via >>1). */
    int power, addend;
};
/**
 * Variable-time wNAF recoding of a scalar.
 *
 * Emits (power, addend) digit records into control[], lowest-index entry
 * being the most significant digit, terminated by a sentinel record with
 * power == -1.  Each addend is a signed odd digit.
 *
 * @param control    Output array; caller sizes it [nbits/(table_bits+1) + 3].
 * @param scalar     The scalar to recode.
 * @param table_bits Window size parameter (digits span table_bits+1 bits).
 * @return The number of digit records written (excluding the sentinel).
 *
 * @warning Variable time: only use with non-secret scalars.
 */
static int recode_wnaf (
    struct smvt_control *control, /* [nbits/(table_bits+1) + 3] */
    const scalar_t scalar,
    unsigned int table_bits
) {
    unsigned int table_size = SCALAR_BITS/(table_bits+1) + 3;
    int position = table_size - 1; /* at the end */
    /* place the end marker */
    control[position].power = -1;
    control[position].addend = 0;
    position--;
    /* PERF: Could negate scalar if it's large.  But then would need more cases
     * in the actual code that uses it, all for an expected reduction of like 1/5 op.
     * Probably not worth it.
     */
    /* Process the scalar 16 bits at a time; `current` holds the live
     * low bits plus any borrow from previous digit subtractions. */
    uint64_t current = scalar->limb[0] & 0xFFFF;
    uint32_t mask = (1<<(table_bits+1))-1;
    unsigned int w;
    const unsigned int B_OVER_16 = sizeof(scalar->limb[0]) / 2; /* 16-bit chunks per limb */
    for (w = 1; w<(SCALAR_BITS-1)/16+3; w++) {
        if (w < (SCALAR_BITS-1)/16+1) {
            /* Refill the 16 high bits of current */
            current += (uint32_t)((scalar->limb[w/B_OVER_16]>>(16*(w%B_OVER_16)))<<16);
        }
        while (current & 0xFFFF) {
            assert(position >= 0);
            /* Skip to the lowest set bit; take a (table_bits+1)-bit odd
             * digit there, choosing the negative representative when the
             * next bit up is set, then cancel it out of `current`. */
            uint32_t pos = __builtin_ctz((uint32_t)current), odd = (uint32_t)current >> pos;
            int32_t delta = odd & mask;
            if (odd & 1<<(table_bits+1)) delta -= (1<<(table_bits+1));
            current -= delta << pos;
            control[position].power = pos + 16*(w-1);
            control[position].addend = delta;
            position--;
        }
        current >>= 16;
    }
    assert(current==0);
    /* Compact the records down to the start of the array. */
    position++;
    unsigned int n = table_size - position;
    unsigned int i;
    for (i=0; i<n; i++) {
        control[i] = control[i+position];
    }
    return n-1;
}
/**
 * Build a table of odd multiples of a point for variable-time wNAF:
 * output[i] = (2*i+1) * working, for i in [0, 2^tbits).
 *
 * @param output  Table of 1<<tbits pniels entries.
 * @param working The base point.
 * @param tbits   log2 of the table size; 0 means just the point itself.
 */
static void
prepare_wnaf_table(
    pniels_t *output,
    const point_t working,
    unsigned int tbits
) {
    point_t tmp;
    int i;
    pt_to_pniels(output[0], working);
    if (tbits == 0) return;
    /* twop = 2P; each subsequent entry adds 2P to the previous multiple. */
    API_NS(point_double)(tmp,working);
    pniels_t twop;
    pt_to_pniels(twop, tmp);
    add_pniels_to_pt(tmp, output[0],0);
    pt_to_pniels(output[1], tmp); /* 3P */
    for (i=2; i < 1<<tbits; i++) {
        add_pniels_to_pt(tmp, twop,0);
        pt_to_pniels(output[i], tmp); /* (2i+1)P */
    }
    API_NS(point_destroy)(tmp);
    decaf_bzero(twop,sizeof(twop));
}
/* Fixed-base wNAF table, stored externally as raw field elements and
 * reinterpreted here as an array of niels_t entries. */
extern const gf API_NS(precomputed_wnaf_as_fe)[];
static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe);
/* Byte size of the precomputed wNAF table; hidden from the public ABI. */
const size_t API_NS(sizeof_precomputed_wnafs) __attribute((visibility("hidden")))
    = sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS;
/**
 * Precompute the fixed-base wNAF table for a point: odd multiples of
 * base, batch-normalized into affine niels form.
 *
 * @param out  Receives 1<<DECAF_WNAF_FIXED_TABLE_BITS niels entries.
 * @param base The base point.
 */
void API_NS(precompute_wnafs) (
    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
    const point_t base
) __attribute__ ((visibility ("hidden")));
void API_NS(precompute_wnafs) (
    niels_t out[1<<DECAF_WNAF_FIXED_TABLE_BITS],
    const point_t base
) {
    pniels_t tmp[1<<DECAF_WNAF_FIXED_TABLE_BITS];
    gf zs[1<<DECAF_WNAF_FIXED_TABLE_BITS], zis[1<<DECAF_WNAF_FIXED_TABLE_BITS];
    int i;
    prepare_wnaf_table(tmp,base,DECAF_WNAF_FIXED_TABLE_BITS);
    /* Split each projective entry into its niels part and its z, then
     * normalize all entries with one shared batch inversion. */
    for (i=0; i<1<<DECAF_WNAF_FIXED_TABLE_BITS; i++) {
        memcpy(out[i], tmp[i]->n, sizeof(niels_t));
        gf_copy(zs[i], tmp[i]->z);
    }
    batch_normalize_niels(out, (const gf *)zs, zis, 1<<DECAF_WNAF_FIXED_TABLE_BITS);
    decaf_bzero(tmp,sizeof(tmp));
    decaf_bzero(zs,sizeof(zs));
    decaf_bzero(zis,sizeof(zis));
}
/**
 * Variable-time double scalar multiplication:
 *   combo = scalar1 * base + scalar2 * base2,
 * where `base` is the fixed generator served by the precomputed wNAF
 * table.  Both scalars are recoded into wNAF and the two digit streams
 * are merged in one shared doubling pass.
 *
 * @warning Variable time: for NON-SECRET scalars only (e.g. signature
 * verification), as the function name says.
 */
void API_NS(base_double_scalarmul_non_secret) (
    point_t combo,
    const scalar_t scalar1,
    const point_t base2,
    const scalar_t scalar2
) {
    const int table_bits_var = DECAF_WNAF_VAR_TABLE_BITS,
        table_bits_pre = DECAF_WNAF_FIXED_TABLE_BITS;
    struct smvt_control control_var[SCALAR_BITS/(table_bits_var+1)+3];
    struct smvt_control control_pre[SCALAR_BITS/(table_bits_pre+1)+3];
    int ncb_pre = recode_wnaf(control_pre, scalar1, table_bits_pre);
    int ncb_var = recode_wnaf(control_var, scalar2, table_bits_var);
    pniels_t precmp_var[1<<table_bits_var];
    prepare_wnaf_table(precmp_var, base2, table_bits_var);
    /* Initialize combo from whichever stream has the most significant
     * leading digit (or both, if they coincide). */
    int contp=0, contv=0, i = control_var[0].power;
    if (i < 0) {
        /* scalar2 == 0; note this early return skips the bzero cleanup
         * below, which is fine since the inputs are non-secret. */
        API_NS(point_copy)(combo, API_NS(point_identity));
        return;
    } else if (i > control_pre[0].power) {
        pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
        contv++;
    } else if (i == control_pre[0].power && i >=0 ) {
        /* (the i >= 0 test is redundant: the first branch already returned) */
        pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
        add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1], i);
        contv++; contp++;
    } else {
        i = control_pre[0].power;
        niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1]);
        contp++;
    }
    /* Main loop: double once per bit, adding a table entry whenever a
     * digit from either stream lands on this bit position.  Negative
     * digits subtract the corresponding odd multiple. */
    for (i--; i >= 0; i--) {
        int cv = (i==control_var[contv].power), cp = (i==control_pre[contp].power);
        point_double_internal(combo,combo,i && !(cv||cp));
        if (cv) {
            assert(control_var[contv].addend);
            if (control_var[contv].addend > 0) {
                add_pniels_to_pt(combo, precmp_var[control_var[contv].addend >> 1], i&&!cp);
            } else {
                sub_pniels_from_pt(combo, precmp_var[(-control_var[contv].addend) >> 1], i&&!cp);
            }
            contv++;
        }
        if (cp) {
            assert(control_pre[contp].addend);
            if (control_pre[contp].addend > 0) {
                add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[contp].addend >> 1], i);
            } else {
                sub_niels_from_pt(combo, API_NS(wnaf_base)[(-control_pre[contp].addend) >> 1], i);
            }
            contp++;
        }
    }
    /* This function is non-secret, but whatever this is cheap. */
    decaf_bzero(control_var,sizeof(control_var));
    decaf_bzero(control_pre,sizeof(control_pre));
    decaf_bzero(precmp_var,sizeof(precmp_var));
    /* Every recoded digit must have been consumed exactly once. */
    assert(contv == ncb_var); (void)ncb_var;
    assert(contp == ncb_pre); (void)ncb_pre;
}
/** Securely erase a point (zeroizes the memory it occupies). */
void API_NS(point_destroy) (
    point_t point
) {
    decaf_bzero(point, sizeof(point_t));
}
/** Securely erase a precomputed table (zeroizes its full storage). */
void API_NS(precomputed_destroy) (
    precomputed_s *pre
) {
    decaf_bzero(pre, API_NS(sizeof_precomputed_s));
}