You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

462 lines
11 KiB

  1. /* Copyright (c) 2015 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. /**
  5. * @file decaf.c
  6. * @author Mike Hamburg
  7. * @brief Decaf high-level functions.
  8. */
  9. #include "decaf.h"
  10. typedef uint64_t word_t, mask_t; // TODO
  11. typedef __uint128_t dword_t;
  12. typedef __int128_t sdword_t;
  13. #define WBITS 64
  14. #define LBITS 56
  15. #define sv static void
  16. #define NLIMBS 8
  17. typedef word_t gf[NLIMBS];
  18. static const gf ZERO = {0}, ONE = {1}, TWO = {2};
  19. #define LMASK ((1ull<<LBITS)-1)
  20. static const gf P = { LMASK, LMASK, LMASK, LMASK, LMASK-1, LMASK, LMASK, LMASK };
  21. #define FOR_LIMB(i,op) { unsigned int i=0; \
  22. op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
  23. }
  24. static const int EDWARDS_D = -39081;
  25. /** Copy x = y */
  26. sv gf_cpy(gf x, const gf y) { FOR_LIMB(i, x[i] = y[i]); }
  27. /** Mostly-unoptimized multiply (PERF), but at least it's unrolled. */
  28. sv gf_mul (gf c, const gf a, const gf b) {
  29. gf aa;
  30. gf_cpy(aa,a);
  31. dword_t accum[NLIMBS] = {0};
  32. FOR_LIMB(i, {
  33. FOR_LIMB(j,{ accum[(i+j)%NLIMBS] += (dword_t)b[i] * aa[j]; });
  34. aa[(NLIMBS-1-i)^(NLIMBS/2)] += aa[NLIMBS-1-i];
  35. });
  36. accum[NLIMBS-1] += accum[NLIMBS-2] >> LBITS;
  37. accum[NLIMBS-2] &= LMASK;
  38. accum[NLIMBS/2] += accum[NLIMBS-1] >> LBITS;
  39. FOR_LIMB(j,{
  40. accum[j] += accum[(j-1)%NLIMBS] >> LBITS;
  41. accum[(j-1)%NLIMBS] &= LMASK;
  42. });
  43. FOR_LIMB(j, c[j] = accum[j] );
  44. }
  45. /** No dedicated square (PERF) */
  46. #define gf_sqr(c,a) gf_mul(c,a,a)
  47. /** Inverse square root using addition chain. */
  48. sv gf_isqrt(gf y, const gf x) {
  49. int i;
  50. #define STEP(s,m,n) gf_mul(s,m,c); gf_cpy(c,s); for (i=0;i<n;i++) gf_sqr(c,c);
  51. gf a, b, c;
  52. gf_sqr ( c, x );
  53. STEP(b,x,1);
  54. STEP(b,x,3);
  55. STEP(a,b,3);
  56. STEP(a,b,9);
  57. STEP(b,a,1);
  58. STEP(a,x,18);
  59. STEP(a,b,37);
  60. STEP(b,a,37);
  61. STEP(b,a,111);
  62. STEP(a,b,1);
  63. STEP(b,x,223);
  64. gf_mul(y,a,c);
  65. }
  66. /** Weak reduce mod p. */
  67. sv gf_reduce(gf x) {
  68. x[NLIMBS/2] += x[NLIMBS-1] >> LBITS;
  69. FOR_LIMB(j,{
  70. x[j] += x[(j-1)%NLIMBS] >> LBITS;
  71. x[(j-1)%NLIMBS] &= LMASK;
  72. });
  73. }
  74. /** Add mod p. Conservatively always weak-reduce. (PERF) */
  75. sv gf_add ( gf x, const gf y, const gf z ) {
  76. FOR_LIMB(i, x[i] = y[i] + z[i] );
  77. gf_reduce(x);
  78. }
  79. /** Subtract mod p. Conservatively always weak-reduce. (PERF) */
  80. sv gf_sub ( gf x, const gf y, const gf z ) {
  81. FOR_LIMB(i, x[i] = y[i] - z[i] + 2*P[i] );
  82. gf_reduce(x);
  83. }
  84. /** Constant time, x = is_z ? z : y */
  85. sv cond_sel(gf x, const gf y, const gf z, mask_t is_z) {
  86. FOR_LIMB(i, x[i] = (y[i] & ~is_z) | (z[i] & is_z) );
  87. }
  88. /** Constant time, if (neg) x=-x; */
  89. sv cond_neg(gf x, mask_t neg) {
  90. gf y;
  91. gf_sub(y,ZERO,x);
  92. cond_sel(x,x,y,neg);
  93. }
  94. /** Constant time, if (swap) (x,y) = (y,x); */
  95. sv cond_swap(gf x, gf y, mask_t swap) {
  96. FOR_LIMB(i, {
  97. word_t s = (x[i] ^ y[i]) & swap;
  98. x[i] ^= s;
  99. y[i] ^= s;
  100. });
  101. }
  102. /**
  103. * Mul by signed int. Not constant-time WRT the sign of that int.
  104. * Just uses a full mul (PERF)
  105. */
  106. sv gf_mlw(gf a, const gf b, int w) {
  107. if (w>0) {
  108. gf ww = {w};
  109. gf_mul(a,b,ww);
  110. } else {
  111. gf ww = {-w};
  112. gf_mul(a,b,ww);
  113. gf_sub(a,ZERO,a);
  114. }
  115. }
  116. /** Canonicalize */
  117. sv gf_canon ( gf a ) {
  118. gf_reduce(a);
  119. /* subtract p with borrow */
  120. sdword_t carry = 0;
  121. FOR_LIMB(i, {
  122. carry = carry + a[i] - P[i];
  123. a[i] = carry & LMASK;
  124. carry >>= LBITS;
  125. });
  126. mask_t addback = carry;
  127. carry = 0;
  128. /* add it back */
  129. FOR_LIMB(i, {
  130. carry = carry + a[i] + (P[i] & addback);
  131. a[i] = carry & LMASK;
  132. carry >>= LBITS;
  133. });
  134. }
  135. /** Compare a==b */
  136. static word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) {
  137. gf c;
  138. gf_sub(c,a,b);
  139. gf_canon(c);
  140. word_t ret=0;
  141. FOR_LIMB(i, ret |= c[i] );
  142. /* Hope the compiler is too dumb to optimize this, thus noinline */
  143. return ((dword_t)ret - 1) >> WBITS;
  144. }
  145. /** Return high bit of x = low bit of 2x mod p */
  146. static word_t hibit(const gf x) {
  147. gf y;
  148. gf_add(y,x,x);
  149. gf_canon(y);
  150. return -(y[0]&1);
  151. }
  152. /* a = use_c ? c : b */
  153. sv decaf_cond_sel (
  154. decaf_point_t a,
  155. const decaf_point_t b,
  156. const decaf_point_t c,
  157. mask_t use_c
  158. ) {
  159. cond_sel(a->x, b->x, c->x, use_c);
  160. cond_sel(a->y, b->y, c->y, use_c);
  161. cond_sel(a->z, b->z, c->z, use_c);
  162. cond_sel(a->t, b->t, c->t, use_c);
  163. }
  164. /* *** API begins here *** */
  165. /** identity = (0,1) */
  166. const decaf_point_t decaf_identity = {{{0},{1},{1},{0}}};
  167. void decaf_encode( unsigned char ser[DECAF_SER_BYTES], const decaf_point_t p ) {
  168. gf a, b, c, d;
  169. gf_mlw ( a, p->y, 1-EDWARDS_D );
  170. gf_mul ( c, a, p->t );
  171. gf_mul ( a, p->x, p->z );
  172. gf_sub ( d, c, a );
  173. gf_add ( a, p->z, p->y );
  174. gf_sub ( b, p->z, p->y );
  175. gf_mul ( c, b, a );
  176. gf_mlw ( b, c, -EDWARDS_D );
  177. gf_isqrt ( a, b );
  178. gf_mlw ( b, a, -EDWARDS_D );
  179. gf_mul ( c, b, a );
  180. gf_mul ( a, c, d );
  181. gf_add ( d, b, b );
  182. gf_mul ( c, d, p->z );
  183. cond_neg ( b, ~hibit(c) );
  184. gf_mul ( c, b, p->y );
  185. gf_add ( a, a, c );
  186. cond_neg ( a, hibit(a) );
  187. // FIXME arch
  188. gf_canon(a);
  189. int j;
  190. FOR_LIMB(i,{
  191. for (j=0; j<7; j++) {
  192. ser[7*i+j] = a[i];
  193. a[i] >>= 8;
  194. }
  195. });
  196. }
  197. /**
  198. * Deserialize a bool, return TRUE if < p.
  199. */
  200. static decaf_bool_t gf_deser(gf s, const unsigned char ser[DECAF_SER_BYTES]) {
  201. // FIXME arch
  202. int j;
  203. FOR_LIMB(i, {
  204. word_t out = 0;
  205. for (j=0; j<7; j++) {
  206. out |= ((word_t)ser[7*i+j])<<(8*j);
  207. }
  208. s[i] = out;
  209. });
  210. sdword_t accum = 0;
  211. FOR_LIMB(i, accum = (accum + s[i] - P[i]) >> WBITS );
  212. return accum;
  213. }
  214. /* Constant-time add or subtract */
  215. sv decaf_add_sub (
  216. decaf_point_t p,
  217. const decaf_point_t q,
  218. const decaf_point_t r,
  219. decaf_bool_t do_sub
  220. ) {
  221. /* Twisted Edward formulas, complete when 4-torsion isn't involved */
  222. gf a, b, c, d;
  223. gf_sub ( b, q->y, q->x );
  224. gf_sub ( c, r->y, r->x );
  225. gf_add ( d, r->y, r->x );
  226. cond_swap(c,d,do_sub);
  227. gf_mul ( a, c, b );
  228. gf_add ( b, q->y, q->x );
  229. gf_mul ( p->y, d, b );
  230. gf_mul ( b, r->t, q->t );
  231. gf_mlw ( p->x, b, 2-2*EDWARDS_D );
  232. gf_add ( b, a, p->y );
  233. gf_sub ( c, p->y, a );
  234. gf_mul ( a, q->z, r->z );
  235. gf_add ( a, a, a );
  236. gf_add ( p->y, a, p->x );
  237. gf_sub ( a, a, p->x );
  238. cond_swap(a,p->y,do_sub);
  239. gf_mul ( p->z, a, p->y );
  240. gf_mul ( p->x, p->y, c );
  241. gf_mul ( p->y, a, b );
  242. gf_mul ( p->t, b, c );
  243. }
  244. decaf_bool_t decaf_decode (
  245. decaf_point_t p,
  246. const unsigned char ser[DECAF_SER_BYTES],
  247. decaf_bool_t allow_identity
  248. ) {
  249. gf s, a, b, c, d, e;
  250. mask_t succ = gf_deser(s, ser);
  251. mask_t zero = gf_eq(s, ZERO);
  252. succ &= allow_identity | ~zero;
  253. succ &= ~hibit(s);
  254. gf_sqr ( a, s );
  255. gf_sub ( p->z, ONE, a );
  256. gf_sqr ( b, p->z );
  257. gf_mlw ( c, a, 4-4*EDWARDS_D );
  258. gf_add ( c, c, b );
  259. gf_mul ( b, c, a );
  260. gf_isqrt ( d, b );
  261. gf_sqr ( e, d );
  262. gf_mul ( a, e, b );
  263. gf_add ( a, a, ONE );
  264. succ &= ~gf_eq ( a, ZERO );
  265. gf_mul ( b, c, d );
  266. cond_neg ( d, hibit(b) );
  267. gf_add ( p->x, s, s );
  268. gf_mul ( c, d, s );
  269. gf_sub ( b, TWO, p->z );
  270. gf_mul ( a, b, c );
  271. gf_mul ( p->y,a,p->z );
  272. gf_mul ( p->t,p->x,a );
  273. p->y[0] -= zero;
  274. /* TODO: do something safe if ~succ? */
  275. return succ;
  276. }
  277. void decaf_sub(decaf_point_t a, const decaf_point_t b, const decaf_point_t c) {
  278. decaf_add_sub(a,b,c,-1);
  279. }
  280. void decaf_add(decaf_point_t a, const decaf_point_t b, const decaf_point_t c) {
  281. decaf_add_sub(a,b,c,0);
  282. }
  283. /* No dedicated point double (PERF) */
  284. #define decaf_dbl(a,b) decaf_add(a,b,b)
  285. void decaf_copy (
  286. decaf_point_t a,
  287. const decaf_point_t b
  288. ) {
  289. gf_cpy(a->x, b->x);
  290. gf_cpy(a->y, b->y);
  291. gf_cpy(a->z, b->z);
  292. gf_cpy(a->t, b->t);
  293. }
  294. void decaf_scalarmul (
  295. decaf_point_t a,
  296. const decaf_point_t b,
  297. const decaf_word_t *scalar,
  298. unsigned int scalar_words
  299. ) {
  300. if (scalar_words == 0) {
  301. decaf_copy(a,decaf_identity);
  302. return;
  303. }
  304. /* w=2 signed window uses about 1.5 adds per bit.
  305. * I figured a few extra lines was worth the 25% speedup.
  306. * NB: if adapting this function to scalarmul by a
  307. * possibly-odd number of unmasked bits, may need to mask.
  308. */
  309. decaf_point_t w,b3,tmp;
  310. decaf_dbl(w,b);
  311. /* b3 = b*3 */
  312. decaf_add(b3,w,b);
  313. int i;
  314. for (i=scalar_words*WBITS-2; i>0; i-=2) {
  315. decaf_word_t bits = scalar[i/WBITS]>>(i%WBITS);
  316. decaf_cond_sel(tmp,b,b3,((bits^(bits>>1))&1)-1);
  317. decaf_dbl(w,w);
  318. decaf_add_sub(w,w,tmp,((bits>>1)&1)-1);
  319. decaf_dbl(w,w);
  320. }
  321. decaf_add_sub(w,w,b,((scalar[0]>>1)&1)-1);
  322. /* low bit is special because fo signed window */
  323. decaf_cond_sel(tmp,b,decaf_identity,-(scalar[0]&1));
  324. decaf_sub(a,w,tmp);
  325. }
  326. decaf_bool_t decaf_eq ( const decaf_point_t p, const decaf_point_t q ) {
  327. /* equality mod 2-torsion compares x/y */
  328. gf a, b;
  329. gf_mul ( a, p->y, q->x );
  330. gf_mul ( b, q->y, p->x );
  331. return gf_eq(a,b);
  332. }
  333. static const int QUADRATIC_NONRESIDUE = -1;
  334. void decaf_nonuniform_map_to_curve (
  335. decaf_point_t p,
  336. const unsigned char ser[DECAF_SER_BYTES]
  337. ) {
  338. /*
  339. sage: XXD = (u*r^2 + 1) * (d - u*r^2) * (1 - u*d*r^2) / (d+1)
  340. sage: factor(XX / (1/XXD))
  341. (u*r^2 - d)^2
  342. sage: factor((ey-1)/(ey+1)/(1/d * 1/XXD))
  343. (u*d*r^2 - 1)^2
  344. sage: factor(XX2 / (u*r^2/XXD))
  345. (u*d*r^2 - 1)^2
  346. sage: factor((ey2-1)/(ey2+1)/(1/d * u*r^2/XXD))
  347. (u*r^2 - d)^2
  348. */
  349. gf r,urr,a,b,c,dee,e,ur2_d,udr2_1;
  350. (void)gf_deser(r,ser);
  351. gf_canon(r); // just in case
  352. gf_sqr(a,r);
  353. gf_mlw(urr,a,QUADRATIC_NONRESIDUE); // urr = u*r^2
  354. gf_mlw(dee,ONE,EDWARDS_D);
  355. gf_add(a,urr,ONE);
  356. gf_sub(ur2_d,dee,urr); // ur2_d = -(ur^2-d)
  357. gf_mul(c,a,ur2_d);
  358. gf_mlw(b,urr,-EDWARDS_D);
  359. gf_add(udr2_1,b,ONE); // udr2_1 = -(udr^2-1)
  360. gf_mul(a,c,udr2_1);
  361. gf_mlw(c,a,EDWARDS_D+1); // c = (u*r^2 + 1) * (d - u*r^2) * (1 - u*d*r^2) * (d+1)
  362. gf_isqrt(b,c); // FIELD: if 5 mod 8, multiply result by u.
  363. gf_sqr(a,b);
  364. gf_mul(e,a,c);
  365. mask_t square = gf_eq(e,ONE);
  366. gf_mul(a,b,r);
  367. cond_sel(b,a,b,square);
  368. cond_neg(b,hibit(b));
  369. gf_mlw(a,b,EDWARDS_D+1);
  370. /* Here: a = sqrt( (d+1) / (ur^2?) * (u*r^2 + 1) * (d - u*r^2) * (1 - u*d*r^2)) */
  371. cond_swap(ur2_d,udr2_1,~square);
  372. gf_mul(e,ur2_d,a);
  373. gf_mul(b,udr2_1,a);
  374. gf_sqr(c,b);
  375. /* Here:
  376. * ed_x = 2e/(1-e^2)
  377. * c = * (ed_y-1)/(ed_y+1)
  378. *
  379. * Special cases:
  380. * e^2 = 1: impossible for cofactor-4 curves (would isogenize to order-4 point)
  381. * e = 0 <-> also c = 0: maps to (0,1), which is fine.
  382. */
  383. gf_sqr(a,e);
  384. gf_sub(a,ONE,a);
  385. gf_add(e,e,e);
  386. gf_add(b,dee,c);
  387. gf_sub(c,dee,c);
  388. gf_mul(p->x,e,c);
  389. gf_mul(p->z,a,c);
  390. gf_mul(p->y,b,a);
  391. gf_mul(p->t,b,e);
  392. }
  393. decaf_bool_t decaf_valid (
  394. const decaf_point_t p
  395. ) {
  396. gf a,b,c;
  397. gf_mul(a,p->x,p->y);
  398. gf_mul(b,p->z,p->t);
  399. mask_t out = gf_eq(a,b);
  400. gf_sqr(a,p->x);
  401. gf_sqr(b,p->y);
  402. gf_sub(a,b,a);
  403. gf_sqr(b,p->t);
  404. gf_mlw(c,b,1-EDWARDS_D);
  405. gf_sqr(b,p->z);
  406. gf_sub(b,b,c);
  407. out &= gf_eq(a,b);
  408. return out;
  409. }