/* Copyright (c) 2014 Cryptography Research, Inc.
 * Released under the MIT License. See LICENSE.txt for license information.
 */
#include "word.h"

#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <assert.h> /* assert() is used directly below */

#include "intrinsics.h"
#include "scalarmul.h"
#include "barrett_field.h"
#include "constant_time.h"
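
/*
 * Constant-time Montgomery ladder over the serialized input point.
 *
 * Each step conditionally swaps the (x,z) accumulator pairs under the mask
 * flip^pflip, i.e. the swap takes effect only when the current scalar bit
 * differs from the previous one; a final swap resolves the last bit.  The
 * n_extra_doubles loop then multiplies the result by a power of two
 * (e.g. to clear a cofactor).
 */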
mask_t
montgomery_ladder (
    field_a_t out,
    const field_a_t in,
    const word_t *scalar,
    unsigned int nbits,
    unsigned int n_extra_doubles
) {
    montgomery_a_t mont;
    deserialize_montgomery(mont, in);

    int i,j,n=(nbits-1)%WORD_BITS;
    mask_t pflip = 0;
    for (j=(nbits+WORD_BITS-1)/WORD_BITS-1; j>=0; j--) {
        word_t w = scalar[j];
        for (i=n; i>=0; i--) {
            mask_t flip = -((w>>i)&1);
            constant_time_cond_swap(mont->xa,mont->xd,sizeof(mont->xd),flip^pflip);
            constant_time_cond_swap(mont->za,mont->zd,sizeof(mont->xd),flip^pflip);
            montgomery_step(mont);
            pflip = flip;
        }
        n = WORD_BITS-1;
    }
    constant_time_cond_swap(mont->xa,mont->xd,sizeof(mont->xd),pflip);
    constant_time_cond_swap(mont->za,mont->zd,sizeof(mont->xd),pflip);

    assert(n_extra_doubles < INT_MAX);
    for (j=0; j<(int)n_extra_doubles; j++) {
        montgomery_step(mont);
    }

    return serialize_montgomery(out, mont, in);
}
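
/*
 * The same conditional-swap ladder as above, specialized to the Decaf
 * encoding: montgomery_aux_step advances an augmented ladder state, and
 * the final bit (pflip) is also passed to the serializer.
 */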
mask_t
decaf_montgomery_ladder (
    field_a_t out,
    const field_a_t in,
    const word_t *scalar,
    unsigned int nbits
) {
    montgomery_aux_a_t mont;
    decaf_deserialize_montgomery(mont, in);

    int i,j,n=(nbits-1)%WORD_BITS;
    mask_t pflip = 0;
    for (j=(nbits+WORD_BITS-1)/WORD_BITS-1; j>=0; j--) {
        word_t w = scalar[j];
        for (i=n; i>=0; i--) {
            mask_t flip = -((w>>i)&1);
            constant_time_cond_swap(mont->xa,mont->xd,sizeof(mont->xd),flip^pflip);
            constant_time_cond_swap(mont->za,mont->zd,sizeof(mont->xd),flip^pflip);
            montgomery_aux_step(mont);
            pflip = flip;
        }
        n = WORD_BITS-1;
    }
    constant_time_cond_swap(mont->xa,mont->xd,sizeof(mont->xd),pflip);
    constant_time_cond_swap(mont->za,mont->zd,sizeof(mont->xd),pflip);

    return decaf_serialize_montgomery(out, mont, pflip);
}
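
/* Type-safe wrappers around the generic constant-time table lookup. */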
static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_pniels (
    tw_pniels_a_t out,
    const tw_pniels_a_t *in,
    int nin,
    int idx
) {
    constant_time_lookup(out,in,sizeof(*out),nin,idx);
}

static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_extended (
    tw_extended_a_t out,
    const tw_extended_a_t *in,
    int nin,
    int idx
) {
    constant_time_lookup(out,in,sizeof(*out),nin,idx);
}

static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_niels (
    tw_niels_a_t out,
    const tw_niels_a_t *in,
    int nin,
    int idx
) {
    constant_time_lookup(out,in,sizeof(*out),nin,idx);
}
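
/*
 * Recode a scalar for the signed fixed-window loops below.  prepared_data
 * holds two precomputed adjustments of opposite parity (congruent mod the
 * group order; see precompute_fixed_base); the one selected by the
 * scalar's low bit makes the sum even, and the sum is then divided by
 * two, carry included.
 */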
static void
convert_to_signed_window_form (
    word_t *out,
    const word_t *scalar,
    int nwords_scalar,
    const word_t *prepared_data,
    int nwords_pd
) {
    assert(nwords_pd <= nwords_scalar);
    mask_t mask = -(scalar[0]&1);

    word_t carry = add_nr_ext_packed(out, scalar, nwords_scalar, prepared_data, nwords_pd, ~mask);
    carry += add_nr_ext_packed(out, out, nwords_scalar, prepared_data+nwords_pd, nwords_pd, mask);

    assert(!(out[0]&1));

    int i;
    for (i=0; i<nwords_scalar; i++) {
        out[i] >>= 1;
        if (i<nwords_scalar-1) {
            out[i] |= out[i+1]<<(WORD_BITS-1);
        } else {
            out[i] |= carry<<(WORD_BITS-1);
        }
    }
}
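
/*
 * Constant-time fixed-window scalar multiplication of the working point.
 * A table of the odd multiples 1P, 3P, ..., (2*NTABLE-1)P is built on the
 * fly; each window of the recoded scalar is turned into a signed odd
 * digit, fetched with a constant-time lookup and a conditional negation,
 * so neither the table index nor the digit's sign leaks through timing.
 */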
void
scalarmul (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS]
) {
    const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE,
        WINDOW_MASK = (1<<WINDOW)-1, WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1),
        nbits = ROUND_UP(SCALAR_BITS,WINDOW);

    word_t scalar2[SCALAR_WORDS];
    convert_to_signed_window_form (
        scalar2, scalar, SCALAR_WORDS,
        SCALARMUL_FIXED_WINDOW_ADJUSTMENT, SCALAR_WORDS
    );

    /* FIXME: tabulator is redundant */
    tw_extensible_a_t tabulator;
    copy_tw_extensible(tabulator, working);
    double_tw_extensible(tabulator);

    tw_pniels_a_t
        pn VECTOR_ALIGNED,
        multiples[NTABLE] VECTOR_ALIGNED;
    convert_tw_extensible_to_tw_pniels(pn, tabulator);
    convert_tw_extensible_to_tw_pniels(multiples[0], working);

    int i,j;
    for (i=1; i<NTABLE; i++) {
        add_tw_pniels_to_tw_extensible(working, pn);
        convert_tw_extensible_to_tw_pniels(multiples[i], working);
    }

    i = nbits - WINDOW;
    int bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS) & WINDOW_MASK,
        inv = (bits>>(WINDOW-1))-1;
    bits ^= inv;

    constant_time_lookup_tw_pniels(pn, (const tw_pniels_a_t*)multiples, NTABLE, bits & WINDOW_T_MASK);
    cond_negate_tw_pniels(pn, inv);
    convert_tw_pniels_to_tw_extensible(working, pn);

    for (i-=WINDOW; i>=0; i-=WINDOW) {
        for (j=0; j<WINDOW; j++) {
            double_tw_extensible(working);
        }

        bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS);
        if (i/WORD_BITS < SCALAR_WORDS-1 && i%WORD_BITS >= WORD_BITS-WINDOW) {
            bits ^= scalar2[i/WORD_BITS+1] << (WORD_BITS - (i%WORD_BITS));
        }
        bits &= WINDOW_MASK;
        inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;

        constant_time_lookup_tw_pniels(pn, (const tw_pniels_a_t*)multiples, NTABLE, bits & WINDOW_T_MASK);
        cond_negate_tw_pniels(pn, inv);
        add_tw_pniels_to_tw_extensible(working, pn);
    }
}
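
/*
 * The same fixed-window multiply, specialized to the tw_extended
 * representation.  Here the accumulator starts from the identity, and the
 * signed digit is applied by add_sub_tw_extended, which folds the
 * conditional negation into the addition itself.
 */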
void
scalarmul_ed (
    tw_extended_a_t working,
    const word_t scalar[SCALAR_WORDS]
) {
    const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE,
        WINDOW_MASK = (1<<WINDOW)-1, WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1),
        nbits = ROUND_UP(SCALAR_BITS,WINDOW);

    word_t scalar2[SCALAR_WORDS];
    convert_to_signed_window_form (
        scalar2, scalar, SCALAR_WORDS,
        SCALARMUL_FIXED_WINDOW_ADJUSTMENT, SCALAR_WORDS
    );

    tw_extended_a_t
        tmp VECTOR_ALIGNED,
        multiples[NTABLE] VECTOR_ALIGNED;
    copy_tw_extended(tmp, working);
    add_tw_extended(tmp, tmp);
    copy_tw_extended(multiples[0], working);

    int i,j;
    for (i=1; i<NTABLE; i++) {
        add_tw_extended(working, tmp);
        copy_tw_extended(multiples[i], working);
    }

    i = nbits - WINDOW;
    int bits, inv;
    set_identity_tw_extended(working);
    for (; i>=0; i-=WINDOW) {
        if (i != nbits-WINDOW) {
            for (j=0; j<WINDOW; j++) {
                add_tw_extended(working,working);
            }
        }

        bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS);
        if (i/WORD_BITS < SCALAR_WORDS-1 && i%WORD_BITS >= WORD_BITS-WINDOW) {
            bits ^= scalar2[i/WORD_BITS+1] << (WORD_BITS - (i%WORD_BITS));
        }
        bits &= WINDOW_MASK;
        inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;

        constant_time_lookup_tw_extended(tmp, (const tw_extended_a_t*)multiples, NTABLE, bits & WINDOW_T_MASK);
        add_sub_tw_extended(working, working, tmp, inv);
    }
}
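
/*
 * Variant of scalarmul whose table accesses are plain indexed copies
 * rather than constant-time lookups.  The memory access pattern depends
 * on the scalar, so this is only appropriate where timing side channels
 * are not a concern.
 */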
void
scalarmul_vlook (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS]
) {
    const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE,
        WINDOW_MASK = (1<<WINDOW)-1, WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1),
        nbits = ROUND_UP(SCALAR_BITS,WINDOW);

    word_t scalar2[SCALAR_WORDS];
    convert_to_signed_window_form(
        scalar2, scalar, SCALAR_WORDS,
        SCALARMUL_FIXED_WINDOW_ADJUSTMENT, SCALAR_WORDS
    );

    tw_extensible_a_t tabulator;
    copy_tw_extensible(tabulator, working);
    double_tw_extensible(tabulator);

    tw_pniels_a_t
        pn VECTOR_ALIGNED,
        multiples[NTABLE] VECTOR_ALIGNED;
    convert_tw_extensible_to_tw_pniels(pn, tabulator);
    convert_tw_extensible_to_tw_pniels(multiples[0], working);

    int i,j;
    for (i=1; i<NTABLE; i++) {
        add_tw_pniels_to_tw_extensible(working, pn);
        convert_tw_extensible_to_tw_pniels(multiples[i], working);
    }

    i = nbits - WINDOW;
    int bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS) & WINDOW_MASK,
        inv = (bits>>(WINDOW-1))-1;
    bits ^= inv;

    copy_tw_pniels(pn, multiples[bits & WINDOW_T_MASK]);
    cond_negate_tw_pniels(pn, inv);
    convert_tw_pniels_to_tw_extensible(working, pn);

    for (i-=WINDOW; i>=0; i-=WINDOW) {
        for (j=0; j<WINDOW; j++) {
            double_tw_extensible(working);
        }

        bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS);
        if (i/WORD_BITS < SCALAR_WORDS-1 && i%WORD_BITS >= WORD_BITS-WINDOW) {
            bits ^= scalar2[i/WORD_BITS+1] << (WORD_BITS - (i%WORD_BITS));
        }
        bits &= WINDOW_MASK;
        inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;

        copy_tw_pniels(pn, multiples[bits & WINDOW_T_MASK]);
        cond_negate_tw_pniels(pn, inv);
        add_tw_pniels_to_tw_extensible(working, pn);
    }
}
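
/*
 * Check the comb table's geometry against the scalar size and recode the
 * scalar for the comb loops: the n*t*s comb bits must cover nbits, and
 * the scalar is masked down to nbits before the signed-window conversion.
 */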
static mask_t
schedule_scalar_for_combs (
    word_t *scalar2,
    const word_t *scalar,
    unsigned int nbits,
    const struct fixed_base_table_t* table
) {
    unsigned int i;
    unsigned int n = table->n, t = table->t, s = table->s;

    if (n*t*s < nbits || n < 1 || t < 1 || s < 1) {
        return MASK_FAILURE;
    }

    unsigned int scalar_words = (nbits + WORD_BITS - 1)/WORD_BITS,
        scalar2_words = scalar_words;
    if (scalar2_words < SCALAR_WORDS)
        scalar2_words = SCALAR_WORDS;
    word_t scalar3[scalar2_words];

    /* Copy scalar to scalar3, but clear its high bits (if there are any) */
    for (i=0; i<scalar_words; i++) {
        scalar3[i] = scalar[i];
    }
    if (likely(i) && (nbits % WORD_BITS)) {
        scalar3[i-1] &= (((word_t)1) << (nbits%WORD_BITS)) - 1;
    }
    for (; i<scalar2_words; i++) {
        scalar3[i] = 0;
    }

    convert_to_signed_window_form (
        scalar2,
        scalar3, scalar2_words,
        table->scalar_adjustments, SCALAR_WORDS
    );
    return MASK_SUCCESS;
}
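
/*
 * Constant-time fixed-base scalar multiplication using combs.  The
 * precomputed table holds n blocks of 2^(t-1) entries; each of the s
 * doubling rounds gathers one bit from each of the t comb teeth per
 * block, and the resulting signed digit is resolved with a constant-time
 * lookup plus a conditional negation.
 */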
mask_t
scalarmul_fixed_base (
    tw_extensible_a_t out,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits,
    const struct fixed_base_table_t* table
) {
    unsigned int i,j,k;
    unsigned int n = table->n, t = table->t, s = table->s;

    unsigned int scalar2_words = (nbits + WORD_BITS - 1)/WORD_BITS;
    if (scalar2_words < SCALAR_WORDS) scalar2_words = SCALAR_WORDS;
    word_t scalar2[scalar2_words];

    mask_t succ = schedule_scalar_for_combs(scalar2, scalar, nbits, table);
    if (!succ) return MASK_FAILURE;

#ifdef __clang_analyzer__
    assert(t >= 1);
#endif

    tw_niels_a_t ni;

    for (i=0; i<s; i++) {
        if (i) double_tw_extensible(out);

        for (j=0; j<n; j++) {
            int tab = 0;

            /*
             * PERF: This computation takes about 1.5µs on SBR, i.e. 2-3% of the
             * time of a keygen or sign op. Surely it is possible to speed it up.
             */
            for (k=0; k<t; k++) {
                unsigned int bit = (s-1-i) + k*s + j*(s*t);
                if (bit < scalar2_words * WORD_BITS) {
                    tab |= (scalar2[bit/WORD_BITS] >> (bit%WORD_BITS) & 1) << k;
                }
            }

            mask_t invert = (tab>>(t-1))-1;
            tab ^= invert;
            tab &= (1<<(t-1)) - 1;

            constant_time_lookup_tw_niels(ni, (const tw_niels_a_t*)table->table + (j<<(t-1)), 1<<(t-1), tab);
            cond_negate_tw_niels(ni, invert);

            if (i||j) {
                add_tw_niels_to_tw_extensible(out, ni);
            } else {
                convert_tw_niels_to_tw_extensible(out, ni);
            }
        }
    }

    return MASK_SUCCESS;
}
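
/*
 * Variable-time linear combination of two fixed-base combs, sharing one
 * doubling chain between the two tables.  The table indexing and the
 * add/subtract branch are data-dependent, so this is meant for operations
 * on public inputs, such as signature verification.
 */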
mask_t
linear_combo_combs_vt (
    tw_extensible_a_t out,
    const word_t scalar1[SCALAR_WORDS],
    unsigned int nbits1,
    const struct fixed_base_table_t* table1,
    const word_t scalar2[SCALAR_WORDS],
    unsigned int nbits2,
    const struct fixed_base_table_t* table2
) {
    unsigned int i,j,k,sc;
    unsigned int s1 = table1->s, s2 = table2->s, smax = (s1 > s2) ? s1 : s2;

    unsigned int scalar1b_words = (nbits1 + WORD_BITS - 1)/WORD_BITS;
    if (scalar1b_words < SCALAR_WORDS) scalar1b_words = SCALAR_WORDS;
    unsigned int scalar2b_words = (nbits2 + WORD_BITS - 1)/WORD_BITS;
    if (scalar2b_words < SCALAR_WORDS) scalar2b_words = SCALAR_WORDS;
    word_t scalar1b[scalar1b_words], scalar2b[scalar2b_words];

    /* Schedule the scalars */
    mask_t succ;
    succ = schedule_scalar_for_combs(scalar1b, scalar1, nbits1, table1);
    if (!succ) return MASK_FAILURE;
    succ = schedule_scalar_for_combs(scalar2b, scalar2, nbits2, table2);
    if (!succ) return MASK_FAILURE;

#ifdef __clang_analyzer__
    assert(table1->t >= 1);
    assert(table2->t >= 1);
#endif

    const struct tw_niels_t *ni;
    unsigned int swords[2] = {scalar1b_words, scalar2b_words};
    word_t *scalars[2] = {scalar1b,scalar2b};

    set_identity_tw_extensible(out);

    for (i=0; i<smax; i++) {
        if (i) double_tw_extensible(out);

        for (sc=0; sc<2; sc++) {
            const struct fixed_base_table_t* table = sc ? table2 : table1;
            int ii = i-smax+table->s;
            if (ii < 0) continue;
            assert(ii < (int)table->s);

            for (j=0; j<table->n; j++) {
                int tab = 0;

                for (k=0; k<table->t; k++) {
                    unsigned int bit = (table->s-1-ii) + k*table->s + j*(table->s*table->t);
                    if (bit < swords[sc] * WORD_BITS) {
                        tab |= (scalars[sc][bit/WORD_BITS] >> (bit%WORD_BITS) & 1) << k;
                    }
                }

                mask_t invert = (tab>>(table->t-1))-1;
                tab ^= invert;
                tab &= (1<<(table->t-1)) - 1;

                ni = table->table[tab + (j<<(table->t-1))];
                if (invert) sub_tw_niels_from_tw_extensible(out, ni);
                else add_tw_niels_to_tw_extensible(out, ni);
            }
        }
    }

    return MASK_SUCCESS;
}
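
/*
 * Build a comb table for scalarmul_fixed_base: n blocks of 2^(t-1)
 * points, enumerated in Gray-code order so that consecutive entries
 * differ by one cached doubling, then batch-normalized with a single
 * simultaneous inversion.  Also fills in the two parity-matched scalar
 * adjustments used by convert_to_signed_window_form.
 */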
mask_t
precompute_fixed_base (
    struct fixed_base_table_t* out,
    const tw_extensible_a_t base,
    unsigned int n,
    unsigned int t,
    unsigned int s,
    tw_niels_a_t *prealloc
) {
    if (s < 1 || t < 1 || n < 1 || n*t*s < SCALAR_BITS) {
        really_memset(out, 0, sizeof(*out));
        return 0;
    }

    out->n = n;
    out->t = t;
    out->s = s;

    tw_extensible_a_t working, start;
    copy_tw_extensible(working, base);
    tw_pniels_a_t pn_tmp;

    tw_pniels_a_t *doubles = (tw_pniels_a_t *) malloc_vector(sizeof(*doubles) * (t-1));
    field_a_t *zs = (field_a_t *) malloc_vector(sizeof(*zs) * (n<<(t-1)));
    field_a_t *zis = (field_a_t *) malloc_vector(sizeof(*zis) * (n<<(t-1)));

    tw_niels_a_t *table = prealloc;
    if (prealloc) {
        out->own_table = 0;
    } else {
        table = (tw_niels_a_t *) malloc_vector(sizeof(*table) * (n<<(t-1)));
        out->own_table = 1;
    }
    out->table = table;

    if (!doubles || !zs || !zis || !table) {
        free(doubles);
        free(zs);
        free(zis);
        really_memset(out, 0, sizeof(*out));
        /* table may be NULL here if its own allocation failed */
        if (table) {
            really_memset(table, 0, sizeof(*table) * (n<<(t-1)));
            if (!prealloc) free(table);
        }
        return 0;
    }

    unsigned int i,j,k;

    /* Compute the scalar adjustments, equal to 2^nbits-1 mod q */
    unsigned int adjustment_size = (n*t*s)/WORD_BITS + 1;
    assert(adjustment_size >= SCALAR_WORDS);
    word_t adjustment[adjustment_size];
    for (i=0; i<adjustment_size; i++) {
        adjustment[i] = -1;
    }
    adjustment[(n*t*s) / WORD_BITS] += ((word_t)1) << ((n*t*s) % WORD_BITS);

    /* The low adjustment is 2^nbits - 1 mod q */
    barrett_reduce(adjustment, adjustment_size, 0, &curve_prime_order);
    word_t *low_adjustment = &out->scalar_adjustments[(SCALAR_WORDS)*(adjustment[0] & 1)],
        *high_adjustment = &out->scalar_adjustments[(SCALAR_WORDS)*((~adjustment[0]) & 1)];
    for (i=0; i<SCALAR_WORDS; i++) {
        low_adjustment[i] = adjustment[i];
    }

    /* The high adjustment is low + q = low - q_lo + 2^big */
    (void)
    sub_nr_ext_packed(
        high_adjustment,
        adjustment, SCALAR_WORDS,
        curve_prime_order.p_lo, curve_prime_order.nwords_lo,
        -1
    );
    if (curve_prime_order.p_shift) {
        high_adjustment[curve_prime_order.nwords_p - 1] += ((word_t)1)<<curve_prime_order.p_shift;
    }

    /* OK, now compute the tables */
    for (i=0; i<n; i++) {

        /* doubling phase */
        for (j=0; j<t; j++) {
            if (j) {
                convert_tw_extensible_to_tw_pniels(pn_tmp, working);
                add_tw_pniels_to_tw_extensible(start, pn_tmp);
            } else {
                copy_tw_extensible(start, working);
            }

            if (j==t-1 && i==n-1) {
                break;
            }

            double_tw_extensible(working);
            if (j<t-1) {
                convert_tw_extensible_to_tw_pniels(doubles[j], working);
            }

            for (k=0; k<s-1; k++) {
                double_tw_extensible(working);
            }
        }

        /* Gray-code phase */
        for (j=0;; j++) {
            int gray = j ^ (j>>1);
            int idx = (((i+1)<<(t-1))-1) ^ gray;

            convert_tw_extensible_to_tw_pniels(pn_tmp, start);
            copy_tw_niels(table[idx], pn_tmp->n);
            field_copy(zs[idx], pn_tmp->z);

            if (j >= (1u<<(t-1)) - 1) break;
            int delta = (j+1) ^ ((j+1)>>1) ^ gray;

            for (k=0; delta>1; k++)
                delta >>=1;

            if (gray & (1<<k)) {
                /* start += doubles[k] */
                add_tw_pniels_to_tw_extensible(start, doubles[k]);
            } else {
                /* start -= doubles[k] */
                sub_tw_pniels_from_tw_extensible(start, doubles[k]);
            }
        }
    }

    field_simultaneous_invert(zis, (const field_a_t*)zs, n<<(t-1));

    field_a_t product;
    for (i=0; i<n<<(t-1); i++) {
        field_mul(product, table[i]->a, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->a, product);

        field_mul(product, table[i]->b, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->b, product);

        field_mul(product, table[i]->c, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->c, product);
    }

    mask_t ret = ~field_is_zero(zis[0]);

    free(doubles);
    free(zs);
    free(zis);

    if (unlikely(!ret)) {
        really_memset(table, 0, sizeof(*table) * (n<<(t-1)));
        if (!prealloc) free(table);
        really_memset(out, 0, sizeof(*out));
        return 0;
    }

    return ret;
}
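
/* Zeroize a comb table, freeing the entry storage only if it is owned. */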
void
destroy_fixed_base (
    struct fixed_base_table_t* table
) {
    if (table->table) {
        really_memset(table->table,0,sizeof(*table->table)*(table->n<<(table->t-1)));
    }
    if (table->own_table) {
        free(table->table);
    }
    really_memset(table,0,sizeof(*table));
}
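
/*
 * Precompute the 2^tbits odd multiples P, 3P, 5P, ... of the base in
 * niels form for the fixed-base wNAF routines, batch-normalizing with
 * one simultaneous inversion.  Returns an all-ones mask on success, or
 * zero if the scratch allocations fail.
 */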
mask_t
precompute_fixed_base_wnaf (
    tw_niels_a_t *out,
    const tw_extensible_a_t const_base,
    unsigned int tbits
) {
    int i;
    field_a_t *zs = (field_a_t *) malloc_vector(sizeof(*zs)<<tbits);
    field_a_t *zis = (field_a_t *) malloc_vector(sizeof(*zis)<<tbits);

    if (!zs || !zis) {
        free(zs);
        free(zis);
        return 0;
    }

    tw_extensible_a_t base;
    copy_tw_extensible(base,const_base);

    tw_pniels_a_t twop, tmp;

    convert_tw_extensible_to_tw_pniels(tmp, base);
    field_copy(zs[0], tmp->z);
    copy_tw_niels(out[0], tmp->n);

    if (tbits > 0) {
        double_tw_extensible(base);
        convert_tw_extensible_to_tw_pniels(twop, base);
        add_tw_pniels_to_tw_extensible(base, tmp);

        convert_tw_extensible_to_tw_pniels(tmp, base);
        field_copy(zs[1], tmp->z);
        copy_tw_niels(out[1], tmp->n);

        for (i=2; i < 1<<tbits; i++) {
            add_tw_pniels_to_tw_extensible(base, twop);
            convert_tw_extensible_to_tw_pniels(tmp, base);
            field_copy(zs[i], tmp->z);
            copy_tw_niels(out[i], tmp->n);
        }
    }

    field_simultaneous_invert(zis, (const field_a_t *)zs, 1<<tbits);

    field_a_t product;
    for (i=0; i<1<<tbits; i++) {
        field_mul(product, out[i]->a, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->a, product);

        field_mul(product, out[i]->b, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->b, product);

        field_mul(product, out[i]->c, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->c, product);
    }

    free(zs);
    free(zis);

    return -1;
}
/**
 * @cond internal
 * Control for variable-time scalar multiply algorithms.
 */
struct smvt_control {
    int power, addend;
};
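
/*
 * Recode a scalar into windowed-NAF form: a sequence of (power, addend)
 * control words with odd addends sized to index a table of odd multiples,
 * terminated by a stopper entry with power = -1.  Returns the number of
 * real control words.
 */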
static int
recode_wnaf(
    struct smvt_control *control, /* [nbits/(tableBits+1) + 3] */
    const word_t *scalar,
    unsigned int nbits,
    unsigned int tableBits)
{
    int current = 0, i, j;
    unsigned int position = 0;

    /* PERF: negate scalar if it's large
     * PERF: this is a pretty simplistic algorithm. I'm sure there's a faster one...
     */
    for (i=nbits-1; i >= 0; i--) {
        int bit = (scalar[i/WORD_BITS] >> (i%WORD_BITS)) & 1;
        current = 2*current + bit;

        /*
         * Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0
         * So current loses (tableBits+1) bits every time. It otherwise gains
         * 1 bit per iteration. The number of iterations is
         * (nbits + 2 + tableBits), and an additional control word is added at
         * the end. So the total number of control words is at most
         * ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2.
         * There's also the stopper with power -1, for a total of +3.
         */
        if (current >= (2<<tableBits) || current <= -1 - (2<<tableBits)) {
            int delta = (current + 1) >> 1; /* |delta| < 2^tablebits */
            current = -(current & 1);

            for (j=i; (delta & 1) == 0; j++) {
                delta >>= 1;
            }
            control[position].power = j+1;
            control[position].addend = delta;
            position++;
            assert(position <= nbits/(tableBits+1) + 2);
        }
    }

    if (current) {
        for (j=0; (current & 1) == 0; j++) {
            current >>= 1;
        }
        control[position].power = j;
        control[position].addend = current;
        position++;
        assert(position <= nbits/(tableBits+1) + 2);
    }

    control[position].power = -1;
    control[position].addend = 0;
    return position;
}
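
/* Fill output[] with the odd multiples P, 3P, 5P, ... of the working
 * point, clobbering the working point in the process. */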
static void
prepare_wnaf_table(
    tw_pniels_a_t *output,
    tw_extensible_a_t working,
    unsigned int tbits
) {
    int i;
    convert_tw_extensible_to_tw_pniels(output[0], working);

    if (tbits == 0) return;

    double_tw_extensible(working);
    tw_pniels_a_t twop;
    convert_tw_extensible_to_tw_pniels(twop, working);

    add_tw_pniels_to_tw_extensible(working, output[0]);
    convert_tw_extensible_to_tw_pniels(output[1], working);

    for (i=2; i < 1<<tbits; i++) {
        add_tw_pniels_to_tw_extensible(working, twop);
        convert_tw_extensible_to_tw_pniels(output[i], working);
    }
}
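
/*
 * Variable-time scalar multiplication via wNAF: recode the scalar, build
 * the odd-multiples table from the input point, then run one
 * left-to-right doubling chain, adding or subtracting a table entry
 * whenever the current bit position matches the next control word.
 */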
void
scalarmul_vt (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits
) {
    const int table_bits = SCALARMUL_WNAF_TABLE_BITS;
    struct smvt_control control[nbits/(table_bits+1)+3];

    int control_bits = recode_wnaf(control, scalar, nbits, table_bits);

    tw_pniels_a_t precmp[1<<table_bits];
    prepare_wnaf_table(precmp, working, table_bits);

    if (control_bits > 0) {
        assert(control[0].addend > 0);
        assert(control[0].power >= 0);
        convert_tw_pniels_to_tw_extensible(working, precmp[control[0].addend >> 1]);
    } else {
        set_identity_tw_extensible(working);
        return;
    }

    int conti = 1, i;
    for (i = control[0].power - 1; i >= 0; i--) {
        double_tw_extensible(working);

        if (i == control[conti].power) {
            assert(control[conti].addend);

            if (control[conti].addend > 0) {
                add_tw_pniels_to_tw_extensible(working, precmp[control[conti].addend >> 1]);
            } else {
                sub_tw_pniels_from_tw_extensible(working, precmp[(-control[conti].addend) >> 1]);
            }
            conti++;
            assert(conti <= control_bits);
        }
    }
}
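
/*
 * Variable-time fixed-base wNAF multiply against a table prepared by
 * precompute_fixed_base_wnaf.  The doubling chain is driven by the gaps
 * between successive control-word powers, with a final run of doublings
 * for the bits below the last addition.
 */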
void
scalarmul_fixed_base_wnaf_vt (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits,
    const tw_niels_a_t *precmp,
    unsigned int table_bits
) {
    struct smvt_control control[nbits/(table_bits+1)+3];

    int control_bits = recode_wnaf(control, scalar, nbits, table_bits);

    if (control_bits > 0) {
        assert(control[0].addend > 0);
        assert(control[0].power >= 0);
        convert_tw_niels_to_tw_extensible(working, precmp[control[0].addend >> 1]);
    } else {
        set_identity_tw_extensible(working);
        return;
    }

    int conti = 1, i;
    for (; control[conti].power >= 0; conti++) {
        assert(conti <= control_bits);
        for (i = control[conti-1].power - control[conti].power; i; i--) {
            double_tw_extensible(working);
        }

        assert(control[conti].addend);
        if (control[conti].addend > 0) {
            add_tw_niels_to_tw_extensible(working, precmp[control[conti].addend >> 1]);
        } else {
            sub_tw_niels_from_tw_extensible(working, precmp[(-control[conti].addend) >> 1]);
        }
    }

    for (i = control[conti-1].power; i; i--) {
        double_tw_extensible(working);
    }
}
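
/*
 * Variable-time computation of scalar_var*W + scalar_pre*B, where W is
 * the variable working point and B is the fixed base behind the precmp
 * table.  Both scalars are recoded to wNAF, and one shared doubling chain
 * interleaves additions from the two control sequences.
 */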
void
linear_combo_var_fixed_vt(
    tw_extensible_a_t working,
    const word_t scalar_var[SCALAR_WORDS],
    unsigned int nbits_var,
    const word_t scalar_pre[SCALAR_WORDS],
    unsigned int nbits_pre,
    const tw_niels_a_t *precmp,
    unsigned int table_bits_pre
) {
    const int table_bits_var = SCALARMUL_WNAF_COMBO_TABLE_BITS;
    struct smvt_control control_var[nbits_var/(table_bits_var+1)+3];
    struct smvt_control control_pre[nbits_pre/(table_bits_pre+1)+3];

    int ncb_var = recode_wnaf(control_var, scalar_var, nbits_var, table_bits_var);
    int ncb_pre = recode_wnaf(control_pre, scalar_pre, nbits_pre, table_bits_pre);
    (void)ncb_var;
    (void)ncb_pre;

    tw_pniels_a_t precmp_var[1<<table_bits_var];
    prepare_wnaf_table(precmp_var, working, table_bits_var);

    int contp=0, contv=0, i;

    i = control_var[0].power;
    if (i > control_pre[0].power) {
        convert_tw_pniels_to_tw_extensible(working, precmp_var[control_var[0].addend >> 1]);
        contv++;
    } else if (i == control_pre[0].power && i >= 0) {
        convert_tw_pniels_to_tw_extensible(working, precmp_var[control_var[0].addend >> 1]);
        add_tw_niels_to_tw_extensible(working, precmp[control_pre[0].addend >> 1]);
        contv++; contp++;
    } else {
        i = control_pre[0].power;
        convert_tw_niels_to_tw_extensible(working, precmp[control_pre[0].addend >> 1]);
        contp++;
    }

    if (i < 0) {
        set_identity_tw_extensible(working);
        return;
    }

    for (i--; i >= 0; i--) {
        double_tw_extensible(working);

        if (i == control_var[contv].power) {
            assert(control_var[contv].addend);
            if (control_var[contv].addend > 0) {
                add_tw_pniels_to_tw_extensible(working, precmp_var[control_var[contv].addend >> 1]);
            } else {
                sub_tw_pniels_from_tw_extensible(working, precmp_var[(-control_var[contv].addend) >> 1]);
            }
            contv++;
        }

        if (i == control_pre[contp].power) {
            assert(control_pre[contp].addend);
            if (control_pre[contp].addend > 0) {
                add_tw_niels_to_tw_extensible(working, precmp[control_pre[contp].addend >> 1]);
            } else {
                sub_tw_niels_from_tw_extensible(working, precmp[(-control_pre[contp].addend) >> 1]);
            }
            contp++;
        }
    }

    assert(contv == ncb_var);
    assert(contp == ncb_pre);
}