/* Copyright (c) 2014 Cryptography Research, Inc.
 * Released under the MIT License. See LICENSE.txt for license information.
 */

#include "word.h"

#include <assert.h> /* for the asserts below; possibly already provided via word.h */
#include <stdlib.h>
#include <limits.h>
#include <string.h>

#include "intrinsics.h"
#include "scalarmul.h"
#include "barrett_field.h"
#include "constant_time.h"
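
/**
 * Constant-time Montgomery ladder: out = x-coordinate of scalar*in.
 *
 * The scalar is scanned from its top bit down. Keying the conditional
 * swaps on flip^pflip undoes the previous iteration's swap and applies
 * the current one in a single constant-time pass; pflip carries the
 * pending swap across iterations and is unwound after the loop. The
 * extra doublings are presumably for folding in a cofactor;
 * serialize_montgomery() produces the success/failure mask.
 */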
mask_t
montgomery_ladder (
    field_a_t out,
    const field_a_t in,
    const word_t *scalar,
    unsigned int nbits,
    unsigned int n_extra_doubles
) {
    montgomery_a_t mont;
    deserialize_montgomery(mont, in);

    int i, j, n = (nbits-1)%WORD_BITS;
    mask_t pflip = 0;
    for (j=(nbits+WORD_BITS-1)/WORD_BITS-1; j>=0; j--) {
        word_t w = scalar[j];
        for (i=n; i>=0; i--) {
            mask_t flip = -((w>>i)&1);
            constant_time_cond_swap(mont->xa, mont->xd, sizeof(mont->xd), flip^pflip);
            constant_time_cond_swap(mont->za, mont->zd, sizeof(mont->xd), flip^pflip);
            montgomery_step(mont);
            pflip = flip;
        }
        n = WORD_BITS-1;
    }
    constant_time_cond_swap(mont->xa, mont->xd, sizeof(mont->xd), pflip);
    constant_time_cond_swap(mont->za, mont->zd, sizeof(mont->xd), pflip);

    assert(n_extra_doubles < INT_MAX);
    for (j=0; j<(int)n_extra_doubles; j++) {
        montgomery_step(mont);
    }

    return serialize_montgomery(out, mont, in);
}
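
/*
 * Type-safe wrappers around the generic constant_time_lookup(), which scans
 * the whole table and mask-selects the requested entry so the memory access
 * pattern is independent of idx (compare the hand-rolled big_register_t
 * variant preserved below, commented out). Marked unused because not every
 * build references both.
 */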
static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_pniels (
    tw_pniels_a_t out,
    const tw_pniels_a_t *in,
    int nin,
    int idx
) {
    constant_time_lookup(out, in, sizeof(*out), nin, idx);
}

static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_niels (
    tw_niels_a_t out,
    const tw_niels_a_t *in,
    int nin,
    int idx
) {
    constant_time_lookup(out, in, sizeof(*out), nin, idx);
}

/*
static __inline__ void
constant_time_lookup_tw_pniels (
    tw_pniels_a_t out,
    const tw_pniels_a_t in,
    int nin,
    int idx
) {
    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
    big_register_t *o = (big_register_t *)out;
    const big_register_t *i = (const big_register_t *)in;
    int j;
    unsigned int k;

    really_memset(out, 0, sizeof(*out));
    for (j=0; j<nin; j++, big_i-=big_one) {
        big_register_t mask = br_is_zero(big_i);
        for (k=0; k<sizeof(*out)/sizeof(*o); k++) {
            o[k] |= mask & i[k+j*sizeof(*out)/sizeof(*o)];
        }
    }
}

static __inline__ void
constant_time_lookup_tw_niels (
    tw_niels_a_t out,
    const tw_niels_a_t in,
    int nin,
    int idx
) {
    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
    big_register_t *o = (big_register_t *)out;
    const big_register_t *i = (const big_register_t *)in;
    int j;
    unsigned int k;

    really_memset(out, 0, sizeof(*out));
    for (j=0; j<nin; j++, big_i-=big_one) {
        big_register_t mask = br_is_zero(big_i);
        for (k=0; k<sizeof(*out)/sizeof(*o); k++) {
            o[k] |= mask & i[k+j*sizeof(*out)/sizeof(*o)];
        }
    }
}
*/
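
/*
 * Recode a scalar for signed fixed-window scalar multiply.
 *
 * Adds whichever of the two precomputed adjustments (A and A+q, with
 * A = 2^nbits - 1 mod q; see precompute_fixed_base) has parity opposite
 * to the scalar, so the sum is even, then shifts right one bit. The
 * result satisfies scalar = 2*out - (2^nbits - 1) mod q, and since
 * 2*out - (2^nbits - 1) = sum_i (2*bit_i(out) - 1)*2^i, every bit
 * position now contributes +1 or -1: each window digit is odd and
 * nonzero, so it can be handled by a magnitude lookup plus a
 * conditional negation.
 */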
static void
convert_to_signed_window_form (
    word_t *out,
    const word_t *scalar,
    int nwords_scalar,
    const word_t *prepared_data,
    int nwords_pd
) {
    assert(nwords_pd <= nwords_scalar);
    mask_t mask = -(scalar[0]&1);

    word_t carry = add_nr_ext_packed(out, scalar, nwords_scalar, prepared_data, nwords_pd, ~mask);
    carry += add_nr_ext_packed(out, out, nwords_scalar, prepared_data+nwords_pd, nwords_pd, mask);

    assert(!(out[0]&1));

    int i;
    for (i=0; i<nwords_scalar; i++) {
        out[i] >>= 1;
        if (i<nwords_scalar-1) {
            out[i] |= out[i+1]<<(WORD_BITS-1);
        } else {
            out[i] |= carry<<(WORD_BITS-1);
        }
    }
}
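
/**
 * Constant-time variable-base scalar multiply using fixed signed windows.
 *
 * Tabulates the odd multiples P, 3P, ..., (2^WINDOW - 1)P, recodes the
 * scalar so each WINDOW-bit digit is odd and signed, then processes the
 * digits from the top: WINDOW doublings, one constant-time table lookup,
 * one conditional negation, and one addition per digit.
 */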
void
scalarmul (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS]
) {
    const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE,
        WINDOW_MASK = (1<<WINDOW)-1, WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1),
        nbits = ROUND_UP(SCALAR_BITS, WINDOW);

    word_t scalar2[SCALAR_WORDS];
    convert_to_signed_window_form (
        scalar2, scalar, SCALAR_WORDS,
        SCALARMUL_FIXED_WINDOW_ADJUSTMENT, SCALAR_WORDS
    );

    tw_extensible_a_t tabulator;
    copy_tw_extensible(tabulator, working);
    double_tw_extensible(tabulator);

    tw_pniels_a_t
        pn VECTOR_ALIGNED,
        multiples[NTABLE] VECTOR_ALIGNED;
    convert_tw_extensible_to_tw_pniels(pn, tabulator);
    convert_tw_extensible_to_tw_pniels(multiples[0], working);

    int i, j;
    for (i=1; i<NTABLE; i++) {
        add_tw_pniels_to_tw_extensible(working, pn);
        convert_tw_extensible_to_tw_pniels(multiples[i], working);
    }

    i = nbits - WINDOW;
    int bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS) & WINDOW_MASK,
        inv = (bits>>(WINDOW-1))-1;
    bits ^= inv;

    constant_time_lookup_tw_pniels(pn, multiples, NTABLE, bits & WINDOW_T_MASK);
    cond_negate_tw_pniels(pn, inv);
    convert_tw_pniels_to_tw_extensible(working, pn);

    for (i-=WINDOW; i>=0; i-=WINDOW) {
        for (j=0; j<WINDOW; j++) {
            double_tw_extensible(working);
        }

        bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS);
        if (i/WORD_BITS < SCALAR_WORDS-1 && i%WORD_BITS >= WORD_BITS-WINDOW) {
            bits ^= scalar2[i/WORD_BITS+1] << (WORD_BITS - (i%WORD_BITS));
        }
        bits &= WINDOW_MASK;
        inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;

        constant_time_lookup_tw_pniels(pn, multiples, NTABLE, bits & WINDOW_T_MASK);
        cond_negate_tw_pniels(pn, inv);
        add_tw_pniels_to_tw_extensible(working, pn);
    }
}
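
/*
 * Same as scalarmul(), but with direct (variable-time) table indexing in
 * place of the constant-time lookups. The memory access pattern leaks the
 * recoded digits, so this is only appropriate when the scalar is public.
 */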
void
scalarmul_vlook (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS]
) {
    const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE,
        WINDOW_MASK = (1<<WINDOW)-1, WINDOW_T_MASK = WINDOW_MASK >> 1,
        NTABLE = 1<<(WINDOW-1),
        nbits = ROUND_UP(SCALAR_BITS, WINDOW);

    word_t scalar2[SCALAR_WORDS];
    convert_to_signed_window_form(
        scalar2, scalar, SCALAR_WORDS,
        SCALARMUL_FIXED_WINDOW_ADJUSTMENT, SCALAR_WORDS
    );

    tw_extensible_a_t tabulator;
    copy_tw_extensible(tabulator, working);
    double_tw_extensible(tabulator);

    tw_pniels_a_t
        pn VECTOR_ALIGNED,
        multiples[NTABLE] VECTOR_ALIGNED;
    convert_tw_extensible_to_tw_pniels(pn, tabulator);
    convert_tw_extensible_to_tw_pniels(multiples[0], working);

    int i, j;
    for (i=1; i<NTABLE; i++) {
        add_tw_pniels_to_tw_extensible(working, pn);
        convert_tw_extensible_to_tw_pniels(multiples[i], working);
    }

    i = nbits - WINDOW;
    int bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS) & WINDOW_MASK,
        inv = (bits>>(WINDOW-1))-1;
    bits ^= inv;

    copy_tw_pniels(pn, multiples[bits & WINDOW_T_MASK]);
    cond_negate_tw_pniels(pn, inv);
    convert_tw_pniels_to_tw_extensible(working, pn);

    for (i-=WINDOW; i>=0; i-=WINDOW) {
        for (j=0; j<WINDOW; j++) {
            double_tw_extensible(working);
        }

        bits = scalar2[i/WORD_BITS] >> (i%WORD_BITS);
        if (i/WORD_BITS < SCALAR_WORDS-1 && i%WORD_BITS >= WORD_BITS-WINDOW) {
            bits ^= scalar2[i/WORD_BITS+1] << (WORD_BITS - (i%WORD_BITS));
        }
        bits &= WINDOW_MASK;
        inv = (bits>>(WINDOW-1))-1;
        bits ^= inv;

        copy_tw_pniels(pn, multiples[bits & WINDOW_T_MASK]);
        cond_negate_tw_pniels(pn, inv);
        add_tw_pniels_to_tw_extensible(working, pn);
    }
}
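
/*
 * Check that the comb geometry covers nbits (n combs * t teeth * s rows),
 * clear any bits of the scalar above nbits, zero-pad to the working width,
 * and recode into signed form using the adjustments stored in the table.
 */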
static mask_t
schedule_scalar_for_combs (
    word_t *scalar2,
    const word_t *scalar,
    unsigned int nbits,
    const struct fixed_base_table_t* table
) {
    unsigned int i;
    unsigned int n = table->n, t = table->t, s = table->s;

    if (n*t*s < nbits || n < 1 || t < 1 || s < 1) {
        return MASK_FAILURE;
    }

    unsigned int scalar_words = (nbits + WORD_BITS - 1)/WORD_BITS,
        scalar2_words = scalar_words;
    if (scalar2_words < SCALAR_WORDS)
        scalar2_words = SCALAR_WORDS;
    word_t scalar3[scalar2_words];

    /* Copy scalar to scalar3, but clear its high bits (if there are any) */
    for (i=0; i<scalar_words; i++) {
        scalar3[i] = scalar[i];
    }
    if (likely(i) && (nbits % WORD_BITS)) {
        scalar3[i-1] &= (((word_t)1) << (nbits%WORD_BITS)) - 1;
    }
    for (; i<scalar2_words; i++) {
        scalar3[i] = 0;
    }

    convert_to_signed_window_form (
        scalar2,
        scalar3, scalar2_words,
        table->scalar_adjustments, SCALAR_WORDS
    );

    return MASK_SUCCESS;
}
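
/**
 * Constant-time fixed-base scalar multiply using the comb method.
 *
 * The recoded scalar is read as n combs of t teeth spaced s bits apart:
 * row i of comb j gathers bits (s-1-i) + k*s + j*s*t for k = 0..t-1. The
 * top tooth selects the sign, the remaining t-1 teeth index a
 * constant-time lookup into that comb's 2^(t-1)-entry subtable, and the
 * s rows are combined with one doubling each.
 */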
mask_t
scalarmul_fixed_base (
    tw_extensible_a_t out,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits,
    const struct fixed_base_table_t* table
) {
    unsigned int i,j,k;
    unsigned int n = table->n, t = table->t, s = table->s;

    unsigned int scalar2_words = (nbits + WORD_BITS - 1)/WORD_BITS;
    if (scalar2_words < SCALAR_WORDS) scalar2_words = SCALAR_WORDS;
    word_t scalar2[scalar2_words];

    mask_t succ = schedule_scalar_for_combs(scalar2, scalar, nbits, table);
    if (!succ) return MASK_FAILURE;

#ifdef __clang_analyzer__
    assert(t >= 1);
#endif

    tw_niels_a_t ni;

    for (i=0; i<s; i++) {
        if (i) double_tw_extensible(out);

        for (j=0; j<n; j++) {
            int tab = 0;

            /*
             * PERF: This computation takes about 1.5µs on SBR, i.e. 2-3% of the
             * time of a keygen or sign op. Surely it is possible to speed it up.
             */
            for (k=0; k<t; k++) {
                unsigned int bit = (s-1-i) + k*s + j*(s*t);
                if (bit < scalar2_words * WORD_BITS) {
                    tab |= (scalar2[bit/WORD_BITS] >> (bit%WORD_BITS) & 1) << k;
                }
            }

            mask_t invert = (tab>>(t-1))-1;
            tab ^= invert;
            tab &= (1<<(t-1)) - 1;

            constant_time_lookup_tw_niels(ni, table->table + (j<<(t-1)), 1<<(t-1), tab);
            cond_negate_tw_niels(ni, invert);

            if (i||j) {
                add_tw_niels_to_tw_extensible(out, ni);
            } else {
                convert_tw_niels_to_tw_extensible(out, ni);
            }
        }
    }

    return MASK_SUCCESS;
}
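
/*
 * Variable-time linear combination scalar1*B1 + scalar2*B2 over two comb
 * tables. The comb walk matches scalarmul_fixed_base(), but the lookups
 * are direct array reads (hence _vt), and the table with fewer rows idles
 * (ii < 0) until both schedules align on a shared doubling chain.
 */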
mask_t
linear_combo_combs_vt (
    tw_extensible_a_t out,
    const word_t scalar1[SCALAR_WORDS],
    unsigned int nbits1,
    const struct fixed_base_table_t* table1,
    const word_t scalar2[SCALAR_WORDS],
    unsigned int nbits2,
    const struct fixed_base_table_t* table2
) {
    unsigned int i,j,k,sc;
    unsigned int s1 = table1->s, s2 = table2->s, smax = (s1 > s2) ? s1 : s2;

    unsigned int scalar1b_words = (nbits1 + WORD_BITS - 1)/WORD_BITS;
    if (scalar1b_words < SCALAR_WORDS) scalar1b_words = SCALAR_WORDS;
    unsigned int scalar2b_words = (nbits2 + WORD_BITS - 1)/WORD_BITS;
    if (scalar2b_words < SCALAR_WORDS) scalar2b_words = SCALAR_WORDS;
    word_t scalar1b[scalar1b_words], scalar2b[scalar2b_words];

    /* Schedule the scalars */
    mask_t succ;
    succ = schedule_scalar_for_combs(scalar1b, scalar1, nbits1, table1);
    if (!succ) return MASK_FAILURE;
    succ = schedule_scalar_for_combs(scalar2b, scalar2, nbits2, table2);
    if (!succ) return MASK_FAILURE;

#ifdef __clang_analyzer__
    assert(table1->t >= 1);
    assert(table2->t >= 1);
#endif

    tw_niels_a_t ni;
    unsigned int swords[2] = {scalar1b_words, scalar2b_words}, started = 0;
    word_t *scalars[2] = {scalar1b,scalar2b};

    for (i=0; i<smax; i++) {
        if (i) double_tw_extensible(out);

        for (sc=0; sc<2; sc++) {
            const struct fixed_base_table_t* table = sc ? table2 : table1;

            int ii = i-smax+table->s;
            if (ii < 0) continue;
            assert(ii < (int)table->s);

            for (j=0; j<table->n; j++) {
                int tab = 0;

                for (k=0; k<table->t; k++) {
                    unsigned int bit = (table->s-1-ii) + k*table->s + j*(table->s*table->t);
                    if (bit < swords[sc] * WORD_BITS) {
                        tab |= (scalars[sc][bit/WORD_BITS] >> (bit%WORD_BITS) & 1) << k;
                    }
                }

                mask_t invert = (tab>>(table->t-1))-1;
                tab ^= invert;
                tab &= (1<<(table->t-1)) - 1;

                copy_tw_niels(ni, table->table[tab + (j<<(table->t-1))]);
                cond_negate_tw_niels(ni, invert);

                if (started) {
                    add_tw_niels_to_tw_extensible(out, ni);
                } else {
                    convert_tw_niels_to_tw_extensible(out, ni);
                    started = 1;
                }
            }
        }
        assert(started);
    }

    return MASK_SUCCESS;
}
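
/**
 * Build a fixed-base comb table with n combs, t teeth and spacing s.
 *
 * Stores the two parity adjustments consumed by
 * convert_to_signed_window_form(), then, per comb, generates the 2^(t-1)
 * signed tooth combinations in Gray-code order so that each successive
 * entry is one add or subtract of a cached doubling away. A single
 * simultaneous inversion then normalizes every entry to affine niels
 * form. Returns 0 on bad geometry, allocation failure, or a zero
 * denominator.
 */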
mask_t
precompute_fixed_base (
    struct fixed_base_table_t* out,
    const tw_extensible_a_t base,
    unsigned int n,
    unsigned int t,
    unsigned int s,
    tw_niels_a_t *prealloc
) {
    if (s < 1 || t < 1 || n < 1 || n*t*s < SCALAR_BITS) {
        really_memset(out, 0, sizeof(*out));
        return 0;
    }

    out->n = n;
    out->t = t;
    out->s = s;

    tw_extensible_a_t working, start;
    copy_tw_extensible(working, base);

    tw_pniels_a_t pn_tmp;

    tw_pniels_a_t *doubles = (tw_pniels_a_t *) malloc_vector(sizeof(*doubles) * (t-1));
    field_a_t *zs  = (field_a_t *) malloc_vector(sizeof(*zs) * (n<<(t-1)));
    field_a_t *zis = (field_a_t *) malloc_vector(sizeof(*zis) * (n<<(t-1)));

    tw_niels_a_t *table = prealloc;
    if (prealloc) {
        out->own_table = 0;
    } else {
        table = (tw_niels_a_t *) malloc_vector(sizeof(*table) * (n<<(t-1)));
        out->own_table = 1;
    }
    out->table = table;

    if (!doubles || !zs || !zis || !table) {
        free(doubles);
        free(zs);
        free(zis);
        really_memset(out, 0, sizeof(*out));
        /* table is NULL here if its own allocation failed; don't memset
         * through a null pointer. */
        if (table) {
            really_memset(table, 0, sizeof(*table) * (n<<(t-1)));
            if (!prealloc) free(table);
        }
        return 0;
    }

    unsigned int i,j,k;

    /* Compute the scalar adjustments, equal to 2^nbits-1 mod q */
    unsigned int adjustment_size = (n*t*s)/WORD_BITS + 1;
    assert(adjustment_size >= SCALAR_WORDS);
    word_t adjustment[adjustment_size];
    for (i=0; i<adjustment_size; i++) {
        adjustment[i] = -1;
    }
    adjustment[(n*t*s) / WORD_BITS] += ((word_t)1) << ((n*t*s) % WORD_BITS);

    /* The low adjustment is 2^nbits - 1 mod q */
    barrett_reduce(adjustment, adjustment_size, 0, &curve_prime_order);
    word_t *low_adjustment  = &out->scalar_adjustments[(SCALAR_WORDS)*(adjustment[0] & 1)],
           *high_adjustment = &out->scalar_adjustments[(SCALAR_WORDS)*((~adjustment[0]) & 1)];
    for (i=0; i<SCALAR_WORDS; i++) {
        low_adjustment[i] = adjustment[i];
    }

    /* The high adjustment is low + q = low - q_lo + 2^big */
    (void)sub_nr_ext_packed(
        high_adjustment,
        adjustment, SCALAR_WORDS,
        curve_prime_order.p_lo, curve_prime_order.nwords_lo,
        -1
    );
    if (curve_prime_order.p_shift) {
        high_adjustment[curve_prime_order.nwords_p - 1] += ((word_t)1)<<curve_prime_order.p_shift;
    }

    /* OK, now compute the tables */
    for (i=0; i<n; i++) {

        /* doubling phase */
        for (j=0; j<t; j++) {
            if (j) {
                convert_tw_extensible_to_tw_pniels(pn_tmp, working);
                add_tw_pniels_to_tw_extensible(start, pn_tmp);
            } else {
                copy_tw_extensible(start, working);
            }

            if (j==t-1 && i==n-1) {
                break;
            }

            double_tw_extensible(working);
            if (j<t-1) {
                convert_tw_extensible_to_tw_pniels(doubles[j], working);
            }

            for (k=0; k<s-1; k++) {
                double_tw_extensible(working);
            }
        }

        /* Gray-code phase */
        for (j=0;; j++) {
            int gray = j ^ (j>>1);
            int idx = (((i+1)<<(t-1))-1) ^ gray;

            convert_tw_extensible_to_tw_pniels(pn_tmp, start);
            copy_tw_niels(table[idx], pn_tmp->n);
            field_copy(zs[idx], pn_tmp->z);

            if (j >= (1u<<(t-1)) - 1) break;
            int delta = (j+1) ^ ((j+1)>>1) ^ gray;

            for (k=0; delta>1; k++)
                delta >>= 1;

            if (gray & (1<<k)) {
                /* start += doubles[k] */
                add_tw_pniels_to_tw_extensible(start, doubles[k]);
            } else {
                /* start -= doubles[k] */
                sub_tw_pniels_from_tw_extensible(start, doubles[k]);
            }
        }
    }

    field_simultaneous_invert(zis, zs, n<<(t-1));

    field_a_t product;
    for (i=0; i<n<<(t-1); i++) {
        field_mul(product, table[i]->a, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->a, product);

        field_mul(product, table[i]->b, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->b, product);

        field_mul(product, table[i]->c, zis[i]);
        field_strong_reduce(product);
        field_copy(table[i]->c, product);
    }

    mask_t ret = ~field_is_zero(zis[0]);

    free(doubles);
    free(zs);
    free(zis);

    if (unlikely(!ret)) {
        really_memset(table, 0, sizeof(*table) * (n<<(t-1)));
        if (!prealloc) free(table);
        really_memset(out, 0, sizeof(*out));
        return 0;
    }

    return ret;
}
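
/*
 * Zeroize a fixed-base table and free its entry array, but only free when
 * the table owns it (own_table); preallocated storage belongs to the caller.
 */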
void
destroy_fixed_base (
    struct fixed_base_table_t* table
) {
    if (table->table) {
        really_memset(table->table, 0, sizeof(*table->table)*(table->n<<(table->t-1)));
    }
    if (table->own_table) {
        free(table->table);
    }
    really_memset(table, 0, sizeof(*table));
}
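
/*
 * Precompute the 2^tbits odd multiples P, 3P, ..., (2^(tbits+1) - 1)P of
 * const_base for the fixed-base wNAF routines, normalized to affine niels
 * form with one simultaneous inversion. Returns -1 (an all-ones success
 * mask) on success, 0 on allocation failure.
 */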
mask_t
precompute_fixed_base_wnaf (
    tw_niels_a_t *out,
    const tw_extensible_a_t const_base,
    unsigned int tbits
) {
    int i;
    field_a_t *zs  = (field_a_t *) malloc_vector(sizeof(*zs)<<tbits);
    field_a_t *zis = (field_a_t *) malloc_vector(sizeof(*zis)<<tbits);

    if (!zs || !zis) {
        free(zs);
        free(zis);
        return 0;
    }

    tw_extensible_a_t base;
    copy_tw_extensible(base, const_base);

    tw_pniels_a_t twop, tmp;

    convert_tw_extensible_to_tw_pniels(tmp, base);
    field_copy(zs[0], tmp->z);
    copy_tw_niels(out[0], tmp->n);

    if (tbits > 0) {
        double_tw_extensible(base);
        convert_tw_extensible_to_tw_pniels(twop, base);
        add_tw_pniels_to_tw_extensible(base, tmp);

        convert_tw_extensible_to_tw_pniels(tmp, base);
        field_copy(zs[1], tmp->z);
        copy_tw_niels(out[1], tmp->n);

        for (i=2; i < 1<<tbits; i++) {
            add_tw_pniels_to_tw_extensible(base, twop);
            convert_tw_extensible_to_tw_pniels(tmp, base);
            field_copy(zs[i], tmp->z);
            copy_tw_niels(out[i], tmp->n);
        }
    }

    field_simultaneous_invert(zis, zs, 1<<tbits);

    field_a_t product;
    for (i=0; i<1<<tbits; i++) {
        field_mul(product, out[i]->a, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->a, product);

        field_mul(product, out[i]->b, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->b, product);

        field_mul(product, out[i]->c, zis[i]);
        field_strong_reduce(product);
        field_copy(out[i]->c, product);
    }

    free(zs);
    free(zis);

    return -1;
}

/**
 * @cond internal
 * Control for variable-time scalar multiply algorithms.
 */
struct smvt_control {
    int power, addend;
};
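
/*
 * Recode a scalar into width-(tableBits+1) wNAF: a list of
 * (power, odd signed addend) control words, most significant first,
 * terminated by a stopper with power = -1. Bits are absorbed into
 * |current| until it overflows the table range, at which point one
 * control word is emitted. For example, with tableBits = 2,
 * 0b11101 = 29 recodes to 7*2^2 + 1, and 0b11111 = 31 recodes to
 * 2^5 - 1. Returns the number of control words, not counting the
 * stopper.
 */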
static int
recode_wnaf (
    struct smvt_control *control, /* [nbits/(tableBits+1) + 3] */
    const word_t *scalar,
    unsigned int nbits,
    unsigned int tableBits
) {
    int current = 0, i, j;
    unsigned int position = 0;

    /* PERF: negate scalar if it's large
     * PERF: this is a pretty simplistic algorithm. I'm sure there's a faster one...
     */
    for (i=nbits-1; i >= 0; i--) {
        int bit = (scalar[i/WORD_BITS] >> (i%WORD_BITS)) & 1;
        current = 2*current + bit;

        /*
         * Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0
         * So current loses (tableBits+1) bits every time. It otherwise gains
         * 1 bit per iteration. The number of iterations is
         * (nbits + 2 + tableBits), and an additional control word is added at
         * the end. So the total number of control words is at most
         * ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2.
         * There's also the stopper with power -1, for a total of +3.
         */
        if (current >= (2<<tableBits) || current <= -1 - (2<<tableBits)) {
            int delta = (current + 1) >> 1; /* |delta| < 2^tableBits */
            current = -(current & 1);

            for (j=i; (delta & 1) == 0; j++) {
                delta >>= 1;
            }
            control[position].power = j+1;
            control[position].addend = delta;
            position++;
            assert(position <= nbits/(tableBits+1) + 2);
        }
    }

    if (current) {
        for (j=0; (current & 1) == 0; j++) {
            current >>= 1;
        }
        control[position].power = j;
        control[position].addend = current;
        position++;
        assert(position <= nbits/(tableBits+1) + 2);
    }

    control[position].power = -1;
    control[position].addend = 0;
    return position;
}
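
/*
 * Fill output[] with the odd multiples P, 3P, 5P, ... of the working point
 * (2^tbits entries), clobbering working along the way.
 */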
static void
prepare_wnaf_table (
    tw_pniels_a_t *output,
    tw_extensible_a_t working,
    unsigned int tbits
) {
    int i;
    convert_tw_extensible_to_tw_pniels(output[0], working);

    if (tbits == 0) return;

    double_tw_extensible(working);
    tw_pniels_a_t twop;
    convert_tw_extensible_to_tw_pniels(twop, working);

    add_tw_pniels_to_tw_extensible(working, output[0]);
    convert_tw_extensible_to_tw_pniels(output[1], working);

    for (i=2; i < 1<<tbits; i++) {
        add_tw_pniels_to_tw_extensible(working, twop);
        convert_tw_extensible_to_tw_pniels(output[i], working);
    }
}
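
/**
 * Variable-time variable-base scalar multiply: recode to wNAF, build the
 * odd-multiple table from the input point, then double down from the top
 * power, adding or subtracting precmp[|addend| >> 1] at each control word.
 */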
void
scalarmul_vt (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits
) {
    const int table_bits = SCALARMUL_WNAF_TABLE_BITS;
    struct smvt_control control[nbits/(table_bits+1)+3];

    int control_bits = recode_wnaf(control, scalar, nbits, table_bits);

    tw_pniels_a_t precmp[1<<table_bits];
    prepare_wnaf_table(precmp, working, table_bits);

    if (control_bits > 0) {
        assert(control[0].addend > 0);
        assert(control[0].power >= 0);
        convert_tw_pniels_to_tw_extensible(working, precmp[control[0].addend >> 1]);
    } else {
        set_identity_tw_extensible(working);
        return;
    }

    int conti = 1, i;
    for (i = control[0].power - 1; i >= 0; i--) {
        double_tw_extensible(working);

        if (i == control[conti].power) {
            assert(control[conti].addend);

            if (control[conti].addend > 0) {
                add_tw_pniels_to_tw_extensible(working, precmp[control[conti].addend >> 1]);
            } else {
                sub_tw_pniels_from_tw_extensible(working, precmp[(-control[conti].addend) >> 1]);
            }
            conti++;
            assert(conti <= control_bits);
        }
    }
}
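
/*
 * Variable-time fixed-base scalar multiply over a table from
 * precompute_fixed_base_wnaf(). Same wNAF walk as scalarmul_vt(), but the
 * doublings are batched between control words rather than issued per bit.
 */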
void
scalarmul_fixed_base_wnaf_vt (
    tw_extensible_a_t working,
    const word_t scalar[SCALAR_WORDS],
    unsigned int nbits,
    const tw_niels_a_t *precmp,
    unsigned int table_bits
) {
    struct smvt_control control[nbits/(table_bits+1)+3];

    int control_bits = recode_wnaf(control, scalar, nbits, table_bits);

    if (control_bits > 0) {
        assert(control[0].addend > 0);
        assert(control[0].power >= 0);
        convert_tw_niels_to_tw_extensible(working, precmp[control[0].addend >> 1]);
    } else {
        set_identity_tw_extensible(working);
        return;
    }

    int conti = 1, i;
    for (; control[conti].power >= 0; conti++) {
        assert(conti <= control_bits);
        for (i = control[conti-1].power - control[conti].power; i; i--) {
            double_tw_extensible(working);
        }

        assert(control[conti].addend);
        if (control[conti].addend > 0) {
            add_tw_niels_to_tw_extensible(working, precmp[control[conti].addend >> 1]);
        } else {
            sub_tw_niels_from_tw_extensible(working, precmp[(-control[conti].addend) >> 1]);
        }
    }

    for (i = control[conti-1].power; i; i--) {
        double_tw_extensible(working);
    }
}
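
/*
 * Variable-time scalar_var*P + scalar_pre*B, where P is the variable input
 * point (its wNAF table is built on the fly) and B is the fixed base behind
 * precmp. The two wNAF schedules are interleaved on one shared doubling
 * chain, seeded from whichever schedule has the greater top power.
 */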
void
linear_combo_var_fixed_vt (
    tw_extensible_a_t working,
    const word_t scalar_var[SCALAR_WORDS],
    unsigned int nbits_var,
    const word_t scalar_pre[SCALAR_WORDS],
    unsigned int nbits_pre,
    const tw_niels_a_t *precmp,
    unsigned int table_bits_pre
) {
    const int table_bits_var = SCALARMUL_WNAF_COMBO_TABLE_BITS;
    struct smvt_control control_var[nbits_var/(table_bits_var+1)+3];
    struct smvt_control control_pre[nbits_pre/(table_bits_pre+1)+3];

    int ncb_var = recode_wnaf(control_var, scalar_var, nbits_var, table_bits_var);
    int ncb_pre = recode_wnaf(control_pre, scalar_pre, nbits_pre, table_bits_pre);
    (void)ncb_var;
    (void)ncb_pre;

    tw_pniels_a_t precmp_var[1<<table_bits_var];
    prepare_wnaf_table(precmp_var, working, table_bits_var);

    int contp=0, contv=0, i;

    i = control_var[0].power;
    if (i > control_pre[0].power) {
        convert_tw_pniels_to_tw_extensible(working, precmp_var[control_var[0].addend >> 1]);
        contv++;
    } else if (i == control_pre[0].power && i >= 0) {
        convert_tw_pniels_to_tw_extensible(working, precmp_var[control_var[0].addend >> 1]);
        add_tw_niels_to_tw_extensible(working, precmp[control_pre[0].addend >> 1]);
        contv++; contp++;
    } else {
        i = control_pre[0].power;
        convert_tw_niels_to_tw_extensible(working, precmp[control_pre[0].addend >> 1]);
        contp++;
    }

    if (i < 0) {
        set_identity_tw_extensible(working);
        return;
    }

    for (i--; i >= 0; i--) {
        double_tw_extensible(working);

        if (i == control_var[contv].power) {
            assert(control_var[contv].addend);
            if (control_var[contv].addend > 0) {
                add_tw_pniels_to_tw_extensible(working, precmp_var[control_var[contv].addend >> 1]);
            } else {
                sub_tw_pniels_from_tw_extensible(working, precmp_var[(-control_var[contv].addend) >> 1]);
            }
            contv++;
        }

        if (i == control_pre[contp].power) {
            assert(control_pre[contp].addend);
            if (control_pre[contp].addend > 0) {
                add_tw_niels_to_tw_extensible(working, precmp[control_pre[contp].addend >> 1]);
            } else {
                sub_tw_niels_from_tw_extensible(working, precmp[(-control_pre[contp].addend) >> 1]);
            }
            contp++;
        }
    }

    assert(contv == ncb_var);
    assert(contp == ncb_pre);
}