You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

405 lines
7.3 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #ifndef __P448_H__
  5. #define __P448_H__ 1
  6. #include <stdint.h>
  7. #include <assert.h>
  8. #include "word.h"
  /**
   * Field element container: eight 64-bit limbs, with the whole struct
   * aligned to 32 bytes.
   *
   * The inline routines in this header reinterpret the struct as an array
   * of wider register types (big_register_t / uint64xn_t), so both the
   * alignment and the total size (64 bytes) matter to them.
   *
   * NOTE(review): p448_weak_reduce masks each limb to 56 bits, which
   * suggests a radix-2^56 representation (8 * 56 = 448 bits) -- confirm.
   */
  9. typedef struct p448_t {
  10. uint64_t limb[8];
  11. } __attribute__((aligned(32))) p448_t;
  12. #ifdef __cplusplus
  13. extern "C" {
  14. #endif
  15. static __inline__ void
  16. p448_set_ui (
  17. p448_t *out,
  18. uint64_t x
  19. ) __attribute__((unused,always_inline));
  20. static __inline__ void
  21. p448_cond_swap (
  22. p448_t *a,
  23. p448_t *b,
  24. mask_t do_swap
  25. ) __attribute__((unused,always_inline));
  26. static __inline__ void
  27. p448_add (
  28. p448_t *out,
  29. const p448_t *a,
  30. const p448_t *b
  31. ) __attribute__((unused,always_inline));
  32. static __inline__ void
  33. p448_sub (
  34. p448_t *out,
  35. const p448_t *a,
  36. const p448_t *b
  37. ) __attribute__((unused,always_inline));
  38. static __inline__ void
  39. p448_neg (
  40. p448_t *out,
  41. const p448_t *a
  42. ) __attribute__((unused,always_inline));
  43. static __inline__ void
  44. p448_cond_neg (
  45. p448_t *a,
  46. mask_t doNegate
  47. ) __attribute__((unused,always_inline));
  48. static __inline__ void
  49. p448_addw (
  50. p448_t *a,
  51. uint64_t x
  52. ) __attribute__((unused,always_inline));
  53. static __inline__ void
  54. p448_subw (
  55. p448_t *a,
  56. uint64_t x
  57. ) __attribute__((unused,always_inline));
  58. static __inline__ void
  59. p448_copy (
  60. p448_t *out,
  61. const p448_t *a
  62. ) __attribute__((unused,always_inline));
  63. static __inline__ void
  64. p448_weak_reduce (
  65. p448_t *inout
  66. ) __attribute__((unused,always_inline));
  67. void
  68. p448_strong_reduce (
  69. p448_t *inout
  70. );
  71. mask_t
  72. p448_is_zero (
  73. const p448_t *in
  74. );
  75. static
  76. #ifndef GCC_HAS_A_BUG_SO_DONT_INLINE_FIELD_BIAS
  77. __inline__
  78. #endif
  79. void
  80. p448_bias (
  81. p448_t *inout,
  82. int amount
  83. )
  84. #ifdef GCC_HAS_A_BUG_SO_DONT_INLINE_FIELD_BIAS
  85. __attribute__((unused,noinline,optimize("O1")))
  86. #else
  87. __attribute__((unused,always_inline))
  88. #endif
  89. ;
  90. void
  91. p448_mul (
  92. p448_t *__restrict__ out,
  93. const p448_t *a,
  94. const p448_t *b
  95. );
  96. void
  97. p448_mulw (
  98. p448_t *__restrict__ out,
  99. const p448_t *a,
  100. uint64_t b
  101. );
  102. void
  103. p448_sqr (
  104. p448_t *__restrict__ out,
  105. const p448_t *a
  106. );
  107. static __inline__ void
  108. p448_sqrn (
  109. p448_t *__restrict__ y,
  110. const p448_t *x,
  111. int n
  112. ) __attribute__((unused,always_inline));
  113. void
  114. p448_serialize (
  115. uint8_t *serial,
  116. const struct p448_t *x
  117. );
  118. mask_t
  119. p448_deserialize (
  120. p448_t *x,
  121. const uint8_t serial[56]
  122. );
  123. static __inline__ void
  124. p448_mask(
  125. struct p448_t *a,
  126. const struct p448_t *b,
  127. mask_t mask
  128. ) __attribute__((unused,always_inline));
  129. /**
  130. * Returns 1/x.
  131. *
  132. * If x=0, returns 0.
  133. */
  134. void
  135. p448_inverse (
  136. struct p448_t* a,
  137. const struct p448_t* x
  138. );
  139. void
  140. simultaneous_invert_p448 (
  141. struct p448_t *__restrict__ out,
  142. const struct p448_t *in,
  143. unsigned int n
  144. );
  145. static inline mask_t
  146. p448_eq (
  147. const struct p448_t *a,
  148. const struct p448_t *b
  149. ) __attribute__((always_inline,unused));
  150. /* -------------- Inline functions begin here -------------- */
  151. void
  152. p448_set_ui (
  153. p448_t *out,
  154. uint64_t x
  155. ) {
  156. int i;
  157. out->limb[0] = x;
  158. for (i=1; i<8; i++) {
  159. out->limb[i] = 0;
  160. }
  161. }
  162. void
  163. p448_cond_swap (
  164. p448_t *a,
  165. p448_t *b,
  166. mask_t doswap
  167. ) {
  168. big_register_t *aa = (big_register_t*)a;
  169. big_register_t *bb = (big_register_t*)b;
  170. big_register_t m = br_set_to_mask(doswap);
  171. unsigned int i;
  172. for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
  173. big_register_t x = m & (aa[i]^bb[i]);
  174. aa[i] ^= x;
  175. bb[i] ^= x;
  176. }
  177. }
  178. void
  179. p448_add (
  180. p448_t *out,
  181. const p448_t *a,
  182. const p448_t *b
  183. ) {
  184. unsigned int i;
  185. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  186. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
  187. }
  188. /*
  189. unsigned int i;
  190. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  191. out->limb[i] = a->limb[i] + b->limb[i];
  192. }
  193. */
  194. }
  195. void
  196. p448_sub (
  197. p448_t *out,
  198. const p448_t *a,
  199. const p448_t *b
  200. ) {
  201. unsigned int i;
  202. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  203. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
  204. }
  205. /*
  206. unsigned int i;
  207. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  208. out->limb[i] = a->limb[i] - b->limb[i];
  209. }
  210. */
  211. }
  212. void
  213. p448_neg (
  214. struct p448_t *out,
  215. const p448_t *a
  216. ) {
  217. unsigned int i;
  218. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  219. ((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i];
  220. }
  221. /*
  222. unsigned int i;
  223. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  224. out->limb[i] = -a->limb[i];
  225. }
  226. */
  227. }
  228. void
  229. p448_cond_neg(
  230. struct p448_t *a,
  231. mask_t doNegate
  232. ) {
  233. unsigned int i;
  234. struct p448_t negated;
  235. big_register_t *aa = (big_register_t *)a;
  236. big_register_t *nn = (big_register_t*)&negated;
  237. big_register_t m = br_set_to_mask(doNegate);
  238. p448_neg(&negated, a);
  239. p448_bias(&negated, 2);
  240. for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
  241. aa[i] = (aa[i] & ~m) | (nn[i] & m);
  242. }
  243. }
  244. void
  245. p448_addw (
  246. p448_t *a,
  247. uint64_t x
  248. ) {
  249. a->limb[0] += x;
  250. }
  251. void
  252. p448_subw (
  253. p448_t *a,
  254. uint64_t x
  255. ) {
  256. a->limb[0] -= x;
  257. }
  258. void
  259. p448_copy (
  260. p448_t *out,
  261. const p448_t *a
  262. ) {
  263. unsigned int i;
  264. for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) {
  265. ((big_register_t *)out)[i] = ((const big_register_t *)a)[i];
  266. }
  267. }
  268. void
  269. p448_bias (
  270. p448_t *a,
  271. int amt
  272. ) {
  273. uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
  274. #if __AVX2__
  275. uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
  276. uint64x4_t *aa = (uint64x4_t*) a;
  277. aa[0] += lo;
  278. aa[1] += hi;
  279. #elif __SSE2__
  280. uint64x2_t lo = {co1,co1}, hi = {co2,co1};
  281. uint64x2_t *aa = (uint64x2_t*) a;
  282. aa[0] += lo;
  283. aa[1] += lo;
  284. aa[2] += hi;
  285. aa[3] += lo;
  286. #else
  287. unsigned int i;
  288. for (i=0; i<sizeof(*a)/sizeof(uint64_t); i++) {
  289. a->limb[i] += (i==4) ? co2 : co1;
  290. }
  291. #endif
  292. }
  293. void
  294. p448_weak_reduce (
  295. p448_t *a
  296. ) {
  297. /* PERF: use pshufb/palignr if anyone cares about speed of this */
  298. uint64_t mask = (1ull<<56) - 1;
  299. uint64_t tmp = a->limb[7] >> 56;
  300. int i;
  301. a->limb[4] += tmp;
  302. for (i=7; i>0; i--) {
  303. a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
  304. }
  305. a->limb[0] = (a->limb[0] & mask) + tmp;
  306. }
  307. void
  308. p448_sqrn (
  309. p448_t *__restrict__ y,
  310. const p448_t *x,
  311. int n
  312. ) {
  313. p448_t tmp;
  314. assert(n>0);
  315. if (n&1) {
  316. p448_sqr(y,x);
  317. n--;
  318. } else {
  319. p448_sqr(&tmp,x);
  320. p448_sqr(y,&tmp);
  321. n-=2;
  322. }
  323. for (; n; n-=2) {
  324. p448_sqr(&tmp,y);
  325. p448_sqr(y,&tmp);
  326. }
  327. }
  328. mask_t
  329. p448_eq (
  330. const struct p448_t *a,
  331. const struct p448_t *b
  332. ) {
  333. struct p448_t ra, rb;
  334. p448_copy(&ra, a);
  335. p448_copy(&rb, b);
  336. p448_weak_reduce(&ra);
  337. p448_weak_reduce(&rb);
  338. p448_sub(&ra, &ra, &rb);
  339. p448_bias(&ra, 2);
  340. return p448_is_zero(&ra);
  341. }
  342. void
  343. p448_mask (
  344. struct p448_t *a,
  345. const struct p448_t *b,
  346. mask_t mask
  347. ) {
  348. unsigned int i;
  349. for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
  350. a->limb[i] = b->limb[i] & mask;
  351. }
  352. }
  353. #ifdef __cplusplus
  354. }; /* extern "C" */
  355. #endif
  356. #endif /* __P448_H__ */