You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

251 lines
4.3 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #ifndef __P521_H__
  5. #define __P521_H__ 1
  6. #include <stdint.h>
  7. #include <assert.h>
  8. #include <string.h>
  9. #include "word.h"
  10. #include "constant_time.h"
  /*
   * Maps index x to (x mod 3) * 4 + (x div 3).
   * NOTE(review): presumably the slot in p521_t.limb[] that holds logical
   * limb x (three groups of four slots) -- confirm against the callers.
   * The argument is evaluated twice; pass a side-effect-free expression.
   */
  #define LIMBPERM(x) ((((x) % 3) * 4) + ((x) / 3))

  /* Feature flag; presumably tested by code that includes this header. */
  #define USE_P521_3x3_TRANSPOSE
  /*
   * Field element storage: twelve 64-bit limbs, 32-byte aligned so that the
   * inline helpers in this header can access it as three four-lane vectors.
   * NOTE(review): limbs 3, 7 and 11 look like padding lanes that are
   * expected to stay zero -- confirm against the implementation.
   */
  struct p521_t {
      uint64_t limb[12];
  } __attribute__((aligned(32)));

  typedef struct p521_t p521_t;
  16. #ifdef __cplusplus
  17. extern "C" {
  18. #endif
  19. static __inline__ void
  20. p521_set_ui (
  21. p521_t *out,
  22. uint64_t x
  23. ) __attribute__((unused));
  24. static __inline__ void
  25. p521_add_RAW (
  26. p521_t *out,
  27. const p521_t *a,
  28. const p521_t *b
  29. ) __attribute__((unused));
  30. static __inline__ void
  31. p521_sub_RAW (
  32. p521_t *out,
  33. const p521_t *a,
  34. const p521_t *b
  35. ) __attribute__((unused));
  36. static __inline__ void
  37. p521_neg_RAW (
  38. p521_t *out,
  39. const p521_t *a
  40. ) __attribute__((unused));
  41. static __inline__ void
  42. p521_addw (
  43. p521_t *a,
  44. uint64_t x
  45. ) __attribute__((unused));
  46. static __inline__ void
  47. p521_subw (
  48. p521_t *a,
  49. uint64_t x
  50. ) __attribute__((unused));
  51. static __inline__ void
  52. p521_copy (
  53. p521_t *out,
  54. const p521_t *a
  55. ) __attribute__((unused));
  56. static __inline__ void
  57. p521_weak_reduce (
  58. p521_t *inout
  59. ) __attribute__((unused));
  60. void
  61. p521_strong_reduce (
  62. p521_t *inout
  63. );
  64. mask_t
  65. p521_is_zero (
  66. const p521_t *in
  67. );
  68. static __inline__ void
  69. p521_bias (
  70. p521_t *inout,
  71. int amount
  72. ) __attribute__((unused));
  73. void
  74. p521_mul (
  75. p521_t *__restrict__ out,
  76. const p521_t *a,
  77. const p521_t *b
  78. );
  79. void
  80. p521_mulw (
  81. p521_t *__restrict__ out,
  82. const p521_t *a,
  83. uint64_t b
  84. );
  85. void
  86. p521_sqr (
  87. p521_t *__restrict__ out,
  88. const p521_t *a
  89. );
  90. void
  91. p521_serialize (
  92. uint8_t *serial,
  93. const struct p521_t *x
  94. );
  95. mask_t
  96. p521_deserialize (
  97. p521_t *x,
  98. const uint8_t serial[66]
  99. );
  100. /* -------------- Inline functions begin here -------------- */
  101. typedef uint64x4_t uint64x3_t; /* fit it in a vector register */
  102. static const uint64x3_t mask58 = { (1ull<<58) - 1, (1ull<<58) - 1, (1ull<<58) - 1, 0 };
  103. /* Currently requires CLANG. Sorry. */
  104. static inline uint64x3_t
  105. __attribute__((unused))
  106. timesW (
  107. uint64x3_t u
  108. ) {
  109. return u.zxyw + u.zwww;
  110. }
  111. void
  112. p521_set_ui (
  113. p521_t *out,
  114. uint64_t x
  115. ) {
  116. int i;
  117. out->limb[0] = x;
  118. for (i=1; i<12; i++) {
  119. out->limb[i] = 0;
  120. }
  121. }
  122. void
  123. p521_add_RAW (
  124. p521_t *out,
  125. const p521_t *a,
  126. const p521_t *b
  127. ) {
  128. unsigned int i;
  129. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  130. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
  131. }
  132. }
  133. void
  134. p521_sub_RAW (
  135. p521_t *out,
  136. const p521_t *a,
  137. const p521_t *b
  138. ) {
  139. unsigned int i;
  140. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  141. ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
  142. }
  143. }
  144. void
  145. p521_neg_RAW (
  146. struct p521_t *out,
  147. const p521_t *a
  148. ) {
  149. unsigned int i;
  150. for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
  151. ((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i];
  152. }
  153. }
  154. void
  155. p521_addw (
  156. p521_t *a,
  157. uint64_t x
  158. ) {
  159. a->limb[0] += x;
  160. }
  161. void
  162. p521_subw (
  163. p521_t *a,
  164. uint64_t x
  165. ) {
  166. a->limb[0] -= x;
  167. }
  168. void
  169. p521_copy (
  170. p521_t *out,
  171. const p521_t *a
  172. ) {
  173. memcpy(out,a,sizeof(*a));
  174. }
  175. void
  176. p521_bias (
  177. p521_t *a,
  178. int amt
  179. ) {
  180. uint64_t co0 = ((1ull<<58)-2)*amt, co1 = ((1ull<<58)-1)*amt;
  181. uint64x4_t vlo = { co0, co1, co1, 0 }, vhi = { co1, co1, co1, 0 };
  182. ((uint64x4_t*)a)[0] += vlo;
  183. ((uint64x4_t*)a)[1] += vhi;
  184. ((uint64x4_t*)a)[2] += vhi;
  185. }
  186. void
  187. p521_weak_reduce (
  188. p521_t *a
  189. ) {
  190. #if 0
  191. int i;
  192. assert(a->limb[3] == 0 && a->limb[7] == 0 && a->limb[11] == 0);
  193. for (i=0; i<12; i++) {
  194. assert(a->limb[i] < 3ull<<61);
  195. }
  196. #endif
  197. uint64x3_t
  198. ot0 = ((uint64x4_t*)a)[0],
  199. ot1 = ((uint64x4_t*)a)[1],
  200. ot2 = ((uint64x4_t*)a)[2];
  201. uint64x3_t out0 = (ot0 & mask58) + timesW(ot2>>58);
  202. uint64x3_t out1 = (ot1 & mask58) + (ot0>>58);
  203. uint64x3_t out2 = (ot2 & mask58) + (ot1>>58);
  204. ((uint64x4_t*)a)[0] = out0;
  205. ((uint64x4_t*)a)[1] = out1;
  206. ((uint64x4_t*)a)[2] = out2;
  207. }
  208. #ifdef __cplusplus
  209. }; /* extern "C" */
  210. #endif
  211. #endif /* __P521_H__ */