You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

240 lines
4.1 KiB

  1. /* Copyright (c) 2014 Cryptography Research, Inc.
  2. * Released under the MIT License. See LICENSE.txt for license information.
  3. */
  4. #ifndef __P448_H__
  5. #define __P448_H__ 1
  6. #include "word.h"
  7. #include <stdint.h>
  8. #include <assert.h>
  /*
   * Field element held as sixteen 32-bit limbs.
   * The inline helpers in this header mask limbs with (1<<28)-1, i.e. they
   * treat each limb as carrying 28 bits of value, leaving the upper bits as
   * headroom for unpropagated carries.
   * The 32-byte alignment is presumably there so the struct can be accessed
   * through vector pointers, as the helpers below do -- confirm against word.h.
   */
  9. typedef struct p448_t {
  10. uint32_t limb[16];
  11. } __attribute__((aligned(32))) p448_t;
  /*
   * Index permutation on 0..15: rotates the 4-bit index left by one bit, so
   * 0..7 map to the even slots 0,2,...,14 and 8..15 map to the odd slots
   * 1,3,...,15.  The argument is evaluated twice, so pass side-effect-free
   * expressions only.
   */
  12. #define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
  /*
   * NOTE(review): not referenced anywhere in this header; presumably tested
   * by other translation units to select the permuted limb layout -- confirm.
   */
  13. #define USE_NEON_PERM 1
  14. #ifdef __cplusplus
  15. extern "C" {
  16. #endif
  /*
   * Forward declarations of the inline helpers defined later in this header.
   * Declaring them first attaches the unused/always_inline attributes; the
   * definitions further down then omit the storage class and attributes.
   */

  /* Set *out from the integer x; every limb is cleared first. */
  17. static __inline__ void
  18. p448_set_ui (
  19. p448_t *out,
  20. uint64_t x
  21. ) __attribute__((unused,always_inline));
  /* out = a + b, element-wise, with no carry propagation or reduction. */
  22. static __inline__ void
  23. p448_add_RAW (
  24. p448_t *out,
  25. const p448_t *a,
  26. const p448_t *b
  27. ) __attribute__((unused,always_inline));
  /* out = a - b, element-wise, with no borrow propagation or reduction. */
  28. static __inline__ void
  29. p448_sub_RAW (
  30. p448_t *out,
  31. const p448_t *a,
  32. const p448_t *b
  33. ) __attribute__((unused,always_inline));
  /* out = -a, element-wise, with no reduction. */
  34. static __inline__ void
  35. p448_neg_RAW (
  36. p448_t *out,
  37. const p448_t *a
  38. ) __attribute__((unused,always_inline));
  /* Add the word x to limb[0] of *a, in place; no carry is propagated. */
  39. static __inline__ void
  40. p448_addw (
  41. p448_t *a,
  42. uint32_t x
  43. ) __attribute__((unused,always_inline));
  /* Subtract the word x from limb[0] of *a, in place; no borrow is propagated. */
  44. static __inline__ void
  45. p448_subw (
  46. p448_t *a,
  47. uint32_t x
  48. ) __attribute__((unused,always_inline));
  /* Copy *a into *out by plain struct assignment. */
  49. static __inline__ void
  50. p448_copy (
  51. p448_t *out,
  52. const p448_t *a
  53. ) __attribute__((unused,always_inline));
  /*
   * Mask every limb of *inout to 28 bits and move each limb's overflow one
   * position up, folding the overflow of the top limbs back into the bottom.
   * A single pass only: limbs may still exceed 28 bits afterwards.
   */
  54. static __inline__ void
  55. p448_weak_reduce (
  56. p448_t *inout
  57. ) __attribute__((unused,always_inline));
  /*
   * Defined outside this header.
   * NOTE(review): presumably reduces *inout to its unique canonical
   * representative -- confirm against the implementation.
   */
  58. void
  59. p448_strong_reduce (
  60. p448_t *inout
  61. );
  /*
   * Defined outside this header; mask_t comes from word.h.
   * NOTE(review): presumably returns an all-ones mask when *in is zero and
   * an all-zeros mask otherwise -- confirm against the implementation.
   */
  62. mask_t
  63. p448_is_zero (
  64. const p448_t *in
  65. );
  /*
   * Add amount*(2^28-1) to every limb of *inout, except limb[1], which
   * receives amount*(2^28-2).  All arithmetic wraps modulo 2^32.
   * Defined inline later in this header.
   */
  66. static __inline__ void
  67. p448_bias (
  68. p448_t *inout,
  69. int amount
  70. ) __attribute__((unused,always_inline));
  /*
   * Defined outside this header.  out is __restrict__-qualified, so it must
   * not alias a or b.
   * NOTE(review): presumably out = a * b in the field -- confirm.
   */
  71. void
  72. p448_mul (
  73. p448_t *__restrict__ out,
  74. const p448_t *a,
  75. const p448_t *b
  76. );
  /*
   * Defined outside this header.  out must not alias a (__restrict__).
   * NOTE(review): presumably out = a * b for a 64-bit scalar b -- confirm.
   */
  77. void
  78. p448_mulw (
  79. p448_t *__restrict__ out,
  80. const p448_t *a,
  81. uint64_t b
  82. );
  /*
   * Defined outside this header.  out must not alias a (__restrict__).
   * NOTE(review): presumably out = a * a in the field -- confirm.
   */
  83. void
  84. p448_sqr (
  85. p448_t *__restrict__ out,
  86. const p448_t *a
  87. );
  /*
   * Defined outside this header.
   * NOTE(review): presumably writes the byte encoding of *x to serial; the
   * buffer length is not stated here (deserialize takes 56 bytes) -- confirm.
   */
  88. void
  89. p448_serialize (
  90. uint8_t *serial,
  91. const struct p448_t *x
  92. );
  /*
   * Defined outside this header.  Reads a 56-byte encoding from serial into *x.
   * NOTE(review): the mask_t result presumably signals whether the encoding
   * was valid -- confirm against the implementation.
   */
  93. mask_t
  94. p448_deserialize (
  95. p448_t *x,
  96. const uint8_t serial[56]
  97. );
  98. /* -------------- Inline functions begin here -------------- */
  99. void
  100. p448_set_ui (
  101. p448_t *out,
  102. uint64_t x
  103. ) {
  104. int i;
  105. for (i=0; i<16; i++) {
  106. out->limb[i] = 0;
  107. }
  108. out->limb[0] = x & ((1<<28)-1);
  109. out->limb[2] = x>>28;
  110. }
  111. void
  112. p448_add_RAW (
  113. p448_t *out,
  114. const p448_t *a,
  115. const p448_t *b
  116. ) {
  117. unsigned int i;
  118. for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
  119. ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
  120. }
  121. }
  122. void
  123. p448_sub_RAW (
  124. p448_t *out,
  125. const p448_t *a,
  126. const p448_t *b
  127. ) {
  128. unsigned int i;
  129. for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
  130. ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
  131. }
  132. /*
  133. unsigned int i;
  134. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  135. out->limb[i] = a->limb[i] - b->limb[i];
  136. }
  137. */
  138. }
  139. void
  140. p448_neg_RAW (
  141. p448_t *out,
  142. const p448_t *a
  143. ) {
  144. unsigned int i;
  145. for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
  146. ((uint32xn_t*)out)[i] = -((const uint32xn_t*)a)[i];
  147. }
  148. /*
  149. unsigned int i;
  150. for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
  151. out->limb[i] = -a->limb[i];
  152. }
  153. */
  154. }
  155. void
  156. p448_addw (
  157. p448_t *a,
  158. uint32_t x
  159. ) {
  160. a->limb[0] += x;
  161. }
  162. void
  163. p448_subw (
  164. p448_t *a,
  165. uint32_t x
  166. ) {
  167. a->limb[0] -= x;
  168. }
  169. void
  170. p448_copy (
  171. p448_t *out,
  172. const p448_t *a
  173. ) {
  174. *out = *a;
  175. }
  176. void
  177. p448_bias (
  178. p448_t *a,
  179. int amt
  180. ) {
  181. uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
  182. uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1};
  183. uint32x4_t *aa = (uint32x4_t*) a;
  184. aa[0] += lo;
  185. aa[1] += hi;
  186. aa[2] += hi;
  187. aa[3] += hi;
  188. }
  189. void
  190. p448_weak_reduce (
  191. p448_t *a
  192. ) {
  193. uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1},
  194. tmp = vshr_n_u32(aa[7],28);
  195. int i;
  196. for (i=7; i>=1; i--) {
  197. aa[i] = vsra_n_u32(aa[i] & vmask, aa[i-1], 28);
  198. }
  199. aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2);
  200. }
  201. #ifdef __cplusplus
  202. }; /* extern "C" */
  203. #endif
  204. #endif /* __P448_H__ */