Continuing demagication and factoring of the field code. Removing high-level ops from p448.h and putting them in field.h, so that they won't need rewriting for new fields and architectures. Created constant_time.h, which contains constant-time lookups, conditional swaps, etc. That way the code is the same on all architectures, instead of varying depending on whether the field size is a multiple of the vector register size. I should still add a constant_time_select to factor out field_cond_negate. TODO: I need to test this for correctness and performance on various platforms. It works on my Mac, but since Yosemite the timing is totally unpredictable (background tasks? variable boost?).
master
| @@ -22,13 +22,6 @@ p448_set_ui ( | |||||
| p448_t *out, | p448_t *out, | ||||
| uint64_t x | uint64_t x | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| static __inline__ void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t do_swap | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -114,13 +107,6 @@ p448_sqr ( | |||||
| p448_t *__restrict__ out, | p448_t *__restrict__ out, | ||||
| const p448_t *a | const p448_t *a | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) __attribute__((unused,always_inline)); | |||||
| void | void | ||||
| p448_serialize ( | p448_serialize ( | ||||
| @@ -133,24 +119,6 @@ p448_deserialize ( | |||||
| p448_t *x, | p448_t *x, | ||||
| const uint8_t serial[56] | const uint8_t serial[56] | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_mask( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) __attribute__((unused,always_inline)); | |||||
| /** | |||||
| * Returns 1/x. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| p448_inverse ( | |||||
| struct p448_t* a, | |||||
| const struct p448_t* x | |||||
| ); | |||||
| static inline mask_t | static inline mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| @@ -172,24 +140,6 @@ p448_set_ui ( | |||||
| out->limb[i] = 0; | out->limb[i] = 0; | ||||
| } | } | ||||
| } | } | ||||
| void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t doswap | |||||
| ) { | |||||
| big_register_t *aa = (big_register_t*)a; | |||||
| big_register_t *bb = (big_register_t*)b; | |||||
| big_register_t m = br_set_to_mask(doswap); | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
| big_register_t x = m & (aa[i]^bb[i]); | |||||
| aa[i] ^= x; | |||||
| bb[i] ^= x; | |||||
| } | |||||
| } | |||||
| void | void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -315,28 +265,6 @@ p448_weak_reduce ( | |||||
| a->limb[0] = (a->limb[0] & mask) + tmp; | a->limb[0] = (a->limb[0] & mask) + tmp; | ||||
| } | } | ||||
| void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) { | |||||
| p448_t tmp; | |||||
| assert(n>0); | |||||
| if (n&1) { | |||||
| p448_sqr(y,x); | |||||
| n--; | |||||
| } else { | |||||
| p448_sqr(&tmp,x); | |||||
| p448_sqr(y,&tmp); | |||||
| n-=2; | |||||
| } | |||||
| for (; n; n-=2) { | |||||
| p448_sqr(&tmp,y); | |||||
| p448_sqr(y,&tmp); | |||||
| } | |||||
| } | |||||
| mask_t | mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| const struct p448_t *a, | const struct p448_t *a, | ||||
| @@ -352,18 +280,6 @@ p448_eq ( | |||||
| return p448_is_zero(&ra); | return p448_is_zero(&ra); | ||||
| } | } | ||||
| void | |||||
| p448_mask ( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) { | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||||
| a->limb[i] = b->limb[i] & mask; | |||||
| } | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| }; /* extern "C" */ | }; /* extern "C" */ | ||||
| #endif | #endif | ||||
| @@ -22,13 +22,6 @@ p448_set_ui ( | |||||
| p448_t *out, | p448_t *out, | ||||
| uint64_t x | uint64_t x | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| static __inline__ void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t do_swap | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -114,13 +107,6 @@ p448_sqr ( | |||||
| p448_t *__restrict__ out, | p448_t *__restrict__ out, | ||||
| const p448_t *a | const p448_t *a | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) __attribute__((unused,always_inline)); | |||||
| void | void | ||||
| p448_serialize ( | p448_serialize ( | ||||
| @@ -133,24 +119,6 @@ p448_deserialize ( | |||||
| p448_t *x, | p448_t *x, | ||||
| const uint8_t serial[56] | const uint8_t serial[56] | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_mask( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) __attribute__((unused,always_inline)); | |||||
| /** | |||||
| * Returns 1/x. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| p448_inverse ( | |||||
| struct p448_t* a, | |||||
| const struct p448_t* x | |||||
| ); | |||||
| static inline mask_t | static inline mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| @@ -172,28 +140,6 @@ p448_set_ui ( | |||||
| out->limb[i] = 0; | out->limb[i] = 0; | ||||
| } | } | ||||
| } | } | ||||
| void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t doswap | |||||
| ) { | |||||
| big_register_t *aa = (big_register_t*)a; | |||||
| big_register_t *bb = (big_register_t*)b; | |||||
| #if __ARM_NEON__ | |||||
| big_register_t m = vdupq_n_u32(doswap); | |||||
| #else | |||||
| big_register_t m = doswap; | |||||
| #endif | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
| big_register_t x = m & (aa[i]^bb[i]); | |||||
| aa[i] ^= x; | |||||
| bb[i] ^= x; | |||||
| } | |||||
| } | |||||
| void | void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -323,28 +269,6 @@ p448_weak_reduce ( | |||||
| a->limb[0] = (a->limb[0] & mask) + tmp; | a->limb[0] = (a->limb[0] & mask) + tmp; | ||||
| } | } | ||||
| void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) { | |||||
| p448_t tmp; | |||||
| assert(n>0); | |||||
| if (n&1) { | |||||
| p448_sqr(y,x); | |||||
| n--; | |||||
| } else { | |||||
| p448_sqr(&tmp,x); | |||||
| p448_sqr(y,&tmp); | |||||
| n-=2; | |||||
| } | |||||
| for (; n; n-=2) { | |||||
| p448_sqr(&tmp,y); | |||||
| p448_sqr(y,&tmp); | |||||
| } | |||||
| } | |||||
| mask_t | mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| const struct p448_t *a, | const struct p448_t *a, | ||||
| @@ -360,18 +284,6 @@ p448_eq ( | |||||
| return p448_is_zero(&ra); | return p448_is_zero(&ra); | ||||
| } | } | ||||
| void | |||||
| p448_mask ( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) { | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||||
| a->limb[i] = b->limb[i] & mask; | |||||
| } | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| }; /* extern "C" */ | }; /* extern "C" */ | ||||
| #endif | #endif | ||||
| @@ -22,13 +22,6 @@ p448_set_ui ( | |||||
| p448_t *out, | p448_t *out, | ||||
| uint64_t x | uint64_t x | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| static __inline__ void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t do_swap | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -114,13 +107,6 @@ p448_sqr ( | |||||
| p448_t *__restrict__ out, | p448_t *__restrict__ out, | ||||
| const p448_t *a | const p448_t *a | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) __attribute__((unused,always_inline)); | |||||
| void | void | ||||
| p448_serialize ( | p448_serialize ( | ||||
| @@ -133,24 +119,6 @@ p448_deserialize ( | |||||
| p448_t *x, | p448_t *x, | ||||
| const uint8_t serial[56] | const uint8_t serial[56] | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_mask( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) __attribute__((unused,always_inline)); | |||||
| /** | |||||
| * Returns 1/x. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| p448_inverse ( | |||||
| struct p448_t* a, | |||||
| const struct p448_t* x | |||||
| ); | |||||
| static inline mask_t | static inline mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| @@ -172,24 +140,6 @@ p448_set_ui ( | |||||
| out->limb[i] = 0; | out->limb[i] = 0; | ||||
| } | } | ||||
| } | } | ||||
| void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t doswap | |||||
| ) { | |||||
| big_register_t *aa = (big_register_t*)a; | |||||
| big_register_t *bb = (big_register_t*)b; | |||||
| big_register_t m = br_set_to_mask(doswap); | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
| big_register_t x = m & (aa[i]^bb[i]); | |||||
| aa[i] ^= x; | |||||
| bb[i] ^= x; | |||||
| } | |||||
| } | |||||
| void | void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -315,28 +265,6 @@ p448_weak_reduce ( | |||||
| a->limb[0] = (a->limb[0] & mask) + tmp; | a->limb[0] = (a->limb[0] & mask) + tmp; | ||||
| } | } | ||||
| void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) { | |||||
| p448_t tmp; | |||||
| assert(n>0); | |||||
| if (n&1) { | |||||
| p448_sqr(y,x); | |||||
| n--; | |||||
| } else { | |||||
| p448_sqr(&tmp,x); | |||||
| p448_sqr(y,&tmp); | |||||
| n-=2; | |||||
| } | |||||
| for (; n; n-=2) { | |||||
| p448_sqr(&tmp,y); | |||||
| p448_sqr(y,&tmp); | |||||
| } | |||||
| } | |||||
| mask_t | mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| const struct p448_t *a, | const struct p448_t *a, | ||||
| @@ -352,18 +280,6 @@ p448_eq ( | |||||
| return p448_is_zero(&ra); | return p448_is_zero(&ra); | ||||
| } | } | ||||
| void | |||||
| p448_mask ( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) { | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||||
| a->limb[i] = b->limb[i] & mask; | |||||
| } | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| }; /* extern "C" */ | }; /* extern "C" */ | ||||
| #endif | #endif | ||||
| @@ -25,13 +25,6 @@ p448_set_ui ( | |||||
| p448_t *out, | p448_t *out, | ||||
| uint64_t x | uint64_t x | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| static __inline__ void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t do_swap | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -117,13 +110,6 @@ p448_sqr ( | |||||
| p448_t *__restrict__ out, | p448_t *__restrict__ out, | ||||
| const p448_t *a | const p448_t *a | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) __attribute__((unused,always_inline)); | |||||
| void | void | ||||
| p448_serialize ( | p448_serialize ( | ||||
| @@ -136,24 +122,6 @@ p448_deserialize ( | |||||
| p448_t *x, | p448_t *x, | ||||
| const uint8_t serial[56] | const uint8_t serial[56] | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_mask( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) __attribute__((unused,always_inline)); | |||||
| /** | |||||
| * Returns 1/x. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| p448_inverse ( | |||||
| struct p448_t* a, | |||||
| const struct p448_t* x | |||||
| ); | |||||
| static inline mask_t | static inline mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| @@ -175,24 +143,6 @@ p448_set_ui ( | |||||
| out->limb[0] = x & ((1<<28)-1); | out->limb[0] = x & ((1<<28)-1); | ||||
| out->limb[2] = x>>28; | out->limb[2] = x>>28; | ||||
| } | } | ||||
| void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t doswap | |||||
| ) { | |||||
| big_register_t *aa = (big_register_t*)a; | |||||
| big_register_t *bb = (big_register_t*)b; | |||||
| big_register_t m = br_set_to_mask(doswap); | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
| big_register_t x = m & (aa[i]^bb[i]); | |||||
| aa[i] ^= x; | |||||
| bb[i] ^= x; | |||||
| } | |||||
| } | |||||
| void | void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -313,28 +263,6 @@ p448_weak_reduce ( | |||||
| aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2); | aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2); | ||||
| } | } | ||||
| void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) { | |||||
| p448_t tmp; | |||||
| assert(n>0); | |||||
| if (n&1) { | |||||
| p448_sqr(y,x); | |||||
| n--; | |||||
| } else { | |||||
| p448_sqr(&tmp,x); | |||||
| p448_sqr(y,&tmp); | |||||
| n-=2; | |||||
| } | |||||
| for (; n; n-=2) { | |||||
| p448_sqr(&tmp,y); | |||||
| p448_sqr(y,&tmp); | |||||
| } | |||||
| } | |||||
| mask_t | mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| const struct p448_t *a, | const struct p448_t *a, | ||||
| @@ -350,18 +278,6 @@ p448_eq ( | |||||
| return p448_is_zero(&ra); | return p448_is_zero(&ra); | ||||
| } | } | ||||
| void | |||||
| p448_mask ( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) { | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||||
| a->limb[i] = b->limb[i] & mask; | |||||
| } | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| }; /* extern "C" */ | }; /* extern "C" */ | ||||
| #endif | #endif | ||||
| @@ -23,13 +23,6 @@ p448_set_ui ( | |||||
| p448_t *out, | p448_t *out, | ||||
| uint64_t x | uint64_t x | ||||
| ) __attribute__((unused)); | ) __attribute__((unused)); | ||||
| static __inline__ void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t do_swap | |||||
| ) __attribute__((unused)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -121,13 +114,6 @@ p448_sqr ( | |||||
| p448_t *__restrict__ out, | p448_t *__restrict__ out, | ||||
| const p448_t *a | const p448_t *a | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) __attribute__((unused)); | |||||
| void | void | ||||
| p448_serialize ( | p448_serialize ( | ||||
| @@ -140,24 +126,6 @@ p448_deserialize ( | |||||
| p448_t *x, | p448_t *x, | ||||
| const uint8_t serial[56] | const uint8_t serial[56] | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_mask( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) __attribute__((unused)); | |||||
| /** | |||||
| * Returns 1/x. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| p448_inverse ( | |||||
| struct p448_t* a, | |||||
| const struct p448_t* x | |||||
| ); | |||||
| static inline mask_t | static inline mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| @@ -178,20 +146,6 @@ p448_set_ui ( | |||||
| out->limb[i] = 0; | out->limb[i] = 0; | ||||
| } | } | ||||
| } | } | ||||
| void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t doswap | |||||
| ) { | |||||
| unsigned int i; | |||||
| for (i=0; i<8; i++) { | |||||
| uint64_t x = doswap & (a->limb[i]^b->limb[i]); | |||||
| a->limb[i] ^= x; | |||||
| b->limb[i] ^= x; | |||||
| } | |||||
| } | |||||
| void | void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -313,28 +267,6 @@ p448_weak_reduce ( | |||||
| a->limb[0] = (a->limb[0] & mask) + tmp; | a->limb[0] = (a->limb[0] & mask) + tmp; | ||||
| } | } | ||||
| void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) { | |||||
| p448_t tmp; | |||||
| assert(n>0); | |||||
| if (n&1) { | |||||
| p448_sqr(y,x); | |||||
| n--; | |||||
| } else { | |||||
| p448_sqr(&tmp,x); | |||||
| p448_sqr(y,&tmp); | |||||
| n-=2; | |||||
| } | |||||
| for (; n; n-=2) { | |||||
| p448_sqr(&tmp,y); | |||||
| p448_sqr(y,&tmp); | |||||
| } | |||||
| } | |||||
| mask_t | mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| const struct p448_t *a, | const struct p448_t *a, | ||||
| @@ -347,18 +279,6 @@ p448_eq ( | |||||
| return p448_is_zero(&ra); | return p448_is_zero(&ra); | ||||
| } | } | ||||
| void | |||||
| p448_mask ( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) { | |||||
| unsigned int i; | |||||
| for (i=0; i<8; i++) { | |||||
| a->limb[i] = b->limb[i] & mask; | |||||
| } | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| }; /* extern "C" */ | }; /* extern "C" */ | ||||
| #endif | #endif | ||||
| @@ -22,13 +22,6 @@ p448_set_ui ( | |||||
| p448_t *out, | p448_t *out, | ||||
| uint64_t x | uint64_t x | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| static __inline__ void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t do_swap | |||||
| ) __attribute__((unused,always_inline)); | |||||
| static __inline__ void | static __inline__ void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -114,13 +107,6 @@ p448_sqr ( | |||||
| p448_t *__restrict__ out, | p448_t *__restrict__ out, | ||||
| const p448_t *a | const p448_t *a | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) __attribute__((unused,always_inline)); | |||||
| void | void | ||||
| p448_serialize ( | p448_serialize ( | ||||
| @@ -133,24 +119,6 @@ p448_deserialize ( | |||||
| p448_t *x, | p448_t *x, | ||||
| const uint8_t serial[56] | const uint8_t serial[56] | ||||
| ); | ); | ||||
| static __inline__ void | |||||
| p448_mask( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) __attribute__((unused,always_inline)); | |||||
| /** | |||||
| * Returns 1/x. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| p448_inverse ( | |||||
| struct p448_t* a, | |||||
| const struct p448_t* x | |||||
| ); | |||||
| static inline mask_t | static inline mask_t | ||||
| p448_eq ( | p448_eq ( | ||||
| @@ -171,24 +139,6 @@ p448_set_ui ( | |||||
| out->limb[i] = 0; | out->limb[i] = 0; | ||||
| } | } | ||||
| } | } | ||||
| void | |||||
| p448_cond_swap ( | |||||
| p448_t *a, | |||||
| p448_t *b, | |||||
| mask_t doswap | |||||
| ) { | |||||
| big_register_t *aa = (big_register_t*)a; | |||||
| big_register_t *bb = (big_register_t*)b; | |||||
| big_register_t m = br_set_to_mask(doswap); | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
| big_register_t x = m & (aa[i]^bb[i]); | |||||
| aa[i] ^= x; | |||||
| bb[i] ^= x; | |||||
| } | |||||
| } | |||||
| void | void | ||||
| p448_add ( | p448_add ( | ||||
| @@ -331,55 +281,6 @@ p448_weak_reduce ( | |||||
| a->limb[0] = (a->limb[0] & mask) + tmp; | a->limb[0] = (a->limb[0] & mask) + tmp; | ||||
| } | } | ||||
| void | |||||
| p448_sqrn ( | |||||
| p448_t *__restrict__ y, | |||||
| const p448_t *x, | |||||
| int n | |||||
| ) { | |||||
| p448_t tmp; | |||||
| assert(n>0); | |||||
| if (n&1) { | |||||
| p448_sqr(y,x); | |||||
| n--; | |||||
| } else { | |||||
| p448_sqr(&tmp,x); | |||||
| p448_sqr(y,&tmp); | |||||
| n-=2; | |||||
| } | |||||
| for (; n; n-=2) { | |||||
| p448_sqr(&tmp,y); | |||||
| p448_sqr(y,&tmp); | |||||
| } | |||||
| } | |||||
| mask_t | |||||
| p448_eq ( | |||||
| const struct p448_t *a, | |||||
| const struct p448_t *b | |||||
| ) { | |||||
| struct p448_t ra, rb; | |||||
| p448_copy(&ra, a); | |||||
| p448_copy(&rb, b); | |||||
| p448_weak_reduce(&ra); | |||||
| p448_weak_reduce(&rb); | |||||
| p448_sub(&ra, &ra, &rb); | |||||
| p448_bias(&ra, 2); | |||||
| return p448_is_zero(&ra); | |||||
| } | |||||
| void | |||||
| p448_mask ( | |||||
| struct p448_t *a, | |||||
| const struct p448_t *b, | |||||
| mask_t mask | |||||
| ) { | |||||
| unsigned int i; | |||||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||||
| a->limb[i] = b->limb[i] & mask; | |||||
| } | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| }; /* extern "C" */ | }; /* extern "C" */ | ||||
| #endif | #endif | ||||
| @@ -11,6 +11,21 @@ | |||||
| #include "field.h" | #include "field.h" | ||||
| #include "ec_point.h" // TODO | #include "ec_point.h" // TODO | ||||
| mask_t | |||||
| field_eq ( | |||||
| const struct field_t *a, | |||||
| const struct field_t *b | |||||
| ) { | |||||
| struct field_t ra, rb; | |||||
| field_copy(&ra, a); | |||||
| field_copy(&rb, b); | |||||
| field_weak_reduce(&ra); | |||||
| field_weak_reduce(&rb); | |||||
| field_sub(&ra, &ra, &rb); | |||||
| field_bias(&ra, 2); | |||||
| return field_is_zero(&ra); | |||||
| } | |||||
| void | void | ||||
| field_inverse ( | field_inverse ( | ||||
| struct field_t* a, | struct field_t* a, | ||||
| @@ -52,8 +52,30 @@ field_mulw_scc_wr ( | |||||
| field_weak_reduce(out); | field_weak_reduce(out); | ||||
| } | } | ||||
| void | |||||
| field_isr ( | |||||
| static __inline__ void | |||||
| field_sqrn ( | |||||
| field_t *__restrict__ y, | |||||
| const field_t *x, | |||||
| int n | |||||
| ) { | |||||
| field_t tmp; | |||||
| assert(n>0); | |||||
| if (n&1) { | |||||
| field_sqr(y,x); | |||||
| n--; | |||||
| } else { | |||||
| field_sqr(&tmp,x); | |||||
| field_sqr(y,&tmp); | |||||
| n-=2; | |||||
| } | |||||
| for (; n; n-=2) { | |||||
| field_sqr(&tmp,y); | |||||
| field_sqr(y,&tmp); | |||||
| } | |||||
| } | |||||
| void | |||||
| field_isr ( /* TODO: MAGIC */ | |||||
| struct field_t* a, | struct field_t* a, | ||||
| const struct field_t* x | const struct field_t* x | ||||
| ) { | ) { | ||||
| @@ -433,7 +455,7 @@ serialize_montgomery ( | |||||
| field_mul ( &L0, &a->xd, &L2 ); | field_mul ( &L0, &a->xd, &L2 ); | ||||
| L5 = field_is_zero( &a->zd ); | L5 = field_is_zero( &a->zd ); | ||||
| L6 = - L5; | L6 = - L5; | ||||
| field_mask ( &L1, &L0, L5 ); | |||||
| constant_time_mask ( &L1, &L0, sizeof(L1), L5 ); | |||||
| field_add ( &L2, &L1, &a->zd ); | field_add ( &L2, &L1, &a->zd ); | ||||
| L4 = ~ L5; | L4 = ~ L5; | ||||
| field_mul ( &L1, sbz, &L3 ); | field_mul ( &L1, sbz, &L3 ); | ||||
| @@ -446,7 +468,7 @@ serialize_montgomery ( | |||||
| field_mul ( &L2, &L1, &L0 ); | field_mul ( &L2, &L1, &L0 ); | ||||
| field_sqr ( &L1, &L0 ); | field_sqr ( &L1, &L0 ); | ||||
| field_mul ( &L0, &L3, &L1 ); | field_mul ( &L0, &L3, &L1 ); | ||||
| field_mask ( b, &L2, L4 ); | |||||
| constant_time_mask ( b, &L2, sizeof(L1), L4 ); | |||||
| field_subw ( &L0, 1 ); | field_subw ( &L0, 1 ); | ||||
| field_bias ( &L0, 1 ); | field_bias ( &L0, 1 ); | ||||
| L5 = field_is_zero( &L0 ); | L5 = field_is_zero( &L0 ); | ||||
| @@ -0,0 +1,230 @@ | |||||
| /** | |||||
| * @file constant_time.h | |||||
| * @copyright | |||||
| * Copyright (c) 2014 Cryptography Research, Inc. \n | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | |||||
| * @author Mike Hamburg | |||||
| * | |||||
| * @brief Constant-time routines. | |||||
| */ | |||||
| #ifndef __CONSTANT_TIME_H__ | |||||
| #define __CONSTANT_TIME_H__ 1 | |||||
| #include "word.h" | |||||
| /* | |||||
| * Constant-time operations on hopefully-compile-time-sized memory | |||||
| * regions. Needed for flexibility / demagication: not all fields | |||||
| * have sizes which are multiples of the vector width, necessitating | |||||
| * a change from the Ed448 versions. | |||||
| * | |||||
| * These routines would be much simpler to define at the byte level, | |||||
| * but if not vectorized they would be a significant fraction of the | |||||
| * runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of | |||||
| * signing time, vs 6% on Haswell with its fancy AVX2 vectors. | |||||
| * | |||||
| * If the compiler could do a good job of autovectorizing the code, | |||||
| * we could just leave it with the byte definition. But that's unlikely | |||||
| * on most deployed compilers, especially if you consider that pcmpeq[size] | |||||
| * is much faster than moving a scalar to the vector unit (which is what | |||||
| * a naive autovectorizer will do with constant_time_lookup on Intel). | |||||
| * | |||||
| * Instead, we're putting our trust in the loop unroller and unswitcher. | |||||
| * | |||||
| * TODO: verify correctness and performance on each platform, to make sure | |||||
| * that there are no regressions. | |||||
| */ | |||||
| /** | |||||
| * Unaligned big (vector?) register. | |||||
| */ | |||||
| typedef struct { | |||||
| big_register_t unaligned; | |||||
| } __attribute__((packed)) unaligned_br_t; | |||||
| /** | |||||
| * Unaligned word register, for architectures where that matters. | |||||
| */ | |||||
| typedef struct { | |||||
| word_t unaligned; | |||||
| } __attribute__((packed)) unaligned_word_t; | |||||
| /** | |||||
| * @brief Constant-time conditional swap. | |||||
| * | |||||
| * If doswap, then swap elem_bytes between *a and *b. | |||||
| * | |||||
| * *a and *b must not alias. Also, they must be at least as aligned | |||||
| * as their sizes, if the CPU cares about that sort of thing. | |||||
| */ | |||||
| static __inline__ void | |||||
| __attribute__((unused,always_inline)) | |||||
| constant_time_cond_swap ( | |||||
| void *__restrict__ a_, | |||||
| void *__restrict__ b_, | |||||
| word_t elem_bytes, | |||||
| mask_t doswap | |||||
| ) { | |||||
| word_t k; | |||||
| unsigned char *a = (unsigned char *)a_; | |||||
| unsigned char *b = (unsigned char *)b_; | |||||
| big_register_t br_mask = br_set_to_mask(doswap); | |||||
| for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||||
| if (elem_bytes % sizeof(big_register_t)) { | |||||
| /* unaligned */ | |||||
| big_register_t xor = | |||||
| ((unaligned_br_t*)(&a[k]))->unaligned | |||||
| ^ ((unaligned_br_t*)(&b[k]))->unaligned; | |||||
| xor &= br_mask; | |||||
| ((unaligned_br_t*)(&a[k]))->unaligned ^= xor; | |||||
| ((unaligned_br_t*)(&b[k]))->unaligned ^= xor; | |||||
| } else { | |||||
| /* aligned */ | |||||
| big_register_t xor = | |||||
| *((big_register_t*)(&a[k])) | |||||
| ^ *((big_register_t*)(&b[k])); | |||||
| xor &= br_mask; | |||||
| *((big_register_t*)(&a[k])) ^= xor; | |||||
| *((big_register_t*)(&b[k])) ^= xor; | |||||
| } | |||||
| } | |||||
| if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||||
| for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||||
| if (elem_bytes % sizeof(word_t)) { | |||||
| /* unaligned */ | |||||
| word_t xor = | |||||
| ((unaligned_word_t*)(&a[k]))->unaligned | |||||
| ^ ((unaligned_word_t*)(&b[k]))->unaligned; | |||||
| xor &= doswap; | |||||
| ((unaligned_word_t*)(&a[k]))->unaligned ^= xor; | |||||
| ((unaligned_word_t*)(&b[k]))->unaligned ^= xor; | |||||
| } else { | |||||
| /* aligned */ | |||||
| word_t xor = | |||||
| *((word_t*)(&a[k])) | |||||
| ^ *((word_t*)(&b[k])); | |||||
| xor &= doswap; | |||||
| *((word_t*)(&a[k])) ^= xor; | |||||
| *((word_t*)(&b[k])) ^= xor; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (elem_bytes % sizeof(word_t)) { | |||||
| for (; k<elem_bytes; k+=1) { | |||||
| unsigned char xor = a[k] ^ b[k]; | |||||
| xor &= doswap; | |||||
| a[k] ^= xor; | |||||
| b[k] ^= xor; | |||||
| } | |||||
| } | |||||
| } | |||||
| /** | |||||
| * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes); | |||||
| * | |||||
| * The table must be at least as aligned as elem_bytes. The output must be vector aligned. | |||||
| * The table and output must not alias. | |||||
| */ | |||||
| static __inline__ void | |||||
| __attribute__((unused,always_inline)) | |||||
| constant_time_lookup ( | |||||
| void *__restrict__ out_, | |||||
| const void *table_, | |||||
| word_t elem_bytes, | |||||
| word_t n_table, | |||||
| word_t idx | |||||
| ) { | |||||
| big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx); | |||||
| /* Can't do pointer arithmetic on void* */ | |||||
| unsigned char *out = (unsigned char *)out_; | |||||
| const unsigned char *table = (const unsigned char *)table_; | |||||
| word_t j,k; | |||||
| really_memset(out, 0, elem_bytes); | |||||
| for (j=0; j<n_table; j++, big_i-=big_one) { | |||||
| big_register_t br_mask = br_is_zero(big_i); | |||||
| for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||||
| if (elem_bytes % sizeof(big_register_t)) { | |||||
| /* input unaligned, output aligned */ | |||||
| *(big_register_t *)(out+k) |= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned; | |||||
| } else { | |||||
| /* aligned */ | |||||
| *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]); | |||||
| } | |||||
| } | |||||
| word_t mask = word_is_zero(idx^j); | |||||
| if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||||
| for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||||
| if (elem_bytes % sizeof(word_t)) { | |||||
| /* input unaligned, output aligned */ | |||||
| *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned; | |||||
| } else { | |||||
| /* aligned */ | |||||
| *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (elem_bytes % sizeof(word_t)) { | |||||
| for (; k<elem_bytes; k+=1) { | |||||
| out[k] |= mask & table[k+j*elem_bytes]; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| /** | |||||
| * @brief Constant-time a = b&mask. | |||||
| * | |||||
| * The input and output must be at least as aligned as elem_bytes. | |||||
| */ | |||||
| static __inline__ void | |||||
| __attribute__((unused,always_inline)) | |||||
| constant_time_mask ( | |||||
| void *__restrict__ a_, | |||||
| const void *b_, | |||||
| word_t elem_bytes, | |||||
| mask_t mask | |||||
| ) { | |||||
| unsigned char *a = (unsigned char *)a_; | |||||
| const unsigned char *b = (const unsigned char *)b_; | |||||
| word_t k; | |||||
| big_register_t br_mask = br_set_to_mask(mask); | |||||
| for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||||
| if (elem_bytes % sizeof(big_register_t)) { | |||||
| /* unaligned */ | |||||
| ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned; | |||||
| } else { | |||||
| /* aligned */ | |||||
| *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]); | |||||
| } | |||||
| } | |||||
| if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||||
| for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||||
| if (elem_bytes % sizeof(word_t)) { | |||||
| /* unaligned */ | |||||
| ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned; | |||||
| } else { | |||||
| /* aligned */ | |||||
| *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (elem_bytes % sizeof(word_t)) { | |||||
| for (; k<elem_bytes; k+=1) { | |||||
| a[k] = mask & b[k]; | |||||
| } | |||||
| } | |||||
| } | |||||
| #endif /* __CONSTANT_TIME_H__ */ | |||||
| @@ -11,6 +11,7 @@ | |||||
| #define __CC_INCLUDED_EC_POINT_H__ | #define __CC_INCLUDED_EC_POINT_H__ | ||||
| #include "field.h" | #include "field.h" | ||||
| #include "constant_time.h" | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| @@ -150,43 +151,6 @@ copy_tw_pniels ( | |||||
| const struct tw_pniels_t* ds | const struct tw_pniels_t* ds | ||||
| ) __attribute__((unused,always_inline)); | ) __attribute__((unused,always_inline)); | ||||
| /** | |||||
| * Returns 1/sqrt(+- x). | |||||
| * | |||||
| * The Legendre symbol of the result is the same as that of the | |||||
| * input. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| field_isr ( | |||||
| struct field_t* a, | |||||
| const struct field_t* x | |||||
| ); | |||||
| /** | |||||
| * Batch inverts out[i] = 1/in[i] | |||||
| * | |||||
| * If any input is zero, all the outputs will be zero. | |||||
| */ | |||||
| void | |||||
| field_simultaneous_invert ( | |||||
| struct p448_t *__restrict__ out, | |||||
| const struct p448_t *in, | |||||
| unsigned int n | |||||
| ); | |||||
| /** | |||||
| * Returns 1/x. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| field_inverse ( | |||||
| struct field_t* a, | |||||
| const struct field_t* x | |||||
| ); | |||||
| /** | /** | ||||
| * Add two points on a twisted Edwards curve, one in Extensible form | * Add two points on a twisted Edwards curve, one in Extensible form | ||||
| * and the other in half-Niels form. | * and the other in half-Niels form. | ||||
| @@ -490,7 +454,7 @@ cond_negate_tw_niels ( | |||||
| struct tw_niels_t *n, | struct tw_niels_t *n, | ||||
| mask_t doNegate | mask_t doNegate | ||||
| ) { | ) { | ||||
| field_cond_swap(&n->a, &n->b, doNegate); | |||||
| constant_time_cond_swap(&n->a, &n->b, sizeof(n->a), doNegate); | |||||
| field_cond_neg(&n->c, doNegate); | field_cond_neg(&n->c, doNegate); | ||||
| } | } | ||||
| @@ -9,21 +9,13 @@ | |||||
| #ifndef __FIELD_H__ | #ifndef __FIELD_H__ | ||||
| #define __FIELD_H__ | #define __FIELD_H__ | ||||
| #include "p448.h" | |||||
| #include <string.h> | |||||
| #include "p448.h" | |||||
| #define FIELD_BITS 448 | #define FIELD_BITS 448 | ||||
| #define FIELD_BYTES (1+(FIELD_BITS-1)/8) | |||||
| #define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t)) | |||||
| /** | |||||
| * @brief For GMP tests: little-endian representation of the field modulus. | |||||
| */ | |||||
| extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||||
| #define field_t p448_t | #define field_t p448_t | ||||
| #define field_mul p448_mul | #define field_mul p448_mul | ||||
| #define field_sqr p448_sqr | #define field_sqr p448_sqr | ||||
| #define field_sqrn p448_sqrn | |||||
| #define field_add p448_add | #define field_add p448_add | ||||
| #define field_sub p448_sub | #define field_sub p448_sub | ||||
| #define field_mulw p448_mulw | #define field_mulw p448_mulw | ||||
| @@ -32,15 +24,80 @@ extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||||
| #define field_neg p448_neg | #define field_neg p448_neg | ||||
| #define field_set_ui p448_set_ui | #define field_set_ui p448_set_ui | ||||
| #define field_bias p448_bias | #define field_bias p448_bias | ||||
| #define field_copy p448_copy | |||||
| #define field_mask p448_mask | |||||
| #define field_weak_reduce p448_weak_reduce | #define field_weak_reduce p448_weak_reduce | ||||
| #define field_strong_reduce p448_strong_reduce | #define field_strong_reduce p448_strong_reduce | ||||
| #define field_cond_swap p448_cond_swap | |||||
| #define field_cond_neg p448_cond_neg | #define field_cond_neg p448_cond_neg | ||||
| #define field_serialize p448_serialize | #define field_serialize p448_serialize | ||||
| #define field_deserialize p448_deserialize | #define field_deserialize p448_deserialize | ||||
| #define field_eq p448_eq | |||||
| #define field_is_zero p448_is_zero | #define field_is_zero p448_is_zero | ||||
| /** @brief Bytes in a field element */ | |||||
| #define FIELD_BYTES (1+(FIELD_BITS-1)/8) | |||||
/**
 * @brief Words in a field element.
 *
 * Rounds FIELD_BITS up to a whole number of word_t.  The divisor is the
 * number of BITS per word (8*sizeof), not the number of bytes, so that a
 * 448-bit field needs 7 64-bit words rather than 56.
 */
#define FIELD_WORDS (1+(FIELD_BITS-1)/(8*sizeof(word_t)))
| /** | |||||
| * @brief For GMP tests: little-endian representation of the field modulus. | |||||
| */ | |||||
| extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||||
| /** | |||||
| * Copy one field element to another. | |||||
| */ | |||||
| static inline void | |||||
| __attribute__((unused,always_inline)) | |||||
| field_copy ( | |||||
| struct field_t *__restrict__ a, | |||||
| const struct field_t *__restrict__ b | |||||
| ) { | |||||
| memcpy(a,b,sizeof(*a)); | |||||
| } | |||||
| /** | |||||
| * Returns 1/sqrt(+- x). | |||||
| * | |||||
| * The Legendre symbol of the result is the same as that of the | |||||
| * input. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| field_isr ( | |||||
| struct field_t* a, | |||||
| const struct field_t* x | |||||
| ); | |||||
/**
 * Batch inverts out[i] = 1/in[i]
 *
 * If any input is zero, all the outputs will be zero.
 *
 * Declared in terms of field_t, like the other prototypes in this header,
 * so it does not name the concrete p448 type directly.
 *
 * @param [out] out Receives the inverses; must not alias in.
 * @param [in] in Elements to invert.
 * @param [in] n Number of elements (presumably the length of both arrays).
 */
void
field_simultaneous_invert (
    struct field_t *__restrict__ out,
    const struct field_t *in,
    unsigned int n
);
| /** | |||||
| * Returns 1/x. | |||||
| * | |||||
| * If x=0, returns 0. | |||||
| */ | |||||
| void | |||||
| field_inverse ( | |||||
| struct field_t* a, | |||||
| const struct field_t* x | |||||
| ); | |||||
| /** | |||||
| * Returns -1 if a==b, 0 otherwise. | |||||
| */ | |||||
| mask_t | |||||
| field_eq ( | |||||
| const struct field_t *a, | |||||
| const struct field_t *b | |||||
| ); | |||||
| #endif /* __FIELD_H__ */ | #endif /* __FIELD_H__ */ | ||||
| @@ -143,6 +143,15 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); | |||||
| return (big_register_t)x; | return (big_register_t)x; | ||||
| } | } | ||||
| #endif | #endif | ||||
| /** | |||||
| * Return -1 if x==0, and 0 otherwise. | |||||
| */ | |||||
| static __inline__ mask_t | |||||
| __attribute__((always_inline,unused)) | |||||
| word_is_zero(word_t x) { | |||||
| return (mask_t)((((dword_t)(x)) - 1)>>WORD_BITS); | |||||
| } | |||||
| #if __AVX2__ | #if __AVX2__ | ||||
| static __inline__ big_register_t | static __inline__ big_register_t | ||||
| @@ -11,6 +11,7 @@ | |||||
| #include "intrinsics.h" | #include "intrinsics.h" | ||||
| #include "scalarmul.h" | #include "scalarmul.h" | ||||
| #include "barrett_field.h" | #include "barrett_field.h" | ||||
| #include "constant_time.h" | |||||
| mask_t | mask_t | ||||
| montgomery_ladder ( | montgomery_ladder ( | ||||
| @@ -29,15 +30,15 @@ montgomery_ladder ( | |||||
| word_t w = scalar[j]; | word_t w = scalar[j]; | ||||
| for (i=n; i>=0; i--) { | for (i=n; i>=0; i--) { | ||||
| mask_t flip = -((w>>i)&1); | mask_t flip = -((w>>i)&1); | ||||
| field_cond_swap(&mont.xa,&mont.xd,flip^pflip); | |||||
| field_cond_swap(&mont.za,&mont.zd,flip^pflip); | |||||
| constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),flip^pflip); | |||||
| constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),flip^pflip); | |||||
| montgomery_step(&mont); | montgomery_step(&mont); | ||||
| pflip = flip; | pflip = flip; | ||||
| } | } | ||||
| n = WORD_BITS-1; | n = WORD_BITS-1; | ||||
| } | } | ||||
| field_cond_swap(&mont.xa,&mont.xd,pflip); | |||||
| field_cond_swap(&mont.za,&mont.zd,pflip); | |||||
| constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),pflip); | |||||
| constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),pflip); | |||||
| assert(n_extra_doubles < INT_MAX); | assert(n_extra_doubles < INT_MAX); | ||||
| for (j=0; j<(int)n_extra_doubles; j++) { | for (j=0; j<(int)n_extra_doubles; j++) { | ||||
| @@ -47,6 +48,29 @@ montgomery_ladder ( | |||||
| return serialize_montgomery(out, &mont, in); | return serialize_montgomery(out, &mont, in); | ||||
| } | } | ||||
| static __inline__ void | |||||
| __attribute__((unused,always_inline)) | |||||
| constant_time_lookup_tw_pniels ( | |||||
| struct tw_pniels_t *out, | |||||
| const struct tw_pniels_t *in, | |||||
| int nin, | |||||
| int idx | |||||
| ) { | |||||
| constant_time_lookup(out,in,sizeof(*out),nin,idx); | |||||
| } | |||||
| static __inline__ void | |||||
| __attribute__((unused,always_inline)) | |||||
| constant_time_lookup_tw_niels ( | |||||
| struct tw_niels_t *out, | |||||
| const struct tw_niels_t *in, | |||||
| int nin, | |||||
| int idx | |||||
| ) { | |||||
| constant_time_lookup(out,in,sizeof(*out),nin,idx); | |||||
| } | |||||
| /* | |||||
| static __inline__ void | static __inline__ void | ||||
| constant_time_lookup_tw_pniels ( | constant_time_lookup_tw_pniels ( | ||||
| struct tw_pniels_t *out, | struct tw_pniels_t *out, | ||||
| @@ -90,6 +114,7 @@ constant_time_lookup_tw_niels ( | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| */ | |||||
| static void | static void | ||||
| convert_to_signed_window_form ( | convert_to_signed_window_form ( | ||||