From 8d6c51ee2867d99ca957d37a9198cd27c417b4f1 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Sun, 17 Jan 2016 15:46:56 -0800 Subject: [PATCH] Separate API word size from arch word size. This enables compiling a 32-bit arch on a 64-bit platform, eg NEON on AARCH64. It's probably more useful for cross-platform testing, though. The breakdown is as follows: * decaf_bool_t, decaf_word_t and decaf_error_t are as defined in the API. * DECAF_WORD_BITS is the size of a decaf_word_t. * decaf_word_t is used for scalars, so on every curve the scalar impls are the same (i.e. they follow the API's word size). * SC_LIMB macro always takes a 64-bit word. * non-prefixed word_t, mask_t, etc are as defined by the per-curve arch. * ARCH_WORD_BITS is the size of a word_t. * word_t is used for gf elements, so the curves may have different guts. --- src/decaf.c | 125 +++++++++++++--------- src/include/arch_32/arch_intrinsics.h | 2 +- src/include/arch_arm_32/arch_intrinsics.h | 2 +- src/include/arch_neon/arch_intrinsics.h | 2 +- src/include/arch_ref64/arch_intrinsics.h | 2 +- src/include/arch_x86_64/arch_intrinsics.h | 2 +- src/include/word.h | 13 ++- src/public_include/decaf/common.h | 4 + 8 files changed, 92 insertions(+), 60 deletions(-) diff --git a/src/decaf.c b/src/decaf.c index 5b53af2..644a7ba 100644 --- a/src/decaf.c +++ b/src/decaf.c @@ -38,10 +38,7 @@ extern const gf SQRT_MINUS_ONE; #endif -/* FIXME: this can be different from DECAF_WORD_BITS, and word_t can be different from decaf_word_t, - * eg when mixing and matching implementations for different curves. Homogenize this. 
- */ -#define WBITS WORD_BITS +#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */ const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}}; extern const scalar_t API_NS(sc_r2); @@ -65,15 +62,41 @@ const size_t API_NS2(alignof,precomputed_s) = 32; #define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i> (i*8*sizeof(word_t))); + } + return ret; +} + /** Constant time, x = is_z ? z : y */ static INLINE void -cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { +cond_sel(gf x, const gf y, const gf z, mask_t is_z) { constant_time_select(x,z,y,sizeof(gf),is_z,0); } /** Constant time, if (neg) x=-x; */ static void -cond_neg(gf x, decaf_bool_t neg) { +cond_neg(gf x, mask_t neg) { gf y; gf_sub(y,ZERO,x); cond_sel(x,x,y,neg); @@ -81,7 +104,7 @@ cond_neg(gf x, decaf_bool_t neg) { /** Constant time, if (swap) (x,y) = (y,x); */ static INLINE void -cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) { +cond_swap(gf x, gf_s *__restrict__ y, mask_t swap) { UNROLL for (unsigned int i=0; ilimb)/sizeof(x->limb[0]); i++) { decaf_word_t s = (x->limb[i] ^ y->limb[i]) & swap; x->limb[i] ^= s; @@ -90,8 +113,8 @@ cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) { } /** Inverse square root using addition chain. 
*/ -static decaf_bool_t -gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) { +static mask_t +gf_isqrt_chk(gf y, const gf x, mask_t allow_zero) { gf tmp0, tmp1; gf_isr((gf_s *)y, (const gf_s *)x); gf_sqr(tmp0,y); @@ -151,14 +174,14 @@ sc_subx( const scalar_t p, decaf_word_t extra ) { - dsword_t chain = 0; + decaf_dsword_t chain = 0; unsigned int i; for (i=0; ilimb[i]; out->limb[i] = chain; chain >>= WBITS; } - decaf_bool_t borrow = chain+extra; /* = 0 or -1 */ + decaf_word_t borrow = chain+extra; /* = 0 or -1 */ chain = 0; for (i=0; ilimb[i] ^ b->limb[i]; } - return word_is_zero(diff); + return mask_to_bool(word_is_zero(diff)); } /** identity = (0,1) */ @@ -354,9 +377,9 @@ deisogenize ( gf_s *__restrict__ s, gf_s *__restrict__ minus_t_over_s, const point_t p, - decaf_bool_t toggle_hibit_s, - decaf_bool_t toggle_hibit_t_over_s, - decaf_bool_t toggle_rotation + mask_t toggle_hibit_s, + mask_t toggle_hibit_t_over_s, + mask_t toggle_rotation ) { #if COFACTOR == 4 && !IMAGINE_TWIST (void) toggle_rotation; @@ -372,7 +395,7 @@ deisogenize ( gf_sub ( b, p->z, p->y ); gf_mul ( c, b, a ); gf_mulw_sgn ( b, c, -EDWARDS_D ); /* (a-d)(Z+Y)(Z-Y) */ - decaf_bool_t ok = gf_isqrt_chk ( a, b, DECAF_TRUE ); /* r in the paper */ + mask_t ok = gf_isqrt_chk ( a, b, DECAF_TRUE ); /* r in the paper */ (void)ok; assert(ok); gf_mulw_sgn ( b, a, -EDWARDS_D ); /* u in the paper */ gf_mul ( c, b, a ); /* ur */ @@ -413,13 +436,13 @@ deisogenize ( gf_mul ( a, p->z, t ); /* "tz" = T*Z */ gf_sqr ( b, a ); gf_mul ( d, b, c ); /* (TZ)^2 * (Z^2-aX^2) */ - decaf_bool_t ok = gf_isqrt_chk ( b, d, DECAF_TRUE ); + mask_t ok = gf_isqrt_chk ( b, d, DECAF_TRUE ); (void)ok; assert(ok); gf_mul ( d, b, a ); /* "osx" = 1 / sqrt(z^2-ax^2) */ gf_mul ( a, b, c ); gf_mul ( b, a, d ); /* 1/tz */ - decaf_bool_t rotate; + mask_t rotate; #if (COFACTOR == 8) { gf e; @@ -439,7 +462,7 @@ deisogenize ( gf_mul ( c, a, d ); // new "osx" gf_mul ( a, c, p->z ); gf_add ( a, a, a ); // 2 * "osx" * Z - decaf_bool_t tg1 = 
rotate ^ toggle_hibit_t_over_s ^~ hibit(a); + mask_t tg1 = rotate ^ toggle_hibit_t_over_s ^~ hibit(a); cond_neg ( c, tg1 ); cond_neg ( a, rotate ^ tg1 ); gf_mul ( d, b, p->z ); @@ -462,9 +485,9 @@ decaf_error_t API_NS(point_decode) ( decaf_bool_t allow_identity ) { gf s, a, b, c, d, e, f; - decaf_bool_t succ = gf_deserialize(s, ser), zero = gf_eq(s, ZERO); - allow_identity = ~word_is_zero(allow_identity); - succ &= allow_identity | ~zero; + mask_t succ = gf_deserialize(s, ser); + mask_t zero = gf_eq(s, ZERO); + succ &= bool_to_mask(allow_identity) | ~zero; succ &= ~hibit(s); gf_sqr ( a, s ); #if IMAGINE_TWIST @@ -484,7 +507,7 @@ decaf_error_t API_NS(point_decode) ( gf_mul ( b, e, d ); /* 1/t */ gf_mul ( d, e, c ); /* d = t / (s(1-as^2)) */ gf_mul ( e, d, f ); /* t/s */ - decaf_bool_t negtos = hibit(e); + mask_t negtos = hibit(e); cond_neg(b, negtos); cond_neg(d, negtos); @@ -513,7 +536,7 @@ decaf_error_t API_NS(point_decode) ( assert(API_NS(point_valid)(p) | ~succ); - return decaf_succeed_if(succ); + return decaf_succeed_if(mask_to_bool(succ)); } #if IMAGINE_TWIST @@ -596,7 +619,7 @@ static NOINLINE void point_double_internal ( point_t p, const point_t q, - decaf_bool_t before_double + int before_double ) { gf a, b, c, d; gf_sqr ( c, q->x ); @@ -651,7 +674,7 @@ decaf_error_t API_NS(scalar_decode)( ) { unsigned int i; scalar_decode_short(s, ser, SER_BYTES); - dsword_t accum = 0; + decaf_dsword_t accum = 0; for (i=0; ilimb[i] - sc_p->limb[i]) >> WBITS; } @@ -659,7 +682,7 @@ decaf_error_t API_NS(scalar_decode)( API_NS(scalar_mul)(s,s,API_NS(scalar_one)); /* ham-handed reduce */ - return decaf_succeed_if(accum); + return decaf_succeed_if(~word_is_zero(accum)); } void API_NS(scalar_destroy) ( @@ -726,7 +749,7 @@ void API_NS(scalar_encode)( static INLINE void cond_neg_niels ( niels_t n, - decaf_bool_t neg + mask_t neg ) { cond_swap(n->a, n->b, neg); cond_neg(n->c, neg); @@ -770,7 +793,7 @@ static NOINLINE void add_niels_to_pt ( point_t d, const niels_t e, - decaf_bool_t 
before_double + int before_double ) { gf a, b, c; gf_sub_nr ( b, d->y, d->x ); @@ -792,7 +815,7 @@ static NOINLINE void sub_niels_from_pt ( point_t d, const niels_t e, - decaf_bool_t before_double + int before_double ) { gf a, b, c; gf_sub_nr ( b, d->y, d->x ); @@ -814,7 +837,7 @@ static void add_pniels_to_pt ( point_t p, const pniels_t pn, - decaf_bool_t before_double + int before_double ) { gf L0; gf_mul ( L0, p->z, pn->z ); @@ -826,7 +849,7 @@ static void sub_pniels_from_pt ( point_t p, const pniels_t pn, - decaf_bool_t before_double + int before_double ) { gf L0; gf_mul ( L0, p->z, pn->z ); @@ -1111,7 +1134,7 @@ decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) { gf a, b; gf_mul ( a, p->y, q->x ); gf_mul ( b, q->y, p->x ); - decaf_bool_t succ = gf_eq(a,b); + mask_t succ = gf_eq(a,b); #if (COFACTOR == 8) && IMAGINE_TWIST gf_mul ( a, p->y, q->y ); @@ -1129,7 +1152,7 @@ decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) { succ |= gf_eq(a,b); #endif - return succ; + return mask_to_bool(succ); } void API_NS(point_from_hash_nonuniform) ( @@ -1166,7 +1189,7 @@ void API_NS(point_from_hash_nonuniform) ( gf_mul(rN,r,N); gf_mul(a,rN,D); - decaf_bool_t square = gf_isqrt_chk(e,a,DECAF_FALSE); + mask_t square = gf_isqrt_chk(e,a,DECAF_FALSE); /* b <- t/s */ cond_sel(c,r0,r,square); /* r? = sqr ? 
r : 1 */ @@ -1192,7 +1215,7 @@ void API_NS(point_from_hash_nonuniform) ( gf_mul(c,a,b); /* Normalize/negate */ - decaf_bool_t neg_s = hibit(a) ^ ~square; + mask_t neg_s = hibit(a) ^ ~square; cond_neg(a,neg_s); /* ends up negative if ~square */ /* b <- t */ @@ -1222,8 +1245,8 @@ API_NS(invert_elligator_nonuniform) ( const point_t p, uint16_t hint_ ) { - decaf_bool_t hint = hint_; - decaf_bool_t sgn_s = -(hint & 1), + mask_t hint = hint_; + mask_t sgn_s = -(hint & 1), sgn_t_over_s = -(hint>>1 & 1), sgn_r0 = -(hint>>2 & 1), sgn_ed_T = -(hint>>3 & 1); @@ -1234,7 +1257,7 @@ API_NS(invert_elligator_nonuniform) ( gf_mul(b,c,a); gf_sub(b,ONE,b); /* t+1 */ gf_sqr(c,a); /* s^2 */ - decaf_bool_t is_identity = gf_eq(p->t,ZERO); + mask_t is_identity = gf_eq(p->t,ZERO); { /* identity adjustments */ /* in case of identity, currently c=0, t=0, b=1, will encode to 1 */ @@ -1253,7 +1276,7 @@ API_NS(invert_elligator_nonuniform) ( #else gf_sub(d,ZERO,b); #endif - decaf_bool_t succ = gf_isqrt_chk(c,d,DECAF_TRUE); + mask_t succ = gf_isqrt_chk(c,d,DECAF_TRUE); gf_mul(b,a,c); cond_neg(b, sgn_r0^hibit(b)); @@ -1264,7 +1287,7 @@ API_NS(invert_elligator_nonuniform) ( gf_serialize(recovered_hash, b); /* TODO: deal with overflow flag */ - return decaf_succeed_if(succ); + return decaf_succeed_if(mask_to_bool(succ)); } void API_NS(point_from_hash_uniform) ( @@ -1295,7 +1318,7 @@ decaf_bool_t API_NS(point_valid) ( gf a,b,c; gf_mul(a,p->x,p->y); gf_mul(b,p->z,p->t); - decaf_bool_t out = gf_eq(a,b); + mask_t out = gf_eq(a,b); gf_sqr(a,p->x); gf_sqr(b,p->y); gf_sub(a,b,a); @@ -1305,7 +1328,7 @@ decaf_bool_t API_NS(point_valid) ( gf_add(b,b,c); out &= gf_eq(a,b); out &= ~gf_eq(p->z,ZERO); - return out; + return mask_to_bool(out); } void API_NS(point_debugging_torque) ( @@ -1502,7 +1525,7 @@ void API_NS(precomputed_scalarmul) ( } } - decaf_bool_t invert = (tab>>(t-1))-1; + mask_t invert = (tab>>(t-1))-1; tab ^= invert; tab &= (1<<(t-1)) - 1; @@ -1527,8 +1550,7 @@ void API_NS(point_cond_sel) ( const 
point_t b, decaf_bool_t pick_b ) { - pick_b = ~word_is_zero(pick_b); - constant_time_select(out,b,a,sizeof(point_t),pick_b,0); + constant_time_select(out,b,a,sizeof(point_t),bool_to_mask(pick_b),0); } void API_NS(scalar_cond_sel) ( @@ -1537,8 +1559,7 @@ void API_NS(scalar_cond_sel) ( const scalar_t b, decaf_bool_t pick_b ) { - pick_b = ~word_is_zero(pick_b); - constant_time_select(out,b,a,sizeof(scalar_t),pick_b,sizeof(out->limb[0])); + constant_time_select(out,b,a,sizeof(scalar_t),bool_to_mask(pick_b),sizeof(out->limb[0])); } /* FUTURE: restore Curve25519 Montgomery ladder? */ @@ -1550,13 +1571,13 @@ decaf_error_t API_NS(direct_scalarmul) ( decaf_bool_t short_circuit ) { point_t basep; - decaf_bool_t succ = decaf_successful(API_NS(point_decode)(basep, base, allow_identity)); - if (short_circuit && ~succ) return DECAF_FAILURE; + decaf_error_t succ = API_NS(point_decode)(basep, base, allow_identity); + if (short_circuit && succ != DECAF_SUCCESS) return succ; API_NS(point_cond_sel)(basep, API_NS(point_base), basep, succ); API_NS(point_scalarmul)(basep, basep, scalar); API_NS(point_encode)(scaled, basep); API_NS(point_destroy)(basep); - return decaf_succeed_if(succ); + return succ; } /** @@ -1580,7 +1601,7 @@ static int recode_wnaf ( * PERF MINOR: not technically WNAF, since last digits can be adjacent. Could be rtl. 
*/ for (i=SCALAR_BITS-1; i >= 0; i--) { - int bit = (scalar->limb[i/WORD_BITS] >> (i%WORD_BITS)) & 1; + int bit = (scalar->limb[i/WBITS] >> (i%WBITS)) & 1; current = 2*current + bit; /* diff --git a/src/include/arch_32/arch_intrinsics.h b/src/include/arch_32/arch_intrinsics.h index 4e9d159..f3908a2 100644 --- a/src/include/arch_32/arch_intrinsics.h +++ b/src/include/arch_32/arch_intrinsics.h @@ -5,7 +5,7 @@ #ifndef __ARCH_ARCH_32_ARCH_INTRINSICS_H__ #define __ARCH_ARCH_32_ARCH_INTRINSICS_H__ -#define WORD_BITS 32 +#define ARCH_WORD_BITS 32 static __inline__ __attribute((always_inline,unused)) uint32_t word_is_zero(uint32_t a) { diff --git a/src/include/arch_arm_32/arch_intrinsics.h b/src/include/arch_arm_32/arch_intrinsics.h index 86080b1..7451c6f 100644 --- a/src/include/arch_arm_32/arch_intrinsics.h +++ b/src/include/arch_arm_32/arch_intrinsics.h @@ -5,7 +5,7 @@ #ifndef __ARCH_ARM_32_ARCH_INTRINSICS_H__ #define __ARCH_ARM_32_ARCH_INTRINSICS_H__ -#define WORD_BITS 32 +#define ARCH_WORD_BITS 32 static __inline__ __attribute((always_inline,unused)) uint32_t word_is_zero(uint32_t a) { diff --git a/src/include/arch_neon/arch_intrinsics.h b/src/include/arch_neon/arch_intrinsics.h index 77b742d..1a1e14b 100644 --- a/src/include/arch_neon/arch_intrinsics.h +++ b/src/include/arch_neon/arch_intrinsics.h @@ -5,7 +5,7 @@ #ifndef __ARCH_NEON_ARCH_INTRINSICS_H__ #define __ARCH_NEON_ARCH_INTRINSICS_H__ -#define WORD_BITS 32 +#define ARCH_WORD_BITS 32 static __inline__ __attribute((always_inline,unused)) uint32_t word_is_zero(uint32_t a) { diff --git a/src/include/arch_ref64/arch_intrinsics.h b/src/include/arch_ref64/arch_intrinsics.h index 8413a2e..a1b1a74 100644 --- a/src/include/arch_ref64/arch_intrinsics.h +++ b/src/include/arch_ref64/arch_intrinsics.h @@ -5,7 +5,7 @@ #ifndef __ARCH_REF64_ARCH_INTRINSICS_H__ #define __ARCH_REF64_ARCH_INTRINSICS_H__ -#define WORD_BITS 64 +#define ARCH_WORD_BITS 64 static __inline__ __attribute((always_inline,unused)) uint64_t 
word_is_zero(uint64_t a) { diff --git a/src/include/arch_x86_64/arch_intrinsics.h b/src/include/arch_x86_64/arch_intrinsics.h index eb48db4..fc4ff5d 100644 --- a/src/include/arch_x86_64/arch_intrinsics.h +++ b/src/include/arch_x86_64/arch_intrinsics.h @@ -5,7 +5,7 @@ #ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__ #define __ARCH_X86_64_ARCH_INTRINSICS_H__ -#define WORD_BITS 64 +#define ARCH_WORD_BITS 64 #include diff --git a/src/include/word.h b/src/include/word.h index 54f2ff8..ced2202 100644 --- a/src/include/word.h +++ b/src/include/word.h @@ -31,7 +31,7 @@ #include #endif -#if (WORD_BITS == 64) +#if (ARCH_WORD_BITS == 64) typedef uint64_t word_t, mask_t; typedef __uint128_t dword_t; typedef int32_t hsword_t; @@ -47,8 +47,7 @@ #define U56LE(x) x##ull #define U60LE(x) x##ull #define letohWORD letoh64 - #define SC_LIMB(x) (x##ull) -#elif (WORD_BITS == 32) +#elif (ARCH_WORD_BITS == 32) typedef uint32_t word_t, mask_t; typedef uint64_t dword_t; typedef int16_t hsword_t; @@ -63,6 +62,14 @@ #define U56LE(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 #define U60LE(x) (x##ull)&((1ull<<30)-1), (x##ull)>>30 #define letohWORD letoh32 +#else + #error "For now, libdecaf only supports 32- and 64-bit architectures." +#endif + +/* Scalar limbs are keyed off of the API word size instead of the arch word size. */ +#if DECAF_WORD_BITS == 64 + #define SC_LIMB(x) (x##ull) +#elif DECAF_WORD_BITS == 32 #define SC_LIMB(x) ((uint32_t)x##ull),(x##ull>>32) #else #error "For now, libdecaf only supports 32- and 64-bit architectures." 
diff --git a/src/public_include/decaf/common.h b/src/public_include/decaf/common.h index 54d75e4..d415c5d 100644 --- a/src/public_include/decaf/common.h +++ b/src/public_include/decaf/common.h @@ -49,13 +49,17 @@ extern "C" { && !defined(DECAF_FORCE_32_BIT) #define DECAF_WORD_BITS 64 /**< The number of bits in a word */ typedef uint64_t decaf_word_t; /**< Word size for internal computations */ +typedef int64_t decaf_sword_t; /**< Signed word size for internal computations */ typedef uint64_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */ typedef __uint128_t decaf_dword_t; /**< Double-word size for internal computations */ +typedef __int128_t decaf_dsword_t; /**< Signed double-word size for internal computations */ #else #define DECAF_WORD_BITS 32 /**< The number of bits in a word */ typedef uint32_t decaf_word_t; /**< Word size for internal computations */ +typedef int32_t decaf_sword_t; /**< Signed word size for internal computations */ typedef uint32_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */ typedef uint64_t decaf_dword_t; /**< Double-word size for internal computations */ +typedef int64_t decaf_dsword_t; /**< Signed double-word size for internal computations */ #endif /** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */