diff --git a/src/GENERATED/c/curve25519/decaf.c b/src/GENERATED/c/curve25519/decaf.c index 3760272..9e5a3c7 100644 --- a/src/GENERATED/c/curve25519/decaf.c +++ b/src/GENERATED/c/curve25519/decaf.c @@ -1156,12 +1156,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] & 0x80); enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] &= ~0x80; - mask_t succ = DECAF_TRUE; + mask_t succ = gf_deserialize(p->y, enc2, 1); #if 7 == 0 - succ = word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]); + succ &= word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]); #endif - - succ &= gf_deserialize(p->y, enc2, 1); gf_sqr(p->x,p->y); gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ @@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( decaf_bzero(enc2,sizeof(enc2)); assert(API_NS(point_valid)(p) || ~succ); - return decaf_succeed_if(succ); + return decaf_succeed_if(mask_to_bool(succ)); } decaf_error_t decaf_x25519 ( diff --git a/src/GENERATED/c/ed448goldilocks/decaf.c b/src/GENERATED/c/ed448goldilocks/decaf.c index d9ba6fc..8dea34c 100644 --- a/src/GENERATED/c/ed448goldilocks/decaf.c +++ b/src/GENERATED/c/ed448goldilocks/decaf.c @@ -1156,12 +1156,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80); enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80; - mask_t succ = DECAF_TRUE; + mask_t succ = gf_deserialize(p->y, enc2, 1); #if 0 == 0 - succ = word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]); + succ &= word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]); #endif - - succ &= gf_deserialize(p->y, enc2, 1); gf_sqr(p->x,p->y); gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ @@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( decaf_bzero(enc2,sizeof(enc2)); assert(API_NS(point_valid)(p) || ~succ); - return decaf_succeed_if(succ); + return decaf_succeed_if(mask_to_bool(succ)); } decaf_error_t decaf_x448 ( diff --git a/src/p448/arch_32/f_impl.c b/src/p448/arch_32/f_impl.c index 1abffe7..0770bd9 100644 --- a/src/p448/arch_32/f_impl.c +++ b/src/p448/arch_32/f_impl.c @@ -4,6 +4,14 @@ #include "f_field.h" +#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) && !I_HATE_UNROLLED_LOOPS) \ + || defined(DECAF_FORCE_UNROLL) +#define REPEAT8(_x) _x _x _x _x _x _x _x _x +#define FOR_LIMB(_i,_start,_end,_x) do { _i=_start; REPEAT8( if (_i<_end) { _x; } _i++;) } while (0) +#else +#define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0) +#endif + void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { const uint32_t *a = as->limb, *b = bs->limb; uint32_t *c = cs->limb; @@ -19,24 +27,24 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { bb[i] = b[i] + b[i+8]; } - for (j=0; j<8; j++) { + FOR_LIMB(j,0,8,{ accum2 = 0; - for (i=0; i<=j; i++) { + FOR_LIMB (i,0,j+1,{ accum2 += widemul(a[j-i],b[i]); accum1 += widemul(aa[j-i],bb[i]); accum0 += widemul(a[8+j-i], b[8+i]); - } + }); accum1 -= accum2; accum0 += accum2; accum2 = 0; - - for (; i<8; i++) { + + FOR_LIMB (i,j+1,8,{ accum0 -= widemul(a[8+j-i], b[i]); accum2 += widemul(aa[8+j-i], bb[i]); accum1 += widemul(a[16+j-i], b[8+i]); - } + }); accum1 += accum2; accum0 += accum2; @@ -46,7 +54,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { accum0 >>= 28; accum1 >>= 28; - } + }); accum0 += accum1; accum0 += c[8]; @@ -66,24 +74,17 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { const uint32_t *a = as->limb; uint32_t *c = cs->limb; - uint64_t accum0, accum8; + uint64_t accum0 = 0, accum8 = 0; uint32_t mask = (1ull<<28)-1; int i; - - accum0 = widemul(b, a[0]); - accum8 = widemul(b, a[8]); - - c[0] = accum0 & mask; accum0 >>= 28; - c[8] = accum8 & mask; accum8 >>= 28; - - for (i=1; i<8; i++) { + FOR_LIMB(i,0,8,{ accum0 += widemul(b, a[i]); accum8 += widemul(b, a[i+8]); c[i] = accum0 & mask; accum0 >>= 28; c[i+8] = accum8 & mask; accum8 >>= 28; - } + }); accum0 += accum8 + c[8]; c[8] = accum0 & mask; diff --git a/src/per_curve/decaf.tmpl.c b/src/per_curve/decaf.tmpl.c index 81d5be4..36ddd73 100644 --- a/src/per_curve/decaf.tmpl.c +++ b/src/per_curve/decaf.tmpl.c @@ -1145,12 +1145,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] & 0x80); enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] &= ~0x80; - mask_t succ = DECAF_TRUE; + mask_t succ = gf_deserialize(p->y, enc2, 1); #if $(gf_bits % 8) == 0 - succ = word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]); + succ &= word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]); #endif - - succ &= gf_deserialize(p->y, enc2, 1); gf_sqr(p->x,p->y); gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ @@ -1236,7 +1234,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( decaf_bzero(enc2,sizeof(enc2)); assert(API_NS(point_valid)(p) || ~succ); - return decaf_succeed_if(succ); + return decaf_succeed_if(mask_to_bool(succ)); } decaf_error_t decaf_x$(gf_shortname) (