From 196e9a85f8925759132bac15e2e9679e283f23d6 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Fri, 20 Nov 2015 14:53:09 -0800 Subject: [PATCH] cleanup word.h --- src/decaf_fast.c | 2 - src/include/field.h | 9 +-- src/include/word.h | 159 ++++++++++++++++++++------------------------ 3 files changed, 74 insertions(+), 96 deletions(-) diff --git a/src/decaf_fast.c b/src/decaf_fast.c index d0c95a9..b5562eb 100644 --- a/src/decaf_fast.c +++ b/src/decaf_fast.c @@ -41,8 +41,6 @@ extern const gf SQRT_MINUS_ONE; extern const gf SQRT_ONE_MINUS_D; /* TODO: Intern this? */ #endif -#define NOINLINE __attribute__((noinline)) -#define INLINE inline __attribute__((always_inline)) #define WBITS DECAF_WORD_BITS const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}}; diff --git a/src/include/field.h b/src/include/field.h index 6a13a92..0121c39 100644 --- a/src/include/field.h +++ b/src/include/field.h @@ -22,17 +22,12 @@ * * If x=0, returns 0. */ -void -gf_isr ( - gf a, - const gf x -); +void gf_isr(gf a, const gf x); /** * Square x, n times. */ -static __inline__ void -__attribute__((unused,always_inline)) +static INLINE UNUSED void gf_sqrn ( gf_s *__restrict__ y, const gf x, diff --git a/src/include/word.h b/src/include/word.h index 0464f37..c66cb36 100644 --- a/src/include/word.h +++ b/src/include/word.h @@ -31,7 +31,7 @@ #if (WORD_BITS == 64) typedef uint32_t hword_t; - typedef uint64_t word_t; + typedef uint64_t word_t, mask_t; typedef __uint128_t dword_t; typedef int32_t hsword_t; typedef int64_t sword_t; @@ -49,7 +49,7 @@ #define SC_LIMB(x) (x##ull) #elif (WORD_BITS == 32) typedef uint16_t hword_t; - typedef uint32_t word_t; + typedef uint32_t word_t, mask_t; typedef uint64_t dword_t; typedef int16_t hsword_t; typedef int32_t sword_t; @@ -65,44 +65,41 @@ #define letohWORD letoh32 #define SC_LIMB(x) (x##ull) #else -#error "For now, libdecaf only supports 32- and 64-bit architectures." + #error "For now, libdecaf only supports 32- and 64-bit architectures." #endif -#define DIV_CEIL(_x,_y) (((_x) + (_y) - 1)/(_y)) -#define ROUND_UP(_x,_y) (DIV_CEIL((_x),(_y))*(_y)) -#define WORDS_FOR_BITS(_x) (DIV_CEIL((_x),WORD_BITS)) - -typedef word_t mask_t; -static const mask_t MASK_FAILURE = 0, MASK_SUCCESS = -(mask_t)1; - +/* General utilities */ +#define NOINLINE __attribute__((noinline)) +#define UNUSED __attribute__((unused)) +#define INLINE __inline__ __attribute__((always_inline)) #ifdef __ARM_NEON__ -typedef uint32x4_t vecmask_t; + typedef uint32x4_t vecmask_t; #elif __clang__ -typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2))); -typedef int64_t int64x2_t __attribute__((ext_vector_type(2))); -typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4))); -typedef int64_t int64x4_t __attribute__((ext_vector_type(4))); -typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4))); -typedef int32_t int32x4_t __attribute__((ext_vector_type(4))); -typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2))); -typedef int32_t int32x2_t __attribute__((ext_vector_type(2))); -typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8))); -typedef int32_t int32x8_t __attribute__((ext_vector_type(8))); -typedef word_t vecmask_t __attribute__((ext_vector_type(4))); -#else /* GCC-cleanliness */ -typedef uint64_t uint64x2_t __attribute__((vector_size(16))); -typedef int64_t int64x2_t __attribute__((vector_size(16))); -typedef uint64_t uint64x4_t __attribute__((vector_size(32))); -typedef int64_t int64x4_t __attribute__((vector_size(32))); -typedef uint32_t uint32x4_t __attribute__((vector_size(16))); -typedef int32_t int32x4_t __attribute__((vector_size(16))); -typedef uint32_t uint32x2_t __attribute__((vector_size(8))); -typedef int32_t int32x2_t __attribute__((vector_size(8))); -typedef uint32_t uint32x8_t __attribute__((vector_size(32))); -typedef int32_t int32x8_t __attribute__((vector_size(32))); -typedef word_t vecmask_t __attribute__((vector_size(32))); + typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2))); + typedef int64_t int64x2_t __attribute__((ext_vector_type(2))); + typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4))); + typedef int64_t int64x4_t __attribute__((ext_vector_type(4))); + typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4))); + typedef int32_t int32x4_t __attribute__((ext_vector_type(4))); + typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2))); + typedef int32_t int32x2_t __attribute__((ext_vector_type(2))); + typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8))); + typedef int32_t int32x8_t __attribute__((ext_vector_type(8))); + typedef word_t vecmask_t __attribute__((ext_vector_type(4))); +#else /* GCC, hopefully? */ + typedef uint64_t uint64x2_t __attribute__((vector_size(16))); + typedef int64_t int64x2_t __attribute__((vector_size(16))); + typedef uint64_t uint64x4_t __attribute__((vector_size(32))); + typedef int64_t int64x4_t __attribute__((vector_size(32))); + typedef uint32_t uint32x4_t __attribute__((vector_size(16))); + typedef int32_t int32x4_t __attribute__((vector_size(16))); + typedef uint32_t uint32x2_t __attribute__((vector_size(8))); + typedef int32_t int32x2_t __attribute__((vector_size(8))); + typedef uint32_t uint32x8_t __attribute__((vector_size(32))); + typedef int32_t int32x8_t __attribute__((vector_size(32))); + typedef word_t vecmask_t __attribute__((vector_size(32))); #endif #if __AVX2__ @@ -111,7 +108,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); typedef uint64x4_t uint64xn_t; typedef uint32x8_t uint32xn_t; - static __inline__ big_register_t + static INLINE big_register_t br_set_to_mask(mask_t x) { uint32_t y = (uint32_t)x; big_register_t ret = {y,y,y,y,y,y,y,y}; @@ -123,7 +120,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); typedef uint64x2_t uint64xn_t; typedef uint32x4_t uint32xn_t; - static __inline__ big_register_t + static INLINE big_register_t br_set_to_mask(mask_t x) { uint32_t y = x; big_register_t ret = {y,y,y,y}; @@ -134,7 +131,8 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); typedef uint32x4_t big_register_t; typedef uint64x2_t uint64xn_t; typedef uint32x4_t uint32xn_t; - static __inline__ big_register_t + + static INLINE big_register_t br_set_to_mask(mask_t x) { return vdupq_n_u32(x); } @@ -143,7 +141,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); typedef uint64_t big_register_t, uint64xn_t; typedef uint32_t uint32xn_t; - static __inline__ big_register_t + static INLINE big_register_t br_set_to_mask(mask_t x) { return (big_register_t)x; } @@ -153,7 +151,7 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); typedef uint32_t uint32xn_t; typedef uint32_t big_register_t; - static __inline__ big_register_t + static INLINE big_register_t br_set_to_mask(mask_t x) { return (big_register_t)x; } @@ -170,49 +168,40 @@ typedef struct { /** * Return -1 if x==0, and 0 otherwise. */ -static __inline__ mask_t -__attribute__((always_inline,unused)) +static INLINE UNUSED mask_t word_is_zero(word_t x) { return (mask_t)((((dword_t)(x)) - 1)>>WORD_BITS); } #if __AVX2__ -static __inline__ big_register_t -br_is_zero(big_register_t x) { - return (big_register_t)(x == br_set_to_mask(0)); -} + static INLINE big_register_t + br_is_zero(big_register_t x) { + return (big_register_t)(x == br_set_to_mask(0)); + } #elif __SSE2__ -static __inline__ big_register_t -br_is_zero(big_register_t x) { - return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128()); - //return (big_register_t)(x == br_set_to_mask(0)); -} + static INLINE big_register_t + br_is_zero(big_register_t x) { + return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128()); + //return (big_register_t)(x == br_set_to_mask(0)); + } #elif __ARM_NEON__ -static __inline__ big_register_t -br_is_zero(big_register_t x) { - return vceqq_u32(x,x^x); -} + static INLINE big_register_t + br_is_zero(big_register_t x) { + return vceqq_u32(x,x^x); + } #else -static __inline__ mask_t -br_is_zero(word_t x) { - return (((dword_t)x) - 1)>>WORD_BITS; -} + static INLINE mask_t + br_is_zero(word_t x) { + return (((dword_t)x) - 1)>>WORD_BITS; + } #endif #ifdef __APPLE__ -static inline uint64_t -htobe64 (uint64_t x) { - __asm__ ("bswapq %0" : "+r"(x)); - return x; -} -static inline uint64_t -htole64 (uint64_t x) { return x; } - -static inline uint64_t -letoh64 (uint64_t x) { return x; } + static INLINE uint64_t htole64 (uint64_t x) { return x; } + static INLINE uint64_t letoh64 (uint64_t x) { return x; } #endif /** @@ -230,20 +219,21 @@ letoh64 (uint64_t x) { return x; } #endif #ifdef HAS_MEMSET_S -#ifdef NEED_MEMSET_S_EXTERN -extern int memset_s(void *, size_t, int, size_t); -#endif -static __inline__ void -really_memset(void *p, char c, size_t s) { - memset_s(p, s, c, s); -} + #ifdef NEED_MEMSET_S_EXTERN + extern int memset_s(void *, size_t, int, size_t); + #endif + static INLINE void + really_memset(void *p, char c, size_t s) { + memset_s(p, s, c, s); + } #else -static __inline__ void __attribute__((always_inline,unused)) -really_memset(void *p, char c, size_t s) { - volatile char *pv = (volatile char *)p; - size_t i; - for (i=0; i