From 4218223dd730034e5c47c1d6832a9ed8455bf484 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Thu, 14 Jan 2016 16:36:30 -0800 Subject: [PATCH] generate most of f_impl.h. Not tested on most arches yet :-( --- src/gen_headers/f_field_h.py | 40 +++++++-- src/p25519/arch_ref64/f_impl.c | 2 +- src/p25519/arch_ref64/f_impl.h | 78 ----------------- src/p25519/arch_x86_64/f_impl.c | 2 +- src/p25519/arch_x86_64/f_impl.h | 90 -------------------- src/p448/arch_32/f_impl.c | 3 +- src/p448/arch_32/f_impl.h | 76 ----------------- src/p448/arch_arm_32/f_impl.c | 3 +- src/p448/arch_arm_32/f_impl.h | 76 ----------------- src/p448/arch_neon_experimental/f_impl.c | 3 +- src/p448/arch_neon_experimental/f_impl.h | 78 +---------------- src/p448/arch_ref64/f_impl.c | 2 +- src/p448/arch_ref64/f_impl.h | 76 ----------------- src/p448/arch_x86_64/f_impl.c | 2 +- src/p448/arch_x86_64/f_impl.h | 79 ----------------- src/p480/arch_x86_64/f_impl.c | 41 +++++---- src/p480/arch_x86_64/f_impl.h | 100 +++++++++------------- src/p521/arch_ref64/f_impl.c | 40 ++++----- src/p521/arch_ref64/f_impl.h | 98 +++++++++------------ src/p521/arch_x86_64_r12/f_impl.c | 40 ++++----- src/p521/arch_x86_64_r12/f_impl.h | 104 +++-------------------- 21 files changed, 196 insertions(+), 837 deletions(-) diff --git a/src/gen_headers/f_field_h.py b/src/gen_headers/f_field_h.py index 85e45f8..8a01e48 100644 --- a/src/gen_headers/f_field_h.py +++ b/src/gen_headers/f_field_h.py @@ -9,22 +9,52 @@ f_field_h = gen_file( #include "constant_time.h" #include -#include "f_impl.h" + +#include "decaf/decaf_%(gf_bits)s.h" /* HACK in genheader */ +#include "word.h" + #define GF_LIT_LIMB_BITS %(gf_lit_limb_bits)d #define GF_BITS %(gf_bits)d #define gf gf_%(gf_shortname)s_t #define gf_s gf_%(gf_shortname)s_s -#define gf_mul gf_%(gf_shortname)s_mul -#define gf_sqr gf_%(gf_shortname)s_sqr +#define gf_copy gf_%(gf_shortname)s_copy #define gf_add_RAW gf_%(gf_shortname)s_add_RAW #define gf_sub_RAW gf_%(gf_shortname)s_sub_RAW -#define gf_mulw gf_%(gf_shortname)s_mulw #define gf_bias gf_%(gf_shortname)s_bias -#define gf_isr gf_%(gf_shortname)s_isr #define gf_weak_reduce gf_%(gf_shortname)s_weak_reduce #define gf_strong_reduce gf_%(gf_shortname)s_strong_reduce +#define gf_mul gf_%(gf_shortname)s_mul +#define gf_sqr gf_%(gf_shortname)s_sqr +#define gf_mulw gf_%(gf_shortname)s_mulw +#define gf_isr gf_%(gf_shortname)s_isr #define gf_serialize gf_%(gf_shortname)s_serialize #define gf_deserialize gf_%(gf_shortname)s_deserialize #define SQRT_MINUS_ONE P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */ + +#define INLINE_UNUSED __inline__ __attribute__((unused,always_inline)) + +#ifdef __cplusplus +extern "C" { +#endif + +/* Defined below in f_impl.h */ +static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; } +static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b); +static INLINE_UNUSED void gf_sub_RAW (gf out, const gf a, const gf b); +static INLINE_UNUSED void gf_bias (gf inout, int amount); +static INLINE_UNUSED void gf_weak_reduce (gf inout); + +void gf_strong_reduce (gf inout); +void gf_mul (gf_s *__restrict__ out, const gf a, const gf b); +void gf_mulw (gf_s *__restrict__ out, const gf a, uint64_t b); +void gf_sqr (gf_s *__restrict__ out, const gf a); +void gf_serialize (uint8_t *serial, const gf x); +mask_t gf_deserialize (gf x, const uint8_t serial[(GF_BITS-1)/8+1]); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "f_impl.h" /* Bring in the inline implementations */ """) \ No newline at end of file diff --git a/src/p25519/arch_ref64/f_impl.c b/src/p25519/arch_ref64/f_impl.c index d58ba73..8f24012 100644 --- a/src/p25519/arch_ref64/f_impl.c +++ b/src/p25519/arch_ref64/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "f_impl.h" +#include "f_field.h" static __inline__ __uint128_t widemul( const uint64_t a, diff --git a/src/p25519/arch_ref64/f_impl.h b/src/p25519/arch_ref64/f_impl.h index 1cb39ce..835151a 100644 --- a/src/p25519/arch_ref64/f_impl.h +++ b/src/p25519/arch_ref64/f_impl.h @@ -14,88 +14,10 @@ #define LBITS 51 #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} -/* -#define FIELD_LITERAL(a,b,c,d) {{ \ - (a##ull) & LMASK, \ - ((a##ull)>>51 | (b##ull)<<13) & LMASK, \ - ((b##ull)>>38 | (c##ull)<<26) & LMASK, \ - ((c##ull)>>25 | (d##ull)<<39) & LMASK, \ - (d##ull)>>12 \ -}} -*/ - #ifdef __cplusplus extern "C" { #endif -static __inline__ void -gf_25519_add_RAW ( - gf_25519_t out, - const gf_25519_t a, - const gf_25519_t b -) __attribute__((unused)); - -static __inline__ void -gf_25519_sub_RAW ( - gf_25519_t out, - const gf_25519_t a, - const gf_25519_t b -) __attribute__((unused)); - -static __inline__ void -gf_25519_copy ( - gf_25519_t out, - const gf_25519_t a -) __attribute__((unused)); - -static __inline__ void -gf_25519_weak_reduce ( - gf_25519_t inout -) __attribute__((unused)); - -void -gf_25519_strong_reduce ( - gf_25519_t inout -); - -static __inline__ void -gf_25519_bias ( - gf_25519_t inout, - int amount -) __attribute__((unused)); - -void -gf_25519_mul ( - gf_25519_s *__restrict__ out, - const gf_25519_t a, - const gf_25519_t b -); - -void -gf_25519_mulw ( - gf_25519_s *__restrict__ out, - const gf_25519_t a, - uint64_t b -); - -void -gf_25519_sqr ( - gf_25519_s *__restrict__ out, - const gf_25519_t a -); - -void -gf_25519_serialize ( - uint8_t serial[32], - const gf_25519_t x -); - -mask_t -gf_25519_deserialize ( - gf_25519_t x, - const uint8_t serial[32] -); - /* -------------- Inline functions begin here -------------- */ void diff --git a/src/p25519/arch_x86_64/f_impl.c b/src/p25519/arch_x86_64/f_impl.c index f5ea715..377252c 100644 --- a/src/p25519/arch_x86_64/f_impl.c +++ b/src/p25519/arch_x86_64/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "f_impl.h" +#include "f_field.h" #include "x86-64-arith.h" static inline uint64_t shr(__uint128_t x, int n) { diff --git a/src/p25519/arch_x86_64/f_impl.h b/src/p25519/arch_x86_64/f_impl.h index a90702d..24cbe19 100644 --- a/src/p25519/arch_x86_64/f_impl.h +++ b/src/p25519/arch_x86_64/f_impl.h @@ -14,88 +14,6 @@ #define DECAF_255_LIMB_BITS 51 #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} -/* -#define FIELD_LITERAL(a,b,c,d) {{ \ - (a##ull) & LMASK, \ - ((a##ull)>>51 | (b##ull)<<13) & LMASK, \ - ((b##ull)>>38 | (c##ull)<<26) & LMASK, \ - ((c##ull)>>25 | (d##ull)<<39) & LMASK, \ - (d##ull)>>12 \ -}} -*/ - -#ifdef __cplusplus -extern "C" { -#endif - -static __inline__ void -gf_25519_add_RAW ( - gf_25519_t out, - const gf_25519_t a, - const gf_25519_t b -) __attribute__((unused)); - -static __inline__ void -gf_25519_sub_RAW ( - gf_25519_t out, - const gf_25519_t a, - const gf_25519_t b -) __attribute__((unused)); - -static __inline__ void -gf_25519_copy ( - gf_25519_t out, - const gf_25519_t a -) __attribute__((unused)); - -static __inline__ void -gf_25519_weak_reduce ( - gf_25519_t inout -) __attribute__((unused)); - -void -gf_25519_strong_reduce ( - gf_25519_t inout -); - -static __inline__ void -gf_25519_bias ( - gf_25519_t inout, - int amount -) __attribute__((unused)); - -void -gf_25519_mul ( - gf_25519_s *__restrict__ out, - const gf_25519_t a, - const gf_25519_t b -); - -void -gf_25519_mulw ( - gf_25519_s *__restrict__ out, - const gf_25519_t a, - uint64_t b -); - -void -gf_25519_sqr ( - gf_25519_s *__restrict__ out, - const gf_25519_t a -); - -void -gf_25519_serialize ( - uint8_t serial[32], - const gf_25519_t x -); - -mask_t -gf_25519_deserialize ( - gf_25519_t x, - const uint8_t serial[32] -); - /* -------------- Inline functions begin here -------------- */ void @@ -123,14 +41,6 @@ gf_25519_sub_RAW ( } } -void -gf_25519_copy ( - gf_25519_t out, - const gf_25519_t a -) { - memcpy(out,a,sizeof(*a)); -} - void gf_25519_bias ( gf_25519_t a, diff --git a/src/p448/arch_32/f_impl.c b/src/p448/arch_32/f_impl.c index bedaf38..bd900c6 100644 --- a/src/p448/arch_32/f_impl.c +++ b/src/p448/arch_32/f_impl.c @@ -2,8 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "word.h" -#include "f_impl.h" +#include "f_field.h" static inline mask_t __attribute__((always_inline)) is_zero ( diff --git a/src/p448/arch_32/f_impl.h b/src/p448/arch_32/f_impl.h index ec7a3d0..7d343e0 100644 --- a/src/p448/arch_32/f_impl.h +++ b/src/p448/arch_32/f_impl.h @@ -22,74 +22,6 @@ typedef struct gf_448_s { extern "C" { #endif -static __inline__ void -gf_448_add_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_sub_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_weak_reduce ( - gf_448_t inout -) __attribute__((unused,always_inline)); - -void -gf_448_strong_reduce ( - gf_448_t inout -); - -static __inline__ void -gf_448_bias ( - gf_448_t inout, - int amount -) __attribute__((unused,always_inline)); - -void -gf_448_mul ( - gf_448_s *__restrict__ out, - const gf_448_t a, - const gf_448_t b -); - -void -gf_448_mulw ( - gf_448_s *__restrict__ out, - const gf_448_t a, - uint64_t b -); - -void -gf_448_sqr ( - gf_448_s *__restrict__ out, - const gf_448_t a -); - -void -gf_448_serialize ( - uint8_t *serial, - const gf_448_t x -); - -mask_t -gf_448_deserialize ( - gf_448_t x, - const uint8_t serial[56] -); - /* -------------- Inline functions begin here -------------- */ void @@ -128,14 +60,6 @@ gf_448_sub_RAW ( */ } -void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) { - *out = *a; -} - void gf_448_bias ( gf_448_t a, diff --git a/src/p448/arch_arm_32/f_impl.c b/src/p448/arch_arm_32/f_impl.c index 7e7150c..ea831f3 100644 --- a/src/p448/arch_arm_32/f_impl.c +++ b/src/p448/arch_arm_32/f_impl.c @@ -2,8 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "word.h" -#include "f_impl.h" +#include "f_field.h" static inline mask_t __attribute__((always_inline)) is_zero ( diff --git a/src/p448/arch_arm_32/f_impl.h b/src/p448/arch_arm_32/f_impl.h index ec7a3d0..7d343e0 100644 --- a/src/p448/arch_arm_32/f_impl.h +++ b/src/p448/arch_arm_32/f_impl.h @@ -22,74 +22,6 @@ typedef struct gf_448_s { extern "C" { #endif -static __inline__ void -gf_448_add_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_sub_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_weak_reduce ( - gf_448_t inout -) __attribute__((unused,always_inline)); - -void -gf_448_strong_reduce ( - gf_448_t inout -); - -static __inline__ void -gf_448_bias ( - gf_448_t inout, - int amount -) __attribute__((unused,always_inline)); - -void -gf_448_mul ( - gf_448_s *__restrict__ out, - const gf_448_t a, - const gf_448_t b -); - -void -gf_448_mulw ( - gf_448_s *__restrict__ out, - const gf_448_t a, - uint64_t b -); - -void -gf_448_sqr ( - gf_448_s *__restrict__ out, - const gf_448_t a -); - -void -gf_448_serialize ( - uint8_t *serial, - const gf_448_t x -); - -mask_t -gf_448_deserialize ( - gf_448_t x, - const uint8_t serial[56] -); - /* -------------- Inline functions begin here -------------- */ void @@ -128,14 +60,6 @@ gf_448_sub_RAW ( */ } -void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) { - *out = *a; -} - void gf_448_bias ( gf_448_t a, diff --git a/src/p448/arch_neon_experimental/f_impl.c b/src/p448/arch_neon_experimental/f_impl.c index 9282cb4..002ef40 100644 --- a/src/p448/arch_neon_experimental/f_impl.c +++ b/src/p448/arch_neon_experimental/f_impl.c @@ -2,8 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "word.h" -#include "f_impl.h" +#include "f_field.h" static inline mask_t __attribute__((always_inline)) is_zero ( diff --git a/src/p448/arch_neon_experimental/f_impl.h b/src/p448/arch_neon_experimental/f_impl.h index 2b3894d..a88dec2 100644 --- a/src/p448/arch_neon_experimental/f_impl.h +++ b/src/p448/arch_neon_experimental/f_impl.h @@ -27,75 +27,7 @@ typedef struct gf_448_s { #ifdef __cplusplus extern "C" { #endif - -static __inline__ void -gf_448_add_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_sub_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_weak_reduce ( - gf_448_t inout -) __attribute__((unused,always_inline)); - -void -gf_448_strong_reduce ( - gf_448_t inout -); - -static __inline__ void -gf_448_bias ( - gf_448_t inout, - int amount -) __attribute__((unused,always_inline)); - -void -gf_448_mul ( - gf_448_s *__restrict__ out, - const gf_448_t a, - const gf_448_t b -); - -void -gf_448_mulw ( - gf_448_s *__restrict__ out, - const gf_448_t a, - uint64_t b -); - -void -gf_448_sqr ( - gf_448_s *__restrict__ out, - const gf_448_t a -); - -void -gf_448_serialize ( - uint8_t *serial, - const gf_448_t x -); - -mask_t -gf_448_deserialize ( - gf_448_t x, - const uint8_t serial[56] -); - + /* -------------- Inline functions begin here -------------- */ void @@ -128,14 +60,6 @@ gf_448_sub_RAW ( */ } -void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) { - *out = *a; -} - void gf_448_bias ( gf_448_t a, diff --git a/src/p448/arch_ref64/f_impl.c b/src/p448/arch_ref64/f_impl.c index c03913d..88bef61 100644 --- a/src/p448/arch_ref64/f_impl.c +++ b/src/p448/arch_ref64/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "f_impl.h" +#include "f_field.h" static __inline__ __uint128_t widemul( const uint64_t a, diff --git a/src/p448/arch_ref64/f_impl.h b/src/p448/arch_ref64/f_impl.h index 65add03..cf84d72 100644 --- a/src/p448/arch_ref64/f_impl.h +++ b/src/p448/arch_ref64/f_impl.h @@ -21,74 +21,6 @@ typedef struct gf_448_s { extern "C" { #endif -static __inline__ void -gf_448_add_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused)); - -static __inline__ void -gf_448_sub_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused)); - -static __inline__ void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) __attribute__((unused)); - -static __inline__ void -gf_448_weak_reduce ( - gf_448_t inout -) __attribute__((unused)); - -void -gf_448_strong_reduce ( - gf_448_t inout -); - -static __inline__ void -gf_448_bias ( - gf_448_t inout, - int amount -) __attribute__((unused)); - -void -gf_448_mul ( - gf_448_s *__restrict__ out, - const gf_448_t a, - const gf_448_t b -); - -void -gf_448_mulw ( - gf_448_s *__restrict__ out, - const gf_448_t a, - uint64_t b -); - -void -gf_448_sqr ( - gf_448_s *__restrict__ out, - const gf_448_t a -); - -void -gf_448_serialize ( - uint8_t *serial, - const gf_448_t x -); - -mask_t -gf_448_deserialize ( - gf_448_t x, - const uint8_t serial[56] -); - /* -------------- Inline functions begin here -------------- */ void @@ -118,14 +50,6 @@ gf_448_sub_RAW ( gf_448_weak_reduce(out); } -void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) { - memcpy(out,a,sizeof(*a)); -} - void gf_448_bias ( gf_448_t a, diff --git a/src/p448/arch_x86_64/f_impl.c b/src/p448/arch_x86_64/f_impl.c index e044942..9c02d84 100644 --- a/src/p448/arch_x86_64/f_impl.c +++ b/src/p448/arch_x86_64/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "f_impl.h" +#include "f_field.h" #include "x86-64-arith.h" void diff --git a/src/p448/arch_x86_64/f_impl.h b/src/p448/arch_x86_64/f_impl.h index 34da14b..0593398 100644 --- a/src/p448/arch_x86_64/f_impl.h +++ b/src/p448/arch_x86_64/f_impl.h @@ -17,74 +17,6 @@ extern "C" { #endif -static __inline__ void -gf_448_add_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_sub_RAW ( - gf_448_t out, - const gf_448_t a, - const gf_448_t b -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) __attribute__((unused,always_inline)); - -static __inline__ void -gf_448_weak_reduce ( - gf_448_t inout -) __attribute__((unused,always_inline)); - -void -gf_448_strong_reduce ( - gf_448_t inout -); - -static __inline__ void -gf_448_bias ( - gf_448_t inout, - int amount -) __attribute__((unused,always_inline)); - -void -gf_448_mul ( - gf_448_s *__restrict__ out, - const gf_448_t a, - const gf_448_t b -); - -void -gf_448_mulw ( - gf_448_s *__restrict__ out, - const gf_448_t a, - uint64_t b -); - -void -gf_448_sqr ( - gf_448_s *__restrict__ out, - const gf_448_t a -); - -void -gf_448_serialize ( - uint8_t *serial, - const gf_448_t x -); - -mask_t -gf_448_deserialize ( - gf_448_t x, - const uint8_t serial[56] -); - /* -------------- Inline functions begin here -------------- */ void @@ -123,17 +55,6 @@ gf_448_sub_RAW ( */ } -void -gf_448_copy ( - gf_448_t out, - const gf_448_t a -) { - unsigned int i; - for (i=0; ilimb, *b = bs->limb; uint64_t *c = cs->limb; @@ -146,9 +145,9 @@ p480_mul ( } void -p480_mulw ( - p480_t *__restrict__ cs, - const p480_t *as, +gf_480_mulw ( + gf_480_t *__restrict__ cs, + const gf_480_t *as, uint64_t b ) { const uint64_t *a = as->limb; @@ -191,9 +190,9 @@ p480_mulw ( } void -p480_sqr ( - p480_t *__restrict__ cs, - const p480_t *as +gf_480_sqr ( + gf_480_t *__restrict__ cs, + const gf_480_t *as ) { const uint64_t *a = as->limb; uint64_t *c = cs->limb; @@ -306,8 +305,8 @@ p480_sqr ( } void -p480_strong_reduce ( - p480_t *a +gf_480_strong_reduce ( + gf_480_t *a ) { uint64_t mask = (1ull<<60)-1; @@ -349,14 +348,14 @@ p480_strong_reduce ( } void -p480_serialize ( +gf_480_serialize ( uint8_t *serial, - const struct p480_t *x + const struct gf_480_t *x ) { int i,j,k=0; - p480_t red; - p480_copy(&red, x); - p480_strong_reduce(&red); + gf_480_t red; + gf_480_copy(&red, x); + gf_480_strong_reduce(&red); word_t r = 0; for (i=0; i<8; i+=2) { r = red.limb[i]; @@ -375,8 +374,8 @@ p480_serialize ( } mask_t -p480_deserialize ( - p480_t *x, +gf_480_deserialize ( + gf_480_t *x, const uint8_t serial[60] ) { int i,j,k=0; diff --git a/src/p480/arch_x86_64/f_impl.h b/src/p480/arch_x86_64/f_impl.h index c309200..b55ba97 100644 --- a/src/p480/arch_x86_64/f_impl.h +++ b/src/p480/arch_x86_64/f_impl.h @@ -1,97 +1,77 @@ /* Copyright (c) 2014 Cryptography Research, Inc. * Released under the MIT License. See LICENSE.txt for license information. */ -#ifndef __p480_H__ -#define __p480_H__ 1 +#ifndef __gf_480_H__ +#define __gf_480_H__ 1 #include #include #include "word.h" -typedef struct p480_t { +typedef struct gf_480_t { uint64_t limb[8]; -} __attribute__((aligned(32))) p480_t; +} __attribute__((aligned(32))) gf_480_t; #ifdef __cplusplus extern "C" { #endif - -static __inline__ void -p480_add_RAW ( - p480_t *out, - const p480_t *a, - const p480_t *b -) __attribute__((unused,always_inline)); - -static __inline__ void -p480_sub_RAW ( - p480_t *out, - const p480_t *a, - const p480_t *b -) __attribute__((unused,always_inline)); - -static __inline__ void -p480_copy ( - p480_t *out, - const p480_t *a -) __attribute__((unused,always_inline)); static __inline__ void -p480_weak_reduce ( - p480_t *inout +gf_480_weak_reduce ( + gf_480_t *inout ) __attribute__((unused,always_inline)); void -p480_strong_reduce ( - p480_t *inout +gf_480_strong_reduce ( + gf_480_t *inout ); static __inline__ void -p480_bias ( - p480_t *inout, +gf_480_bias ( + gf_480_t *inout, int amount ) __attribute__((unused,always_inline)); void -p480_mul ( - p480_t *__restrict__ out, - const p480_t *a, - const p480_t *b +gf_480_mul ( + gf_480_t *__restrict__ out, + const gf_480_t *a, + const gf_480_t *b ); void -p480_mulw ( - p480_t *__restrict__ out, - const p480_t *a, +gf_480_mulw ( + gf_480_t *__restrict__ out, + const gf_480_t *a, uint64_t b ); void -p480_sqr ( - p480_t *__restrict__ out, - const p480_t *a +gf_480_sqr ( + gf_480_t *__restrict__ out, + const gf_480_t *a ); void -p480_serialize ( +gf_480_serialize ( uint8_t *serial, - const struct p480_t *x + const struct gf_480_t *x ); mask_t -p480_deserialize ( - p480_t *x, +gf_480_deserialize ( + gf_480_t *x, const uint8_t serial[60] ); /* -------------- Inline functions begin here -------------- */ void -p480_add_RAW ( - p480_t *out, - const p480_t *a, - const p480_t *b +gf_480_add_RAW ( + gf_480_t *out, + const gf_480_t *a, + const gf_480_t *b ) { unsigned int i; for (i=0; ilimb; const uint64_t *a = as->limb, *b = bs->limb; @@ -158,9 +158,9 @@ p521_mul ( } void -p521_mulw ( - p521_t *__restrict__ cs, - const p521_t *as, +gf_521_mulw ( + gf_521_t *__restrict__ cs, + const gf_521_t *as, uint64_t b ) { const uint64_t *a = as->limb; @@ -197,9 +197,9 @@ p521_mulw ( } void -p521_sqr ( - p521_t *__restrict__ cs, - const p521_t *as +gf_521_sqr ( + gf_521_t *__restrict__ cs, + const gf_521_t *as ) { uint64_t *c = cs->limb; const uint64_t *a = as->limb; @@ -306,8 +306,8 @@ p521_sqr ( } void -p521_strong_reduce ( - p521_t *a +gf_521_strong_reduce ( + gf_521_t *a ) { uint64_t mask = (1ull<<58)-1, mask2 = (1ull<<57)-1; @@ -347,14 +347,14 @@ p521_strong_reduce ( } void -p521_serialize ( +gf_521_serialize ( uint8_t *serial, - const struct p521_t *x + const struct gf_521_t *x ) { int i,k=0; - p521_t red; - p521_copy(&red, x); - p521_strong_reduce(&red); + gf_521_t red; + gf_521_copy(&red, x); + gf_521_strong_reduce(&red); uint64_t r=0; int bits = 0; @@ -371,8 +371,8 @@ p521_serialize ( } mask_t -p521_deserialize ( - p521_t *x, +gf_521_deserialize ( + gf_521_t *x, const uint8_t serial[66] ) { int i,k=0,bits=0; diff --git a/src/p521/arch_ref64/f_impl.h b/src/p521/arch_ref64/f_impl.h index 2b63f13..512b1d9 100644 --- a/src/p521/arch_ref64/f_impl.h +++ b/src/p521/arch_ref64/f_impl.h @@ -10,122 +10,102 @@ #include "word.h" -typedef struct p521_t { +typedef struct gf_521_t { uint64_t limb[9]; -} p521_t; +} gf_521_t; #ifdef __cplusplus extern "C" { #endif - -static __inline__ void -p521_add_RAW ( - p521_t *out, - const p521_t *a, - const p521_t *b -) __attribute__((unused)); - -static __inline__ void -p521_sub_RAW ( - p521_t *out, - const p521_t *a, - const p521_t *b -) __attribute__((unused)); - -static __inline__ void -p521_copy ( - p521_t *out, - const p521_t *a -) __attribute__((unused)); static __inline__ void -p521_weak_reduce ( - p521_t *inout +gf_521_weak_reduce ( + gf_521_t *inout ) __attribute__((unused)); void -p521_strong_reduce ( - p521_t *inout +gf_521_strong_reduce ( + gf_521_t *inout ); static __inline__ void -p521_bias ( - p521_t *inout, +gf_521_bias ( + gf_521_t *inout, int amount ) __attribute__((unused)); void -p521_mul ( - p521_t *__restrict__ out, - const p521_t *a, - const p521_t *b +gf_521_mul ( + gf_521_t *__restrict__ out, + const gf_521_t *a, + const gf_521_t *b ); void -p521_mulw ( - p521_t *__restrict__ out, - const p521_t *a, +gf_521_mulw ( + gf_521_t *__restrict__ out, + const gf_521_t *a, uint64_t b ); void -p521_sqr ( - p521_t *__restrict__ out, - const p521_t *a +gf_521_sqr ( + gf_521_t *__restrict__ out, + const gf_521_t *a ); void -p521_serialize ( +gf_521_serialize ( uint8_t *serial, - const struct p521_t *x + const struct gf_521_t *x ); mask_t -p521_deserialize ( - p521_t *x, +gf_521_deserialize ( + gf_521_t *x, const uint8_t serial[66] ); /* -------------- Inline functions begin here -------------- */ void -p521_add_RAW ( - p521_t *out, - const p521_t *a, - const p521_t *b +gf_521_add_RAW ( + gf_521_t *out, + const gf_521_t *a, + const gf_521_t *b ) { unsigned int i; for (i=0; i<9; i++) { out->limb[i] = a->limb[i] + b->limb[i]; } - p521_weak_reduce(out); + gf_521_weak_reduce(out); } void -p521_sub_RAW ( - p521_t *out, - const p521_t *a, - const p521_t *b +gf_521_sub_RAW ( + gf_521_t *out, + const gf_521_t *a, + const gf_521_t *b ) { unsigned int i; uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4; for (i=0; i<9; i++) { out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1); } - p521_weak_reduce(out); + gf_521_weak_reduce(out); } void -p521_copy ( - p521_t *out, - const p521_t *a +gf_521_copy ( + gf_521_t *out, + const gf_521_t *a ) { memcpy(out,a,sizeof(*a)); } void -p521_bias ( - p521_t *a, +gf_521_bias ( + gf_521_t *a, int amt ) { (void) a; @@ -133,8 +113,8 @@ p521_bias ( } void -p521_weak_reduce ( - p521_t *a +gf_521_weak_reduce ( + gf_521_t *a ) { uint64_t mask = (1ull<<58) - 1; uint64_t tmp = a->limb[8] >> 57; diff --git a/src/p521/arch_x86_64_r12/f_impl.c b/src/p521/arch_x86_64_r12/f_impl.c index ba3e77b..0b42a4b 100644 --- a/src/p521/arch_x86_64_r12/f_impl.c +++ b/src/p521/arch_x86_64_r12/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "f_impl.h" +#include "f_field.h" typedef struct { uint64x3_t lo, hi, hier; @@ -168,10 +168,10 @@ static inline void hexad_sqr_signed ( void -p521_mul ( - p521_t *__restrict__ cs, - const p521_t *as, - const p521_t *bs +gf_521_mul ( + gf_521_t *__restrict__ cs, + const gf_521_t *as, + const gf_521_t *bs ) { int i; @@ -254,9 +254,9 @@ p521_mul ( void -p521_sqr ( - p521_t *__restrict__ cs, - const p521_t *as +gf_521_sqr ( + gf_521_t *__restrict__ cs, + const gf_521_t *as ) { @@ -313,9 +313,9 @@ p521_sqr ( } void -p521_mulw ( - p521_t *__restrict__ cs, - const p521_t *as, +gf_521_mulw ( + gf_521_t *__restrict__ cs, + const gf_521_t *as, uint64_t b ) { @@ -375,8 +375,8 @@ p521_mulw ( void -p521_strong_reduce ( - p521_t *a +gf_521_strong_reduce ( + gf_521_t *a ) { uint64_t mask = (1ull<<58)-1, mask2 = (1ull<<57)-1; @@ -418,14 +418,14 @@ p521_strong_reduce ( } void -p521_serialize ( +gf_521_serialize ( uint8_t *serial, - const struct p521_t *x + const struct gf_521_t *x ) { unsigned int i,k=0; - p521_t red; - p521_copy(&red, x); - p521_strong_reduce(&red); + gf_521_t red; + gf_521_copy(&red, x); + gf_521_strong_reduce(&red); uint64_t r=0; int bits = 0; @@ -442,8 +442,8 @@ p521_serialize ( } mask_t -p521_deserialize ( - p521_t *x, +gf_521_deserialize ( + gf_521_t *x, const uint8_t serial[LIMBPERM(66)] ) { int i,k=0,bits=0; diff --git a/src/p521/arch_x86_64_r12/f_impl.h b/src/p521/arch_x86_64_r12/f_impl.h index 14ecb3f..4616b71 100644 --- a/src/p521/arch_x86_64_r12/f_impl.h +++ b/src/p521/arch_x86_64_r12/f_impl.h @@ -14,82 +14,14 @@ #define LIMBPERM(x) (((x)%3)*4 + (x)/3) #define USE_P521_3x3_TRANSPOSE -typedef struct p521_t { +typedef struct gf_521_s { uint64_t limb[12]; -} __attribute__((aligned(32))) p521_t; +} __attribute__((aligned(32))) gf_521_t; #ifdef __cplusplus extern "C" { #endif -static __inline__ void -p521_add_RAW ( - p521_t *out, - const p521_t *a, - const p521_t *b -) __attribute__((unused)); - -static __inline__ void -p521_sub_RAW ( - p521_t *out, - const p521_t *a, - const p521_t *b -) __attribute__((unused)); - -static __inline__ void -p521_copy ( - p521_t *out, - const p521_t *a -) __attribute__((unused)); - -static __inline__ void -p521_weak_reduce ( - p521_t *inout -) __attribute__((unused)); - -void -p521_strong_reduce ( - p521_t *inout -); - -static __inline__ void -p521_bias ( - p521_t *inout, - int amount -) __attribute__((unused)); - -void -p521_mul ( - p521_t *__restrict__ out, - const p521_t *a, - const p521_t *b -); - -void -p521_mulw ( - p521_t *__restrict__ out, - const p521_t *a, - uint64_t b -); - -void -p521_sqr ( - p521_t *__restrict__ out, - const p521_t *a -); - -void -p521_serialize ( - uint8_t *serial, - const struct p521_t *x -); - -mask_t -p521_deserialize ( - p521_t *x, - const uint8_t serial[66] -); - /* -------------- Inline functions begin here -------------- */ typedef uint64x4_t uint64x3_t; /* fit it in a vector register */ @@ -106,10 +38,10 @@ timesW ( } void -p521_add_RAW ( - p521_t *out, - const p521_t *a, - const p521_t *b +gf_521_add_RAW ( + gf_521_t *out, + const gf_521_t *a, + const gf_521_t *b ) { unsigned int i; for (i=0; i