| @@ -4,14 +4,12 @@ | |||
| #ifndef __P25519_H__ | |||
| #define __P25519_H__ 1 | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| #include <string.h> | |||
| #include "decaf/decaf_255.h" | |||
| #include "word.h" | |||
| #define LBITS 51 | |||
| #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | |||
| #ifdef __cplusplus | |||
| @@ -20,54 +18,29 @@ extern "C" { | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_25519_add_RAW ( | |||
| gf_25519_t out, | |||
| const gf_25519_t a, | |||
| const gf_25519_t b | |||
| ) { | |||
| void gf_add_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<5; i++) { | |||
| out->limb[i] = a->limb[i] + b->limb[i]; | |||
| } | |||
| gf_25519_weak_reduce(out); | |||
| gf_weak_reduce(out); | |||
| } | |||
| void | |||
| gf_25519_sub_RAW ( | |||
| gf_25519_t out, | |||
| const gf_25519_t a, | |||
| const gf_25519_t b | |||
| ) { | |||
| void gf_sub_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; | |||
| for (i=0; i<5; i++) { | |||
| out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1); | |||
| } | |||
| gf_25519_weak_reduce(out); | |||
| } | |||
| void | |||
| gf_25519_copy ( | |||
| gf_25519_t out, | |||
| const gf_25519_t a | |||
| ) { | |||
| memcpy(out,a,sizeof(*a)); | |||
| gf_weak_reduce(out); | |||
| } | |||
| void | |||
| gf_25519_bias ( | |||
| gf_25519_t a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf a, int amt) { | |||
| (void) a; | |||
| (void) amt; | |||
| } | |||
| void | |||
| gf_25519_weak_reduce ( | |||
| gf_25519_t a | |||
| ) { | |||
| void gf_weak_reduce (gf a) { | |||
| uint64_t mask = (1ull<<51) - 1; | |||
| uint64_t tmp = a->limb[4] >> 51; | |||
| int i; | |||
| @@ -4,36 +4,24 @@ | |||
| #ifndef __P25519_H__ | |||
| #define __P25519_H__ 1 | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| #include <string.h> | |||
| #include "decaf/decaf_255.h" | |||
| #include "word.h" | |||
| #define DECAF_255_LIMB_BITS 51 | |||
| #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_25519_add_RAW ( | |||
| gf_25519_t out, | |||
| const gf_25519_t a, | |||
| const gf_25519_t b | |||
| ) { | |||
| void gf_add_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<5; i++) { | |||
| out->limb[i] = a->limb[i] + b->limb[i]; | |||
| } | |||
| } | |||
| void | |||
| gf_25519_sub_RAW ( | |||
| gf_25519_t out, | |||
| const gf_25519_t a, | |||
| const gf_25519_t b | |||
| ) { | |||
| void gf_sub_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; | |||
| for (i=0; i<5; i++) { | |||
| @@ -41,11 +29,7 @@ gf_25519_sub_RAW ( | |||
| } | |||
| } | |||
| void | |||
| gf_25519_bias ( | |||
| gf_25519_t a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf a, int amt) { | |||
| a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt; | |||
| int i; | |||
| for (i=1; i<5; i++) { | |||
| @@ -53,10 +37,7 @@ gf_25519_bias ( | |||
| } | |||
| } | |||
| void | |||
| gf_25519_weak_reduce ( | |||
| gf_25519_t a | |||
| ) { | |||
| void gf_weak_reduce (gf a) { | |||
| uint64_t mask = (1ull<<51) - 1; | |||
| uint64_t tmp = a->limb[4] >> 51; | |||
| int i; | |||
| @@ -4,17 +4,12 @@ | |||
| #ifndef __P448_H__ | |||
| #define __P448_H__ 1 | |||
| #include "word.h" | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| typedef struct gf_448_s { | |||
| uint32_t limb[16]; | |||
| } __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; | |||
| #define LBITS 28 | |||
| #define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS | |||
| #define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 | |||
| #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | |||
| {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | |||
| @@ -24,12 +19,7 @@ extern "C" { | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_448_add_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_add_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | |||
| ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | |||
| @@ -42,12 +32,7 @@ gf_448_add_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_448_sub_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_sub_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | |||
| ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | |||
| @@ -60,11 +45,7 @@ gf_448_sub_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_448_bias ( | |||
| gf_448_t a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf a, int amt) { | |||
| uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | |||
| uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | |||
| uint32x4_t *aa = (uint32x4_t*) a; | |||
| @@ -74,10 +55,7 @@ gf_448_bias ( | |||
| aa[3] += lo; | |||
| } | |||
| void | |||
| gf_448_weak_reduce ( | |||
| gf_448_t a | |||
| ) { | |||
| void gf_weak_reduce (gf a) { | |||
| uint64_t mask = (1ull<<28) - 1; | |||
| uint64_t tmp = a->limb[15] >> 28; | |||
| int i; | |||
| @@ -4,17 +4,12 @@ | |||
| #ifndef __P448_H__ | |||
| #define __P448_H__ 1 | |||
| #include "word.h" | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| typedef struct gf_448_s { | |||
| uint32_t limb[16]; | |||
| } __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; | |||
| #define LBITS 28 | |||
| #define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS | |||
| #define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 | |||
| #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | |||
| {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | |||
| @@ -24,12 +19,7 @@ extern "C" { | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_448_add_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_add_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | |||
| ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | |||
| @@ -42,12 +32,7 @@ gf_448_add_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_448_sub_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_sub_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | |||
| ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | |||
| @@ -60,11 +45,7 @@ gf_448_sub_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_448_bias ( | |||
| gf_448_t a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf a, int amt) { | |||
| uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | |||
| uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | |||
| uint32x4_t *aa = (uint32x4_t*) a; | |||
| @@ -74,10 +55,7 @@ gf_448_bias ( | |||
| aa[3] += lo; | |||
| } | |||
| void | |||
| gf_448_weak_reduce ( | |||
| gf_448_t a | |||
| ) { | |||
| void gf_weak_reduce (gf a) { | |||
| uint64_t mask = (1ull<<28) - 1; | |||
| uint64_t tmp = a->limb[15] >> 28; | |||
| int i; | |||
| @@ -4,20 +4,15 @@ | |||
| #ifndef __P448_H__ | |||
| #define __P448_H__ 1 | |||
| #include "word.h" | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| typedef struct gf_448_s { | |||
| uint32_t limb[16]; | |||
| } __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; | |||
| #define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15) | |||
| #define USE_NEON_PERM 1 | |||
| #define LBITS 28 | |||
| #define LIMBHI(x) ((x##ull)>>LBITS) | |||
| #define LIMBLO(x) ((x##ull)&((1ull<<LBITS)-1)) | |||
| #define LIMBHI(x) ((x##ull)>>28) | |||
| #define LIMBLO(x) ((x##ull)&((1ull<<28)-1)) | |||
| # define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | |||
| {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \ | |||
| LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \ | |||
| @@ -30,24 +25,14 @@ extern "C" { | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_448_add_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_add_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | |||
| ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i]; | |||
| } | |||
| } | |||
| void | |||
| gf_448_sub_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_sub_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | |||
| ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i]; | |||
| @@ -60,11 +45,7 @@ gf_448_sub_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_448_bias ( | |||
| gf_448_t a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf a, int amt) { | |||
| uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; | |||
| uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1}; | |||
| uint32x4_t *aa = (uint32x4_t*) a; | |||
| @@ -74,10 +55,7 @@ gf_448_bias ( | |||
| aa[3] += hi; | |||
| } | |||
| void | |||
| gf_448_weak_reduce ( | |||
| gf_448_t a | |||
| ) { | |||
| void gf_weak_reduce (gf a) { | |||
| uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1}, | |||
| tmp = vshr_n_u32(aa[7],28); | |||
| @@ -4,17 +4,12 @@ | |||
| #ifndef __P448_H__ | |||
| #define __P448_H__ 1 | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| #include <string.h> | |||
| #include "word.h" | |||
| typedef struct gf_448_s { | |||
| uint64_t limb[8]; | |||
| } __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; | |||
| #define LBITS 56 | |||
| #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} | |||
| #ifdef __cplusplus | |||
| @@ -23,46 +18,29 @@ extern "C" { | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_448_add_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_add_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<8; i++) { | |||
| out->limb[i] = a->limb[i] + b->limb[i]; | |||
| } | |||
| gf_448_weak_reduce(out); | |||
| gf_weak_reduce(out); | |||
| } | |||
| void | |||
| gf_448_sub_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_sub_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2; | |||
| for (i=0; i<8; i++) { | |||
| out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1); | |||
| } | |||
| gf_448_weak_reduce(out); | |||
| gf_weak_reduce(out); | |||
| } | |||
| void | |||
| gf_448_bias ( | |||
| gf_448_t a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf a, int amt) { | |||
| (void) a; | |||
| (void) amt; | |||
| } | |||
| void | |||
| gf_448_weak_reduce ( | |||
| gf_448_t a | |||
| ) { | |||
| void gf_weak_reduce (gf a) { | |||
| uint64_t mask = (1ull<<56) - 1; | |||
| uint64_t tmp = a->limb[7] >> 56; | |||
| int i; | |||
| @@ -4,13 +4,11 @@ | |||
| #ifndef __P448_H__ | |||
| #define __P448_H__ 1 | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| #include "decaf/decaf_448.h" | |||
| #include "word.h" | |||
| #define LBITS 56 | |||
| #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} | |||
| #ifdef __cplusplus | |||
| @@ -19,12 +17,7 @@ extern "C" { | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_448_add_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_add_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | |||
| @@ -37,12 +30,7 @@ gf_448_add_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_448_sub_RAW ( | |||
| gf_448_t out, | |||
| const gf_448_t a, | |||
| const gf_448_t b | |||
| ) { | |||
| void gf_sub_RAW (gf out, const gf a, const gf b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | |||
| @@ -55,11 +43,7 @@ gf_448_sub_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_448_bias ( | |||
| gf_448_t a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf a, int amt) { | |||
| uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; | |||
| #if __AVX2__ | |||
| @@ -82,10 +66,7 @@ gf_448_bias ( | |||
| #endif | |||
| } | |||
| void | |||
| gf_448_weak_reduce ( | |||
| gf_448_t a | |||
| ) { | |||
| void gf_weak_reduce (gf a) { | |||
| /* PERF: use pshufb/palignr if anyone cares about speed of this */ | |||
| uint64_t mask = (1ull<<56) - 1; | |||
| uint64_t tmp = a->limb[7] >> 56; | |||
| @@ -1,78 +1,23 @@ | |||
| /* Copyright (c) 2014 Cryptography Research, Inc. | |||
| * Released under the MIT License. See LICENSE.txt for license information. | |||
| */ | |||
| #ifndef __gf_480_H__ | |||
| #define __gf_480_H__ 1 | |||
| #ifndef __gf_H__ | |||
| #define __gf_H__ 1 | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| #include "word.h" | |||
| typedef struct gf_480_t { | |||
| uint64_t limb[8]; | |||
| } __attribute__((aligned(32))) gf_480_t; | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| static __inline__ void | |||
| gf_480_weak_reduce ( | |||
| gf_480_t *inout | |||
| ) __attribute__((unused,always_inline)); | |||
| void | |||
| gf_480_strong_reduce ( | |||
| gf_480_t *inout | |||
| ); | |||
| static __inline__ void | |||
| gf_480_bias ( | |||
| gf_480_t *inout, | |||
| int amount | |||
| ) __attribute__((unused,always_inline)); | |||
| void | |||
| gf_480_mul ( | |||
| gf_480_t *__restrict__ out, | |||
| const gf_480_t *a, | |||
| const gf_480_t *b | |||
| ); | |||
| void | |||
| gf_480_mulw ( | |||
| gf_480_t *__restrict__ out, | |||
| const gf_480_t *a, | |||
| uint64_t b | |||
| ); | |||
| void | |||
| gf_480_sqr ( | |||
| gf_480_t *__restrict__ out, | |||
| const gf_480_t *a | |||
| ); | |||
| void | |||
| gf_480_serialize ( | |||
| uint8_t *serial, | |||
| const struct gf_480_t *x | |||
| ); | |||
| mask_t | |||
| gf_480_deserialize ( | |||
| gf_480_t *x, | |||
| const uint8_t serial[60] | |||
| ); | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_480_add_RAW ( | |||
| gf_480_t *out, | |||
| const gf_480_t *a, | |||
| const gf_480_t *b | |||
| ) { | |||
| void gf_add_RAW (gf *out, const gf *a, const gf *b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | |||
| @@ -85,12 +30,7 @@ gf_480_add_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_480_sub_RAW ( | |||
| gf_480_t *out, | |||
| const gf_480_t *a, | |||
| const gf_480_t *b | |||
| ) { | |||
| void gf_sub_RAW (gf *out, const gf *a, const gf *b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | |||
| @@ -103,21 +43,15 @@ gf_480_sub_RAW ( | |||
| */ | |||
| } | |||
| void | |||
| gf_480_copy ( | |||
| gf_480_t *out, | |||
| const gf_480_t *a | |||
| ) { | |||
| void gf_copy (gf *out, const gf *a) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) { | |||
| ((big_register_t *)out)[i] = ((const big_register_t *)a)[i]; | |||
| } | |||
| } | |||
| void | |||
| gf_480_bias ( | |||
| gf_480_t *a, | |||
| int amt | |||
| void gf_bias ( | |||
| gf *a, int amt | |||
| ) { | |||
| uint64_t co1 = ((1ull<<60)-1)*amt, co2 = co1-amt; | |||
| @@ -141,10 +75,7 @@ gf_480_bias ( | |||
| #endif | |||
| } | |||
| void | |||
| gf_480_weak_reduce ( | |||
| gf_480_t *a | |||
| ) { | |||
| void gf_weak_reduce (gf *a) { | |||
| /* PERF: use pshufb/palignr if anyone cares about speed of this */ | |||
| uint64_t mask = (1ull<<60) - 1; | |||
| uint64_t tmp = a->limb[7] >> 60; | |||
| @@ -160,4 +91,4 @@ gf_480_weak_reduce ( | |||
| }; /* extern "C" */ | |||
| #endif | |||
| #endif /* __gf_480_H__ */ | |||
| #endif /* __gf_H__ */ | |||
| @@ -4,118 +4,41 @@ | |||
| #ifndef __P521_H__ | |||
| #define __P521_H__ 1 | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| #include <string.h> | |||
| #include "word.h" | |||
| typedef struct gf_521_t { | |||
| uint64_t limb[9]; | |||
| } gf_521_t; | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| static __inline__ void | |||
| gf_521_weak_reduce ( | |||
| gf_521_t *inout | |||
| ) __attribute__((unused)); | |||
| void | |||
| gf_521_strong_reduce ( | |||
| gf_521_t *inout | |||
| ); | |||
| static __inline__ void | |||
| gf_521_bias ( | |||
| gf_521_t *inout, | |||
| int amount | |||
| ) __attribute__((unused)); | |||
| void | |||
| gf_521_mul ( | |||
| gf_521_t *__restrict__ out, | |||
| const gf_521_t *a, | |||
| const gf_521_t *b | |||
| ); | |||
| void | |||
| gf_521_mulw ( | |||
| gf_521_t *__restrict__ out, | |||
| const gf_521_t *a, | |||
| uint64_t b | |||
| ); | |||
| void | |||
| gf_521_sqr ( | |||
| gf_521_t *__restrict__ out, | |||
| const gf_521_t *a | |||
| ); | |||
| void | |||
| gf_521_serialize ( | |||
| uint8_t *serial, | |||
| const struct gf_521_t *x | |||
| ); | |||
| mask_t | |||
| gf_521_deserialize ( | |||
| gf_521_t *x, | |||
| const uint8_t serial[66] | |||
| ); | |||
| /* -------------- Inline functions begin here -------------- */ | |||
| void | |||
| gf_521_add_RAW ( | |||
| gf_521_t *out, | |||
| const gf_521_t *a, | |||
| const gf_521_t *b | |||
| ) { | |||
| void gf_add_RAW (gf *out, const gf *a, const gf *b) { | |||
| unsigned int i; | |||
| for (i=0; i<9; i++) { | |||
| out->limb[i] = a->limb[i] + b->limb[i]; | |||
| } | |||
| gf_521_weak_reduce(out); | |||
| gf_weak_reduce(out); | |||
| } | |||
| void | |||
| gf_521_sub_RAW ( | |||
| gf_521_t *out, | |||
| const gf_521_t *a, | |||
| const gf_521_t *b | |||
| ) { | |||
| void gf_sub_RAW (gf *out, const gf *a, const gf *b) { | |||
| unsigned int i; | |||
| uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4; | |||
| for (i=0; i<9; i++) { | |||
| out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1); | |||
| } | |||
| gf_521_weak_reduce(out); | |||
| } | |||
| void | |||
| gf_521_copy ( | |||
| gf_521_t *out, | |||
| const gf_521_t *a | |||
| ) { | |||
| memcpy(out,a,sizeof(*a)); | |||
| gf_weak_reduce(out); | |||
| } | |||
| void | |||
| gf_521_bias ( | |||
| gf_521_t *a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf *a, int amt) { | |||
| (void) a; | |||
| (void) amt; | |||
| } | |||
| void | |||
| gf_521_weak_reduce ( | |||
| gf_521_t *a | |||
| ) { | |||
| void gf_weak_reduce (gf *a) { | |||
| uint64_t mask = (1ull<<58) - 1; | |||
| uint64_t tmp = a->limb[8] >> 57; | |||
| int i; | |||
| @@ -4,20 +4,18 @@ | |||
| #ifndef __P521_H__ | |||
| #define __P521_H__ 1 | |||
| #include "f_field.h" | |||
| #include <stdint.h> | |||
| #include <assert.h> | |||
| #include <string.h> | |||
| #include "word.h" | |||
| #include "constant_time.h" | |||
| /* FIXME: Currently doesn't work at all, because the struct is declared [9] and not [12] */ | |||
| #define LIMBPERM(x) (((x)%3)*4 + (x)/3) | |||
| #define USE_P521_3x3_TRANSPOSE | |||
| typedef struct gf_521_s { | |||
| uint64_t limb[12]; | |||
| } __attribute__((aligned(32))) gf_521_t; | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| @@ -29,43 +27,25 @@ typedef uint64x4_t uint64x3_t; /* fit it in a vector register */ | |||
| static const uint64x3_t mask58 = { (1ull<<58) - 1, (1ull<<58) - 1, (1ull<<58) - 1, 0 }; | |||
| /* Currently requires CLANG. Sorry. */ | |||
| static inline uint64x3_t | |||
| __attribute__((unused)) | |||
| timesW ( | |||
| uint64x3_t u | |||
| ) { | |||
| return u.zxyw + u.zwww; | |||
| static inline uint64x3_t timesW (uint64x3_t u) { | |||
| return u.zxyw + u.zwww; | |||
| } | |||
| void | |||
| gf_521_add_RAW ( | |||
| gf_521_t *out, | |||
| const gf_521_t *a, | |||
| const gf_521_t *b | |||
| ) { | |||
| void gf_add_RAW (gf *out, const gf *a, const gf *b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | |||
| } | |||
| } | |||
| void | |||
| gf_521_sub_RAW ( | |||
| gf_521_t *out, | |||
| const gf_521_t *a, | |||
| const gf_521_t *b | |||
| ) { | |||
| void gf_sub_RAW (gf *out, const gf *a, const gf *b) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | |||
| } | |||
| } | |||
| void | |||
| gf_521_bias ( | |||
| gf_521_t *a, | |||
| int amt | |||
| ) { | |||
| void gf_bias (gf *a, int amt) { | |||
| uint64_t co0 = ((1ull<<58)-2)*amt, co1 = ((1ull<<58)-1)*amt; | |||
| uint64x4_t vlo = { co0, co1, co1, 0 }, vhi = { co1, co1, co1, 0 }; | |||
| ((uint64x4_t*)a)[0] += vlo; | |||
| @@ -73,10 +53,7 @@ gf_521_bias ( | |||
| ((uint64x4_t*)a)[2] += vhi; | |||
| } | |||
| void | |||
| gf_521_weak_reduce ( | |||
| gf_521_t *a | |||
| ) { | |||
| void gf_weak_reduce (gf *a) { | |||
| #if 0 | |||
| int i; | |||
| assert(a->limb[3] == 0 && a->limb[7] == 0 && a->limb[11] == 0); | |||
| @@ -84,7 +61,6 @@ gf_521_weak_reduce ( | |||
| assert(a->limb[i] < 3ull<<61); | |||
| } | |||
| #endif | |||
| uint64x3_t | |||
| ot0 = ((uint64x4_t*)a)[0], | |||
| ot1 = ((uint64x4_t*)a)[1], | |||