| @@ -126,7 +126,7 @@ $(GEN_HEADERS): src/gen_headers/*.py src/public_include/decaf/* | |||
| ################################################################ | |||
| define define_field | |||
| ARCH_FOR_$(1) ?= $(2) | |||
| COMPONENTS_OF_$(1) = $$(BUILD_OBJ)/$(1)_impl.o $$(BUILD_OBJ)/$(1)_arithmetic.o | |||
| COMPONENTS_OF_$(1) = $$(BUILD_OBJ)/$(1)_impl.o $$(BUILD_OBJ)/$(1)_arithmetic.o $$(BUILD_OBJ)/$(1)_per_field.o | |||
| LIBCOMPONENTS += $$(COMPONENTS_OF_$(1)) | |||
| $$(BUILD_ASM)/$(1)_arithmetic.s: src/$(1)/f_arithmetic.c $$(HEADERS) | |||
| @@ -138,6 +138,11 @@ $$(BUILD_ASM)/$(1)_impl.s: src/$(1)/$$(ARCH_FOR_$(1))/f_impl.c $$(HEADERS) | |||
| $$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$$(ARCH_FOR_$(1)) -I $(BUILD_H)/$(1) \ | |||
| -I $(BUILD_H)/$(1)/$$(ARCH_FOR_$(1)) -I src/include/$$(ARCH_FOR_$(1)) \ | |||
| -S -c -o $$@ $$< | |||
| $$(BUILD_ASM)/$(1)_per_field.s: src/per_field.c $$(HEADERS) | |||
| $$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$$(ARCH_FOR_$(1)) -I $(BUILD_H)/$(1) \ | |||
| -I $(BUILD_H)/$(1)/$$(ARCH_FOR_$(1)) -I src/include/$$(ARCH_FOR_$(1)) \ | |||
| -S -c -o $$@ $$< | |||
| endef | |||
| ################################################################ | |||
| @@ -171,8 +176,8 @@ $$(BUILD_ASM)/decaf_$(1).s: src/decaf.c $$(HEADERS) | |||
| $$(BUILD_ASM)/decaf_crypto_$(1).s: src/decaf_crypto.c $$(HEADERS) | |||
| $$(CC) $$(CFLAGS) \ | |||
| -I src/curve_$(1)/ \ | |||
| -I $(BUILD_H)/curve_$(1) \ | |||
| -I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) -I src/include/$$(ARCH_FOR_$(2)) \ | |||
| -I $(BUILD_H)/curve_$(1) -I $(BUILD_H)/$(2) -I $(BUILD_H)/$(2)/$$(ARCH_FOR_$(2)) \ | |||
| -S -c -o $$@ $$< | |||
| LIBCOMPONENTS += $$(BUILD_OBJ)/decaf_$(1).o $$(BUILD_OBJ)/decaf_tables_$(1).o | |||
| @@ -8,7 +8,6 @@ | |||
| #define scalar_t decaf_255_scalar_t | |||
| #define point_t decaf_255_point_t | |||
| #define precomputed_s decaf_255_precomputed_s | |||
| #define SER_BYTES DECAF_255_SER_BYTES | |||
| #define IMAGINE_TWIST 1 | |||
| #define P_MOD_8 5 | |||
| #define COFACTOR 8 | |||
| @@ -7,7 +7,6 @@ | |||
| #define scalar_t decaf_448_scalar_t | |||
| #define point_t decaf_448_point_t | |||
| #define precomputed_s decaf_448_precomputed_s | |||
| #define SER_BYTES DECAF_448_SER_BYTES | |||
| #define IMAGINE_TWIST 0 | |||
| #define P_MOD_8 7 | |||
| #define COFACTOR 4 | |||
| @@ -50,17 +50,8 @@ extern const gf SQRT_ONE_MINUS_D; /* TODO: Intern this? */ | |||
| const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}}; | |||
| extern const scalar_t API_NS(sc_r2); | |||
| extern const decaf_word_t API_NS(MONTGOMERY_FACTOR); | |||
| extern const point_t API_NS(point_base); | |||
| /* These are externally exposed (but private) instead of static so that | |||
| * f_arithmetic.c can use it | |||
| */ | |||
| #define ONE API_NS(ONE) | |||
| #define ZERO API_NS(ZERO) | |||
| #define gf_eq API_NS(gf_eq) | |||
| const gf ZERO = {{{0}}}, ONE = {{{1}}}; | |||
| /* Projective Niels coordinates */ | |||
| typedef struct { gf a, b, c; } niels_s, niels_t[1]; | |||
| typedef struct { niels_t n; gf z; } __attribute__((aligned(32))) pniels_s, pniels_t[1]; /* MAGIC alignment */ | |||
| @@ -75,93 +66,9 @@ const precomputed_s *API_NS(precomputed_base) = | |||
| const size_t API_NS2(sizeof,precomputed_s) = sizeof(precomputed_s); | |||
| const size_t API_NS2(alignof,precomputed_s) = 32; | |||
/* TODO PERF: Vectorize vs unroll */
/* UNROLL is placed directly before a loop.  On clang newer than 3.5 it
 * expands to a full-unroll loop pragma; otherwise it expands to nothing.
 */
#if defined(__clang__) && (100*__clang_major__ + __clang_minor__ > 305)
#define UNROLL _Pragma("clang loop unroll(full)") /* PERF TODO: vectorize? */
#endif
#ifndef UNROLL
#define UNROLL
#endif

/* Run `op` once for each limb index i = 0 .. NLIMBS-1. */
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }}
/* Same as FOR_LIMB, with the loop tagged UNROLL. */
#define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }}
| /* FUTURE: move this code from per-curve to per-field header | |||
| * (like f_arithmetic.c but same for all fields) | |||
| */ | |||
| void gf_serialize (uint8_t serial[SER_BYTES], const gf x) { | |||
| gf red; | |||
| gf_copy(red, x); | |||
| gf_strong_reduce(red); | |||
| unsigned int j=0, fill=0; | |||
| dword_t buffer = 0; | |||
| UNROLL for (unsigned int i=0; i<SER_BYTES; i++) { | |||
| if (fill < 8 && j < NLIMBS) { | |||
| buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill; | |||
| fill += LIMB_PLACE_VALUE(LIMBPERM(j)); | |||
| j++; | |||
| } | |||
| serial[i] = buffer; | |||
| fill -= 8; | |||
| buffer >>= 8; | |||
| } | |||
| } | |||
| mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) { | |||
| unsigned int j=0, fill=0; | |||
| dword_t buffer = 0; | |||
| dsword_t scarry = 0; | |||
| UNROLL for (unsigned int i=0; i<NLIMBS; i++) { | |||
| UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < SER_BYTES) { | |||
| buffer |= ((dword_t)serial[j]) << fill; | |||
| fill += 8; | |||
| j++; | |||
| } | |||
| x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer; | |||
| fill -= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||
| buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||
| scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t)); | |||
| } | |||
| return word_is_zero(buffer) & ~word_is_zero(scarry); | |||
| } | |||
| void gf_strong_reduce (gf a) { | |||
| /* first, clear high */ | |||
| gf_weak_reduce(a); /* PERF: only really need one step of this, but whatevs */ | |||
| /* now the total is less than 2p */ | |||
| /* compute total_value - p. No need to reduce mod p. */ | |||
| dsword_t scarry = 0; | |||
| for (unsigned int i=0; i<NLIMBS; i++) { | |||
| scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]; | |||
| a->limb[i] = scarry & LIMB_MASK(LIMBPERM(i)); | |||
| scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||
| } | |||
| /* uncommon case: it was >= p, so now scarry = 0 and this = x | |||
| * common case: it was < p, so now scarry = -1 and this = x - p + 2^255 | |||
| * so let's add back in p. will carry back off the top for 2^255. | |||
| */ | |||
| assert(word_is_zero(scarry) | word_is_zero(scarry+1)); | |||
| word_t scarry_0 = scarry; | |||
| dword_t carry = 0; | |||
| /* add it back */ | |||
| for (unsigned int i=0; i<NLIMBS; i++) { | |||
| carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]); | |||
| a->limb[i] = carry & LIMB_MASK(LIMBPERM(i)); | |||
| carry >>= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||
| } | |||
| assert(word_is_zero(carry + scarry_0)); | |||
| } | |||
| /** Constant time, x = is_z ? z : y */ | |||
| static INLINE void | |||
| cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { | |||
| @@ -186,21 +93,6 @@ cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) { | |||
| } | |||
| } | |||
| /** Compare a==b */ | |||
| /* Not static because it's used in inverse square root. */ | |||
| decaf_word_t gf_eq(const gf a, const gf b); | |||
| decaf_word_t gf_eq(const gf a, const gf b) { | |||
| gf c; | |||
| gf_sub(c,a,b); | |||
| gf_strong_reduce(c); | |||
| decaf_word_t ret=0; | |||
| for (unsigned int i=0; i<sizeof(c->limb)/sizeof(c->limb[0]); i++) { | |||
| ret |= c->limb[i]; | |||
| } | |||
| return word_is_zero(ret); | |||
| } | |||
| /** Inverse square root using addition chain. */ | |||
| static decaf_bool_t | |||
| gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) { | |||
| @@ -8,6 +8,7 @@ | |||
| * @brief Example Decaf crypto routines | |||
| */ | |||
| #include "f_field.h" /* for SER_BYTES; FUTURE: find a better way to do this? */ | |||
| #include <decaf/crypto.h> | |||
| #include <string.h> | |||
| @@ -14,14 +14,19 @@ f_field_h = gen_file( | |||
| #define __DECAF_%(gf_shortname)s_GF_DEFINED__ 1 | |||
| #define NLIMBS (%(gf_impl_bits)d/sizeof(word_t)/8) | |||
| #define SER_BYTES ((%(gf_bits)d-1)/8 + 1) | |||
| typedef struct gf_%(gf_shortname)s_s { | |||
| word_t limb[NLIMBS]; | |||
| } __attribute__((aligned(32))) gf_%(gf_shortname)s_s, gf_%(gf_shortname)s_t[1]; | |||
| #define GF_LIT_LIMB_BITS %(gf_lit_limb_bits)d | |||
| #define GF_BITS %(gf_bits)d | |||
| #define ZERO gf_%(gf_shortname)s_ZERO | |||
| #define ONE gf_%(gf_shortname)s_ONE | |||
| #define MODULUS gf_%(gf_shortname)s_MODULUS | |||
| #define gf gf_%(gf_shortname)s_t | |||
| #define gf_s gf_%(gf_shortname)s_s | |||
| #define gf_eq gf_%(gf_shortname)s_eq | |||
| #define gf_copy gf_%(gf_shortname)s_copy | |||
| #define gf_add_RAW gf_%(gf_shortname)s_add_RAW | |||
| #define gf_sub_RAW gf_%(gf_shortname)s_sub_RAW | |||
| @@ -34,7 +39,6 @@ typedef struct gf_%(gf_shortname)s_s { | |||
| #define gf_isr gf_%(gf_shortname)s_isr | |||
| #define gf_serialize gf_%(gf_shortname)s_serialize | |||
| #define gf_deserialize gf_%(gf_shortname)s_deserialize | |||
| #define MODULUS gf_%(gf_shortname)s_MODULUS | |||
| #define SQRT_MINUS_ONE P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */ | |||
| @@ -44,7 +48,7 @@ typedef struct gf_%(gf_shortname)s_s { | |||
| extern "C" { | |||
| #endif | |||
| const gf MODULUS; | |||
| const gf MODULUS, ZERO, ONE; | |||
| /* Defined below in f_impl.h */ | |||
| static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; } | |||
| @@ -58,6 +62,7 @@ void gf_mul (gf_s *__restrict__ out, const gf a, const gf b); | |||
| void gf_mulw (gf_s *__restrict__ out, const gf a, uint64_t b); | |||
| void gf_sqr (gf_s *__restrict__ out, const gf a); | |||
| void gf_serialize (uint8_t *serial, const gf x); | |||
| mask_t gf_eq (const gf x, const gf y); | |||
| mask_t gf_deserialize (gf x, const uint8_t serial[(GF_BITS-1)/8+1]); | |||
| #ifdef __cplusplus | |||
| @@ -241,4 +241,15 @@ malloc_vector(size_t size) { | |||
| } | |||
| } | |||
/* PERF: vectorize vs unroll */
/* UNROLL is placed directly before a loop.  On clang newer than 3.5 it
 * expands to a full-unroll loop pragma; otherwise it expands to nothing.
 */
#if defined(__clang__) && (100*__clang_major__ + __clang_minor__ > 305)
#define UNROLL _Pragma("clang loop unroll(full)") /* PERF TODO: vectorize? */
#endif
#ifndef UNROLL
#define UNROLL
#endif
| #endif /* __WORD_H__ */ | |||
| @@ -11,7 +11,7 @@ | |||
| #include "field.h" | |||
| #include "constant_time.h" | |||
| const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL( | |||
| const gf_25519_t SQRT_MINUS_ONE = {FIELD_LITERAL( | |||
| 0x61b274a0ea0b0, | |||
| 0x0d5a5fc8f189d, | |||
| 0x7ef5e9cbd0c60, | |||
| @@ -22,10 +22,6 @@ const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL( | |||
| const gf MODULUS = {FIELD_LITERAL( | |||
| 0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff | |||
| )}; | |||
| /* TODO put in header */ | |||
| extern const gf_25519_t decaf_255_ONE; | |||
| extern mask_t decaf_255_gf_eq(const gf_25519_t a, const gf_25519_t b); | |||
| /* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */ | |||
| void gf_isr ( | |||
| @@ -44,8 +40,8 @@ void gf_isr ( | |||
| st[i&1][0] = tmp2[0]; | |||
| } | |||
| mask_t mask = decaf_255_gf_eq(st[1],decaf_255_ONE) | decaf_255_gf_eq(st[1],SQRT_MINUS_ONE); | |||
| mask_t mask = gf_eq(st[1],ONE) | gf_eq(st[1],SQRT_MINUS_ONE); | |||
| constant_time_select(tmp1, decaf_255_ONE, SQRT_MINUS_ONE, sizeof(tmp1), mask, 0); | |||
| constant_time_select(tmp1, ONE, SQRT_MINUS_ONE, sizeof(tmp1), mask, 0); | |||
| gf_mul(a,tmp1,st[0]); | |||
| } | |||
| @@ -0,0 +1,99 @@ | |||
| /** | |||
| * @cond internal | |||
| * @file per_field.c | |||
| * @copyright | |||
| * Copyright (c) 2015-2016 Cryptography Research, Inc. \n | |||
| * Released under the MIT License. See LICENSE.txt for license information. | |||
| * @author Mike Hamburg | |||
| * @brief Generic arithmetic which has to be compiled per field. | |||
| */ | |||
| #include "field.h" | |||
| const gf ZERO = {{{0}}}, ONE = {{{1}}}; | |||
| /** Serialize to wire format. */ | |||
| void gf_serialize (uint8_t serial[SER_BYTES], const gf x) { | |||
| gf red; | |||
| gf_copy(red, x); | |||
| gf_strong_reduce(red); | |||
| unsigned int j=0, fill=0; | |||
| dword_t buffer = 0; | |||
| UNROLL for (unsigned int i=0; i<SER_BYTES; i++) { | |||
| if (fill < 8 && j < NLIMBS) { | |||
| buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill; | |||
| fill += LIMB_PLACE_VALUE(LIMBPERM(j)); | |||
| j++; | |||
| } | |||
| serial[i] = buffer; | |||
| fill -= 8; | |||
| buffer >>= 8; | |||
| } | |||
| } | |||
| /** Deserialize from wire format; return -1 on success and 0 on failure. */ | |||
| mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) { | |||
| unsigned int j=0, fill=0; | |||
| dword_t buffer = 0; | |||
| dsword_t scarry = 0; | |||
| UNROLL for (unsigned int i=0; i<NLIMBS; i++) { | |||
| UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < SER_BYTES) { | |||
| buffer |= ((dword_t)serial[j]) << fill; | |||
| fill += 8; | |||
| j++; | |||
| } | |||
| x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer; | |||
| fill -= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||
| buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||
| scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t)); | |||
| } | |||
| return word_is_zero(buffer) & ~word_is_zero(scarry); | |||
| } | |||
| /** Reduce to canonical form. */ | |||
| void gf_strong_reduce (gf a) { | |||
| /* first, clear high */ | |||
| gf_weak_reduce(a); /* Determined to have negligible perf impact. */ | |||
| /* now the total is less than 2p */ | |||
| /* compute total_value - p. No need to reduce mod p. */ | |||
| dsword_t scarry = 0; | |||
| for (unsigned int i=0; i<NLIMBS; i++) { | |||
| scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]; | |||
| a->limb[i] = scarry & LIMB_MASK(LIMBPERM(i)); | |||
| scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||
| } | |||
| /* uncommon case: it was >= p, so now scarry = 0 and this = x | |||
| * common case: it was < p, so now scarry = -1 and this = x - p + 2^255 | |||
| * so let's add back in p. will carry back off the top for 2^255. | |||
| */ | |||
| assert(word_is_zero(scarry) | word_is_zero(scarry+1)); | |||
| word_t scarry_0 = scarry; | |||
| dword_t carry = 0; | |||
| /* add it back */ | |||
| for (unsigned int i=0; i<NLIMBS; i++) { | |||
| carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]); | |||
| a->limb[i] = carry & LIMB_MASK(LIMBPERM(i)); | |||
| carry >>= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||
| } | |||
| assert(word_is_zero(carry + scarry_0)); | |||
| } | |||
| /** Compare a==b */ | |||
| mask_t gf_eq(const gf a, const gf b) { | |||
| gf c; | |||
| gf_sub(c,a,b); | |||
| gf_strong_reduce(c); | |||
| mask_t ret=0; | |||
| for (unsigned int i=0; i<sizeof(c->limb)/sizeof(c->limb[0]); i++) { | |||
| ret |= c->limb[i]; | |||
| } | |||
| return word_is_zero(ret); | |||
| } | |||