From 16693dfaa7bb8508534e27e85794f348dcba0a4b Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Fri, 16 Jan 2015 16:23:53 -0800 Subject: [PATCH 01/15] port simplifying changes from decaf branch; going to make a few more as well --- src/ec_point.c | 145 +++++++++++++++++------------------------------ src/p448/field.h | 123 ---------------------------------------- 2 files changed, 52 insertions(+), 216 deletions(-) delete mode 100644 src/p448/field.h diff --git a/src/ec_point.c b/src/ec_point.c index c13279e..a625641 100644 --- a/src/ec_point.c +++ b/src/ec_point.c @@ -53,27 +53,42 @@ field_mulw_scc_wr ( field_weak_reduce(out); } +static __inline__ void +field_subx ( + struct field_t *d, + const struct field_t *a, + const struct field_t *b +) { + field_sub ( d, a, b ); + field_bias( d, 2 ); + IF32( field_weak_reduce ( d ) ); +} + +static __inline__ void +field_negx ( + struct field_t *d, + const struct field_t *a +) { + field_neg ( d, a ); + field_bias( d, 2 ); + IF32( field_weak_reduce ( d ) ); +} + void add_tw_niels_to_tw_extensible ( struct tw_extensible_t* d, const struct tw_niels_t* e ) { struct field_t L0, L1; - field_sub ( &L1, &d->y, &d->x ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); + field_subx ( &L1, &d->y, &d->x ); field_mul ( &L0, &e->a, &L1 ); field_add ( &L1, &d->x, &d->y ); field_mul ( &d->y, &e->b, &L1 ); field_mul ( &L1, &d->u, &d->t ); field_mul ( &d->x, &e->c, &L1 ); field_add ( &d->u, &L0, &d->y ); - field_sub ( &d->t, &d->y, &L0 ); - field_bias ( &d->t, 2 ); - IF32( field_weak_reduce( &d->t ) ); - field_sub ( &d->y, &d->z, &d->x ); - field_bias ( &d->y, 2 ); - IF32( field_weak_reduce( &d->y ) ); + field_subx ( &d->t, &d->y, &L0 ); + field_subx ( &d->y, &d->z, &d->x ); field_add ( &L0, &d->x, &d->z ); field_mul ( &d->z, &L0, &d->y ); field_mul ( &d->x, &d->y, &d->t ); @@ -86,22 +101,16 @@ sub_tw_niels_from_tw_extensible ( const struct tw_niels_t* e ) { struct field_t L0, L1; - field_sub ( &L1, &d->y, &d->x ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); + field_subx ( &L1, &d->y, &d->x ); field_mul ( &L0, &e->b, &L1 ); field_add ( &L1, &d->x, &d->y ); field_mul ( &d->y, &e->a, &L1 ); field_mul ( &L1, &d->u, &d->t ); field_mul ( &d->x, &e->c, &L1 ); field_add ( &d->u, &L0, &d->y ); - field_sub ( &d->t, &d->y, &L0 ); - field_bias ( &d->t, 2 ); - IF32( field_weak_reduce( &d->t ) ); + field_subx ( &d->t, &d->y, &L0 ); field_add ( &d->y, &d->x, &d->z ); - field_sub ( &L0, &d->z, &d->x ); - field_bias ( &L0, 2 ); - IF32( field_weak_reduce( &L0 ) ); + field_subx ( &L0, &d->z, &d->x ); field_mul ( &d->z, &L0, &d->y ); field_mul ( &d->x, &d->y, &d->t ); field_mul ( &d->y, &L0, &d->u ); @@ -142,9 +151,7 @@ double_tw_extensible ( field_sub ( &a->t, &L1, &a->u ); field_bias ( &a->t, 3 ); IF32( field_weak_reduce( &a->t ) ); - field_sub ( &L1, &L0, &L2 ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); + field_subx ( &L1, &L0, &L2 ); field_sqr ( &a->x, &a->z ); field_bias ( &a->x, 2-is32 /*is32 ? 
1 : 2*/ ); field_add ( &a->z, &a->x, &a->x ); @@ -168,9 +175,7 @@ double_extensible ( field_sub ( &a->t, &a->u, &L1 ); field_bias ( &a->t, 3 ); IF32( field_weak_reduce( &a->t ) ); - field_sub ( &a->u, &L0, &L2 ); - field_bias ( &a->u, 2 ); - IF32( field_weak_reduce( &a->u ) ); + field_subx ( &a->u, &L0, &L2 ); field_sqr ( &a->x, &a->z ); field_bias ( &a->x, 2 ); field_add ( &a->z, &a->x, &a->x ); @@ -195,9 +200,7 @@ twist_and_double ( field_sub ( &b->t, &L0, &b->u ); field_bias ( &b->t, 3 ); IF32( field_weak_reduce( &b->t ) ); - field_sub ( &L0, &b->z, &b->x ); - field_bias ( &L0, 2 ); - IF32( field_weak_reduce( &L0 ) ); + field_subx ( &L0, &b->z, &b->x ); field_sqr ( &b->x, &a->z ); field_bias ( &b->x, 2 ); field_add ( &b->z, &b->x, &b->x ); @@ -222,9 +225,7 @@ untwist_and_double ( field_sub ( &b->t, &b->u, &L0 ); field_bias ( &b->t, 3 ); IF32( field_weak_reduce( &b->t ) ); - field_sub ( &b->u, &b->z, &b->x ); - field_bias ( &b->u, 2 ); - IF32( field_weak_reduce( &b->u ) ); + field_subx ( &b->u, &b->z, &b->x ); field_sqr ( &b->x, &a->z ); field_bias ( &b->x, 2-is32 /*is32 ? 1 : 2*/ ); field_add ( &b->z, &b->x, &b->x ); @@ -296,9 +297,7 @@ convert_tw_pniels_to_tw_extensible ( const struct tw_pniels_t* d ) { field_add ( &e->u, &d->n.b, &d->n.a ); - field_sub ( &e->t, &d->n.b, &d->n.a ); - field_bias ( &e->t, 2 ); - IF32( field_weak_reduce( &e->t ) ); + field_subx ( &e->t, &d->n.b, &d->n.a ); field_mul ( &e->x, &d->z, &e->t ); field_mul ( &e->y, &d->z, &e->u ); field_sqr ( &e->z, &d->z ); @@ -325,28 +324,20 @@ montgomery_step ( ) { struct field_t L0, L1; field_add ( &L0, &a->zd, &a->xd ); - field_sub ( &L1, &a->xd, &a->zd ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); - field_sub ( &a->zd, &a->xa, &a->za ); - field_bias ( &a->zd, 2 ); - IF32( field_weak_reduce( &a->zd ) ); + field_subx ( &L1, &a->xd, &a->zd ); + field_subx ( &a->zd, &a->xa, &a->za ); field_mul ( &a->xd, &L0, &a->zd ); field_add ( &a->zd, &a->za, &a->xa ); field_mul ( &a->za, &L1, &a->zd ); field_add ( &a->xa, &a->za, &a->xd ); field_sqr ( &a->zd, &a->xa ); field_mul ( &a->xa, &a->z0, &a->zd ); - field_sub ( &a->zd, &a->xd, &a->za ); - field_bias ( &a->zd, 2 ); - IF32( field_weak_reduce( &a->zd ) ); + field_subx ( &a->zd, &a->xd, &a->za ); field_sqr ( &a->za, &a->zd ); field_sqr ( &a->xd, &L0 ); field_sqr ( &L0, &L1 ); field_mulw_scc ( &a->zd, &a->xd, 1-EDWARDS_D ); /* FIXME PERF MULW */ - field_sub ( &L1, &a->xd, &L0 ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); + field_subx ( &L1, &a->xd, &L0 ); field_mul ( &a->xd, &L0, &a->zd ); field_sub ( &L0, &a->zd, &L1 ); field_bias ( &L0, 4 - 2*is32 /*is32 ? 
2 : 4*/ ); @@ -375,19 +366,13 @@ serialize_montgomery ( mask_t L4, L5, L6; struct field_t L0, L1, L2, L3; field_mul ( &L3, &a->z0, &a->zd ); - field_sub ( &L1, &L3, &a->xd ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); + field_subx ( &L1, &L3, &a->xd ); field_mul ( &L3, &a->za, &L1 ); field_mul ( &L2, &a->z0, &a->xd ); - field_sub ( &L1, &L2, &a->zd ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); + field_subx ( &L1, &L2, &a->zd ); field_mul ( &L0, &a->xa, &L1 ); field_add ( &L2, &L0, &L3 ); - field_sub ( &L1, &L3, &L0 ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); + field_subx ( &L1, &L3, &L0 ); field_mul ( &L3, &L1, &L2 ); field_copy ( &L2, &a->z0 ); field_addw ( &L2, 1 ); @@ -427,9 +412,7 @@ serialize_extensible ( const struct extensible_t* a ) { struct field_t L0, L1, L2; - field_sub ( &L0, &a->y, &a->z ); - field_bias ( &L0, 2 ); - IF32( field_weak_reduce( &L0 ) ); + field_subx ( &L0, &a->y, &a->z ); field_add ( b, &a->z, &a->y ); field_mul ( &L1, &a->z, &a->x ); field_mul ( &L2, &L0, &L1 ); @@ -477,16 +460,10 @@ twist_even ( mask_t L0, L1; field_sqr ( &b->y, &a->z ); field_sqr ( &b->z, &a->x ); - field_sub ( &b->u, &b->y, &b->z ); - field_bias ( &b->u, 2 ); - IF32( field_weak_reduce( &b->u ) ); - field_sub ( &b->z, &a->z, &a->x ); - field_bias ( &b->z, 2 ); - IF32( field_weak_reduce( &b->z ) ); + field_subx ( &b->u, &b->y, &b->z ); + field_subx ( &b->z, &a->z, &a->x ); field_mul ( &b->y, &b->z, &a->y ); - field_sub ( &b->z, &a->z, &a->y ); - field_bias ( &b->z, 2 ); - IF32( field_weak_reduce( &b->z ) ); + field_subx ( &b->z, &a->z, &a->y ); field_mul ( &b->x, &b->z, &b->y ); field_mul ( &b->t, &b->x, &b->u ); field_mul ( &b->y, &b->x, &b->t ); @@ -519,13 +496,9 @@ test_only_twist ( field_add ( &b->y, &b->z, &b->z ); field_add ( &b->u, &b->y, &b->y ); IF32( field_weak_reduce( &b->u ) ); - field_sub ( &b->y, &a->z, &a->x ); - field_bias ( &b->y, 2 ); - IF32( field_weak_reduce( &b->y ) ); + field_subx ( &b->y, &a->z, &a->x ); field_mul ( &b->x, &b->y, &a->y ); - field_sub ( &b->z, &a->z, &a->y ); - field_bias ( &b->z, 2 ); - IF32( field_weak_reduce( &b->z ) ); + field_subx ( &b->z, &a->z, &a->y ); field_mul ( &b->t, &b->z, &b->x ); field_mul ( &L1, &b->t, &b->u ); field_mul ( &b->x, &b->t, &L1 ); @@ -535,14 +508,10 @@ test_only_twist ( field_mul ( &b->t, &b->x, &L1 ); field_add ( &L1, &a->y, &a->x ); IF32( field_weak_reduce( &L1 ) ); - field_sub ( &L0, &a->x, &a->y ); - field_bias ( &L0, 2 ); - IF32( field_weak_reduce( &L0 ) ); + field_subx ( &L0, &a->x, &a->y ); field_mul ( &b->x, &b->t, &L0 ); field_add ( &L0, &b->x, &L1 ); - field_sub ( &b->t, &L1, &b->x ); - field_bias ( &b->t, 2 ); - IF32( field_weak_reduce( &b->t ) ); + field_subx ( &b->t, &L1, &b->x ); field_mul ( &b->x, &L0, &b->u ); L2 = field_is_zero( &b->y ); L3 = - L2; @@ -567,9 +536,7 @@ is_even_pt ( struct field_t L0, L1, L2; field_sqr ( &L2, &a->z ); field_sqr ( &L1, &a->x ); - field_sub ( &L0, &L2, &L1 ); - field_bias ( &L0, 2 ); - field_weak_reduce( &L0 ); + field_subx ( &L0, &L2, &L1 ); return field_is_square ( &L0 ); } @@ -602,9 +569,7 @@ deserialize_affine ( IF32( field_weak_reduce( &L3 ) ); field_copy ( &a->y, &L1 ); field_subw ( &a->y, 1 ); - field_neg ( &a->x, &a->y ); - field_bias ( &a->x, 2 ); - IF32( field_weak_reduce( &a->x ) ); + field_negx ( &a->x, &a->y ); field_mul ( &a->y, &a->x, &L3 ); field_sqr ( &L2, &a->x ); field_mul ( &L0, &L2, &a->y ); @@ -641,9 +606,7 @@ deserialize_and_twist_approx ( IF32( field_weak_reduce( &a->y ) ); field_sqr ( &a->x, &a->z ); 
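/* note: here a->x = a->z^2; the subw/negx pair below computes
 * a->u = 1 - a->z^2, with field_negx (defined above in this patch)
 * folding in the bias-by-2 and the 32-bit weak reduce. */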
field_subw ( &a->x, 1 ); - field_neg ( &a->u, &a->x ); - field_bias ( &a->u, 2 ); - IF32( field_weak_reduce( &a->u ) ); + field_negx ( &a->u, &a->x ); field_mul ( &a->x, sdm1, &a->u ); field_mul ( &L0, &a->x, &a->y ); field_mul ( &a->t, &L0, &a->y ); @@ -659,9 +622,7 @@ deserialize_and_twist_approx ( field_mul ( &L0, &a->u, &a->x ); field_copy ( &a->x, &a->z ); field_subw ( &a->x, 1 ); - field_neg ( &L1, &a->x ); - field_bias ( &L1, 2 ); - IF32( field_weak_reduce( &L1 ) ); + field_negx ( &L1, &a->x ); field_mul ( &a->x, &L1, &L0 ); field_mul ( &L0, &a->u, &a->y ); field_addw ( &a->z, 1 ); @@ -772,9 +733,7 @@ elligator_2s_inject ( field_sqr ( &L3, &a->x ); field_copy ( &a->y, &L3 ); field_subw ( &a->y, 1 ); - field_neg ( &L4, &a->y ); - field_bias ( &L4, 2 ); - IF32( field_weak_reduce( &L4 ) ); + field_negx ( &L4, &a->y ); field_sqr ( &L2, &L4 ); field_mulw ( &L7, &L2, (EDWARDS_D-1)*(EDWARDS_D-1) ); field_mulw ( &L8, &L3, 4*(EDWARDS_D+1)*(EDWARDS_D+1) ); diff --git a/src/p448/field.h b/src/p448/field.h deleted file mode 100644 index bf36e95..0000000 --- a/src/p448/field.h +++ /dev/null @@ -1,123 +0,0 @@ -/** - * @file field.h - * @brief Field switch code. - * @copyright - * Copyright (c) 2014 Cryptography Research, Inc. \n - * Released under the MIT License. See LICENSE.txt for license information. - * @author Mike Hamburg - */ -#ifndef __FIELD_H__ -#define __FIELD_H__ - -#include -#include "constant_time.h" - -#include "p448.h" -#define FIELD_BITS 448 -#define field_t p448_t -#define field_mul p448_mul -#define field_sqr p448_sqr -#define field_add p448_add -#define field_sub p448_sub -#define field_mulw p448_mulw -#define field_addw p448_addw -#define field_subw p448_subw -#define field_neg p448_neg -#define field_set_ui p448_set_ui -#define field_bias p448_bias -#define field_cond_neg p448_cond_neg -#define field_inverse p448_inverse -#define field_eq p448_eq -#define field_isr p448_isr -#define field_simultaneous_invert p448_simultaneous_invert -#define field_weak_reduce p448_weak_reduce -#define field_strong_reduce p448_strong_reduce -#define field_serialize p448_serialize -#define field_deserialize p448_deserialize -#define field_is_zero p448_is_zero - -/** @brief Bytes in a field element */ -#define FIELD_BYTES (1+(FIELD_BITS-1)/8) - -/** @brief Words in a field element */ -#define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t)) - -/** - * @brief For GMP tests: little-endian representation of the field modulus. - */ -extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; - -/** - * Copy one field element to another. - */ -static inline void -__attribute__((unused,always_inline)) -field_copy ( - struct field_t *__restrict__ a, - const struct field_t *__restrict__ b -) { - memcpy(a,b,sizeof(*a)); -} - -/** - * Negate a in place if doNegate. - */ -static inline void -__attribute__((unused,always_inline)) -field_cond_neg( - field_t *a, - mask_t doNegate -) { - struct field_t negated; - field_neg(&negated, a); - field_bias(&negated, 2); - constant_time_select(a, &negated, a, sizeof(negated), doNegate); -} - -/** - * Returns 1/sqrt(+- x). - * - * The Legendre symbol of the result is the same as that of the - * input. - * - * If x=0, returns 0. - */ -void -field_isr ( - struct field_t* a, - const struct field_t* x -); - -/** - * Batch inverts out[i] = 1/in[i] - * - * If any input is zero, all the outputs will be zero. - */ -void -field_simultaneous_invert ( - struct field_t *__restrict__ out, - const struct field_t *in, - unsigned int n -); - -/** - * Returns 1/x. - * - * If x=0, returns 0. 
- */ -void -field_inverse ( - struct field_t* a, - const struct field_t* x -); - -/** - * Returns -1 if a==b, 0 otherwise. - */ -mask_t -field_eq ( - const struct field_t *a, - const struct field_t *b -); - -#endif /* __FIELD_H__ */ From 942066a16dcb76ae01c5d1187f033022c5e28ae8 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Thu, 22 Jan 2015 14:37:45 -0800 Subject: [PATCH 02/15] remove probably-unnecessary optimizations (still needs benching to make sure) --- src/arithmetic.c | 5 +- src/ec_point.c | 398 +++++++++---------------- src/include/field.h | 123 +++++++- src/p448/arch_32/p448.h | 12 +- src/p448/arch_arm_32/p448.h | 12 +- src/p448/arch_neon/p448.h | 12 +- src/p448/arch_neon_experimental/p448.h | 12 +- src/p448/arch_ref64/p448.h | 12 +- src/p448/arch_x86_64/p448.h | 12 +- src/p448/f_field.h | 11 +- src/p480/arch_x86_64/p480.h | 12 +- src/p480/f_field.h | 11 +- src/p521/arch_ref64/p521.h | 12 +- src/p521/arch_x86_64_r12/p521.h | 12 +- src/p521/f_field.h | 9 +- test/bench.c | 5 - test/test_arithmetic.c | 10 +- 17 files changed, 321 insertions(+), 359 deletions(-) diff --git a/src/arithmetic.c b/src/arithmetic.c index add3b49..4530aa3 100644 --- a/src/arithmetic.c +++ b/src/arithmetic.c @@ -21,7 +21,7 @@ field_eq ( field_copy(&rb, b); field_weak_reduce(&ra); field_weak_reduce(&rb); - field_sub(&ra, &ra, &rb); + field_sub_RAW(&ra, &ra, &rb); field_bias(&ra, 2); return field_is_zero(&ra); } @@ -47,8 +47,7 @@ field_is_square ( field_isr ( &L0, x ); field_sqr ( &L1, &L0 ); field_mul ( &L0, x, &L1 ); - field_subw ( &L0, 1 ); - field_bias ( &L0, 1 ); + field_subw( &L0, 1 ); L3 = field_is_zero( &L0 ); L2 = field_is_zero( x ); return L3 | L2; diff --git a/src/ec_point.c b/src/ec_point.c index a625641..a486df1 100644 --- a/src/ec_point.c +++ b/src/ec_point.c @@ -12,84 +12,23 @@ #include "ec_point.h" #include "magic.h" -#define is32 (GOLDI_BITS == 32 || FIELD_BITS != 448) -/* TODO XXX PERF FIXME: better detection of overflow conditions */ - -/* I wanted to just use if (is32) - * But clang's -Wunreachable-code flags it. - * I wanted to keep that warning on. 
- */ -#if (is32) -#define IF32(s) (s) -#else -#define IF32(s) -#endif - -/* Multiply by signed curve constant */ -static __inline__ void -field_mulw_scc ( - struct field_t* __restrict__ out, - const struct field_t *a, - int64_t scc -) { - if (scc >= 0) { - field_mulw(out, a, scc); - } else { - field_mulw(out, a, -scc); - field_neg(out,out); - field_bias(out,2); - } -} - -/* Multiply by signed curve constant and weak reduce if biased */ -static __inline__ void -field_mulw_scc_wr ( - struct field_t* __restrict__ out, - const struct field_t *a, - int64_t scc -) { - field_mulw_scc(out, a, scc); - if (scc < 0) - field_weak_reduce(out); -} - -static __inline__ void -field_subx ( - struct field_t *d, - const struct field_t *a, - const struct field_t *b -) { - field_sub ( d, a, b ); - field_bias( d, 2 ); - IF32( field_weak_reduce ( d ) ); -} - -static __inline__ void -field_negx ( - struct field_t *d, - const struct field_t *a -) { - field_neg ( d, a ); - field_bias( d, 2 ); - IF32( field_weak_reduce ( d ) ); -} - void add_tw_niels_to_tw_extensible ( struct tw_extensible_t* d, const struct tw_niels_t* e ) { + ANALYZE_THIS_ROUTINE_CAREFULLY; struct field_t L0, L1; - field_subx ( &L1, &d->y, &d->x ); + field_sub ( &L1, &d->y, &d->x ); field_mul ( &L0, &e->a, &L1 ); - field_add ( &L1, &d->x, &d->y ); + field_add_nr ( &L1, &d->x, &d->y ); field_mul ( &d->y, &e->b, &L1 ); field_mul ( &L1, &d->u, &d->t ); field_mul ( &d->x, &e->c, &L1 ); - field_add ( &d->u, &L0, &d->y ); - field_subx ( &d->t, &d->y, &L0 ); - field_subx ( &d->y, &d->z, &d->x ); - field_add ( &L0, &d->x, &d->z ); + field_add_nr ( &d->u, &L0, &d->y ); + field_subx_nr ( &d->t, &d->y, &L0 ); + field_subx_nr ( &d->y, &d->z, &d->x ); + field_add_nr ( &L0, &d->x, &d->z ); field_mul ( &d->z, &L0, &d->y ); field_mul ( &d->x, &d->y, &d->t ); field_mul ( &d->y, &L0, &d->u ); @@ -100,17 +39,18 @@ sub_tw_niels_from_tw_extensible ( struct tw_extensible_t* d, const struct tw_niels_t* e ) { + ANALYZE_THIS_ROUTINE_CAREFULLY; struct field_t L0, L1; - field_subx ( &L1, &d->y, &d->x ); + field_subx_nr ( &L1, &d->y, &d->x ); field_mul ( &L0, &e->b, &L1 ); - field_add ( &L1, &d->x, &d->y ); + field_add_nr ( &L1, &d->x, &d->y ); field_mul ( &d->y, &e->a, &L1 ); field_mul ( &L1, &d->u, &d->t ); field_mul ( &d->x, &e->c, &L1 ); - field_add ( &d->u, &L0, &d->y ); - field_subx ( &d->t, &d->y, &L0 ); - field_add ( &d->y, &d->x, &d->z ); - field_subx ( &L0, &d->z, &d->x ); + field_add_nr ( &d->u, &L0, &d->y ); + field_subx_nr ( &d->t, &d->y, &L0 ); + field_add_nr ( &d->y, &d->x, &d->z ); + field_subx_nr ( &L0, &d->z, &d->x ); field_mul ( &d->z, &L0, &d->y ); field_mul ( &d->x, &d->y, &d->t ); field_mul ( &d->y, &L0, &d->u ); @@ -142,20 +82,21 @@ void double_tw_extensible ( struct tw_extensible_t* a ) { + ANALYZE_THIS_ROUTINE_CAREFULLY; struct field_t L0, L1, L2; field_sqr ( &L2, &a->x ); field_sqr ( &L0, &a->y ); - field_add ( &a->u, &L2, &L0 ); - field_add ( &a->t, &a->y, &a->x ); + field_add_nr ( &a->u, &L2, &L0 ); + field_add_nr ( &a->t, &a->y, &a->x ); field_sqr ( &L1, &a->t ); - field_sub ( &a->t, &L1, &a->u ); + field_sub_nr ( &a->t, &L1, &a->u ); field_bias ( &a->t, 3 ); IF32( field_weak_reduce( &a->t ) ); - field_subx ( &L1, &L0, &L2 ); + field_subx_nr ( &L1, &L0, &L2 ); field_sqr ( &a->x, &a->z ); field_bias ( &a->x, 2-is32 /*is32 ? 
1 : 2*/ ); - field_add ( &a->z, &a->x, &a->x ); - field_sub ( &L0, &a->z, &L1 ); + field_add_nr ( &a->z, &a->x, &a->x ); + field_sub_nr ( &L0, &a->z, &L1 ); IF32( field_weak_reduce( &L0 ) ); field_mul ( &a->z, &L1, &L0 ); field_mul ( &a->x, &L0, &a->t ); @@ -166,20 +107,21 @@ void double_extensible ( struct extensible_t* a ) { + ANALYZE_THIS_ROUTINE_CAREFULLY; struct field_t L0, L1, L2; field_sqr ( &L2, &a->x ); field_sqr ( &L0, &a->y ); - field_add ( &L1, &L2, &L0 ); - field_add ( &a->t, &a->y, &a->x ); + field_add_nr ( &L1, &L2, &L0 ); + field_add_nr ( &a->t, &a->y, &a->x ); field_sqr ( &a->u, &a->t ); - field_sub ( &a->t, &a->u, &L1 ); + field_sub_nr ( &a->t, &a->u, &L1 ); field_bias ( &a->t, 3 ); IF32( field_weak_reduce( &a->t ) ); - field_subx ( &a->u, &L0, &L2 ); + field_subx_nr ( &a->u, &L0, &L2 ); field_sqr ( &a->x, &a->z ); field_bias ( &a->x, 2 ); - field_add ( &a->z, &a->x, &a->x ); - field_sub ( &L0, &a->z, &L1 ); + field_add_nr ( &a->z, &a->x, &a->x ); + field_sub_nr ( &L0, &a->z, &L1 ); IF32( field_weak_reduce( &L0 ) ); field_mul ( &a->z, &L1, &L0 ); field_mul ( &a->x, &L0, &a->t ); @@ -194,18 +136,14 @@ twist_and_double ( struct field_t L0; field_sqr ( &b->x, &a->x ); field_sqr ( &b->z, &a->y ); - field_add ( &b->u, &b->x, &b->z ); - field_add ( &b->t, &a->y, &a->x ); + field_add ( &b->u, &b->x, &b->z ); + field_add ( &b->t, &a->y, &a->x ); field_sqr ( &L0, &b->t ); - field_sub ( &b->t, &L0, &b->u ); - field_bias ( &b->t, 3 ); - IF32( field_weak_reduce( &b->t ) ); - field_subx ( &L0, &b->z, &b->x ); + field_sub ( &b->t, &L0, &b->u ); + field_sub ( &L0, &b->z, &b->x ); field_sqr ( &b->x, &a->z ); - field_bias ( &b->x, 2 ); - field_add ( &b->z, &b->x, &b->x ); - field_sub ( &b->y, &b->z, &b->u ); - IF32( field_weak_reduce( &b->y ) ); + field_add ( &b->z, &b->x, &b->x ); + field_sub ( &b->y, &b->z, &b->u ); field_mul ( &b->z, &L0, &b->y ); field_mul ( &b->x, &b->y, &b->t ); field_mul ( &b->y, &L0, &b->u ); @@ -219,18 +157,14 @@ untwist_and_double ( struct field_t L0; field_sqr ( &b->x, &a->x ); field_sqr ( &b->z, &a->y ); - field_add ( &L0, &b->x, &b->z ); - field_add ( &b->t, &a->y, &a->x ); + field_add ( &L0, &b->x, &b->z ); + field_add ( &b->t, &a->y, &a->x ); field_sqr ( &b->u, &b->t ); - field_sub ( &b->t, &b->u, &L0 ); - field_bias ( &b->t, 3 ); - IF32( field_weak_reduce( &b->t ) ); - field_subx ( &b->u, &b->z, &b->x ); + field_sub ( &b->t, &b->u, &L0 ); + field_sub ( &b->u, &b->z, &b->x ); field_sqr ( &b->x, &a->z ); - field_bias ( &b->x, 2-is32 /*is32 ? 
1 : 2*/ ); - field_add ( &b->z, &b->x, &b->x ); - field_sub ( &b->y, &b->z, &b->u ); - IF32( field_weak_reduce( &b->y ) ); + field_add ( &b->z, &b->x, &b->x ); + field_sub ( &b->y, &b->z, &b->u ); field_mul ( &b->z, &L0, &b->y ); field_mul ( &b->x, &b->y, &b->t ); field_mul ( &b->y, &L0, &b->u ); @@ -241,11 +175,8 @@ convert_tw_affine_to_tw_pniels ( struct tw_pniels_t* b, const struct tw_affine_t* a ) { - field_sub ( &b->n.a, &a->y, &a->x ); - field_bias ( &b->n.a, 2 ); - field_weak_reduce( &b->n.a ); - field_add ( &b->n.b, &a->x, &a->y ); - field_weak_reduce( &b->n.b ); + field_sub ( &b->n.a, &a->y, &a->x ); + field_add ( &b->n.b, &a->x, &a->y ); field_mul ( &b->z, &a->y, &a->x ); field_mulw_scc_wr ( &b->n.c, &b->z, 2*EDWARDS_D-2 ); field_set_ui( &b->z, 2 ); @@ -280,15 +211,11 @@ convert_tw_extensible_to_tw_pniels ( struct tw_pniels_t* b, const struct tw_extensible_t* a ) { - field_sub ( &b->n.a, &a->y, &a->x ); - field_bias ( &b->n.a, 2 ); - field_weak_reduce( &b->n.a ); - field_add ( &b->n.b, &a->x, &a->y ); - field_weak_reduce( &b->n.b ); + field_sub ( &b->n.a, &a->y, &a->x ); + field_add ( &b->n.b, &a->x, &a->y ); field_mul ( &b->z, &a->u, &a->t ); field_mulw_scc_wr ( &b->n.c, &b->z, 2*EDWARDS_D-2 ); - field_add ( &b->z, &a->z, &a->z ); - field_weak_reduce( &b->z ); + field_add ( &b->z, &a->z, &a->z ); } void @@ -296,8 +223,8 @@ convert_tw_pniels_to_tw_extensible ( struct tw_extensible_t* e, const struct tw_pniels_t* d ) { - field_add ( &e->u, &d->n.b, &d->n.a ); - field_subx ( &e->t, &d->n.b, &d->n.a ); + field_add ( &e->u, &d->n.b, &d->n.a ); + field_sub ( &e->t, &d->n.b, &d->n.a ); field_mul ( &e->x, &d->z, &e->t ); field_mul ( &e->y, &d->z, &e->u ); field_sqr ( &e->z, &d->z ); @@ -308,11 +235,8 @@ convert_tw_niels_to_tw_extensible ( struct tw_extensible_t* e, const struct tw_niels_t* d ) { - field_add ( &e->y, &d->b, &d->a ); - field_weak_reduce( &e->y ); - field_sub ( &e->x, &d->b, &d->a ); - field_bias ( &e->x, 2 ); - field_weak_reduce( &e->x ); + field_add ( &e->y, &d->b, &d->a ); + field_sub ( &e->x, &d->b, &d->a ); field_set_ui( &e->z, 1 ); field_copy ( &e->t, &e->x ); field_copy ( &e->u, &e->y ); @@ -322,24 +246,25 @@ void montgomery_step ( struct montgomery_t* a ) { + ANALYZE_THIS_ROUTINE_CAREFULLY; struct field_t L0, L1; - field_add ( &L0, &a->zd, &a->xd ); - field_subx ( &L1, &a->xd, &a->zd ); - field_subx ( &a->zd, &a->xa, &a->za ); + field_add_nr ( &L0, &a->zd, &a->xd ); + field_sub ( &L1, &a->xd, &a->zd ); + field_sub ( &a->zd, &a->xa, &a->za ); field_mul ( &a->xd, &L0, &a->zd ); - field_add ( &a->zd, &a->za, &a->xa ); + field_add_nr ( &a->zd, &a->za, &a->xa ); field_mul ( &a->za, &L1, &a->zd ); - field_add ( &a->xa, &a->za, &a->xd ); + field_add_nr ( &a->xa, &a->za, &a->xd ); field_sqr ( &a->zd, &a->xa ); field_mul ( &a->xa, &a->z0, &a->zd ); - field_subx ( &a->zd, &a->xd, &a->za ); + field_sub ( &a->zd, &a->xd, &a->za ); field_sqr ( &a->za, &a->zd ); field_sqr ( &a->xd, &L0 ); field_sqr ( &L0, &L1 ); field_mulw_scc ( &a->zd, &a->xd, 1-EDWARDS_D ); /* FIXME PERF MULW */ - field_subx ( &L1, &a->xd, &L0 ); + field_sub ( &L1, &a->xd, &L0 ); field_mul ( &a->xd, &L0, &a->zd ); - field_sub ( &L0, &a->zd, &L1 ); + field_sub_nr ( &L0, &a->zd, &L1 ); field_bias ( &L0, 4 - 2*is32 /*is32 ? 
2 : 4*/ ); IF32( field_weak_reduce( &L0 ) ); field_mul ( &a->zd, &L0, &L1 ); @@ -366,27 +291,26 @@ serialize_montgomery ( mask_t L4, L5, L6; struct field_t L0, L1, L2, L3; field_mul ( &L3, &a->z0, &a->zd ); - field_subx ( &L1, &L3, &a->xd ); + field_sub ( &L1, &L3, &a->xd ); field_mul ( &L3, &a->za, &L1 ); field_mul ( &L2, &a->z0, &a->xd ); - field_subx ( &L1, &L2, &a->zd ); + field_sub ( &L1, &L2, &a->zd ); field_mul ( &L0, &a->xa, &L1 ); - field_add ( &L2, &L0, &L3 ); - field_subx ( &L1, &L3, &L0 ); + field_add ( &L2, &L0, &L3 ); + field_sub ( &L1, &L3, &L0 ); field_mul ( &L3, &L1, &L2 ); field_copy ( &L2, &a->z0 ); field_addw ( &L2, 1 ); field_sqr ( &L0, &L2 ); field_mulw_scc_wr ( &L1, &L0, EDWARDS_D-1 ); - field_add ( &L2, &a->z0, &a->z0 ); - field_add ( &L0, &L2, &L2 ); - field_add ( &L2, &L0, &L1 ); - IF32( field_weak_reduce( &L2 ) ); + field_add ( &L2, &a->z0, &a->z0 ); + field_add ( &L0, &L2, &L2 ); + field_add ( &L2, &L0, &L1 ); field_mul ( &L0, &a->xd, &L2 ); L5 = field_is_zero( &a->zd ); L6 = - L5; constant_time_mask ( &L1, &L0, sizeof(L1), L5 ); - field_add ( &L2, &L1, &a->zd ); + field_add ( &L2, &L1, &a->zd ); L4 = ~ L5; field_mul ( &L1, sbz, &L3 ); field_addw ( &L1, L6 ); @@ -399,8 +323,7 @@ serialize_montgomery ( field_sqr ( &L1, &L0 ); field_mul ( &L0, &L3, &L1 ); constant_time_mask ( b, &L2, sizeof(L1), L4 ); - field_subw ( &L0, 1 ); - field_bias ( &L0, 1 ); + field_subw( &L0, 1 ); L5 = field_is_zero( &L0 ); L4 = field_is_zero( sbz ); return L5 | L4; @@ -412,8 +335,8 @@ serialize_extensible ( const struct extensible_t* a ) { struct field_t L0, L1, L2; - field_subx ( &L0, &a->y, &a->z ); - field_add ( b, &a->z, &a->y ); + field_sub ( &L0, &a->y, &a->z ); + field_add ( b, &a->z, &a->y ); field_mul ( &L1, &a->z, &a->x ); field_mul ( &L2, &L0, &L1 ); field_mul ( &L1, &L2, &L0 ); @@ -432,15 +355,13 @@ untwist_and_double_and_serialize ( ) { struct field_t L0, L1, L2, L3; field_mul ( &L3, &a->y, &a->x ); - field_add ( b, &a->y, &a->x ); + field_add ( b, &a->y, &a->x ); field_sqr ( &L1, b ); - field_add ( &L2, &L3, &L3 ); - field_sub ( b, &L1, &L2 ); - field_bias ( b, 3 ); - IF32( field_weak_reduce( b ) ); + field_add ( &L2, &L3, &L3 ); + field_sub ( b, &L1, &L2 ); field_sqr ( &L2, &a->z ); field_sqr ( &L1, &L2 ); - field_add ( b, b, b ); + field_add ( b, b, b ); field_mulw_scc ( &L2, b, EDWARDS_D-1 ); field_mulw_scc ( b, &L2, EDWARDS_D-1 ); field_mul ( &L0, &L2, &L1 ); @@ -457,13 +378,12 @@ twist_even ( struct tw_extensible_t* b, const struct extensible_t* a ) { - mask_t L0, L1; field_sqr ( &b->y, &a->z ); field_sqr ( &b->z, &a->x ); - field_subx ( &b->u, &b->y, &b->z ); - field_subx ( &b->z, &a->z, &a->x ); + field_sub ( &b->u, &b->y, &b->z ); + field_sub ( &b->z, &a->z, &a->x ); field_mul ( &b->y, &b->z, &a->y ); - field_subx ( &b->z, &a->z, &a->y ); + field_sub ( &b->z, &a->z, &a->y ); field_mul ( &b->x, &b->z, &b->y ); field_mul ( &b->t, &b->x, &b->u ); field_mul ( &b->y, &b->x, &b->t ); @@ -473,10 +393,7 @@ twist_even ( field_mul ( &b->t, &b->y, &b->x ); field_mul ( &b->x, &a->x, &b->u ); field_mul ( &b->y, &a->y, &b->u ); - L1 = field_is_zero( &b->z ); - L0 = - L1; - field_addw ( &b->y, L0 ); - field_weak_reduce( &b->y ); + field_addw ( &b->y, -field_is_zero( &b->z ) ); field_set_ui( &b->z, 1 ); field_copy ( &b->t, &b->x ); field_copy ( &b->u, &b->y ); @@ -487,18 +404,15 @@ test_only_twist ( struct tw_extensible_t* b, const struct extensible_t* a ) { - mask_t L2, L3; struct field_t L0, L1; field_sqr ( &b->u, &a->z ); field_sqr ( &b->y, &a->x ); - field_sub ( &b->z, &b->u, 
&b->y ); - field_bias ( &b->z, 2 ); - field_add ( &b->y, &b->z, &b->z ); - field_add ( &b->u, &b->y, &b->y ); - IF32( field_weak_reduce( &b->u ) ); - field_subx ( &b->y, &a->z, &a->x ); + field_sub ( &b->z, &b->u, &b->y ); + field_add ( &b->y, &b->z, &b->z ); + field_add ( &b->u, &b->y, &b->y ); + field_sub ( &b->y, &a->z, &a->x ); field_mul ( &b->x, &b->y, &a->y ); - field_subx ( &b->z, &a->z, &a->y ); + field_sub ( &b->z, &a->z, &a->y ); field_mul ( &b->t, &b->z, &b->x ); field_mul ( &L1, &b->t, &b->u ); field_mul ( &b->x, &b->t, &L1 ); @@ -506,25 +420,16 @@ test_only_twist ( field_mul ( &b->u, &b->t, &L0 ); field_sqr ( &L1, &L0 ); field_mul ( &b->t, &b->x, &L1 ); - field_add ( &L1, &a->y, &a->x ); - IF32( field_weak_reduce( &L1 ) ); - field_subx ( &L0, &a->x, &a->y ); + field_add ( &L1, &a->y, &a->x ); + field_sub ( &L0, &a->x, &a->y ); field_mul ( &b->x, &b->t, &L0 ); - field_add ( &L0, &b->x, &L1 ); - field_subx ( &b->t, &L1, &b->x ); + field_add ( &L0, &b->x, &L1 ); + field_sub ( &b->t, &L1, &b->x ); field_mul ( &b->x, &L0, &b->u ); - L2 = field_is_zero( &b->y ); - L3 = - L2; - field_addw ( &b->x, L3 ); - field_weak_reduce( &b->x ); + field_addw ( &b->x, -field_is_zero( &b->y ) ); field_mul ( &b->y, &b->t, &b->u ); - L2 = field_is_zero( &b->z ); - L3 = - L2; - field_addw ( &b->y, L3 ); - field_weak_reduce( &b->y ); - L3 = field_is_zero( &a->y ); - L2 = L3 + 1; - field_set_ui( &b->z, L2 ); + field_addw ( &b->y, -field_is_zero( &b->z ) ); + field_set_ui( &b->z, 1+field_is_zero( &a->y ) ); field_copy ( &b->t, &b->x ); field_copy ( &b->u, &b->y ); } @@ -536,7 +441,7 @@ is_even_pt ( struct field_t L0, L1, L2; field_sqr ( &L2, &a->z ); field_sqr ( &L1, &a->x ); - field_subx ( &L0, &L2, &L1 ); + field_sub ( &L0, &L2, &L1 ); return field_is_square ( &L0 ); } @@ -547,8 +452,7 @@ is_even_tw ( struct field_t L0, L1, L2; field_sqr ( &L2, &a->z ); field_sqr ( &L1, &a->x ); - field_add ( &L0, &L1, &L2 ); - field_weak_reduce( &L0 ); + field_add ( &L0, &L1, &L2 ); return field_is_square ( &L0 ); } @@ -563,13 +467,12 @@ deserialize_affine ( field_addw ( &L3, 1 ); field_sqr ( &L2, &L3 ); field_mulw_scc ( &a->x, &L2, EDWARDS_D-1 ); /* PERF MULW */ - field_add ( &L3, &L1, &L1 ); /* FIXME: i adjusted the bias here, was it right? */ - field_add ( &a->y, &L3, &L3 ); - field_add ( &L3, &a->y, &a->x ); - IF32( field_weak_reduce( &L3 ) ); + field_add ( &L3, &L1, &L1 ); /* FIXME: i adjusted the bias here, was it right? 
*/ + field_add ( &a->y, &L3, &L3 ); + field_add ( &L3, &a->y, &a->x ); field_copy ( &a->y, &L1 ); - field_subw ( &a->y, 1 ); field_negx ( &a->x, &a->y ); + field_addw ( &a->x, 1 ); field_mul ( &a->y, &a->x, &L3 ); field_sqr ( &L2, &a->x ); field_mul ( &L0, &L2, &a->y ); @@ -579,12 +482,11 @@ deserialize_affine ( field_sqr ( &L2, &L3 ); field_mul ( &L3, &L0, &L2 ); field_mul ( &L0, &a->x, &L3 ); - field_add ( &L2, &a->y, &a->y ); + field_add ( &L2, &a->y, &a->y ); field_mul ( &a->x, sz, &L2 ); field_addw ( &L1, 1 ); field_mul ( &a->y, &L1, &L3 ); - field_subw ( &L0, 1 ); - field_bias ( &L0, 1 ); + field_subw( &L0, 1 ); return field_is_zero( &L0 ); } @@ -600,13 +502,12 @@ deserialize_and_twist_approx ( field_addw ( &a->y, 1 ); field_sqr ( &L0, &a->y ); field_mulw_scc ( &a->x, &L0, EDWARDS_D-1 ); - field_add ( &a->y, &a->z, &a->z ); - field_add ( &a->u, &a->y, &a->y ); - field_add ( &a->y, &a->u, &a->x ); - IF32( field_weak_reduce( &a->y ) ); + field_add ( &a->y, &a->z, &a->z ); + field_add ( &a->u, &a->y, &a->y ); + field_add ( &a->y, &a->u, &a->x ); field_sqr ( &a->x, &a->z ); - field_subw ( &a->x, 1 ); field_negx ( &a->u, &a->x ); + field_addw ( &a->u, 1 ); field_mul ( &a->x, sdm1, &a->u ); field_mul ( &L0, &a->x, &a->y ); field_mul ( &a->t, &L0, &a->y ); @@ -618,17 +519,16 @@ deserialize_and_twist_approx ( field_sqr ( &L1, &L0 ); field_mul ( &a->u, &a->t, &L1 ); field_mul ( &a->t, &a->x, &a->u ); - field_add ( &a->x, sz, sz ); + field_add ( &a->x, sz, sz ); field_mul ( &L0, &a->u, &a->x ); field_copy ( &a->x, &a->z ); - field_subw ( &a->x, 1 ); field_negx ( &L1, &a->x ); + field_addw ( &L1, 1 ); field_mul ( &a->x, &L1, &L0 ); field_mul ( &L0, &a->u, &a->y ); field_addw ( &a->z, 1 ); field_mul ( &a->y, &a->z, &L0 ); - field_subw ( &a->t, 1 ); - field_bias ( &a->t, 1 ); + field_subw( &a->t, 1 ); mask_t ret = field_is_zero( &a->t ); field_set_ui( &a->z, 1 ); field_copy ( &a->t, &a->x ); @@ -673,11 +573,9 @@ eq_affine ( ) { mask_t L1, L2; struct field_t L0; - field_sub ( &L0, &a->x, &b->x ); - field_bias ( &L0, 2 ); + field_sub ( &L0, &a->x, &b->x ); L2 = field_is_zero( &L0 ); - field_sub ( &L0, &a->y, &b->y ); - field_bias ( &L0, 2 ); + field_sub ( &L0, &a->y, &b->y ); L1 = field_is_zero( &L0 ); return L2 & L1; } @@ -691,13 +589,11 @@ eq_extensible ( struct field_t L0, L1, L2; field_mul ( &L2, &b->z, &a->x ); field_mul ( &L1, &a->z, &b->x ); - field_sub ( &L0, &L2, &L1 ); - field_bias ( &L0, 2 ); + field_sub ( &L0, &L2, &L1 ); L4 = field_is_zero( &L0 ); field_mul ( &L2, &b->z, &a->y ); field_mul ( &L1, &a->z, &b->y ); - field_sub ( &L0, &L2, &L1 ); - field_bias ( &L0, 2 ); + field_sub ( &L0, &L2, &L1 ); L3 = field_is_zero( &L0 ); return L4 & L3; } @@ -711,13 +607,11 @@ eq_tw_extensible ( struct field_t L0, L1, L2; field_mul ( &L2, &b->z, &a->x ); field_mul ( &L1, &a->z, &b->x ); - field_sub ( &L0, &L2, &L1 ); - field_bias ( &L0, 2 ); + field_sub ( &L0, &L2, &L1 ); L4 = field_is_zero( &L0 ); field_mul ( &L2, &b->z, &a->y ); field_mul ( &L1, &a->z, &b->y ); - field_sub ( &L0, &L2, &L1 ); - field_bias ( &L0, 2 ); + field_sub ( &L0, &L2, &L1 ); L3 = field_is_zero( &L0 ); return L4 & L3; } @@ -727,22 +621,18 @@ elligator_2s_inject ( struct affine_t* a, const struct field_t* r ) { - mask_t L0, L1; struct field_t L2, L3, L4, L5, L6, L7, L8; field_sqr ( &a->x, r ); field_sqr ( &L3, &a->x ); field_copy ( &a->y, &L3 ); - field_subw ( &a->y, 1 ); field_negx ( &L4, &a->y ); + field_addw ( &L4, 1 ); field_sqr ( &L2, &L4 ); field_mulw ( &L7, &L2, (EDWARDS_D-1)*(EDWARDS_D-1) ); field_mulw ( &L8, &L3, 
4*(EDWARDS_D+1)*(EDWARDS_D+1) ); - field_add ( &a->y, &L8, &L7 ); - IF32( field_weak_reduce( &a->y ) ); + field_add ( &a->y, &L8, &L7 ); field_mulw ( &L8, &L2, 4*(EDWARDS_D)*(EDWARDS_D-1) ); - field_sub ( &L7, &a->y, &L8 ); - field_bias ( &L7, 2 ); - IF32( field_weak_reduce( &L7 ) ); + field_sub ( &L7, &a->y, &L8 ); field_mulw_scc ( &L6, &a->y, -2-2*EDWARDS_D ); field_mul ( &L5, &L7, &L6 ); /* FIXME Stability problem (API stability, not crash) / possible bug. @@ -769,27 +659,20 @@ elligator_2s_inject ( field_mul ( &L8, &L7, &L6 ); field_mul ( &L7, &L8, &L6 ); field_copy ( &L6, &a->x ); - field_subw ( &L6, 1 ); field_addw ( &a->x, 1 ); field_mul ( &L5, &a->x, &L8 ); - field_sub ( &a->x, &L6, &L5 ); - field_bias ( &a->x, 3 ); - IF32( field_weak_reduce( &a->x ) ); + field_addw ( &L5, 1 ); + field_sub ( &a->x, &L6, &L5 ); field_mul ( &L5, &L4, &a->x ); field_mulw_scc_wr ( &a->x, &L5, -2-2*EDWARDS_D ); - field_add ( &L4, &L3, &L3 ); - field_add ( &L3, &L4, &L2 ); - field_subw ( &L3, 2 ); - field_bias ( &L3, 1 ); - IF32( field_weak_reduce( &L3 ) ); + field_add ( &L4, &L3, &L3 ); + field_add ( &L3, &L4, &L2 ); + field_subw( &L3, 2 ); field_mul ( &L2, &L3, &L8 ); field_mulw ( &L3, &L2, 2*(EDWARDS_D+1)*(EDWARDS_D-1) ); - field_add ( &L2, &L3, &a->y ); + field_add ( &L2, &L3, &a->y ); field_mul ( &a->y, &L7, &L2 ); - L1 = field_is_zero( &L8 ); - L0 = - L1; - field_addw ( &a->y, L0 ); - field_weak_reduce( &a->y ); + field_addw ( &a->y, -field_is_zero( &L8 ) ); } mask_t @@ -799,12 +682,11 @@ validate_affine ( struct field_t L0, L1, L2, L3; field_sqr ( &L0, &a->y ); field_sqr ( &L1, &a->x ); - field_add ( &L3, &L1, &L0 ); - field_subw ( &L3, 1 ); + field_add ( &L3, &L1, &L0 ); field_mulw_scc ( &L2, &L1, EDWARDS_D ); field_mul ( &L1, &L0, &L2 ); - field_sub ( &L0, &L3, &L1 ); - field_bias ( &L0, 3 ); + field_addw ( &L1, 1 ); + field_sub ( &L0, &L3, &L1 ); return field_is_zero( &L0 ); } @@ -821,28 +703,26 @@ validate_tw_extensible ( field_mul ( &L1, &ext->t, &ext->u ); field_mul ( &L2, &ext->z, &L1 ); field_mul ( &L0, &ext->x, &ext->y ); - field_neg ( &L1, &L0 ); - field_add ( &L0, &L1, &L2 ); - field_bias ( &L0, 2 ); + field_negx ( &L1, &L0 ); + field_add ( &L0, &L1, &L2 ); L5 = field_is_zero( &L0 ); /* * Check invariant: * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 */ field_sqr ( &L2, &ext->y ); - field_neg ( &L1, &L2 ); + field_negx ( &L1, &L2 ); field_sqr ( &L0, &ext->x ); - field_add ( &L2, &L0, &L1 ); + field_add ( &L2, &L0, &L1 ); field_sqr ( &L3, &ext->u ); field_sqr ( &L0, &ext->t ); field_mul ( &L1, &L0, &L3 ); field_mulw_scc ( &L3, &L1, EDWARDS_D ); - field_add ( &L0, &L3, &L2 ); - field_neg ( &L3, &L1 ); - field_add ( &L2, &L3, &L0 ); + field_add ( &L0, &L3, &L2 ); + field_negx ( &L3, &L1 ); + field_add ( &L2, &L3, &L0 ); field_sqr ( &L1, &ext->z ); - field_add ( &L0, &L1, &L2 ); - field_bias ( &L0, 2 ); + field_add ( &L0, &L1, &L2 ); L4 = field_is_zero( &L0 ); return L5 & L4 &~ field_is_zero(&ext->z); } @@ -858,18 +738,17 @@ validate_extensible ( * 0 = d*t^2*u^2 - x^2 - y^2 + z^2 */ field_sqr ( &L2, &ext->y ); - field_neg ( &L1, &L2 ); + field_negx ( &L1, &L2 ); field_sqr ( &L0, &ext->z ); - field_add ( &L2, &L0, &L1 ); + field_add ( &L2, &L0, &L1 ); field_sqr ( &L3, &ext->u ); field_sqr ( &L0, &ext->t ); field_mul ( &L1, &L0, &L3 ); field_mulw_scc ( &L0, &L1, EDWARDS_D ); - field_add ( &L1, &L0, &L2 ); + field_add ( &L1, &L0, &L2 ); field_sqr ( &L0, &ext->x ); - field_neg ( &L2, &L0 ); - field_add ( &L0, &L2, &L1 ); - field_bias ( &L0, 2 ); + field_negx ( &L2, &L0 ); + field_add ( &L0, &L2, 
&L1 ); L5 = field_is_zero( &L0 ); /* * Check invariant: @@ -878,9 +757,8 @@ validate_extensible ( field_mul ( &L1, &ext->t, &ext->u ); field_mul ( &L2, &ext->z, &L1 ); field_mul ( &L0, &ext->x, &ext->y ); - field_neg ( &L1, &L0 ); - field_add ( &L0, &L1, &L2 ); - field_bias ( &L0, 2 ); + field_negx ( &L1, &L0 ); + field_add ( &L0, &L1, &L2 ); L4 = field_is_zero( &L0 ); return L5 & L4 &~ field_is_zero(&ext->z); } diff --git a/src/include/field.h b/src/include/field.h index 6a9b0e7..d375c09 100644 --- a/src/include/field.h +++ b/src/include/field.h @@ -14,6 +14,13 @@ #include "f_field.h" #include +#define is32 (GOLDI_BITS == 32 || FIELD_BITS != 448) +#if (is32) +#define IF32(s) (s) +#else +#define IF32(s) +#endif + /** @brief Bytes in a field element */ #define FIELD_BYTES (1+(FIELD_BITS-1)/8) @@ -53,21 +60,6 @@ field_copy ( memcpy(a,b,sizeof(*a)); } -/** - * Negate a in place if doNegate. - */ -static inline void -__attribute__((unused,always_inline)) -field_cond_neg( - field_t *a, - mask_t doNegate -) { - struct field_t negated; - field_neg(&negated, a); - field_bias(&negated, 2); - constant_time_select(a, &negated, a, sizeof(negated), doNegate); -} - /** * Returns 1/sqrt(+- x). * @@ -140,4 +132,105 @@ field_sqrn ( } } +/* Multiply by signed curve constant */ +static __inline__ void +field_mulw_scc ( + struct field_t* __restrict__ out, + const struct field_t *a, + int64_t scc +) { + if (scc >= 0) { + field_mulw(out, a, scc); + } else { + field_mulw(out, a, -scc); + field_neg_RAW(out,out); + field_bias(out,2); + } +} + +/* Multiply by signed curve constant and weak reduce if biased */ +static __inline__ void +field_mulw_scc_wr ( + struct field_t* __restrict__ out, + const struct field_t *a, + int64_t scc +) { + field_mulw_scc(out, a, scc); + if (scc < 0) + field_weak_reduce(out); +} + +static __inline__ void +field_subx_RAW ( + struct field_t *d, + const struct field_t *a, + const struct field_t *b +) { + field_sub_RAW ( d, a, b ); + field_bias( d, 2 ); + IF32( field_weak_reduce ( d ) ); +} + +static __inline__ void +field_sub ( + struct field_t *d, + const struct field_t *a, + const struct field_t *b +) { + field_sub_RAW ( d, a, b ); + field_bias( d, 2 ); + field_weak_reduce ( d ); +} + +static __inline__ void +field_add ( + struct field_t *d, + const struct field_t *a, + const struct field_t *b +) { + field_add_RAW ( d, a, b ); + field_weak_reduce ( d ); +} + +static __inline__ void +field_subw ( + struct field_t *d, + word_t c +) { + field_subw_RAW ( d, c ); + field_bias( d, 1 ); + field_weak_reduce ( d ); +} + +static __inline__ void +field_negx ( + struct field_t *d, + const struct field_t *a +) { + field_neg_RAW ( d, a ); + field_bias( d, 2 ); + field_weak_reduce ( d ); +} + +/** + * Negate a in place if doNegate. 
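+ * Runs in constant time: the negation is always computed into a
+ * temporary, and constant_time_select picks the original or negated
+ * value according to the mask.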
+ */ +static inline void +__attribute__((unused,always_inline)) +field_cond_neg ( + field_t *a, + mask_t doNegate +) { + struct field_t negated; + field_negx(&negated, a); + constant_time_select(a, &negated, a, sizeof(negated), doNegate); +} + +/** Require the warning annotation on raw routines */ +#define ANALYZE_THIS_ROUTINE_CAREFULLY const int ANNOTATE___ANALYZE_THIS_ROUTINE_CAREFULLY = 0; +#define MUST_BE_CAREFUL (void) ANNOTATE___ANALYZE_THIS_ROUTINE_CAREFULLY +#define field_add_nr(a,b,c) { MUST_BE_CAREFUL; field_add_RAW(a,b,c); } +#define field_sub_nr(a,b,c) { MUST_BE_CAREFUL; field_sub_RAW(a,b,c); } +#define field_subx_nr(a,b,c) { MUST_BE_CAREFUL; field_subx_RAW(a,b,c); } + #endif // __FIELD_H__ diff --git a/src/p448/arch_32/p448.h b/src/p448/arch_32/p448.h index cf90611..f0406cd 100644 --- a/src/p448/arch_32/p448.h +++ b/src/p448/arch_32/p448.h @@ -24,21 +24,21 @@ p448_set_ui ( ) __attribute__((unused,always_inline)); static __inline__ void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); @@ -130,7 +130,7 @@ p448_set_ui ( } void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -148,7 +148,7 @@ p448_add ( } void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -166,7 +166,7 @@ p448_sub ( } void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) { diff --git a/src/p448/arch_arm_32/p448.h b/src/p448/arch_arm_32/p448.h index cf90611..f0406cd 100644 --- a/src/p448/arch_arm_32/p448.h +++ b/src/p448/arch_arm_32/p448.h @@ -24,21 +24,21 @@ p448_set_ui ( ) __attribute__((unused,always_inline)); static __inline__ void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); @@ -130,7 +130,7 @@ p448_set_ui ( } void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -148,7 +148,7 @@ p448_add ( } void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -166,7 +166,7 @@ p448_sub ( } void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) { diff --git a/src/p448/arch_neon/p448.h b/src/p448/arch_neon/p448.h index cf90611..f0406cd 100644 --- a/src/p448/arch_neon/p448.h +++ b/src/p448/arch_neon/p448.h @@ -24,21 +24,21 @@ p448_set_ui ( ) __attribute__((unused,always_inline)); static __inline__ void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); @@ -130,7 +130,7 @@ p448_set_ui ( } void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -148,7 +148,7 @@ p448_add ( } void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -166,7 +166,7 @@ p448_sub ( } void -p448_neg ( +p448_neg_RAW 
( p448_t *out, const p448_t *a ) { diff --git a/src/p448/arch_neon_experimental/p448.h b/src/p448/arch_neon_experimental/p448.h index 144d86c..f7d338a 100644 --- a/src/p448/arch_neon_experimental/p448.h +++ b/src/p448/arch_neon_experimental/p448.h @@ -27,21 +27,21 @@ p448_set_ui ( ) __attribute__((unused,always_inline)); static __inline__ void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); @@ -133,7 +133,7 @@ p448_set_ui ( } void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -145,7 +145,7 @@ p448_add ( } void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -163,7 +163,7 @@ p448_sub ( } void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) { diff --git a/src/p448/arch_ref64/p448.h b/src/p448/arch_ref64/p448.h index bf43b79..d6670c3 100644 --- a/src/p448/arch_ref64/p448.h +++ b/src/p448/arch_ref64/p448.h @@ -25,21 +25,21 @@ p448_set_ui ( ) __attribute__((unused)); static __inline__ void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused)); static __inline__ void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused)); static __inline__ void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) __attribute__((unused)); @@ -136,7 +136,7 @@ p448_set_ui ( } void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -149,7 +149,7 @@ p448_add ( } void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -163,7 +163,7 @@ p448_sub ( } void -p448_neg ( +p448_neg_RAW ( struct p448_t *out, const p448_t *a ) { diff --git a/src/p448/arch_x86_64/p448.h b/src/p448/arch_x86_64/p448.h index 0772d23..20b7597 100644 --- a/src/p448/arch_x86_64/p448.h +++ b/src/p448/arch_x86_64/p448.h @@ -24,21 +24,21 @@ p448_set_ui ( ) __attribute__((unused,always_inline)); static __inline__ void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p448_neg ( +p448_neg_RAW ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); @@ -129,7 +129,7 @@ p448_set_ui ( } void -p448_add ( +p448_add_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -147,7 +147,7 @@ p448_add ( } void -p448_sub ( +p448_sub_RAW ( p448_t *out, const p448_t *a, const p448_t *b @@ -165,7 +165,7 @@ p448_sub ( } void -p448_neg ( +p448_neg_RAW ( struct p448_t *out, const p448_t *a ) { diff --git a/src/p448/f_field.h b/src/p448/f_field.h index c743c8d..7284194 100644 --- a/src/p448/f_field.h +++ b/src/p448/f_field.h @@ -9,23 +9,22 @@ #ifndef __F_FIELD_H__ #define __F_FIELD_H__ 1 -#include #include "constant_time.h" +#include #include "p448.h" #define FIELD_BITS 448 #define field_t p448_t #define field_mul p448_mul #define field_sqr p448_sqr -#define field_add p448_add -#define field_sub p448_sub +#define field_add_RAW p448_add_RAW +#define field_sub_RAW p448_sub_RAW #define field_mulw p448_mulw #define field_addw p448_addw -#define field_subw p448_subw -#define field_neg p448_neg +#define 
field_subw_RAW p448_subw +#define field_neg_RAW p448_neg_RAW #define field_set_ui p448_set_ui #define field_bias p448_bias -#define field_cond_neg p448_cond_neg #define field_inverse p448_inverse #define field_eq p448_eq #define field_isr p448_isr diff --git a/src/p480/arch_x86_64/p480.h b/src/p480/arch_x86_64/p480.h index a49c6d0..ea841aa 100644 --- a/src/p480/arch_x86_64/p480.h +++ b/src/p480/arch_x86_64/p480.h @@ -24,21 +24,21 @@ p480_set_ui ( ) __attribute__((unused,always_inline)); static __inline__ void -p480_add ( +p480_add_RAW ( p480_t *out, const p480_t *a, const p480_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p480_sub ( +p480_sub_RAW ( p480_t *out, const p480_t *a, const p480_t *b ) __attribute__((unused,always_inline)); static __inline__ void -p480_neg ( +p480_neg_RAW ( p480_t *out, const p480_t *a ) __attribute__((unused,always_inline)); @@ -129,7 +129,7 @@ p480_set_ui ( } void -p480_add ( +p480_add_RAW ( p480_t *out, const p480_t *a, const p480_t *b @@ -147,7 +147,7 @@ p480_add ( } void -p480_sub ( +p480_sub_RAW ( p480_t *out, const p480_t *a, const p480_t *b @@ -165,7 +165,7 @@ p480_sub ( } void -p480_neg ( +p480_neg_RAW ( struct p480_t *out, const p480_t *a ) { diff --git a/src/p480/f_field.h b/src/p480/f_field.h index 397f83d..c681bd3 100644 --- a/src/p480/f_field.h +++ b/src/p480/f_field.h @@ -9,23 +9,22 @@ #ifndef __F_FIELD_H__ #define __F_FIELD_H__ 1 -#include #include "constant_time.h" +#include #include "p480.h" #define FIELD_BITS 480 #define field_t p480_t #define field_mul p480_mul #define field_sqr p480_sqr -#define field_add p480_add -#define field_sub p480_sub +#define field_add_RAW p480_add_RAW +#define field_sub_RAW p480_sub_RAW #define field_mulw p480_mulw #define field_addw p480_addw -#define field_subw p480_subw -#define field_neg p480_neg +#define field_subw_RAW p480_subw +#define field_neg_RAW p480_neg_RAW #define field_set_ui p480_set_ui #define field_bias p480_bias -#define field_cond_neg p480_cond_neg #define field_inverse p480_inverse #define field_eq p480_eq #define field_isr p480_isr diff --git a/src/p521/arch_ref64/p521.h b/src/p521/arch_ref64/p521.h index c4dbf69..ff458a6 100644 --- a/src/p521/arch_ref64/p521.h +++ b/src/p521/arch_ref64/p521.h @@ -25,21 +25,21 @@ p521_set_ui ( ) __attribute__((unused)); static __inline__ void -p521_add ( +p521_add_RAW ( p521_t *out, const p521_t *a, const p521_t *b ) __attribute__((unused)); static __inline__ void -p521_sub ( +p521_sub_RAW ( p521_t *out, const p521_t *a, const p521_t *b ) __attribute__((unused)); static __inline__ void -p521_neg ( +p521_neg_RAW ( p521_t *out, const p521_t *a ) __attribute__((unused)); @@ -136,7 +136,7 @@ p521_set_ui ( } void -p521_add ( +p521_add_RAW ( p521_t *out, const p521_t *a, const p521_t *b @@ -149,7 +149,7 @@ p521_add ( } void -p521_sub ( +p521_sub_RAW ( p521_t *out, const p521_t *a, const p521_t *b @@ -163,7 +163,7 @@ p521_sub ( } void -p521_neg ( +p521_neg_RAW ( struct p521_t *out, const p521_t *a ) { diff --git a/src/p521/arch_x86_64_r12/p521.h b/src/p521/arch_x86_64_r12/p521.h index f51e91b..568784b 100644 --- a/src/p521/arch_x86_64_r12/p521.h +++ b/src/p521/arch_x86_64_r12/p521.h @@ -29,21 +29,21 @@ p521_set_ui ( ) __attribute__((unused)); static __inline__ void -p521_add ( +p521_add_RAW ( p521_t *out, const p521_t *a, const p521_t *b ) __attribute__((unused)); static __inline__ void -p521_sub ( +p521_sub_RAW ( p521_t *out, const p521_t *a, const p521_t *b ) __attribute__((unused)); static __inline__ void -p521_neg ( +p521_neg_RAW ( p521_t 
*out, const p521_t *a ) __attribute__((unused)); @@ -147,7 +147,7 @@ p521_set_ui ( } void -p521_add ( +p521_add_RAW ( p521_t *out, const p521_t *a, const p521_t *b @@ -159,7 +159,7 @@ p521_add ( } void -p521_sub ( +p521_sub_RAW ( p521_t *out, const p521_t *a, const p521_t *b @@ -171,7 +171,7 @@ p521_sub ( } void -p521_neg ( +p521_neg_RAW ( struct p521_t *out, const p521_t *a ) { diff --git a/src/p521/f_field.h b/src/p521/f_field.h index f17fe3d..6331072 100644 --- a/src/p521/f_field.h +++ b/src/p521/f_field.h @@ -17,15 +17,14 @@ #define field_t p521_t #define field_mul p521_mul #define field_sqr p521_sqr -#define field_add p521_add -#define field_sub p521_sub +#define field_add_RAW p521_add_RAW +#define field_sub_RAW p521_sub_RAW #define field_mulw p521_mulw #define field_addw p521_addw -#define field_subw p521_subw -#define field_neg p521_neg +#define field_subw_RAW p521_subw +#define field_neg_RAW p521_neg_RAW #define field_set_ui p521_set_ui #define field_bias p521_bias -#define field_cond_neg p521_cond_neg #define field_inverse p521_inverse #define field_eq p521_eq #define field_isr p521_isr diff --git a/test/bench.c b/test/bench.c index ddf8097..31fd9eb 100644 --- a/test/bench.c +++ b/test/bench.c @@ -177,7 +177,6 @@ int main(int argc, char **argv) { field_mul(&c,&b,&a); field_sqr(&b,&c); field_subw(&b,1); - field_bias(&b,1); if (!field_is_zero(&b)) { printf("ISR validation failure!\n"); field_print("a", &a); @@ -232,7 +231,6 @@ int main(int argc, char **argv) { convert_affine_to_extensible(&exta,&affine); serialize_extensible(&b, &exta); field_sub(&c,&b,&a); - field_bias(&c,2); if (!field_is_zero(&c)) { printf("Reserialize validation failure!\n"); field_print("a", &a); @@ -635,7 +633,6 @@ int main(int argc, char **argv) { ignore_result(montgomery_ladder(&b,&a,&z,WORD_BITS,0)); field_sub(&d,&b,&c); - field_bias(&d,2); if (!field_is_zero(&d)) { printf("Odd ladder validation failure %d!\n", ++failures); field_print("a", &a); @@ -661,7 +658,6 @@ int main(int argc, char **argv) { untwist_and_double_and_serialize(&c, &ext); field_sub(&d,&b,&c); - field_bias(&d,2); if (good && !field_is_zero(&d)){ printf("Iso+serial validation failure %d!\n", ++failures); @@ -717,7 +713,6 @@ int main(int argc, char **argv) { serialize_extensible(&c, &exta); field_sub(&d,&b,&c); - field_bias(&d,2); if (!field_is_zero(&d)){ printf("PreWNAF combo validation failure %d!\n", ++failures); diff --git a/test/test_arithmetic.c b/test/test_arithmetic.c index bbdbf43..7c45407 100644 --- a/test/test_arithmetic.c +++ b/test/test_arithmetic.c @@ -83,7 +83,7 @@ static mask_t field_assert_eq_gmp( return MASK_SUCCESS; } -static mask_t test_add_sub ( +static mask_t test_add_sub_RAW ( const mpz_t x, const mpz_t y, word_t word @@ -95,11 +95,11 @@ static mask_t test_add_sub ( succ &= mpz_to_field(&yy,y); mpz_init(t); - field_add(&tt,&xx,&yy); + field_add_RAW(&tt,&xx,&yy); mpz_add(t,x,y); succ &= field_assert_eq_gmp("add",&xx,&yy,&tt,t,0,2.1); - field_sub(&tt,&xx,&yy); + field_sub_RAW(&tt,&xx,&yy); field_bias(&tt,2); mpz_sub(t,x,y); succ &= field_assert_eq_gmp("sub",&xx,&yy,&tt,t,0,3.1); @@ -232,13 +232,13 @@ int test_arithmetic (void) { word_t word = gmp_urandomm_ui (state, 1ull< Date: Thu, 22 Jan 2015 15:35:58 -0800 Subject: [PATCH 03/15] going to GMP-style element[1] types --- src/arithmetic.c | 77 ++-- src/ec_point.c | 955 ++++++++++++++++++++-------------------- src/goldilocks.c | 70 +-- src/include/ec_point.h | 83 ++-- src/include/field.h | 79 ++-- src/include/magic.h | 2 +- src/include/scalarmul.h | 4 +- 
src/p448/f_arithmetic.c | 54 +-- src/p448/magic.c | 16 +- src/p480/f_arithmetic.c | 54 +-- src/p480/magic.c | 12 +- src/p521/f_arithmetic.c | 54 +-- src/p521/magic.c | 12 +- src/scalarmul.c | 70 +-- test/bench.c | 172 ++++---- test/test.c | 2 +- test/test.h | 2 +- test/test_arithmetic.c | 94 ++-- test/test_pointops.c | 48 +- test/test_scalarmul.c | 90 ++-- 20 files changed, 974 insertions(+), 976 deletions(-) diff --git a/src/arithmetic.c b/src/arithmetic.c index 4530aa3..89be5c4 100644 --- a/src/arithmetic.c +++ b/src/arithmetic.c @@ -13,78 +13,75 @@ mask_t field_eq ( - const struct field_t *a, - const struct field_t *b + const field_a_t a, + const field_a_t b ) { - struct field_t ra, rb; - field_copy(&ra, a); - field_copy(&rb, b); - field_weak_reduce(&ra); - field_weak_reduce(&rb); - field_sub_RAW(&ra, &ra, &rb); - field_bias(&ra, 2); - return field_is_zero(&ra); + field_a_t ra, rb; + field_copy(ra, a); + field_copy(rb, b); + field_weak_reduce(ra); + field_weak_reduce(rb); + field_sub_RAW(ra, ra, rb); + field_bias(ra, 2); + return field_is_zero(ra); } void field_inverse ( - struct field_t* a, - const struct field_t* x + field_a_t a, + const field_a_t x ) { - struct field_t L0, L1; - field_isr ( &L0, x ); - field_sqr ( &L1, &L0 ); - field_sqr ( &L0, &L1 ); - field_mul ( a, x, &L0 ); + field_a_t L0, L1; + field_isr ( L0, x ); + field_sqr ( L1, L0 ); + field_sqr ( L0, L1 ); + field_mul ( a, x, L0 ); } mask_t field_is_square ( - const struct field_t* x + const field_a_t x ) { - mask_t L2, L3; - struct field_t L0, L1; - field_isr ( &L0, x ); - field_sqr ( &L1, &L0 ); - field_mul ( &L0, x, &L1 ); - field_subw( &L0, 1 ); - L3 = field_is_zero( &L0 ); - L2 = field_is_zero( x ); - return L3 | L2; + field_a_t L0, L1; + field_isr ( L0, x ); + field_sqr ( L1, L0 ); + field_mul ( L0, x, L1 ); + field_subw( L0, 1 ); + return field_is_zero( L0 ) | field_is_zero( x ); } void field_simultaneous_invert ( - struct field_t *__restrict__ out, - const struct field_t *in, + field_a_t *__restrict__ out, + const field_a_t *in, unsigned int n ) { if (n==0) { return; } else if (n==1) { - field_inverse(out,in); + field_inverse(out[0],in[0]); return; } - field_copy(&out[1], &in[0]); + field_copy(out[1], in[0]); int i; for (i=1; i<(int) (n-1); i++) { - field_mul(&out[i+1], &out[i], &in[i]); + field_mul(out[i+1], out[i], in[i]); } - field_mul(&out[0], &out[n-1], &in[n-1]); + field_mul(out[0], out[n-1], in[n-1]); - struct field_t tmp; - field_inverse(&tmp, &out[0]); - field_copy(&out[0], &tmp); + field_a_t tmp; + field_inverse(tmp, out[0]); + field_copy(out[0], tmp); /* at this point, out[0] = product(in[i]) ^ -1 * out[i] = product(in[0]..in[i-1]) if i != 0 */ for (i=n-1; i>0; i--) { - field_mul(&tmp, &out[i], &out[0]); - field_copy(&out[i], &tmp); + field_mul(tmp, out[i], out[0]); + field_copy(out[i], tmp); - field_mul(&tmp, &out[0], &in[i]); - field_copy(&out[0], &tmp); + field_mul(tmp, out[0], in[i]); + field_copy(out[0], tmp); } } diff --git a/src/ec_point.c b/src/ec_point.c index a486df1..905ba60 100644 --- a/src/ec_point.c +++ b/src/ec_point.c @@ -18,20 +18,20 @@ add_tw_niels_to_tw_extensible ( const struct tw_niels_t* e ) { ANALYZE_THIS_ROUTINE_CAREFULLY; - struct field_t L0, L1; - field_sub ( &L1, &d->y, &d->x ); - field_mul ( &L0, &e->a, &L1 ); - field_add_nr ( &L1, &d->x, &d->y ); - field_mul ( &d->y, &e->b, &L1 ); - field_mul ( &L1, &d->u, &d->t ); - field_mul ( &d->x, &e->c, &L1 ); - field_add_nr ( &d->u, &L0, &d->y ); - field_subx_nr ( &d->t, &d->y, &L0 ); - field_subx_nr ( &d->y, &d->z, &d->x ); - 
field_add_nr ( &L0, &d->x, &d->z ); - field_mul ( &d->z, &L0, &d->y ); - field_mul ( &d->x, &d->y, &d->t ); - field_mul ( &d->y, &L0, &d->u ); + field_a_t L0, L1; + field_sub ( L1, d->y, d->x ); + field_mul ( L0, e->a, L1 ); + field_add_nr ( L1, d->x, d->y ); + field_mul ( d->y, e->b, L1 ); + field_mul ( L1, d->u, d->t ); + field_mul ( d->x, e->c, L1 ); + field_add_nr ( d->u, L0, d->y ); + field_subx_nr ( d->t, d->y, L0 ); + field_subx_nr ( d->y, d->z, d->x ); + field_add_nr ( L0, d->x, d->z ); + field_mul ( d->z, L0, d->y ); + field_mul ( d->x, d->y, d->t ); + field_mul ( d->y, L0, d->u ); } void @@ -40,20 +40,20 @@ sub_tw_niels_from_tw_extensible ( const struct tw_niels_t* e ) { ANALYZE_THIS_ROUTINE_CAREFULLY; - struct field_t L0, L1; - field_subx_nr ( &L1, &d->y, &d->x ); - field_mul ( &L0, &e->b, &L1 ); - field_add_nr ( &L1, &d->x, &d->y ); - field_mul ( &d->y, &e->a, &L1 ); - field_mul ( &L1, &d->u, &d->t ); - field_mul ( &d->x, &e->c, &L1 ); - field_add_nr ( &d->u, &L0, &d->y ); - field_subx_nr ( &d->t, &d->y, &L0 ); - field_add_nr ( &d->y, &d->x, &d->z ); - field_subx_nr ( &L0, &d->z, &d->x ); - field_mul ( &d->z, &L0, &d->y ); - field_mul ( &d->x, &d->y, &d->t ); - field_mul ( &d->y, &L0, &d->u ); + field_a_t L0, L1; + field_subx_nr ( L1, d->y, d->x ); + field_mul ( L0, e->b, L1 ); + field_add_nr ( L1, d->x, d->y ); + field_mul ( d->y, e->a, L1 ); + field_mul ( L1, d->u, d->t ); + field_mul ( d->x, e->c, L1 ); + field_add_nr ( d->u, L0, d->y ); + field_subx_nr ( d->t, d->y, L0 ); + field_add_nr ( d->y, d->x, d->z ); + field_subx_nr ( L0, d->z, d->x ); + field_mul ( d->z, L0, d->y ); + field_mul ( d->x, d->y, d->t ); + field_mul ( d->y, L0, d->u ); } void @@ -61,9 +61,9 @@ add_tw_pniels_to_tw_extensible ( struct tw_extensible_t* e, const struct tw_pniels_t* a ) { - struct field_t L0; - field_mul ( &L0, &e->z, &a->z ); - field_copy ( &e->z, &L0 ); + field_a_t L0; + field_mul ( L0, e->z, a->z ); + field_copy ( e->z, L0 ); add_tw_niels_to_tw_extensible( e, &a->n ); } @@ -72,9 +72,9 @@ sub_tw_pniels_from_tw_extensible ( struct tw_extensible_t* e, const struct tw_pniels_t* a ) { - struct field_t L0; - field_mul ( &L0, &e->z, &a->z ); - field_copy ( &e->z, &L0 ); + field_a_t L0; + field_mul ( L0, e->z, a->z ); + field_copy ( e->z, L0 ); sub_tw_niels_from_tw_extensible( e, &a->n ); } @@ -83,24 +83,24 @@ double_tw_extensible ( struct tw_extensible_t* a ) { ANALYZE_THIS_ROUTINE_CAREFULLY; - struct field_t L0, L1, L2; - field_sqr ( &L2, &a->x ); - field_sqr ( &L0, &a->y ); - field_add_nr ( &a->u, &L2, &L0 ); - field_add_nr ( &a->t, &a->y, &a->x ); - field_sqr ( &L1, &a->t ); - field_sub_nr ( &a->t, &L1, &a->u ); - field_bias ( &a->t, 3 ); - IF32( field_weak_reduce( &a->t ) ); - field_subx_nr ( &L1, &L0, &L2 ); - field_sqr ( &a->x, &a->z ); - field_bias ( &a->x, 2-is32 /*is32 ? 1 : 2*/ ); - field_add_nr ( &a->z, &a->x, &a->x ); - field_sub_nr ( &L0, &a->z, &L1 ); - IF32( field_weak_reduce( &L0 ) ); - field_mul ( &a->z, &L1, &L0 ); - field_mul ( &a->x, &L0, &a->t ); - field_mul ( &a->y, &L1, &a->u ); + field_a_t L0, L1, L2; + field_sqr ( L2, a->x ); + field_sqr ( L0, a->y ); + field_add_nr ( a->u, L2, L0 ); + field_add_nr ( a->t, a->y, a->x ); + field_sqr ( L1, a->t ); + field_sub_nr ( a->t, L1, a->u ); + field_bias ( a->t, 3 ); + IF32( field_weak_reduce( a->t ) ); + field_subx_nr ( L1, L0, L2 ); + field_sqr ( a->x, a->z ); + field_bias ( a->x, 2-is32 /*is32 ? 
1 : 2*/ ); + field_add_nr ( a->z, a->x, a->x ); + field_sub_nr ( L0, a->z, L1 ); + IF32( field_weak_reduce( L0 ) ); + field_mul ( a->z, L1, L0 ); + field_mul ( a->x, L0, a->t ); + field_mul ( a->y, L1, a->u ); } void @@ -108,24 +108,24 @@ double_extensible ( struct extensible_t* a ) { ANALYZE_THIS_ROUTINE_CAREFULLY; - struct field_t L0, L1, L2; - field_sqr ( &L2, &a->x ); - field_sqr ( &L0, &a->y ); - field_add_nr ( &L1, &L2, &L0 ); - field_add_nr ( &a->t, &a->y, &a->x ); - field_sqr ( &a->u, &a->t ); - field_sub_nr ( &a->t, &a->u, &L1 ); - field_bias ( &a->t, 3 ); - IF32( field_weak_reduce( &a->t ) ); - field_subx_nr ( &a->u, &L0, &L2 ); - field_sqr ( &a->x, &a->z ); - field_bias ( &a->x, 2 ); - field_add_nr ( &a->z, &a->x, &a->x ); - field_sub_nr ( &L0, &a->z, &L1 ); - IF32( field_weak_reduce( &L0 ) ); - field_mul ( &a->z, &L1, &L0 ); - field_mul ( &a->x, &L0, &a->t ); - field_mul ( &a->y, &L1, &a->u ); + field_a_t L0, L1, L2; + field_sqr ( L2, a->x ); + field_sqr ( L0, a->y ); + field_add_nr ( L1, L2, L0 ); + field_add_nr ( a->t, a->y, a->x ); + field_sqr ( a->u, a->t ); + field_sub_nr ( a->t, a->u, L1 ); + field_bias ( a->t, 3 ); + IF32( field_weak_reduce( a->t ) ); + field_subx_nr ( a->u, L0, L2 ); + field_sqr ( a->x, a->z ); + field_bias ( a->x, 2 ); + field_add_nr ( a->z, a->x, a->x ); + field_sub_nr ( L0, a->z, L1 ); + IF32( field_weak_reduce( L0 ) ); + field_mul ( a->z, L1, L0 ); + field_mul ( a->x, L0, a->t ); + field_mul ( a->y, L1, a->u ); } void @@ -133,20 +133,20 @@ twist_and_double ( struct tw_extensible_t* b, const struct extensible_t* a ) { - struct field_t L0; - field_sqr ( &b->x, &a->x ); - field_sqr ( &b->z, &a->y ); - field_add ( &b->u, &b->x, &b->z ); - field_add ( &b->t, &a->y, &a->x ); - field_sqr ( &L0, &b->t ); - field_sub ( &b->t, &L0, &b->u ); - field_sub ( &L0, &b->z, &b->x ); - field_sqr ( &b->x, &a->z ); - field_add ( &b->z, &b->x, &b->x ); - field_sub ( &b->y, &b->z, &b->u ); - field_mul ( &b->z, &L0, &b->y ); - field_mul ( &b->x, &b->y, &b->t ); - field_mul ( &b->y, &L0, &b->u ); + field_a_t L0; + field_sqr ( b->x, a->x ); + field_sqr ( b->z, a->y ); + field_add ( b->u, b->x, b->z ); + field_add ( b->t, a->y, a->x ); + field_sqr ( L0, b->t ); + field_sub ( b->t, L0, b->u ); + field_sub ( L0, b->z, b->x ); + field_sqr ( b->x, a->z ); + field_add ( b->z, b->x, b->x ); + field_sub ( b->y, b->z, b->u ); + field_mul ( b->z, L0, b->y ); + field_mul ( b->x, b->y, b->t ); + field_mul ( b->y, L0, b->u ); } void @@ -154,20 +154,20 @@ untwist_and_double ( struct extensible_t* b, const struct tw_extensible_t* a ) { - struct field_t L0; - field_sqr ( &b->x, &a->x ); - field_sqr ( &b->z, &a->y ); - field_add ( &L0, &b->x, &b->z ); - field_add ( &b->t, &a->y, &a->x ); - field_sqr ( &b->u, &b->t ); - field_sub ( &b->t, &b->u, &L0 ); - field_sub ( &b->u, &b->z, &b->x ); - field_sqr ( &b->x, &a->z ); - field_add ( &b->z, &b->x, &b->x ); - field_sub ( &b->y, &b->z, &b->u ); - field_mul ( &b->z, &L0, &b->y ); - field_mul ( &b->x, &b->y, &b->t ); - field_mul ( &b->y, &L0, &b->u ); + field_a_t L0; + field_sqr ( b->x, a->x ); + field_sqr ( b->z, a->y ); + field_add ( L0, b->x, b->z ); + field_add ( b->t, a->y, a->x ); + field_sqr ( b->u, b->t ); + field_sub ( b->t, b->u, L0 ); + field_sub ( b->u, b->z, b->x ); + field_sqr ( b->x, a->z ); + field_add ( b->z, b->x, b->x ); + field_sub ( b->y, b->z, b->u ); + field_mul ( b->z, L0, b->y ); + field_mul ( b->x, b->y, b->t ); + field_mul ( b->y, L0, b->u ); } void @@ -175,11 +175,11 @@ convert_tw_affine_to_tw_pniels ( struct 
tw_pniels_t* b, const struct tw_affine_t* a ) { - field_sub ( &b->n.a, &a->y, &a->x ); - field_add ( &b->n.b, &a->x, &a->y ); - field_mul ( &b->z, &a->y, &a->x ); - field_mulw_scc_wr ( &b->n.c, &b->z, 2*EDWARDS_D-2 ); - field_set_ui( &b->z, 2 ); + field_sub ( b->n.a, a->y, a->x ); + field_add ( b->n.b, a->x, a->y ); + field_mul ( b->z, a->y, a->x ); + field_mulw_scc_wr ( b->n.c, b->z, 2*EDWARDS_D-2 ); + field_set_ui( b->z, 2 ); } void @@ -187,11 +187,11 @@ convert_tw_affine_to_tw_extensible ( struct tw_extensible_t* b, const struct tw_affine_t* a ) { - field_copy ( &b->x, &a->x ); - field_copy ( &b->y, &a->y ); - field_set_ui( &b->z, 1 ); - field_copy ( &b->t, &a->x ); - field_copy ( &b->u, &a->y ); + field_copy ( b->x, a->x ); + field_copy ( b->y, a->y ); + field_set_ui( b->z, 1 ); + field_copy ( b->t, a->x ); + field_copy ( b->u, a->y ); } void @@ -199,11 +199,11 @@ convert_affine_to_extensible ( struct extensible_t* b, const struct affine_t* a ) { - field_copy ( &b->x, &a->x ); - field_copy ( &b->y, &a->y ); - field_set_ui( &b->z, 1 ); - field_copy ( &b->t, &a->x ); - field_copy ( &b->u, &a->y ); + field_copy ( b->x, a->x ); + field_copy ( b->y, a->y ); + field_set_ui( b->z, 1 ); + field_copy ( b->t, a->x ); + field_copy ( b->u, a->y ); } void @@ -211,11 +211,11 @@ convert_tw_extensible_to_tw_pniels ( struct tw_pniels_t* b, const struct tw_extensible_t* a ) { - field_sub ( &b->n.a, &a->y, &a->x ); - field_add ( &b->n.b, &a->x, &a->y ); - field_mul ( &b->z, &a->u, &a->t ); - field_mulw_scc_wr ( &b->n.c, &b->z, 2*EDWARDS_D-2 ); - field_add ( &b->z, &a->z, &a->z ); + field_sub ( b->n.a, a->y, a->x ); + field_add ( b->n.b, a->x, a->y ); + field_mul ( b->z, a->u, a->t ); + field_mulw_scc_wr ( b->n.c, b->z, 2*EDWARDS_D-2 ); + field_add ( b->z, a->z, a->z ); } void @@ -223,11 +223,11 @@ convert_tw_pniels_to_tw_extensible ( struct tw_extensible_t* e, const struct tw_pniels_t* d ) { - field_add ( &e->u, &d->n.b, &d->n.a ); - field_sub ( &e->t, &d->n.b, &d->n.a ); - field_mul ( &e->x, &d->z, &e->t ); - field_mul ( &e->y, &d->z, &e->u ); - field_sqr ( &e->z, &d->z ); + field_add ( e->u, d->n.b, d->n.a ); + field_sub ( e->t, d->n.b, d->n.a ); + field_mul ( e->x, d->z, e->t ); + field_mul ( e->y, d->z, e->u ); + field_sqr ( e->z, d->z ); } void @@ -235,11 +235,11 @@ convert_tw_niels_to_tw_extensible ( struct tw_extensible_t* e, const struct tw_niels_t* d ) { - field_add ( &e->y, &d->b, &d->a ); - field_sub ( &e->x, &d->b, &d->a ); - field_set_ui( &e->z, 1 ); - field_copy ( &e->t, &e->x ); - field_copy ( &e->u, &e->y ); + field_add ( e->y, d->b, d->a ); + field_sub ( e->x, d->b, d->a ); + field_set_ui( e->z, 1 ); + field_copy ( e->t, e->x ); + field_copy ( e->u, e->y ); } void @@ -247,130 +247,130 @@ montgomery_step ( struct montgomery_t* a ) { ANALYZE_THIS_ROUTINE_CAREFULLY; - struct field_t L0, L1; - field_add_nr ( &L0, &a->zd, &a->xd ); - field_sub ( &L1, &a->xd, &a->zd ); - field_sub ( &a->zd, &a->xa, &a->za ); - field_mul ( &a->xd, &L0, &a->zd ); - field_add_nr ( &a->zd, &a->za, &a->xa ); - field_mul ( &a->za, &L1, &a->zd ); - field_add_nr ( &a->xa, &a->za, &a->xd ); - field_sqr ( &a->zd, &a->xa ); - field_mul ( &a->xa, &a->z0, &a->zd ); - field_sub ( &a->zd, &a->xd, &a->za ); - field_sqr ( &a->za, &a->zd ); - field_sqr ( &a->xd, &L0 ); - field_sqr ( &L0, &L1 ); - field_mulw_scc ( &a->zd, &a->xd, 1-EDWARDS_D ); /* FIXME PERF MULW */ - field_sub ( &L1, &a->xd, &L0 ); - field_mul ( &a->xd, &L0, &a->zd ); - field_sub_nr ( &L0, &a->zd, &L1 ); - field_bias ( &L0, 4 - 2*is32 /*is32 ? 
2 : 4*/ ); - IF32( field_weak_reduce( &L0 ) ); - field_mul ( &a->zd, &L0, &L1 ); + field_a_t L0, L1; + field_add_nr ( L0, a->zd, a->xd ); + field_sub ( L1, a->xd, a->zd ); + field_sub ( a->zd, a->xa, a->za ); + field_mul ( a->xd, L0, a->zd ); + field_add_nr ( a->zd, a->za, a->xa ); + field_mul ( a->za, L1, a->zd ); + field_add_nr ( a->xa, a->za, a->xd ); + field_sqr ( a->zd, a->xa ); + field_mul ( a->xa, a->z0, a->zd ); + field_sub ( a->zd, a->xd, a->za ); + field_sqr ( a->za, a->zd ); + field_sqr ( a->xd, L0 ); + field_sqr ( L0, L1 ); + field_mulw_scc ( a->zd, a->xd, 1-EDWARDS_D ); /* FIXME PERF MULW */ + field_sub ( L1, a->xd, L0 ); + field_mul ( a->xd, L0, a->zd ); + field_sub_nr ( L0, a->zd, L1 ); + field_bias ( L0, 4 - 2*is32 /*is32 ? 2 : 4*/ ); + IF32( field_weak_reduce( L0 ) ); + field_mul ( a->zd, L0, L1 ); } void deserialize_montgomery ( struct montgomery_t* a, - const struct field_t* sbz + const field_a_t sbz ) { - field_sqr ( &a->z0, sbz ); - field_set_ui( &a->xd, 1 ); - field_set_ui( &a->zd, 0 ); - field_set_ui( &a->xa, 1 ); - field_copy ( &a->za, &a->z0 ); + field_sqr ( a->z0, sbz ); + field_set_ui( a->xd, 1 ); + field_set_ui( a->zd, 0 ); + field_set_ui( a->xa, 1 ); + field_copy ( a->za, a->z0 ); } mask_t serialize_montgomery ( - struct field_t* b, + field_a_t b, const struct montgomery_t* a, - const struct field_t* sbz + const field_a_t sbz ) { mask_t L4, L5, L6; - struct field_t L0, L1, L2, L3; - field_mul ( &L3, &a->z0, &a->zd ); - field_sub ( &L1, &L3, &a->xd ); - field_mul ( &L3, &a->za, &L1 ); - field_mul ( &L2, &a->z0, &a->xd ); - field_sub ( &L1, &L2, &a->zd ); - field_mul ( &L0, &a->xa, &L1 ); - field_add ( &L2, &L0, &L3 ); - field_sub ( &L1, &L3, &L0 ); - field_mul ( &L3, &L1, &L2 ); - field_copy ( &L2, &a->z0 ); - field_addw ( &L2, 1 ); - field_sqr ( &L0, &L2 ); - field_mulw_scc_wr ( &L1, &L0, EDWARDS_D-1 ); - field_add ( &L2, &a->z0, &a->z0 ); - field_add ( &L0, &L2, &L2 ); - field_add ( &L2, &L0, &L1 ); - field_mul ( &L0, &a->xd, &L2 ); - L5 = field_is_zero( &a->zd ); + field_a_t L0, L1, L2, L3; + field_mul ( L3, a->z0, a->zd ); + field_sub ( L1, L3, a->xd ); + field_mul ( L3, a->za, L1 ); + field_mul ( L2, a->z0, a->xd ); + field_sub ( L1, L2, a->zd ); + field_mul ( L0, a->xa, L1 ); + field_add ( L2, L0, L3 ); + field_sub ( L1, L3, L0 ); + field_mul ( L3, L1, L2 ); + field_copy ( L2, a->z0 ); + field_addw ( L2, 1 ); + field_sqr ( L0, L2 ); + field_mulw_scc_wr ( L1, L0, EDWARDS_D-1 ); + field_add ( L2, a->z0, a->z0 ); + field_add ( L0, L2, L2 ); + field_add ( L2, L0, L1 ); + field_mul ( L0, a->xd, L2 ); + L5 = field_is_zero( a->zd ); L6 = - L5; - constant_time_mask ( &L1, &L0, sizeof(L1), L5 ); - field_add ( &L2, &L1, &a->zd ); + constant_time_mask ( L1, L0, sizeof(L1), L5 ); + field_add ( L2, L1, a->zd ); L4 = ~ L5; - field_mul ( &L1, sbz, &L3 ); - field_addw ( &L1, L6 ); - field_mul ( &L3, &L2, &L1 ); - field_mul ( &L1, &L3, &L2 ); - field_mul ( &L2, &L3, &a->xd ); - field_mul ( &L3, &L1, &L2 ); - field_isr ( &L0, &L3 ); - field_mul ( &L2, &L1, &L0 ); - field_sqr ( &L1, &L0 ); - field_mul ( &L0, &L3, &L1 ); - constant_time_mask ( b, &L2, sizeof(L1), L4 ); - field_subw( &L0, 1 ); - L5 = field_is_zero( &L0 ); + field_mul ( L1, sbz, L3 ); + field_addw ( L1, L6 ); + field_mul ( L3, L2, L1 ); + field_mul ( L1, L3, L2 ); + field_mul ( L2, L3, a->xd ); + field_mul ( L3, L1, L2 ); + field_isr ( L0, L3 ); + field_mul ( L2, L1, L0 ); + field_sqr ( L1, L0 ); + field_mul ( L0, L3, L1 ); + constant_time_mask ( b, L2, sizeof(L1), L4 ); + field_subw( L0, 1 ); + L5 = 
field_is_zero( L0 ); L4 = field_is_zero( sbz ); return L5 | L4; } void serialize_extensible ( - struct field_t* b, + field_a_t b, const struct extensible_t* a ) { - struct field_t L0, L1, L2; - field_sub ( &L0, &a->y, &a->z ); - field_add ( b, &a->z, &a->y ); - field_mul ( &L1, &a->z, &a->x ); - field_mul ( &L2, &L0, &L1 ); - field_mul ( &L1, &L2, &L0 ); - field_mul ( &L0, &L2, b ); - field_mul ( &L2, &L1, &L0 ); - field_isr ( &L0, &L2 ); - field_mul ( b, &L1, &L0 ); - field_sqr ( &L1, &L0 ); - field_mul ( &L0, &L2, &L1 ); + field_a_t L0, L1, L2; + field_sub ( L0, a->y, a->z ); + field_add ( b, a->z, a->y ); + field_mul ( L1, a->z, a->x ); + field_mul ( L2, L0, L1 ); + field_mul ( L1, L2, L0 ); + field_mul ( L0, L2, b ); + field_mul ( L2, L1, L0 ); + field_isr ( L0, L2 ); + field_mul ( b, L1, L0 ); + field_sqr ( L1, L0 ); + field_mul ( L0, L2, L1 ); } void untwist_and_double_and_serialize ( - struct field_t* b, + field_a_t b, const struct tw_extensible_t* a ) { - struct field_t L0, L1, L2, L3; - field_mul ( &L3, &a->y, &a->x ); - field_add ( b, &a->y, &a->x ); - field_sqr ( &L1, b ); - field_add ( &L2, &L3, &L3 ); - field_sub ( b, &L1, &L2 ); - field_sqr ( &L2, &a->z ); - field_sqr ( &L1, &L2 ); + field_a_t L0, L1, L2, L3; + field_mul ( L3, a->y, a->x ); + field_add ( b, a->y, a->x ); + field_sqr ( L1, b ); + field_add ( L2, L3, L3 ); + field_sub ( b, L1, L2 ); + field_sqr ( L2, a->z ); + field_sqr ( L1, L2 ); field_add ( b, b, b ); - field_mulw_scc ( &L2, b, EDWARDS_D-1 ); - field_mulw_scc ( b, &L2, EDWARDS_D-1 ); - field_mul ( &L0, &L2, &L1 ); - field_mul ( &L2, b, &L0 ); - field_isr ( &L0, &L2 ); - field_mul ( &L1, b, &L0 ); - field_sqr ( b, &L0 ); - field_mul ( &L0, &L2, b ); - field_mul ( b, &L1, &L3 ); + field_mulw_scc ( L2, b, EDWARDS_D-1 ); + field_mulw_scc ( b, L2, EDWARDS_D-1 ); + field_mul ( L0, L2, L1 ); + field_mul ( L2, b, L0 ); + field_isr ( L0, L2 ); + field_mul ( L1, b, L0 ); + field_sqr ( b, L0 ); + field_mul ( L0, L2, b ); + field_mul ( b, L1, L3 ); } void @@ -378,25 +378,25 @@ twist_even ( struct tw_extensible_t* b, const struct extensible_t* a ) { - field_sqr ( &b->y, &a->z ); - field_sqr ( &b->z, &a->x ); - field_sub ( &b->u, &b->y, &b->z ); - field_sub ( &b->z, &a->z, &a->x ); - field_mul ( &b->y, &b->z, &a->y ); - field_sub ( &b->z, &a->z, &a->y ); - field_mul ( &b->x, &b->z, &b->y ); - field_mul ( &b->t, &b->x, &b->u ); - field_mul ( &b->y, &b->x, &b->t ); - field_isr ( &b->t, &b->y ); - field_mul ( &b->u, &b->x, &b->t ); - field_sqr ( &b->x, &b->t ); - field_mul ( &b->t, &b->y, &b->x ); - field_mul ( &b->x, &a->x, &b->u ); - field_mul ( &b->y, &a->y, &b->u ); - field_addw ( &b->y, -field_is_zero( &b->z ) ); - field_set_ui( &b->z, 1 ); - field_copy ( &b->t, &b->x ); - field_copy ( &b->u, &b->y ); + field_sqr ( b->y, a->z ); + field_sqr ( b->z, a->x ); + field_sub ( b->u, b->y, b->z ); + field_sub ( b->z, a->z, a->x ); + field_mul ( b->y, b->z, a->y ); + field_sub ( b->z, a->z, a->y ); + field_mul ( b->x, b->z, b->y ); + field_mul ( b->t, b->x, b->u ); + field_mul ( b->y, b->x, b->t ); + field_isr ( b->t, b->y ); + field_mul ( b->u, b->x, b->t ); + field_sqr ( b->x, b->t ); + field_mul ( b->t, b->y, b->x ); + field_mul ( b->x, a->x, b->u ); + field_mul ( b->y, a->y, b->u ); + field_addw ( b->y, -field_is_zero( b->z ) ); + field_set_ui( b->z, 1 ); + field_copy ( b->t, b->x ); + field_copy ( b->u, b->y ); } void @@ -404,135 +404,134 @@ test_only_twist ( struct tw_extensible_t* b, const struct extensible_t* a ) { - struct field_t L0, L1; - field_sqr ( &b->u, &a->z ); - 
field_sqr ( &b->y, &a->x ); - field_sub ( &b->z, &b->u, &b->y ); - field_add ( &b->y, &b->z, &b->z ); - field_add ( &b->u, &b->y, &b->y ); - field_sub ( &b->y, &a->z, &a->x ); - field_mul ( &b->x, &b->y, &a->y ); - field_sub ( &b->z, &a->z, &a->y ); - field_mul ( &b->t, &b->z, &b->x ); - field_mul ( &L1, &b->t, &b->u ); - field_mul ( &b->x, &b->t, &L1 ); - field_isr ( &L0, &b->x ); - field_mul ( &b->u, &b->t, &L0 ); - field_sqr ( &L1, &L0 ); - field_mul ( &b->t, &b->x, &L1 ); - field_add ( &L1, &a->y, &a->x ); - field_sub ( &L0, &a->x, &a->y ); - field_mul ( &b->x, &b->t, &L0 ); - field_add ( &L0, &b->x, &L1 ); - field_sub ( &b->t, &L1, &b->x ); - field_mul ( &b->x, &L0, &b->u ); - field_addw ( &b->x, -field_is_zero( &b->y ) ); - field_mul ( &b->y, &b->t, &b->u ); - field_addw ( &b->y, -field_is_zero( &b->z ) ); - field_set_ui( &b->z, 1+field_is_zero( &a->y ) ); - field_copy ( &b->t, &b->x ); - field_copy ( &b->u, &b->y ); + field_a_t L0, L1; + field_sqr ( b->u, a->z ); + field_sqr ( b->y, a->x ); + field_sub ( b->z, b->u, b->y ); + field_add ( b->y, b->z, b->z ); + field_add ( b->u, b->y, b->y ); + field_sub ( b->y, a->z, a->x ); + field_mul ( b->x, b->y, a->y ); + field_sub ( b->z, a->z, a->y ); + field_mul ( b->t, b->z, b->x ); + field_mul ( L1, b->t, b->u ); + field_mul ( b->x, b->t, L1 ); + field_isr ( L0, b->x ); + field_mul ( b->u, b->t, L0 ); + field_sqr ( L1, L0 ); + field_mul ( b->t, b->x, L1 ); + field_add ( L1, a->y, a->x ); + field_sub ( L0, a->x, a->y ); + field_mul ( b->x, b->t, L0 ); + field_add ( L0, b->x, L1 ); + field_sub ( b->t, L1, b->x ); + field_mul ( b->x, L0, b->u ); + field_addw ( b->x, -field_is_zero( b->y ) ); + field_mul ( b->y, b->t, b->u ); + field_addw ( b->y, -field_is_zero( b->z ) ); + field_set_ui( b->z, 1+field_is_zero( a->y ) ); + field_copy ( b->t, b->x ); + field_copy ( b->u, b->y ); } mask_t is_even_pt ( const struct extensible_t* a ) { - struct field_t L0, L1, L2; - field_sqr ( &L2, &a->z ); - field_sqr ( &L1, &a->x ); - field_sub ( &L0, &L2, &L1 ); - return field_is_square ( &L0 ); + field_a_t L0, L1, L2; + field_sqr ( L2, a->z ); + field_sqr ( L1, a->x ); + field_sub ( L0, L2, L1 ); + return field_is_square ( L0 ); } mask_t is_even_tw ( const struct tw_extensible_t* a ) { - struct field_t L0, L1, L2; - field_sqr ( &L2, &a->z ); - field_sqr ( &L1, &a->x ); - field_add ( &L0, &L1, &L2 ); - return field_is_square ( &L0 ); + field_a_t L0, L1, L2; + field_sqr ( L2, a->z ); + field_sqr ( L1, a->x ); + field_add ( L0, L1, L2 ); + return field_is_square ( L0 ); } mask_t deserialize_affine ( struct affine_t* a, - const struct field_t* sz -) { - struct field_t L0, L1, L2, L3; - field_sqr ( &L1, sz ); - field_copy ( &L3, &L1 ); - field_addw ( &L3, 1 ); - field_sqr ( &L2, &L3 ); - field_mulw_scc ( &a->x, &L2, EDWARDS_D-1 ); /* PERF MULW */ - field_add ( &L3, &L1, &L1 ); /* FIXME: i adjusted the bias here, was it right? 
*/ - field_add ( &a->y, &L3, &L3 ); - field_add ( &L3, &a->y, &a->x ); - field_copy ( &a->y, &L1 ); - field_negx ( &a->x, &a->y ); - field_addw ( &a->x, 1 ); - field_mul ( &a->y, &a->x, &L3 ); - field_sqr ( &L2, &a->x ); - field_mul ( &L0, &L2, &a->y ); - field_mul ( &a->y, &a->x, &L0 ); - field_isr ( &L3, &a->y ); - field_mul ( &a->y, &L2, &L3 ); - field_sqr ( &L2, &L3 ); - field_mul ( &L3, &L0, &L2 ); - field_mul ( &L0, &a->x, &L3 ); - field_add ( &L2, &a->y, &a->y ); - field_mul ( &a->x, sz, &L2 ); - field_addw ( &L1, 1 ); - field_mul ( &a->y, &L1, &L3 ); - field_subw( &L0, 1 ); - return field_is_zero( &L0 ); + const field_a_t sz +) { + field_a_t L0, L1, L2, L3; + field_sqr ( L1, sz ); + field_copy ( L3, L1 ); + field_addw ( L3, 1 ); + field_sqr ( L2, L3 ); + field_mulw_scc ( a->x, L2, EDWARDS_D-1 ); /* PERF MULW */ + field_add ( L3, L1, L1 ); /* FIXME: i adjusted the bias here, was it right? */ + field_add ( a->y, L3, L3 ); + field_add ( L3, a->y, a->x ); + field_copy ( a->y, L1 ); + field_neg ( a->x, a->y ); + field_addw ( a->x, 1 ); + field_mul ( a->y, a->x, L3 ); + field_sqr ( L2, a->x ); + field_mul ( L0, L2, a->y ); + field_mul ( a->y, a->x, L0 ); + field_isr ( L3, a->y ); + field_mul ( a->y, L2, L3 ); + field_sqr ( L2, L3 ); + field_mul ( L3, L0, L2 ); + field_mul ( L0, a->x, L3 ); + field_add ( L2, a->y, a->y ); + field_mul ( a->x, sz, L2 ); + field_addw ( L1, 1 ); + field_mul ( a->y, L1, L3 ); + field_subw( L0, 1 ); + return field_is_zero( L0 ); } mask_t deserialize_and_twist_approx ( struct tw_extensible_t* a, - const struct field_t* sdm1, - const struct field_t* sz -) { - struct field_t L0, L1; - field_sqr ( &a->z, sz ); - field_copy ( &a->y, &a->z ); - field_addw ( &a->y, 1 ); - field_sqr ( &L0, &a->y ); - field_mulw_scc ( &a->x, &L0, EDWARDS_D-1 ); - field_add ( &a->y, &a->z, &a->z ); - field_add ( &a->u, &a->y, &a->y ); - field_add ( &a->y, &a->u, &a->x ); - field_sqr ( &a->x, &a->z ); - field_negx ( &a->u, &a->x ); - field_addw ( &a->u, 1 ); - field_mul ( &a->x, sdm1, &a->u ); - field_mul ( &L0, &a->x, &a->y ); - field_mul ( &a->t, &L0, &a->y ); - field_mul ( &a->u, &a->x, &a->t ); - field_mul ( &a->t, &a->u, &L0 ); - field_mul ( &a->y, &a->x, &a->t ); - field_isr ( &L0, &a->y ); - field_mul ( &a->y, &a->u, &L0 ); - field_sqr ( &L1, &L0 ); - field_mul ( &a->u, &a->t, &L1 ); - field_mul ( &a->t, &a->x, &a->u ); - field_add ( &a->x, sz, sz ); - field_mul ( &L0, &a->u, &a->x ); - field_copy ( &a->x, &a->z ); - field_negx ( &L1, &a->x ); - field_addw ( &L1, 1 ); - field_mul ( &a->x, &L1, &L0 ); - field_mul ( &L0, &a->u, &a->y ); - field_addw ( &a->z, 1 ); - field_mul ( &a->y, &a->z, &L0 ); - field_subw( &a->t, 1 ); - mask_t ret = field_is_zero( &a->t ); - field_set_ui( &a->z, 1 ); - field_copy ( &a->t, &a->x ); - field_copy ( &a->u, &a->y ); + const field_a_t sz +) { + field_a_t L0, L1; + field_sqr ( a->z, sz ); + field_copy ( a->y, a->z ); + field_addw ( a->y, 1 ); + field_sqr ( L0, a->y ); + field_mulw_scc ( a->x, L0, EDWARDS_D-1 ); + field_add ( a->y, a->z, a->z ); + field_add ( a->u, a->y, a->y ); + field_add ( a->y, a->u, a->x ); + field_sqr ( a->x, a->z ); + field_neg ( a->u, a->x ); + field_addw ( a->u, 1 ); + field_mul ( a->x, sqrt_d_minus_1, a->u ); + field_mul ( L0, a->x, a->y ); + field_mul ( a->t, L0, a->y ); + field_mul ( a->u, a->x, a->t ); + field_mul ( a->t, a->u, L0 ); + field_mul ( a->y, a->x, a->t ); + field_isr ( L0, a->y ); + field_mul ( a->y, a->u, L0 ); + field_sqr ( L1, L0 ); + field_mul ( a->u, a->t, L1 ); + field_mul ( a->t, a->x, a->u ); + field_add 
( a->x, sz, sz ); + field_mul ( L0, a->u, a->x ); + field_copy ( a->x, a->z ); + field_neg ( L1, a->x ); + field_addw ( L1, 1 ); + field_mul ( a->x, L1, L0 ); + field_mul ( L0, a->u, a->y ); + field_addw ( a->z, 1 ); + field_mul ( a->y, a->z, L0 ); + field_subw( a->t, 1 ); + mask_t ret = field_is_zero( a->t ); + field_set_ui( a->z, 1 ); + field_copy ( a->t, a->x ); + field_copy ( a->u, a->y ); return ret; } @@ -540,30 +539,30 @@ void set_identity_extensible ( struct extensible_t* a ) { - field_set_ui( &a->x, 0 ); - field_set_ui( &a->y, 1 ); - field_set_ui( &a->z, 1 ); - field_set_ui( &a->t, 0 ); - field_set_ui( &a->u, 0 ); + field_set_ui( a->x, 0 ); + field_set_ui( a->y, 1 ); + field_set_ui( a->z, 1 ); + field_set_ui( a->t, 0 ); + field_set_ui( a->u, 0 ); } void set_identity_tw_extensible ( struct tw_extensible_t* a ) { - field_set_ui( &a->x, 0 ); - field_set_ui( &a->y, 1 ); - field_set_ui( &a->z, 1 ); - field_set_ui( &a->t, 0 ); - field_set_ui( &a->u, 0 ); + field_set_ui( a->x, 0 ); + field_set_ui( a->y, 1 ); + field_set_ui( a->z, 1 ); + field_set_ui( a->t, 0 ); + field_set_ui( a->u, 0 ); } void set_identity_affine ( struct affine_t* a ) { - field_set_ui( &a->x, 0 ); - field_set_ui( &a->y, 1 ); + field_set_ui( a->x, 0 ); + field_set_ui( a->y, 1 ); } mask_t @@ -572,11 +571,11 @@ eq_affine ( const struct affine_t* b ) { mask_t L1, L2; - struct field_t L0; - field_sub ( &L0, &a->x, &b->x ); - L2 = field_is_zero( &L0 ); - field_sub ( &L0, &a->y, &b->y ); - L1 = field_is_zero( &L0 ); + field_a_t L0; + field_sub ( L0, a->x, b->x ); + L2 = field_is_zero( L0 ); + field_sub ( L0, a->y, b->y ); + L1 = field_is_zero( L0 ); return L2 & L1; } @@ -586,15 +585,15 @@ eq_extensible ( const struct extensible_t* b ) { mask_t L3, L4; - struct field_t L0, L1, L2; - field_mul ( &L2, &b->z, &a->x ); - field_mul ( &L1, &a->z, &b->x ); - field_sub ( &L0, &L2, &L1 ); - L4 = field_is_zero( &L0 ); - field_mul ( &L2, &b->z, &a->y ); - field_mul ( &L1, &a->z, &b->y ); - field_sub ( &L0, &L2, &L1 ); - L3 = field_is_zero( &L0 ); + field_a_t L0, L1, L2; + field_mul ( L2, b->z, a->x ); + field_mul ( L1, a->z, b->x ); + field_sub ( L0, L2, L1 ); + L4 = field_is_zero( L0 ); + field_mul ( L2, b->z, a->y ); + field_mul ( L1, a->z, b->y ); + field_sub ( L0, L2, L1 ); + L3 = field_is_zero( L0 ); return L4 & L3; } @@ -604,39 +603,39 @@ eq_tw_extensible ( const struct tw_extensible_t* b ) { mask_t L3, L4; - struct field_t L0, L1, L2; - field_mul ( &L2, &b->z, &a->x ); - field_mul ( &L1, &a->z, &b->x ); - field_sub ( &L0, &L2, &L1 ); - L4 = field_is_zero( &L0 ); - field_mul ( &L2, &b->z, &a->y ); - field_mul ( &L1, &a->z, &b->y ); - field_sub ( &L0, &L2, &L1 ); - L3 = field_is_zero( &L0 ); + field_a_t L0, L1, L2; + field_mul ( L2, b->z, a->x ); + field_mul ( L1, a->z, b->x ); + field_sub ( L0, L2, L1 ); + L4 = field_is_zero( L0 ); + field_mul ( L2, b->z, a->y ); + field_mul ( L1, a->z, b->y ); + field_sub ( L0, L2, L1 ); + L3 = field_is_zero( L0 ); return L4 & L3; } void elligator_2s_inject ( struct affine_t* a, - const struct field_t* r -) { - struct field_t L2, L3, L4, L5, L6, L7, L8; - field_sqr ( &a->x, r ); - field_sqr ( &L3, &a->x ); - field_copy ( &a->y, &L3 ); - field_negx ( &L4, &a->y ); - field_addw ( &L4, 1 ); - field_sqr ( &L2, &L4 ); - field_mulw ( &L7, &L2, (EDWARDS_D-1)*(EDWARDS_D-1) ); - field_mulw ( &L8, &L3, 4*(EDWARDS_D+1)*(EDWARDS_D+1) ); - field_add ( &a->y, &L8, &L7 ); - field_mulw ( &L8, &L2, 4*(EDWARDS_D)*(EDWARDS_D-1) ); - field_sub ( &L7, &a->y, &L8 ); - field_mulw_scc ( &L6, &a->y, -2-2*EDWARDS_D ); - 
field_mul ( &L5, &L7, &L6 ); + const field_a_t r +) { + field_a_t L2, L3, L4, L5, L6, L7, L8; + field_sqr ( a->x, r ); + field_sqr ( L3, a->x ); + field_copy ( a->y, L3 ); + field_neg ( L4, a->y ); + field_addw ( L4, 1 ); + field_sqr ( L2, L4 ); + field_mulw ( L7, L2, (EDWARDS_D-1)*(EDWARDS_D-1) ); + field_mulw ( L8, L3, 4*(EDWARDS_D+1)*(EDWARDS_D+1) ); + field_add ( a->y, L8, L7 ); + field_mulw ( L8, L2, 4*(EDWARDS_D)*(EDWARDS_D-1) ); + field_sub ( L7, a->y, L8 ); + field_mulw_scc ( L6, a->y, -2-2*EDWARDS_D ); + field_mul ( L5, L7, L6 ); /* FIXME Stability problem (API stability, not crash) / possible bug. - * change to: p448_mul ( &L5, &L7, &L4 ); ? + * change to: p448_mul ( L5, L7, L4 ); ? * This isn't a deep change: it's for sign adjustment. * Need to check which one leads to the correct sign, probably by writig * the invert routine. @@ -647,47 +646,47 @@ elligator_2s_inject ( * Could compute be, (be)^2, (be)^3, a b^3 e^3, a b^3 e^4. = 4M+S * instead of 6M. */ - field_mul ( &L8, &L5, &L4 ); - field_mul ( &L4, &L5, &L6 ); - field_mul ( &L5, &L7, &L8 ); - field_mul ( &L8, &L5, &L4 ); - field_mul ( &L4, &L7, &L8 ); - field_isr ( &L6, &L4 ); - field_mul ( &L4, &L5, &L6 ); - field_sqr ( &L5, &L6 ); - field_mul ( &L6, &L8, &L5 ); - field_mul ( &L8, &L7, &L6 ); - field_mul ( &L7, &L8, &L6 ); - field_copy ( &L6, &a->x ); - field_addw ( &a->x, 1 ); - field_mul ( &L5, &a->x, &L8 ); - field_addw ( &L5, 1 ); - field_sub ( &a->x, &L6, &L5 ); - field_mul ( &L5, &L4, &a->x ); - field_mulw_scc_wr ( &a->x, &L5, -2-2*EDWARDS_D ); - field_add ( &L4, &L3, &L3 ); - field_add ( &L3, &L4, &L2 ); - field_subw( &L3, 2 ); - field_mul ( &L2, &L3, &L8 ); - field_mulw ( &L3, &L2, 2*(EDWARDS_D+1)*(EDWARDS_D-1) ); - field_add ( &L2, &L3, &a->y ); - field_mul ( &a->y, &L7, &L2 ); - field_addw ( &a->y, -field_is_zero( &L8 ) ); + field_mul ( L8, L5, L4 ); + field_mul ( L4, L5, L6 ); + field_mul ( L5, L7, L8 ); + field_mul ( L8, L5, L4 ); + field_mul ( L4, L7, L8 ); + field_isr ( L6, L4 ); + field_mul ( L4, L5, L6 ); + field_sqr ( L5, L6 ); + field_mul ( L6, L8, L5 ); + field_mul ( L8, L7, L6 ); + field_mul ( L7, L8, L6 ); + field_copy ( L6, a->x ); + field_addw ( a->x, 1 ); + field_mul ( L5, a->x, L8 ); + field_addw ( L5, 1 ); + field_sub ( a->x, L6, L5 ); + field_mul ( L5, L4, a->x ); + field_mulw_scc_wr ( a->x, L5, -2-2*EDWARDS_D ); + field_add ( L4, L3, L3 ); + field_add ( L3, L4, L2 ); + field_subw( L3, 2 ); + field_mul ( L2, L3, L8 ); + field_mulw ( L3, L2, 2*(EDWARDS_D+1)*(EDWARDS_D-1) ); + field_add ( L2, L3, a->y ); + field_mul ( a->y, L7, L2 ); + field_addw ( a->y, -field_is_zero( L8 ) ); } mask_t validate_affine ( const struct affine_t* a ) { - struct field_t L0, L1, L2, L3; - field_sqr ( &L0, &a->y ); - field_sqr ( &L1, &a->x ); - field_add ( &L3, &L1, &L0 ); - field_mulw_scc ( &L2, &L1, EDWARDS_D ); - field_mul ( &L1, &L0, &L2 ); - field_addw ( &L1, 1 ); - field_sub ( &L0, &L3, &L1 ); - return field_is_zero( &L0 ); + field_a_t L0, L1, L2, L3; + field_sqr ( L0, a->y ); + field_sqr ( L1, a->x ); + field_add ( L3, L1, L0 ); + field_mulw_scc ( L2, L1, EDWARDS_D ); + field_mul ( L1, L0, L2 ); + field_addw ( L1, 1 ); + field_sub ( L0, L3, L1 ); + return field_is_zero( L0 ); } mask_t @@ -695,36 +694,36 @@ validate_tw_extensible ( const struct tw_extensible_t* ext ) { mask_t L4, L5; - struct field_t L0, L1, L2, L3; + field_a_t L0, L1, L2, L3; /* * Check invariant: * 0 = -x*y + z*t*u */ - field_mul ( &L1, &ext->t, &ext->u ); - field_mul ( &L2, &ext->z, &L1 ); - field_mul ( &L0, &ext->x, &ext->y ); - field_negx ( 
&L1, &L0 ); - field_add ( &L0, &L1, &L2 ); - L5 = field_is_zero( &L0 ); + field_mul ( L1, ext->t, ext->u ); + field_mul ( L2, ext->z, L1 ); + field_mul ( L0, ext->x, ext->y ); + field_neg ( L1, L0 ); + field_add ( L0, L1, L2 ); + L5 = field_is_zero( L0 ); /* * Check invariant: * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 */ - field_sqr ( &L2, &ext->y ); - field_negx ( &L1, &L2 ); - field_sqr ( &L0, &ext->x ); - field_add ( &L2, &L0, &L1 ); - field_sqr ( &L3, &ext->u ); - field_sqr ( &L0, &ext->t ); - field_mul ( &L1, &L0, &L3 ); - field_mulw_scc ( &L3, &L1, EDWARDS_D ); - field_add ( &L0, &L3, &L2 ); - field_negx ( &L3, &L1 ); - field_add ( &L2, &L3, &L0 ); - field_sqr ( &L1, &ext->z ); - field_add ( &L0, &L1, &L2 ); - L4 = field_is_zero( &L0 ); - return L5 & L4 &~ field_is_zero(&ext->z); + field_sqr ( L2, ext->y ); + field_neg ( L1, L2 ); + field_sqr ( L0, ext->x ); + field_add ( L2, L0, L1 ); + field_sqr ( L3, ext->u ); + field_sqr ( L0, ext->t ); + field_mul ( L1, L0, L3 ); + field_mulw_scc ( L3, L1, EDWARDS_D ); + field_add ( L0, L3, L2 ); + field_neg ( L3, L1 ); + field_add ( L2, L3, L0 ); + field_sqr ( L1, ext->z ); + field_add ( L0, L1, L2 ); + L4 = field_is_zero( L0 ); + return L5 & L4 &~ field_is_zero(ext->z); } mask_t @@ -732,33 +731,33 @@ validate_extensible ( const struct extensible_t* ext ) { mask_t L4, L5; - struct field_t L0, L1, L2, L3; + field_a_t L0, L1, L2, L3; /* * Check invariant: * 0 = d*t^2*u^2 - x^2 - y^2 + z^2 */ - field_sqr ( &L2, &ext->y ); - field_negx ( &L1, &L2 ); - field_sqr ( &L0, &ext->z ); - field_add ( &L2, &L0, &L1 ); - field_sqr ( &L3, &ext->u ); - field_sqr ( &L0, &ext->t ); - field_mul ( &L1, &L0, &L3 ); - field_mulw_scc ( &L0, &L1, EDWARDS_D ); - field_add ( &L1, &L0, &L2 ); - field_sqr ( &L0, &ext->x ); - field_negx ( &L2, &L0 ); - field_add ( &L0, &L2, &L1 ); - L5 = field_is_zero( &L0 ); + field_sqr ( L2, ext->y ); + field_neg ( L1, L2 ); + field_sqr ( L0, ext->z ); + field_add ( L2, L0, L1 ); + field_sqr ( L3, ext->u ); + field_sqr ( L0, ext->t ); + field_mul ( L1, L0, L3 ); + field_mulw_scc ( L0, L1, EDWARDS_D ); + field_add ( L1, L0, L2 ); + field_sqr ( L0, ext->x ); + field_neg ( L2, L0 ); + field_add ( L0, L2, L1 ); + L5 = field_is_zero( L0 ); /* * Check invariant: * 0 = -x*y + z*t*u */ - field_mul ( &L1, &ext->t, &ext->u ); - field_mul ( &L2, &ext->z, &L1 ); - field_mul ( &L0, &ext->x, &ext->y ); - field_negx ( &L1, &L0 ); - field_add ( &L0, &L1, &L2 ); - L4 = field_is_zero( &L0 ); - return L5 & L4 &~ field_is_zero(&ext->z); + field_mul ( L1, ext->t, ext->u ); + field_mul ( L2, ext->z, L1 ); + field_mul ( L0, ext->x, ext->y ); + field_neg ( L1, L0 ); + field_add ( L0, L1, L2 ); + L4 = field_is_zero( L0 ); + return L5 & L4 &~ field_is_zero(ext->z); } diff --git a/src/goldilocks.c b/src/goldilocks.c index f86e1ab..7cba9c4 100644 --- a/src/goldilocks.c +++ b/src/goldilocks.c @@ -162,7 +162,7 @@ goldilocks_derive_private_key ( struct sha512_ctx_t ctx; struct tw_extensible_t exta; - struct field_t pk; + field_a_t pk; sha512_init(&ctx); sha512_update(&ctx, (const unsigned char *)"derivepk", GOLDI_DIVERSIFY_BYTES); @@ -173,9 +173,9 @@ goldilocks_derive_private_key ( barrett_serialize(privkey->opaque, sk, GOLDI_FIELD_BYTES); scalarmul_fixed_base(&exta, sk, GOLDI_SCALAR_BITS, &goldilocks_global.fixed_base); - untwist_and_double_and_serialize(&pk, &exta); + untwist_and_double_and_serialize(pk, &exta); - field_serialize(&privkey->opaque[GOLDI_FIELD_BYTES], &pk); + field_serialize(&privkey->opaque[GOLDI_FIELD_BYTES], pk); return GOLDI_EOK; } @@ -225,11 
+225,11 @@ goldilocks_private_to_public ( struct goldilocks_public_key_t *pubkey, const struct goldilocks_private_key_t *privkey ) { - struct field_t pk; - mask_t msucc = field_deserialize(&pk,&privkey->opaque[GOLDI_FIELD_BYTES]); + field_a_t pk; + mask_t msucc = field_deserialize(pk,&privkey->opaque[GOLDI_FIELD_BYTES]); if (msucc) { - field_serialize(pubkey->opaque, &pk); + field_serialize(pubkey->opaque, pk); return GOLDI_EOK; } else { return GOLDI_ECORRUPT; @@ -252,15 +252,15 @@ goldilocks_shared_secret_core ( assert(GOLDI_SHARED_SECRET_BYTES == SHA512_OUTPUT_BYTES); word_t sk[GOLDI_FIELD_WORDS]; - struct field_t pk; + field_a_t pk; - mask_t succ = field_deserialize(&pk,your_pubkey->opaque), msucc = -1; + mask_t succ = field_deserialize(pk,your_pubkey->opaque), msucc = -1; #ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS - struct field_t sum, prod; - msucc &= field_deserialize(&sum,&my_privkey->opaque[GOLDI_FIELD_BYTES]); - field_mul(&prod,&pk,&sum); - field_add(&sum,&pk,&sum); + field_a_t sum, prod; + msucc &= field_deserialize(sum,&my_privkey->opaque[GOLDI_FIELD_BYTES]); + field_mul(prod,pk,sum); + field_add(sum,pk,sum); #endif msucc &= barrett_deserialize(sk,my_privkey->opaque,&curve_prime_order); @@ -269,17 +269,17 @@ goldilocks_shared_secret_core ( if (pre) { struct tw_extensible_t tw; succ &= scalarmul_fixed_base(&tw, sk, GOLDI_SCALAR_BITS, &pre->table); - untwist_and_double_and_serialize(&pk, &tw); + untwist_and_double_and_serialize(pk, &tw); } else { - succ &= montgomery_ladder(&pk,&pk,sk,GOLDI_SCALAR_BITS,1); + succ &= montgomery_ladder(pk,pk,sk,GOLDI_SCALAR_BITS,1); } #else (void)pre; - succ &= montgomery_ladder(&pk,&pk,sk,GOLDI_SCALAR_BITS,1); + succ &= montgomery_ladder(pk,pk,sk,GOLDI_SCALAR_BITS,1); #endif - field_serialize(gxy,&pk); + field_serialize(gxy,pk); /* obliterate records of our failure by adjusting with obliteration key */ struct sha512_ctx_t ctx; @@ -300,9 +300,9 @@ goldilocks_shared_secret_core ( #ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS /* stir in the sum and product of the pubkeys. 
*/ uint8_t a_pk[GOLDI_FIELD_BYTES]; - field_serialize(a_pk, &sum); + field_serialize(a_pk, sum); sha512_update(&ctx, a_pk, GOLDI_FIELD_BYTES); - field_serialize(a_pk, &prod); + field_serialize(a_pk, prod); sha512_update(&ctx, a_pk, GOLDI_FIELD_BYTES); #endif @@ -383,11 +383,11 @@ goldilocks_sign ( /* 4[nonce]G */ uint8_t signature_tmp[GOLDI_FIELD_BYTES]; struct tw_extensible_t exta; - struct field_t gsk; + field_a_t gsk; scalarmul_fixed_base(&exta, tk, GOLDI_SCALAR_BITS, &goldilocks_global.fixed_base); double_tw_extensible(&exta); - untwist_and_double_and_serialize(&gsk, &exta); - field_serialize(signature_tmp, &gsk); + untwist_and_double_and_serialize(gsk, &exta); + field_serialize(signature_tmp, gsk); word_t challenge[GOLDI_FIELD_WORDS]; goldilocks_derive_challenge ( @@ -437,10 +437,10 @@ goldilocks_verify ( return GOLDI_EUNINIT; } - struct field_t pk; + field_a_t pk; word_t s[GOLDI_FIELD_WORDS]; - mask_t succ = field_deserialize(&pk,pubkey->opaque); + mask_t succ = field_deserialize(pk,pubkey->opaque); if (!succ) return GOLDI_EINVAL; succ = barrett_deserialize(s, &signature[GOLDI_FIELD_BYTES], &curve_prime_order); @@ -449,14 +449,14 @@ goldilocks_verify ( word_t challenge[GOLDI_FIELD_WORDS]; goldilocks_derive_challenge(challenge, pubkey->opaque, signature, message, message_len); - struct field_t eph; + field_a_t eph; struct tw_extensible_t pk_text; /* deserialize [nonce]G */ - succ = field_deserialize(&eph, signature); + succ = field_deserialize(eph, signature); if (!succ) return GOLDI_EINVAL; - succ = deserialize_and_twist_approx(&pk_text, &sqrt_d_minus_1, &pk); + succ = deserialize_and_twist_approx(&pk_text, pk); if (!succ) return GOLDI_EINVAL; linear_combo_var_fixed_vt( &pk_text, @@ -464,9 +464,9 @@ goldilocks_verify ( s, GOLDI_SCALAR_BITS, goldilocks_global.wnafs, WNAF_PRECMP_BITS ); - untwist_and_double_and_serialize( &pk, &pk_text ); + untwist_and_double_and_serialize( pk, &pk_text ); - succ = field_eq(&eph, &pk); + succ = field_eq(eph, pk); return succ ? 0 : GOLDI_EINVAL; } #endif @@ -485,14 +485,14 @@ goldilocks_precompute_public_key ( struct tw_extensible_t pk_text; - struct field_t pk; - mask_t succ = field_deserialize(&pk, pub->opaque); + field_a_t pk; + mask_t succ = field_deserialize(pk, pub->opaque); if (!succ) { free(precom); return NULL; } - succ = deserialize_and_twist_approx(&pk_text, &sqrt_d_minus_1, &pk); + succ = deserialize_and_twist_approx(&pk_text, pk); if (!succ) { free(precom); return NULL; @@ -538,11 +538,11 @@ goldilocks_verify_precomputed ( word_t challenge[GOLDI_FIELD_WORDS]; goldilocks_derive_challenge(challenge, pubkey->pub.opaque, signature, message, message_len); - struct field_t eph, pk; + field_a_t eph, pk; struct tw_extensible_t pk_text; /* deserialize [nonce]G */ - succ = field_deserialize(&eph, signature); + succ = field_deserialize(eph, signature); if (!succ) return GOLDI_EINVAL; succ = linear_combo_combs_vt ( @@ -552,9 +552,9 @@ goldilocks_verify_precomputed ( ); if (!succ) return GOLDI_EINVAL; - untwist_and_double_and_serialize( &pk, &pk_text ); + untwist_and_double_and_serialize( pk, &pk_text ); - succ = field_eq(&eph, &pk); + succ = field_eq(eph, pk); return succ ? 0 : GOLDI_EINVAL; } diff --git a/src/include/ec_point.h b/src/include/ec_point.h index 74bbe91..9d0f4f3 100644 --- a/src/include/ec_point.h +++ b/src/include/ec_point.h @@ -21,21 +21,21 @@ extern "C" { * Affine point on an Edwards curve. */ struct affine_t { - struct field_t x, y; + field_a_t x, y; }; /** * Affine point on a twisted Edwards curve. 
*/ struct tw_affine_t { - struct field_t x, y; + field_a_t x, y; }; /** * Montgomery buffer. */ struct montgomery_t { - struct field_t z0, xd, zd, xa, za; + field_a_t z0, xd, zd, xa, za; }; /** @@ -57,7 +57,7 @@ struct montgomery_t { * instead. */ struct extensible_t { - struct field_t x, y, z, t, u; + field_a_t x, y, z, t, u; }; /** @@ -65,7 +65,7 @@ struct extensible_t { * suitable for accumulators. */ struct tw_extensible_t { - struct field_t x, y, z, t, u; + field_a_t x, y, z, t, u; }; /** @@ -74,7 +74,7 @@ struct tw_extensible_t { * Good for mixed readdition; suitable for fixed tables. */ struct tw_niels_t { - struct field_t a, b, c; + field_a_t a, b, c; }; /** @@ -84,7 +84,7 @@ struct tw_niels_t { */ struct tw_pniels_t { struct tw_niels_t n; - struct field_t z; + field_a_t z; }; @@ -273,14 +273,14 @@ montgomery_step ( void deserialize_montgomery ( struct montgomery_t* a, - const struct field_t* sbz + const field_a_t sbz ); mask_t serialize_montgomery ( - struct field_t* b, + field_a_t b, const struct montgomery_t* a, - const struct field_t* sbz + const field_a_t sbz ); /** @@ -296,7 +296,7 @@ serialize_montgomery ( */ void serialize_extensible ( - struct field_t* b, + field_a_t b, const struct extensible_t* a ); @@ -305,7 +305,7 @@ serialize_extensible ( */ void untwist_and_double_and_serialize ( - struct field_t* b, + field_a_t b, const struct tw_extensible_t* a ); @@ -345,7 +345,7 @@ test_only_twist ( mask_t field_is_square ( - const struct field_t* x + const field_a_t x ); mask_t @@ -364,7 +364,7 @@ is_even_tw ( mask_t deserialize_affine ( struct affine_t* a, - const struct field_t* sz + const field_a_t sz ); /** @@ -377,8 +377,7 @@ deserialize_affine ( mask_t deserialize_and_twist_approx ( struct tw_extensible_t* a, - const struct field_t* sdm1, - const struct field_t* sz + const field_a_t sz ); void @@ -417,7 +416,7 @@ eq_tw_extensible ( void elligator_2s_inject ( struct affine_t* a, - const struct field_t* r + const field_a_t r ); mask_t @@ -454,8 +453,8 @@ cond_negate_tw_niels ( struct tw_niels_t *n, mask_t doNegate ) { - constant_time_cond_swap(&n->a, &n->b, sizeof(n->a), doNegate); - field_cond_neg(&n->c, doNegate); + constant_time_cond_swap(n->a, n->b, sizeof(n->a), doNegate); + field_cond_neg(n->c, doNegate); } /** @@ -475,8 +474,8 @@ copy_affine ( struct affine_t* a, const struct affine_t* ds ) { - field_copy ( &a->x, &ds->x ); - field_copy ( &a->y, &ds->y ); + field_copy ( a->x, ds->x ); + field_copy ( a->y, ds->y ); } void @@ -484,8 +483,8 @@ copy_tw_affine ( struct tw_affine_t* a, const struct tw_affine_t* ds ) { - field_copy ( &a->x, &ds->x ); - field_copy ( &a->y, &ds->y ); + field_copy ( a->x, ds->x ); + field_copy ( a->y, ds->y ); } void @@ -493,11 +492,11 @@ copy_montgomery ( struct montgomery_t* a, const struct montgomery_t* ds ) { - field_copy ( &a->z0, &ds->z0 ); - field_copy ( &a->xd, &ds->xd ); - field_copy ( &a->zd, &ds->zd ); - field_copy ( &a->xa, &ds->xa ); - field_copy ( &a->za, &ds->za ); + field_copy ( a->z0, ds->z0 ); + field_copy ( a->xd, ds->xd ); + field_copy ( a->zd, ds->zd ); + field_copy ( a->xa, ds->xa ); + field_copy ( a->za, ds->za ); } void @@ -505,11 +504,11 @@ copy_extensible ( struct extensible_t* a, const struct extensible_t* ds ) { - field_copy ( &a->x, &ds->x ); - field_copy ( &a->y, &ds->y ); - field_copy ( &a->z, &ds->z ); - field_copy ( &a->t, &ds->t ); - field_copy ( &a->u, &ds->u ); + field_copy ( a->x, ds->x ); + field_copy ( a->y, ds->y ); + field_copy ( a->z, ds->z ); + field_copy ( a->t, ds->t ); + field_copy ( a->u, ds->u ); 
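/*
 * A minimal sketch of the GMP-style element[1] idiom this patch is moving
 * to, and the reason the &s disappear from calls like the field_copy ones
 * above. The typedef is the one this patch adds to include/field.h; the
 * demo() function and its names are illustrative assumptions, not code
 * from the tree:
 *
 *     typedef struct field_t field_a_t[1];
 *
 *     static void demo ( field_a_t out, const field_a_t in ) {
 *         field_copy ( out, in );   // arrays decay to pointers: no & needed
 *     }
 *
 * A local "field_a_t t;" still occupies exactly one struct field_t on the
 * stack, but writing t in an argument list passes a pointer to that
 * struct, just as GMP's mpz_t does.
 */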
} void @@ -517,11 +516,11 @@ copy_tw_extensible ( struct tw_extensible_t* a, const struct tw_extensible_t* ds ) { - field_copy ( &a->x, &ds->x ); - field_copy ( &a->y, &ds->y ); - field_copy ( &a->z, &ds->z ); - field_copy ( &a->t, &ds->t ); - field_copy ( &a->u, &ds->u ); + field_copy ( a->x, ds->x ); + field_copy ( a->y, ds->y ); + field_copy ( a->z, ds->z ); + field_copy ( a->t, ds->t ); + field_copy ( a->u, ds->u ); } void @@ -529,9 +528,9 @@ copy_tw_niels ( struct tw_niels_t* a, const struct tw_niels_t* ds ) { - field_copy ( &a->a, &ds->a ); - field_copy ( &a->b, &ds->b ); - field_copy ( &a->c, &ds->c ); + field_copy ( a->a, ds->a ); + field_copy ( a->b, ds->b ); + field_copy ( a->c, ds->c ); } void @@ -540,7 +539,7 @@ copy_tw_pniels ( const struct tw_pniels_t* ds ) { copy_tw_niels( &a->n, &ds->n ); - field_copy ( &a->z, &ds->z ); + field_copy ( a->z, ds->z ); } #ifdef __cplusplus diff --git a/src/include/field.h b/src/include/field.h index d375c09..80e9b6f 100644 --- a/src/include/field.h +++ b/src/include/field.h @@ -14,6 +14,9 @@ #include "f_field.h" #include +typedef struct field_t field_a_t[1]; +#define field_a_restrict_t struct field_t *__restrict__ + #define is32 (GOLDI_BITS == 32 || FIELD_BITS != 448) #if (is32) #define IF32(s) (s) @@ -54,8 +57,8 @@ extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; static inline void __attribute__((unused,always_inline)) field_copy ( - struct field_t *__restrict__ a, - const struct field_t *__restrict__ b + field_a_restrict_t a, + const field_a_restrict_t b ) { memcpy(a,b,sizeof(*a)); } @@ -70,8 +73,8 @@ field_copy ( */ void field_isr ( - struct field_t* a, - const struct field_t* x + field_a_t a, + const field_a_t x ); /** @@ -81,8 +84,8 @@ field_isr ( */ void field_simultaneous_invert ( - struct field_t *__restrict__ out, - const struct field_t *in, + field_a_t *__restrict__ out, + const field_a_t *in, unsigned int n ); @@ -93,8 +96,8 @@ field_simultaneous_invert ( */ void field_inverse ( - struct field_t* a, - const struct field_t* x + field_a_t a, + const field_a_t x ); /** @@ -102,8 +105,8 @@ field_inverse ( */ mask_t field_eq ( - const struct field_t *a, - const struct field_t *b + const field_a_t a, + const field_a_t b ); /** @@ -112,31 +115,31 @@ field_eq ( static __inline__ void __attribute__((unused,always_inline)) field_sqrn ( - field_t *__restrict__ y, - const field_t *x, + field_a_restrict_t y, + const field_a_t x, int n ) { - field_t tmp; + field_a_t tmp; assert(n>0); if (n&1) { field_sqr(y,x); n--; } else { - field_sqr(&tmp,x); - field_sqr(y,&tmp); + field_sqr(tmp,x); + field_sqr(y,tmp); n-=2; } for (; n; n-=2) { - field_sqr(&tmp,y); - field_sqr(y,&tmp); + field_sqr(tmp,y); + field_sqr(y,tmp); } } /* Multiply by signed curve constant */ static __inline__ void field_mulw_scc ( - struct field_t* __restrict__ out, - const struct field_t *a, + field_a_restrict_t out, + const field_a_t a, int64_t scc ) { if (scc >= 0) { @@ -151,8 +154,8 @@ field_mulw_scc ( /* Multiply by signed curve constant and weak reduce if biased */ static __inline__ void field_mulw_scc_wr ( - struct field_t* __restrict__ out, - const struct field_t *a, + field_a_restrict_t out, + const field_a_t a, int64_t scc ) { field_mulw_scc(out, a, scc); @@ -162,9 +165,9 @@ field_mulw_scc_wr ( static __inline__ void field_subx_RAW ( - struct field_t *d, - const struct field_t *a, - const struct field_t *b + field_a_t d, + const field_a_t a, + const field_a_t b ) { field_sub_RAW ( d, a, b ); field_bias( d, 2 ); @@ -173,9 +176,9 @@ field_subx_RAW ( static __inline__ void 
field_sub ( - struct field_t *d, - const struct field_t *a, - const struct field_t *b + field_a_t d, + const field_a_t a, + const field_a_t b ) { field_sub_RAW ( d, a, b ); field_bias( d, 2 ); @@ -184,9 +187,9 @@ field_sub ( static __inline__ void field_add ( - struct field_t *d, - const struct field_t *a, - const struct field_t *b + field_a_t d, + const field_a_t a, + const field_a_t b ) { field_add_RAW ( d, a, b ); field_weak_reduce ( d ); @@ -194,7 +197,7 @@ field_add ( static __inline__ void field_subw ( - struct field_t *d, + field_a_t d, word_t c ) { field_subw_RAW ( d, c ); @@ -203,9 +206,9 @@ field_subw ( } static __inline__ void -field_negx ( - struct field_t *d, - const struct field_t *a +field_neg ( + field_a_t d, + const field_a_t a ) { field_neg_RAW ( d, a ); field_bias( d, 2 ); @@ -218,12 +221,12 @@ field_negx ( static inline void __attribute__((unused,always_inline)) field_cond_neg ( - field_t *a, + field_a_t a, mask_t doNegate ) { - struct field_t negated; - field_negx(&negated, a); - constant_time_select(a, &negated, a, sizeof(negated), doNegate); + field_a_t negated; + field_neg(negated, a); + constant_time_select(a, negated, a, sizeof(negated), doNegate); } /** Require the warning annotation on raw routines */ diff --git a/src/include/magic.h b/src/include/magic.h index 4b8394d..1627a6b 100644 --- a/src/include/magic.h +++ b/src/include/magic.h @@ -45,7 +45,7 @@ /** * @brief sqrt(d-1), used for point formats and twisting. */ -extern const struct field_t sqrt_d_minus_1; +extern const field_a_t sqrt_d_minus_1; /** * @brief The base point for Goldilocks. diff --git a/src/include/scalarmul.h b/src/include/scalarmul.h index bd97cc9..ecb1782 100644 --- a/src/include/scalarmul.h +++ b/src/include/scalarmul.h @@ -90,8 +90,8 @@ struct fixed_base_table_t { */ mask_t montgomery_ladder ( - struct field_t *out, - const struct field_t *in, + field_a_t out, + const field_a_t in, const word_t *scalar, unsigned int nbits, unsigned int n_extra_doubles diff --git a/src/p448/f_arithmetic.c b/src/p448/f_arithmetic.c index 82f35b8..c9b87e5 100644 --- a/src/p448/f_arithmetic.c +++ b/src/p448/f_arithmetic.c @@ -12,32 +12,32 @@ void field_isr ( - struct field_t* a, - const struct field_t* x + field_a_t a, + const field_a_t x ) { - struct field_t L0, L1, L2; - field_sqr ( &L1, x ); - field_mul ( &L2, x, &L1 ); - field_sqr ( &L1, &L2 ); - field_mul ( &L2, x, &L1 ); - field_sqrn ( &L1, &L2, 3 ); - field_mul ( &L0, &L2, &L1 ); - field_sqrn ( &L1, &L0, 3 ); - field_mul ( &L0, &L2, &L1 ); - field_sqrn ( &L2, &L0, 9 ); - field_mul ( &L1, &L0, &L2 ); - field_sqr ( &L0, &L1 ); - field_mul ( &L2, x, &L0 ); - field_sqrn ( &L0, &L2, 18 ); - field_mul ( &L2, &L1, &L0 ); - field_sqrn ( &L0, &L2, 37 ); - field_mul ( &L1, &L2, &L0 ); - field_sqrn ( &L0, &L1, 37 ); - field_mul ( &L1, &L2, &L0 ); - field_sqrn ( &L0, &L1, 111 ); - field_mul ( &L2, &L1, &L0 ); - field_sqr ( &L0, &L2 ); - field_mul ( &L1, x, &L0 ); - field_sqrn ( &L0, &L1, 223 ); - field_mul ( a, &L2, &L0 ); + field_a_t L0, L1, L2; + field_sqr ( L1, x ); + field_mul ( L2, x, L1 ); + field_sqr ( L1, L2 ); + field_mul ( L2, x, L1 ); + field_sqrn ( L1, L2, 3 ); + field_mul ( L0, L2, L1 ); + field_sqrn ( L1, L0, 3 ); + field_mul ( L0, L2, L1 ); + field_sqrn ( L2, L0, 9 ); + field_mul ( L1, L0, L2 ); + field_sqr ( L0, L1 ); + field_mul ( L2, x, L0 ); + field_sqrn ( L0, L2, 18 ); + field_mul ( L2, L1, L0 ); + field_sqrn ( L0, L2, 37 ); + field_mul ( L1, L2, L0 ); + field_sqrn ( L0, L1, 37 ); + field_mul ( L1, L2, L0 ); + field_sqrn ( L0, L1, 111 ); + 
field_mul ( L2, L1, L0 ); + field_sqr ( L0, L2 ); + field_mul ( L1, x, L0 ); + field_sqrn ( L0, L1, 223 ); + field_mul ( a, L2, L0 ); } diff --git a/src/p448/magic.c b/src/p448/magic.c index b1e7ca5..20c5fa5 100644 --- a/src/p448/magic.c +++ b/src/p448/magic.c @@ -35,17 +35,17 @@ const word_t SCALARMUL_FIXED_WINDOW_ADJUSTMENT[2*SCALAR_WORDS] = { const struct affine_t goldilocks_base_point = { #ifdef USE_NEON_PERM - {{ 0xaed939f,0xc59d070,0xf0de840,0x5f065c3, 0xf4ba0c7,0xdf73324,0xc170033,0x3a6a26a, + {{{ 0xaed939f,0xc59d070,0xf0de840,0x5f065c3, 0xf4ba0c7,0xdf73324,0xc170033,0x3a6a26a, 0x4c63d96,0x4609845,0xf3932d9,0x1b4faff, 0x6147eaa,0xa2692ff,0x9cecfa9,0x297ea0e - }}, + }}}, #else - {{ U56LE(0xf0de840aed939f), U56LE(0xc170033f4ba0c7), + {{{ U56LE(0xf0de840aed939f), U56LE(0xc170033f4ba0c7), U56LE(0xf3932d94c63d96), U56LE(0x9cecfa96147eaa), U56LE(0x5f065c3c59d070), U56LE(0x3a6a26adf73324), U56LE(0x1b4faff4609845), U56LE(0x297ea0ea2692ff) - }}, + }}}, #endif - {{ 19 }} + {{{ 19 }}} }; static const word_t curve_prime_order_lo[(224+WORD_BITS-1)/WORD_BITS] = { @@ -61,8 +61,8 @@ const struct barrett_prime_t curve_prime_order = { curve_prime_order_lo }; -const struct field_t -sqrt_d_minus_1 = {{ +const field_a_t +sqrt_d_minus_1 = {{{ #ifdef USE_NEON_PERM 0x6749f46,0x24d9770,0xd2e2183,0xa49f7b4, 0xb4f0179,0x8c5f656,0x888db42,0xdcac462, @@ -78,4 +78,4 @@ sqrt_d_minus_1 = {{ U56LE(0x49443b8748734a), U56LE(0x12fec0c0b25b7a) #endif -}}; +}}}; diff --git a/src/p480/f_arithmetic.c b/src/p480/f_arithmetic.c index d616e42..bc8e657 100644 --- a/src/p480/f_arithmetic.c +++ b/src/p480/f_arithmetic.c @@ -12,32 +12,32 @@ void field_isr ( - struct field_t* a, - const struct field_t* x + field_a_t a, + const field_a_t x ) { - struct field_t L0, L1, L2, L3; - field_sqr ( &L2, x ); - field_mul ( &L1, x, &L2 ); - field_sqrn ( &L0, &L1, 2 ); - field_mul ( &L2, &L1, &L0 ); - field_sqrn ( &L0, &L2, 4 ); - field_mul ( &L1, &L2, &L0 ); - field_sqr ( &L0, &L1 ); - field_mul ( &L2, x, &L0 ); - field_sqrn ( &L0, &L2, 8 ); - field_mul ( &L2, &L1, &L0 ); - field_sqrn ( &L0, &L2, 17 ); - field_mul ( &L1, &L2, &L0 ); - field_sqrn ( &L0, &L1, 17 ); - field_mul ( &L1, &L2, &L0 ); - field_sqrn ( &L3, &L1, 17 ); - field_mul ( &L0, &L2, &L3 ); - field_sqrn ( &L2, &L0, 51 ); - field_mul ( &L0, &L1, &L2 ); - field_sqrn ( &L1, &L0, 119 ); - field_mul ( &L2, &L0, &L1 ); - field_sqr ( &L0, &L2 ); - field_mul ( &L1, x, &L0 ); - field_sqrn ( &L0, &L1, 239 ); - field_mul ( a, &L2, &L0 ); + field_a_t L0, L1, L2, L3; + field_sqr ( L2, x ); + field_mul ( L1, x, L2 ); + field_sqrn ( L0, L1, 2 ); + field_mul ( L2, L1, L0 ); + field_sqrn ( L0, L2, 4 ); + field_mul ( L1, L2, L0 ); + field_sqr ( L0, L1 ); + field_mul ( L2, x, L0 ); + field_sqrn ( L0, L2, 8 ); + field_mul ( L2, L1, L0 ); + field_sqrn ( L0, L2, 17 ); + field_mul ( L1, L2, L0 ); + field_sqrn ( L0, L1, 17 ); + field_mul ( L1, L2, L0 ); + field_sqrn ( L3, L1, 17 ); + field_mul ( L0, L2, L3 ); + field_sqrn ( L2, L0, 51 ); + field_mul ( L0, L1, L2 ); + field_sqrn ( L1, L0, 119 ); + field_mul ( L2, L0, L1 ); + field_sqr ( L0, L2 ); + field_mul ( L1, x, L0 ); + field_sqrn ( L0, L1, 239 ); + field_mul ( a, L2, L0 ); } diff --git a/src/p480/magic.c b/src/p480/magic.c index ee90a0a..8615071 100644 --- a/src/p480/magic.c +++ b/src/p480/magic.c @@ -36,7 +36,7 @@ const word_t SCALARMUL_FIXED_WINDOW_ADJUSTMENT[2*SCALAR_WORDS] = { }; const struct affine_t goldilocks_base_point = { - {{ + {{{ U60LE(0x849ff7f845c30d3), U60LE(0x7dda488553a4c5b), U60LE(0x1d3a2d9844831ea), @@ -45,8 +45,8 @@ const 
struct affine_t goldilocks_base_point = { U60LE(0xfc955e59aeefa65), U60LE(0x3ab247cd530013c), U60LE(0x7ca42af3d564280) - }}, - {{ 5 }} + }}}, + {{{ 5 }}} }; static const word_t curve_prime_order_lo[(240+WORD_BITS-1)/WORD_BITS] = { @@ -62,7 +62,7 @@ const struct barrett_prime_t curve_prime_order = { curve_prime_order_lo }; -const struct field_t -sqrt_d_minus_1 = {{ +const field_a_t +sqrt_d_minus_1 = {{{ 232 /* Whoa, it comes out even. */ -}}; +}}}; diff --git a/src/p521/f_arithmetic.c b/src/p521/f_arithmetic.c index 7fbdfb8..37c0b50 100644 --- a/src/p521/f_arithmetic.c +++ b/src/p521/f_arithmetic.c @@ -12,32 +12,32 @@ void field_isr ( - struct field_t* a, - const struct field_t* x + field_a_t a, + const field_a_t x ) { - struct field_t L0, L1, L2; - field_sqr ( &L1, x ); - field_mul ( &L0, x, &L1 ); - field_sqrn ( &L2, &L0, 2 ); - field_mul ( &L1, &L0, &L2 ); - field_sqrn ( &L2, &L1, 4 ); - field_mul ( &L0, &L1, &L2 ); - field_sqrn ( &L2, &L0, 8 ); - field_mul ( &L1, &L0, &L2 ); - field_sqrn ( &L2, &L1, 16 ); - field_mul ( &L0, &L1, &L2 ); - field_sqrn ( &L2, &L0, 32 ); - field_mul ( &L1, &L0, &L2 ); - field_sqr ( &L2, &L1 ); - field_mul ( &L0, x, &L2 ); - field_sqrn ( &L2, &L0, 64 ); - field_mul ( &L0, &L1, &L2 ); - field_sqrn ( &L2, &L0, 129 ); - field_mul ( &L1, &L0, &L2 ); - field_sqr ( &L2, &L1 ); - field_mul ( &L0, x, &L2 ); - field_sqrn ( &L2, &L0, 259 ); - field_mul ( &L1, &L0, &L2 ); - field_sqr ( &L0, &L1 ); - field_mul ( a, x, &L0 ); + field_a_t L0, L1, L2; + field_sqr ( L1, x ); + field_mul ( L0, x, L1 ); + field_sqrn ( L2, L0, 2 ); + field_mul ( L1, L0, L2 ); + field_sqrn ( L2, L1, 4 ); + field_mul ( L0, L1, L2 ); + field_sqrn ( L2, L0, 8 ); + field_mul ( L1, L0, L2 ); + field_sqrn ( L2, L1, 16 ); + field_mul ( L0, L1, L2 ); + field_sqrn ( L2, L0, 32 ); + field_mul ( L1, L0, L2 ); + field_sqr ( L2, L1 ); + field_mul ( L0, x, L2 ); + field_sqrn ( L2, L0, 64 ); + field_mul ( L0, L1, L2 ); + field_sqrn ( L2, L0, 129 ); + field_mul ( L1, L0, L2 ); + field_sqr ( L2, L1 ); + field_mul ( L0, x, L2 ); + field_sqrn ( L2, L0, 259 ); + field_mul ( L1, L0, L2 ); + field_sqr ( L0, L1 ); + field_mul ( a, x, L0 ); } diff --git a/src/p521/magic.c b/src/p521/magic.c index 93ccc33..f8ab264 100644 --- a/src/p521/magic.c +++ b/src/p521/magic.c @@ -39,7 +39,7 @@ const word_t SCALARMUL_FIXED_WINDOW_ADJUSTMENT[2*SCALAR_WORDS] = { }; const struct affine_t goldilocks_base_point = { - {{ + {{{ #ifdef USE_P521_3x3_TRANSPOSE U58LE(0x02a940a2f19ba6c), U58LE(0x3331c90d2c6ba52), @@ -64,8 +64,8 @@ const struct affine_t goldilocks_base_point = { U58LE(0x06277e432c8a5ac), U58LE(0x0752cb45c48648b) #endif - }}, - {{ 12 }} + }}}, + {{{ 12 }}} }; static const word_t curve_prime_order_lo[(261+WORD_BITS-1)/WORD_BITS] = { @@ -82,8 +82,8 @@ const struct barrett_prime_t curve_prime_order = { curve_prime_order_lo }; -const struct field_t -sqrt_d_minus_1 = {{ +const field_a_t +sqrt_d_minus_1 = {{{ #ifdef USE_P521_3x3_TRANSPOSE U58LE(0x1e2be72c1c81990), U58LE(0x207dfc238a33e46), @@ -108,4 +108,4 @@ sqrt_d_minus_1 = {{ U58LE(0x0524b9e715937f5), U58LE(0x0a9ea3ac10d6aed) #endif -}}; +}}}; diff --git a/src/scalarmul.c b/src/scalarmul.c index b85a42c..cf95984 100644 --- a/src/scalarmul.c +++ b/src/scalarmul.c @@ -15,8 +15,8 @@ mask_t montgomery_ladder ( - struct field_t *out, - const struct field_t *in, + field_a_t out, + const field_a_t in, const word_t *scalar, unsigned int nbits, unsigned int n_extra_doubles @@ -30,15 +30,15 @@ montgomery_ladder ( word_t w = scalar[j]; for (i=n; i>=0; i--) { mask_t flip = -((w>>i)&1); - 
constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),flip^pflip); - constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),flip^pflip); + constant_time_cond_swap(mont.xa,mont.xd,sizeof(mont.xd),flip^pflip); + constant_time_cond_swap(mont.za,mont.zd,sizeof(mont.xd),flip^pflip); montgomery_step(&mont); pflip = flip; } n = WORD_BITS-1; } - constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),pflip); - constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),pflip); + constant_time_cond_swap(mont.xa,mont.xd,sizeof(mont.xd),pflip); + constant_time_cond_swap(mont.za,mont.zd,sizeof(mont.xd),pflip); assert(n_extra_doubles < INT_MAX); for (j=0; j<(int)n_extra_doubles; j++) { @@ -475,8 +475,8 @@ precompute_fixed_base ( struct tw_pniels_t pn_tmp; struct tw_pniels_t *doubles = (struct tw_pniels_t *) malloc_vector(sizeof(*doubles) * (t-1)); - struct field_t *zs = (struct field_t *) malloc_vector(sizeof(*zs) * (n<<(t-1))); - struct field_t *zis = (struct field_t *) malloc_vector(sizeof(*zis) * (n<<(t-1))); + field_a_t *zs = (field_a_t *) malloc_vector(sizeof(*zs) * (n<<(t-1))); + field_a_t *zis = (field_a_t *) malloc_vector(sizeof(*zis) * (n<<(t-1))); struct tw_niels_t *table = prealloc; if (prealloc) { @@ -562,7 +562,7 @@ precompute_fixed_base ( convert_tw_extensible_to_tw_pniels(&pn_tmp, &start); copy_tw_niels(&table[idx], &pn_tmp.n); - field_copy(&zs[idx], &pn_tmp.z); + field_copy(zs[idx], pn_tmp.z); if (j >= (1u<<(t-1)) - 1) break; int delta = (j+1) ^ ((j+1)>>1) ^ gray; @@ -584,22 +584,22 @@ precompute_fixed_base ( field_simultaneous_invert(zis, zs, n<<(t-1)); - field_t product; + field_a_t product; for (i=0; i 0) { @@ -659,32 +659,32 @@ precompute_fixed_base_wnaf ( add_tw_pniels_to_tw_extensible(&base, &tmp); convert_tw_extensible_to_tw_pniels(&tmp, &base); - field_copy(&zs[1], &tmp.z); + field_copy(zs[1], tmp.z); copy_tw_niels(&out[1], &tmp.n); for (i=2; i < 1<x); - field_print(" y", &a->y); - field_print(" z", &a->z); - field_inverse(&zi, &a->z); - field_mul(&scaled, &zi, &a->x); - field_print(" X", &scaled); - field_mul(&scaled, &zi, &a->y); - field_print(" Y", &scaled); + field_a_t zi, scaled; + field_print(" x", a->x); + field_print(" y", a->y); + field_print(" z", a->z); + field_inverse(zi, a->z); + field_mul(scaled, zi, a->x); + field_print(" X", scaled); + field_mul(scaled, zi, a->y); + field_print(" Y", scaled); printf("\n"); } @@ -165,10 +165,10 @@ add_double_test ( if (~succ) { printf(" Bases were:\n"); - field_print(" x1", &base1->x); - field_print(" y1", &base1->y); - field_print(" x2", &base2->x); - field_print(" y2", &base2->y); + field_print(" x1", base1->x); + field_print(" y1", base1->y); + field_print(" x2", base2->x); + field_print(" y2", base2->y); } return succ ? 0 : -1; @@ -211,18 +211,18 @@ single_twisting_test ( succ = 0; } /* FUTURE: quadness */ - field_t sera,serb; - untwist_and_double_and_serialize(&sera,&text); + field_a_t sera,serb; + untwist_and_double_and_serialize(sera,&text); copy_extensible(&tmpext,&exb); double_extensible(&tmpext); - serialize_extensible(&serb,&tmpext); + serialize_extensible(serb,&tmpext); /* check that their (doubled; FUTURE?) 
serializations are equal */ - if (~field_eq(&sera,&serb)) { + if (~field_eq(sera,serb)) { youfail(); printf(" Different serialization from twist + double ()\n"); - field_print(" t", &sera); - field_print(" b", &serb); + field_print(" t", sera); + field_print(" b", serb); succ = 0; } @@ -242,8 +242,8 @@ single_twisting_test ( if (~succ) { printf(" Base was:\n"); - field_print(" x", &base->x); - field_print(" y", &base->y); + field_print(" x", base->x); + field_print(" y", base->y); } @@ -252,7 +252,7 @@ single_twisting_test ( int test_pointops (void) { struct affine_t base, pbase; - struct field_t serf; + field_a_t serf; struct crandom_state_t crand; crandom_init_from_buffer(&crand, "test_pointops random initializer"); @@ -277,7 +277,7 @@ int test_pointops (void) { #endif /* TODO: we need a field generate, which can return random or pathological. */ - mask_t succ = field_deserialize(&serf, ser); + mask_t succ = field_deserialize(serf, ser); if (!succ) { youfail(); printf(" Unlikely: fail at field_deserialize\n"); @@ -287,7 +287,7 @@ int test_pointops (void) { if (i) { copy_affine(&pbase, &base); } - elligator_2s_inject(&base, &serf); + elligator_2s_inject(&base, serf); if (i) { ret = add_double_test(&base, &pbase); diff --git a/test/test_scalarmul.c b/test/test_scalarmul.c index 89db764..d21be13 100644 --- a/test/test_scalarmul.c +++ b/test/test_scalarmul.c @@ -12,19 +12,19 @@ /* 0 = succeed, 1 = inval, -1 = fail */ static int single_scalarmul_compatibility_test ( - const struct field_t *base, + const field_a_t base, const word_t *scalar, int nbits ) { struct tw_extensible_t text, work; - struct field_t mont, ct, vl, vt; + field_a_t mont, ct, vl, vt; int ret = 0, i; mask_t succ, succm; - succ = deserialize_and_twist_approx(&text, &sqrt_d_minus_1, base); + succ = deserialize_and_twist_approx(&text, base); - succm = montgomery_ladder(&mont,base,scalar,nbits,1); + succm = montgomery_ladder(mont,base,scalar,nbits,1); if (succ != succm) { youfail(); @@ -52,7 +52,7 @@ single_scalarmul_compatibility_test ( const int nparams = sizeof(params)/sizeof(params[0]); struct fixed_base_table_t fbt; const int nsizes = 6; - struct field_t fbout[nparams], wout[nsizes]; + field_a_t fbout[nparams], wout[nsizes]; memset(&fbt, 0, sizeof(fbt)); memset(&fbout, 0, sizeof(fbout)); memset(&wout, 0, sizeof(wout)); @@ -75,7 +75,7 @@ single_scalarmul_compatibility_test ( continue; } - untwist_and_double_and_serialize(&fbout[i], &work); + untwist_and_double_and_serialize(fbout[i], &work); } /* compute using precomp wNAF */ @@ -91,7 +91,7 @@ single_scalarmul_compatibility_test ( scalarmul_fixed_base_wnaf_vt(&work, scalar, nbits, pre, i); - untwist_and_double_and_serialize(&wout[i], &work); + untwist_and_double_and_serialize(wout[i], &work); } mask_t consistent = MASK_SUCCESS; @@ -100,31 +100,31 @@ single_scalarmul_compatibility_test ( /* window methods currently only work on FIELD_BITS bits. 
*/ copy_tw_extensible(&work, &text); scalarmul(&work, scalar); - untwist_and_double_and_serialize(&ct, &work); + untwist_and_double_and_serialize(ct, &work); copy_tw_extensible(&work, &text); scalarmul_vlook(&work, scalar); - untwist_and_double_and_serialize(&vl, &work); + untwist_and_double_and_serialize(vl, &work); copy_tw_extensible(&work, &text); scalarmul_vt(&work, scalar, nbits); - untwist_and_double_and_serialize(&vt, &work); + untwist_and_double_and_serialize(vt, &work); /* check consistency mont vs window */ - consistent &= field_eq(&mont, &ct); - consistent &= field_eq(&mont, &vl); - consistent &= field_eq(&mont, &vt); + consistent &= field_eq(mont, ct); + consistent &= field_eq(mont, vl); + consistent &= field_eq(mont, vt); } /* check consistency mont vs combs */ for (i=0; i Date: Thu, 22 Jan 2015 16:22:37 -0800 Subject: [PATCH 04/15] respace --- src/arithmetic.c | 20 +- src/ec_point.c | 768 +++++++++++++++++++++++------------------------ 2 files changed, 394 insertions(+), 394 deletions(-) diff --git a/src/arithmetic.c b/src/arithmetic.c index 89be5c4..dee62e7 100644 --- a/src/arithmetic.c +++ b/src/arithmetic.c @@ -9,7 +9,7 @@ */ #include "field.h" -#include "ec_point.h" // TODO +#include "ec_point.h" mask_t field_eq ( @@ -32,10 +32,10 @@ field_inverse ( const field_a_t x ) { field_a_t L0, L1; - field_isr ( L0, x ); - field_sqr ( L1, L0 ); - field_sqr ( L0, L1 ); - field_mul ( a, x, L0 ); + field_isr ( L0, x ); + field_sqr ( L1, L0 ); + field_sqr ( L0, L1 ); + field_mul ( a, x, L0 ); } mask_t @@ -43,11 +43,11 @@ field_is_square ( const field_a_t x ) { field_a_t L0, L1; - field_isr ( L0, x ); - field_sqr ( L1, L0 ); - field_mul ( L0, x, L1 ); - field_subw( L0, 1 ); - return field_is_zero( L0 ) | field_is_zero( x ); + field_isr ( L0, x ); + field_sqr ( L1, L0 ); + field_mul ( L0, x, L1 ); + field_subw( L0, 1 ); + return field_is_zero( L0 ) | field_is_zero( x ); } void diff --git a/src/ec_point.c b/src/ec_point.c index 905ba60..e78852b 100644 --- a/src/ec_point.c +++ b/src/ec_point.c @@ -19,19 +19,19 @@ add_tw_niels_to_tw_extensible ( ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1; - field_sub ( L1, d->y, d->x ); - field_mul ( L0, e->a, L1 ); - field_add_nr ( L1, d->x, d->y ); - field_mul ( d->y, e->b, L1 ); - field_mul ( L1, d->u, d->t ); - field_mul ( d->x, e->c, L1 ); - field_add_nr ( d->u, L0, d->y ); - field_subx_nr ( d->t, d->y, L0 ); + field_sub ( L1, d->y, d->x ); + field_mul ( L0, e->a, L1 ); + field_add_nr ( L1, d->x, d->y ); + field_mul ( d->y, e->b, L1 ); + field_mul ( L1, d->u, d->t ); + field_mul ( d->x, e->c, L1 ); + field_add_nr ( d->u, L0, d->y ); + field_subx_nr ( d->t, d->y, L0 ); field_subx_nr ( d->y, d->z, d->x ); - field_add_nr ( L0, d->x, d->z ); - field_mul ( d->z, L0, d->y ); - field_mul ( d->x, d->y, d->t ); - field_mul ( d->y, L0, d->u ); + field_add_nr ( L0, d->x, d->z ); + field_mul ( d->z, L0, d->y ); + field_mul ( d->x, d->y, d->t ); + field_mul ( d->y, L0, d->u ); } void @@ -41,19 +41,19 @@ sub_tw_niels_from_tw_extensible ( ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1; - field_subx_nr ( L1, d->y, d->x ); - field_mul ( L0, e->b, L1 ); - field_add_nr ( L1, d->x, d->y ); - field_mul ( d->y, e->a, L1 ); - field_mul ( L1, d->u, d->t ); - field_mul ( d->x, e->c, L1 ); - field_add_nr ( d->u, L0, d->y ); - field_subx_nr ( d->t, d->y, L0 ); - field_add_nr ( d->y, d->x, d->z ); - field_subx_nr ( L0, d->z, d->x ); - field_mul ( d->z, L0, d->y ); - field_mul ( d->x, d->y, d->t ); - field_mul ( d->y, L0, d->u ); + field_subx_nr ( L1, d->y, d->x ); 
+ field_mul ( L0, e->b, L1 ); + field_add_nr ( L1, d->x, d->y ); + field_mul ( d->y, e->a, L1 ); + field_mul ( L1, d->u, d->t ); + field_mul ( d->x, e->c, L1 ); + field_add_nr ( d->u, L0, d->y ); + field_subx_nr ( d->t, d->y, L0 ); + field_add_nr ( d->y, d->x, d->z ); + field_subx_nr ( L0, d->z, d->x ); + field_mul ( d->z, L0, d->y ); + field_mul ( d->x, d->y, d->t ); + field_mul ( d->y, L0, d->u ); } void @@ -62,9 +62,9 @@ add_tw_pniels_to_tw_extensible ( const struct tw_pniels_t* a ) { field_a_t L0; - field_mul ( L0, e->z, a->z ); - field_copy ( e->z, L0 ); - add_tw_niels_to_tw_extensible( e, &a->n ); + field_mul ( L0, e->z, a->z ); + field_copy ( e->z, L0 ); + add_tw_niels_to_tw_extensible( e, &a->n ); } void @@ -73,9 +73,9 @@ sub_tw_pniels_from_tw_extensible ( const struct tw_pniels_t* a ) { field_a_t L0; - field_mul ( L0, e->z, a->z ); - field_copy ( e->z, L0 ); - sub_tw_niels_from_tw_extensible( e, &a->n ); + field_mul ( L0, e->z, a->z ); + field_copy ( e->z, L0 ); + sub_tw_niels_from_tw_extensible( e, &a->n ); } void @@ -84,23 +84,23 @@ double_tw_extensible ( ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1, L2; - field_sqr ( L2, a->x ); - field_sqr ( L0, a->y ); - field_add_nr ( a->u, L2, L0 ); - field_add_nr ( a->t, a->y, a->x ); - field_sqr ( L1, a->t ); - field_sub_nr ( a->t, L1, a->u ); - field_bias ( a->t, 3 ); + field_sqr ( L2, a->x ); + field_sqr ( L0, a->y ); + field_add_nr ( a->u, L2, L0 ); + field_add_nr ( a->t, a->y, a->x ); + field_sqr ( L1, a->t ); + field_sub_nr ( a->t, L1, a->u ); + field_bias ( a->t, 3 ); IF32( field_weak_reduce( a->t ) ); - field_subx_nr ( L1, L0, L2 ); - field_sqr ( a->x, a->z ); - field_bias ( a->x, 2-is32 /*is32 ? 1 : 2*/ ); - field_add_nr ( a->z, a->x, a->x ); - field_sub_nr ( L0, a->z, L1 ); - IF32( field_weak_reduce( L0 ) ); - field_mul ( a->z, L1, L0 ); - field_mul ( a->x, L0, a->t ); - field_mul ( a->y, L1, a->u ); + field_subx_nr ( L1, L0, L2 ); + field_sqr ( a->x, a->z ); + field_bias ( a->x, 2-is32 /*is32 ? 
1 : 2*/ ); + field_add_nr ( a->z, a->x, a->x ); + field_sub_nr ( L0, a->z, L1 ); + IF32( field_weak_reduce( L0 ) ); + field_mul ( a->z, L1, L0 ); + field_mul ( a->x, L0, a->t ); + field_mul ( a->y, L1, a->u ); } void @@ -109,23 +109,23 @@ double_extensible ( ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1, L2; - field_sqr ( L2, a->x ); - field_sqr ( L0, a->y ); - field_add_nr ( L1, L2, L0 ); - field_add_nr ( a->t, a->y, a->x ); - field_sqr ( a->u, a->t ); - field_sub_nr ( a->t, a->u, L1 ); - field_bias ( a->t, 3 ); + field_sqr ( L2, a->x ); + field_sqr ( L0, a->y ); + field_add_nr ( L1, L2, L0 ); + field_add_nr ( a->t, a->y, a->x ); + field_sqr ( a->u, a->t ); + field_sub_nr ( a->t, a->u, L1 ); + field_bias ( a->t, 3 ); IF32( field_weak_reduce( a->t ) ); - field_subx_nr ( a->u, L0, L2 ); - field_sqr ( a->x, a->z ); - field_bias ( a->x, 2 ); - field_add_nr ( a->z, a->x, a->x ); - field_sub_nr ( L0, a->z, L1 ); - IF32( field_weak_reduce( L0 ) ); - field_mul ( a->z, L1, L0 ); - field_mul ( a->x, L0, a->t ); - field_mul ( a->y, L1, a->u ); + field_subx_nr ( a->u, L0, L2 ); + field_sqr ( a->x, a->z ); + field_bias ( a->x, 2 ); + field_add_nr ( a->z, a->x, a->x ); + field_sub_nr ( L0, a->z, L1 ); + IF32( field_weak_reduce( L0 ) ); + field_mul ( a->z, L1, L0 ); + field_mul ( a->x, L0, a->t ); + field_mul ( a->y, L1, a->u ); } void @@ -134,19 +134,19 @@ twist_and_double ( const struct extensible_t* a ) { field_a_t L0; - field_sqr ( b->x, a->x ); - field_sqr ( b->z, a->y ); + field_sqr ( b->x, a->x ); + field_sqr ( b->z, a->y ); field_add ( b->u, b->x, b->z ); field_add ( b->t, a->y, a->x ); - field_sqr ( L0, b->t ); - field_sub ( b->t, L0, b->u ); - field_sub ( L0, b->z, b->x ); - field_sqr ( b->x, a->z ); + field_sqr ( L0, b->t ); + field_sub ( b->t, L0, b->u ); + field_sub ( L0, b->z, b->x ); + field_sqr ( b->x, a->z ); field_add ( b->z, b->x, b->x ); field_sub ( b->y, b->z, b->u ); - field_mul ( b->z, L0, b->y ); - field_mul ( b->x, b->y, b->t ); - field_mul ( b->y, L0, b->u ); + field_mul ( b->z, L0, b->y ); + field_mul ( b->x, b->y, b->t ); + field_mul ( b->y, L0, b->u ); } void @@ -155,19 +155,19 @@ untwist_and_double ( const struct tw_extensible_t* a ) { field_a_t L0; - field_sqr ( b->x, a->x ); - field_sqr ( b->z, a->y ); - field_add ( L0, b->x, b->z ); + field_sqr ( b->x, a->x ); + field_sqr ( b->z, a->y ); + field_add ( L0, b->x, b->z ); field_add ( b->t, a->y, a->x ); - field_sqr ( b->u, b->t ); - field_sub ( b->t, b->u, L0 ); + field_sqr ( b->u, b->t ); + field_sub ( b->t, b->u, L0 ); field_sub ( b->u, b->z, b->x ); - field_sqr ( b->x, a->z ); + field_sqr ( b->x, a->z ); field_add ( b->z, b->x, b->x ); field_sub ( b->y, b->z, b->u ); - field_mul ( b->z, L0, b->y ); - field_mul ( b->x, b->y, b->t ); - field_mul ( b->y, L0, b->u ); + field_mul ( b->z, L0, b->y ); + field_mul ( b->x, b->y, b->t ); + field_mul ( b->y, L0, b->u ); } void @@ -177,9 +177,9 @@ convert_tw_affine_to_tw_pniels ( ) { field_sub ( b->n.a, a->y, a->x ); field_add ( b->n.b, a->x, a->y ); - field_mul ( b->z, a->y, a->x ); + field_mul ( b->z, a->y, a->x ); field_mulw_scc_wr ( b->n.c, b->z, 2*EDWARDS_D-2 ); - field_set_ui( b->z, 2 ); + field_set_ui( b->z, 2 ); } void @@ -189,7 +189,7 @@ convert_tw_affine_to_tw_extensible ( ) { field_copy ( b->x, a->x ); field_copy ( b->y, a->y ); - field_set_ui( b->z, 1 ); + field_set_ui( b->z, 1 ); field_copy ( b->t, a->x ); field_copy ( b->u, a->y ); } @@ -201,7 +201,7 @@ convert_affine_to_extensible ( ) { field_copy ( b->x, a->x ); field_copy ( b->y, a->y ); - field_set_ui( b->z, 
1 ); + field_set_ui( b->z, 1 ); field_copy ( b->t, a->x ); field_copy ( b->u, a->y ); } @@ -213,7 +213,7 @@ convert_tw_extensible_to_tw_pniels ( ) { field_sub ( b->n.a, a->y, a->x ); field_add ( b->n.b, a->x, a->y ); - field_mul ( b->z, a->u, a->t ); + field_mul ( b->z, a->u, a->t ); field_mulw_scc_wr ( b->n.c, b->z, 2*EDWARDS_D-2 ); field_add ( b->z, a->z, a->z ); } @@ -225,9 +225,9 @@ convert_tw_pniels_to_tw_extensible ( ) { field_add ( e->u, d->n.b, d->n.a ); field_sub ( e->t, d->n.b, d->n.a ); - field_mul ( e->x, d->z, e->t ); - field_mul ( e->y, d->z, e->u ); - field_sqr ( e->z, d->z ); + field_mul ( e->x, d->z, e->t ); + field_mul ( e->y, d->z, e->u ); + field_sqr ( e->z, d->z ); } void @@ -237,7 +237,7 @@ convert_tw_niels_to_tw_extensible ( ) { field_add ( e->y, d->b, d->a ); field_sub ( e->x, d->b, d->a ); - field_set_ui( e->z, 1 ); + field_set_ui( e->z, 1 ); field_copy ( e->t, e->x ); field_copy ( e->u, e->y ); } @@ -248,26 +248,26 @@ montgomery_step ( ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1; - field_add_nr ( L0, a->zd, a->xd ); - field_sub ( L1, a->xd, a->zd ); + field_add_nr ( L0, a->zd, a->xd ); + field_sub ( L1, a->xd, a->zd ); field_sub ( a->zd, a->xa, a->za ); - field_mul ( a->xd, L0, a->zd ); - field_add_nr ( a->zd, a->za, a->xa ); - field_mul ( a->za, L1, a->zd ); - field_add_nr ( a->xa, a->za, a->xd ); - field_sqr ( a->zd, a->xa ); - field_mul ( a->xa, a->z0, a->zd ); + field_mul ( a->xd, L0, a->zd ); + field_add_nr ( a->zd, a->za, a->xa ); + field_mul ( a->za, L1, a->zd ); + field_add_nr ( a->xa, a->za, a->xd ); + field_sqr ( a->zd, a->xa ); + field_mul ( a->xa, a->z0, a->zd ); field_sub ( a->zd, a->xd, a->za ); - field_sqr ( a->za, a->zd ); - field_sqr ( a->xd, L0 ); - field_sqr ( L0, L1 ); + field_sqr ( a->za, a->zd ); + field_sqr ( a->xd, L0 ); + field_sqr ( L0, L1 ); field_mulw_scc ( a->zd, a->xd, 1-EDWARDS_D ); /* FIXME PERF MULW */ - field_sub ( L1, a->xd, L0 ); - field_mul ( a->xd, L0, a->zd ); - field_sub_nr ( L0, a->zd, L1 ); - field_bias ( L0, 4 - 2*is32 /*is32 ? 2 : 4*/ ); - IF32( field_weak_reduce( L0 ) ); - field_mul ( a->zd, L0, L1 ); + field_sub ( L1, a->xd, L0 ); + field_mul ( a->xd, L0, a->zd ); + field_sub_nr ( L0, a->zd, L1 ); + field_bias ( L0, 4 - 2*is32 /*is32 ? 
2 : 4*/ ); + IF32( field_weak_reduce( L0 ) ); + field_mul ( a->zd, L0, L1 ); } void @@ -275,10 +275,10 @@ deserialize_montgomery ( struct montgomery_t* a, const field_a_t sbz ) { - field_sqr ( a->z0, sbz ); - field_set_ui( a->xd, 1 ); - field_set_ui( a->zd, 0 ); - field_set_ui( a->xa, 1 ); + field_sqr ( a->z0, sbz ); + field_set_ui( a->xd, 1 ); + field_set_ui( a->zd, 0 ); + field_set_ui( a->xa, 1 ); field_copy ( a->za, a->z0 ); } @@ -290,42 +290,42 @@ serialize_montgomery ( ) { mask_t L4, L5, L6; field_a_t L0, L1, L2, L3; - field_mul ( L3, a->z0, a->zd ); - field_sub ( L1, L3, a->xd ); - field_mul ( L3, a->za, L1 ); - field_mul ( L2, a->z0, a->xd ); - field_sub ( L1, L2, a->zd ); - field_mul ( L0, a->xa, L1 ); - field_add ( L2, L0, L3 ); - field_sub ( L1, L3, L0 ); - field_mul ( L3, L1, L2 ); - field_copy ( L2, a->z0 ); - field_addw ( L2, 1 ); - field_sqr ( L0, L2 ); - field_mulw_scc_wr ( L1, L0, EDWARDS_D-1 ); - field_add ( L2, a->z0, a->z0 ); - field_add ( L0, L2, L2 ); - field_add ( L2, L0, L1 ); - field_mul ( L0, a->xd, L2 ); - L5 = field_is_zero( a->zd ); - L6 = - L5; - constant_time_mask ( L1, L0, sizeof(L1), L5 ); - field_add ( L2, L1, a->zd ); - L4 = ~ L5; - field_mul ( L1, sbz, L3 ); - field_addw ( L1, L6 ); - field_mul ( L3, L2, L1 ); - field_mul ( L1, L3, L2 ); - field_mul ( L2, L3, a->xd ); - field_mul ( L3, L1, L2 ); - field_isr ( L0, L3 ); - field_mul ( L2, L1, L0 ); - field_sqr ( L1, L0 ); - field_mul ( L0, L3, L1 ); - constant_time_mask ( b, L2, sizeof(L1), L4 ); - field_subw( L0, 1 ); - L5 = field_is_zero( L0 ); - L4 = field_is_zero( sbz ); + field_mul ( L3, a->z0, a->zd ); + field_sub ( L1, L3, a->xd ); + field_mul ( L3, a->za, L1 ); + field_mul ( L2, a->z0, a->xd ); + field_sub ( L1, L2, a->zd ); + field_mul ( L0, a->xa, L1 ); + field_add ( L2, L0, L3 ); + field_sub ( L1, L3, L0 ); + field_mul ( L3, L1, L2 ); + field_copy ( L2, a->z0 ); + field_addw ( L2, 1 ); + field_sqr ( L0, L2 ); + field_mulw_scc_wr ( L1, L0, EDWARDS_D-1 ); + field_add ( L2, a->z0, a->z0 ); + field_add ( L0, L2, L2 ); + field_add ( L2, L0, L1 ); + field_mul ( L0, a->xd, L2 ); + L5 = field_is_zero( a->zd ); + L6 = - L5; + constant_time_mask ( L1, L0, sizeof(L1), L5 ); + field_add ( L2, L1, a->zd ); + L4 = ~ L5; + field_mul ( L1, sbz, L3 ); + field_addw ( L1, L6 ); + field_mul ( L3, L2, L1 ); + field_mul ( L1, L3, L2 ); + field_mul ( L2, L3, a->xd ); + field_mul ( L3, L1, L2 ); + field_isr ( L0, L3 ); + field_mul ( L2, L1, L0 ); + field_sqr ( L1, L0 ); + field_mul ( L0, L3, L1 ); + constant_time_mask ( b, L2, sizeof(L1), L4 ); + field_subw( L0, 1 ); + L5 = field_is_zero( L0 ); + L4 = field_is_zero( sbz ); return L5 | L4; } @@ -335,17 +335,17 @@ serialize_extensible ( const struct extensible_t* a ) { field_a_t L0, L1, L2; - field_sub ( L0, a->y, a->z ); - field_add ( b, a->z, a->y ); - field_mul ( L1, a->z, a->x ); - field_mul ( L2, L0, L1 ); - field_mul ( L1, L2, L0 ); - field_mul ( L0, L2, b ); - field_mul ( L2, L1, L0 ); - field_isr ( L0, L2 ); - field_mul ( b, L1, L0 ); - field_sqr ( L1, L0 ); - field_mul ( L0, L2, L1 ); + field_sub ( L0, a->y, a->z ); + field_add ( b, a->z, a->y ); + field_mul ( L1, a->z, a->x ); + field_mul ( L2, L0, L1 ); + field_mul ( L1, L2, L0 ); + field_mul ( L0, L2, b ); + field_mul ( L2, L1, L0 ); + field_isr ( L0, L2 ); + field_mul ( b, L1, L0 ); + field_sqr ( L1, L0 ); + field_mul ( L0, L2, L1 ); } void @@ -354,23 +354,23 @@ untwist_and_double_and_serialize ( const struct tw_extensible_t* a ) { field_a_t L0, L1, L2, L3; - field_mul ( L3, a->y, a->x ); - field_add ( b, 
a->y, a->x ); - field_sqr ( L1, b ); - field_add ( L2, L3, L3 ); - field_sub ( b, L1, L2 ); - field_sqr ( L2, a->z ); - field_sqr ( L1, L2 ); - field_add ( b, b, b ); - field_mulw_scc ( L2, b, EDWARDS_D-1 ); - field_mulw_scc ( b, L2, EDWARDS_D-1 ); - field_mul ( L0, L2, L1 ); - field_mul ( L2, b, L0 ); - field_isr ( L0, L2 ); - field_mul ( L1, b, L0 ); - field_sqr ( b, L0 ); - field_mul ( L0, L2, b ); - field_mul ( b, L1, L3 ); + field_mul ( L3, a->y, a->x ); + field_add ( b, a->y, a->x ); + field_sqr ( L1, b ); + field_add ( L2, L3, L3 ); + field_sub ( b, L1, L2 ); + field_sqr ( L2, a->z ); + field_sqr ( L1, L2 ); + field_add ( b, b, b ); + field_mulw_scc ( L2, b, EDWARDS_D-1 ); + field_mulw_scc ( b, L2, EDWARDS_D-1 ); + field_mul ( L0, L2, L1 ); + field_mul ( L2, b, L0 ); + field_isr ( L0, L2 ); + field_mul ( L1, b, L0 ); + field_sqr ( b, L0 ); + field_mul ( L0, L2, b ); + field_mul ( b, L1, L3 ); } void @@ -378,23 +378,23 @@ twist_even ( struct tw_extensible_t* b, const struct extensible_t* a ) { - field_sqr ( b->y, a->z ); - field_sqr ( b->z, a->x ); + field_sqr ( b->y, a->z ); + field_sqr ( b->z, a->x ); field_sub ( b->u, b->y, b->z ); field_sub ( b->z, a->z, a->x ); - field_mul ( b->y, b->z, a->y ); + field_mul ( b->y, b->z, a->y ); field_sub ( b->z, a->z, a->y ); - field_mul ( b->x, b->z, b->y ); - field_mul ( b->t, b->x, b->u ); - field_mul ( b->y, b->x, b->t ); - field_isr ( b->t, b->y ); - field_mul ( b->u, b->x, b->t ); - field_sqr ( b->x, b->t ); - field_mul ( b->t, b->y, b->x ); - field_mul ( b->x, a->x, b->u ); - field_mul ( b->y, a->y, b->u ); - field_addw ( b->y, -field_is_zero( b->z ) ); - field_set_ui( b->z, 1 ); + field_mul ( b->x, b->z, b->y ); + field_mul ( b->t, b->x, b->u ); + field_mul ( b->y, b->x, b->t ); + field_isr ( b->t, b->y ); + field_mul ( b->u, b->x, b->t ); + field_sqr ( b->x, b->t ); + field_mul ( b->t, b->y, b->x ); + field_mul ( b->x, a->x, b->u ); + field_mul ( b->y, a->y, b->u ); + field_addw ( b->y, -field_is_zero( b->z ) ); + field_set_ui( b->z, 1 ); field_copy ( b->t, b->x ); field_copy ( b->u, b->y ); } @@ -405,29 +405,29 @@ test_only_twist ( const struct extensible_t* a ) { field_a_t L0, L1; - field_sqr ( b->u, a->z ); - field_sqr ( b->y, a->x ); + field_sqr ( b->u, a->z ); + field_sqr ( b->y, a->x ); field_sub ( b->z, b->u, b->y ); field_add ( b->y, b->z, b->z ); field_add ( b->u, b->y, b->y ); field_sub ( b->y, a->z, a->x ); - field_mul ( b->x, b->y, a->y ); + field_mul ( b->x, b->y, a->y ); field_sub ( b->z, a->z, a->y ); - field_mul ( b->t, b->z, b->x ); - field_mul ( L1, b->t, b->u ); - field_mul ( b->x, b->t, L1 ); - field_isr ( L0, b->x ); - field_mul ( b->u, b->t, L0 ); - field_sqr ( L1, L0 ); - field_mul ( b->t, b->x, L1 ); - field_add ( L1, a->y, a->x ); - field_sub ( L0, a->x, a->y ); - field_mul ( b->x, b->t, L0 ); - field_add ( L0, b->x, L1 ); - field_sub ( b->t, L1, b->x ); - field_mul ( b->x, L0, b->u ); + field_mul ( b->t, b->z, b->x ); + field_mul ( L1, b->t, b->u ); + field_mul ( b->x, b->t, L1 ); + field_isr ( L0, b->x ); + field_mul ( b->u, b->t, L0 ); + field_sqr ( L1, L0 ); + field_mul ( b->t, b->x, L1 ); + field_add ( L1, a->y, a->x ); + field_sub ( L0, a->x, a->y ); + field_mul ( b->x, b->t, L0 ); + field_add ( L0, b->x, L1 ); + field_sub ( b->t, L1, b->x ); + field_mul ( b->x, L0, b->u ); field_addw ( b->x, -field_is_zero( b->y ) ); - field_mul ( b->y, b->t, b->u ); + field_mul ( b->y, b->t, b->u ); field_addw ( b->y, -field_is_zero( b->z ) ); field_set_ui( b->z, 1+field_is_zero( a->y ) ); field_copy ( b->t, b->x ); @@ 
-439,10 +439,10 @@ is_even_pt ( const struct extensible_t* a ) { field_a_t L0, L1, L2; - field_sqr ( L2, a->z ); - field_sqr ( L1, a->x ); - field_sub ( L0, L2, L1 ); - return field_is_square ( L0 ); + field_sqr ( L2, a->z ); + field_sqr ( L1, a->x ); + field_sub ( L0, L2, L1 ); + return field_is_square ( L0 ); } mask_t @@ -450,10 +450,10 @@ is_even_tw ( const struct tw_extensible_t* a ) { field_a_t L0, L1, L2; - field_sqr ( L2, a->z ); - field_sqr ( L1, a->x ); - field_add ( L0, L1, L2 ); - return field_is_square ( L0 ); + field_sqr ( L2, a->z ); + field_sqr ( L1, a->x ); + field_add ( L0, L1, L2 ); + return field_is_square ( L0 ); } mask_t @@ -462,32 +462,32 @@ deserialize_affine ( const field_a_t sz ) { field_a_t L0, L1, L2, L3; - field_sqr ( L1, sz ); - field_copy ( L3, L1 ); - field_addw ( L3, 1 ); - field_sqr ( L2, L3 ); - field_mulw_scc ( a->x, L2, EDWARDS_D-1 ); /* PERF MULW */ - field_add ( L3, L1, L1 ); /* FIXME: i adjusted the bias here, was it right? */ - field_add ( a->y, L3, L3 ); - field_add ( L3, a->y, a->x ); - field_copy ( a->y, L1 ); + field_sqr ( L1, sz ); + field_copy ( L3, L1 ); + field_addw ( L3, 1 ); + field_sqr ( L2, L3 ); + field_mulw_scc ( a->x, L2, EDWARDS_D-1 ); /* PERF MULW */ + field_add ( L3, L1, L1 ); /* FIXME: i adjusted the bias here, was it right? */ + field_add ( a->y, L3, L3 ); + field_add ( L3, a->y, a->x ); + field_copy ( a->y, L1 ); field_neg ( a->x, a->y ); - field_addw ( a->x, 1 ); - field_mul ( a->y, a->x, L3 ); - field_sqr ( L2, a->x ); - field_mul ( L0, L2, a->y ); - field_mul ( a->y, a->x, L0 ); - field_isr ( L3, a->y ); - field_mul ( a->y, L2, L3 ); - field_sqr ( L2, L3 ); - field_mul ( L3, L0, L2 ); - field_mul ( L0, a->x, L3 ); - field_add ( L2, a->y, a->y ); - field_mul ( a->x, sz, L2 ); - field_addw ( L1, 1 ); - field_mul ( a->y, L1, L3 ); - field_subw( L0, 1 ); - return field_is_zero( L0 ); + field_addw ( a->x, 1 ); + field_mul ( a->y, a->x, L3 ); + field_sqr ( L2, a->x ); + field_mul ( L0, L2, a->y ); + field_mul ( a->y, a->x, L0 ); + field_isr ( L3, a->y ); + field_mul ( a->y, L2, L3 ); + field_sqr ( L2, L3 ); + field_mul ( L3, L0, L2 ); + field_mul ( L0, a->x, L3 ); + field_add ( L2, a->y, a->y ); + field_mul ( a->x, sz, L2 ); + field_addw ( L1, 1 ); + field_mul ( a->y, L1, L3 ); + field_subw( L0, 1 ); + return field_is_zero( L0 ); } mask_t @@ -496,40 +496,40 @@ deserialize_and_twist_approx ( const field_a_t sz ) { field_a_t L0, L1; - field_sqr ( a->z, sz ); + field_sqr ( a->z, sz ); field_copy ( a->y, a->z ); - field_addw ( a->y, 1 ); - field_sqr ( L0, a->y ); + field_addw ( a->y, 1 ); + field_sqr ( L0, a->y ); field_mulw_scc ( a->x, L0, EDWARDS_D-1 ); field_add ( a->y, a->z, a->z ); field_add ( a->u, a->y, a->y ); field_add ( a->y, a->u, a->x ); - field_sqr ( a->x, a->z ); + field_sqr ( a->x, a->z ); field_neg ( a->u, a->x ); - field_addw ( a->u, 1 ); - field_mul ( a->x, sqrt_d_minus_1, a->u ); - field_mul ( L0, a->x, a->y ); - field_mul ( a->t, L0, a->y ); - field_mul ( a->u, a->x, a->t ); - field_mul ( a->t, a->u, L0 ); - field_mul ( a->y, a->x, a->t ); - field_isr ( L0, a->y ); - field_mul ( a->y, a->u, L0 ); - field_sqr ( L1, L0 ); - field_mul ( a->u, a->t, L1 ); - field_mul ( a->t, a->x, a->u ); - field_add ( a->x, sz, sz ); - field_mul ( L0, a->u, a->x ); + field_addw ( a->u, 1 ); + field_mul ( a->x, sqrt_d_minus_1, a->u ); + field_mul ( L0, a->x, a->y ); + field_mul ( a->t, L0, a->y ); + field_mul ( a->u, a->x, a->t ); + field_mul ( a->t, a->u, L0 ); + field_mul ( a->y, a->x, a->t ); + field_isr ( L0, a->y ); + field_mul ( 
a->y, a->u, L0 ); + field_sqr ( L1, L0 ); + field_mul ( a->u, a->t, L1 ); + field_mul ( a->t, a->x, a->u ); + field_add ( a->x, sz, sz ); + field_mul ( L0, a->u, a->x ); field_copy ( a->x, a->z ); - field_neg ( L1, a->x ); - field_addw ( L1, 1 ); - field_mul ( a->x, L1, L0 ); - field_mul ( L0, a->u, a->y ); - field_addw ( a->z, 1 ); - field_mul ( a->y, a->z, L0 ); - field_subw( a->t, 1 ); + field_neg ( L1, a->x ); + field_addw ( L1, 1 ); + field_mul ( a->x, L1, L0 ); + field_mul ( L0, a->u, a->y ); + field_addw ( a->z, 1 ); + field_mul ( a->y, a->z, L0 ); + field_subw( a->t, 1 ); mask_t ret = field_is_zero( a->t ); - field_set_ui( a->z, 1 ); + field_set_ui( a->z, 1 ); field_copy ( a->t, a->x ); field_copy ( a->u, a->y ); return ret; @@ -539,30 +539,30 @@ void set_identity_extensible ( struct extensible_t* a ) { - field_set_ui( a->x, 0 ); - field_set_ui( a->y, 1 ); - field_set_ui( a->z, 1 ); - field_set_ui( a->t, 0 ); - field_set_ui( a->u, 0 ); + field_set_ui( a->x, 0 ); + field_set_ui( a->y, 1 ); + field_set_ui( a->z, 1 ); + field_set_ui( a->t, 0 ); + field_set_ui( a->u, 0 ); } void set_identity_tw_extensible ( struct tw_extensible_t* a ) { - field_set_ui( a->x, 0 ); - field_set_ui( a->y, 1 ); - field_set_ui( a->z, 1 ); - field_set_ui( a->t, 0 ); - field_set_ui( a->u, 0 ); + field_set_ui( a->x, 0 ); + field_set_ui( a->y, 1 ); + field_set_ui( a->z, 1 ); + field_set_ui( a->t, 0 ); + field_set_ui( a->u, 0 ); } void set_identity_affine ( struct affine_t* a ) { - field_set_ui( a->x, 0 ); - field_set_ui( a->y, 1 ); + field_set_ui( a->x, 0 ); + field_set_ui( a->y, 1 ); } mask_t @@ -572,10 +572,10 @@ eq_affine ( ) { mask_t L1, L2; field_a_t L0; - field_sub ( L0, a->x, b->x ); - L2 = field_is_zero( L0 ); - field_sub ( L0, a->y, b->y ); - L1 = field_is_zero( L0 ); + field_sub ( L0, a->x, b->x ); + L2 = field_is_zero( L0 ); + field_sub ( L0, a->y, b->y ); + L1 = field_is_zero( L0 ); return L2 & L1; } @@ -586,14 +586,14 @@ eq_extensible ( ) { mask_t L3, L4; field_a_t L0, L1, L2; - field_mul ( L2, b->z, a->x ); - field_mul ( L1, a->z, b->x ); - field_sub ( L0, L2, L1 ); - L4 = field_is_zero( L0 ); - field_mul ( L2, b->z, a->y ); - field_mul ( L1, a->z, b->y ); - field_sub ( L0, L2, L1 ); - L3 = field_is_zero( L0 ); + field_mul ( L2, b->z, a->x ); + field_mul ( L1, a->z, b->x ); + field_sub ( L0, L2, L1 ); + L4 = field_is_zero( L0 ); + field_mul ( L2, b->z, a->y ); + field_mul ( L1, a->z, b->y ); + field_sub ( L0, L2, L1 ); + L3 = field_is_zero( L0 ); return L4 & L3; } @@ -604,14 +604,14 @@ eq_tw_extensible ( ) { mask_t L3, L4; field_a_t L0, L1, L2; - field_mul ( L2, b->z, a->x ); - field_mul ( L1, a->z, b->x ); - field_sub ( L0, L2, L1 ); - L4 = field_is_zero( L0 ); - field_mul ( L2, b->z, a->y ); - field_mul ( L1, a->z, b->y ); - field_sub ( L0, L2, L1 ); - L3 = field_is_zero( L0 ); + field_mul ( L2, b->z, a->x ); + field_mul ( L1, a->z, b->x ); + field_sub ( L0, L2, L1 ); + L4 = field_is_zero( L0 ); + field_mul ( L2, b->z, a->y ); + field_mul ( L1, a->z, b->y ); + field_sub ( L0, L2, L1 ); + L3 = field_is_zero( L0 ); return L4 & L3; } @@ -621,21 +621,21 @@ elligator_2s_inject ( const field_a_t r ) { field_a_t L2, L3, L4, L5, L6, L7, L8; - field_sqr ( a->x, r ); - field_sqr ( L3, a->x ); - field_copy ( a->y, L3 ); - field_neg ( L4, a->y ); - field_addw ( L4, 1 ); - field_sqr ( L2, L4 ); - field_mulw ( L7, L2, (EDWARDS_D-1)*(EDWARDS_D-1) ); - field_mulw ( L8, L3, 4*(EDWARDS_D+1)*(EDWARDS_D+1) ); - field_add ( a->y, L8, L7 ); - field_mulw ( L8, L2, 4*(EDWARDS_D)*(EDWARDS_D-1) ); - field_sub ( L7, a->y, 
L8 );
- field_mulw_scc ( L6, a->y, -2-2*EDWARDS_D );
- field_mul ( L5, L7, L6 );
+ field_sqr ( a->x, r );
+ field_sqr ( L3, a->x );
+ field_copy ( a->y, L3 );
+ field_neg ( L4, a->y );
+ field_addw ( L4, 1 );
+ field_sqr ( L2, L4 );
+ field_mulw ( L7, L2, (EDWARDS_D-1)*(EDWARDS_D-1) );
+ field_mulw ( L8, L3, 4*(EDWARDS_D+1)*(EDWARDS_D+1) );
+ field_add ( a->y, L8, L7 );
+ field_mulw ( L8, L2, 4*(EDWARDS_D)*(EDWARDS_D-1) );
+ field_sub ( L7, a->y, L8 );
+ field_mulw_scc ( L6, a->y, -2-2*EDWARDS_D );
+ field_mul ( L5, L7, L6 );
/* FIXME Stability problem (API stability, not crash) / possible bug.
- * change to: p448_mul ( L5, L7, L4 ); ?
+ * change to: p448_mul ( L5, L7, L4 ); ?
* This isn't a deep change: it's for sign adjustment.
* Need to check which one leads to the correct sign, probably by writing
* the invert routine.
@@ -646,32 +646,32 @@ elligator_2s_inject (
* Could compute be, (be)^2, (be)^3, a b^3 e^3, a b^3 e^4. = 4M+S
* instead of 6M.
*/
- field_mul ( L8, L5, L4 );
- field_mul ( L4, L5, L6 );
- field_mul ( L5, L7, L8 );
- field_mul ( L8, L5, L4 );
- field_mul ( L4, L7, L8 );
- field_isr ( L6, L4 );
- field_mul ( L4, L5, L6 );
- field_sqr ( L5, L6 );
- field_mul ( L6, L8, L5 );
- field_mul ( L8, L7, L6 );
- field_mul ( L7, L8, L6 );
- field_copy ( L6, a->x );
- field_addw ( a->x, 1 );
- field_mul ( L5, a->x, L8 );
- field_addw ( L5, 1 );
- field_sub ( a->x, L6, L5 );
- field_mul ( L5, L4, a->x );
- field_mulw_scc_wr ( a->x, L5, -2-2*EDWARDS_D );
- field_add ( L4, L3, L3 );
- field_add ( L3, L4, L2 );
- field_subw( L3, 2 );
- field_mul ( L2, L3, L8 );
- field_mulw ( L3, L2, 2*(EDWARDS_D+1)*(EDWARDS_D-1) );
- field_add ( L2, L3, a->y );
- field_mul ( a->y, L7, L2 );
- field_addw ( a->y, -field_is_zero( L8 ) );
+ field_mul ( L8, L5, L4 );
+ field_mul ( L4, L5, L6 );
+ field_mul ( L5, L7, L8 );
+ field_mul ( L8, L5, L4 );
+ field_mul ( L4, L7, L8 );
+ field_isr ( L6, L4 );
+ field_mul ( L4, L5, L6 );
+ field_sqr ( L5, L6 );
+ field_mul ( L6, L8, L5 );
+ field_mul ( L8, L7, L6 );
+ field_mul ( L7, L8, L6 );
+ field_copy ( L6, a->x );
+ field_addw ( a->x, 1 );
+ field_mul ( L5, a->x, L8 );
+ field_addw ( L5, 1 );
+ field_sub ( a->x, L6, L5 );
+ field_mul ( L5, L4, a->x );
+ field_mulw_scc_wr ( a->x, L5, -2-2*EDWARDS_D );
+ field_add ( L4, L3, L3 );
+ field_add ( L3, L4, L2 );
+ field_subw( L3, 2 );
+ field_mul ( L2, L3, L8 );
+ field_mulw ( L3, L2, 2*(EDWARDS_D+1)*(EDWARDS_D-1) );
+ field_add ( L2, L3, a->y );
+ field_mul ( a->y, L7, L2 );
+ field_addw ( a->y, -field_is_zero( L8 ) );
}
mask_t
@@ -679,14 +679,14 @@ validate_affine (
const struct affine_t* a
) {
field_a_t L0, L1, L2, L3;
- field_sqr ( L0, a->y );
- field_sqr ( L1, a->x );
- field_add ( L3, L1, L0 );
- field_mulw_scc ( L2, L1, EDWARDS_D );
- field_mul ( L1, L0, L2 );
- field_addw ( L1, 1 );
- field_sub ( L0, L3, L1 );
- return field_is_zero( L0 );
+ field_sqr ( L0, a->y );
+ field_sqr ( L1, a->x );
+ field_add ( L3, L1, L0 );
+ field_mulw_scc ( L2, L1, EDWARDS_D );
+ field_mul ( L1, L0, L2 );
+ field_addw ( L1, 1 );
+ field_sub ( L0, L3, L1 );
+ return field_is_zero( L0 );
}
mask_t
@@ -699,30 +699,30 @@ validate_tw_extensible (
const struct tw_extensible_t* ext
) {
mask_t L4, L5;
field_a_t L0, L1, L2, L3;
/*
* Check invariant:
* 0 = -x*y + z*t*u
*/
- field_mul ( L1, ext->t, ext->u );
- field_mul ( L2, ext->z, L1 );
- field_mul ( L0, ext->x, ext->y );
- field_neg ( L1, L0 );
- field_add ( L0, L1, L2 );
- L5 = field_is_zero( L0 );
+ field_mul ( L1, ext->t, ext->u );
+ field_mul ( L2, ext->z, L1 );
+ field_mul ( L0, ext->x, ext->y );
+ field_neg ( L1, L0 );
+ field_add ( L0, L1, L2 );
+ L5 =
field_is_zero( L0 ); /* * Check invariant: * 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 */ - field_sqr ( L2, ext->y ); - field_neg ( L1, L2 ); - field_sqr ( L0, ext->x ); - field_add ( L2, L0, L1 ); - field_sqr ( L3, ext->u ); - field_sqr ( L0, ext->t ); - field_mul ( L1, L0, L3 ); - field_mulw_scc ( L3, L1, EDWARDS_D ); - field_add ( L0, L3, L2 ); - field_neg ( L3, L1 ); - field_add ( L2, L3, L0 ); - field_sqr ( L1, ext->z ); - field_add ( L0, L1, L2 ); - L4 = field_is_zero( L0 ); + field_sqr ( L2, ext->y ); + field_neg ( L1, L2 ); + field_sqr ( L0, ext->x ); + field_add ( L2, L0, L1 ); + field_sqr ( L3, ext->u ); + field_sqr ( L0, ext->t ); + field_mul ( L1, L0, L3 ); + field_mulw_scc ( L3, L1, EDWARDS_D ); + field_add ( L0, L3, L2 ); + field_neg ( L3, L1 ); + field_add ( L2, L3, L0 ); + field_sqr ( L1, ext->z ); + field_add ( L0, L1, L2 ); + L4 = field_is_zero( L0 ); return L5 & L4 &~ field_is_zero(ext->z); } @@ -736,28 +736,28 @@ validate_extensible ( * Check invariant: * 0 = d*t^2*u^2 - x^2 - y^2 + z^2 */ - field_sqr ( L2, ext->y ); - field_neg ( L1, L2 ); - field_sqr ( L0, ext->z ); - field_add ( L2, L0, L1 ); - field_sqr ( L3, ext->u ); - field_sqr ( L0, ext->t ); - field_mul ( L1, L0, L3 ); - field_mulw_scc ( L0, L1, EDWARDS_D ); - field_add ( L1, L0, L2 ); - field_sqr ( L0, ext->x ); - field_neg ( L2, L0 ); - field_add ( L0, L2, L1 ); - L5 = field_is_zero( L0 ); + field_sqr ( L2, ext->y ); + field_neg ( L1, L2 ); + field_sqr ( L0, ext->z ); + field_add ( L2, L0, L1 ); + field_sqr ( L3, ext->u ); + field_sqr ( L0, ext->t ); + field_mul ( L1, L0, L3 ); + field_mulw_scc ( L0, L1, EDWARDS_D ); + field_add ( L1, L0, L2 ); + field_sqr ( L0, ext->x ); + field_neg ( L2, L0 ); + field_add ( L0, L2, L1 ); + L5 = field_is_zero( L0 ); /* * Check invariant: * 0 = -x*y + z*t*u */ - field_mul ( L1, ext->t, ext->u ); - field_mul ( L2, ext->z, L1 ); - field_mul ( L0, ext->x, ext->y ); - field_neg ( L1, L0 ); - field_add ( L0, L1, L2 ); - L4 = field_is_zero( L0 ); + field_mul ( L1, ext->t, ext->u ); + field_mul ( L2, ext->z, L1 ); + field_mul ( L0, ext->x, ext->y ); + field_neg ( L1, L0 ); + field_add ( L0, L1, L2 ); + L4 = field_is_zero( L0 ); return L5 & L4 &~ field_is_zero(ext->z); } From 1f1836de1278c922f536ae3c59ba1883e5068f4c Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Thu, 22 Jan 2015 17:21:20 -0800 Subject: [PATCH 05/15] gmp-style foo_t[1] for points too --- src/crandom.c | 8 +- src/ec_point.c | 124 ++++++++--------- src/goldilocks.c | 128 ++++++++--------- src/include/crandom.h | 9 +- src/include/ec_point.h | 198 +++++++++++++------------- src/include/magic.h | 2 +- src/include/scalarmul.h | 28 ++-- src/include/sha512.h | 10 +- src/p448/magic.c | 4 +- src/p480/magic.c | 4 +- src/p521/magic.c | 4 +- src/scalarmul.c | 298 ++++++++++++++++++++-------------------- src/sha512.c | 8 +- test/bench.c | 10 +- test/test_pointops.c | 4 +- test/test_scalarmul.c | 4 +- test/test_sha512.c | 12 +- 17 files changed, 428 insertions(+), 427 deletions(-) diff --git a/src/crandom.c b/src/crandom.c index da0c3c9..4f36644 100644 --- a/src/crandom.c +++ b/src/crandom.c @@ -301,7 +301,7 @@ crandom_chacha_expand(u_int64_t iv, int crandom_init_from_file( - struct crandom_state_t *state, + crandom_state_a_t state, const char *filename, int reseed_interval, int reseeds_mandatory @@ -338,7 +338,7 @@ crandom_init_from_file( void crandom_init_from_buffer( - struct crandom_state_t *state, + crandom_state_a_t state, const char initial_seed[32] ) { memcpy(state->seed, initial_seed, 32); @@ -350,7 +350,7 @@ 
crandom_init_from_buffer( int crandom_generate( - struct crandom_state_t *state, + crandom_state_a_t state, unsigned char *output, unsigned long long length ) { @@ -475,7 +475,7 @@ crandom_generate( void crandom_destroy( - struct crandom_state_t *state + crandom_state_a_t state ) { if (state->magic == CRANDOM_MAGIC && state->randomfd) { (void) close(state->randomfd); diff --git a/src/ec_point.c b/src/ec_point.c index e78852b..2582372 100644 --- a/src/ec_point.c +++ b/src/ec_point.c @@ -14,8 +14,8 @@ void add_tw_niels_to_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e + tw_extensible_a_t d, + const tw_niels_a_t e ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1; @@ -36,8 +36,8 @@ add_tw_niels_to_tw_extensible ( void sub_tw_niels_from_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e + tw_extensible_a_t d, + const tw_niels_a_t e ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1; @@ -58,29 +58,29 @@ sub_tw_niels_from_tw_extensible ( void add_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a + tw_extensible_a_t e, + const tw_pniels_a_t a ) { field_a_t L0; field_mul ( L0, e->z, a->z ); field_copy ( e->z, L0 ); - add_tw_niels_to_tw_extensible( e, &a->n ); + add_tw_niels_to_tw_extensible( e, a->n ); } void sub_tw_pniels_from_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a + tw_extensible_a_t e, + const tw_pniels_a_t a ) { field_a_t L0; field_mul ( L0, e->z, a->z ); field_copy ( e->z, L0 ); - sub_tw_niels_from_tw_extensible( e, &a->n ); + sub_tw_niels_from_tw_extensible( e, a->n ); } void double_tw_extensible ( - struct tw_extensible_t* a + tw_extensible_a_t a ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1, L2; @@ -105,7 +105,7 @@ double_tw_extensible ( void double_extensible ( - struct extensible_t* a + extensible_a_t a ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1, L2; @@ -130,8 +130,8 @@ double_extensible ( void twist_and_double ( - struct tw_extensible_t* b, - const struct extensible_t* a + tw_extensible_a_t b, + const extensible_a_t a ) { field_a_t L0; field_sqr ( b->x, a->x ); @@ -151,8 +151,8 @@ twist_and_double ( void untwist_and_double ( - struct extensible_t* b, - const struct tw_extensible_t* a + extensible_a_t b, + const tw_extensible_a_t a ) { field_a_t L0; field_sqr ( b->x, a->x ); @@ -172,20 +172,20 @@ untwist_and_double ( void convert_tw_affine_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_affine_t* a + tw_pniels_a_t b, + const tw_affine_a_t a ) { - field_sub ( b->n.a, a->y, a->x ); - field_add ( b->n.b, a->x, a->y ); + field_sub ( b->n->a, a->y, a->x ); + field_add ( b->n->b, a->x, a->y ); field_mul ( b->z, a->y, a->x ); - field_mulw_scc_wr ( b->n.c, b->z, 2*EDWARDS_D-2 ); + field_mulw_scc_wr ( b->n->c, b->z, 2*EDWARDS_D-2 ); field_set_ui( b->z, 2 ); } void convert_tw_affine_to_tw_extensible ( - struct tw_extensible_t* b, - const struct tw_affine_t* a + tw_extensible_a_t b, + const tw_affine_a_t a ) { field_copy ( b->x, a->x ); field_copy ( b->y, a->y ); @@ -196,8 +196,8 @@ convert_tw_affine_to_tw_extensible ( void convert_affine_to_extensible ( - struct extensible_t* b, - const struct affine_t* a + extensible_a_t b, + const affine_a_t a ) { field_copy ( b->x, a->x ); field_copy ( b->y, a->y ); @@ -208,23 +208,23 @@ convert_affine_to_extensible ( void convert_tw_extensible_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_extensible_t* a + tw_pniels_a_t b, + const tw_extensible_a_t a ) { - field_sub ( b->n.a, a->y, a->x ); - 
field_add ( b->n.b, a->x, a->y ); + field_sub ( b->n->a, a->y, a->x ); + field_add ( b->n->b, a->x, a->y ); field_mul ( b->z, a->u, a->t ); - field_mulw_scc_wr ( b->n.c, b->z, 2*EDWARDS_D-2 ); + field_mulw_scc_wr ( b->n->c, b->z, 2*EDWARDS_D-2 ); field_add ( b->z, a->z, a->z ); } void convert_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* d + tw_extensible_a_t e, + const tw_pniels_a_t d ) { - field_add ( e->u, d->n.b, d->n.a ); - field_sub ( e->t, d->n.b, d->n.a ); + field_add ( e->u, d->n->b, d->n->a ); + field_sub ( e->t, d->n->b, d->n->a ); field_mul ( e->x, d->z, e->t ); field_mul ( e->y, d->z, e->u ); field_sqr ( e->z, d->z ); @@ -232,8 +232,8 @@ convert_tw_pniels_to_tw_extensible ( void convert_tw_niels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_niels_t* d + tw_extensible_a_t e, + const tw_niels_a_t d ) { field_add ( e->y, d->b, d->a ); field_sub ( e->x, d->b, d->a ); @@ -244,7 +244,7 @@ convert_tw_niels_to_tw_extensible ( void montgomery_step ( - struct montgomery_t* a + montgomery_a_t a ) { ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1; @@ -272,7 +272,7 @@ montgomery_step ( void deserialize_montgomery ( - struct montgomery_t* a, + montgomery_a_t a, const field_a_t sbz ) { field_sqr ( a->z0, sbz ); @@ -285,7 +285,7 @@ deserialize_montgomery ( mask_t serialize_montgomery ( field_a_t b, - const struct montgomery_t* a, + const montgomery_a_t a, const field_a_t sbz ) { mask_t L4, L5, L6; @@ -332,7 +332,7 @@ serialize_montgomery ( void serialize_extensible ( field_a_t b, - const struct extensible_t* a + const extensible_a_t a ) { field_a_t L0, L1, L2; field_sub ( L0, a->y, a->z ); @@ -351,7 +351,7 @@ serialize_extensible ( void untwist_and_double_and_serialize ( field_a_t b, - const struct tw_extensible_t* a + const tw_extensible_a_t a ) { field_a_t L0, L1, L2, L3; field_mul ( L3, a->y, a->x ); @@ -375,8 +375,8 @@ untwist_and_double_and_serialize ( void twist_even ( - struct tw_extensible_t* b, - const struct extensible_t* a + tw_extensible_a_t b, + const extensible_a_t a ) { field_sqr ( b->y, a->z ); field_sqr ( b->z, a->x ); @@ -401,8 +401,8 @@ twist_even ( void test_only_twist ( - struct tw_extensible_t* b, - const struct extensible_t* a + tw_extensible_a_t b, + const extensible_a_t a ) { field_a_t L0, L1; field_sqr ( b->u, a->z ); @@ -436,7 +436,7 @@ test_only_twist ( mask_t is_even_pt ( - const struct extensible_t* a + const extensible_a_t a ) { field_a_t L0, L1, L2; field_sqr ( L2, a->z ); @@ -447,7 +447,7 @@ is_even_pt ( mask_t is_even_tw ( - const struct tw_extensible_t* a + const tw_extensible_a_t a ) { field_a_t L0, L1, L2; field_sqr ( L2, a->z ); @@ -458,7 +458,7 @@ is_even_tw ( mask_t deserialize_affine ( - struct affine_t* a, + affine_a_t a, const field_a_t sz ) { field_a_t L0, L1, L2, L3; @@ -492,7 +492,7 @@ deserialize_affine ( mask_t deserialize_and_twist_approx ( - struct tw_extensible_t* a, + tw_extensible_a_t a, const field_a_t sz ) { field_a_t L0, L1; @@ -537,7 +537,7 @@ deserialize_and_twist_approx ( void set_identity_extensible ( - struct extensible_t* a + extensible_a_t a ) { field_set_ui( a->x, 0 ); field_set_ui( a->y, 1 ); @@ -548,7 +548,7 @@ set_identity_extensible ( void set_identity_tw_extensible ( - struct tw_extensible_t* a + tw_extensible_a_t a ) { field_set_ui( a->x, 0 ); field_set_ui( a->y, 1 ); @@ -559,7 +559,7 @@ set_identity_tw_extensible ( void set_identity_affine ( - struct affine_t* a + affine_a_t a ) { field_set_ui( a->x, 0 ); field_set_ui( a->y, 1 ); @@ -567,8 +567,8 @@ 
set_identity_affine ( mask_t eq_affine ( - const struct affine_t* a, - const struct affine_t* b + const affine_a_t a, + const affine_a_t b ) { mask_t L1, L2; field_a_t L0; @@ -581,8 +581,8 @@ eq_affine ( mask_t eq_extensible ( - const struct extensible_t* a, - const struct extensible_t* b + const extensible_a_t a, + const extensible_a_t b ) { mask_t L3, L4; field_a_t L0, L1, L2; @@ -599,8 +599,8 @@ eq_extensible ( mask_t eq_tw_extensible ( - const struct tw_extensible_t* a, - const struct tw_extensible_t* b + const tw_extensible_a_t a, + const tw_extensible_a_t b ) { mask_t L3, L4; field_a_t L0, L1, L2; @@ -617,7 +617,7 @@ eq_tw_extensible ( void elligator_2s_inject ( - struct affine_t* a, + affine_a_t a, const field_a_t r ) { field_a_t L2, L3, L4, L5, L6, L7, L8; @@ -676,7 +676,7 @@ elligator_2s_inject ( mask_t validate_affine ( - const struct affine_t* a + const affine_a_t a ) { field_a_t L0, L1, L2, L3; field_sqr ( L0, a->y ); @@ -691,7 +691,7 @@ validate_affine ( mask_t validate_tw_extensible ( - const struct tw_extensible_t* ext + const tw_extensible_a_t ext ) { mask_t L4, L5; field_a_t L0, L1, L2, L3; @@ -728,7 +728,7 @@ validate_tw_extensible ( mask_t validate_extensible ( - const struct extensible_t* ext + const extensible_a_t ext ) { mask_t L4, L5; field_a_t L0, L1, L2, L3; diff --git a/src/goldilocks.c b/src/goldilocks.c index 7cba9c4..1c647f4 100644 --- a/src/goldilocks.c +++ b/src/goldilocks.c @@ -41,7 +41,7 @@ #else #define FIELD_HASH_BYTES (SHA512_OUTPUT_BYTES * ((FIELD_BYTES-1)/SHA512_OUTPUT_BYTES + 1)) static inline void field_hash_final ( - struct sha512_ctx_t *ctx, + sha512_ctx_a_t *ctx, unsigned char out[FIELD_HASH_BYTES] ) { /* SHA PRNG I guess? I really should have used SHAKE */ @@ -67,19 +67,19 @@ struct goldilocks_precomputed_public_key_t { /* FUTURE: auto. 
*/ static struct { - const char * volatile state; + const char * volatile status; #if GOLDILOCKS_USE_PTHREAD pthread_mutex_t mutex; #endif - struct tw_niels_t combs[COMB_N << (COMB_T-1)]; + tw_niels_a_t combs[COMB_N << (COMB_T-1)]; struct fixed_base_table_t fixed_base; - struct tw_niels_t wnafs[1<= sizeof(sk)); - struct sha512_ctx_t ctx; - struct tw_extensible_t exta; + sha512_ctx_a_t ctx; + tw_extensible_a_t exta; field_a_t pk; - sha512_init(&ctx); - sha512_update(&ctx, (const unsigned char *)"derivepk", GOLDI_DIVERSIFY_BYTES); - sha512_update(&ctx, proto, GOLDI_SYMKEY_BYTES); - field_hash_final(&ctx, (unsigned char *)skb); + sha512_init(ctx); + sha512_update(ctx, (const unsigned char *)"derivepk", GOLDI_DIVERSIFY_BYTES); + sha512_update(ctx, proto, GOLDI_SYMKEY_BYTES); + field_hash_final(ctx, (unsigned char *)skb); barrett_deserialize_and_reduce(sk, skb, sizeof(skb), &curve_prime_order); barrett_serialize(privkey->opaque, sk, GOLDI_FIELD_BYTES); - scalarmul_fixed_base(&exta, sk, GOLDI_SCALAR_BITS, &goldilocks_global.fixed_base); - untwist_and_double_and_serialize(pk, &exta); + scalarmul_fixed_base(exta, sk, GOLDI_SCALAR_BITS, &goldilocks_global.fixed_base); + untwist_and_double_and_serialize(pk, exta); field_serialize(&privkey->opaque[GOLDI_FIELD_BYTES], pk); @@ -204,7 +204,7 @@ goldilocks_keygen ( if (ml_ret) return ml_ret; #endif - int ret = crandom_generate(&goldilocks_global.rand, proto, sizeof(proto)); + int ret = crandom_generate(goldilocks_global.rand, proto, sizeof(proto)); #if GOLDILOCKS_USE_PTHREAD ml_ret = pthread_mutex_unlock(&goldilocks_global.mutex); @@ -267,9 +267,9 @@ goldilocks_shared_secret_core ( #if GOLDI_IMPLEMENT_PRECOMPUTED_KEYS if (pre) { - struct tw_extensible_t tw; - succ &= scalarmul_fixed_base(&tw, sk, GOLDI_SCALAR_BITS, &pre->table); - untwist_and_double_and_serialize(pk, &tw); + tw_extensible_a_t tw; + succ &= scalarmul_fixed_base(tw, sk, GOLDI_SCALAR_BITS, &pre->table); + untwist_and_double_and_serialize(pk, tw); } else { succ &= montgomery_ladder(pk,pk,sk,GOLDI_SCALAR_BITS,1); } @@ -282,8 +282,8 @@ goldilocks_shared_secret_core ( field_serialize(gxy,pk); /* obliterate records of our failure by adjusting with obliteration key */ - struct sha512_ctx_t ctx; - sha512_init(&ctx); + sha512_ctx_a_t ctx; + sha512_init(ctx); #ifdef EXPERIMENT_ECDH_OBLITERATE_CT uint8_t oblit[GOLDI_DIVERSIFY_BYTES + GOLDI_SYMKEY_BYTES]; @@ -294,21 +294,21 @@ goldilocks_shared_secret_core ( for (i=0; iopaque[2*GOLDI_FIELD_BYTES+i] & ~(succ&msucc); } - sha512_update(&ctx, oblit, sizeof(oblit)); + sha512_update(ctx, oblit, sizeof(oblit)); #endif #ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS /* stir in the sum and product of the pubkeys. */ uint8_t a_pk[GOLDI_FIELD_BYTES]; field_serialize(a_pk, sum); - sha512_update(&ctx, a_pk, GOLDI_FIELD_BYTES); + sha512_update(ctx, a_pk, GOLDI_FIELD_BYTES); field_serialize(a_pk, prod); - sha512_update(&ctx, a_pk, GOLDI_FIELD_BYTES); + sha512_update(ctx, a_pk, GOLDI_FIELD_BYTES); #endif /* stir in the shared key and finish */ - sha512_update(&ctx, gxy, GOLDI_FIELD_BYTES); - sha512_final(&ctx, shared); + sha512_update(ctx, gxy, GOLDI_FIELD_BYTES); + sha512_final(ctx, shared); return (GOLDI_ECORRUPT & ~msucc) | (GOLDI_EINVAL & msucc &~ succ) @@ -340,12 +340,12 @@ goldilocks_derive_challenge( ) { /* challenge = H(pk, [nonceG], message). 
*/ unsigned char sha_out[FIELD_HASH_BYTES]; - struct sha512_ctx_t ctx; - sha512_init(&ctx); - sha512_update(&ctx, pubkey, GOLDI_FIELD_BYTES); - sha512_update(&ctx, gnonce, GOLDI_FIELD_BYTES); - sha512_update(&ctx, message, message_len); - field_hash_final(&ctx, sha_out); + sha512_ctx_a_t ctx; + sha512_init(ctx); + sha512_update(ctx, pubkey, GOLDI_FIELD_BYTES); + sha512_update(ctx, gnonce, GOLDI_FIELD_BYTES); + sha512_update(ctx, message, message_len); + field_hash_final(ctx, sha_out); barrett_deserialize_and_reduce(challenge, sha_out, sizeof(sha_out), &curve_prime_order); } @@ -371,22 +371,22 @@ goldilocks_sign ( /* Derive a nonce. TODO: use HMAC. FUTURE: factor. */ unsigned char sha_out[FIELD_HASH_BYTES]; word_t tk[GOLDI_FIELD_WORDS]; - struct sha512_ctx_t ctx; - sha512_init(&ctx); - sha512_update(&ctx, (const unsigned char *)"signonce", 8); - sha512_update(&ctx, &privkey->opaque[2*GOLDI_FIELD_BYTES], GOLDI_SYMKEY_BYTES); - sha512_update(&ctx, message, message_len); - sha512_update(&ctx, &privkey->opaque[2*GOLDI_FIELD_BYTES], GOLDI_SYMKEY_BYTES); - field_hash_final(&ctx, sha_out); + sha512_ctx_a_t ctx; + sha512_init(ctx); + sha512_update(ctx, (const unsigned char *)"signonce", 8); + sha512_update(ctx, &privkey->opaque[2*GOLDI_FIELD_BYTES], GOLDI_SYMKEY_BYTES); + sha512_update(ctx, message, message_len); + sha512_update(ctx, &privkey->opaque[2*GOLDI_FIELD_BYTES], GOLDI_SYMKEY_BYTES); + field_hash_final(ctx, sha_out); barrett_deserialize_and_reduce(tk, sha_out, sizeof(sha_out), &curve_prime_order); /* 4[nonce]G */ uint8_t signature_tmp[GOLDI_FIELD_BYTES]; - struct tw_extensible_t exta; + tw_extensible_a_t exta; field_a_t gsk; - scalarmul_fixed_base(&exta, tk, GOLDI_SCALAR_BITS, &goldilocks_global.fixed_base); - double_tw_extensible(&exta); - untwist_and_double_and_serialize(gsk, &exta); + scalarmul_fixed_base(exta, tk, GOLDI_SCALAR_BITS, &goldilocks_global.fixed_base); + double_tw_extensible(exta); + untwist_and_double_and_serialize(gsk, exta); field_serialize(signature_tmp, gsk); word_t challenge[GOLDI_FIELD_WORDS]; @@ -450,21 +450,21 @@ goldilocks_verify ( goldilocks_derive_challenge(challenge, pubkey->opaque, signature, message, message_len); field_a_t eph; - struct tw_extensible_t pk_text; + tw_extensible_a_t pk_text; /* deserialize [nonce]G */ succ = field_deserialize(eph, signature); if (!succ) return GOLDI_EINVAL; - succ = deserialize_and_twist_approx(&pk_text, pk); + succ = deserialize_and_twist_approx(pk_text, pk); if (!succ) return GOLDI_EINVAL; - linear_combo_var_fixed_vt( &pk_text, + linear_combo_var_fixed_vt( pk_text, challenge, GOLDI_SCALAR_BITS, s, GOLDI_SCALAR_BITS, goldilocks_global.wnafs, WNAF_PRECMP_BITS ); - untwist_and_double_and_serialize( pk, &pk_text ); + untwist_and_double_and_serialize( pk, pk_text ); succ = field_eq(eph, pk); return succ ? 
0 : GOLDI_EINVAL; @@ -483,7 +483,7 @@ goldilocks_precompute_public_key ( if (!precom) return NULL; - struct tw_extensible_t pk_text; + tw_extensible_a_t pk_text; field_a_t pk; mask_t succ = field_deserialize(pk, pub->opaque); @@ -492,13 +492,13 @@ goldilocks_precompute_public_key ( return NULL; } - succ = deserialize_and_twist_approx(&pk_text, pk); + succ = deserialize_and_twist_approx(pk_text, pk); if (!succ) { free(precom); return NULL; } - succ = precompute_fixed_base(&precom->table, &pk_text, + succ = precompute_fixed_base(&precom->table, pk_text, COMB_N, COMB_T, COMB_S, NULL); if (!succ) { free(precom); @@ -539,20 +539,20 @@ goldilocks_verify_precomputed ( goldilocks_derive_challenge(challenge, pubkey->pub.opaque, signature, message, message_len); field_a_t eph, pk; - struct tw_extensible_t pk_text; + tw_extensible_a_t pk_text; /* deserialize [nonce]G */ succ = field_deserialize(eph, signature); if (!succ) return GOLDI_EINVAL; succ = linear_combo_combs_vt ( - &pk_text, + pk_text, challenge, GOLDI_SCALAR_BITS, &pubkey->table, s, GOLDI_SCALAR_BITS, &goldilocks_global.fixed_base ); if (!succ) return GOLDI_EINVAL; - untwist_and_double_and_serialize( pk, &pk_text ); + untwist_and_double_and_serialize( pk, pk_text ); succ = field_eq(eph, pk); return succ ? 0 : GOLDI_EINVAL; diff --git a/src/include/crandom.h b/src/include/crandom.h index 90cc374..c9f4c26 100644 --- a/src/include/crandom.h +++ b/src/include/crandom.h @@ -39,6 +39,7 @@ struct crandom_state_t { int reseeds_mandatory; int randomfd; } __attribute__((aligned(16))) ; +typedef struct crandom_state_t crandom_state_a_t[1]; #ifdef __cplusplus extern "C" { @@ -64,7 +65,7 @@ extern "C" { */ int crandom_init_from_file ( - struct crandom_state_t *state, + crandom_state_a_t state, const char *filename, int reseed_interval, int reseeds_mandatory @@ -87,7 +88,7 @@ crandom_init_from_file ( */ void crandom_init_from_buffer ( - struct crandom_state_t *state, + crandom_state_a_t state, const char initial_seed[32] ); @@ -118,7 +119,7 @@ crandom_init_from_buffer ( */ int crandom_generate ( - struct crandom_state_t *state, + crandom_state_a_t state, unsigned char *output, unsigned long long length ); @@ -131,7 +132,7 @@ crandom_generate ( */ void crandom_destroy ( - struct crandom_state_t *state + crandom_state_a_t state ); #ifdef __cplusplus diff --git a/src/include/ec_point.h b/src/include/ec_point.h index 9d0f4f3..54ab9cb 100644 --- a/src/include/ec_point.h +++ b/src/include/ec_point.h @@ -20,23 +20,23 @@ extern "C" { /** * Affine point on an Edwards curve. */ -struct affine_t { +typedef struct affine_t { field_a_t x, y; -}; +} affine_a_t[1]; /** * Affine point on a twisted Edwards curve. */ -struct tw_affine_t { +typedef struct tw_affine_t { field_a_t x, y; -}; +} tw_affine_a_t[1]; /** * Montgomery buffer. */ -struct montgomery_t { +typedef struct montgomery_t { field_a_t z0, xd, zd, xa, za; -}; +} montgomery_a_t[1]; /** * Extensible coordinates for Edwards curves, suitable for @@ -56,36 +56,36 @@ struct montgomery_t { * lookahead trick. It might be worth considering that trick * instead. */ -struct extensible_t { +typedef struct extensible_t { field_a_t x, y, z, t, u; -}; +} extensible_a_t[1]; /** * Extensible coordinates for twisted Edwards curves, * suitable for accumulators. */ -struct tw_extensible_t { +typedef struct tw_extensible_t { field_a_t x, y, z, t, u; -}; +} tw_extensible_a_t[1]; /** * Niels coordinates for twisted Edwards curves. * * Good for mixed readdition; suitable for fixed tables. 
*/ -struct tw_niels_t { +typedef struct tw_niels_t { field_a_t a, b, c; -}; +} tw_niels_a_t[1]; /** * Projective niels coordinates for twisted Edwards curves. * * Good for readdition; suitable for temporary tables. */ -struct tw_pniels_t { - struct tw_niels_t n; +typedef struct tw_pniels_t { + tw_niels_a_t n; field_a_t z; -}; +} tw_pniels_a_t[1]; /** @@ -93,8 +93,8 @@ struct tw_pniels_t { */ static __inline__ void copy_affine ( - struct affine_t* a, - const struct affine_t* ds + affine_a_t a, + const affine_a_t ds ) __attribute__((unused,always_inline)); /** @@ -102,8 +102,8 @@ copy_affine ( */ static __inline__ void copy_tw_affine ( - struct tw_affine_t* a, - const struct tw_affine_t* ds + tw_affine_a_t a, + const tw_affine_a_t ds ) __attribute__((unused,always_inline)); /** @@ -111,8 +111,8 @@ copy_tw_affine ( */ static __inline__ void copy_montgomery ( - struct montgomery_t* a, - const struct montgomery_t* ds + montgomery_a_t a, + const montgomery_a_t ds ) __attribute__((unused,always_inline)); /** @@ -120,8 +120,8 @@ copy_montgomery ( */ static __inline__ void copy_extensible ( - struct extensible_t* a, - const struct extensible_t* ds + extensible_a_t a, + const extensible_a_t ds ) __attribute__((unused,always_inline)); /** @@ -129,8 +129,8 @@ copy_extensible ( */ static __inline__ void copy_tw_extensible ( - struct tw_extensible_t* a, - const struct tw_extensible_t* ds + tw_extensible_a_t a, + const tw_extensible_a_t ds ) __attribute__((unused,always_inline)); /** @@ -138,8 +138,8 @@ copy_tw_extensible ( */ static __inline__ void copy_tw_niels ( - struct tw_niels_t* a, - const struct tw_niels_t* ds + tw_niels_a_t a, + const tw_niels_a_t ds ) __attribute__((unused,always_inline)); /** @@ -147,8 +147,8 @@ copy_tw_niels ( */ static __inline__ void copy_tw_pniels ( - struct tw_pniels_t* a, - const struct tw_pniels_t* ds + tw_pniels_a_t a, + const tw_pniels_a_t ds ) __attribute__((unused,always_inline)); /** @@ -157,8 +157,8 @@ copy_tw_pniels ( */ void add_tw_niels_to_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e + tw_extensible_a_t d, + const tw_niels_a_t e ); /** @@ -167,8 +167,8 @@ add_tw_niels_to_tw_extensible ( */ void sub_tw_niels_from_tw_extensible ( - struct tw_extensible_t* d, - const struct tw_niels_t* e + tw_extensible_a_t d, + const tw_niels_a_t e ); /** @@ -177,8 +177,8 @@ sub_tw_niels_from_tw_extensible ( */ void add_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a + tw_extensible_a_t e, + const tw_pniels_a_t a ); /** @@ -187,8 +187,8 @@ add_tw_pniels_to_tw_extensible ( */ void sub_tw_pniels_from_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* a + tw_extensible_a_t e, + const tw_pniels_a_t a ); /** @@ -196,7 +196,7 @@ sub_tw_pniels_from_tw_extensible ( */ void double_tw_extensible ( - struct tw_extensible_t* a + tw_extensible_a_t a ); /** @@ -204,7 +204,7 @@ double_tw_extensible ( */ void double_extensible ( - struct extensible_t* a + extensible_a_t a ); /** @@ -214,8 +214,8 @@ double_extensible ( */ void twist_and_double ( - struct tw_extensible_t* b, - const struct extensible_t* a + tw_extensible_a_t b, + const extensible_a_t a ); /** @@ -225,61 +225,61 @@ twist_and_double ( */ void untwist_and_double ( - struct extensible_t* b, - const struct tw_extensible_t* a + extensible_a_t b, + const tw_extensible_a_t a ); void convert_tw_affine_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_affine_t* a + tw_pniels_a_t b, + const tw_affine_a_t a ); void 
convert_tw_affine_to_tw_extensible ( - struct tw_extensible_t* b, - const struct tw_affine_t* a + tw_extensible_a_t b, + const tw_affine_a_t a ); void convert_affine_to_extensible ( - struct extensible_t* b, - const struct affine_t* a + extensible_a_t b, + const affine_a_t a ); void convert_tw_extensible_to_tw_pniels ( - struct tw_pniels_t* b, - const struct tw_extensible_t* a + tw_pniels_a_t b, + const tw_extensible_a_t a ); void convert_tw_pniels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_pniels_t* d + tw_extensible_a_t e, + const tw_pniels_a_t d ); void convert_tw_niels_to_tw_extensible ( - struct tw_extensible_t* e, - const struct tw_niels_t* d + tw_extensible_a_t e, + const tw_niels_a_t d ); void montgomery_step ( - struct montgomery_t* a + montgomery_a_t a ); void deserialize_montgomery ( - struct montgomery_t* a, + montgomery_a_t a, const field_a_t sbz ); mask_t serialize_montgomery ( field_a_t b, - const struct montgomery_t* a, + const montgomery_a_t a, const field_a_t sbz ); @@ -297,7 +297,7 @@ serialize_montgomery ( void serialize_extensible ( field_a_t b, - const struct extensible_t* a + const extensible_a_t a ); /** @@ -306,7 +306,7 @@ serialize_extensible ( void untwist_and_double_and_serialize ( field_a_t b, - const struct tw_extensible_t* a + const tw_extensible_a_t a ); /** @@ -320,8 +320,8 @@ untwist_and_double_and_serialize ( */ void twist_even ( - struct tw_extensible_t* b, - const struct extensible_t* a + tw_extensible_a_t b, + const extensible_a_t a ); /** @@ -339,8 +339,8 @@ twist_even ( */ void test_only_twist ( - struct tw_extensible_t* b, - const struct extensible_t* a + tw_extensible_a_t b, + const extensible_a_t a ); mask_t @@ -350,12 +350,12 @@ field_is_square ( mask_t is_even_pt ( - const struct extensible_t* a + const extensible_a_t a ); mask_t is_even_tw ( - const struct tw_extensible_t* a + const tw_extensible_a_t a ); /** @@ -363,7 +363,7 @@ is_even_tw ( */ mask_t deserialize_affine ( - struct affine_t* a, + affine_a_t a, const field_a_t sz ); @@ -376,52 +376,52 @@ deserialize_affine ( */ mask_t deserialize_and_twist_approx ( - struct tw_extensible_t* a, + tw_extensible_a_t a, const field_a_t sz ); void set_identity_extensible ( - struct extensible_t* a + extensible_a_t a ); void set_identity_tw_extensible ( - struct tw_extensible_t* a + tw_extensible_a_t a ); void set_identity_affine ( - struct affine_t* a + affine_a_t a ); mask_t eq_affine ( - const struct affine_t* a, - const struct affine_t* b + const affine_a_t a, + const affine_a_t b ); mask_t eq_extensible ( - const struct extensible_t* a, - const struct extensible_t* b + const extensible_a_t a, + const extensible_a_t b ); mask_t eq_tw_extensible ( - const struct tw_extensible_t* a, - const struct tw_extensible_t* b + const tw_extensible_a_t a, + const tw_extensible_a_t b ); void elligator_2s_inject ( - struct affine_t* a, + affine_a_t a, const field_a_t r ); mask_t validate_affine ( - const struct affine_t* a + const affine_a_t a ); /** @@ -431,7 +431,7 @@ validate_affine ( */ mask_t validate_tw_extensible ( - const struct tw_extensible_t* ext + const tw_extensible_a_t ext ); /** @@ -441,7 +441,7 @@ validate_tw_extensible ( */ mask_t validate_extensible ( - const struct extensible_t* ext + const extensible_a_t ext ); /** @@ -450,7 +450,7 @@ validate_extensible ( static __inline__ void __attribute__((unused)) cond_negate_tw_niels ( - struct tw_niels_t *n, + tw_niels_a_t n, mask_t doNegate ) { constant_time_cond_swap(n->a, n->b, sizeof(n->a), doNegate); @@ -463,16 +463,16 @@ 
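/* Why cond_negate_tw_niels is mostly a swap: a Niels point stores
 * a = y - x and b = y + x, so negating x exchanges a and b (the c
 * component's sign is handled separately). The swap itself is
 * branchless; a sketch of the masked technique, assuming mask_t is
 * 0 or all-ones as elsewhere in this library, with toy word arrays
 * standing in for field elements:
 */
#include <stdint.h>
#include <stddef.h>

typedef uint64_t word_t;
typedef word_t mask_t;  /* 0 when false, ~(word_t)0 when true */

static void toy_cond_swap(word_t *a, word_t *b, size_t nwords, mask_t doswap) {
    size_t i;
    for (i = 0; i < nwords; i++) {
        word_t delta = (a[i] ^ b[i]) & doswap;  /* zero when doswap == 0 */
        a[i] ^= delta;
        b[i] ^= delta;
    }
}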
cond_negate_tw_niels ( static __inline__ void __attribute__((unused)) cond_negate_tw_pniels ( - struct tw_pniels_t *n, + tw_pniels_a_t n, mask_t doNegate ) { - cond_negate_tw_niels(&n->n, doNegate); + cond_negate_tw_niels(n->n, doNegate); } void copy_affine ( - struct affine_t* a, - const struct affine_t* ds + affine_a_t a, + const affine_a_t ds ) { field_copy ( a->x, ds->x ); field_copy ( a->y, ds->y ); @@ -480,8 +480,8 @@ copy_affine ( void copy_tw_affine ( - struct tw_affine_t* a, - const struct tw_affine_t* ds + tw_affine_a_t a, + const tw_affine_a_t ds ) { field_copy ( a->x, ds->x ); field_copy ( a->y, ds->y ); @@ -489,8 +489,8 @@ copy_tw_affine ( void copy_montgomery ( - struct montgomery_t* a, - const struct montgomery_t* ds + montgomery_a_t a, + const montgomery_a_t ds ) { field_copy ( a->z0, ds->z0 ); field_copy ( a->xd, ds->xd ); @@ -501,8 +501,8 @@ copy_montgomery ( void copy_extensible ( - struct extensible_t* a, - const struct extensible_t* ds + extensible_a_t a, + const extensible_a_t ds ) { field_copy ( a->x, ds->x ); field_copy ( a->y, ds->y ); @@ -513,8 +513,8 @@ copy_extensible ( void copy_tw_extensible ( - struct tw_extensible_t* a, - const struct tw_extensible_t* ds + tw_extensible_a_t a, + const tw_extensible_a_t ds ) { field_copy ( a->x, ds->x ); field_copy ( a->y, ds->y ); @@ -525,8 +525,8 @@ copy_tw_extensible ( void copy_tw_niels ( - struct tw_niels_t* a, - const struct tw_niels_t* ds + tw_niels_a_t a, + const tw_niels_a_t ds ) { field_copy ( a->a, ds->a ); field_copy ( a->b, ds->b ); @@ -535,10 +535,10 @@ copy_tw_niels ( void copy_tw_pniels ( - struct tw_pniels_t* a, - const struct tw_pniels_t* ds + tw_pniels_a_t a, + const tw_pniels_a_t ds ) { - copy_tw_niels( &a->n, &ds->n ); + copy_tw_niels( a->n, ds->n ); field_copy ( a->z, ds->z ); } diff --git a/src/include/magic.h b/src/include/magic.h index 1627a6b..1d186f2 100644 --- a/src/include/magic.h +++ b/src/include/magic.h @@ -50,7 +50,7 @@ extern const field_a_t sqrt_d_minus_1; /** * @brief The base point for Goldilocks. */ -extern const struct affine_t goldilocks_base_point; +extern const affine_a_t goldilocks_base_point; /** * @brief The Goldilocks prime subgroup order. diff --git a/src/include/scalarmul.h b/src/include/scalarmul.h index ecb1782..dab8a99 100644 --- a/src/include/scalarmul.h +++ b/src/include/scalarmul.h @@ -30,7 +30,7 @@ typedef word_t scalar_t[SCALAR_WORDS]; */ struct fixed_base_table_t { /** Comb tables containing multiples of the base point. */ - struct tw_niels_t *table; + tw_niels_a_t *table; /** Adjustments to the scalar in even and odd cases, respectively. */ word_t scalar_adjustments[2*SCALAR_WORDS]; @@ -109,7 +109,7 @@ montgomery_ladder ( */ void scalarmul ( - struct tw_extensible_t *working, + tw_extensible_a_t working, const word_t scalar[SCALAR_WORDS] /* TODO? 
int nbits */ ); @@ -130,7 +130,7 @@ scalarmul ( */ void scalarmul_vlook ( - struct tw_extensible_t *working, + tw_extensible_a_t working, const word_t scalar[SCALAR_WORDS] ); @@ -161,11 +161,11 @@ scalarmul_vlook ( mask_t precompute_fixed_base ( struct fixed_base_table_t *out, - const struct tw_extensible_t *base, + const tw_extensible_a_t base, unsigned int n, unsigned int t, unsigned int s, - struct tw_niels_t *prealloc + tw_niels_a_t *prealloc ) __attribute__((warn_unused_result)); /** @@ -197,7 +197,7 @@ destroy_fixed_base ( */ mask_t scalarmul_fixed_base ( - struct tw_extensible_t *out, + tw_extensible_a_t out, const word_t *scalar, unsigned int nbits, const struct fixed_base_table_t *table @@ -215,7 +215,7 @@ scalarmul_fixed_base ( */ void scalarmul_vt ( - struct tw_extensible_t *working, + tw_extensible_a_t working, const word_t *scalar, unsigned int nbits ); @@ -236,8 +236,8 @@ scalarmul_vt ( */ mask_t precompute_fixed_base_wnaf ( - struct tw_niels_t *out, - const struct tw_extensible_t *base, + tw_niels_a_t *out, + const tw_extensible_a_t base, unsigned int tbits ) __attribute__((warn_unused_result)); @@ -256,10 +256,10 @@ precompute_fixed_base_wnaf ( */ void scalarmul_fixed_base_wnaf_vt ( - struct tw_extensible_t *out, + tw_extensible_a_t out, const word_t *scalar, unsigned int nbits, - const struct tw_niels_t *precmp, + const tw_niels_a_t *precmp, unsigned int table_bits ); @@ -281,12 +281,12 @@ scalarmul_fixed_base_wnaf_vt ( */ void linear_combo_var_fixed_vt ( - struct tw_extensible_t *working, + tw_extensible_a_t working, const word_t scalar_var[SCALAR_WORDS], unsigned int nbits_var, const word_t scalar_pre[SCALAR_WORDS], unsigned int nbits_pre, - const struct tw_niels_t *precmp, + const tw_niels_a_t *precmp, unsigned int table_bits_pre ); @@ -309,7 +309,7 @@ linear_combo_var_fixed_vt ( */ mask_t linear_combo_combs_vt ( - struct tw_extensible_t *out, + tw_extensible_a_t out, const word_t scalar1[SCALAR_WORDS], unsigned int nbits1, const struct fixed_base_table_t *table1, diff --git a/src/include/sha512.h b/src/include/sha512.h index 760e31e..807f73d 100644 --- a/src/include/sha512.h +++ b/src/include/sha512.h @@ -17,28 +17,28 @@ extern "C" { * * This structure is opaque. 
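/* With the one-element-array context type introduced just below, call
 * sites declare the hash state like a value but pass it with no '&'.
 * A usage sketch assuming the sha512.h declarations in this patch
 * (the message and its length are arbitrary):
 */
void example_digest(uint8_t out[SHA512_OUTPUT_BYTES]) {
    sha512_ctx_a_t ctx;                                   /* stack storage */
    sha512_init(ctx);                                     /* was: &ctx     */
    sha512_update(ctx, (const unsigned char *)"abc", 3);
    sha512_final(ctx, out);
}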
*/ -struct sha512_ctx_t { +typedef struct { /** @privatesection */ uint64_t chain[8]; uint8_t block[128]; uint64_t nbytes; -}; +} sha512_ctx_a_t[1]; void sha512_init ( - struct sha512_ctx_t *ctx + sha512_ctx_a_t ctx ); void sha512_update ( - struct sha512_ctx_t *ctx, + sha512_ctx_a_t ctx, const unsigned char *data, uint64_t bytes ); void sha512_final ( - struct sha512_ctx_t *ctx, + sha512_ctx_a_t ctx, uint8_t result[SHA512_OUTPUT_BYTES] ); diff --git a/src/p448/magic.c b/src/p448/magic.c index 20c5fa5..74b433c 100644 --- a/src/p448/magic.c +++ b/src/p448/magic.c @@ -33,7 +33,7 @@ const word_t SCALARMUL_FIXED_WINDOW_ADJUSTMENT[2*SCALAR_WORDS] = { U64LE(0x0000000000000000) }; -const struct affine_t goldilocks_base_point = { +const affine_a_t goldilocks_base_point = {{ #ifdef USE_NEON_PERM {{{ 0xaed939f,0xc59d070,0xf0de840,0x5f065c3, 0xf4ba0c7,0xdf73324,0xc170033,0x3a6a26a, 0x4c63d96,0x4609845,0xf3932d9,0x1b4faff, 0x6147eaa,0xa2692ff,0x9cecfa9,0x297ea0e @@ -46,7 +46,7 @@ const struct affine_t goldilocks_base_point = { }}}, #endif {{{ 19 }}} -}; +}}; static const word_t curve_prime_order_lo[(224+WORD_BITS-1)/WORD_BITS] = { U64LE(0xdc873d6d54a7bb0d), diff --git a/src/p480/magic.c b/src/p480/magic.c index 8615071..7ae8304 100644 --- a/src/p480/magic.c +++ b/src/p480/magic.c @@ -35,7 +35,7 @@ const word_t SCALARMUL_FIXED_WINDOW_ADJUSTMENT[2*SCALAR_WORDS] = { 0x00000000 }; -const struct affine_t goldilocks_base_point = { +const affine_a_t goldilocks_base_point = {{ {{{ U60LE(0x849ff7f845c30d3), U60LE(0x7dda488553a4c5b), @@ -47,7 +47,7 @@ const struct affine_t goldilocks_base_point = { U60LE(0x7ca42af3d564280) }}}, {{{ 5 }}} -}; +}}; static const word_t curve_prime_order_lo[(240+WORD_BITS-1)/WORD_BITS] = { U64LE(0x72e70941cf8da597), diff --git a/src/p521/magic.c b/src/p521/magic.c index f8ab264..4613958 100644 --- a/src/p521/magic.c +++ b/src/p521/magic.c @@ -38,7 +38,7 @@ const word_t SCALARMUL_FIXED_WINDOW_ADJUSTMENT[2*SCALAR_WORDS] = { 0x0 }; -const struct affine_t goldilocks_base_point = { +const affine_a_t goldilocks_base_point = {{ {{{ #ifdef USE_P521_3x3_TRANSPOSE U58LE(0x02a940a2f19ba6c), @@ -66,7 +66,7 @@ const struct affine_t goldilocks_base_point = { #endif }}}, {{{ 12 }}} -}; +}}; static const word_t curve_prime_order_lo[(261+WORD_BITS-1)/WORD_BITS] = { U64LE(0xbf15dbca0ae7f295), diff --git a/src/scalarmul.c b/src/scalarmul.c index cf95984..12925b2 100644 --- a/src/scalarmul.c +++ b/src/scalarmul.c @@ -21,8 +21,8 @@ montgomery_ladder ( unsigned int nbits, unsigned int n_extra_doubles ) { - struct montgomery_t mont; - deserialize_montgomery(&mont, in); + montgomery_a_t mont; + deserialize_montgomery(mont, in); int i,j,n=(nbits-1)%WORD_BITS; mask_t pflip = 0; @@ -30,29 +30,29 @@ montgomery_ladder ( word_t w = scalar[j]; for (i=n; i>=0; i--) { mask_t flip = -((w>>i)&1); - constant_time_cond_swap(mont.xa,mont.xd,sizeof(mont.xd),flip^pflip); - constant_time_cond_swap(mont.za,mont.zd,sizeof(mont.xd),flip^pflip); - montgomery_step(&mont); + constant_time_cond_swap(mont->xa,mont->xd,sizeof(mont->xd),flip^pflip); + constant_time_cond_swap(mont->za,mont->zd,sizeof(mont->xd),flip^pflip); + montgomery_step(mont); pflip = flip; } n = WORD_BITS-1; } - constant_time_cond_swap(mont.xa,mont.xd,sizeof(mont.xd),pflip); - constant_time_cond_swap(mont.za,mont.zd,sizeof(mont.xd),pflip); + constant_time_cond_swap(mont->xa,mont->xd,sizeof(mont->xd),pflip); + constant_time_cond_swap(mont->za,mont->zd,sizeof(mont->xd),pflip); assert(n_extra_doubles < INT_MAX); for (j=0; j<(int)n_extra_doubles; j++) { - 
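/* The ladder above defers its conditional swaps: rather than swapping
 * before and after every step, it XORs the current bit's mask with the
 * previous one (flip ^ pflip), so the points are exchanged only when
 * consecutive scalar bits differ, and one final pflip swap settles the
 * state. A toy model of just the swap schedule, with word_t standing
 * in for the field elements:
 */
#include <stdint.h>

typedef uint64_t word_t;
typedef word_t mask_t;

static void toy_ladder_swaps(word_t w, int nbits, word_t *xa, word_t *xd) {
    mask_t pflip = 0;
    word_t delta;
    int i;
    for (i = nbits - 1; i >= 0; i--) {
        mask_t flip = -((w >> i) & 1);         /* 0 or all-ones */
        delta = (*xa ^ *xd) & (flip ^ pflip);  /* swap iff bits differ */
        *xa ^= delta;
        *xd ^= delta;
        /* montgomery_step would operate on (xa, xd) here */
        pflip = flip;
    }
    delta = (*xa ^ *xd) & pflip;               /* settle the last swap */
    *xa ^= delta;
    *xd ^= delta;
}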
montgomery_step(&mont); + montgomery_step(mont); } - return serialize_montgomery(out, &mont, in); + return serialize_montgomery(out, mont, in); } static __inline__ void __attribute__((unused,always_inline)) constant_time_lookup_tw_pniels ( - struct tw_pniels_t *out, - const struct tw_pniels_t *in, + tw_pniels_a_t out, + const tw_pniels_a_t *in, int nin, int idx ) { @@ -62,8 +62,8 @@ constant_time_lookup_tw_pniels ( static __inline__ void __attribute__((unused,always_inline)) constant_time_lookup_tw_niels ( - struct tw_niels_t *out, - const struct tw_niels_t *in, + tw_niels_a_t out, + const tw_niels_a_t *in, int nin, int idx ) { @@ -73,8 +73,8 @@ constant_time_lookup_tw_niels ( /* static __inline__ void constant_time_lookup_tw_pniels ( - struct tw_pniels_t *out, - const struct tw_pniels_t *in, + tw_pniels_a_t out, + const tw_pniels_a_t in, int nin, int idx ) { @@ -95,8 +95,8 @@ constant_time_lookup_tw_pniels ( static __inline__ void constant_time_lookup_tw_niels ( - struct tw_niels_t *out, - const struct tw_niels_t *in, + tw_niels_a_t out, + const tw_niels_a_t in, int nin, int idx ) { @@ -145,7 +145,7 @@ convert_to_signed_window_form ( void scalarmul ( - struct tw_extensible_t *working, + tw_extensible_a_t working, const word_t scalar[SCALAR_WORDS] ) { const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE, @@ -159,20 +159,20 @@ scalarmul ( SCALARMUL_FIXED_WINDOW_ADJUSTMENT, SCALAR_WORDS ); - struct tw_extensible_t tabulator; - copy_tw_extensible(&tabulator, working); - double_tw_extensible(&tabulator); + tw_extensible_a_t tabulator; + copy_tw_extensible(tabulator, working); + double_tw_extensible(tabulator); - struct tw_pniels_t - pn VECTOR_ALIGNED, - multiples[NTABLE] VECTOR_ALIGNED; - convert_tw_extensible_to_tw_pniels(&pn, &tabulator); - convert_tw_extensible_to_tw_pniels(&multiples[0], working); + tw_pniels_a_t + pn VECTOR_ALIGNED, + multiples[NTABLE] VECTOR_ALIGNED; + convert_tw_extensible_to_tw_pniels(pn, tabulator); + convert_tw_extensible_to_tw_pniels(multiples[0], working); int i,j; for (i=1; i>(WINDOW-1))-1; bits ^= inv; - constant_time_lookup_tw_pniels(&pn, multiples, NTABLE, bits & WINDOW_T_MASK); - cond_negate_tw_pniels(&pn, inv); - convert_tw_pniels_to_tw_extensible(working, &pn); + constant_time_lookup_tw_pniels(pn, multiples, NTABLE, bits & WINDOW_T_MASK); + cond_negate_tw_pniels(pn, inv); + convert_tw_pniels_to_tw_extensible(working, pn); for (i-=WINDOW; i>=0; i-=WINDOW) { @@ -200,15 +200,15 @@ scalarmul ( inv = (bits>>(WINDOW-1))-1; bits ^= inv; - constant_time_lookup_tw_pniels(&pn, multiples, NTABLE, bits & WINDOW_T_MASK); - cond_negate_tw_pniels(&pn, inv); - add_tw_pniels_to_tw_extensible(working, &pn); + constant_time_lookup_tw_pniels(pn, multiples, NTABLE, bits & WINDOW_T_MASK); + cond_negate_tw_pniels(pn, inv); + add_tw_pniels_to_tw_extensible(working, pn); } } void scalarmul_vlook ( - struct tw_extensible_t *working, + tw_extensible_a_t working, const word_t scalar[SCALAR_WORDS] ) { const int WINDOW = SCALARMUL_FIXED_WINDOW_SIZE, @@ -223,20 +223,20 @@ scalarmul_vlook ( ); - struct tw_extensible_t tabulator; - copy_tw_extensible(&tabulator, working); - double_tw_extensible(&tabulator); + tw_extensible_a_t tabulator; + copy_tw_extensible(tabulator, working); + double_tw_extensible(tabulator); - struct tw_pniels_t - pn VECTOR_ALIGNED, - multiples[NTABLE] VECTOR_ALIGNED; - convert_tw_extensible_to_tw_pniels(&pn, &tabulator); - convert_tw_extensible_to_tw_pniels(&multiples[0], working); + tw_pniels_a_t + pn VECTOR_ALIGNED, + multiples[NTABLE] VECTOR_ALIGNED; + 
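/* The pair "inv = (bits>>(WINDOW-1))-1; bits ^= inv;" implements the
 * signed-window decode branchlessly: inv is all-ones exactly when the
 * window's top bit is clear, so one mask both complements the table
 * index and drives the constant-time negation. Each window value b
 * then selects the odd digit 2*b - (2^WINDOW - 1). A toy decoder (the
 * real code looks up (2*idx+1)*P in `multiples` instead of returning
 * an integer):
 */
#include <stdio.h>
#include <stdint.h>

#define TOY_WINDOW 5
#define TOY_WINDOW_T_MASK ((1u << (TOY_WINDOW - 1)) - 1)

static int toy_decode_window(uint32_t bits) {
    uint32_t inv = (bits >> (TOY_WINDOW - 1)) - 1;  /* 0 or 0xffffffff */
    bits ^= inv;
    int digit = 2 * (int)(bits & TOY_WINDOW_T_MASK) + 1;
    return inv ? -digit : digit;  /* the library negates the point by mask */
}

int main(void) {
    uint32_t b;
    for (b = 0; b < (1u << TOY_WINDOW); b++)
        printf("window %2u -> digit %+3d\n", b, toy_decode_window(b));
    return 0;
}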
convert_tw_extensible_to_tw_pniels(pn, tabulator); + convert_tw_extensible_to_tw_pniels(multiples[0], working); int i,j; for (i=1; i>(WINDOW-1))-1; bits ^= inv; - copy_tw_pniels(&pn, &multiples[bits & WINDOW_T_MASK]); - cond_negate_tw_pniels(&pn, inv); - convert_tw_pniels_to_tw_extensible(working, &pn); + copy_tw_pniels(pn, multiples[bits & WINDOW_T_MASK]); + cond_negate_tw_pniels(pn, inv); + convert_tw_pniels_to_tw_extensible(working, pn); for (i-=WINDOW; i>=0; i-=WINDOW) { @@ -264,9 +264,9 @@ scalarmul_vlook ( inv = (bits>>(WINDOW-1))-1; bits ^= inv; - copy_tw_pniels(&pn, &multiples[bits & WINDOW_T_MASK]); - cond_negate_tw_pniels(&pn, inv); - add_tw_pniels_to_tw_extensible(working, &pn); + copy_tw_pniels(pn, multiples[bits & WINDOW_T_MASK]); + cond_negate_tw_pniels(pn, inv); + add_tw_pniels_to_tw_extensible(working, pn); } } @@ -275,7 +275,7 @@ schedule_scalar_for_combs ( word_t *scalar2, const word_t *scalar, unsigned int nbits, - const struct fixed_base_table_t *table + const struct fixed_base_table_t* table ) { unsigned int i; unsigned int n = table->n, t = table->t, s = table->s; @@ -312,10 +312,10 @@ schedule_scalar_for_combs ( mask_t scalarmul_fixed_base ( - struct tw_extensible_t *out, + tw_extensible_a_t out, const word_t scalar[SCALAR_WORDS], unsigned int nbits, - const struct fixed_base_table_t *table + const struct fixed_base_table_t* table ) { unsigned int i,j,k; unsigned int n = table->n, t = table->t, s = table->s; @@ -332,7 +332,7 @@ scalarmul_fixed_base ( assert(t >= 1); #endif - struct tw_niels_t ni; + tw_niels_a_t ni; for (i=0; itable + (j<<(t-1)), 1<<(t-1), tab); - cond_negate_tw_niels(&ni, invert); + constant_time_lookup_tw_niels(ni, table->table + (j<<(t-1)), 1<<(t-1), tab); + cond_negate_tw_niels(ni, invert); if (i||j) { - add_tw_niels_to_tw_extensible(out, &ni); + add_tw_niels_to_tw_extensible(out, ni); } else { - convert_tw_niels_to_tw_extensible(out, &ni); + convert_tw_niels_to_tw_extensible(out, ni); } } } @@ -370,13 +370,13 @@ scalarmul_fixed_base ( mask_t linear_combo_combs_vt ( - struct tw_extensible_t *out, + tw_extensible_a_t out, const word_t scalar1[SCALAR_WORDS], unsigned int nbits1, - const struct fixed_base_table_t *table1, + const struct fixed_base_table_t* table1, const word_t scalar2[SCALAR_WORDS], unsigned int nbits2, - const struct fixed_base_table_t *table2 + const struct fixed_base_table_t* table2 ) { unsigned int i,j,k,sc; unsigned int s1 = table1->s, s2 = table2->s, smax = (s1 > s2) ? s1 : s2; @@ -402,7 +402,7 @@ linear_combo_combs_vt ( assert(table2->t >= 1); #endif - struct tw_niels_t ni; + tw_niels_a_t ni; unsigned int swords[2] = {scalar1b_words, scalar2b_words}, started = 0; word_t *scalars[2] = {scalar1b,scalar2b}; @@ -411,7 +411,7 @@ linear_combo_combs_vt ( if (i) double_tw_extensible(out); for (sc=0; sc<2; sc++) { - const struct fixed_base_table_t *table = sc ? table2 : table1; + const struct fixed_base_table_t* table = sc ? 
table2 : table1; int ii = i-smax+table->s; if (ii < 0) continue; @@ -432,13 +432,13 @@ linear_combo_combs_vt ( tab ^= invert; tab &= (1<<(table->t-1)) - 1; - copy_tw_niels(&ni, &table->table[tab + (j<<(table->t-1))]); - cond_negate_tw_niels(&ni,invert); + copy_tw_niels(ni, table->table[tab + (j<<(table->t-1))]); + cond_negate_tw_niels(ni,invert); if (started) { - add_tw_niels_to_tw_extensible(out, &ni); + add_tw_niels_to_tw_extensible(out, ni); } else { - convert_tw_niels_to_tw_extensible(out, &ni); + convert_tw_niels_to_tw_extensible(out, ni); started = 1; } @@ -454,12 +454,12 @@ linear_combo_combs_vt ( mask_t precompute_fixed_base ( - struct fixed_base_table_t *out, - const struct tw_extensible_t *base, + struct fixed_base_table_t* out, + const tw_extensible_a_t base, unsigned int n, unsigned int t, unsigned int s, - struct tw_niels_t *prealloc + tw_niels_a_t *prealloc ) { if (s < 1 || t < 1 || n < 1 || n*t*s < SCALAR_BITS) { really_memset(out, 0, sizeof(*out)); @@ -470,19 +470,19 @@ precompute_fixed_base ( out->t = t; out->s = s; - struct tw_extensible_t working, start; - copy_tw_extensible(&working, base); - struct tw_pniels_t pn_tmp; + tw_extensible_a_t working, start; + copy_tw_extensible(working, base); + tw_pniels_a_t pn_tmp; - struct tw_pniels_t *doubles = (struct tw_pniels_t *) malloc_vector(sizeof(*doubles) * (t-1)); + tw_pniels_a_t *doubles = (tw_pniels_a_t *) malloc_vector(sizeof(*doubles) * (t-1)); field_a_t *zs = (field_a_t *) malloc_vector(sizeof(*zs) * (n<<(t-1))); field_a_t *zis = (field_a_t *) malloc_vector(sizeof(*zis) * (n<<(t-1))); - struct tw_niels_t *table = prealloc; + tw_niels_a_t *table = prealloc; if (prealloc) { out->own_table = 0; } else { - table = (struct tw_niels_t *) malloc_vector(sizeof(*table) * (n<<(t-1))); + table = (tw_niels_a_t *) malloc_vector(sizeof(*table) * (n<<(t-1))); out->own_table = 1; } out->table = table; @@ -535,23 +535,23 @@ precompute_fixed_base ( /* doubling phase */ for (j=0; j>1); int idx = (((i+1)<<(t-1))-1) ^ gray; - convert_tw_extensible_to_tw_pniels(&pn_tmp, &start); - copy_tw_niels(&table[idx], &pn_tmp.n); - field_copy(zs[idx], pn_tmp.z); + convert_tw_extensible_to_tw_pniels(pn_tmp, start); + copy_tw_niels(table[idx], pn_tmp->n); + field_copy(zs[idx], pn_tmp->z); if (j >= (1u<<(t-1)) - 1) break; int delta = (j+1) ^ ((j+1)>>1) ^ gray; @@ -572,10 +572,10 @@ precompute_fixed_base ( if (gray & (1<a, zis[i]); field_strong_reduce(product); - field_copy(table[i].a, product); + field_copy(table[i]->a, product); - field_mul(product, table[i].b, zis[i]); + field_mul(product, table[i]->b, zis[i]); field_strong_reduce(product); - field_copy(table[i].b, product); + field_copy(table[i]->b, product); - field_mul(product, table[i].c, zis[i]); + field_mul(product, table[i]->c, zis[i]); field_strong_reduce(product); - field_copy(table[i].c, product); + field_copy(table[i]->c, product); } mask_t ret = ~field_is_zero(zis[0]); @@ -617,7 +617,7 @@ precompute_fixed_base ( void destroy_fixed_base ( - struct fixed_base_table_t *table + struct fixed_base_table_t* table ) { if (table->table) { really_memset(table->table,0,sizeof(*table->table)*(table->n<<(table->t-1))); @@ -630,8 +630,8 @@ destroy_fixed_base ( mask_t precompute_fixed_base_wnaf ( - struct tw_niels_t *out, - const struct tw_extensible_t *const_base, + tw_niels_a_t *out, + const tw_extensible_a_t const_base, unsigned int tbits ) { int i; @@ -644,29 +644,29 @@ precompute_fixed_base_wnaf ( return 0; } - struct tw_extensible_t base; - copy_tw_extensible(&base,const_base); + tw_extensible_a_t 
base; + copy_tw_extensible(base,const_base); - struct tw_pniels_t twop, tmp; + tw_pniels_a_t twop, tmp; - convert_tw_extensible_to_tw_pniels(&tmp, &base); - field_copy(zs[0], tmp.z); - copy_tw_niels(&out[0], &tmp.n); + convert_tw_extensible_to_tw_pniels(tmp, base); + field_copy(zs[0], tmp->z); + copy_tw_niels(out[0], tmp->n); if (tbits > 0) { - double_tw_extensible(&base); - convert_tw_extensible_to_tw_pniels(&twop, &base); - add_tw_pniels_to_tw_extensible(&base, &tmp); + double_tw_extensible(base); + convert_tw_extensible_to_tw_pniels(twop, base); + add_tw_pniels_to_tw_extensible(base, tmp); - convert_tw_extensible_to_tw_pniels(&tmp, &base); - field_copy(zs[1], tmp.z); - copy_tw_niels(&out[1], &tmp.n); + convert_tw_extensible_to_tw_pniels(tmp, base); + field_copy(zs[1], tmp->z); + copy_tw_niels(out[1], tmp->n); for (i=2; i < 1<z); + copy_tw_niels(out[i], tmp->n); } } @@ -674,17 +674,17 @@ precompute_fixed_base_wnaf ( field_a_t product; for (i=0; i<1<a, zis[i]); field_strong_reduce(product); - field_copy(out[i].a, product); + field_copy(out[i]->a, product); - field_mul(product, out[i].b, zis[i]); + field_mul(product, out[i]->b, zis[i]); field_strong_reduce(product); - field_copy(out[i].b, product); + field_copy(out[i]->b, product); - field_mul(product, out[i].c, zis[i]); + field_mul(product, out[i]->c, zis[i]); field_strong_reduce(product); - field_copy(out[i].c, product); + field_copy(out[i]->c, product); } free(zs); @@ -760,31 +760,31 @@ recode_wnaf( static void prepare_wnaf_table( - struct tw_pniels_t *output, - struct tw_extensible_t *working, + tw_pniels_a_t *output, + tw_extensible_a_t working, unsigned int tbits ) { int i; - convert_tw_extensible_to_tw_pniels(&output[0], working); + convert_tw_extensible_to_tw_pniels(output[0], working); if (tbits == 0) return; double_tw_extensible(working); - struct tw_pniels_t twop; - convert_tw_extensible_to_tw_pniels(&twop, working); + tw_pniels_a_t twop; + convert_tw_extensible_to_tw_pniels(twop, working); - add_tw_pniels_to_tw_extensible(working, &output[0]); - convert_tw_extensible_to_tw_pniels(&output[1], working); + add_tw_pniels_to_tw_extensible(working, output[0]); + convert_tw_extensible_to_tw_pniels(output[1], working); for (i=2; i < 1< 0) { assert(control[0].addend > 0); assert(control[0].power >= 0); - convert_tw_pniels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); + convert_tw_pniels_to_tw_extensible(working, precmp[control[0].addend >> 1]); } else { set_identity_tw_extensible(working); return; @@ -813,9 +813,9 @@ scalarmul_vt ( assert(control[conti].addend); if (control[conti].addend > 0) { - add_tw_pniels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); + add_tw_pniels_to_tw_extensible(working, precmp[control[conti].addend >> 1]); } else { - sub_tw_pniels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); + sub_tw_pniels_from_tw_extensible(working, precmp[(-control[conti].addend) >> 1]); } conti++; assert(conti <= control_bits); @@ -825,10 +825,10 @@ scalarmul_vt ( void scalarmul_fixed_base_wnaf_vt ( - struct tw_extensible_t *working, + tw_extensible_a_t working, const word_t scalar[SCALAR_WORDS], unsigned int nbits, - const struct tw_niels_t *precmp, + const tw_niels_a_t *precmp, unsigned int table_bits ) { struct smvt_control control[nbits/(table_bits+1)+3]; @@ -838,7 +838,7 @@ scalarmul_fixed_base_wnaf_vt ( if (control_bits > 0) { assert(control[0].addend > 0); assert(control[0].power >= 0); - convert_tw_niels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); + 
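/* The wNAF routines above communicate through (power, addend) control
 * entries: each one means "double until bit `power`, then add the odd
 * multiple indexed by addend>>1, subtracting instead when addend is
 * negative". A minimal signed-wNAF recoder in that spirit; assumptions:
 * a small single-word unsigned scalar and toy types, whereas the
 * library's recode_wnaf handles multi-word scalars:
 */
#include <stdio.h>

struct toy_control { int power, addend; };

static int toy_recode_wnaf(struct toy_control *out, unsigned x, int w) {
    int n = 0, pos = 0;
    while (x) {
        if (x & 1) {
            int d = (int)(x & ((1u << (w + 1)) - 1)); /* low w+1 bits   */
            if (d >= 1 << w) d -= 1 << (w + 1);       /* signed residue */
            out[n].power = pos;
            out[n].addend = d;                        /* always odd     */
            n++;
            x -= (unsigned)d;                         /* remove the digit */
        }
        x >>= 1;
        pos++;
    }
    return n;  /* entries ordered from low bit to high */
}

int main(void) {
    struct toy_control c[40];
    int i, n = toy_recode_wnaf(c, 2015u, 2);
    for (i = n - 1; i >= 0; i--)  /* print high power first */
        printf("power %d, addend %d\n", c[i].power, c[i].addend);
    return 0;
}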
convert_tw_niels_to_tw_extensible(working, precmp[control[0].addend >> 1]); } else { set_identity_tw_extensible(working); return; @@ -853,9 +853,9 @@ scalarmul_fixed_base_wnaf_vt ( assert(control[conti].addend); if (control[conti].addend > 0) { - add_tw_niels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); + add_tw_niels_to_tw_extensible(working, precmp[control[conti].addend >> 1]); } else { - sub_tw_niels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); + sub_tw_niels_from_tw_extensible(working, precmp[(-control[conti].addend) >> 1]); } } @@ -866,12 +866,12 @@ scalarmul_fixed_base_wnaf_vt ( void linear_combo_var_fixed_vt( - struct tw_extensible_t *working, + tw_extensible_a_t working, const word_t scalar_var[SCALAR_WORDS], unsigned int nbits_var, const word_t scalar_pre[SCALAR_WORDS], unsigned int nbits_pre, - const struct tw_niels_t *precmp, + const tw_niels_a_t *precmp, unsigned int table_bits_pre ) { const int table_bits_var = SCALARMUL_WNAF_COMBO_TABLE_BITS; @@ -883,22 +883,22 @@ linear_combo_var_fixed_vt( (void)ncb_var; (void)ncb_pre; - struct tw_pniels_t precmp_var[1< control_pre[0].power) { - convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); + convert_tw_pniels_to_tw_extensible(working, precmp_var[control_var[0].addend >> 1]); contv++; } else if (i == control_pre[0].power && i >=0 ) { - convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); - add_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); + convert_tw_pniels_to_tw_extensible(working, precmp_var[control_var[0].addend >> 1]); + add_tw_niels_to_tw_extensible(working, precmp[control_pre[0].addend >> 1]); contv++; contp++; } else { i = control_pre[0].power; - convert_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); + convert_tw_niels_to_tw_extensible(working, precmp[control_pre[0].addend >> 1]); contp++; } @@ -914,9 +914,9 @@ linear_combo_var_fixed_vt( assert(control_var[contv].addend); if (control_var[contv].addend > 0) { - add_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[contv].addend >> 1]); + add_tw_pniels_to_tw_extensible(working, precmp_var[control_var[contv].addend >> 1]); } else { - sub_tw_pniels_from_tw_extensible(working, &precmp_var[(-control_var[contv].addend) >> 1]); + sub_tw_pniels_from_tw_extensible(working, precmp_var[(-control_var[contv].addend) >> 1]); } contv++; } @@ -925,9 +925,9 @@ linear_combo_var_fixed_vt( assert(control_pre[contp].addend); if (control_pre[contp].addend > 0) { - add_tw_niels_to_tw_extensible(working, &precmp[control_pre[contp].addend >> 1]); + add_tw_niels_to_tw_extensible(working, precmp[control_pre[contp].addend >> 1]); } else { - sub_tw_niels_from_tw_extensible(working, &precmp[(-control_pre[contp].addend) >> 1]); + sub_tw_niels_from_tw_extensible(working, precmp[(-control_pre[contp].addend) >> 1]); } contp++; } diff --git a/src/sha512.c b/src/sha512.c index 82f81ad..9a11bd0 100644 --- a/src/sha512.c +++ b/src/sha512.c @@ -72,7 +72,7 @@ static inline uint64_t maj(uint64_t h1, uint64_t h2, uint64_t h3) { static void sha512_process_block ( - struct sha512_ctx_t *ctx + sha512_ctx_a_t ctx ) { uint64_t i, tmp, a, b, *w = (uint64_t *) ctx->block, @@ -119,7 +119,7 @@ sha512_process_block ( void sha512_init ( - struct sha512_ctx_t *ctx + sha512_ctx_a_t ctx ) { ctx->nbytes = 0; memcpy(ctx->chain, sha512_init_state, sizeof(sha512_init_state)); @@ -128,7 +128,7 @@ sha512_init ( void sha512_update ( - struct sha512_ctx_t *ctx, + 
sha512_ctx_a_t ctx, const unsigned char *data, uint64_t bytes ) { @@ -153,7 +153,7 @@ sha512_update ( void sha512_final ( - struct sha512_ctx_t *ctx, + sha512_ctx_a_t ctx, uint8_t result[64] ) { uint64_t fill = ctx->nbytes % 128, i; diff --git a/test/bench.c b/test/bench.c index 350415b..2c16c65 100644 --- a/test/bench.c +++ b/test/bench.c @@ -146,19 +146,19 @@ int main(int argc, char **argv) { when = now() - when; printf("rand448: %5.1fns\n", when * 1e9 / i); - struct sha512_ctx_t sha; + sha512_ctx_a_t sha; uint8_t hashout[128]; when = now(); for (i=0; i Date: Thu, 22 Jan 2015 17:45:36 -0800 Subject: [PATCH 06/15] fix performance regression for montgomery ladder --- src/ec_point.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ec_point.c b/src/ec_point.c index 2582372..613a12e 100644 --- a/src/ec_point.c +++ b/src/ec_point.c @@ -249,20 +249,20 @@ montgomery_step ( ANALYZE_THIS_ROUTINE_CAREFULLY; field_a_t L0, L1; field_add_nr ( L0, a->zd, a->xd ); - field_sub ( L1, a->xd, a->zd ); - field_sub ( a->zd, a->xa, a->za ); + field_subx_nr ( L1, a->xd, a->zd ); + field_subx_nr ( a->zd, a->xa, a->za ); field_mul ( a->xd, L0, a->zd ); field_add_nr ( a->zd, a->za, a->xa ); field_mul ( a->za, L1, a->zd ); field_add_nr ( a->xa, a->za, a->xd ); field_sqr ( a->zd, a->xa ); field_mul ( a->xa, a->z0, a->zd ); - field_sub ( a->zd, a->xd, a->za ); + field_subx_nr ( a->zd, a->xd, a->za ); field_sqr ( a->za, a->zd ); field_sqr ( a->xd, L0 ); field_sqr ( L0, L1 ); field_mulw_scc ( a->zd, a->xd, 1-EDWARDS_D ); /* FIXME PERF MULW */ - field_sub ( L1, a->xd, L0 ); + field_subx_nr ( L1, a->xd, L0 ); field_mul ( a->xd, L0, a->zd ); field_sub_nr ( L0, a->zd, L1 ); field_bias ( L0, 4 - 2*is32 /*is32 ? 2 : 4*/ ); From d2e5e0fe8ccb343f5e5675f388b49a3ebe454640 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Thu, 22 Jan 2015 18:06:35 -0800 Subject: [PATCH 07/15] const** related fixes for gcc --- src/goldilocks.c | 2 +- src/scalarmul.c | 56 ++++--------------------------------------- test/bench.c | 10 ++++---- test/test_scalarmul.c | 4 ++-- 4 files changed, 13 insertions(+), 59 deletions(-) diff --git a/src/goldilocks.c b/src/goldilocks.c index 1c647f4..32c9a12 100644 --- a/src/goldilocks.c +++ b/src/goldilocks.c @@ -462,7 +462,7 @@ goldilocks_verify ( linear_combo_var_fixed_vt( pk_text, challenge, GOLDI_SCALAR_BITS, s, GOLDI_SCALAR_BITS, - goldilocks_global.wnafs, WNAF_PRECMP_BITS ); + (const tw_niels_a_t*)goldilocks_global.wnafs, WNAF_PRECMP_BITS ); untwist_and_double_and_serialize( pk, pk_text ); diff --git a/src/scalarmul.c b/src/scalarmul.c index 12925b2..af7f72a 100644 --- a/src/scalarmul.c +++ b/src/scalarmul.c @@ -70,52 +70,6 @@ constant_time_lookup_tw_niels ( constant_time_lookup(out,in,sizeof(*out),nin,idx); } -/* -static __inline__ void -constant_time_lookup_tw_pniels ( - tw_pniels_a_t out, - const tw_pniels_a_t in, - int nin, - int idx -) { - big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx); - big_register_t *o = (big_register_t *)out; - const big_register_t *i = (const big_register_t *)in; - int j; - unsigned int k; - - really_memset(out, 0, sizeof(*out)); - for (j=0; j>(WINDOW-1))-1; bits ^= inv; - constant_time_lookup_tw_pniels(pn, multiples, NTABLE, bits & WINDOW_T_MASK); + constant_time_lookup_tw_pniels(pn, (const tw_pniels_a_t*)multiples, NTABLE, bits & WINDOW_T_MASK); cond_negate_tw_pniels(pn, inv); convert_tw_pniels_to_tw_extensible(working, pn); @@ -200,7 +154,7 @@ scalarmul ( inv = (bits>>(WINDOW-1))-1; bits ^= inv; - 
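/* The casts added in this patch work around a C constraint: with the
 * T_a_t[1] typedefs, `const tw_pniels_a_t *` is a pointer to
 * const-qualified arrays, and C has no implicit conversion from
 * T (*)[1] to const T (*)[1], so gcc warns without the explicit cast.
 * A minimal reproduction with a hypothetical elem_t:
 */
typedef struct { int v; } elem_t;
typedef elem_t elem_a_t[1];

static int toy_sum(const elem_a_t *in, int n) {
    int i, s = 0;
    for (i = 0; i < n; i++)
        s += in[i]->v;
    return s;
}

int main(void) {
    elem_a_t table[4] = { {{1}}, {{2}}, {{3}}, {{4}} };
    /* toy_sum(table, 4) draws -Wincompatible-pointer-types; the cast
     * mirrors what the patch does: */
    return toy_sum((const elem_a_t *)table, 4) == 10 ? 0 : 1;
}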
constant_time_lookup_tw_pniels(pn, multiples, NTABLE, bits & WINDOW_T_MASK); + constant_time_lookup_tw_pniels(pn, (const tw_pniels_a_t*)multiples, NTABLE, bits & WINDOW_T_MASK); cond_negate_tw_pniels(pn, inv); add_tw_pniels_to_tw_extensible(working, pn); } @@ -355,7 +309,7 @@ scalarmul_fixed_base ( tab ^= invert; tab &= (1<<(t-1)) - 1; - constant_time_lookup_tw_niels(ni, table->table + (j<<(t-1)), 1<<(t-1), tab); + constant_time_lookup_tw_niels(ni, (const tw_niels_a_t*)table->table + (j<<(t-1)), 1<<(t-1), tab); cond_negate_tw_niels(ni, invert); if (i||j) { add_tw_niels_to_tw_extensible(out, ni); @@ -582,7 +536,7 @@ precompute_fixed_base ( } } - field_simultaneous_invert(zis, zs, n<<(t-1)); + field_simultaneous_invert(zis, (const field_a_t*)zs, n<<(t-1)); field_a_t product; for (i=0; i Date: Thu, 22 Jan 2015 18:25:17 -0800 Subject: [PATCH 08/15] fix test issue found by scan-build --- test/test_pointops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_pointops.c b/test/test_pointops.c index 5f0ec09..bf53afd 100644 --- a/test/test_pointops.c +++ b/test/test_pointops.c @@ -295,7 +295,7 @@ int test_pointops (void) { } ret = single_twisting_test(&base); - //if (ret) return ret; + if (ret) return ret; } return 0; From 57e721ec6b8c157e32ba6bdb8411f750d735124b Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Thu, 22 Jan 2015 18:52:04 -0800 Subject: [PATCH 09/15] fix(?) perf regr in verify pre --- src/scalarmul.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/scalarmul.c b/src/scalarmul.c index af7f72a..93d9443 100644 --- a/src/scalarmul.c +++ b/src/scalarmul.c @@ -356,11 +356,13 @@ linear_combo_combs_vt ( assert(table2->t >= 1); #endif - tw_niels_a_t ni; + const struct tw_niels_t *ni; - unsigned int swords[2] = {scalar1b_words, scalar2b_words}, started = 0; + unsigned int swords[2] = {scalar1b_words, scalar2b_words}; word_t *scalars[2] = {scalar1b,scalar2b}; + set_identity_tw_extensible(out); + for (i=0; it-1)) - 1; - copy_tw_niels(ni, table->table[tab + (j<<(table->t-1))]); - cond_negate_tw_niels(ni,invert); + ni = table->table[tab + (j<<(table->t-1))]; - if (started) { - add_tw_niels_to_tw_extensible(out, ni); - } else { - convert_tw_niels_to_tw_extensible(out, ni); - started = 1; - } - + if (invert) sub_tw_niels_from_tw_extensible(out, ni); + else add_tw_niels_to_tw_extensible(out, ni); } } - - assert(started); } return MASK_SUCCESS; From f4424c3d17fdddedf9b745a6bdc31029749b0633 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Fri, 23 Jan 2015 15:38:43 -0800 Subject: [PATCH 10/15] fix sha512 goof on p521 --- src/goldilocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/goldilocks.c b/src/goldilocks.c index 32c9a12..866ed10 100644 --- a/src/goldilocks.c +++ b/src/goldilocks.c @@ -41,7 +41,7 @@ #else #define FIELD_HASH_BYTES (SHA512_OUTPUT_BYTES * ((FIELD_BYTES-1)/SHA512_OUTPUT_BYTES + 1)) static inline void field_hash_final ( - sha512_ctx_a_t *ctx, + sha512_ctx_a_t ctx, unsigned char out[FIELD_HASH_BYTES] ) { /* SHA PRNG I guess? I really should have used SHAKE */ From de6d61e55437969f68fd0529f90456d7e1c79406 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Sun, 1 Feb 2015 10:16:49 -0800 Subject: [PATCH 11/15] fuse crandom seed+buffer because they are accessed as a single object. 
probably needs a bit more testing though --- include/goldilocks.h | 2 +- src/crandom.c | 20 ++++++++++---------- src/include/crandom.h | 7 ++++--- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/goldilocks.h b/include/goldilocks.h index 1631c2f..e4d4496 100644 --- a/include/goldilocks.h +++ b/include/goldilocks.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. +/* Copyright (c) 2014-2015 Cryptography Research, Inc. * Released under the MIT License. See LICENSE.txt for license information. */ diff --git a/src/crandom.c b/src/crandom.c index 4f36644..83999c9 100644 --- a/src/crandom.c +++ b/src/crandom.c @@ -319,7 +319,7 @@ crandom_init_from_file( ssize_t offset = 0, red; do { - red = read(state->randomfd, state->seed + offset, 32 - offset); + red = read(state->randomfd, state->seedBuffer + offset, 32 - offset); if (red > 0) offset += red; } while (red > 0 && offset < 32); @@ -328,7 +328,7 @@ crandom_init_from_file( return err ? err : -1; } - memset(state->buffer, 0, 96); + memset(state->seedBuffer+32, 0, 96); state->magic = CRANDOM_MAGIC; state->reseeds_mandatory = reseeds_mandatory; @@ -341,8 +341,8 @@ crandom_init_from_buffer( crandom_state_a_t state, const char initial_seed[32] ) { - memcpy(state->seed, initial_seed, 32); - memset(state->buffer, 0, 96); + memcpy(state->seedBuffer, initial_seed, 32); + memset(state->seedBuffer+32, 0, 96); state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0; state->randomfd = -1; state->magic = CRANDOM_MAGIC; @@ -425,7 +425,7 @@ crandom_generate( state->reseed_countdown = state->reseed_interval; ssize_t offset = 0, red; do { - red = read(state->randomfd, state->buffer + offset, 32 - offset); + red = read(state->randomfd, state->seedBuffer + 32 + offset, 32 - offset); if (red > 0) offset += red; } while (red > 0 && offset < 32); @@ -454,19 +454,19 @@ crandom_generate( int i; for (i=0; i<32; i++) { /* Stir in the buffer. If somehow the read failed, it'll be zeros. */ - state->seed[i] ^= state->buffer[i]; + state->seedBuffer[i] ^= state->seedBuffer[i+32]; } } } - crandom_chacha_expand(iv,state->ctr,20,128,state->seed,state->seed); + crandom_chacha_expand(iv,state->ctr,20,128,state->seedBuffer,state->seedBuffer); state->ctr++; - state->fill = sizeof(state->buffer); + state->fill = sizeof(state->seedBuffer)-32; } unsigned long long copy = (length > state->fill) ? state->fill : length; state->fill -= copy; - memcpy(output, state->buffer + state->fill, copy); - really_memset(state->buffer + state->fill, 0, copy); + memcpy(output, state->seedBuffer + 32 + state->fill, copy); + really_memset(state->seedBuffer + 32 + state->fill, 0, copy); output += copy; length -= copy; } diff --git a/src/include/crandom.h b/src/include/crandom.h index c9f4c26..06dc583 100644 --- a/src/include/crandom.h +++ b/src/include/crandom.h @@ -1,5 +1,5 @@ /* Copyright (c) 2011 Stanford University. - * Copyright (c) 2014 Cryptography Research, Inc. + * Copyright (c) 2014-2015 Cryptography Research, Inc. * Released under the MIT License. See LICENSE.txt for license information. 
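/* Layout note for the fusion in this patch: the first 32 bytes of
 * seedBuffer are the ChaCha key (the old `seed`) and the next 96 bytes
 * are the output buffer (the old `buffer`), so the single 128-byte
 * crandom_chacha_expand call refills both at once. A sketch of the
 * offsets, mirroring the memcpy/memset in crandom_init_from_buffer
 * (sizes are the library's constants):
 */
#include <string.h>

#define TOY_SEED_BYTES   32
#define TOY_BUFFER_BYTES 96

struct toy_state {
    unsigned char seedBuffer[TOY_SEED_BYTES + TOY_BUFFER_BYTES];
};

static void toy_set_seed(struct toy_state *st,
                         const unsigned char seed[TOY_SEED_BYTES]) {
    memcpy(st->seedBuffer, seed, TOY_SEED_BYTES);                 /* key half    */
    memset(st->seedBuffer + TOY_SEED_BYTES, 0, TOY_BUFFER_BYTES); /* output half */
}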
*/ @@ -29,8 +29,9 @@ */ struct crandom_state_t { /** @privatesection */ - unsigned char seed[32]; - unsigned char buffer[96]; + /* unsigned char seed[32]; */ + /* unsigned char buffer[96]; */ + unsigned char seedBuffer[32+96]; uint64_t ctr; uint64_t magic; unsigned int fill; From b981251732a95080777a54c27767b1fe4a2599c6 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Sun, 1 Mar 2015 13:17:51 -0800 Subject: [PATCH 12/15] fix rax input for rdrand detection --- src/crandom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crandom.c b/src/crandom.c index 83999c9..4808d3e 100644 --- a/src/crandom.c +++ b/src/crandom.c @@ -27,10 +27,10 @@ unsigned int crandom_detect_features(void) { if (c & 1<<25) out |= AESNI; if (c & 1<<28) out |= AVX; if (b & 1<<5) out |= AVX2; + if (c & 1<<30) out |= RDRAND; a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); if (c & 1<<11) out |= XOP; - if (c & 1<<30) out |= RDRAND; # endif return out; From 5cf6038179e345d312f004cf332c8f27590cc221 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Sun, 1 Mar 2015 13:23:31 -0800 Subject: [PATCH 13/15] adjust history.txt. Also, that last fix on RDRAND is thanks to John Mark Gurney. --- HISTORY.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/HISTORY.txt b/HISTORY.txt index 983cbcb..4901d1a 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -1,3 +1,7 @@ +March 1, 2015: + Not much to report. Most of the fixes since Oct 27 last year have + been bug fixes or simplifications, or in the Decaf branch. + October 27, 2014: Added more support for >512-bit primes. Changed shared secret to not overflow the buffer in this case. Changed hashing to From 393785a384cf41ebf85cd078b4fe71eb2f46c528 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Sat, 7 Mar 2015 16:15:51 -0800 Subject: [PATCH 14/15] fix some mul/sqr-after-add bugs on arch_neon_experimental. Deprecate arch_neon because the experiment seems to be a success anyway --- src/p448/arch_neon/arch_config.h | 1 - src/p448/arch_neon/neon_emulation.h | 155 ------ src/p448/arch_neon/p448.c | 723 ------------------------- src/p448/arch_neon/p448.h | 241 --------- src/p448/arch_neon_experimental/p448.c | 110 ++-- test/test_arithmetic.c | 17 +- 6 files changed, 69 insertions(+), 1178 deletions(-) delete mode 100644 src/p448/arch_neon/arch_config.h delete mode 100644 src/p448/arch_neon/neon_emulation.h delete mode 100644 src/p448/arch_neon/p448.c delete mode 100644 src/p448/arch_neon/p448.h diff --git a/src/p448/arch_neon/arch_config.h b/src/p448/arch_neon/arch_config.h deleted file mode 100644 index 47bbe3e..0000000 --- a/src/p448/arch_neon/arch_config.h +++ /dev/null @@ -1 +0,0 @@ -#define WORD_BITS 32 diff --git a/src/p448/arch_neon/neon_emulation.h b/src/p448/arch_neon/neon_emulation.h deleted file mode 100644 index a97978c..0000000 --- a/src/p448/arch_neon/neon_emulation.h +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ - -/** - * @file "neon_emulation.h" - * @brief NEON intrinsic emulation using clang's vector extensions. - * - * This lets you test and debug NEON code on x86. 
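/* The emulation technique in the header deleted below: define the NEON
 * vector types as extended vectors so lane access (.x/.y) and the
 * ordinary +, -, >> operators work on x86, then reimplement only the
 * intrinsics the field code uses. A self-contained sketch assuming
 * clang's ext_vector_type extension:
 */
#include <stdint.h>

typedef int32_t emu_int32x2_t __attribute__((ext_vector_type(2)));
typedef int64_t emu_int64x2_t __attribute__((ext_vector_type(2)));

/* widening add, lane by lane, as vaddw_s32 does */
static inline emu_int64x2_t emu_vaddw_s32(emu_int64x2_t a, emu_int32x2_t b) {
    a.x += b.x;
    a.y += b.y;
    return a;
}

/* shift-right-accumulate, as vsraq_n_s64 does */
static inline emu_int64x2_t emu_vsraq_n_s64(emu_int64x2_t a,
                                            emu_int64x2_t v, const int n) {
    return a + (v >> n);
}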
- */ - -#ifndef __NEON_EMULATION_H__ -#define __NEON_EMULATION_H__ 1 - -/** @cond internal */ - -#include "word.h" - -#include -#include - -static __inline__ int64x2_t vaddw_s32 (int64x2_t a, int32x2_t b) { - a.x += b.x; - a.y += b.y; - return a; -} - -static __inline__ int64x2_t __attribute__((gnu_inline,always_inline)) -xx_vaddup_s64(int64x2_t x) { - x.y += x.x; - return x; -} - -typedef struct { int32x2_t val[2]; } int32x2x2_t; -static inline int32x2x2_t vtrn_s32 (int32x2_t x, int32x2_t y) { - int32x2x2_t out = {{{ x.x, y.x }, {x.y, y.y}}}; - return out; -} - -static __inline__ void __attribute__((gnu_inline,always_inline)) -xx_vtrnq_s64 ( - int64x2_t *x, - int64x2_t *y -) { - int64_t tmp = (*x).y; - (*x).y = (*y).x; - (*y).x = tmp; -} - -int64x2_t vsraq_n_s64 ( - int64x2_t a, - int64x2_t v, - const int x -) { - return a + (v >> x); -} - -int64x2_t vshrq_n_s64 ( - int64x2_t v, - const int x -) { - return v >> x; -} - -static inline int64_t vgetq_lane_s64 ( - int64x2_t acc, - const int lane -) { - return lane ? acc.y : acc.x; -} - -static inline int32_t vget_lane_s32 ( - int32x2_t acc, - const int lane -) { - return lane ? acc.y : acc.x; -} - -static inline int64x2_t vmlal_lane_s32 ( - int64x2_t acc, - int32x2_t x, - int32x2_t y, - int lane -) { - int64x2_t xx = { x.x, x.y }, yy = { y.x, y.y }; - return acc + xx*(lane?yy.yy:yy.xx); -} - -static inline int64x2_t vmlsl_lane_s32 ( - int64x2_t acc, - int32x2_t x, - int32x2_t y, - int lane -) { - int64x2_t xx = { x.x, x.y }, yy = { y.x, y.y }; - return acc - xx*(lane?yy.yy:yy.xx); -} - -static inline int64x2_t vqdmlsl_lane_s32 ( - int64x2_t acc, - int32x2_t x, - int32x2_t y, - int lane -) { - int64x2_t xx = { x.x, x.y }, yy = { y.x, y.y }; - int64x2_t tmp = xx*(lane?yy.yy:yy.xx); - assert(tmp.x >> 63 == tmp.x>>62); - assert(tmp.y >> 63 == tmp.y>>62); - return acc - 2*tmp; -} - -static inline int64x2_t vqdmlal_lane_s32 ( - int64x2_t acc, - int32x2_t x, - int32x2_t y, - int lane -) { - int64x2_t xx = { x.x, x.y }, yy = { y.x, y.y }; - int64x2_t tmp = xx*(lane?yy.yy:yy.xx); - assert(tmp.x >> 63 == tmp.x>>62); - assert(tmp.y >> 63 == tmp.y>>62); - return acc + 2*tmp; -} - -static inline int64x2_t vqdmull_lane_s32 ( - int32x2_t x, - int32x2_t y, - int lane -) { - int64x2_t xx = { x.x, x.y }, yy = { y.x, y.y }; - int64x2_t tmp = xx*(lane?yy.yy:yy.xx); - assert(tmp.x >> 63 == tmp.x>>62); - assert(tmp.y >> 63 == tmp.y>>62); - return 2*tmp; -} - -static inline int32x2_t vmovn_s64( - int64x2_t x -) { - int32x2_t y = {x.x,x.y}; - return y; -} - -static inline int64x2_t vmull_lane_s32 ( - int32x2_t x, - int32x2_t y, - int lane -) { - int64x2_t xx = { x.x, x.y }, yy = { y.x, y.y }; - return xx*(lane?yy.yy:yy.xx); -} - -/** @endcond */ - -#endif /* __NEON_EMULATION_H__ */ diff --git a/src/p448/arch_neon/p448.c b/src/p448/arch_neon/p448.c deleted file mode 100644 index 956f356..0000000 --- a/src/p448/arch_neon/p448.c +++ /dev/null @@ -1,723 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. 
- */ - -#include "word.h" -#include "p448.h" - -static inline mask_t __attribute__((always_inline)) -is_zero ( - word_t x -) { - dword_t xx = x; - xx--; - return xx >> WORD_BITS; -} - -static uint64_t widemul_32 ( - const uint32_t a, - const uint32_t b -) { - return ((uint64_t)a)* b; -} - -#ifdef __ARM_NEON__ -static __inline__ void __attribute__((gnu_inline,always_inline)) -xx_vtrnq_s64 ( - int64x2_t *x, - int64x2_t *y -) { - __asm__ __volatile__ ("vswp %f0, %e1" : "+w"(*x), "+w"(*y)); -} - -static __inline__ int64x2_t __attribute__((gnu_inline,always_inline)) -xx_vaddup_s64(int64x2_t x) { - __asm__ ("vadd.s64 %f0, %e0" : "+w"(x)); - return x; -} -#else -#include "neon_emulation.h" -#endif /* ARM_NEON */ - -static inline void __attribute__((gnu_inline,always_inline,unused)) -smlal ( - uint64_t *acc, - const uint32_t a, - const uint32_t b -) { - *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b; -} - -static inline void __attribute__((gnu_inline,always_inline,unused)) -smlal2 ( - uint64_t *acc, - const uint32_t a, - const uint32_t b -) { - *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b * 2; -} - -static inline void __attribute__((gnu_inline,always_inline,unused)) -smull ( - uint64_t *acc, - const uint32_t a, - const uint32_t b -) { - *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b; -} - -static inline void __attribute__((gnu_inline,always_inline,unused)) -smull2 ( - uint64_t *acc, - const uint32_t a, - const uint32_t b -) { - *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b * 2; -} - -void -p448_mul ( - p448_t *__restrict__ cs, - const p448_t *as, - const p448_t *bs -) { - const uint32_t *a = as->limb, *b = bs->limb; - uint32_t *c = cs->limb; - - - const int32x2_t - *val = (const int32x2_t *)a, - *vbl = (const int32x2_t *)b, - *vah = (const int32x2_t *)(&a[8]), - *vbh = (const int32x2_t *)(&b[8]); - - int32x2_t - *vcl = (int32x2_t *)c, - *vch = (int32x2_t *)(&c[8]), - vmask = {(1<<28) - 1, (1<<28)-1}; - - int64x2_t accumx0a, accumx0b; - int64x2_t accumx1a, accumx1b; - int64x2_t accumx2a, accumx2b; - int64x2_t accumx3a, accumx3b; - int64x2_t accumx4a, accumx4b; - int64x2_t accumx5a, accumx5b; - int64x2_t accumx6a, accumx6b; - int64x2_t accumx7a, accumx7b; - int64x2_t carry; - int32x2x2_t trn_res; - int32x2_t delta; - - accumx0a = vmull_lane_s32( delta = val[1] + vah[1], vbh[3], 0); - accumx1a = vmull_lane_s32( delta, vbh[3], 1); - accumx0a = vmlal_lane_s32(accumx0a, delta = val[2] + vah[2], vbh[2], 0); - accumx1a = vmlal_lane_s32(accumx1a, delta, vbh[2], 1); - accumx0a = vmlal_lane_s32(accumx0a, delta = val[3] + vah[3], vbh[1], 0); - accumx1a = vmlal_lane_s32(accumx1a, delta, vbh[1], 1); - accumx0b = vmull_lane_s32( delta = val[0] + vah[0], vbh[0], 0); - accumx1b = vmull_lane_s32( delta, vbh[0], 1); - accumx0b = vmlal_lane_s32(accumx0b, vah[1], vbl[3], 0); - accumx1b = vmlal_lane_s32(accumx1b, vah[1], vbl[3], 1); - accumx0b = vmlal_lane_s32(accumx0b, vah[2], vbl[2], 0); - accumx1b = vmlal_lane_s32(accumx1b, vah[2], vbl[2], 1); - accumx0b = vmlal_lane_s32(accumx0b, vah[3], vbl[1], 0); - accumx1b = vmlal_lane_s32(accumx1b, vah[3], vbl[1], 1); - accumx0b += accumx0a; - accumx1b += accumx1a; - accumx0a = vmlal_lane_s32(accumx0a, vah[0], vbl[0], 0); - accumx1a = vmlal_lane_s32(accumx1a, vah[0], vbl[0], 1); - accumx0a = vmlal_lane_s32(accumx0a, val[1], delta = vbl[3] - vbh[3], 0); - accumx1a = vmlal_lane_s32(accumx1a, val[1], delta, 1); - accumx0a = vmlal_lane_s32(accumx0a, val[2], delta = vbl[2] - vbh[2], 0); - accumx1a = vmlal_lane_s32(accumx1a, val[2], delta, 1); - accumx0a = 
vmlal_lane_s32(accumx0a, val[3], delta = vbl[1] - vbh[1], 0); - accumx1a = vmlal_lane_s32(accumx1a, val[3], delta, 1); - accumx0a += accumx0b; - accumx1a += accumx1b; - accumx0b = vmlal_lane_s32(accumx0b, val[0], delta = vbl[0] - vbh[0], 0); - accumx1b = vmlal_lane_s32(accumx1b, val[0], delta, 1); - xx_vtrnq_s64(&accumx0a, &accumx0b); - xx_vtrnq_s64(&accumx1a, &accumx1b); - accumx0b += accumx1a; - accumx0b = vsraq_n_s64(accumx0b,accumx0a,28); - accumx1b = vsraq_n_s64(accumx1b,accumx0b,28); - trn_res = vtrn_s32(vmovn_s64(accumx0a), vmovn_s64(accumx0b)); - vcl[0] = trn_res.val[1] & vmask; - vch[0] = trn_res.val[0] & vmask; - - - - - accumx2a = vmull_lane_s32( delta = val[2] + vah[2], vbh[3], 0); - accumx3a = vmull_lane_s32( delta, vbh[3], 1); - accumx2a = vmlal_lane_s32(accumx2a, delta = val[3] + vah[3], vbh[2], 0); - accumx3a = vmlal_lane_s32(accumx3a, delta, vbh[2], 1); - accumx2b = vmull_lane_s32( delta = val[0] + vah[0], vbh[1], 0); - accumx3b = vmull_lane_s32( delta, vbh[1], 1); - accumx2b = vmlal_lane_s32(accumx2b, delta = val[1] + vah[1], vbh[0], 0); - accumx3b = vmlal_lane_s32(accumx3b, delta, vbh[0], 1); - accumx2b = vmlal_lane_s32(accumx2b, vah[2], vbl[3], 0); - accumx3b = vmlal_lane_s32(accumx3b, vah[2], vbl[3], 1); - accumx2b = vmlal_lane_s32(accumx2b, vah[3], vbl[2], 0); - accumx3b = vmlal_lane_s32(accumx3b, vah[3], vbl[2], 1); - accumx2b += accumx2a; - accumx3b += accumx3a; - accumx2a = vmlal_lane_s32(accumx2a, vah[0], vbl[1], 0); - accumx3a = vmlal_lane_s32(accumx3a, vah[0], vbl[1], 1); - accumx2a = vmlal_lane_s32(accumx2a, vah[1], vbl[0], 0); - accumx3a = vmlal_lane_s32(accumx3a, vah[1], vbl[0], 1); - accumx2a = vmlal_lane_s32(accumx2a, val[2], delta = vbl[3] - vbh[3], 0); - accumx3a = vmlal_lane_s32(accumx3a, val[2], delta, 1); - accumx2a = vmlal_lane_s32(accumx2a, val[3], delta = vbl[2] - vbh[2], 0); - accumx3a = vmlal_lane_s32(accumx3a, val[3], delta, 1); - accumx2a += accumx2b; - accumx3a += accumx3b; - accumx2b = vmlal_lane_s32(accumx2b, val[0], delta = vbl[1] - vbh[1], 0); - accumx3b = vmlal_lane_s32(accumx3b, val[0], delta, 1); - accumx2b = vmlal_lane_s32(accumx2b, val[1], delta = vbl[0] - vbh[0], 0); - accumx3b = vmlal_lane_s32(accumx3b, val[1], delta, 1); - xx_vtrnq_s64(&accumx2a, &accumx2b); - xx_vtrnq_s64(&accumx3a, &accumx3b); - accumx2a += accumx1b; - accumx2b += accumx3a; - accumx2b = vsraq_n_s64(accumx2b,accumx2a,28); - accumx3b = vsraq_n_s64(accumx3b,accumx2b,28); - trn_res = vtrn_s32(vmovn_s64(accumx2a), vmovn_s64(accumx2b)); - vcl[1] = trn_res.val[1] & vmask; - vch[1] = trn_res.val[0] & vmask; - carry = accumx3b; - - - - - accumx4a = vmull_lane_s32( delta = val[3] + vah[3], vbh[3], 0); - accumx5a = vmull_lane_s32( delta, vbh[3], 1); - accumx4b = accumx4a; - accumx5b = accumx5a; - accumx4b = vmlal_lane_s32(accumx4b, delta = val[0] + vah[0], vbh[2], 0); - accumx5b = vmlal_lane_s32(accumx5b, delta, vbh[2], 1); - accumx4b = vmlal_lane_s32(accumx4b, delta = val[1] + vah[1], vbh[1], 0); - accumx5b = vmlal_lane_s32(accumx5b, delta, vbh[1], 1); - accumx4b = vmlal_lane_s32(accumx4b, delta = val[2] + vah[2], vbh[0], 0); - accumx5b = vmlal_lane_s32(accumx5b, delta, vbh[0], 1); - accumx4b = vmlal_lane_s32(accumx4b, vah[3], vbl[3], 0); - accumx5b = vmlal_lane_s32(accumx5b, vah[3], vbl[3], 1); - accumx4a += accumx4b; - accumx5a += accumx5b; - accumx4a = vmlal_lane_s32(accumx4a, vah[0], vbl[2], 0); - accumx5a = vmlal_lane_s32(accumx5a, vah[0], vbl[2], 1); - accumx4a = vmlal_lane_s32(accumx4a, vah[1], vbl[1], 0); - accumx5a = vmlal_lane_s32(accumx5a, vah[1], vbl[1], 1); - 
accumx4a = vmlal_lane_s32(accumx4a, vah[2], vbl[0], 0); - accumx5a = vmlal_lane_s32(accumx5a, vah[2], vbl[0], 1); - accumx4a = vmlal_lane_s32(accumx4a, val[3], delta = vbl[3] - vbh[3], 0); - accumx5a = vmlal_lane_s32(accumx5a, val[3], delta, 1); - /**/ - accumx4b = vmlal_lane_s32(accumx4b, val[0], delta = vbl[2] - vbh[2], 0); - accumx5b = vmlal_lane_s32(accumx5b, val[0], delta, 1); - accumx4b = vmlal_lane_s32(accumx4b, val[1], delta = vbl[1] - vbh[1], 0); - accumx5b = vmlal_lane_s32(accumx5b, val[1], delta, 1); - accumx4b = vmlal_lane_s32(accumx4b, val[2], delta = vbl[0] - vbh[0], 0); - accumx5b = vmlal_lane_s32(accumx5b, val[2], delta, 1); - - xx_vtrnq_s64(&accumx4a, &accumx4b); - xx_vtrnq_s64(&accumx5a, &accumx5b); - accumx4a += carry; - accumx4b += accumx5a; - accumx4b = vsraq_n_s64(accumx4b,accumx4a,28); - accumx5b = vsraq_n_s64(accumx5b,accumx4b,28); - - trn_res = vtrn_s32(vmovn_s64(accumx4a), vmovn_s64(accumx4b)); - vcl[2] = trn_res.val[1] & vmask; - vch[2] = trn_res.val[0] & vmask; - - - - - accumx6b = vmull_lane_s32( delta = val[0] + vah[0], vbh[3], 0); - accumx7b = vmull_lane_s32( delta, vbh[3], 1); - accumx6b = vmlal_lane_s32(accumx6b, delta = val[1] + vah[1], vbh[2], 0); - accumx7b = vmlal_lane_s32(accumx7b, delta, vbh[2], 1); - accumx6b = vmlal_lane_s32(accumx6b, delta = val[2] + vah[2], vbh[1], 0); - accumx7b = vmlal_lane_s32(accumx7b, delta, vbh[1], 1); - accumx6b = vmlal_lane_s32(accumx6b, delta = val[3] + vah[3], vbh[0], 0); - accumx7b = vmlal_lane_s32(accumx7b, delta, vbh[0], 1); - accumx6a = accumx6b; - accumx7a = accumx7b; - accumx6a = vmlal_lane_s32(accumx6a, vah[0], vbl[3], 0); - accumx7a = vmlal_lane_s32(accumx7a, vah[0], vbl[3], 1); - accumx6a = vmlal_lane_s32(accumx6a, vah[1], vbl[2], 0); - accumx7a = vmlal_lane_s32(accumx7a, vah[1], vbl[2], 1); - accumx6a = vmlal_lane_s32(accumx6a, vah[2], vbl[1], 0); - accumx7a = vmlal_lane_s32(accumx7a, vah[2], vbl[1], 1); - accumx6a = vmlal_lane_s32(accumx6a, vah[3], vbl[0], 0); - accumx7a = vmlal_lane_s32(accumx7a, vah[3], vbl[0], 1); - /**/ - accumx6b = vmlal_lane_s32(accumx6b, val[0], delta = vbl[3] - vbh[3], 0); - accumx7b = vmlal_lane_s32(accumx7b, val[0], delta, 1); - accumx6b = vmlal_lane_s32(accumx6b, val[1], delta = vbl[2] - vbh[2], 0); - accumx7b = vmlal_lane_s32(accumx7b, val[1], delta, 1); - accumx6b = vmlal_lane_s32(accumx6b, val[2], delta = vbl[1] - vbh[1], 0); - accumx7b = vmlal_lane_s32(accumx7b, val[2], delta, 1); - accumx6b = vmlal_lane_s32(accumx6b, val[3], delta = vbl[0] - vbh[0], 0); - accumx7b = vmlal_lane_s32(accumx7b, val[3], delta, 1); - - xx_vtrnq_s64(&accumx6a, &accumx6b); - xx_vtrnq_s64(&accumx7a, &accumx7b); - accumx6a += accumx5b; - accumx6b += accumx7a; - - accumx6b = vsraq_n_s64(accumx6b,accumx6a,28); - accumx7b = vsraq_n_s64(accumx7b,accumx6b,28); - trn_res = vtrn_s32(vmovn_s64(accumx6a), vmovn_s64(accumx6b)); - vcl[3] = trn_res.val[1] & vmask; - vch[3] = trn_res.val[0] & vmask; - - - accumx7b = xx_vaddup_s64(accumx7b); - - int32x2_t t0 = vcl[0], t1 = vch[0]; - trn_res = vtrn_s32(t0,t1); - t0 = trn_res.val[0]; t1 = trn_res.val[1]; - - accumx7b = vaddw_s32(accumx7b, t0); - t0 = vmovn_s64(accumx7b) & vmask; - - accumx7b = vshrq_n_s64(accumx7b,28); - accumx7b = vaddw_s32(accumx7b, t1); - t1 = vmovn_s64(accumx7b) & vmask; - trn_res = vtrn_s32(t0,t1); - vcl[0] = trn_res.val[0]; - vch[0] = trn_res.val[1]; - accumx7b = vshrq_n_s64(accumx7b,28); - - t0 = vmovn_s64(accumx7b); - - uint32_t - c0 = vget_lane_s32(t0,0), - c1 = vget_lane_s32(t0,1); - c[2] += c0; - c[10] += c1; -} - -void -p448_sqr ( - p448_t 
*__restrict__ cs, - const p448_t *as -) { - /* FUTURE possible improvements: - * don't use nega-phi algorithm, so as to avoid extra phi-twiddle at end - * or use phi/nega-phi for everything, montgomery style - * or find some sort of phi algorithm which doesn't have this problem - * break up lanemuls so that only diags get 1mul'd instead of diag 2x2 blocks - * - * These improvements are all pretty minor, but I guess together they might matter? - */ - - const uint32_t *b = as->limb; - uint32_t *c = cs->limb; - - int32x2_t vbm[4]; - - const int32x2_t - *vbl = (const int32x2_t *)b, - *vbh = (const int32x2_t *)(&b[8]); - - int i; - for (i=0; i<4; i++) { - vbm[i] = vbl[i] - vbh[i]; - } - - int32x2_t - *vcl = (int32x2_t *)c, - *vch = (int32x2_t *)(&c[8]), - vmask = {(1<<28) - 1, (1<<28)-1}; - - int64x2_t accumx0a, accumx0b; - int64x2_t accumx1a, accumx1b; - int64x2_t accumx2a, accumx2b; - int64x2_t accumx3a, accumx3b; - int64x2_t accumx4a, accumx4b; - int64x2_t accumx5a, accumx5b; - int64x2_t accumx6a, accumx6b; - int64x2_t accumx7a, accumx7b; - int64x2_t carry; - int32x2x2_t trn_res; - - accumx0a = vqdmull_lane_s32( vbh[1], vbh[3], 0); - accumx1a = vqdmull_lane_s32( vbh[1], vbh[3], 1); - accumx2a = vqdmull_lane_s32( vbh[2], vbh[3], 0); - accumx3a = vqdmull_lane_s32( vbh[2], vbh[3], 1); - accumx0a = vmlal_lane_s32(accumx0a, vbh[2], vbh[2], 0); - accumx1a = vmlal_lane_s32(accumx1a, vbh[2], vbh[2], 1); - accumx2b = accumx2a; - accumx3b = accumx3a; - accumx2b = vqdmlal_lane_s32(accumx2b, vbh[0], vbh[1], 0); - accumx3b = vqdmlal_lane_s32(accumx3b, vbh[0], vbh[1], 1); - accumx0b = accumx0a; - accumx1b = accumx1a; - accumx0b = vmlal_lane_s32(accumx0b, vbh[0], vbh[0], 0); - accumx1b = vmlal_lane_s32(accumx1b, vbh[0], vbh[0], 1); - accumx0b = vqdmlal_lane_s32(accumx0b, vbl[1], vbl[3], 0); - accumx1b = vqdmlal_lane_s32(accumx1b, vbl[1], vbl[3], 1); - accumx2b = vqdmlal_lane_s32(accumx2b, vbl[2], vbl[3], 0); - accumx3b = vqdmlal_lane_s32(accumx3b, vbl[2], vbl[3], 1); - accumx0b = vmlal_lane_s32(accumx0b, vbl[2], vbl[2], 0); - accumx1b = vmlal_lane_s32(accumx1b, vbl[2], vbl[2], 1); - accumx2a += accumx2b; - accumx3a += accumx3b; - accumx2a = vqdmlal_lane_s32(accumx2a, vbl[0], vbl[1], 0); - accumx3a = vqdmlal_lane_s32(accumx3a, vbl[0], vbl[1], 1); - accumx0a += accumx0b; - accumx1a += accumx1b; - accumx0a = vmlal_lane_s32(accumx0a, vbl[0], vbl[0], 0); - accumx1a = vmlal_lane_s32(accumx1a, vbl[0], vbl[0], 1); - accumx0a = vqdmlsl_lane_s32(accumx0a, vbm[1], vbm[3], 0); - accumx1a = vqdmlsl_lane_s32(accumx1a, vbm[1], vbm[3], 1); - accumx0a = vmlsl_lane_s32(accumx0a, vbm[2], vbm[2], 0); - accumx1a = vmlsl_lane_s32(accumx1a, vbm[2], vbm[2], 1); - accumx2a = vqdmlsl_lane_s32(accumx2a, vbm[2], vbm[3], 0); - accumx3a = vqdmlsl_lane_s32(accumx3a, vbm[2], vbm[3], 1); - accumx0b += accumx0a; - accumx1b += accumx1a; - accumx0b = vmlsl_lane_s32(accumx0b, vbm[0], vbm[0], 0); - accumx1b = vmlsl_lane_s32(accumx1b, vbm[0], vbm[0], 1); - accumx2b += accumx2a; - accumx3b += accumx3a; - accumx2b = vqdmlsl_lane_s32(accumx2b, vbm[0], vbm[1], 0); - accumx3b = vqdmlsl_lane_s32(accumx3b, vbm[0], vbm[1], 1); - xx_vtrnq_s64(&accumx0b, &accumx0a); - xx_vtrnq_s64(&accumx1b, &accumx1a); - xx_vtrnq_s64(&accumx2b, &accumx2a); - xx_vtrnq_s64(&accumx3b, &accumx3a); - accumx0a += accumx1b; - accumx0a = vsraq_n_s64(accumx0a,accumx0b,28); - accumx1a = vsraq_n_s64(accumx1a,accumx0a,28); - accumx2b += accumx1a; - accumx2a += accumx3b; - accumx2a = vsraq_n_s64(accumx2a,accumx2b,28); - accumx3a = vsraq_n_s64(accumx3a,accumx2a,28); - trn_res = 
vtrn_s32(vmovn_s64(accumx0b), vmovn_s64(accumx0a)); - vcl[0] = trn_res.val[1] & vmask; - vch[0] = trn_res.val[0] & vmask; - trn_res = vtrn_s32(vmovn_s64(accumx2b), vmovn_s64(accumx2a)); - vcl[1] = trn_res.val[1] & vmask; - vch[1] = trn_res.val[0] & vmask; - carry = accumx3a; - - accumx4a = vmull_lane_s32( vbh[3], vbh[3], 0); - accumx5a = vmull_lane_s32( vbh[3], vbh[3], 1); - accumx6b = vqdmull_lane_s32( vbh[0], vbh[3], 0); - accumx7b = vqdmull_lane_s32( vbh[0], vbh[3], 1); - accumx4b = accumx4a; - accumx5b = accumx5a; - accumx4b = vqdmlal_lane_s32(accumx4b, vbh[0], vbh[2], 0); - accumx5b = vqdmlal_lane_s32(accumx5b, vbh[0], vbh[2], 1); - accumx6b = vqdmlal_lane_s32(accumx6b, vbh[1], vbh[2], 0); - accumx7b = vqdmlal_lane_s32(accumx7b, vbh[1], vbh[2], 1); - accumx4b = vmlal_lane_s32(accumx4b, vbh[1], vbh[1], 0); - accumx5b = vmlal_lane_s32(accumx5b, vbh[1], vbh[1], 1); - accumx4b = vmlal_lane_s32(accumx4b, vbl[3], vbl[3], 0); - accumx5b = vmlal_lane_s32(accumx5b, vbl[3], vbl[3], 1); - accumx6a = accumx6b; - accumx7a = accumx7b; - accumx6a = vqdmlal_lane_s32(accumx6a, vbl[0], vbl[3], 0); - accumx7a = vqdmlal_lane_s32(accumx7a, vbl[0], vbl[3], 1); - accumx4a += accumx4b; - accumx5a += accumx5b; - accumx4a = vqdmlal_lane_s32(accumx4a, vbl[0], vbl[2], 0); - accumx5a = vqdmlal_lane_s32(accumx5a, vbl[0], vbl[2], 1); - accumx6a = vqdmlal_lane_s32(accumx6a, vbl[1], vbl[2], 0); - accumx7a = vqdmlal_lane_s32(accumx7a, vbl[1], vbl[2], 1); - accumx4a = vmlal_lane_s32(accumx4a, vbl[1], vbl[1], 0); - accumx5a = vmlal_lane_s32(accumx5a, vbl[1], vbl[1], 1); - accumx4a = vmlsl_lane_s32(accumx4a, vbm[3], vbm[3], 0); - accumx5a = vmlsl_lane_s32(accumx5a, vbm[3], vbm[3], 1); - accumx6b += accumx6a; - accumx7b += accumx7a; - accumx6b = vqdmlsl_lane_s32(accumx6b, vbm[0], vbm[3], 0); - accumx7b = vqdmlsl_lane_s32(accumx7b, vbm[0], vbm[3], 1); - accumx4b += accumx4a; - accumx5b += accumx5a; - accumx4b = vqdmlsl_lane_s32(accumx4b, vbm[0], vbm[2], 0); - accumx5b = vqdmlsl_lane_s32(accumx5b, vbm[0], vbm[2], 1); - accumx4b = vmlsl_lane_s32(accumx4b, vbm[1], vbm[1], 0); - accumx5b = vmlsl_lane_s32(accumx5b, vbm[1], vbm[1], 1); - accumx6b = vqdmlsl_lane_s32(accumx6b, vbm[1], vbm[2], 0); - accumx7b = vqdmlsl_lane_s32(accumx7b, vbm[1], vbm[2], 1); - - xx_vtrnq_s64(&accumx4b, &accumx4a); - xx_vtrnq_s64(&accumx5b, &accumx5a); - xx_vtrnq_s64(&accumx6b, &accumx6a); - xx_vtrnq_s64(&accumx7b, &accumx7a); - accumx4b += carry; - accumx4a += accumx5b; - accumx4a = vsraq_n_s64(accumx4a,accumx4b,28); - accumx5a = vsraq_n_s64(accumx5a,accumx4a,28); - accumx6b += accumx5a; - accumx6a += accumx7b; - - trn_res = vtrn_s32(vmovn_s64(accumx4b), vmovn_s64(accumx4a)); - vcl[2] = trn_res.val[1] & vmask; - vch[2] = trn_res.val[0] & vmask; - accumx6a = vsraq_n_s64(accumx6a,accumx6b,28); - accumx7a = vsraq_n_s64(accumx7a,accumx6a,28); - trn_res = vtrn_s32(vmovn_s64(accumx6b), vmovn_s64(accumx6a)); - vcl[3] = trn_res.val[1] & vmask; - vch[3] = trn_res.val[0] & vmask; - - accumx7a = xx_vaddup_s64(accumx7a); - - int32x2_t t0 = vcl[0], t1 = vch[0]; - trn_res = vtrn_s32(t0,t1); - t0 = trn_res.val[0]; t1 = trn_res.val[1]; - - accumx7a = vaddw_s32(accumx7a, t0); - t0 = vmovn_s64(accumx7a) & vmask; - - accumx7a = vshrq_n_s64(accumx7a,28); - accumx7a = vaddw_s32(accumx7a, t1); - t1 = vmovn_s64(accumx7a) & vmask; - trn_res = vtrn_s32(t0,t1); - vcl[0] = trn_res.val[0]; - vch[0] = trn_res.val[1]; - accumx7a = vshrq_n_s64(accumx7a,28); - - t0 = vmovn_s64(accumx7a); - - uint32_t - c0 = vget_lane_s32(t0,0), - c1 = vget_lane_s32(t0,1); - c[2] += c0; - c[10] += 
c1; -} - -void -p448_mulw ( - p448_t *__restrict__ cs, - const p448_t *as, - uint64_t b -) { - const uint32_t bhi = b>>28, blo = b & ((1<<28)-1); - - const uint32_t *a = as->limb; - uint32_t *c = cs->limb; - - uint64_t accum0, accum8; - uint32_t mask = (1ull<<28)-1; - - int i; - - uint32_t c0, c8, n0, n8; - accum0 = widemul_32(bhi, a[15]); - accum8 = widemul_32(bhi, a[15] + a[7]); - c0 = a[0]; c8 = a[8]; - smlal(&accum0, blo, c0); - smlal(&accum8, blo, c8); - - c[0] = accum0 & mask; accum0 >>= 28; - c[8] = accum8 & mask; accum8 >>= 28; - - i=1; - { - n0 = a[i]; n8 = a[i+8]; - smlal(&accum0, bhi, c0); - smlal(&accum8, bhi, c8); - smlal(&accum0, blo, n0); - smlal(&accum8, blo, n8); - - c[i] = accum0 & mask; accum0 >>= 28; - c[i+8] = accum8 & mask; accum8 >>= 28; - i++; - } - { - c0 = a[i]; c8 = a[i+8]; - smlal(&accum0, bhi, n0); - smlal(&accum8, bhi, n8); - smlal(&accum0, blo, c0); - smlal(&accum8, blo, c8); - - c[i] = accum0 & mask; accum0 >>= 28; - c[i+8] = accum8 & mask; accum8 >>= 28; - i++; - } - { - n0 = a[i]; n8 = a[i+8]; - smlal(&accum0, bhi, c0); - smlal(&accum8, bhi, c8); - smlal(&accum0, blo, n0); - smlal(&accum8, blo, n8); - - c[i] = accum0 & mask; accum0 >>= 28; - c[i+8] = accum8 & mask; accum8 >>= 28; - i++; - } - { - c0 = a[i]; c8 = a[i+8]; - smlal(&accum0, bhi, n0); - smlal(&accum8, bhi, n8); - smlal(&accum0, blo, c0); - smlal(&accum8, blo, c8); - - c[i] = accum0 & mask; accum0 >>= 28; - c[i+8] = accum8 & mask; accum8 >>= 28; - i++; - } - { - n0 = a[i]; n8 = a[i+8]; - smlal(&accum0, bhi, c0); - smlal(&accum8, bhi, c8); - smlal(&accum0, blo, n0); - smlal(&accum8, blo, n8); - - c[i] = accum0 & mask; accum0 >>= 28; - c[i+8] = accum8 & mask; accum8 >>= 28; - i++; - } - { - c0 = a[i]; c8 = a[i+8]; - smlal(&accum0, bhi, n0); - smlal(&accum8, bhi, n8); - smlal(&accum0, blo, c0); - smlal(&accum8, blo, c8); - - c[i] = accum0 & mask; accum0 >>= 28; - c[i+8] = accum8 & mask; accum8 >>= 28; - i++; - } - { - n0 = a[i]; n8 = a[i+8]; - smlal(&accum0, bhi, c0); - smlal(&accum8, bhi, c8); - smlal(&accum0, blo, n0); - smlal(&accum8, blo, n8); - - c[i] = accum0 & mask; accum0 >>= 28; - c[i+8] = accum8 & mask; accum8 >>= 28; - i++; - } - - accum0 += accum8 + c[8]; - c[8] = accum0 & mask; - c[9] += accum0 >> 28; - - accum8 += c[0]; - c[0] = accum8 & mask; - c[1] += accum8 >> 28; -} - -void -p448_strong_reduce ( - p448_t *a -) { - word_t mask = (1ull<<28)-1; - - /* first, clear high */ - a->limb[8] += a->limb[15]>>28; - a->limb[0] += a->limb[15]>>28; - a->limb[15] &= mask; - - /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */ - - /* compute total_value - p. No need to reduce mod p. */ - - dsword_t scarry = 0; - int i; - for (i=0; i<16; i++) { - scarry = scarry + a->limb[i] - ((i==8)?mask-1:mask); - a->limb[i] = scarry & mask; - scarry >>= 28; - } - - /* uncommon case: it was >= p, so now scarry = 0 and this = x - * common case: it was < p, so now scarry = -1 and this = x - p + 2^448 - * so let's add back in p. will carry back off the top for 2^448. - */
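Aside: the add-back that follows is branch-free: after the subtraction loop, scarry is either 0 (the value was >= p) or -1 (it was < p), so it doubles as a mask selecting whether p is added back. A toy version with two 28-bit limbs and a made-up modulus p = 2^56 - 5, purely to illustrate the borrow-mask trick:

    #include <stdint.h>
    #include <assert.h>

    static void toy_strong_reduce(uint32_t limb[2]) {
        const uint32_t mask = (1u << 28) - 1;
        const uint32_t p[2] = { mask - 4, mask }; /* little-endian limbs of 2^56 - 5 */
        int64_t scarry = 0;
        uint64_t carry = 0;
        int i;
        for (i = 0; i < 2; i++) {                 /* compute x - p, limb by limb */
            scarry = scarry + limb[i] - p[i];
            limb[i] = (uint32_t)scarry & mask;
            scarry >>= 28;
        }
        for (i = 0; i < 2; i++) {                 /* add p back iff we borrowed */
            carry = carry + limb[i] + ((uint32_t)scarry & p[i]);
            limb[i] = (uint32_t)carry & mask;
            carry >>= 28;
        }
        assert((int64_t)carry + scarry == 0);     /* top carry cancels the borrow */
    }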
- - assert(is_zero(scarry) | is_zero(scarry+1)); - - word_t scarry_mask = scarry & mask; - dword_t carry = 0; - - /* add it back */ - for (i=0; i<16; i++) { - carry = carry + a->limb[i] + ((i==8)?(scarry_mask&~1):scarry_mask); - a->limb[i] = carry & mask; - carry >>= 28; - } - - assert(is_zero(carry + scarry)); -} - -mask_t -p448_is_zero ( - const struct p448_t *a -) { - struct p448_t b; - p448_copy(&b,a); - p448_strong_reduce(&b); - - uint32_t any = 0; - int i; - for (i=0; i<16; i++) { - any |= b.limb[i]; - } - return is_zero(any); -} - -void -p448_serialize ( - uint8_t *serial, - const struct p448_t *x -) { - int i,j; - p448_t red; - p448_copy(&red, x); - p448_strong_reduce(&red); - for (i=0; i<8; i++) { - uint64_t limb = red.limb[2*i] + (((uint64_t)red.limb[2*i+1])<<28); - for (j=0; j<7; j++) { - serial[7*i+j] = limb; - limb >>= 8; - } - assert(limb == 0); - } -} - -mask_t -p448_deserialize ( - p448_t *x, - const uint8_t serial[56] -) { - int i,j; - for (i=0; i<8; i++) { - uint64_t out = 0; - for (j=0; j<7; j++) { - out |= ((uint64_t)serial[7*i+j])<<(8*j); - } - x->limb[2*i] = out & ((1ull<<28)-1); - x->limb[2*i+1] = out >> 28; - } - - /* Check for reduction. - * - * The idea is to create a variable ge which is all ones (rather, 56 ones) - * if and only if the low $i$ words of $x$ are >= those of p. - * - * Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111) - */ - uint32_t ge = -1, mask = (1ull<<28)-1; - for (i=0; i<8; i++) { - ge &= x->limb[i]; - } - - /* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */ - ge = (ge & (x->limb[8] + 1)) | is_zero(x->limb[8] ^ mask); - - /* Propagate the rest */ - for (i=9; i<16; i++) { - ge &= x->limb[i]; - } - - return ~is_zero(ge ^ mask); -}
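Aside: the ge accumulator in p448_deserialize rejects non-canonical encodings (x >= p) without branching: AND together the limbs that must be all-ones in p, and give limb 8, which is 2^28 - 2 in p, special treatment. A standalone sketch of the same check, with plain C comparisons standing in for the library's branch-free is_zero (names and return convention are illustrative, not the patch's API):

    #include <stdint.h>

    /* Returns 1 iff the 16 28-bit limbs encode a value < p448, whose
     * limbs are all 2^28-1 except limb 8 = 2^28-2. */
    static int sketch_is_reduced(const uint32_t limb[16]) {
        const uint32_t mask = (1u << 28) - 1;
        uint32_t ge = mask;
        int i;
        for (i = 0; i < 8; i++) ge &= limb[i];   /* low limbs vs all-ones */
        /* keep ge only if limb 8 reaches p's 1110... limb; force it if over */
        ge = (ge & (limb[8] + 1)) | (limb[8] == mask ? mask : 0);
        for (i = 9; i < 16; i++) ge &= limb[i];  /* high limbs vs all-ones */
        return ge != mask;                       /* ge == mask means x >= p */
    }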
diff --git a/src/p448/arch_neon/p448.h b/src/p448/arch_neon/p448.h deleted file mode 100644 index f0406cd..0000000 --- a/src/p448/arch_neon/p448.h +++ /dev/null @@ -1,241 +0,0 @@ -/* Copyright (c) 2014 Cryptography Research, Inc. - * Released under the MIT License. See LICENSE.txt for license information. - */ -#ifndef __P448_H__ -#define __P448_H__ 1 - -#include "word.h" - -#include <stdint.h> -#include <assert.h> - -typedef struct p448_t { - uint32_t limb[16]; -} __attribute__((aligned(32))) p448_t; - -#ifdef __cplusplus -extern "C" { -#endif - -static __inline__ void -p448_set_ui ( - p448_t *out, - uint64_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_sub_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_neg_RAW ( - p448_t *out, - const p448_t *a -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_addw ( - p448_t *a, - uint32_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_subw ( - p448_t *a, - uint32_t x -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_copy ( - p448_t *out, - const p448_t *a -) __attribute__((unused,always_inline)); - -static __inline__ void -p448_weak_reduce ( - p448_t *inout -) __attribute__((unused,always_inline)); - -void -p448_strong_reduce ( - p448_t *inout -); - -mask_t -p448_is_zero ( - const p448_t *in -); - -static __inline__ void -p448_bias ( - p448_t *inout, - int amount -) __attribute__((unused,always_inline)); - -void -p448_mul ( - p448_t *__restrict__ out, - const p448_t *a, - const p448_t *b -); - -void -p448_mulw ( - p448_t *__restrict__ out, - const p448_t *a, - uint64_t b -); - -void -p448_sqr ( - p448_t *__restrict__ out, - const p448_t *a -); - -void -p448_serialize ( - uint8_t *serial, - const struct p448_t *x -); - -mask_t -p448_deserialize ( - p448_t *x, - const uint8_t serial[56] -); - -/* -------------- Inline functions begin here -------------- */ - -void -p448_set_ui ( - p448_t *out, - uint64_t x -) { - int i; - out->limb[0] = x & ((1<<28)-1); - out->limb[1] = x>>28; - for (i=2; i<16; i++) { - out->limb[i] = 0; - } -} - -void -p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b -) { - unsigned int i; - for (i=0; i<sizeof(*out)/sizeof(uint32x4_t); i++) { - ((uint32x4_t*)out)[i] = ((const uint32x4_t*)a)[i] + ((const uint32x4_t*)b)[i]; - } - /* - unsigned int i; - for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { - out->limb[i] = a->limb[i] + b->limb[i]; - } - */ -} - -void -p448_sub_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b -) { - unsigned int i; - for (i=0; i<sizeof(*out)/sizeof(uint32x4_t); i++) { - ((uint32x4_t*)out)[i] = ((const uint32x4_t*)a)[i] - ((const uint32x4_t*)b)[i]; - } - /* - unsigned int i; - for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { - out->limb[i] = a->limb[i] - b->limb[i]; - } - */ -} - -void -p448_neg_RAW ( - p448_t *out, - const p448_t *a -) { - unsigned int i; - for (i=0; i<sizeof(*out)/sizeof(uint32x4_t); i++) { - ((uint32x4_t*)out)[i] = -((const uint32x4_t*)a)[i]; - } - /* - unsigned int i; - for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { - out->limb[i] = -a->limb[i]; - } - */ -} - -void -p448_addw ( - p448_t *a, - uint32_t x -) { - a->limb[0] += x; -} - -void -p448_subw ( - p448_t *a, - uint32_t x -) { - a->limb[0] -= x; -} - -void -p448_copy ( - p448_t *out, - const p448_t *a -) { - *out = *a; -} - -void -p448_bias ( - p448_t *a, - int amt -) { - uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt; - uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; - uint32x4_t *aa = (uint32x4_t*) a; - aa[0] += lo; - aa[1] += lo; - aa[2] += hi; - aa[3] += lo; -} - -void -p448_weak_reduce ( - p448_t *a -) { - uint64_t mask = (1ull<<28) - 1; - uint64_t tmp = a->limb[15] >> 28; - int i; - a->limb[8] += tmp; - for (i=15; i>0; i--) { - a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>28); - } - a->limb[0] = (a->limb[0] & mask) + tmp; -} - -#ifdef __cplusplus -}; /* extern "C" */ -#endif - -#endif /* __P448_H__ */ diff --git a/src/p448/arch_neon_experimental/p448.c b/src/p448/arch_neon_experimental/p448.c index 0e2dc5d..6338d24 100644 --- a/src/p448/arch_neon_experimental/p448.c +++ 
b/src/p448/arch_neon_experimental/p448.c @@ -179,7 +179,7 @@ p448_mul ( VMAC(vmlsl.s32,_a1b,_al2_1,_bl0_1,1) VMAC(vmlal.s32,_a1b,_al0_0,_bs0_0,1) - VOP3(vsra.s64,_a0a,_a0b,"#28") + VOP3(vsra.u64,_a0a,_a0b,"#28") VOP3(vsub.i32,_bs0_1,_bl0_1,_bh0_1) VOP2(vmovn.i64,_a0b_0,_a0b) @@ -190,7 +190,7 @@ p448_mul ( VMAC(vmull.s32,_a0a,_as2_0,_bs2_1,0) VOP2(vmovn.i64,_a0b_1,_a1b) VMAC(vmlal.s32,_a0a,_as2_1,_bs2_0,0) - VOP3(vsra.s64,_a1a,_a1b,"#28") + VOP3(vsra.u64,_a1a,_a1b,"#28") VMAC(vmlal.s32,_a0a,_as0_0,_bh0_1,0) VOP2(vbic.i32,_a0b,"#0xf0000000") VMAC(vmlal.s32,_a0a,_as0_1,_bh0_0,0) @@ -227,7 +227,7 @@ p448_mul ( VMAC(vmlal.s32,_a1b,_al0_0,_bs0_1,1) VMAC(vmlal.s32,_a1b,_al0_1,_bs0_0,1) - VOP3(vsra.s64,_a0a,_a0b,"#28") + VOP3(vsra.u64,_a0a,_a0b,"#28") VOP3(vsub.i32,_bs2_0,_bl2_0,_bh2_0) VOP2(vmovn.i64,_a0b_0,_a0b) @@ -237,7 +237,7 @@ p448_mul ( VMAC(vmull.s32,_a0a,_as2_1,_bs2_1,0) VOP2(vmovn.i64,_a0b_1,_a1b) VMAC(vmlal.s32,_a0a,_as0_0,_bh2_0,0) - VOP3(vsra.s64,_a1a,_a1b,"#28") + VOP3(vsra.u64,_a1a,_a1b,"#28") VMAC(vmlal.s32,_a0a,_as0_1,_bh0_1,0) VOP2(vbic.i32,_a0b,"#0xf0000000") VMAC(vmlal.s32,_a0a,_as2_0,_bh0_0,0) @@ -275,7 +275,7 @@ p448_mul ( VMAC(vmlal.s32,_a1b,_al2_0,_bs0_0,1) VOP3(vsub.i32,_bs2_1,_bl2_1,_bh2_1) - VOP3(vsra.s64,_a0a,_a0b,"#28") + VOP3(vsra.u64,_a0a,_a0b,"#28") VOP2(vmovn.i64,_a0b_0,_a0b) VOP2(vswp,_a1b_1,_a1a_0) @@ -284,7 +284,7 @@ p448_mul ( VMAC(vmull.s32,_a0a,_as0_0,_bh2_1,0) VOP2(vmovn.i64,_a0b_1,_a1b) VMAC(vmlal.s32,_a0a,_as0_1,_bh2_0,0) - VOP3(vsra.s64,_a1a,_a1b,"#28") + VOP3(vsra.u64,_a1a,_a1b,"#28") VMAC(vmlal.s32,_a0a,_as2_0,_bh0_1,0) VOP2(vbic.i32,_a0b,"#0xf0000000") VMAC(vmlal.s32,_a0a,_as2_1,_bh0_0,0) @@ -321,14 +321,14 @@ p448_mul ( VMAC(vmlal.s32,_a1b,_al2_0,_bs0_1,1) VMAC(vmlal.s32,_a1b,_al2_1,_bs0_0,1) - VOP3(vsra.s64,_a0a,_a0b,"#28") + VOP3(vsra.u64,_a0a,_a0b,"#28") VOP2(vmovn.i64,_a0b_0,_a0b) VOP2(vswp,_a1b_1,_a1a_0) VOP3(vadd.i64,_a0a,_a0a,_a1b) VOP2(vmovn.i64,_a0b_1,_a0a) - VOP3(vsra.s64,_a1a,_a0a,"#28") + VOP3(vsra.u64,_a1a,_a0a,"#28") VOP2(vbic.i32,_a0b,"#0xf0000000") @@ -376,43 +376,43 @@ p448_sqr ( __asm__ __volatile__ ( "vld2.32 {"_bl0_0","_bl0_1","_bh0_0","_bh0_1"}, [%[b],:128]!" "\n\t" - VOP3(vadd.i32,_bs0_1,_bl0_1,_bh0_1) - VOP3(vsub.i32,_bs0_0,_bl0_0,_bh0_0) - VOP3(vadd.i32,_as0,_bl0,_bh0) + VOP3(vadd.i32,_bs0_1,_bl0_1,_bh0_1) /* 0 .. 2^30 */ + VOP3(vsub.i32,_bs0_0,_bl0_0,_bh0_0) /* +- 2^29 */ + VOP3(vadd.i32,_as0,_bl0,_bh0) /* 0 .. 2^30 */ "vld2.32 {"_bl2_0","_bl2_1","_bh2_0","_bh2_1"}, [%[b],:128]!" "\n\t" - VOP3(vadd.i32,_bs2,_bl2,_bh2) + VOP3(vadd.i32,_bs2,_bl2,_bh2) /* 0 .. 2^30 */ VOP2(vmov,_as2,_bs2) - VMAC(vqdmull.s32,_a0b,_as0_1,_bs2_1,0) - VMAC(vmlal.s32,_a0b,_as2_0,_bs2_0,0) - VMAC(vmlal.s32,_a0b,_as0_0,_bh0_0,0) + VMAC(vqdmull.s32,_a0b,_as0_1,_bs2_1,0) /* 0 .. 8 * 2^58. danger for vqdmlal is 32 */ + VMAC(vmlal.s32,_a0b,_as2_0,_bs2_0,0) /* 0 .. 12 */ + VMAC(vmlal.s32,_a0b,_as0_0,_bh0_0,0) /* 0 .. 14 */ - VMAC(vqdmull.s32,_a1b,_as0_1,_bs2_1,1) - VMAC(vmlal.s32,_a1b,_as2_0,_bs2_0,1) - VMAC(vmlal.s32,_a1b,_as0_0,_bh0_0,1) + VMAC(vqdmull.s32,_a1b,_as0_1,_bs2_1,1) /* 0 .. 8 */ + VMAC(vmlal.s32,_a1b,_as2_0,_bs2_0,1) /* 0 .. 14 */ + VMAC(vmlal.s32,_a1b,_as0_0,_bh0_0,1) /* 0 .. 16 */ - VOP2(vmov,_a0a,_a0b) - VMAC(vqdmlal.s32,_a0a,_bh0_1,_bh2_1,0) - VMAC(vmlal.s32,_a0a,_bh2_0,_bh2_0,0) - VMAC(vmlal.s32,_a0a,_bh0_0,_bl0_0,0) + VOP2(vmov,_a0a,_a0b) /* 0 .. 14 */ + VMAC(vqdmlal.s32,_a0a,_bh0_1,_bh2_1,0) /* 0 .. 16 */ + VMAC(vmlal.s32,_a0a,_bh2_0,_bh2_0,0) /* 0 .. 17 */ + VMAC(vmlal.s32,_a0a,_bh0_0,_bl0_0,0) /* 0 .. 
18 */ - VMAC(vqdmlsl.s32,_a0b,_bl0_1,_bl2_1,0) - VMAC(vmlsl.s32,_a0b,_bl2_0,_bl2_0,0) - VMAC(vmlal.s32,_a0b,_bl0_0,_bs0_0,0) + VMAC(vqdmlsl.s32,_a0b,_bl0_1,_bl2_1,0) /*-2 .. 14 */ + VMAC(vmlsl.s32,_a0b,_bl2_0,_bl2_0,0) /*-3 .. 14 */ + VMAC(vmlal.s32,_a0b,_bl0_0,_bs0_0,0) /*-4 .. 15 */ VOP2(vmov,_a1a,_a1b) - VMAC(vqdmlal.s32,_a1a,_bh0_1,_bh2_1,1) - VMAC(vmlal.s32,_a1a,_bh2_0,_bh2_0,1) - VMAC(vmlal.s32,_a1a,_bh0_0,_bl0_0,1) + VMAC(vqdmlal.s32,_a1a,_bh0_1,_bh2_1,1) /* 0 .. 18 */ + VMAC(vmlal.s32,_a1a,_bh2_0,_bh2_0,1) /* 0 .. 19 */ + VMAC(vmlal.s32,_a1a,_bh0_0,_bl0_0,1) /* 0 .. 20 */ VOP2(vswp,_a0b_1,_a0a_0) - VMAC(vqdmlsl.s32,_a1b,_bl0_1,_bl2_1,1) - VMAC(vmlsl.s32,_a1b,_bl2_0,_bl2_0,1) - VMAC(vmlal.s32,_a1b,_bl0_0,_bs0_0,1) + VMAC(vqdmlsl.s32,_a1b,_bl0_1,_bl2_1,1) /*-2 .. 16 */ + VMAC(vmlsl.s32,_a1b,_bl2_0,_bl2_0,1) /*-3 .. 16 */ + VMAC(vmlal.s32,_a1b,_bl0_0,_bs0_0,1) /*-4 .. 17 */ - VOP3(vsra.s64,_a0a,_a0b,"#28") + VOP3(vsra.u64,_a0a,_a0b,"#28") VOP3(vsub.i32,_bs0_1,_bl0_1,_bh0_1) VOP2(vmovn.i64,_a0b_0,_a0b) @@ -420,35 +420,35 @@ p448_sqr ( VOP3(vadd.i64,_a1b,_a0a,_a1b) - VMAC(vqdmull.s32,_a0a,_as2_0,_bs2_1,0) + VMAC(vqdmull.s32,_a0a,_as2_0,_bs2_1,0) /* 0 .. 8 */ VOP2(vmovn.i64,_a0b_1,_a1b) - VOP3(vsra.s64,_a1a,_a1b,"#28") - VMAC(vqdmlal.s32,_a0a,_as0_0,_bh0_1,0) + VOP3(vsra.u64,_a1a,_a1b,"#28") + VMAC(vqdmlal.s32,_a0a,_as0_0,_bh0_1,0) /* 0 .. 12 */ VOP2(vbic.i32,_a0b,"#0xf0000000") "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t" - VMAC(vqdmull.s32,_a1b,_as2_0,_bs2_1,1) - VMAC(vqdmlal.s32,_a1b,_as0_0,_bh0_1,1) + VMAC(vqdmull.s32,_a1b,_as2_0,_bs2_1,1) /* 0 .. 8 */ + VMAC(vqdmlal.s32,_a1b,_as0_0,_bh0_1,1) /* 0 .. 12 */ - VOP2(vmov,_a0b_1,_a0a_1) - VOP3(vadd.i64,_a0b_0,_a0a_0,_a1a_0) - VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1) - VMAC(vqdmlal.s32,_a0a,_bh2_0,_bh2_1,0) - VMAC(vqdmlal.s32,_a0a,_bh0_0,_bl0_1,0) + VOP2(vmov,_a0b,_a0a) /* 0 .. 12 */ + VMAC(vqdmlal.s32,_a0a,_bh2_0,_bh2_1,0) /* 0 .. 14 */ + VMAC(vqdmlal.s32,_a0a,_bh0_0,_bl0_1,0) /* 0 .. 16 */ - VMAC(vqdmlsl.s32,_a0b,_bl2_0,_bl2_1,0) - VMAC(vqdmlal.s32,_a0b,_bl0_0,_bs0_1,0) + VMAC(vqdmlsl.s32,_a0b,_bl2_0,_bl2_1,0) /*-2 .. 12 */ + VMAC(vqdmlal.s32,_a0b,_bl0_0,_bs0_1,0) /*-4 .. 14 */ + VOP3(vadd.i64,_a0a_0,_a0a_0,_a1a_1) + VOP3(vadd.i64,_a0b_0,_a0b_0,_a1a_0) - VOP2(vmov,_a1a,_a1b) - VMAC(vqdmlal.s32,_a1a,_bh2_0,_bh2_1,1) - VMAC(vqdmlal.s32,_a1a,_bh0_0,_bl0_1,1) + VOP2(vmov,_a1a,_a1b) /* 0 .. 12 */ + VMAC(vqdmlal.s32,_a1a,_bh2_0,_bh2_1,1) /* 0 .. 14 */ + VMAC(vqdmlal.s32,_a1a,_bh0_0,_bl0_1,1) /* 0 .. 16 */ VOP2(vswp,_a0b_1,_a0a_0) - VMAC(vqdmlsl.s32,_a1b,_bl2_0,_bl2_1,1) - VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs0_1,1) + VMAC(vqdmlsl.s32,_a1b,_bl2_0,_bl2_1,1) /*-2 .. 12 */ + VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs0_1,1) /*-4 .. 
14 */ - VOP3(vsra.s64,_a0a,_a0b,"#28") + VOP3(vsra.u64,_a0a,_a0b,"#28") VOP3(vsub.i32,_bs2_0,_bl2_0,_bh2_0) VOP2(vmovn.i64,_a0b_0,_a0b) @@ -458,7 +458,7 @@ p448_sqr ( VMAC(vmull.s32,_a0a,_as2_1,_bs2_1,0) VOP2(vmovn.i64,_a0b_1,_a1b) VMAC(vqdmlal.s32,_a0a,_as0_0,_bh2_0,0) - VOP3(vsra.s64,_a1a,_a1b,"#28") + VOP3(vsra.u64,_a1a,_a1b,"#28") VMAC(vmlal.s32,_a0a,_as0_1,_bh0_1,0) VOP2(vbic.i32,_a0b,"#0xf0000000") "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t" @@ -490,7 +490,7 @@ p448_sqr ( VMAC(vmlal.s32,_a1b,_bl0_1,_bs0_1,1) VOP3(vsub.i32,_bs2_1,_bl2_1,_bh2_1) - VOP3(vsra.s64,_a0a,_a0b,"#28") + VOP3(vsra.u64,_a0a,_a0b,"#28") VOP2(vmovn.i64,_a0b_0,_a0b) VOP2(vswp,_a1b_1,_a1a_0) @@ -498,7 +498,7 @@ p448_sqr ( VMAC(vqdmull.s32,_a0a,_as0_0,_bh2_1,0) VOP2(vmovn.i64,_a0b_1,_a1b) - VOP3(vsra.s64,_a1a,_a1b,"#28") + VOP3(vsra.u64,_a1a,_a1b,"#28") VMAC(vqdmlal.s32,_a0a,_as2_0,_bh0_1,0) VOP2(vbic.i32,_a0b,"#0xf0000000") "vstmia %[c]!, {"_a0b_0", "_a0b_1"}" "\n\t" @@ -524,14 +524,14 @@ p448_sqr ( VMAC(vqdmlal.s32,_a1b,_bl0_0,_bs2_1,1) VMAC(vqdmlal.s32,_a1b,_bl2_0,_bs0_1,1) - VOP3(vsra.s64,_a0a,_a0b,"#28") + VOP3(vsra.u64,_a0a,_a0b,"#28") VOP2(vmovn.i64,_a0b_0,_a0b) VOP2(vswp,_a1b_1,_a1a_0) VOP3(vadd.i64,_a0a,_a0a,_a1b) VOP2(vmovn.i64,_a0b_1,_a0a) - VOP3(vsra.s64,_a1a,_a0a,"#28") + VOP3(vsra.u64,_a1a,_a0a,"#28") VOP2(vbic.i32,_a0b,"#0xf0000000") diff --git a/test/test_arithmetic.c b/test/test_arithmetic.c index d1bc3f2..ed88f66 100644 --- a/test/test_arithmetic.c +++ b/test/test_arithmetic.c @@ -132,12 +132,14 @@ static mask_t test_mul_sqr ( const mpz_t y, word_t word ) { - field_a_t xx,yy,tt; - mpz_t t; + ANALYZE_THIS_ROUTINE_CAREFULLY; + field_a_t xx,yy,tt,zz; + mpz_t t, z; mask_t succ = MASK_SUCCESS; succ = mpz_to_field(xx,x); succ &= mpz_to_field(yy,y); mpz_init(t); + mpz_init(z); field_mul(tt,xx,yy); mpz_mul(t,x,y); @@ -150,17 +152,26 @@ static mask_t test_mul_sqr ( field_sqr(tt,xx); mpz_mul(t,x,x); succ &= field_assert_eq_gmp("sqrx",xx,yy,tt,t,0,1.1); - + field_sqr(tt,yy); mpz_mul(t,y,y); succ &= field_assert_eq_gmp("sqy",xx,yy,tt,t,0,1.1); + field_add_nr(zz,xx,xx); + mpz_add(z,x,x); + mpz_mul(t,z,z); + field_mul(tt,zz,zz); + succ &= field_assert_eq_gmp("msr4",xx,yy,tt,t,0,1.1); + field_sqr(tt,zz); + succ &= field_assert_eq_gmp("sqr4",xx,yy,tt,t,0,1.1); + if (!succ) { field_print(" x", xx); field_print(" y", yy); } mpz_clear(t); + mpz_clear(z); return succ; } From 9ce5cbf53ca27a11f18b07f80b8c23ec938f0336 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Sun, 22 Mar 2015 18:47:14 -0700 Subject: [PATCH 15/15] perf improvement in keygen, sign --- src/include/constant_time.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/include/constant_time.h b/src/include/constant_time.h index 405c2f5..b114146 100644 --- a/src/include/constant_time.h +++ b/src/include/constant_time.h @@ -12,6 +12,7 @@ #define __CONSTANT_TIME_H__ 1 #include "word.h" +#include <string.h> /* * Constant-time operations on hopefully-compile-time-sized memory @@ -148,7 +149,7 @@ constant_time_lookup ( const unsigned char *table = (const unsigned char *)table_; word_t j,k; - really_memset(out, 0, elem_bytes); + memset(out, 0, elem_bytes); for (j=0; j
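Aside on the final hunk: constant_time_lookup first zeroes the output (the memset this patch switches to) and then ORs every table element into it under a mask that is all-ones only at the secret index, so the sequence of memory accesses is independent of that index. A hedged sketch of that shape with simplified types (a hypothetical ct_lookup_sketch, not the header's exact code):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void ct_lookup_sketch(uint8_t *out, const uint8_t *table,
                                 size_t n_elems, size_t elem_bytes, size_t idx) {
        size_t i, j;
        memset(out, 0, elem_bytes);
        for (i = 0; i < n_elems; i++) {
            size_t x = i ^ idx;  /* zero only at the wanted index */
            uint8_t mask = (uint8_t)(((x | (0 - x)) >> (8 * sizeof(size_t) - 1)) - 1);
            for (j = 0; j < elem_bytes; j++)
                out[j] |= table[i * elem_bytes + j] & mask; /* accumulate under mask */
        }
    }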