@@ -1,269 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#include "barrett_field.h" | |||||
#include <assert.h> | |||||
word_t | |||||
add_nr_ext_packed( | |||||
word_t *out, | |||||
const word_t *a, | |||||
int nwords_a, | |||||
const word_t *c, | |||||
int nwords_c, | |||||
word_t mask | |||||
) { | |||||
int i; | |||||
dword_t carry = 0; | |||||
for (i=0; i<nwords_c; i++) { | |||||
out[i] = carry = carry + a[i] + (c[i]&mask); | |||||
carry >>= WORD_BITS; | |||||
} | |||||
for (; i<nwords_a; i++) { | |||||
out[i] = carry = carry + a[i]; | |||||
carry >>= WORD_BITS; | |||||
} | |||||
return carry; | |||||
} | |||||
static __inline__ word_t | |||||
add_nr_packed( | |||||
word_t *a, | |||||
const word_t *c, | |||||
int nwords | |||||
) { | |||||
int i; | |||||
dword_t carry = 0; | |||||
for (i=0; i<nwords; i++) { | |||||
a[i] = carry = carry + a[i] + c[i]; | |||||
carry >>= WORD_BITS; | |||||
} | |||||
return carry; | |||||
} | |||||
static __inline__ word_t | |||||
sub_nr_packed( | |||||
word_t *a, | |||||
const word_t *c, | |||||
int nwords | |||||
) { | |||||
int i; | |||||
dsword_t carry = 0; | |||||
for (i=0; i<nwords; i++) { | |||||
a[i] = carry = carry + a[i] - c[i]; | |||||
carry >>= WORD_BITS; | |||||
} | |||||
return carry; | |||||
} | |||||
word_t | |||||
sub_nr_ext_packed( | |||||
word_t *out, | |||||
const word_t *a, | |||||
int nwords_a, | |||||
const word_t *c, | |||||
int nwords_c, | |||||
word_t mask | |||||
) { | |||||
int i; | |||||
dsword_t carry = 0; | |||||
for (i=0; i<nwords_c; i++) { | |||||
out[i] = carry = carry + a[i] - (c[i]&mask); | |||||
carry >>= WORD_BITS; | |||||
} | |||||
for (; i<nwords_a; i++) { | |||||
out[i] = carry = carry + a[i]; | |||||
carry >>= WORD_BITS; | |||||
} | |||||
return carry; | |||||
} | |||||
static word_t | |||||
widemac( | |||||
word_t *accum, | |||||
int nwords_accum, | |||||
const word_t *mier, | |||||
int nwords_mier, | |||||
word_t mand, | |||||
word_t carry | |||||
) { | |||||
int i; | |||||
assert(nwords_accum >= nwords_mier); | |||||
for (i=0; i<nwords_mier; i++) { | |||||
/* UMAAL chain for the wordy part of p */ | |||||
dword_t product = ((dword_t)mand) * mier[i]; | |||||
product += accum[i]; | |||||
product += carry; | |||||
accum[i] = product; | |||||
carry = product >> WORD_BITS; | |||||
} | |||||
for (; i<nwords_accum; i++) { | |||||
dword_t sum = ((dword_t)carry) + accum[i]; | |||||
accum[i] = sum; | |||||
carry = sum >> WORD_BITS; | |||||
} | |||||
return carry; | |||||
} | |||||
void | |||||
barrett_negate ( | |||||
word_t *a, | |||||
int nwords_a, | |||||
const word_t *p_lo, | |||||
int nwords_p, | |||||
int nwords_lo, | |||||
int p_shift | |||||
) { | |||||
int i; | |||||
dsword_t carry = 0; | |||||
barrett_reduce(a,nwords_a,0,p_lo,nwords_p,nwords_lo,p_shift); | |||||
/* Have p = 2^big - p_lo. Want p - a = 2^big - p_lo - a */ | |||||
for (i=0; i<nwords_lo; i++) { | |||||
a[i] = carry = carry - p_lo[i] - a[i]; | |||||
carry >>= WORD_BITS; | |||||
} | |||||
for (; i<nwords_p; i++) { | |||||
a[i] = carry = carry - a[i]; | |||||
if (i<nwords_p-1) { | |||||
carry >>= WORD_BITS; | |||||
} | |||||
} | |||||
a[nwords_p-1] = carry = carry + (((word_t)1) << p_shift); | |||||
for (; i<nwords_a; i++) { | |||||
assert(!a[i]); | |||||
} | |||||
assert(!(carry>>64)); | |||||
} | |||||
void | |||||
barrett_reduce( | |||||
word_t *a, | |||||
int nwords_a, | |||||
word_t a_carry, | |||||
const word_t *p_lo, | |||||
int nwords_p, | |||||
int nwords_lo, | |||||
int p_shift | |||||
) { | |||||
/* TODO: non 2^k-c primes. */ | |||||
int repeat, nwords_left_in_a=nwords_a; | |||||
/* TODO: is there a point to this a_carry business? */ | |||||
assert(a_carry < ((word_t)1)<<p_shift && nwords_a >= nwords_p); | |||||
for (; nwords_left_in_a >= nwords_p; nwords_left_in_a--) { | |||||
for (repeat=0; repeat<2; repeat++) { | |||||
/* PERF: surely a more careful implementation could | |||||
* avoid this double round | |||||
*/ | |||||
word_t mand = a[nwords_left_in_a-1] >> p_shift; | |||||
a[nwords_left_in_a-1] &= (((word_t)1)<<p_shift)-1; | |||||
if (p_shift && !repeat) { | |||||
/* collect high bits when there are any */ | |||||
if (nwords_left_in_a < nwords_a) { | |||||
mand |= a[nwords_left_in_a] << (WORD_BITS-p_shift); | |||||
a[nwords_left_in_a] = 0; | |||||
} else { | |||||
mand |= a_carry << (WORD_BITS-p_shift); | |||||
} | |||||
} | |||||
word_t carry = widemac(a+nwords_left_in_a-nwords_p, nwords_p, p_lo, nwords_lo, mand, 0); | |||||
assert(!carry); | |||||
(void)carry; | |||||
} | |||||
} | |||||
assert(nwords_left_in_a == nwords_p-1); | |||||
/* OK, but it still isn't reduced. Add and subtract p_lo. */ | |||||
word_t cout = add_nr_ext_packed(a,a,nwords_p,p_lo,nwords_lo,-1); | |||||
if (p_shift) { | |||||
cout = (cout<<(WORD_BITS-p_shift)) + (a[nwords_p-1]>>p_shift); | |||||
a[nwords_p-1] &= (((word_t)1)<<p_shift)-1; | |||||
} | |||||
/* mask = carry-1: if no carry then do sub, otherwise don't */ | |||||
sub_nr_ext_packed(a,a,nwords_p,p_lo,nwords_lo,cout-1); | |||||
} | |||||
/* PERF: This function is horribly slow. Enough to break 1%. */ | |||||
void | |||||
barrett_mul_or_mac( | |||||
word_t *accum, | |||||
int nwords_accum, | |||||
const word_t *a, | |||||
int nwords_a, | |||||
const word_t *b, | |||||
int nwords_b, | |||||
const word_t *p_lo, | |||||
int nwords_p, | |||||
int nwords_lo, | |||||
int p_shift, | |||||
mask_t doMac | |||||
) { | |||||
assert(nwords_accum >= nwords_p); | |||||
/* nwords_tmp = max(nwords_a + 1, nwords_p + 1, nwords_accum if doMac); */ | |||||
int nwords_tmp = (nwords_a > nwords_p) ? nwords_a : nwords_p; | |||||
nwords_tmp++; | |||||
if (nwords_tmp < nwords_accum && doMac) | |||||
nwords_tmp = nwords_accum; | |||||
word_t tmp[nwords_tmp]; | |||||
int bpos, i; | |||||
for (i=0; i<nwords_tmp; i++) { | |||||
tmp[i] = 0; | |||||
} | |||||
for (bpos=nwords_b-1; bpos >= 0; bpos--) { | |||||
/* Invariant at the beginning of the loop: the high word is unused. */ | |||||
assert(tmp[nwords_tmp-1] == 0); | |||||
/* shift up */ | |||||
for (i=nwords_tmp-2; i>=0; i--) { | |||||
tmp[i+1] = tmp[i]; | |||||
} | |||||
tmp[0] = 0; | |||||
/* mac and reduce */ | |||||
word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0); | |||||
/* the mac can't carry, because nwords_tmp >= nwords_a+1 and its high word is clear */ | |||||
assert(!carry); | |||||
barrett_reduce(tmp, nwords_tmp, carry, p_lo, nwords_p, nwords_lo, p_shift); | |||||
/* at this point, the number of words used is nwords_p <= nwords_tmp-1, | |||||
* so the high word is again clear */ | |||||
} | |||||
if (doMac) { | |||||
word_t cout = add_nr_packed(tmp, accum, nwords_accum); | |||||
barrett_reduce(tmp, nwords_tmp, cout, p_lo, nwords_p, nwords_lo, p_shift); | |||||
} | |||||
for (i=0; i<nwords_tmp && i<nwords_accum; i++) { | |||||
accum[i] = tmp[i]; | |||||
} | |||||
for (; i<nwords_tmp; i++) { | |||||
assert(tmp[i] == 0); | |||||
} | |||||
for (; i<nwords_accum; i++) { | |||||
accum[i] = 0; | |||||
} | |||||
} |
@@ -1,126 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#ifndef __BARRETT_FIELD_H__ | |||||
#define __BARRETT_FIELD_H__ 1 | |||||
#include "word.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
void | |||||
barrett_reduce( | |||||
word_t *a, | |||||
int nwords_a, | |||||
word_t a_carry, | |||||
const word_t *p_lo, | |||||
int nwords_p, | |||||
int nwords_lo, | |||||
int p_shift | |||||
); | |||||
/* | |||||
* out = a+(c&mask), with carry returned. | |||||
* #out must equal #a (HACK?) | |||||
*/ | |||||
word_t | |||||
add_nr_ext_packed( | |||||
word_t *out, | |||||
const word_t *a, | |||||
int nwords_a, | |||||
const word_t *c, | |||||
int nwords_c, | |||||
word_t mask | |||||
); | |||||
word_t | |||||
sub_nr_ext_packed( | |||||
word_t *out, | |||||
const word_t *a, | |||||
int nwords_a, | |||||
const word_t *c, | |||||
int nwords_c, | |||||
word_t mask | |||||
); | |||||
void | |||||
barrett_negate ( | |||||
word_t *a, | |||||
int nwords_a, | |||||
const word_t *p_lo, | |||||
int nwords_p, | |||||
int nwords_lo, | |||||
int p_shift | |||||
); | |||||
/* | |||||
* If doMac, accum = accum + a*b mod p. | |||||
* Otherwise, accum = a*b mod p. | |||||
* | |||||
* This function is not __restrict__; you may pass accum, | |||||
* a, b, etc all from the same location. | |||||
*/ | |||||
void | |||||
barrett_mul_or_mac( | |||||
word_t *accum, | |||||
int nwords_accum, | |||||
const word_t *a, | |||||
int nwords_a, | |||||
const word_t *b, | |||||
int nwords_b, | |||||
const word_t *p_lo, | |||||
int nwords_p, | |||||
int nwords_lo, | |||||
int p_shift, | |||||
mask_t doMac | |||||
); | |||||
static inline void | |||||
barrett_mul( | |||||
word_t *out, | |||||
int nwords_out, | |||||
const word_t *a, | |||||
int nwords_a, | |||||
const word_t *b, | |||||
int nwords_b, | |||||
const word_t *p_lo, | |||||
int nwords_p, | |||||
int nwords_lo, | |||||
int p_shift | |||||
) { | |||||
barrett_mul_or_mac(out,nwords_out,a,nwords_a,b,nwords_b,p_lo,nwords_p,nwords_lo,p_shift,0); | |||||
} | |||||
static inline void | |||||
barrett_mac( | |||||
word_t *out, | |||||
int nwords_out, | |||||
const word_t *a, | |||||
int nwords_a, | |||||
const word_t *b, | |||||
int nwords_b, | |||||
const word_t *p_lo, | |||||
int nwords_p, | |||||
int nwords_lo, | |||||
int p_shift | |||||
) { | |||||
barrett_mul_or_mac(out,nwords_out,a,nwords_a,b,nwords_b,p_lo,nwords_p,nwords_lo,p_shift,-1); | |||||
} | |||||
#ifdef __cplusplus | |||||
}; /* extern "C" */ | |||||
#endif | |||||
#endif /* __BARRETT_FIELD_H__ */ |
@@ -1,827 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#include <sys/time.h> | |||||
#include <sys/types.h> | |||||
#include <stdlib.h> | |||||
#include <stdio.h> | |||||
#include <memory.h> | |||||
#include "p448.h" | |||||
#include "ec_point.h" | |||||
#include "scalarmul.h" | |||||
#include "barrett_field.h" | |||||
#include "crandom.h" | |||||
#include "goldilocks.h" | |||||
#include "sha512.h" | |||||
/*
 * Four-word constant, least-significant word first, handed to
 * barrett_reduce() as p_lo together with nwords_p=7, nwords_lo=4 and
 * p_shift=62 in the benchmark below.
 * NOTE(review): under barrett_field's "p = 2^big - p_lo" convention this
 * presumably encodes the scalar modulus q = 2^446 - q448_lo -- confirm.
 */
word_t q448_lo[4] = {
    0xDC873D6D54A7BB0DULL,
    0xDE933D8D723A70AAULL,
    0x3BB124B65129C96FULL,
    0x000000008335DC16ULL
};
/*
 * Current wall-clock time from gettimeofday(), in seconds (with a
 * microsecond-resolution fractional part).  Used to time the benchmark
 * loops by taking the difference of two calls.
 *
 * Declared with (void) so the compiler checks that no arguments are passed.
 */
double now(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_sec + tv.tv_usec/1000000.0;
}
void p448_randomize( struct crandom_state_t *crand, struct p448_t *a ) { | |||||
crandom_generate(crand, (unsigned char *)a, sizeof(*a)); | |||||
p448_strong_reduce(a); | |||||
} | |||||
/*
 * Fill the seven 64-bit words of sk with bytes drawn from crand.
 * No reduction is applied to the result.
 */
void q448_randomize( struct crandom_state_t *crand, uint64_t sk[7] ) {
    unsigned char *raw_bytes = (unsigned char *)sk;

    crandom_generate(crand, raw_bytes, 7*sizeof(uint64_t));
}
void p448_print( const char *descr, const struct p448_t *a ) { | |||||
p448_t b; | |||||
p448_copy(&b, a); | |||||
p448_strong_reduce(&b); | |||||
int j; | |||||
printf("%s = 0x", descr); | |||||
for (j=7; j>=0; j--) { | |||||
printf("%014llx", (unsigned long long)b.limb[j]); | |||||
} | |||||
printf("\n"); | |||||
} | |||||
void p448_print_full( const char *descr, const struct p448_t *a ) { | |||||
int j; | |||||
printf("%s = 0x", descr); | |||||
for (j=7; j>=0; j--) { | |||||
printf("%02llx_%014llx ", a->limb[j]>>56, (unsigned long long)a->limb[j]&(1ull<<56)-1); | |||||
} | |||||
printf("\n"); | |||||
} | |||||
/*
 * Print "<descr> = 0x<hex>" followed by a newline, where <hex> is the
 * seven 64-bit words of secret, most-significant word (index 6) first,
 * 16 hex digits each.
 */
void q448_print( const char *descr, const uint64_t secret[7] ) {
    int remaining = 7;

    printf("%s = 0x", descr);
    while (remaining-- > 0) {
        printf("%016llx", (unsigned long long)secret[remaining]);
    }
    printf("\n");
}
int main(int argc, char **argv) { | |||||
(void)argc; | |||||
(void)argv; | |||||
struct tw_extensible_t ext; | |||||
struct extensible_t exta; | |||||
struct tw_niels_t niels; | |||||
struct tw_pniels_t pniels; | |||||
struct affine_t affine; | |||||
struct montgomery_t mb; | |||||
struct p448_t a,b,c,d; | |||||
double when; | |||||
int i,j; | |||||
/* Bad randomness so we can debug. */ | |||||
char initial_seed[32]; | |||||
for (i=0; i<32; i++) initial_seed[i] = i; | |||||
struct crandom_state_t crand; | |||||
crandom_init_from_buffer(&crand, initial_seed); | |||||
uint64_t sk[7],tk[7]; | |||||
q448_randomize(&crand, sk); | |||||
when = now(); | |||||
for (i=0; i<10000000; i++) { | |||||
p448_mul(&c, &b, &a); | |||||
} | |||||
when = now() - when; | |||||
printf("mul: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<10000000; i++) { | |||||
p448_sqr(&c, &a); | |||||
} | |||||
when = now() - when; | |||||
printf("sqr: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<5000000; i++) { | |||||
p448_mul(&c, &b, &a); | |||||
p448_mul(&a, &b, &c); | |||||
} | |||||
when = now() - when; | |||||
printf("mul dep: %5.1fns\n", when * 1e9 / i / 2); | |||||
when = now(); | |||||
for (i=0; i<10000000; i++) { | |||||
p448_mulw(&c, &b, 1234562); | |||||
} | |||||
when = now() - when; | |||||
printf("mulw: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<100000; i++) { | |||||
p448_randomize(&crand, &a); | |||||
} | |||||
when = now() - when; | |||||
printf("rand448: %5.1fns\n", when * 1e9 / i); | |||||
struct sha512_ctx_t sha; | |||||
uint8_t hashout[128]; | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
sha512_init(&sha); | |||||
sha512_final(&sha, hashout); | |||||
} | |||||
when = now() - when; | |||||
printf("sha512 1blk: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
sha512_update(&sha, hashout, 128); | |||||
} | |||||
when = now() - when; | |||||
printf("sha512 blk: %5.1fns (%0.2f MB/s)\n", when * 1e9 / i, 128*i/when/1e6); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
p448_isr(&c, &a); | |||||
} | |||||
when = now() - when; | |||||
printf("isr auto: %5.1fµs\n", when * 1e6 / i); | |||||
for (i=0; i<100; i++) { | |||||
p448_randomize(&crand, &a); | |||||
p448_isr(&d,&a); | |||||
p448_sqr(&b,&d); | |||||
p448_mul(&c,&b,&a); | |||||
p448_sqr(&b,&c); | |||||
p448_subw(&b,1); | |||||
p448_bias(&b,1); | |||||
if (!p448_is_zero(&b)) { | |||||
printf("ISR validation failure!\n"); | |||||
p448_print("a", &a); | |||||
p448_print("s", &d); | |||||
} | |||||
} | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
elligator_2s_inject(&affine, &a); | |||||
} | |||||
when = now() - when; | |||||
printf("elligator: %5.1fµs\n", when * 1e6 / i); | |||||
for (i=0; i<100; i++) { | |||||
p448_randomize(&crand, &a); | |||||
elligator_2s_inject(&affine, &a); | |||||
if (!validate_affine(&affine)) { | |||||
printf("Elligator validation failure!\n"); | |||||
p448_print("a", &a); | |||||
p448_print("x", &affine.x); | |||||
p448_print("y", &affine.y); | |||||
} | |||||
} | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
deserialize_affine(&affine, &a); | |||||
} | |||||
when = now() - when; | |||||
printf("decompress: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
serialize_extensible(&a, &exta); | |||||
} | |||||
when = now() - when; | |||||
printf("compress: %5.1fµs\n", when * 1e6 / i); | |||||
int goods = 0; | |||||
for (i=0; i<100; i++) { | |||||
p448_randomize(&crand, &a); | |||||
mask_t good = deserialize_affine(&affine, &a); | |||||
if (good & !validate_affine(&affine)) { | |||||
printf("Deserialize validation failure!\n"); | |||||
p448_print("a", &a); | |||||
p448_print("x", &affine.x); | |||||
p448_print("y", &affine.y); | |||||
} else if (good) { | |||||
goods++; | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
serialize_extensible(&b, &exta); | |||||
p448_sub(&c,&b,&a); | |||||
p448_bias(&c,2); | |||||
if (!p448_is_zero(&c)) { | |||||
printf("Reserialize validation failure!\n"); | |||||
p448_print("a", &a); | |||||
p448_print("x", &affine.x); | |||||
p448_print("y", &affine.y); | |||||
deserialize_affine(&affine, &b); | |||||
p448_print("b", &b); | |||||
p448_print("x", &affine.x); | |||||
p448_print("y", &affine.y); | |||||
printf("\n"); | |||||
} | |||||
} | |||||
} | |||||
if (goods<i/3) { | |||||
printf("Deserialization validation failure! Deserialized %d/%d points\n", goods, i); | |||||
} | |||||
uint64_t lsk[12]; | |||||
for (i=0;i<10; i++) { | |||||
for (j=11; j>=0; j--) { | |||||
lsk[j] = random(); | |||||
lsk[j] = lsk[j]<<22 ^ random(); | |||||
lsk[j] = lsk[j]<<22 ^ random(); | |||||
} | |||||
} | |||||
when = now(); | |||||
for (i=0; i<1000000; i++) { | |||||
barrett_reduce(lsk,12,0,q448_lo,7,4,62); | |||||
} | |||||
when = now() - when; | |||||
printf("barrett red: %5.1fns\n", when * 1e9 / i); | |||||
// | |||||
// when = now(); | |||||
// for (i=0; i<100000; i++) { | |||||
// barrett_mac(lsk,7,lsk,7,lsk,7,q448_lo,7,4,62); | |||||
// } | |||||
// when = now() - when; | |||||
// printf("barrett mac: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<1000000; i++) { | |||||
add_tw_niels_to_tw_extensible(&ext, &niels); | |||||
} | |||||
when = now() - when; | |||||
printf("exti+niels: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<1000000; i++) { | |||||
add_tw_pniels_to_tw_extensible(&ext, &pniels); | |||||
} | |||||
when = now() - when; | |||||
printf("exti+pniels: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<1000000; i++) { | |||||
double_tw_extensible(&ext); | |||||
} | |||||
when = now() - when; | |||||
printf("exti dbl: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<1000000; i++) { | |||||
untwist_and_double(&exta, &ext); | |||||
} | |||||
when = now() - when; | |||||
printf("i->a isog: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<1000000; i++) { | |||||
twist_and_double(&ext, &exta); | |||||
} | |||||
when = now() - when; | |||||
printf("a->i isog: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<1000000; i++) { | |||||
montgomery_step(&mb); | |||||
} | |||||
when = now() - when; | |||||
printf("monty step: %5.1fns\n", when * 1e9 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
p448_montgomery_ladder(&a,&b,sk,448,0); | |||||
} | |||||
when = now() - when; | |||||
printf("full ladder: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
edwards_scalar_multiply(&ext,sk); | |||||
} | |||||
when = now() - when; | |||||
printf("edwards smz: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
edwards_scalar_multiply_vlook(&ext,sk); | |||||
untwist_and_double_and_serialize(&a,&ext); | |||||
} | |||||
when = now() - when; | |||||
printf("edwards svl: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
q448_randomize(&crand, sk); | |||||
edwards_scalar_multiply_vt(&ext,sk); | |||||
} | |||||
when = now() - when; | |||||
printf("edwards vtm: %5.1fµs\n", when * 1e6 / i); | |||||
struct tw_niels_t wnaft[1<<6]; | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
precompute_for_wnaf(wnaft,&ext,6); | |||||
} | |||||
when = now() - when; | |||||
printf("wnaf6 pre: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
q448_randomize(&crand, sk); | |||||
edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,6); | |||||
} | |||||
when = now() - when; | |||||
printf("edwards vt6: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
precompute_for_wnaf(wnaft,&ext,4); | |||||
} | |||||
when = now() - when; | |||||
printf("wnaf4 pre: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
q448_randomize(&crand, sk); | |||||
edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,4); | |||||
} | |||||
when = now() - when; | |||||
printf("edwards vt4: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
precompute_for_wnaf(wnaft,&ext,5); | |||||
} | |||||
when = now() - when; | |||||
printf("wnaf5 pre: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
q448_randomize(&crand, sk); | |||||
edwards_scalar_multiply_vt_pre(&ext,sk,wnaft,5); | |||||
} | |||||
when = now() - when; | |||||
printf("edwards vt5: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
q448_randomize(&crand, sk); | |||||
q448_randomize(&crand, tk); | |||||
edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); | |||||
} | |||||
when = now() - when; | |||||
printf("vt vf combo: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
deserialize_affine(&affine, &a); | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&ext,&exta); | |||||
edwards_scalar_multiply(&ext,sk); | |||||
untwist_and_double(&exta,&ext); | |||||
serialize_extensible(&b, &exta); | |||||
} | |||||
when = now() - when; | |||||
printf("edwards sm: %5.1fµs\n", when * 1e6 / i); | |||||
struct tw_niels_t table[80] __attribute__((aligned(32))); | |||||
while (1) { | |||||
p448_randomize(&crand, &a); | |||||
if (deserialize_affine(&affine, &a)) break; | |||||
} | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&ext,&exta); | |||||
when = now(); | |||||
for (i=0; i<1000; i++) { | |||||
precompute_for_combs(table, &ext, 5, 5, 18); | |||||
} | |||||
when = now() - when; | |||||
printf("pre(5,5,18): %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
edwards_comb(&ext, sk, table, 5, 5, 18); | |||||
} | |||||
when = now() - when; | |||||
printf("com(5,5,18): %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
edwards_comb(&ext, sk, table, 3, 5, 30); | |||||
} | |||||
when = now() - when; | |||||
printf("com(3,5,30): %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
edwards_comb(&ext, sk, table, 8, 4, 14); | |||||
} | |||||
when = now() - when; | |||||
printf("com(4,4,28): %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
q448_randomize(&crand, sk); | |||||
edwards_comb(&ext, sk, table, 5, 5, 18); | |||||
untwist_and_double(&exta,&ext); | |||||
serialize_extensible(&b, &exta); | |||||
} | |||||
when = now() - when; | |||||
printf("keygen: %5.1fµs\n", when * 1e6 / i); | |||||
printf("\nGoldilocks:\n"); | |||||
int res = goldilocks_init(); | |||||
assert(!res); | |||||
struct goldilocks_public_key_t gpk,hpk; | |||||
struct goldilocks_private_key_t gsk,hsk; | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
if (i&1) { | |||||
res = goldilocks_keygen(&gsk,&gpk); | |||||
} else { | |||||
res = goldilocks_keygen(&hsk,&hpk); | |||||
} | |||||
assert(!res); | |||||
} | |||||
when = now() - when; | |||||
printf("keygen: %5.1fµs\n", when * 1e6 / i); | |||||
uint8_t ss1[64],ss2[64]; | |||||
int gres1,gres2; | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
if (i&1) { | |||||
gres1 = goldilocks_shared_secret(ss1,&gsk,&hpk); | |||||
} else { | |||||
gres2 = goldilocks_shared_secret(ss2,&hsk,&gpk); | |||||
} | |||||
} | |||||
when = now() - when; | |||||
printf("ecdh: %5.1fµs\n", when * 1e6 / i); | |||||
if (gres1 || gres2 || memcmp(ss1,ss2,64)) { | |||||
printf("[FAIL] %d %d\n",gres1,gres2); | |||||
printf("ss1 = "); | |||||
for (i=0; i<56; i++) { | |||||
printf("%02x", ss1[i]); | |||||
} | |||||
printf("\nss2 = "); | |||||
for (i=0; i<56; i++) { | |||||
printf("%02x", ss2[i]); | |||||
} | |||||
printf("\n"); | |||||
} | |||||
uint8_t sout[56*2]; | |||||
const char *message = "hello world"; | |||||
uint64_t message_len = strlen(message); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
res = goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk); | |||||
assert(!res); | |||||
} | |||||
when = now() - when; | |||||
printf("sign: %5.1fµs\n", when * 1e6 / i); | |||||
when = now(); | |||||
for (i=0; i<10000; i++) { | |||||
res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk); | |||||
} | |||||
when = now() - when; | |||||
printf("verify: %5.1fµs\n", when * 1e6 / i); | |||||
printf("\nTesting...\n"); | |||||
int failures=0, successes = 0; | |||||
for (i=0; i<1000; i++) { | |||||
(void)goldilocks_keygen(&gsk,&gpk); | |||||
goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk); | |||||
res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk); | |||||
if (res) failures++; | |||||
} | |||||
if (failures) { | |||||
printf("FAIL %d/%d signature checks!\n", failures, i); | |||||
} | |||||
failures=0; successes = 0; | |||||
for (i=0; i<1000; i++) { | |||||
p448_randomize(&crand, &a); | |||||
uint64_t two = 2; | |||||
mask_t good = p448_montgomery_ladder(&b,&a,&two,2,0); | |||||
if (!good) continue; | |||||
uint64_t x = rand(), y=rand(), z=x*y; | |||||
p448_montgomery_ladder(&b,&a,&x,64,0); | |||||
p448_montgomery_ladder(&c,&b,&y,64,0); | |||||
p448_montgomery_ladder(&b,&a,&z,64,0); | |||||
p448_sub(&d,&b,&c); | |||||
p448_bias(&d,2); | |||||
if (!p448_is_zero(&d)) { | |||||
printf("Odd ladder validation failure %d!\n", ++failures); | |||||
p448_print("a", &a); | |||||
printf("x=%llx, y=%llx, z=%llx\n", x,y,z); | |||||
p448_print("c", &c); | |||||
p448_print("b", &b); | |||||
printf("\n"); | |||||
} | |||||
} | |||||
failures = 0; | |||||
for (i=0; i<1000; i++) { | |||||
mask_t good; | |||||
do { | |||||
p448_randomize(&crand, &a); | |||||
good = deserialize_affine(&affine, &a); | |||||
} while (!good); | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&ext,&exta); | |||||
untwist_and_double(&exta,&ext); | |||||
serialize_extensible(&b, &exta); | |||||
untwist_and_double_and_serialize(&c, &ext); | |||||
p448_sub(&d,&b,&c); | |||||
p448_bias(&d,2); | |||||
if (good && !p448_is_zero(&d)){ | |||||
printf("Iso+serial validation failure %d!\n", ++failures); | |||||
p448_print("a", &a); | |||||
p448_print("b", &b); | |||||
p448_print("c", &c); | |||||
printf("\n"); | |||||
} else if (good) { | |||||
successes ++; | |||||
} | |||||
} | |||||
if (successes < i/3) { | |||||
printf("Iso+serial variation: only %d/%d successful.\n", successes, i); | |||||
} | |||||
failures = 0; | |||||
uint64_t four = 4; | |||||
for (i=0; i<1000; i++) { | |||||
p448_randomize(&crand, &a); | |||||
q448_randomize(&crand, sk); | |||||
mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0); | |||||
good &= p448_montgomery_ladder(&c,&b,sk,448,0); | |||||
mask_t goodb = deserialize_affine(&affine, &a); | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&ext,&exta); | |||||
edwards_scalar_multiply(&ext,sk); | |||||
untwist_and_double(&exta,&ext); | |||||
serialize_extensible(&b, &exta); | |||||
p448_sub(&d,&b,&c); | |||||
p448_bias(&d,2); | |||||
if (good != goodb) { | |||||
printf("Compatibility validation failure %d: good: %d != %d\n", ++failures, (int)(-good), (int)(-goodb)); | |||||
} else if (good && !p448_is_zero(&d)){ | |||||
printf("Compatibility validation failure %d!\n", ++failures); | |||||
p448_print("a", &a); | |||||
q448_print("s", sk); | |||||
p448_print("c", &c); | |||||
p448_print("b", &b); | |||||
printf("\n"); | |||||
} else if (good) { | |||||
successes ++; | |||||
} | |||||
} | |||||
if (successes < i/3) { | |||||
printf("Compatibility variation: only %d/%d successful.\n", successes, i); | |||||
} | |||||
successes = failures = 0; | |||||
for (i=0; i<1000; i++) { | |||||
p448_randomize(&crand, &a); | |||||
q448_randomize(&crand, sk); | |||||
if (!i) bzero(&sk, sizeof(sk)); | |||||
mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0); | |||||
good &= p448_montgomery_ladder(&c,&b,sk,448,0); | |||||
if (!good) continue; | |||||
deserialize_affine(&affine, &a); | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&ext,&exta); | |||||
precompute_for_combs(table, &ext, 5, 5, 18); | |||||
edwards_comb(&ext, sk, table, 5, 5, 18); | |||||
untwist_and_double(&exta,&ext); | |||||
serialize_extensible(&b, &exta); | |||||
p448_sub(&d,&b,&c); | |||||
p448_bias(&d,2); | |||||
if (!p448_is_zero(&d)){ | |||||
printf("Comb validation failure %d!\n", ++failures); | |||||
p448_print("a", &a); | |||||
q448_print("s", sk); | |||||
p448_print("c", &c); | |||||
p448_print("b", &b); | |||||
printf("\n"); | |||||
} else if (good) { | |||||
successes ++; | |||||
} | |||||
} | |||||
if (successes < i/3) { | |||||
printf("Comb variation: only %d/%d successful.\n", successes, i); | |||||
} | |||||
successes = failures = 0; | |||||
for (i=0; i<1000; i++) { | |||||
p448_randomize(&crand, &a); | |||||
q448_randomize(&crand, sk); | |||||
if (!i) bzero(&sk, sizeof(sk)); | |||||
mask_t good = deserialize_affine(&affine, &a); | |||||
if (!good) continue; | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&ext,&exta); | |||||
struct tw_extensible_t exu; | |||||
copy_tw_extensible(&exu, &ext); | |||||
edwards_scalar_multiply(&ext,sk); | |||||
untwist_and_double(&exta,&ext); | |||||
serialize_extensible(&b, &exta); | |||||
edwards_scalar_multiply_vt(&exu,sk); | |||||
untwist_and_double(&exta,&exu); | |||||
serialize_extensible(&c, &exta); | |||||
p448_sub(&d,&b,&c); | |||||
p448_bias(&d,2); | |||||
if (!p448_is_zero(&d)){ | |||||
printf("WNAF validation failure %d!\n", ++failures); | |||||
p448_print("a", &a); | |||||
q448_print("s", sk); | |||||
p448_print("c", &c); | |||||
p448_print("b", &b); | |||||
printf("\n"); | |||||
} else if (good) { | |||||
successes ++; | |||||
} | |||||
} | |||||
if (successes < i/3) { | |||||
printf("WNAF variation: only %d/%d successful.\n", successes, i); | |||||
} | |||||
successes = failures = 0; | |||||
for (i=0; i<1000; i++) { | |||||
p448_randomize(&crand, &a); | |||||
q448_randomize(&crand, sk); | |||||
if (!i) bzero(&sk, sizeof(sk)); | |||||
mask_t good = deserialize_affine(&affine, &a); | |||||
if (!good) continue; | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&ext,&exta); | |||||
struct tw_extensible_t exu; | |||||
copy_tw_extensible(&exu, &ext); | |||||
edwards_scalar_multiply(&ext,sk); | |||||
untwist_and_double(&exta,&ext); | |||||
serialize_extensible(&b, &exta); | |||||
precompute_for_wnaf(wnaft,&exu,5); | |||||
edwards_scalar_multiply_vt_pre(&exu,sk,wnaft,5); | |||||
untwist_and_double(&exta,&exu); | |||||
serialize_extensible(&c, &exta); | |||||
p448_sub(&d,&b,&c); | |||||
p448_bias(&d,2); | |||||
if (!p448_is_zero(&d)){ | |||||
printf("PreWNAF validation failure %d!\n", ++failures); | |||||
p448_print("a", &a); | |||||
q448_print("s", sk); | |||||
p448_print("c", &c); | |||||
p448_print("b", &b); | |||||
for (j=0; j<1<<5; j++) { | |||||
printf("WNAFT %d\n", j); | |||||
p448_print(" a",&wnaft[j].a); | |||||
p448_print(" b",&wnaft[j].b); | |||||
p448_print(" c",&wnaft[j].c); | |||||
} | |||||
printf("\n\n"); | |||||
} else if (good) { | |||||
successes ++; | |||||
} | |||||
} | |||||
if (successes < i/3) { | |||||
printf("PreWNAF variation: only %d/%d successful.\n", successes, i); | |||||
} | |||||
successes = failures = 0; | |||||
for (i=0; i<1000; i++) { | |||||
struct p448_t aa; | |||||
struct tw_extensible_t exu,exv,exw; | |||||
mask_t good; | |||||
do { | |||||
p448_randomize(&crand, &a); | |||||
good = deserialize_affine(&affine, &a); | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&ext,&exta); | |||||
} while (!good); | |||||
do { | |||||
p448_randomize(&crand, &aa); | |||||
good = deserialize_affine(&affine, &aa); | |||||
convert_affine_to_extensible(&exta,&affine); | |||||
twist_and_double(&exu,&exta); | |||||
} while (!good); | |||||
p448_randomize(&crand, &aa); | |||||
q448_randomize(&crand, sk); | |||||
if (i==0 || i==2) bzero(&sk, sizeof(sk)); | |||||
q448_randomize(&crand, tk); | |||||
if (i==0 || i==1) bzero(&tk, sizeof(tk)); | |||||
copy_tw_extensible(&exv, &ext); | |||||
copy_tw_extensible(&exw, &exu); | |||||
edwards_scalar_multiply(&exv,sk); | |||||
edwards_scalar_multiply(&exw,tk); | |||||
convert_tw_extensible_to_tw_pniels(&pniels, &exw); | |||||
add_tw_pniels_to_tw_extensible(&exv,&pniels); | |||||
untwist_and_double(&exta,&exv); | |||||
serialize_extensible(&b, &exta); | |||||
precompute_for_wnaf(wnaft,&exu,5); | |||||
edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5); | |||||
untwist_and_double(&exta,&exv); | |||||
serialize_extensible(&c, &exta); | |||||
p448_sub(&d,&b,&c); | |||||
p448_bias(&d,2); | |||||
if (!p448_is_zero(&d)){ | |||||
printf("PreWNAF combo validation failure %d!\n", ++failures); | |||||
p448_print("a", &a); | |||||
p448_print("A", &aa); | |||||
q448_print("s", sk); | |||||
q448_print("t", tk); | |||||
p448_print("c", &c); | |||||
p448_print("b", &b); | |||||
printf("\n\n"); | |||||
} else if (good) { | |||||
successes ++; | |||||
} | |||||
} | |||||
if (successes < i) { | |||||
printf("PreWNAF combo variation: only %d/%d successful.\n", successes, i); | |||||
} | |||||
successes = failures = 0; | |||||
for (i=0; i<1000; i++) { | |||||
p448_randomize(&crand, &a); | |||||
q448_randomize(&crand, sk); | |||||
q448_randomize(&crand, tk); | |||||
uint64_t two = 2; | |||||
mask_t good = p448_montgomery_ladder(&b,&a,&two,2,0); | |||||
p448_montgomery_ladder(&b,&a,sk,448,0); | |||||
p448_montgomery_ladder(&d,&b,tk,448,0); | |||||
p448_montgomery_ladder(&b,&a,tk,448,0); | |||||
p448_montgomery_ladder(&c,&b,sk,448,0); | |||||
p448_sub(&b,&c,&d); | |||||
p448_bias(&b,2); | |||||
mask_t success = p448_is_zero(&b) | ~good; | |||||
if (!success) { | |||||
printf("Ladder validation failure %d!\n", ++failures); | |||||
p448_print("a", &a); | |||||
q448_print("s", sk); | |||||
q448_print("t", tk); | |||||
p448_print("c", &c); | |||||
p448_print("d", &d); | |||||
printf("\n"); | |||||
} | |||||
} | |||||
return 0; | |||||
} |
@@ -1,442 +0,0 @@ | |||||
/* Copyright (c) 2011 Stanford University. | |||||
* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
/* Chacha random number generator code copied from crandom */ | |||||
#include "intrinsics.h" | |||||
#include "crandom.h" | |||||
#include <stdio.h> | |||||
volatile unsigned int crandom_features = 0; | |||||
unsigned int crandom_detect_features() { | |||||
unsigned int out = GEN; | |||||
# if (defined(__i386__) || defined(__x86_64__)) | |||||
u_int32_t a,b,c,d; | |||||
a=1; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); | |||||
out |= GEN; | |||||
if (d & 1<<26) out |= SSE2; | |||||
if (d & 1<< 9) out |= SSSE3; | |||||
if (c & 1<<25) out |= AESNI; | |||||
if (c & 1<<28) out |= AVX; | |||||
if (b & 1<<5) out |= AVX2; | |||||
a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d)); | |||||
if (c & 1<<11) out |= XOP; | |||||
if (c & 1<<30) out |= RDRAND; | |||||
# endif | |||||
return out; | |||||
} | |||||
/**
 * Fetch 64 random bits from the CPU's RDRAND instruction.
 *
 * The instruction is retried (up to 1000 attempts in total) until it signals
 * success through the carry flag.  On i386 the value is assembled from two
 * 32-bit reads that share the same retry budget.
 *
 * @param abort_on_fail If nonzero, abort() when RDRAND is unavailable or every
 *                      attempt failed.
 * @return The random value, or 0 if no value could be obtained and
 *         abort_on_fail is zero.
 */
INTRINSIC u_int64_t rdrand(int abort_on_fail) {
    uint64_t out = 0;
    int ok = 0;

    if (HAVE(RDRAND)) {
# if defined(__x86_64__)
        int tries;
        for (tries = 1000; tries > 0 && !ok; tries--) {
            u_int64_t val, a = 0;
            /* setc copies the carry flag (1 = valid data) into %al */
            __asm__ __volatile__ (
                "rdrand %0\n\tsetc %%al"
                : "=r"(val), "+a"(a) :: "cc"
            );
            if (a) {
                out = val;
                ok = 1;
            }
        }
# elif (defined(__i386__))
        u_int32_t half[2] = {0, 0};
        int tries = 1000, i;

        ok = 1;
        for (i = 0; i < 2 && ok; i++) {
            u_int32_t reg = 0, a = 0;
            for (; tries > 0 && !a; tries--) {
                __asm__ __volatile__ (
                    "rdrand %0\n\tsetc %%al"
                    : "=r"(reg), "+a"(a) :: "cc"
                );
            }
            half[i] = reg;
            ok = (a != 0);
        }
        out = (uint64_t)half[0] << 32 | half[1];
# else
        abort(); // whut
# endif
    }

    if (!ok) {
        out = 0;
        if (abort_on_fail) {
            abort();
        }
    }

    return out;
}
/* ------------------------------- Vectorized code ------------------------------- */ | |||||
/* Rotate the four 32-bit lanes of x: the immediate selects source lane
 * (k+i)&3 for destination lane k.  Arguments are parenthesized so that
 * expression arguments expand correctly. */
#define shuffle(x,i) _mm_shuffle_epi32((x), \
  ((i) + (((i)+1)&3)*4 + (((i)+2)&3)*16 + (((i)+3)&3)*64))

/* Lane-wise 32-bit and 64-bit vector additions. */
#define add _mm_add_epi32
#define add64 _mm_add_epi64

/* Which quarter-round variants have to be compiled in: a variant is needed
 * when the CPU might support it and no better variant is guaranteed. */
#define NEED_XOP (MIGHT_HAVE(XOP))
#define NEED_SSSE3 (MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP))
#define NEED_SSE2 (MIGHT_HAVE(SSE2) && !MUST_HAVE(SSSE3))
#define NEED_CONV (!MUST_HAVE(SSE2))
#if NEED_XOP
/* ChaCha quarter round on four vector rows, with every rotation done by
 * xop_rotate().  All four rows are updated in place. */
static __inline__ void
quarter_round_xop(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
    *a = add(*a, *b);
    *d = xop_rotate(16, *d ^ *a);

    *c = add(*c, *d);
    *b = xop_rotate(12, *b ^ *c);

    *a = add(*a, *b);
    *d = xop_rotate(8, *d ^ *a);

    *c = add(*c, *d);
    *b = xop_rotate(7, *b ^ *c);
}
#endif
#if NEED_SSSE3
/* Byte-shuffle control vectors passed to _mm_shuffle_epi8 by the two
 * helpers below (used for the 8- and 16-bit rotations). */
static const ssereg shuffle8  = { 0x0605040702010003ull, 0x0E0D0C0F0A09080Bull };
static const ssereg shuffle16 = { 0x0504070601000302ull, 0x0D0C0F0E09080B0Aull };

/* Apply the shuffle8 byte permutation to a. */
INTRINSIC ssereg ssse3_rotate_8(ssereg a) {
    return _mm_shuffle_epi8(a, shuffle8);
}

/* Apply the shuffle16 byte permutation to a. */
INTRINSIC ssereg ssse3_rotate_16(ssereg a) {
    return _mm_shuffle_epi8(a, shuffle16);
}

/* ChaCha quarter round on four vector rows: the 16- and 8-bit rotations use
 * byte shuffles, the 12- and 7-bit rotations use sse2_rotate(). */
static __inline__ void
quarter_round_ssse3(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
    *a = add(*a, *b);
    *d = ssse3_rotate_16(*d ^ *a);

    *c = add(*c, *d);
    *b = sse2_rotate(12, *b ^ *c);

    *a = add(*a, *b);
    *d = ssse3_rotate_8(*d ^ *a);

    *c = add(*c, *d);
    *b = sse2_rotate(7, *b ^ *c);
}
#endif /* MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP) */
#if NEED_SSE2
/* ChaCha quarter round on four vector rows, with every rotation done by
 * sse2_rotate().  All four rows are updated in place. */
static __inline__ void
quarter_round_sse2(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
    *a = add(*a, *b);
    *d = sse2_rotate(16, *d ^ *a);

    *c = add(*c, *d);
    *b = sse2_rotate(12, *b ^ *c);

    *a = add(*a, *b);
    *d = sse2_rotate(8, *d ^ *a);

    *c = add(*c, *d);
    *b = sse2_rotate(7, *b ^ *c);
}
#endif
/* Two rounds over two blocks at once, using the quarter-round function qrf.
 * Operates on the variables a1..d1 and a2..d2 of the enclosing scope.
 * After the first round rows b, c, d are rotated by 1, 2, 3 lanes; after the
 * second round they are rotated by 3, 2, 1 lanes, which restores the
 * original lane order.
 * Wrapped in do { } while (0) so the macro behaves as one statement. */
#define DOUBLE_ROUND(qrf) do { \
    qrf(&a1,&b1,&c1,&d1); \
    qrf(&a2,&b2,&c2,&d2); \
    b1 = shuffle(b1,1); \
    c1 = shuffle(c1,2); \
    d1 = shuffle(d1,3); \
    b2 = shuffle(b2,1); \
    c2 = shuffle(c2,2); \
    d2 = shuffle(d2,3); \
    \
    qrf(&a1,&b1,&c1,&d1); \
    qrf(&a2,&b2,&c2,&d2); \
    b1 = shuffle(b1,3); \
    c1 = shuffle(c1,2); \
    d1 = shuffle(d1,1); \
    b2 = shuffle(b2,3); \
    c2 = shuffle(c2,2); \
    d2 = shuffle(d2,1); \
} while (0)
/* Emit two finished blocks and set up the next two.
 * Uses output, a1..d2, aa..dd and p from the enclosing scope:
 *   - writes state + input for both blocks to output[0..7] and advances output;
 *   - the second block's input row is cc+p computed with the 64-bit add, the
 *     same way c2 is (re)initialised, so the feed-forward value matches the
 *     block input even when the low counter word wraps;
 *   - advances cc by 2*p and reloads the working rows from the inputs.
 * Wrapped in do { } while (0) so the macro behaves as one statement. */
#define OUTPUT_FUNCTION do { \
    output[0] = add(a1,aa); \
    output[1] = add(b1,bb); \
    output[2] = add(c1,cc); \
    output[3] = add(d1,dd); \
    output[4] = add(a2,aa); \
    output[5] = add(b2,bb); \
    output[6] = add(c2,add64(cc,p)); \
    output[7] = add(d2,dd); \
    \
    output += 8; \
    \
    cc = add64(add64(cc,p), p); \
    a1 = a2 = aa; \
    b1 = b2 = bb; \
    c1 = cc; c2 = add64(cc,p); \
    d1 = d2 = dd; \
} while (0)
/* ------------------------------------------------------------------------------- */ | |||||
/* Rotate the 32-bit word a left by r bits.  r must satisfy 0 < r < 32,
 * otherwise one of the shifts is undefined. */
INTRINSIC u_int32_t rotate(int r, u_int32_t a) {
    const u_int32_t upper = a << r;
    const u_int32_t lower = a >> (32 - r);
    return upper ^ lower;
}
/* Scalar ChaCha quarter round: mixes the four 32-bit words in place using
 * add / xor / rotate with rotation amounts 16, 12, 8 and 7. */
static __inline__ void
quarter_round(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d) {
    *a += *b;
    *d = rotate(16, *d ^ *a);

    *c += *d;
    *b = rotate(12, *b ^ *c);

    *a += *b;
    *d = rotate(8, *d ^ *a);

    *c += *d;
    *b = rotate(7, *b ^ *c);
}
/**
 * Expand a 32-byte key into output_size bytes of ChaCha-style keystream.
 *
 * State layout (16 words): key[0..7], iv (2 words), ctr (2 words), then the
 * four constant words.  The 64-bit block counter ctr is incremented by one
 * for each 64-byte block.
 *
 * The vectorised path produces 128 bytes per iteration and the portable path
 * 64 bytes per iteration, so output_size should be a multiple of 128.
 * The key is fully loaded before any output is written, so output_ may
 * overlap key_.
 *
 * @param iv          64-bit nonce word.
 * @param ctr         Initial 64-bit block counter.
 * @param nr          Number of rounds; consumed two at a time.
 * @param output_size Number of bytes to produce.
 * @param key_        32-byte key (accessed as vectors / 32-bit words; assumes
 *                    suitable alignment -- TODO confirm for all callers).
 * @param output_     Destination buffer.
 */
static void
crandom_chacha_expand(u_int64_t iv,
                      u_int64_t ctr,
                      int nr,
                      int output_size,
                      const unsigned char *key_,
                      unsigned char *output_) {
# if MIGHT_HAVE_SSE2
    if (HAVE(SSE2)) {
        ssereg *key = (ssereg *)key_;
        ssereg *output = (ssereg *)output_;

        /* x1 / x2 are the working rows of two parallel blocks; aa..dd keep
         * the block inputs for the feed-forward; p adds 1 to the counter. */
        ssereg a1 = key[0], a2 = a1, aa = a1,
               b1 = key[1], b2 = b1, bb = b1,
               c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1,
               d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull},
               d2 = d1, dd = d1,
               p = {0, 1};

        int i,r;
#   if (NEED_XOP)
        if (HAVE(XOP)) {
            for (i=0; i<output_size; i+=128) {
                for (r=nr; r>0; r-=2)
                    DOUBLE_ROUND(quarter_round_xop);
                OUTPUT_FUNCTION;
            }
            return;
        }
#   endif
#   if (NEED_SSSE3)
        if (HAVE(SSSE3)) {
            for (i=0; i<output_size; i+=128) {
                for (r=nr; r>0; r-=2)
                    DOUBLE_ROUND(quarter_round_ssse3);
                OUTPUT_FUNCTION;
            }
            return;
        }
#   endif
#   if (NEED_SSE2)
        if (HAVE(SSE2)) {
            for (i=0; i<output_size; i+=128) {
                for (r=nr; r>0; r-=2)
                    DOUBLE_ROUND(quarter_round_sse2);
                OUTPUT_FUNCTION;
            }
            return;
        }
#   endif
    }
# endif

# if NEED_CONV
    {
        const u_int32_t *key = (const u_int32_t *)key_;
        u_int32_t
            x[16],
            input[16] = {
                key[0], key[1], key[2], key[3],
                key[4], key[5], key[6], key[7],
                iv, iv>>32, ctr, ctr>>32,
                0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
            },
            *output = (u_int32_t *)output_;
        int i, r;

        for (i=0; i<output_size; i+= 64) {
            for (r=0; r<16; r++) {
                x[r] = input[r];
            }
            for (r=nr; r>0; r-=2) {
                /* column round */
                quarter_round(&x[0], &x[4], &x[8], &x[12]);
                quarter_round(&x[1], &x[5], &x[9], &x[13]);
                quarter_round(&x[2], &x[6], &x[10], &x[14]);
                quarter_round(&x[3], &x[7], &x[11], &x[15]);

                /* diagonal round */
                quarter_round(&x[0], &x[5], &x[10], &x[15]);
                quarter_round(&x[1], &x[6], &x[11], &x[12]);
                quarter_round(&x[2], &x[7], &x[8], &x[13]);
                quarter_round(&x[3], &x[4], &x[9], &x[14]);
            }
            for (r=0; r<16; r++) {
                output[r] = x[r] + input[r];
            }

            output += 16;

            /* Advance the 64-bit block counter: word 10 is its low half and
             * word 11 its high half, matching the 64-bit add of {0,1} used
             * by the vectorised path. */
            input[10] ++;
            if (!input[10]) input[11]++;
        }
    }
#endif /* NEED_CONV */
}
/* "return 4", cf xkcd #221 */ | |||||
#define CRANDOM_MAGIC 0x72657475726e2034ull | |||||
int | |||||
crandom_init_from_file( | |||||
struct crandom_state_t *state, | |||||
const char *filename, | |||||
int reseed_interval, | |||||
int reseeds_mandatory | |||||
) { | |||||
state->fill = 0; | |||||
state->reseed_countdown = reseed_interval; | |||||
state->reseed_interval = reseed_interval; | |||||
state->ctr = 0; | |||||
state->randomfd = open(filename, O_RDONLY); | |||||
if (state->randomfd == -1) { | |||||
int err = errno; | |||||
return err ? err : -1; | |||||
} | |||||
ssize_t offset = 0, red; | |||||
do { | |||||
red = read(state->randomfd, state->seed + offset, 32 - offset); | |||||
if (red > 0) offset += red; | |||||
} while (red > 0 && offset < 32); | |||||
if (offset < 32) { | |||||
int err = errno; | |||||
return err ? err : -1; | |||||
} | |||||
memset(state->buffer, 0, 96); | |||||
state->magic = CRANDOM_MAGIC; | |||||
state->reseeds_mandatory = reseeds_mandatory; | |||||
return 0; | |||||
} | |||||
void | |||||
crandom_init_from_buffer( | |||||
struct crandom_state_t *state, | |||||
const char initial_seed[32] | |||||
) { | |||||
memcpy(state->seed, initial_seed, 32); | |||||
memset(state->buffer, 0, 96); | |||||
state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0; | |||||
state->randomfd = -1; | |||||
state->magic = CRANDOM_MAGIC; | |||||
} | |||||
int | |||||
crandom_generate( | |||||
struct crandom_state_t *state, | |||||
unsigned char *output, | |||||
unsigned long long length | |||||
) { | |||||
/* the generator isn't seeded; maybe they ignored the return value of init_from_file */ | |||||
if (unlikely(state->magic != CRANDOM_MAGIC)) { | |||||
abort(); | |||||
} | |||||
int ret = 0; | |||||
while (length) { | |||||
if (unlikely(state->fill <= 0)) { | |||||
uint64_t iv = 0; | |||||
if (state->reseed_interval) { | |||||
/* it's nondeterministic, stir in some rdrand() or rdtsc() */ | |||||
if (HAVE(RDRAND)) { | |||||
iv = rdrand(0); | |||||
if (!iv) iv = rdtsc(); | |||||
} else { | |||||
iv = rdtsc(); | |||||
} | |||||
state->reseed_countdown--; | |||||
if (unlikely(state->reseed_countdown <= 0)) { | |||||
/* reseed by xoring in random state */ | |||||
state->reseed_countdown = state->reseed_interval; | |||||
ssize_t offset = 0, red; | |||||
do { | |||||
red = read(state->randomfd, state->buffer + offset, 32 - offset); | |||||
if (red > 0) offset += red; | |||||
} while (red > 0 && offset < 32); | |||||
if (offset < 32) { | |||||
/* The read failed. Signal an error with the return code. | |||||
* | |||||
* If reseeds are mandatory, crash. | |||||
* | |||||
* If not, the generator is still probably safe to use, because reseeding | |||||
* is basically over-engineering for caution. Also, the user might ignore | |||||
* the return code, so we still need to fill the request. | |||||
* | |||||
* Set reseed_countdown = 1 so we'll try again later. If the user's | |||||
* performance sucks as a result of ignoring the error code while calling | |||||
* us in a loop, well, that's life. | |||||
*/ | |||||
if (state->reseeds_mandatory) { | |||||
abort(); | |||||
} | |||||
ret = errno; | |||||
if (ret == 0) ret = -1; | |||||
state->reseed_countdown = 1; | |||||
} | |||||
int i; | |||||
for (i=0; i<32; i++) { | |||||
/* Stir in the buffer. If somehow the read failed, it'll be zeros. */ | |||||
state->seed[i] ^= state->buffer[i]; | |||||
} | |||||
} | |||||
} | |||||
crandom_chacha_expand(iv,state->ctr,20,128,state->seed,state->seed); | |||||
state->ctr++; | |||||
state->fill = sizeof(state->buffer); | |||||
} | |||||
unsigned long long copy = (length > state->fill) ? state->fill : length; | |||||
state->fill -= copy; | |||||
memcpy(output, state->buffer + state->fill, copy); | |||||
memset(state->buffer + state->fill, 0, copy); | |||||
output += copy; length -= copy; | |||||
} | |||||
return ret; | |||||
} | |||||
void | |||||
crandom_destroy( | |||||
struct crandom_state_t *state | |||||
) { | |||||
if (state->magic == CRANDOM_MAGIC && state->randomfd) { | |||||
(void) close(state->randomfd); | |||||
/* Ignore the return value from close(), because what would it mean? | |||||
* "Your random device, which you were reading over NFS, lost some data"? | |||||
*/ | |||||
} | |||||
memset(state, 0, sizeof(*state)); | |||||
} |
@@ -1,140 +0,0 @@ | |||||
/* Copyright (c) 2011 Stanford University. | |||||
* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
/** | |||||
* @file crandom.h | |||||
* @author Mike Hamburg | |||||
* @brief A miniature version of the (as of yet incomplete) crandom project. | |||||
*/ | |||||
#ifndef __GOLDI_CRANDOM_H__ | |||||
#define __GOLDI_CRANDOM_H__ 1 | |||||
#include <stdint.h> /* for uint64_t */ | |||||
#include <fcntl.h> /* for open */ | |||||
#include <errno.h> /* for returning errors after open */ | |||||
#include <stdlib.h> /* for abort */ | |||||
#include <string.h> /* for memcpy */ | |||||
#include <strings.h> /* for bzero */ | |||||
#include <unistd.h> /* for read */ | |||||
/** | |||||
* @brief The state of a crandom generator. | |||||
* | |||||
* This object is opaque. It is not protected by a lock, and so must | |||||
* not be accessed by multiple threads at the same time. | |||||
*/ | |||||
struct crandom_state_t {
    /** @privatesection */

    /* Current 32-byte key.  seed and buffer must stay adjacent and in this
     * order: each refill writes 128 bytes starting at seed. */
    unsigned char seed[32];

    /* Generated bytes not yet handed out; also receives reseed reads. */
    unsigned char buffer[96];

    uint64_t      ctr;               /* block counter, bumped on every refill */
    uint64_t      magic;             /* set to a magic constant once initialised */
    unsigned int  fill;              /* number of unread bytes left in buffer */
    int           reseed_countdown;  /* refills remaining before the next reseed */
    int           reseed_interval;   /* refills between reseeds; 0 = never reseed */
    int           reseeds_mandatory; /* nonzero: abort() when a reseed fails */
    int           randomfd;          /* descriptor of the seed file, or -1 */
} __attribute__((aligned(16)));
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
/** | |||||
* Initialize a crandom state from the chosen file. | |||||
* | |||||
* This function initializes a state from a given state file, or | |||||
* from a random device (eg. /dev/random or /dev/urandom). | |||||
* | |||||
* You must check the return value of this function. | |||||
* | |||||
* @param [out] state The crandom state variable to initialize.
* @param [in] filename The name of the seed file or random device. | |||||
* @param [in] reseed_interval The number of 96-byte blocks which can be | |||||
* generated without reseeding. Suggest 10000. | |||||
* @param [in] reseeds_mandatory If nonzero, call abort() if a reseed fails. | |||||
* Suggest 1. | |||||
* | |||||
* @retval 0 Success. | |||||
* @retval Nonzero An error to be interpreted by strerror(). | |||||
*/ | |||||
int | |||||
crandom_init_from_file ( | |||||
struct crandom_state_t *state, | |||||
const char *filename, | |||||
int reseed_interval, | |||||
int reseeds_mandatory | |||||
) __attribute__((warn_unused_result)); | |||||
/** | |||||
* Initialize a crandom state from a buffer, for deterministic operation. | |||||
* | |||||
* This function is used to initialize a crandom state deterministically, | |||||
* mainly for testing purposes. It can also be used to expand a secret | |||||
* random value deterministically. | |||||
* | |||||
* @warning The crandom implementation is not guaranteed to be stable. | |||||
* That is, a later release might produce a different random stream from | |||||
* the same seed. | |||||
* | |||||
* @param [out] state The crandom state variable to initialize.
* @param [in] initial_seed The seed value. | |||||
*/ | |||||
void | |||||
crandom_init_from_buffer ( | |||||
struct crandom_state_t *state, | |||||
const char initial_seed[32] | |||||
); | |||||
/** | |||||
* Fill the output buffer with random data. | |||||
* | |||||
* This function uses the given crandom state to produce pseudorandom data | |||||
* in the output buffer. | |||||
* | |||||
* This function may perform reads from the state's random device if it needs | |||||
* to reseed. This could block if that file is a blocking source, such as | |||||
* a pipe or /dev/random on Linux. If reseeding fails and the state has | |||||
* reseeds_mandatory set, this function will call abort(). Otherwise, it will | |||||
* return an error code, but it will still randomize the buffer. | |||||
* | |||||
* If called on a corrupted, uninitialized or destroyed state, this function | |||||
* will abort(). | |||||
* | |||||
* @warning This function is not thread-safe with respect to the state. Don't | |||||
* call it from multiple threads with the same state at the same time. | |||||
* | |||||
* @param [inout] state The crandom state to use for generation. | |||||
* @param [out] output The buffer to fill with random data. | |||||
* @param [in] length The length of the buffer. | |||||
* | |||||
* @retval 0 Success. | |||||
* @retval Nonzero A non-mandatory reseed operation failed.
*/ | |||||
int | |||||
crandom_generate ( | |||||
struct crandom_state_t *state, | |||||
unsigned char *output, | |||||
unsigned long long length | |||||
); | |||||
/** | |||||
* Destroy the random state. Further calls to crandom_generate() on that state | |||||
* will abort(). | |||||
* | |||||
* @param [inout] state The state to be destroyed. | |||||
*/ | |||||
void | |||||
crandom_destroy ( | |||||
struct crandom_state_t *state | |||||
); | |||||
#ifdef __cplusplus | |||||
}; /* extern "C" */ | |||||
#endif | |||||
#endif /* __GOLDI_CRANDOM_H__ */ |
@@ -1,745 +0,0 @@ | |||||
/** | |||||
* @cond internal | |||||
* @file ec_point.c | |||||
* @copyright | |||||
* Copyright (c) 2014 Cryptography Research, Inc. \n | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
* @author Mike Hamburg | |||||
* @warning This file was automatically generated. | |||||
*/ | |||||
#include "ec_point.h" | |||||
void | |||||
p448_isr ( | |||||
struct p448_t* a, | |||||
const struct p448_t* x | |||||
) { | |||||
struct p448_t L0, L1, L2; | |||||
p448_sqr ( &L1, x ); | |||||
p448_mul ( &L2, x, &L1 ); | |||||
p448_sqr ( &L1, &L2 ); | |||||
p448_mul ( &L2, x, &L1 ); | |||||
p448_sqrn ( &L1, &L2, 3 ); | |||||
p448_mul ( &L0, &L2, &L1 ); | |||||
p448_sqrn ( &L1, &L0, 3 ); | |||||
p448_mul ( &L0, &L2, &L1 ); | |||||
p448_sqrn ( &L2, &L0, 9 ); | |||||
p448_mul ( &L1, &L0, &L2 ); | |||||
p448_sqr ( &L0, &L1 ); | |||||
p448_mul ( &L2, x, &L0 ); | |||||
p448_sqrn ( &L0, &L2, 18 ); | |||||
p448_mul ( &L2, &L1, &L0 ); | |||||
p448_sqrn ( &L0, &L2, 37 ); | |||||
p448_mul ( &L1, &L2, &L0 ); | |||||
p448_sqrn ( &L0, &L1, 37 ); | |||||
p448_mul ( &L1, &L2, &L0 ); | |||||
p448_sqrn ( &L0, &L1, 111 ); | |||||
p448_mul ( &L2, &L1, &L0 ); | |||||
p448_sqr ( &L0, &L2 ); | |||||
p448_mul ( &L1, x, &L0 ); | |||||
p448_sqrn ( &L0, &L1, 223 ); | |||||
p448_mul ( a, &L2, &L0 ); | |||||
} | |||||
void | |||||
p448_inverse ( | |||||
struct p448_t* a, | |||||
const struct p448_t* x | |||||
) { | |||||
struct p448_t L0, L1; | |||||
p448_isr ( &L0, x ); | |||||
p448_sqr ( &L1, &L0 ); | |||||
p448_sqr ( &L0, &L1 ); | |||||
p448_mul ( a, x, &L0 ); | |||||
} | |||||
void | |||||
add_tw_niels_to_tw_extensible ( | |||||
struct tw_extensible_t* d, | |||||
const struct tw_niels_t* e | |||||
) { | |||||
struct p448_t L0, L1; | |||||
p448_bias ( &d->y, 2 ); | |||||
p448_bias ( &d->z, 2 ); | |||||
p448_sub ( &L1, &d->y, &d->x ); | |||||
p448_mul ( &L0, &e->a, &L1 ); | |||||
p448_add ( &L1, &d->x, &d->y ); | |||||
p448_mul ( &d->y, &e->b, &L1 ); | |||||
p448_bias ( &d->y, 2 ); | |||||
p448_mul ( &L1, &d->u, &d->t ); | |||||
p448_mul ( &d->x, &e->c, &L1 ); | |||||
p448_add ( &d->u, &L0, &d->y ); | |||||
p448_sub ( &d->t, &d->y, &L0 ); | |||||
p448_sub ( &d->y, &d->z, &d->x ); | |||||
p448_add ( &L0, &d->x, &d->z ); | |||||
p448_mul ( &d->z, &L0, &d->y ); | |||||
p448_mul ( &d->x, &d->y, &d->t ); | |||||
p448_mul ( &d->y, &L0, &d->u ); | |||||
} | |||||
void | |||||
sub_tw_niels_from_tw_extensible ( | |||||
struct tw_extensible_t* d, | |||||
const struct tw_niels_t* e | |||||
) { | |||||
struct p448_t L0, L1; | |||||
p448_bias ( &d->y, 2 ); | |||||
p448_bias ( &d->z, 2 ); | |||||
p448_sub ( &L1, &d->y, &d->x ); | |||||
p448_mul ( &L0, &e->b, &L1 ); | |||||
p448_add ( &L1, &d->x, &d->y ); | |||||
p448_mul ( &d->y, &e->a, &L1 ); | |||||
p448_bias ( &d->y, 2 ); | |||||
p448_mul ( &L1, &d->u, &d->t ); | |||||
p448_mul ( &d->x, &e->c, &L1 ); | |||||
p448_add ( &d->u, &L0, &d->y ); | |||||
p448_sub ( &d->t, &d->y, &L0 ); | |||||
p448_add ( &d->y, &d->x, &d->z ); | |||||
p448_sub ( &L0, &d->z, &d->x ); | |||||
p448_mul ( &d->z, &L0, &d->y ); | |||||
p448_mul ( &d->x, &d->y, &d->t ); | |||||
p448_mul ( &d->y, &L0, &d->u ); | |||||
} | |||||
void | |||||
add_tw_pniels_to_tw_extensible ( | |||||
struct tw_extensible_t* e, | |||||
const struct tw_pniels_t* a | |||||
) { | |||||
struct p448_t L0; | |||||
p448_mul ( &L0, &e->z, &a->z ); | |||||
p448_copy ( &e->z, &L0 ); | |||||
add_tw_niels_to_tw_extensible( e, &a->n ); | |||||
} | |||||
void | |||||
sub_tw_pniels_from_tw_extensible ( | |||||
struct tw_extensible_t* e, | |||||
const struct tw_pniels_t* a | |||||
) { | |||||
struct p448_t L0; | |||||
p448_mul ( &L0, &e->z, &a->z ); | |||||
p448_copy ( &e->z, &L0 ); | |||||
sub_tw_niels_from_tw_extensible( e, &a->n ); | |||||
} | |||||
void | |||||
double_tw_extensible ( | |||||
struct tw_extensible_t* a | |||||
) { | |||||
struct p448_t L0, L1, L2; | |||||
p448_sqr ( &L2, &a->x ); | |||||
p448_sqr ( &L0, &a->y ); | |||||
p448_add ( &a->u, &L2, &L0 ); | |||||
p448_add ( &a->t, &a->y, &a->x ); | |||||
p448_sqr ( &L1, &a->t ); | |||||
p448_bias ( &L1, 3 ); | |||||
p448_sub ( &a->t, &L1, &a->u ); | |||||
p448_sub ( &L1, &L0, &L2 ); | |||||
p448_bias ( &L1, 2 ); | |||||
p448_sqr ( &a->x, &a->z ); | |||||
p448_bias ( &a->x, 2 ); | |||||
p448_add ( &a->z, &a->x, &a->x ); | |||||
p448_sub ( &L0, &a->z, &L1 ); | |||||
p448_mul ( &a->z, &L1, &L0 ); | |||||
p448_mul ( &a->x, &L0, &a->t ); | |||||
p448_mul ( &a->y, &L1, &a->u ); | |||||
} | |||||
void | |||||
double_extensible ( | |||||
struct extensible_t* a | |||||
) { | |||||
struct p448_t L0, L1, L2; | |||||
p448_sqr ( &L2, &a->x ); | |||||
p448_sqr ( &L0, &a->y ); | |||||
p448_add ( &L1, &L2, &L0 ); | |||||
p448_add ( &a->t, &a->y, &a->x ); | |||||
p448_sqr ( &a->u, &a->t ); | |||||
p448_bias ( &a->u, 3 ); | |||||
p448_sub ( &a->t, &a->u, &L1 ); | |||||
p448_sub ( &a->u, &L0, &L2 ); | |||||
p448_bias ( &a->u, 2 ); | |||||
p448_sqr ( &a->x, &a->z ); | |||||
p448_bias ( &a->x, 2 ); | |||||
p448_add ( &a->z, &a->x, &a->x ); | |||||
p448_sub ( &L0, &a->z, &L1 ); | |||||
p448_mul ( &a->z, &L1, &L0 ); | |||||
p448_mul ( &a->x, &L0, &a->t ); | |||||
p448_mul ( &a->y, &L1, &a->u ); | |||||
} | |||||
void | |||||
twist_and_double ( | |||||
struct tw_extensible_t* b, | |||||
const struct extensible_t* a | |||||
) { | |||||
struct p448_t L0; | |||||
p448_sqr ( &b->x, &a->x ); | |||||
p448_sqr ( &b->z, &a->y ); | |||||
p448_add ( &b->u, &b->x, &b->z ); | |||||
p448_add ( &b->t, &a->y, &a->x ); | |||||
p448_sqr ( &L0, &b->t ); | |||||
p448_bias ( &L0, 3 ); | |||||
p448_sub ( &b->t, &L0, &b->u ); | |||||
p448_sub ( &L0, &b->z, &b->x ); | |||||
p448_bias ( &L0, 2 ); | |||||
p448_sqr ( &b->x, &a->z ); | |||||
p448_bias ( &b->x, 2 ); | |||||
p448_add ( &b->z, &b->x, &b->x ); | |||||
p448_sub ( &b->y, &b->z, &b->u ); | |||||
p448_mul ( &b->z, &L0, &b->y ); | |||||
p448_mul ( &b->x, &b->y, &b->t ); | |||||
p448_mul ( &b->y, &L0, &b->u ); | |||||
} | |||||
void | |||||
untwist_and_double ( | |||||
struct extensible_t* b, | |||||
const struct tw_extensible_t* a | |||||
) { | |||||
struct p448_t L0; | |||||
p448_sqr ( &b->x, &a->x ); | |||||
p448_sqr ( &b->z, &a->y ); | |||||
p448_add ( &L0, &b->x, &b->z ); | |||||
p448_add ( &b->t, &a->y, &a->x ); | |||||
p448_sqr ( &b->u, &b->t ); | |||||
p448_bias ( &b->u, 3 ); | |||||
p448_sub ( &b->t, &b->u, &L0 ); | |||||
p448_sub ( &b->u, &b->z, &b->x ); | |||||
p448_bias ( &b->u, 2 ); | |||||
p448_sqr ( &b->x, &a->z ); | |||||
p448_bias ( &b->x, 2 ); | |||||
p448_add ( &b->z, &b->x, &b->x ); | |||||
p448_sub ( &b->y, &b->z, &b->u ); | |||||
p448_mul ( &b->z, &L0, &b->y ); | |||||
p448_mul ( &b->x, &b->y, &b->t ); | |||||
p448_mul ( &b->y, &L0, &b->u ); | |||||
} | |||||
void | |||||
convert_tw_affine_to_tw_pniels ( | |||||
struct tw_pniels_t* b, | |||||
const struct tw_affine_t* a | |||||
) { | |||||
p448_sub ( &b->n.a, &a->y, &a->x ); | |||||
p448_bias ( &b->n.a, 2 ); | |||||
p448_weak_reduce( &b->n.a ); | |||||
p448_add ( &b->n.b, &a->x, &a->y ); | |||||
p448_weak_reduce( &b->n.b ); | |||||
p448_mul ( &b->n.c, &a->y, &a->x ); | |||||
p448_mulw ( &b->z, &b->n.c, 78164 ); | |||||
p448_neg ( &b->n.c, &b->z ); | |||||
p448_bias ( &b->n.c, 2 ); | |||||
p448_weak_reduce( &b->n.c ); | |||||
p448_set_ui( &b->z, 2 ); | |||||
} | |||||
void | |||||
convert_tw_affine_to_tw_extensible ( | |||||
struct tw_extensible_t* b, | |||||
const struct tw_affine_t* a | |||||
) { | |||||
p448_copy ( &b->x, &a->x ); | |||||
p448_copy ( &b->y, &a->y ); | |||||
p448_set_ui( &b->z, 1 ); | |||||
p448_copy ( &b->t, &a->x ); | |||||
p448_copy ( &b->u, &a->y ); | |||||
} | |||||
void | |||||
convert_affine_to_extensible ( | |||||
struct extensible_t* b, | |||||
const struct affine_t* a | |||||
) { | |||||
p448_copy ( &b->x, &a->x ); | |||||
p448_copy ( &b->y, &a->y ); | |||||
p448_set_ui( &b->z, 1 ); | |||||
p448_copy ( &b->t, &a->x ); | |||||
p448_copy ( &b->u, &a->y ); | |||||
} | |||||
void | |||||
convert_tw_extensible_to_tw_pniels ( | |||||
struct tw_pniels_t* b, | |||||
const struct tw_extensible_t* a | |||||
) { | |||||
p448_sub ( &b->n.a, &a->y, &a->x ); | |||||
p448_bias ( &b->n.a, 2 ); | |||||
p448_weak_reduce( &b->n.a ); | |||||
p448_add ( &b->n.b, &a->x, &a->y ); | |||||
p448_weak_reduce( &b->n.b ); | |||||
p448_mul ( &b->n.c, &a->u, &a->t ); | |||||
p448_mulw ( &b->z, &b->n.c, 78164 ); | |||||
p448_neg ( &b->n.c, &b->z ); | |||||
p448_bias ( &b->n.c, 2 ); | |||||
p448_weak_reduce( &b->n.c ); | |||||
p448_add ( &b->z, &a->z, &a->z ); | |||||
p448_weak_reduce( &b->z ); | |||||
} | |||||
void | |||||
convert_tw_pniels_to_tw_extensible ( | |||||
struct tw_extensible_t* e, | |||||
const struct tw_pniels_t* d | |||||
) { | |||||
p448_add ( &e->u, &d->n.b, &d->n.a ); | |||||
p448_sub ( &e->t, &d->n.b, &d->n.a ); | |||||
p448_bias ( &e->t, 2 ); | |||||
p448_mul ( &e->x, &d->z, &e->t ); | |||||
p448_mul ( &e->y, &d->z, &e->u ); | |||||
p448_sqr ( &e->z, &d->z ); | |||||
} | |||||
void | |||||
convert_tw_niels_to_tw_extensible ( | |||||
struct tw_extensible_t* e, | |||||
const struct tw_niels_t* d | |||||
) { | |||||
p448_add ( &e->y, &d->b, &d->a ); | |||||
p448_weak_reduce( &e->y ); | |||||
p448_sub ( &e->x, &d->b, &d->a ); | |||||
p448_bias ( &e->x, 2 ); | |||||
p448_weak_reduce( &e->x ); | |||||
p448_set_ui( &e->z, 1 ); | |||||
p448_copy ( &e->t, &e->x ); | |||||
p448_copy ( &e->u, &e->y ); | |||||
} | |||||
void | |||||
montgomery_step ( | |||||
struct montgomery_t* a | |||||
) { | |||||
struct p448_t L0, L1; | |||||
p448_bias ( &a->xd, 2 ); | |||||
p448_bias ( &a->xa, 2 ); | |||||
p448_add ( &L0, &a->zd, &a->xd ); | |||||
p448_sub ( &L1, &a->xd, &a->zd ); | |||||
p448_sub ( &a->zd, &a->xa, &a->za ); | |||||
p448_mul ( &a->xd, &L0, &a->zd ); | |||||
p448_bias ( &a->xd, 2 ); | |||||
p448_add ( &a->zd, &a->za, &a->xa ); | |||||
p448_mul ( &a->za, &L1, &a->zd ); | |||||
p448_add ( &a->xa, &a->za, &a->xd ); | |||||
p448_sqr ( &a->zd, &a->xa ); | |||||
p448_mul ( &a->xa, &a->z0, &a->zd ); | |||||
p448_sub ( &a->zd, &a->xd, &a->za ); | |||||
p448_sqr ( &a->za, &a->zd ); | |||||
p448_sqr ( &a->xd, &L0 ); | |||||
p448_bias ( &a->xd, 2 ); | |||||
p448_sqr ( &L0, &L1 ); | |||||
p448_mulw ( &a->zd, &a->xd, 39082 ); | |||||
p448_bias ( &a->zd, 4 ); | |||||
p448_sub ( &L1, &a->xd, &L0 ); | |||||
p448_mul ( &a->xd, &L0, &a->zd ); | |||||
p448_sub ( &L0, &a->zd, &L1 ); | |||||
p448_mul ( &a->zd, &L0, &L1 ); | |||||
} | |||||
void | |||||
serialize_montgomery ( | |||||
struct p448_t* sign, | |||||
struct p448_t* ser, | |||||
const struct montgomery_t* a, | |||||
const struct p448_t* sbz | |||||
) { | |||||
struct p448_t L0, L1, L2, L3; | |||||
p448_mul ( &L2, &a->z0, &a->zd ); | |||||
p448_bias ( &L2, 2 ); | |||||
p448_sub ( &L0, &L2, &a->xd ); | |||||
p448_mul ( &L2, &a->za, &L0 ); | |||||
p448_bias ( &L2, 2 ); | |||||
p448_mul ( &L1, &a->z0, &a->xd ); | |||||
p448_bias ( &L1, 2 ); | |||||
p448_sub ( &L0, &L1, &a->zd ); | |||||
p448_mul ( &L3, &a->xa, &L0 ); | |||||
p448_add ( &L1, &L3, &L2 ); | |||||
p448_sub ( &L0, &L2, &L3 ); | |||||
p448_mul ( &L2, &L0, &L1 ); | |||||
p448_mul ( &L0, sbz, &L2 ); | |||||
p448_mul ( &L2, &a->zd, &L0 ); | |||||
p448_mul ( sign, &L2, &a->zd ); | |||||
p448_mul ( ser, &L2, &a->xd ); | |||||
p448_mul ( &L2, sign, ser ); | |||||
p448_isr ( &L1, &L2 ); | |||||
p448_mul ( ser, sign, &L1 ); | |||||
p448_sqr ( &L0, &L1 ); | |||||
p448_mul ( sign, &L2, &L0 ); | |||||
} | |||||
void | |||||
serialize_extensible ( | |||||
struct p448_t* b, | |||||
const struct extensible_t* a | |||||
) { | |||||
struct p448_t L0, L1, L2; | |||||
p448_sub ( &L0, &a->y, &a->z ); | |||||
p448_bias ( &L0, 2 ); | |||||
p448_add ( b, &a->z, &a->y ); | |||||
p448_mul ( &L1, &a->z, &a->x ); | |||||
p448_mul ( &L2, &L0, &L1 ); | |||||
p448_mul ( &L1, &L2, &L0 ); | |||||
p448_mul ( &L0, &L2, b ); | |||||
p448_mul ( &L2, &L1, &L0 ); | |||||
p448_isr ( &L0, &L2 ); | |||||
p448_mul ( b, &L1, &L0 ); | |||||
p448_sqr ( &L1, &L0 ); | |||||
p448_mul ( &L0, &L2, &L1 ); | |||||
} | |||||
void | |||||
untwist_and_double_and_serialize ( | |||||
struct p448_t* b, | |||||
const struct tw_extensible_t* a | |||||
) { | |||||
struct p448_t L0, L1, L2, L3; | |||||
p448_mul ( &L3, &a->y, &a->x ); | |||||
p448_add ( b, &a->y, &a->x ); | |||||
p448_sqr ( &L1, b ); | |||||
p448_add ( &L2, &L3, &L3 ); | |||||
p448_sub ( b, &L1, &L2 ); | |||||
p448_bias ( b, 3 ); | |||||
p448_sqr ( &L2, &a->z ); | |||||
p448_sqr ( &L1, &L2 ); | |||||
p448_add ( &L2, b, b ); | |||||
p448_mulw ( b, &L2, 39082 ); | |||||
p448_neg ( &L2, b ); | |||||
p448_bias ( &L2, 2 ); | |||||
p448_mulw ( &L0, &L2, 39082 ); | |||||
p448_neg ( b, &L0 ); | |||||
p448_bias ( b, 2 ); | |||||
p448_mul ( &L0, &L2, &L1 ); | |||||
p448_mul ( &L2, b, &L0 ); | |||||
p448_isr ( &L0, &L2 ); | |||||
p448_mul ( &L1, b, &L0 ); | |||||
p448_sqr ( b, &L0 ); | |||||
p448_mul ( &L0, &L2, b ); | |||||
p448_mul ( b, &L1, &L3 ); | |||||
} | |||||
void | |||||
twist ( | |||||
struct tw_extensible_t* b, | |||||
const struct extensible_t* a | |||||
) { | |||||
mask_t L0, L1; | |||||
p448_sqr ( &b->y, &a->z ); | |||||
p448_sqr ( &b->z, &a->x ); | |||||
p448_sub ( &b->u, &b->y, &b->z ); | |||||
p448_bias ( &b->u, 2 ); | |||||
p448_sub ( &b->z, &a->z, &a->x ); | |||||
p448_bias ( &b->z, 2 ); | |||||
p448_mul ( &b->y, &b->z, &a->y ); | |||||
p448_sub ( &b->z, &a->z, &a->y ); | |||||
p448_bias ( &b->z, 2 ); | |||||
p448_mul ( &b->x, &b->z, &b->y ); | |||||
p448_mul ( &b->t, &b->x, &b->u ); | |||||
p448_mul ( &b->y, &b->x, &b->t ); | |||||
p448_isr ( &b->t, &b->y ); | |||||
p448_mul ( &b->u, &b->x, &b->t ); | |||||
p448_sqr ( &b->x, &b->t ); | |||||
p448_mul ( &b->t, &b->y, &b->x ); | |||||
p448_mul ( &b->x, &a->x, &b->u ); | |||||
p448_mul ( &b->y, &a->y, &b->u ); | |||||
L1 = p448_is_zero( &b->z ); | |||||
L0 = - L1; | |||||
p448_addw ( &b->y, L0 ); | |||||
p448_weak_reduce( &b->y ); | |||||
p448_set_ui( &b->z, 1 ); | |||||
p448_copy ( &b->t, &b->x ); | |||||
p448_copy ( &b->u, &b->y ); | |||||
} | |||||
/*
 * Deserialize the field element `sz` into an affine point on the untwisted
 * curve.
 *
 * Writes a->x and a->y (both are also used as scratch during the
 * computation).  Exactly one p448_isr() call is made.
 *
 * Returns the mask produced by p448_is_zero() on a consistency value
 * (L0 - 1).  NOTE(review): presumably all-ones on success and zero on
 * failure -- confirm against the mask_t convention.
 */
mask_t | |||||
deserialize_affine ( | |||||
struct affine_t* a, | |||||
const struct p448_t* sz | |||||
) { | |||||
struct p448_t L0, L1, L2, L3; | |||||
/* L1 = sz^2 ; L3 = 4*sz^2 - 39082*(sz^2 + 1)^2 */
p448_sqr ( &L1, sz ); | |||||
p448_copy ( &L3, &L1 ); | |||||
p448_addw ( &L3, 1 ); | |||||
p448_sqr ( &a->x, &L3 ); | |||||
p448_mulw ( &L3, &a->x, 39082 ); | |||||
p448_neg ( &a->x, &L3 ); | |||||
p448_add ( &L3, &L1, &L1 ); | |||||
p448_bias ( &L3, 1 ); | |||||
p448_add ( &a->y, &L3, &L3 ); | |||||
p448_add ( &L3, &a->y, &a->x ); | |||||
/* a->x = 1 - sz^2 (scratch use of the output field) */
p448_copy ( &a->y, &L1 ); | |||||
p448_subw ( &a->y, 1 ); | |||||
p448_neg ( &a->x, &a->y ); | |||||
p448_bias ( &a->x, 2 ); | |||||
/* Build the inverse-square-root input and take the root. */
p448_mul ( &a->y, &a->x, &L3 ); | |||||
p448_sqr ( &L2, &a->x ); | |||||
p448_mul ( &L0, &L2, &a->y ); | |||||
p448_mul ( &a->y, &a->x, &L0 ); | |||||
p448_isr ( &L3, &a->y ); | |||||
p448_mul ( &a->y, &L2, &L3 ); | |||||
p448_sqr ( &L2, &L3 ); | |||||
p448_mul ( &L3, &L0, &L2 ); | |||||
/* L0 is the consistency value; it is compared against 1 at the end. */
p448_mul ( &L0, &a->x, &L3 ); | |||||
p448_bias ( &L0, 1 ); | |||||
/* Final outputs: a->x = sz * 2*a->y ; a->y = (sz^2 + 1) * L3 */
p448_add ( &L2, &a->y, &a->y ); | |||||
p448_mul ( &a->x, sz, &L2 ); | |||||
p448_addw ( &L1, 1 ); | |||||
p448_mul ( &a->y, &L1, &L3 ); | |||||
p448_subw ( &L0, 1 ); | |||||
return p448_is_zero( &L0 ); | |||||
} | |||||
/*
 * Deserialize the field element `sz` and transfer the point to the twisted
 * curve in one pass.
 *
 * All five fields of `a` are used as scratch during the computation.  On
 * return a->z is 1, a->t is a copy of a->x and a->u is a copy of a->y.
 * Exactly one p448_isr() call is made.
 *
 * @param a    [out] Point on the twisted curve.
 * @param sdm1 Field constant multiplied into (1 - sz^4); the caller in
 *             goldilocks_verify passes sqrt_d_minus_1.
 * @param sz   Serialized point.
 * @return     Mask from p448_is_zero() on a consistency value (a->t - 1),
 *             captured before a->t is overwritten.  NOTE(review):
 *             presumably all-ones on success -- confirm.
 */
mask_t | |||||
deserialize_and_twist_approx ( | |||||
struct tw_extensible_t* a, | |||||
const struct p448_t* sdm1, | |||||
const struct p448_t* sz | |||||
) { | |||||
struct p448_t L0, L1; | |||||
/* a->z = sz^2 ; a->y = 4*sz^2 - 39082*(sz^2 + 1)^2 */
p448_sqr ( &a->z, sz ); | |||||
p448_copy ( &a->y, &a->z ); | |||||
p448_addw ( &a->y, 1 ); | |||||
p448_sqr ( &a->x, &a->y ); | |||||
p448_mulw ( &a->y, &a->x, 39082 ); | |||||
p448_neg ( &a->x, &a->y ); | |||||
p448_add ( &a->y, &a->z, &a->z ); | |||||
p448_bias ( &a->y, 1 ); | |||||
p448_add ( &a->u, &a->y, &a->y ); | |||||
p448_add ( &a->y, &a->u, &a->x ); | |||||
/* a->x = sdm1 * (1 - sz^4) */
p448_sqr ( &a->x, &a->z ); | |||||
p448_subw ( &a->x, 1 ); | |||||
p448_neg ( &a->u, &a->x ); | |||||
p448_bias ( &a->u, 2 ); | |||||
p448_mul ( &a->x, sdm1, &a->u ); | |||||
/* Build the inverse-square-root input and take the root. */
p448_mul ( &L0, &a->x, &a->y ); | |||||
p448_mul ( &a->t, &L0, &a->y ); | |||||
p448_mul ( &a->u, &a->x, &a->t ); | |||||
p448_mul ( &a->t, &a->u, &L0 ); | |||||
p448_mul ( &a->y, &a->x, &a->t ); | |||||
p448_isr ( &L0, &a->y ); | |||||
p448_mul ( &a->y, &a->u, &L0 ); | |||||
p448_sqr ( &L1, &L0 ); | |||||
p448_mul ( &a->u, &a->t, &L1 ); | |||||
/* a->t is the consistency value; it is compared against 1 below. */
p448_mul ( &a->t, &a->x, &a->u ); | |||||
p448_bias ( &a->t, 1 ); | |||||
/* a->x = (1 - sz^2) * (a->u * 2*sz) */
p448_add ( &a->x, sz, sz ); | |||||
p448_mul ( &L0, &a->u, &a->x ); | |||||
p448_copy ( &a->x, &a->z ); | |||||
p448_subw ( &a->x, 1 ); | |||||
p448_neg ( &L1, &a->x ); | |||||
p448_bias ( &L1, 2 ); | |||||
p448_mul ( &a->x, &L1, &L0 ); | |||||
/* a->y = (sz^2 + 1) * (a->u * a->y) */
p448_mul ( &L0, &a->u, &a->y ); | |||||
p448_addw ( &a->z, 1 ); | |||||
p448_mul ( &a->y, &a->z, &L0 ); | |||||
/* Capture the validity mask before a->t is reused as an output. */
p448_subw ( &a->t, 1 ); | |||||
mask_t ret = p448_is_zero( &a->t ); | |||||
/* Normalize to z = 1 with t = x and u = y. */
p448_set_ui( &a->z, 1 ); | |||||
p448_copy ( &a->t, &a->x ); | |||||
p448_copy ( &a->u, &a->y ); | |||||
return ret; | |||||
} | |||||
void | |||||
set_identity_extensible ( | |||||
struct extensible_t* a | |||||
) { | |||||
p448_set_ui( &a->x, 0 ); | |||||
p448_set_ui( &a->y, 1 ); | |||||
p448_set_ui( &a->z, 1 ); | |||||
p448_set_ui( &a->t, 0 ); | |||||
p448_set_ui( &a->u, 0 ); | |||||
} | |||||
void | |||||
set_identity_tw_extensible ( | |||||
struct tw_extensible_t* a | |||||
) { | |||||
p448_set_ui( &a->x, 0 ); | |||||
p448_set_ui( &a->y, 1 ); | |||||
p448_set_ui( &a->z, 1 ); | |||||
p448_set_ui( &a->t, 0 ); | |||||
p448_set_ui( &a->u, 0 ); | |||||
} | |||||
void | |||||
set_identity_affine ( | |||||
struct affine_t* a | |||||
) { | |||||
p448_set_ui( &a->x, 0 ); | |||||
p448_set_ui( &a->y, 1 ); | |||||
} | |||||
mask_t | |||||
eq_affine ( | |||||
const struct affine_t* a, | |||||
const struct affine_t* b | |||||
) { | |||||
mask_t L1, L2; | |||||
struct p448_t L0; | |||||
p448_sub ( &L0, &a->x, &b->x ); | |||||
p448_bias ( &L0, 2 ); | |||||
L2 = p448_is_zero( &L0 ); | |||||
p448_sub ( &L0, &a->y, &b->y ); | |||||
p448_bias ( &L0, 2 ); | |||||
L1 = p448_is_zero( &L0 ); | |||||
return L2 & L1; | |||||
} | |||||
mask_t | |||||
eq_extensible ( | |||||
const struct extensible_t* a, | |||||
const struct extensible_t* b | |||||
) { | |||||
mask_t L3, L4; | |||||
struct p448_t L0, L1, L2; | |||||
p448_mul ( &L2, &b->z, &a->x ); | |||||
p448_mul ( &L1, &a->z, &b->x ); | |||||
p448_sub ( &L0, &L2, &L1 ); | |||||
p448_bias ( &L0, 2 ); | |||||
L4 = p448_is_zero( &L0 ); | |||||
p448_mul ( &L2, &b->z, &a->y ); | |||||
p448_mul ( &L1, &a->z, &b->y ); | |||||
p448_sub ( &L0, &L2, &L1 ); | |||||
p448_bias ( &L0, 2 ); | |||||
L3 = p448_is_zero( &L0 ); | |||||
return L4 & L3; | |||||
} | |||||
mask_t | |||||
eq_tw_extensible ( | |||||
const struct tw_extensible_t* a, | |||||
const struct tw_extensible_t* b | |||||
) { | |||||
mask_t L3, L4; | |||||
struct p448_t L0, L1, L2; | |||||
p448_mul ( &L2, &b->z, &a->x ); | |||||
p448_mul ( &L1, &a->z, &b->x ); | |||||
p448_sub ( &L0, &L2, &L1 ); | |||||
p448_bias ( &L0, 2 ); | |||||
L4 = p448_is_zero( &L0 ); | |||||
p448_mul ( &L2, &b->z, &a->y ); | |||||
p448_mul ( &L1, &a->z, &b->y ); | |||||
p448_sub ( &L0, &L2, &L1 ); | |||||
p448_bias ( &L0, 2 ); | |||||
L3 = p448_is_zero( &L0 ); | |||||
return L4 & L3; | |||||
} | |||||
/*
 * Map the field element `r` to an affine point, written to a->x and a->y.
 *
 * Both output fields are also used as scratch while the map is evaluated.
 * Exactly one p448_isr() call is made.  At the end, 1 is added to a->y
 * exactly when (1 - r^4) is zero.
 *
 * NOTE(review): the multi-digit constants below (1527402724, 6108985600,
 * 6109454568, 3054649120, 78160) are presumably derived from the curve
 * constant; their derivation is not visible here.  Two of them exceed
 * 32 bits, so p448_mulw() must accept a 64-bit multiplier -- confirm.
 */
void | |||||
elligator_2s_inject ( | |||||
struct affine_t* a, | |||||
const struct p448_t* r | |||||
) { | |||||
mask_t L0, L1; | |||||
struct p448_t L2, L3, L4, L5, L6, L7, L8, L9; | |||||
/* a->x = r^2 ; L3 = r^4 ; L9 = 1 - r^4 ; L2 = (1 - r^4)^2 */
p448_sqr ( &a->x, r ); | |||||
p448_sqr ( &L3, &a->x ); | |||||
p448_copy ( &a->y, &L3 ); | |||||
p448_subw ( &a->y, 1 ); | |||||
p448_neg ( &L9, &a->y ); | |||||
p448_bias ( &L9, 2 ); | |||||
p448_sqr ( &L2, &L9 ); | |||||
p448_bias ( &L2, 1 ); | |||||
/* Linear combinations of r^4 and (1 - r^4)^2 with fixed constants. */
p448_mulw ( &L7, &L2, 1527402724 ); | |||||
p448_bias ( &L7, 2 ); | |||||
p448_mulw ( &L8, &L3, 6108985600 ); | |||||
p448_add ( &a->y, &L8, &L7 ); | |||||
p448_mulw ( &L8, &L2, 6109454568 ); | |||||
p448_sub ( &L7, &a->y, &L8 ); | |||||
p448_mulw ( &L4, &a->y, 78160 ); | |||||
/* Build the inverse-square-root input and take the root. */
p448_mul ( &L6, &L7, &L9 ); | |||||
p448_mul ( &L8, &L6, &L4 ); | |||||
p448_mul ( &L4, &L7, &L8 ); | |||||
p448_isr ( &L5, &L4 ); | |||||
p448_mul ( &L4, &L6, &L5 ); | |||||
p448_sqr ( &L6, &L5 ); | |||||
p448_mul ( &L5, &L8, &L6 ); | |||||
p448_mul ( &L8, &L7, &L5 ); | |||||
p448_mul ( &L7, &L8, &L5 ); | |||||
/* x output: -78160 * L4 * ((r^2 - 1) - (r^2 + 1) * L8) */
p448_copy ( &L6, &a->x ); | |||||
p448_subw ( &L6, 1 ); | |||||
p448_addw ( &a->x, 1 ); | |||||
p448_mul ( &L5, &a->x, &L8 ); | |||||
p448_sub ( &a->x, &L6, &L5 ); | |||||
p448_bias ( &a->x, 3 ); | |||||
p448_mul ( &L5, &L4, &a->x ); | |||||
p448_mulw ( &L4, &L5, 78160 ); | |||||
p448_neg ( &a->x, &L4 ); | |||||
p448_bias ( &a->x, 2 ); | |||||
p448_weak_reduce( &a->x ); | |||||
/* y output: L7 * (3054649120 * (2*r^4 + (1 - r^4)^2 - 2) * L8 + a->y) */
p448_add ( &L4, &L3, &L3 ); | |||||
p448_add ( &L3, &L4, &L2 ); | |||||
p448_subw ( &L3, 2 ); | |||||
p448_mul ( &L2, &L3, &L8 ); | |||||
p448_mulw ( &L3, &L2, 3054649120 ); | |||||
p448_add ( &L2, &L3, &a->y ); | |||||
p448_mul ( &a->y, &L7, &L2 ); | |||||
/* Add 1 to y exactly when (1 - r^4) is zero. */
L1 = p448_is_zero( &L9 ); | |||||
L0 = - L1; | |||||
p448_addw ( &a->y, L0 ); | |||||
p448_weak_reduce( &a->y ); | |||||
} | |||||
mask_t | |||||
validate_affine ( | |||||
const struct affine_t* a | |||||
) { | |||||
struct p448_t L0, L1, L2, L3; | |||||
p448_sqr ( &L0, &a->y ); | |||||
p448_sqr ( &L2, &a->x ); | |||||
p448_add ( &L3, &L2, &L0 ); | |||||
p448_subw ( &L3, 1 ); | |||||
p448_mulw ( &L1, &L2, 39081 ); | |||||
p448_neg ( &L2, &L1 ); | |||||
p448_bias ( &L2, 2 ); | |||||
p448_mul ( &L1, &L0, &L2 ); | |||||
p448_sub ( &L0, &L3, &L1 ); | |||||
p448_bias ( &L0, 3 ); | |||||
return p448_is_zero( &L0 ); | |||||
} | |||||
/*
 * Check the two invariants of a twisted extensible point (stated in the
 * comments inside the body) and return the AND of the two
 * p448_is_zero() masks.
 *
 * NOTE(review): the two p448_addw(..., 0) calls look like no-op artifacts
 * of the code generator -- confirm p448_addw has no other effect before
 * removing them.
 */
mask_t | |||||
validate_tw_extensible ( | |||||
const struct tw_extensible_t* ext | |||||
) { | |||||
mask_t L4, L5; | |||||
struct p448_t L0, L1, L2, L3; | |||||
/* | |||||
* Check invariant: | |||||
* 0 = -x*y + z*t*u | |||||
*/ | |||||
/* L2 = z*t*u ; L0 = -(x*y) ; L1 = their sum */
p448_mul ( &L0, &ext->t, &ext->u ); | |||||
p448_mul ( &L2, &ext->z, &L0 ); | |||||
p448_addw ( &L2, 0 ); | |||||
p448_mul ( &L1, &ext->x, &ext->y ); | |||||
p448_neg ( &L0, &L1 ); | |||||
p448_add ( &L1, &L0, &L2 ); | |||||
p448_bias ( &L1, 2 ); | |||||
L5 = p448_is_zero( &L1 ); | |||||
/* | |||||
* Check invariant: | |||||
* 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2 | |||||
*/ | |||||
/* L2 = x^2 - y^2 */
p448_sqr ( &L2, &ext->y ); | |||||
p448_neg ( &L0, &L2 ); | |||||
p448_addw ( &L0, 0 ); | |||||
p448_sqr ( &L1, &ext->x ); | |||||
p448_bias ( &L1, 4 ); | |||||
p448_add ( &L2, &L1, &L0 ); | |||||
/* L0 = t^2*u^2 ; the d term is formed as -(39081 * t^2*u^2) */
p448_sqr ( &L3, &ext->u ); | |||||
p448_sqr ( &L1, &ext->t ); | |||||
p448_mul ( &L0, &L1, &L3 ); | |||||
p448_mulw ( &L1, &L0, 39081 ); | |||||
p448_neg ( &L3, &L1 ); | |||||
p448_add ( &L1, &L3, &L2 ); | |||||
/* subtract t^2*u^2, then add z^2 */
p448_neg ( &L3, &L0 ); | |||||
p448_add ( &L2, &L3, &L1 ); | |||||
p448_sqr ( &L1, &ext->z ); | |||||
p448_add ( &L0, &L1, &L2 ); | |||||
L4 = p448_is_zero( &L0 ); | |||||
return L5 & L4; | |||||
} | |||||
@@ -1,503 +0,0 @@ | |||||
/** | |||||
* @file ec_point.h | |||||
* @copyright | |||||
* Copyright (c) 2014 Cryptography Research, Inc. \n | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
* @author Mike Hamburg | |||||
* @warning This file was automatically generated. | |||||
*/ | |||||
#ifndef __CC_INCLUDED_EC_POINT_H__ | |||||
#define __CC_INCLUDED_EC_POINT_H__ | |||||
#include "p448.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
/** | |||||
* Affine point on an Edwards curve. | |||||
*/ | |||||
struct affine_t { | |||||
/* The two affine coordinates, each a field element. */
struct p448_t x, y; | |||||
}; | |||||
/** | |||||
* Affine point on a twisted Edwards curve. | |||||
*/ | |||||
struct tw_affine_t { | |||||
/* The two affine coordinates, each a field element. */
struct p448_t x, y; | |||||
}; | |||||
/** | |||||
* Montgomery buffer. | |||||
*/ | |||||
struct montgomery_t { | |||||
/*
 * Working state updated in place by montgomery_step() and read by
 * serialize_montgomery().
 * NOTE(review): the names suggest z0 is a fixed input and (xd, zd) /
 * (xa, za) are two projective x-coordinates -- confirm.
 */
struct p448_t z0, xd, zd, xa, za; | |||||
}; | |||||
/** | |||||
* Extensible coordinates for Edwards curves, suitable for | |||||
* accumulators. | |||||
* | |||||
* Represents the point (x/z, y/z). The extra coordinates | |||||
* t,u satisfy xy = tuz, allowing for conversion to Extended | |||||
* form by multiplying t and u. | |||||
* | |||||
* The idea is that you don't have to do this multiplication | |||||
* when doubling the accumulator, because the t-coordinate | |||||
* isn't used there. At the same time, as long as you only | |||||
* have one point in extensible form, additions don't cost | |||||
* extra. | |||||
* | |||||
* This is essentially a lazier version of Hisil et al's | |||||
* lookahead trick. It might be worth considering that trick | |||||
* instead. | |||||
*/ | |||||
struct extensible_t { | |||||
/* x, y, z: projective coordinates; t, u: auxiliary pair with x*y = t*u*z. */
struct p448_t x, y, z, t, u; | |||||
}; | |||||
/** | |||||
* Extensible coordinates for twisted Edwards curves, | |||||
* suitable for accumulators. | |||||
*
* validate_tw_extensible() checks the invariants
*   0 = -x*y + z*t*u
*   0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2
*/ | |||||
struct tw_extensible_t { | |||||
/* Same field layout as struct extensible_t. */
struct p448_t x, y, z, t, u; | |||||
}; | |||||
/** | |||||
* Niels coordinates for twisted Edwards curves. | |||||
* | |||||
* Good for mixed readdition; suitable for fixed tables. | |||||
*/ | |||||
struct tw_niels_t { | |||||
/* NOTE(review): the meaning of a, b and c is not documented in this header -- see the conversion routines. */
struct p448_t a, b, c; | |||||
}; | |||||
/** | |||||
* Projective niels coordinates for twisted Edwards curves. | |||||
* | |||||
* Good for readdition; suitable for temporary tables. | |||||
*/ | |||||
struct tw_pniels_t { | |||||
/* Niels part of the point. */
struct tw_niels_t n; | |||||
/* Extra coordinate; presumably the projective denominator -- confirm. */
struct p448_t z; | |||||
}; | |||||
/** | |||||
* Auto-generated copy method. | |||||
*/ | |||||
static __inline__ void | |||||
copy_affine ( | |||||
struct affine_t* a, | |||||
const struct affine_t* ds | |||||
) __attribute__((unused,always_inline)); | |||||
/** | |||||
* Auto-generated copy method. | |||||
*/ | |||||
static __inline__ void | |||||
copy_tw_affine ( | |||||
struct tw_affine_t* a, | |||||
const struct tw_affine_t* ds | |||||
) __attribute__((unused,always_inline)); | |||||
/** | |||||
* Auto-generated copy method. | |||||
*/ | |||||
static __inline__ void | |||||
copy_montgomery ( | |||||
struct montgomery_t* a, | |||||
const struct montgomery_t* ds | |||||
) __attribute__((unused,always_inline)); | |||||
/** | |||||
* Auto-generated copy method. | |||||
*/ | |||||
static __inline__ void | |||||
copy_extensible ( | |||||
struct extensible_t* a, | |||||
const struct extensible_t* ds | |||||
) __attribute__((unused,always_inline)); | |||||
/** | |||||
* Auto-generated copy method. | |||||
*/ | |||||
static __inline__ void | |||||
copy_tw_extensible ( | |||||
struct tw_extensible_t* a, | |||||
const struct tw_extensible_t* ds | |||||
) __attribute__((unused,always_inline)); | |||||
/** | |||||
* Auto-generated copy method. | |||||
*/ | |||||
static __inline__ void | |||||
copy_tw_niels ( | |||||
struct tw_niels_t* a, | |||||
const struct tw_niels_t* ds | |||||
) __attribute__((unused,always_inline)); | |||||
/** | |||||
* Auto-generated copy method. | |||||
*/ | |||||
static __inline__ void | |||||
copy_tw_pniels ( | |||||
struct tw_pniels_t* a, | |||||
const struct tw_pniels_t* ds | |||||
) __attribute__((unused,always_inline)); | |||||
/** | |||||
* Returns 1/sqrt(+- x). | |||||
* | |||||
* The Legendre symbol of the result is the same as that of the | |||||
* input. | |||||
* | |||||
* If x=0, returns 0. | |||||
*/ | |||||
void | |||||
p448_isr ( | |||||
struct p448_t* a, | |||||
const struct p448_t* x | |||||
); | |||||
/** | |||||
* Returns 1/x. | |||||
* | |||||
* If x=0, returns 0. | |||||
*/ | |||||
void | |||||
p448_inverse ( | |||||
struct p448_t* a, | |||||
const struct p448_t* x | |||||
); | |||||
/** | |||||
* Add two points on a twisted Edwards curve, one in Extensible form | |||||
* and the other in half-Niels form. | |||||
*/ | |||||
void | |||||
add_tw_niels_to_tw_extensible ( | |||||
struct tw_extensible_t* d, | |||||
const struct tw_niels_t* e | |||||
); | |||||
/** | |||||
* Subtract a point in half-Niels form from a point in Extensible form, | |||||
* both on a twisted Edwards curve. | |||||
*/ | |||||
void | |||||
sub_tw_niels_from_tw_extensible ( | |||||
struct tw_extensible_t* d, | |||||
const struct tw_niels_t* e | |||||
); | |||||
/** | |||||
* Add two points on a twisted Edwards curve, one in Extensible form | |||||
* and the other in projective Niels form. | |||||
*/ | |||||
void | |||||
add_tw_pniels_to_tw_extensible ( | |||||
struct tw_extensible_t* e, | |||||
const struct tw_pniels_t* a | |||||
); | |||||
/** | |||||
* Subtract a point in projective Niels form from a point in Extensible | |||||
* form, both on a twisted Edwards curve. | |||||
*/ | |||||
void | |||||
sub_tw_pniels_from_tw_extensible ( | |||||
struct tw_extensible_t* e, | |||||
const struct tw_pniels_t* a | |||||
); | |||||
/** | |||||
* Double a point on a twisted Edwards curve, in "extensible" coordinates. | |||||
*/ | |||||
void | |||||
double_tw_extensible ( | |||||
struct tw_extensible_t* a | |||||
); | |||||
/** | |||||
* Double a point on an Edwards curve, in "extensible" coordinates. | |||||
*/ | |||||
void | |||||
double_extensible ( | |||||
struct extensible_t* a | |||||
); | |||||
/** | |||||
* Double a point, and transfer it to the twisted curve. | |||||
* | |||||
* That is, apply the 4-isogeny. | |||||
*/ | |||||
void | |||||
twist_and_double ( | |||||
struct tw_extensible_t* b, | |||||
const struct extensible_t* a | |||||
); | |||||
/** | |||||
* Double a point, and transfer it to the untwisted curve. | |||||
* | |||||
* That is, apply the dual isogeny. | |||||
*/ | |||||
void | |||||
untwist_and_double ( | |||||
struct extensible_t* b, | |||||
const struct tw_extensible_t* a | |||||
); | |||||
void | |||||
convert_tw_affine_to_tw_pniels ( | |||||
struct tw_pniels_t* b, | |||||
const struct tw_affine_t* a | |||||
); | |||||
void | |||||
convert_tw_affine_to_tw_extensible ( | |||||
struct tw_extensible_t* b, | |||||
const struct tw_affine_t* a | |||||
); | |||||
void | |||||
convert_affine_to_extensible ( | |||||
struct extensible_t* b, | |||||
const struct affine_t* a | |||||
); | |||||
void | |||||
convert_tw_extensible_to_tw_pniels ( | |||||
struct tw_pniels_t* b, | |||||
const struct tw_extensible_t* a | |||||
); | |||||
void | |||||
convert_tw_pniels_to_tw_extensible ( | |||||
struct tw_extensible_t* e, | |||||
const struct tw_pniels_t* d | |||||
); | |||||
void | |||||
convert_tw_niels_to_tw_extensible ( | |||||
struct tw_extensible_t* e, | |||||
const struct tw_niels_t* d | |||||
); | |||||
void | |||||
montgomery_step ( | |||||
struct montgomery_t* a | |||||
); | |||||
void | |||||
serialize_montgomery ( | |||||
struct p448_t* sign, | |||||
struct p448_t* ser, | |||||
const struct montgomery_t* a, | |||||
const struct p448_t* sbz | |||||
); | |||||
/** | |||||
* Serialize a point on an Edwards curve. | |||||
* | |||||
* The serialized form would be sqrt((z-y)/(z+y)) with sign of xz. | |||||
* | |||||
* It would be on 4y^2/(1-d) = x^3 + 2(1+d)/(1-d) * x^2 + x. | |||||
* | |||||
* But 4/(1-d) isn't square, so we need to twist it: | |||||
* | |||||
* -x is on 4y^2/(d-1) = x^3 + 2(d+1)/(d-1) * x^2 + x | |||||
*/ | |||||
void | |||||
serialize_extensible ( | |||||
struct p448_t* b, | |||||
const struct extensible_t* a | |||||
); | |||||
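/** | |||||
* Untwist, double and serialize a twisted-curve point in a single pass. | |||||
* | |||||
* Presumably equivalent to untwist_and_double() followed by | |||||
* serialize_extensible(); confirm before relying on exact equivalence. | |||||
*/ | |||||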
void | |||||
untwist_and_double_and_serialize ( | |||||
struct p448_t* b, | |||||
const struct tw_extensible_t* a | |||||
); | |||||
/** | |||||
* Expensive transfer from untwisted to twisted. Roughly equivalent to halve and isogeny. | |||||
* Correctly transfers point of order 2. | |||||
* | |||||
* Can't have x=+1 (it's not even). There is code to fix the exception that would otherwise | |||||
* occur at (0,1). | |||||
* | |||||
* Input point must be even. | |||||
*/ | |||||
void | |||||
twist ( | |||||
struct tw_extensible_t* b, | |||||
const struct extensible_t* a | |||||
); | |||||
/** | |||||
* Deserialize a point to an untwisted affine curve. | |||||
*/ | |||||
mask_t | |||||
deserialize_affine ( | |||||
struct affine_t* a, | |||||
const struct p448_t* sz | |||||
); | |||||
/** | |||||
* Deserialize a point and transfer it to the twist. | |||||
* | |||||
* Not guaranteed to preserve the 4-torsion component. | |||||
* | |||||
* Refuses to deserialize +-1, which are the points of order 2. | |||||
*/ | |||||
mask_t | |||||
deserialize_and_twist_approx ( | |||||
struct tw_extensible_t* a, | |||||
const struct p448_t* sdm1, | |||||
const struct p448_t* sz | |||||
); | |||||
void | |||||
set_identity_extensible ( | |||||
struct extensible_t* a | |||||
); | |||||
void | |||||
set_identity_tw_extensible ( | |||||
struct tw_extensible_t* a | |||||
); | |||||
void | |||||
set_identity_affine ( | |||||
struct affine_t* a | |||||
); | |||||
mask_t | |||||
eq_affine ( | |||||
const struct affine_t* a, | |||||
const struct affine_t* b | |||||
); | |||||
mask_t | |||||
eq_extensible ( | |||||
const struct extensible_t* a, | |||||
const struct extensible_t* b | |||||
); | |||||
mask_t | |||||
eq_tw_extensible ( | |||||
const struct tw_extensible_t* a, | |||||
const struct tw_extensible_t* b | |||||
); | |||||
void | |||||
elligator_2s_inject ( | |||||
struct affine_t* a, | |||||
const struct p448_t* r | |||||
); | |||||
mask_t | |||||
validate_affine ( | |||||
const struct affine_t* a | |||||
); | |||||
/** | |||||
* Check the invariants for struct tw_extensible_t. | |||||
* PERF: This function was automatically generated | |||||
* with no regard for speed. | |||||
*/ | |||||
mask_t | |||||
validate_tw_extensible ( | |||||
const struct tw_extensible_t* ext | |||||
); | |||||
void | |||||
copy_affine ( | |||||
struct affine_t* a, | |||||
const struct affine_t* ds | |||||
) { | |||||
p448_copy ( &a->x, &ds->x ); | |||||
p448_copy ( &a->y, &ds->y ); | |||||
} | |||||
void | |||||
copy_tw_affine ( | |||||
struct tw_affine_t* a, | |||||
const struct tw_affine_t* ds | |||||
) { | |||||
p448_copy ( &a->x, &ds->x ); | |||||
p448_copy ( &a->y, &ds->y ); | |||||
} | |||||
void | |||||
copy_montgomery ( | |||||
struct montgomery_t* a, | |||||
const struct montgomery_t* ds | |||||
) { | |||||
p448_copy ( &a->z0, &ds->z0 ); | |||||
p448_copy ( &a->xd, &ds->xd ); | |||||
p448_copy ( &a->zd, &ds->zd ); | |||||
p448_copy ( &a->xa, &ds->xa ); | |||||
p448_copy ( &a->za, &ds->za ); | |||||
} | |||||
void | |||||
copy_extensible ( | |||||
struct extensible_t* a, | |||||
const struct extensible_t* ds | |||||
) { | |||||
p448_copy ( &a->x, &ds->x ); | |||||
p448_copy ( &a->y, &ds->y ); | |||||
p448_copy ( &a->z, &ds->z ); | |||||
p448_copy ( &a->t, &ds->t ); | |||||
p448_copy ( &a->u, &ds->u ); | |||||
} | |||||
void | |||||
copy_tw_extensible ( | |||||
struct tw_extensible_t* a, | |||||
const struct tw_extensible_t* ds | |||||
) { | |||||
p448_copy ( &a->x, &ds->x ); | |||||
p448_copy ( &a->y, &ds->y ); | |||||
p448_copy ( &a->z, &ds->z ); | |||||
p448_copy ( &a->t, &ds->t ); | |||||
p448_copy ( &a->u, &ds->u ); | |||||
} | |||||
void | |||||
copy_tw_niels ( | |||||
struct tw_niels_t* a, | |||||
const struct tw_niels_t* ds | |||||
) { | |||||
p448_copy ( &a->a, &ds->a ); | |||||
p448_copy ( &a->b, &ds->b ); | |||||
p448_copy ( &a->c, &ds->c ); | |||||
} | |||||
void | |||||
copy_tw_pniels ( | |||||
struct tw_pniels_t* a, | |||||
const struct tw_pniels_t* ds | |||||
) { | |||||
copy_tw_niels( &a->n, &ds->n ); | |||||
p448_copy ( &a->z, &ds->z ); | |||||
} | |||||
#ifdef __cplusplus | |||||
}; /* extern "C" */ | |||||
#endif | |||||
#endif /* __CC_INCLUDED_EC_POINT_H__ */ |
@@ -1,5 +0,0 @@ | |||||
_goldilocks_init | |||||
_goldilocks_keygen | |||||
_goldilocks_shared_secret | |||||
_goldilocks_sign | |||||
_goldilocks_verify |
@@ -1,299 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#include <errno.h> | |||||
#include "goldilocks.h" | |||||
#include "ec_point.h" | |||||
#include "scalarmul.h" | |||||
#include "barrett_field.h" | |||||
#include "crandom.h" | |||||
#include "sha512.h" | |||||
#ifndef GOLDILOCKS_RANDOM_INIT_FILE | |||||
#define GOLDILOCKS_RANDOM_INIT_FILE "/dev/urandom" | |||||
#endif | |||||
#ifndef GOLDILOCKS_RANDOM_RESEED_INTERVAL | |||||
#define GOLDILOCKS_RANDOM_RESEED_INTERVAL 10000 | |||||
#endif | |||||
/* We'll check it ourselves */ | |||||
#ifndef GOLDILOCKS_RANDOM_RESEEDS_MANDATORY | |||||
#define GOLDILOCKS_RANDOM_RESEEDS_MANDATORY 0 | |||||
#endif | |||||
/* TODO: word size; precompute | |||||
*/
/*
 * Base point in affine form on the untwisted curve: x is given as eight
 * limbs, y is the small value 19.  goldilocks_init() asserts that it passes
 * validate_affine() and derives the precomputed tables from it.
 * NOTE(review): the limb literals are written for the 8-limb p448_t layout
 * used on 64-bit builds (see the "word size" TODO) -- confirm for others.
 */
const struct affine_t goldilocks_base_point = { | |||||
{{ 0xf0de840aed939full, 0xc170033f4ba0c7ull, 0xf3932d94c63d96ull, 0x9cecfa96147eaaull, | |||||
0x5f065c3c59d070ull, 0x3a6a26adf73324ull, 0x1b4faff4609845ull, 0x297ea0ea2692ffull | |||||
}}, | |||||
{{ 19, 0, 0, 0, 0, 0, 0, 0 }} | |||||
}; | |||||
// /* TODO: direct */ | |||||
// void | |||||
// transfer_and_serialize(struct p448_t *out, const struct tw_extensible_t *twext) { | |||||
// struct extensible_t ext; | |||||
// transfer_tw_to_un(&ext, twext); | |||||
// serialize_extensible(out, &ext); | |||||
// } | |||||
// FIXME: threading | |||||
// TODO: autogen instead of init | |||||
/*
 * Process-wide state filled in by goldilocks_init():
 *   combs - table passed to edwards_comb() with parameters (5, 5, 18)
 *   wnafs - table passed to edwards_combo_var_fixed_vt() with parameter 5
 *   rand  - random generator state used by goldilocks_keygen()
 * It is read without any locking (see the FIXME above).
 */
struct { | |||||
struct tw_niels_t combs[80]; | |||||
struct tw_niels_t wnafs[32]; | |||||
struct crandom_state_t rand; | |||||
} goldilocks_global; | |||||
int | |||||
goldilocks_init () { | |||||
struct extensible_t ext; | |||||
struct tw_extensible_t text; | |||||
/* Sanity check: the base point is on the curve. */ | |||||
assert(validate_affine(&goldilocks_base_point)); | |||||
/* Convert it to twisted Edwards. */ | |||||
convert_affine_to_extensible(&ext, &goldilocks_base_point); | |||||
twist(&text, &ext); | |||||
//p448_transfer_un_to_tw(&text, &ext); | |||||
/* Precompute the tables. */ | |||||
precompute_for_combs(goldilocks_global.combs, &text, 5, 5, 18); | |||||
precompute_for_wnaf(goldilocks_global.wnafs, &text, 5); | |||||
return crandom_init_from_file(&goldilocks_global.rand, | |||||
GOLDILOCKS_RANDOM_INIT_FILE, | |||||
GOLDILOCKS_RANDOM_RESEED_INTERVAL, | |||||
GOLDILOCKS_RANDOM_RESEEDS_MANDATORY); | |||||
} | |||||
/*
 * Constant passed as the modulus-related argument to barrett_reduce(),
 * barrett_negate() and barrett_mac(), always together with the parameters
 * (7, 4, 62).
 * NOTE(review): the four 64-bit literals assume a 64-bit word_t (see the
 * "TODO word size" notes at the call sites).  The array is never written in
 * this file and could presumably be const -- confirm the barrett_* prototypes.
 */
static word_t | |||||
q448_lo[4] = { | |||||
0xdc873d6d54a7bb0dull, | |||||
0xde933d8d723a70aaull, | |||||
0x3bb124b65129c96full, | |||||
0x000000008335dc16ull | |||||
}; | |||||
/*
 * Field constant handed to deserialize_and_twist_approx() as its `sdm1`
 * argument by goldilocks_verify().
 * NOTE(review): by its name this is a square root of (d - 1) for the curve
 * constant d; the value is not checked anywhere in this file.
 */
static const struct p448_t | |||||
sqrt_d_minus_1 = {{ | |||||
0xd2e21836749f46ull, | |||||
0x888db42b4f0179ull, | |||||
0x5a189aabdeea38ull, | |||||
0x51e65ca6f14c06ull, | |||||
0xa49f7b424d9770ull, | |||||
0xdcac4628c5f656ull, | |||||
0x49443b8748734aull, | |||||
0x12fec0c0b25b7aull | |||||
}}; | |||||
int | |||||
goldilocks_keygen ( | |||||
struct goldilocks_private_key_t *privkey, | |||||
struct goldilocks_public_key_t *pubkey | |||||
) { | |||||
// TODO: check for init. Also maybe take CRANDOM object? API... | |||||
word_t sk[448*2/WORD_BITS]; | |||||
struct tw_extensible_t exta; | |||||
struct p448_t pk; | |||||
int ret = crandom_generate(&goldilocks_global.rand, (unsigned char *)sk, sizeof(sk)); | |||||
barrett_reduce(sk,sizeof(sk)/sizeof(sk[0]),0,q448_lo,7,4,62); // TODO word size | |||||
q448_serialize(privkey->opaque, sk); | |||||
edwards_comb(&exta, sk, goldilocks_global.combs, 5, 5, 18); | |||||
//transfer_and_serialize_qtor(&pk, &sqrt_d_minus_1, &exta); | |||||
untwist_and_double_and_serialize(&pk, &exta); | |||||
p448_serialize(pubkey->opaque, &pk); | |||||
memcpy(&privkey->opaque[56], pubkey->opaque, 56); | |||||
int ret2 = crandom_generate(&goldilocks_global.rand, &privkey->opaque[112], 32); | |||||
if (!ret) ret = ret2; | |||||
return ret ? GOLDI_ENODICE : GOLDI_EOK; | |||||
} | |||||
int | |||||
goldilocks_shared_secret ( | |||||
uint8_t shared[64], | |||||
const struct goldilocks_private_key_t *my_privkey, | |||||
const struct goldilocks_public_key_t *your_pubkey | |||||
) { | |||||
word_t sk[448/WORD_BITS]; | |||||
struct p448_t pk; | |||||
mask_t succ = p448_deserialize(&pk,your_pubkey->opaque), msucc = -1; | |||||
#ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS | |||||
struct p448_t sum, prod; | |||||
msucc &= p448_deserialize(&sum,&my_privkey->opaque[56]); | |||||
p448_mul(&prod,&pk,&sum); | |||||
p448_add(&sum,&pk,&sum); | |||||
#endif | |||||
msucc &= q448_deserialize(sk,my_privkey->opaque); | |||||
succ &= p448_montgomery_ladder(&pk,&pk,sk,446,2); | |||||
p448_serialize(shared,&pk); | |||||
/* obliterate records of our failure by adjusting with obliteration key */ | |||||
struct sha512_ctx_t ctx; | |||||
sha512_init(&ctx); | |||||
#ifdef EXPERIMENT_ECDH_OBLITERATE_CT | |||||
uint8_t oblit[40]; | |||||
unsigned i; | |||||
for (i=0; i<8; i++) { | |||||
oblit[i] = "noshared"[i] & ~(succ&msucc); | |||||
} | |||||
for (i=0; i<32; i++) { | |||||
oblit[8+i] = my_privkey->opaque[112+i] & ~(succ&msucc); | |||||
} | |||||
sha512_update(&ctx, oblit, 40); | |||||
#endif | |||||
#ifdef EXPERIMENT_ECDH_STIR_IN_PUBKEYS | |||||
/* stir in the sum and product of the pubkeys. */ | |||||
uint8_t a_pk[56]; | |||||
p448_serialize(a_pk, &sum); | |||||
sha512_update(&ctx, a_pk, 56); | |||||
p448_serialize(a_pk, &prod); | |||||
sha512_update(&ctx, a_pk, 56); | |||||
#endif | |||||
/* stir in the shared key and finish */ | |||||
sha512_update(&ctx, shared, 56); | |||||
sha512_final(&ctx, shared); | |||||
return (GOLDI_ECORRUPT & ~msucc) | |||||
| (GOLDI_EINVAL & msucc &~ succ) | |||||
| (GOLDI_EOK & msucc & succ); | |||||
} | |||||
/*
 * Sign `message` with `privkey`.
 *
 * Output layout: signature_out[0..55] is the serialized nonce point,
 * signature_out[56..111] is the serialized response scalar.
 *
 * Returns GOLDI_ECORRUPT if the secret scalar fails to deserialize,
 * otherwise the literal 0.
 * NOTE(review): other functions in this file return GOLDI_EOK on success;
 * presumably that is 0 as well -- confirm and unify.
 */
int | |||||
goldilocks_sign ( | |||||
uint8_t signature_out[56*2], | |||||
const uint8_t *message, | |||||
uint64_t message_len, | |||||
const struct goldilocks_private_key_t *privkey | |||||
) { | |||||
/* challenge = H(pk, [nonceG], message). FIXME: endian. */ | |||||
word_t skw[448/WORD_BITS]; | |||||
mask_t succ = q448_deserialize(skw,privkey->opaque); | |||||
if (!succ) { | |||||
memset(skw,0,sizeof(skw)); | |||||
return GOLDI_ECORRUPT; | |||||
} | |||||
/* Derive a nonce. TODO: use HMAC. FIXME: endian. FUTURE: factor. */ | |||||
/* tk = H("signonce", key[112..143], message, key[112..143]), then reduced. */
word_t tk[512/WORD_BITS]; | |||||
struct sha512_ctx_t ctx; | |||||
sha512_init(&ctx); | |||||
sha512_update(&ctx, (const unsigned char *)"signonce", 8); | |||||
sha512_update(&ctx, &privkey->opaque[112], 32); | |||||
sha512_update(&ctx, message, message_len); | |||||
sha512_update(&ctx, &privkey->opaque[112], 32); | |||||
sha512_final(&ctx, (unsigned char *)tk); | |||||
barrett_reduce(tk,512/WORD_BITS,0,q448_lo,7,4,62); // TODO word size | |||||
/* 4[nonce]G */ | |||||
uint8_t signature_tmp[56]; | |||||
struct tw_extensible_t exta; | |||||
struct p448_t gsk; | |||||
edwards_comb(&exta, tk, goldilocks_global.combs, 5, 5, 18); | |||||
double_tw_extensible(&exta); | |||||
untwist_and_double_and_serialize(&gsk, &exta); | |||||
p448_serialize(signature_tmp, &gsk); | |||||
/*
 * NOTE(review): ctx is reused here without another sha512_init();
 * this relies on sha512_final() leaving the context re-initialized --
 * confirm in sha512.c.
 */
word_t challenge[512/WORD_BITS]; | |||||
sha512_update(&ctx, &privkey->opaque[56], 56); | |||||
sha512_update(&ctx, signature_tmp, 56); | |||||
sha512_update(&ctx, message, message_len); | |||||
sha512_final(&ctx, (unsigned char *)challenge); | |||||
// reduce challenge and sub. | |||||
barrett_negate(challenge,512/WORD_BITS,q448_lo,7,4,62); | |||||
barrett_mac( | |||||
tk,512/WORD_BITS, | |||||
challenge,512/WORD_BITS, | |||||
skw,448/WORD_BITS, | |||||
q448_lo,7,4,62 | |||||
); | |||||
/* Double tk (tk + tk with an all-ones mask) and reduce, feeding in the carry. */
word_t carry = add_nr_ext_packed(tk,tk,512/WORD_BITS,tk,512/WORD_BITS,-1); | |||||
barrett_reduce(tk,512/WORD_BITS,carry,q448_lo,7,4,62); | |||||
memcpy(signature_out, signature_tmp, 56); | |||||
q448_serialize(signature_out+56, tk); | |||||
/*
 * Wipe the secret-dependent buffers.
 * NOTE(review): a plain memset of locals that are not read again may be
 * elided by the optimizer.
 */
memset((unsigned char *)tk,0,sizeof(tk)); | |||||
memset((unsigned char *)skw,0,sizeof(skw)); | |||||
memset((unsigned char *)challenge,0,sizeof(challenge)); | |||||
/* response = 2(nonce_secret - sk*challenge) | |||||
* Nonce = 8[nonce_secret]*G | |||||
* PK = 2[sk]*G, except doubled (TODO) | |||||
* so [2] ( [response]G + 2[challenge]PK ) = Nonce | |||||
*/ | |||||
return 0; | |||||
} | |||||
/*
 * Verify a signature produced by goldilocks_sign().
 *
 * signature[0..55] is the serialized nonce point and signature[56..111]
 * the response scalar.  The function recomputes the challenge, combines it
 * with the public key and the response, and compares the serialized result
 * with the nonce point from the signature.
 *
 * Returns 0 when the signature matches, GOLDI_EINVAL when any input fails
 * to deserialize or the comparison fails.
 * This function returns early on bad input, so it is not constant-time;
 * all of its inputs are public values.
 */
int | |||||
goldilocks_verify ( | |||||
const uint8_t signature[56*2], | |||||
const uint8_t *message, | |||||
uint64_t message_len, | |||||
const struct goldilocks_public_key_t *pubkey | |||||
) { | |||||
struct p448_t pk; | |||||
word_t s[448/WORD_BITS]; | |||||
mask_t succ = p448_deserialize(&pk,pubkey->opaque); | |||||
if (!succ) return GOLDI_EINVAL; | |||||
succ = q448_deserialize(s, &signature[56]); | |||||
if (!succ) return GOLDI_EINVAL; | |||||
/* challenge = H(pk, [nonceG], message). FIXME: endian. */ | |||||
word_t challenge[512/WORD_BITS]; | |||||
struct sha512_ctx_t ctx; | |||||
sha512_init(&ctx); | |||||
sha512_update(&ctx, pubkey->opaque, 56); | |||||
sha512_update(&ctx, signature, 56); | |||||
sha512_update(&ctx, message, message_len); | |||||
sha512_final(&ctx, (unsigned char *)challenge); | |||||
barrett_reduce(challenge,512/WORD_BITS,0,q448_lo,7,4,62); | |||||
struct p448_t eph; | |||||
struct tw_extensible_t pk_text; | |||||
/* deserialize [nonce]G */ | |||||
succ = p448_deserialize(&eph, signature); | |||||
if (!succ) return GOLDI_EINVAL; | |||||
// succ = affine_deserialize(&pk_aff,&pk); | |||||
// if (!succ) return EINVAL; | |||||
// | |||||
// convert_affine_to_extensible(&pk_ext,&pk_aff); | |||||
// transfer_un_to_tw(&pk_text,&pk_ext); | |||||
/* Move the public key onto the twisted curve. */
succ = deserialize_and_twist_approx(&pk_text, &sqrt_d_minus_1, &pk); | |||||
if (!succ) return GOLDI_EINVAL; | |||||
/* Combine challenge*PK with s*G (via the wnaf table); pk is then reused for the serialized result. */
edwards_combo_var_fixed_vt( &pk_text, challenge, s, goldilocks_global.wnafs, 5 ); | |||||
untwist_and_double_and_serialize( &pk, &pk_text ); | |||||
/* Accept only if the recomputed value equals the nonce point from the signature. */
p448_sub(&eph, &eph, &pk); | |||||
p448_bias(&eph, 2); | |||||
succ = p448_is_zero(&eph); | |||||
return succ ? 0 : GOLDI_EINVAL; | |||||
} | |||||
@@ -1,171 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
/** | |||||
* @file goldilocks.h | |||||
* @author Mike Hamburg | |||||
* @brief Goldilocks high-level functions. | |||||
*/ | |||||
#ifndef __GOLDILOCKS_H__ | |||||
#define __GOLDILOCKS_H__ 1 | |||||
#include <stdint.h> | |||||
/**
 * @brief A Goldilocks public key in serialized form.
 *
 * The contents are opaque to callers.
 *
 * @warning This isn't even my final form!
 */
struct goldilocks_public_key_t {
    uint8_t opaque[56]; /**< Serialized key bytes. */
};
/**
 * @brief A Goldilocks private key in serialized form.
 *
 * Layout: 56 bytes of actual private key, followed by 56 bytes of
 * public key, followed by 32 bytes of symmetric key used for
 * randomization (144 bytes in total).
 *
 * @warning This isn't even my final form!
 */
struct goldilocks_private_key_t {
    uint8_t opaque[144]; /**< Serialized key bytes. */
};
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
/** @brief Success: no error occurred. */
static const int GOLDI_EOK      = 0;

/** @brief Error: the caller's own key is corrupt. */
static const int GOLDI_ECORRUPT = 44801;

/** @brief Error: the other party's key is corrupt. */
static const int GOLDI_EINVAL   = 44802;

/** @brief Error: not enough entropy was available. */
static const int GOLDI_ENODICE  = 44804;
/**
 * @brief Initialize Goldilocks' precomputed tables and
 * random number generator.
 *
 * Declared with an explicit (void) parameter list so that C compilers
 * treat this as a real prototype and reject calls that pass arguments.
 *
 * @retval GOLDI_EOK Success.
 * @retval Nonzero An error occurred.
 */
int
goldilocks_init(void);
/**
 * @brief Create a fresh random keypair.
 *
 * @param [out] privkey Receives the generated private key.
 * @param [out] pubkey  Receives the generated public key.
 *
 * @warning This isn't even my final form!
 *
 * @retval GOLDI_EOK     Success.
 * @retval GOLDI_ENODICE Insufficient entropy.
 */
int goldilocks_keygen (
    struct goldilocks_private_key_t *privkey,
    struct goldilocks_public_key_t *pubkey
) __attribute__((warn_unused_result));
/**
 * @brief Compute a Diffie-Hellman shared secret in constant time.
 *
 * Two compile-time flags, whose merit remains to be decided, alter the
 * derivation:
 *
 * - EXPERIMENT_ECDH_OBLITERATE_CT: 40 bytes of zeros are prepended to the
 *   secret before hashing.  If the other party's key is detectably
 *   corrupt, the symmetric part of the secret key is used instead to
 *   produce a pseudorandom value.
 * - EXPERIMENT_ECDH_STIR_IN_PUBKEYS: the sum and product of the two
 *   parties' public keys are prepended to the hash.
 *
 * @warning This isn't even my final form!
 *
 * @param [out] shared      The shared secret established with the other party.
 * @param [in]  my_privkey  My private key.
 * @param [in]  your_pubkey The other party's public key.
 *
 * @retval GOLDI_EOK      Success.
 * @retval GOLDI_ECORRUPT My key is corrupt.
 * @retval GOLDI_EINVAL   The other party's key is corrupt.
 */
int goldilocks_shared_secret (
    uint8_t shared[64],
    const struct goldilocks_private_key_t *my_privkey,
    const struct goldilocks_public_key_t *your_pubkey
) __attribute__((warn_unused_result));
/**
 * @brief Produce a signature over a message.
 *
 * Signing is deterministic: the nonce is formed from the symmetric secret
 * stored in the private key.  The scheme is a modified Schnorr system,
 * like EdDSA.
 *
 * @warning This isn't even my final form!
 * @warning This function contains endian bugs. (TODO)
 *
 * @param [out] signature_out Space for the output signature (56*2 bytes).
 * @param [in]  message       The message to be signed.
 * @param [in]  message_len   The length of the message to be signed.
 * @param [in]  privkey       My private key.
 *
 * @retval GOLDI_EOK      Success.
 * @retval GOLDI_ECORRUPT My key is corrupt.
 */
int goldilocks_sign (
    uint8_t signature_out[56*2],
    const uint8_t *message,
    uint64_t message_len,
    const struct goldilocks_private_key_t *privkey
);
/**
 * @brief Verify a signature.
 *
 * This function is fairly strict.  It will correctly detect when
 * the signature has the wrong cofactor component.  Once deserialization
 * of numbers is strictified (TODO) it will limit the response to being
 * less than q as well.
 *
 * Currently this function does not detect when the public key is weird,
 * e.g. 0, has cofactor, etc.  As a result, a party with a bogus public
 * key could create signatures that succeed on some systems and fail on
 * others.
 *
 * @warning This isn't even my final form!
 * @warning This function contains endian bugs. (TODO)
 *
 * @param [in] signature The signature to be verified (56*2 bytes).
 * @param [in] message The message to be verified.
 * @param [in] message_len The length of the message to be verified.
 * @param [in] pubkey The signer's public key.
 *
 * @retval GOLDI_EOK Success.
 * @retval GOLDI_EINVAL The public key or signature is corrupt.
 */ | |||||
int | |||||
goldilocks_verify ( | |||||
const uint8_t signature[56*2], | |||||
const uint8_t *message, | |||||
uint64_t message_len, | |||||
const struct goldilocks_public_key_t *pubkey | |||||
) __attribute__((warn_unused_result)); | |||||
#ifdef __cplusplus | |||||
}; /* extern "C" */ | |||||
#endif | |||||
#endif /* __GOLDILOCKS_H__ */ |
@@ -1,199 +0,0 @@ | |||||
/* Copyright (c) 2011 Stanford University. | |||||
* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
/** @file crandom.h | |||||
* @brief cRandom intrinsics header. | |||||
*/ | |||||
#ifndef __CRANDOM_INTRINSICS_H__ | |||||
#define __CRANDOM_INTRINSICS_H__ 1 | |||||
#include <sys/types.h> | |||||
#include <immintrin.h> | |||||
/* Storage class and attributes shared by every inline helper in this header. */
#define INTRINSIC \
    static __inline__ __attribute__((__gnu_inline__, __always_inline__))

/* CPU feature bits; combined into masks and tested with HAVE(). */
#define GEN    1
#define SSE2   2
#define SSSE3  4
#define AESNI  8
#define XOP    16
#define AVX    32
#define AVX2   64
#define RDRAND 128

/**
 * Read the processor's time-stamp counter.
 *
 * RDTSC delivers the counter in EDX:EAX.  The two halves are captured
 * through separate "=a" and "=d" outputs and recombined; the "=A"
 * constraint only denotes the edx:eax pair on 32-bit x86, and on x86-64
 * it would select a single register and drop the upper half.
 *
 * @return The 64-bit counter on x86 / x86-64, or 0 on other architectures.
 */
INTRINSIC u_int64_t rdtsc(void) {
    u_int64_t out = 0;
# if (defined(__i386__) || defined(__x86_64__))
    u_int32_t lo, hi;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    out = ((u_int64_t)hi << 32) | lo;
# endif
    return out;
}
/** | |||||
* Return x unchanged, but confuse the compiler. | |||||
* | |||||
* This is mainly for use in test scripts, to prevent the value from | |||||
* being constant-folded or removed by dead code elimination. | |||||
* | |||||
* @param x A 64-bit number. | |||||
* @return The same number in a register. | |||||
*/ | |||||
INTRINSIC u_int64_t opacify(u_int64_t x) { | |||||
__asm__ volatile("mov %0, %0" : "+r"(x)); | |||||
return x; | |||||
} | |||||
/*
 * Compile-time feature knobs for AVX2, AVX and SSSE3.
 *
 * MIGHT_HAVE_x is 1 when the compiler was told it may emit x (the
 * corresponding __x__ macro is defined) and 0 otherwise.
 * MUST_HAVE_x may be predefined by the build; when it is not, it chains
 * downward: MUST_HAVE_AVX defaults to MUST_HAVE_AVX2 and MUST_HAVE_SSSE3
 * defaults to MUST_HAVE_AVX.  When the compiler flag is absent both
 * macros are forced to 0.
 */
#ifdef __AVX2__ | |||||
# define MIGHT_HAVE_AVX2 1 | |||||
# ifndef MUST_HAVE_AVX2 | |||||
# define MUST_HAVE_AVX2 0 | |||||
# endif | |||||
#else | |||||
# define MIGHT_HAVE_AVX2 0 | |||||
# define MUST_HAVE_AVX2 0 | |||||
#endif | |||||
#ifdef __AVX__ | |||||
# define MIGHT_HAVE_AVX 1 | |||||
# ifndef MUST_HAVE_AVX | |||||
# define MUST_HAVE_AVX MUST_HAVE_AVX2 | |||||
# endif | |||||
#else | |||||
# define MIGHT_HAVE_AVX 0 | |||||
# define MUST_HAVE_AVX 0 | |||||
#endif | |||||
#ifdef __SSSE3__ | |||||
# define MIGHT_HAVE_SSSE3 1 | |||||
# ifndef MUST_HAVE_SSSE3 | |||||
# define MUST_HAVE_SSSE3 MUST_HAVE_AVX | |||||
# endif | |||||
#else | |||||
# define MIGHT_HAVE_SSSE3 0 | |||||
# define MUST_HAVE_SSSE3 0 | |||||
#endif | |||||
/*
 * SSE2 support: feature knobs (MUST_HAVE_SSE2 defaults to MUST_HAVE_SSSE3),
 * the ssereg vector type, two instruction-name aliases and a rotate helper.
 *
 * NOTE(review): pslldq is aliased to _mm_slli_epi32, which shifts each
 * 32-bit lane; the PSLLDQ instruction itself is a whole-register byte
 * shift (_mm_slli_si128).  Confirm against callers which one is intended.
 */
#ifdef __SSE2__ | |||||
# define MIGHT_HAVE_SSE2 1 | |||||
# ifndef MUST_HAVE_SSE2 | |||||
# define MUST_HAVE_SSE2 MUST_HAVE_SSSE3 | |||||
# endif | |||||
typedef __m128i ssereg; | |||||
# define pslldq _mm_slli_epi32 | |||||
# define pshufd _mm_shuffle_epi32 | |||||
/* Rotate each 32-bit lane of a left by r bits (left shift XOR right shift by 32-r). */
INTRINSIC ssereg sse2_rotate(int r, ssereg a) { | |||||
return _mm_slli_epi32(a, r) ^ _mm_srli_epi32(a, 32-r); | |||||
} | |||||
#else | |||||
# define MIGHT_HAVE_SSE2 0 | |||||
# define MUST_HAVE_SSE2 0 | |||||
#endif | |||||
/*
 * AES-NI support.  When __AES__ is defined this enables MIGHT_HAVE_AESNI,
 * also enables MIGHT_HAVE_RDRAND unless it was predefined, defaults both
 * MUST_HAVE_* knobs to 0, and provides inline-asm wrappers for three AES
 * instructions.  Without __AES__ all four knobs are 0.
 */
#ifdef __AES__ | |||||
/* don't include intrinsics file, because not all platforms have it */ | |||||
# define MIGHT_HAVE_AESNI 1 | |||||
# ifndef MIGHT_HAVE_RDRAND | |||||
# define MIGHT_HAVE_RDRAND 1 | |||||
# endif | |||||
# ifndef MUST_HAVE_RDRAND | |||||
# define MUST_HAVE_RDRAND 0 | |||||
# endif | |||||
# ifndef MUST_HAVE_AESNI | |||||
# define MUST_HAVE_AESNI 0 | |||||
# endif | |||||
/*
 * Wrapper for the aeskeygenassist instruction applied to x with constant rc.
 * NOTE(review): rc is passed with a "g" constraint; the instruction
 * presumably needs an immediate, so this likely relies on being inlined
 * with a constant rc -- confirm.
 */
INTRINSIC ssereg aeskeygenassist(int rc, ssereg x) { | |||||
ssereg out; | |||||
__asm__("aeskeygenassist %2, %1, %0" : "=x"(out) : "x"(x), "g"(rc)); | |||||
return out; | |||||
} | |||||
/* Wrapper for the aesenc instruction: block is updated in place using subkey. */
INTRINSIC ssereg aesenc(ssereg subkey, ssereg block) { | |||||
ssereg out = block; | |||||
__asm__("aesenc %1, %0" : "+x"(out) : "x"(subkey)); | |||||
return out; | |||||
} | |||||
/* Wrapper for the aesenclast instruction: same operand shape as aesenc. */
INTRINSIC ssereg aesenclast(ssereg subkey, ssereg block) { | |||||
ssereg out = block; | |||||
__asm__("aesenclast %1, %0" : "+x"(out) : "x"(subkey)); | |||||
return out; | |||||
} | |||||
#else | |||||
# define MIGHT_HAVE_AESNI 0 | |||||
# define MUST_HAVE_AESNI 0 | |||||
# define MIGHT_HAVE_RDRAND 0 | |||||
# define MUST_HAVE_RDRAND 0 | |||||
#endif | |||||
/*
 * XOP support: feature knobs (MUST_HAVE_XOP defaults to 0) and an
 * inline-asm wrapper for the vprotd instruction.
 */
#ifdef __XOP__ | |||||
/* don't include intrinsics file, because not all platforms have it */ | |||||
# define MIGHT_HAVE_XOP 1 | |||||
# ifndef MUST_HAVE_XOP | |||||
# define MUST_HAVE_XOP 0 | |||||
# endif | |||||
/*
 * Apply vprotd to x with the given amount and return the result.
 * NOTE(review): presumably a per-32-bit-lane rotate; amount uses a "g"
 * constraint, so this likely relies on a constant argument -- confirm.
 */
INTRINSIC ssereg xop_rotate(int amount, ssereg x) { | |||||
ssereg out; | |||||
__asm__ ("vprotd %1, %2, %0" : "=x"(out) : "x"(x), "g"(amount)); | |||||
return out; | |||||
} | |||||
#else | |||||
# define MIGHT_HAVE_XOP 0 | |||||
# define MUST_HAVE_XOP 0 | |||||
#endif | |||||
/*
 * MIGHT_MASK / MUST_MASK: bitwise OR of the feature bits (SSE2, SSSE3,
 * AESNI, XOP, AVX, RDRAND, AVX2) whose MIGHT_HAVE_x / MUST_HAVE_x knob
 * is 1.  MIGHT_HAVE(feature) and MUST_HAVE(feature) are true only when
 * every bit of `feature` is present in the respective mask.
 */
#define MIGHT_MASK \ | |||||
( SSE2 * MIGHT_HAVE_SSE2 \ | |||||
| SSSE3 * MIGHT_HAVE_SSSE3 \ | |||||
| AESNI * MIGHT_HAVE_AESNI \ | |||||
| XOP * MIGHT_HAVE_XOP \ | |||||
| AVX * MIGHT_HAVE_AVX \ | |||||
| RDRAND * MIGHT_HAVE_RDRAND \ | |||||
| AVX2 * MIGHT_HAVE_AVX2) | |||||
#define MUST_MASK \ | |||||
( SSE2 * MUST_HAVE_SSE2 \ | |||||
| SSSE3 * MUST_HAVE_SSSE3 \ | |||||
| AESNI * MUST_HAVE_AESNI \ | |||||
| XOP * MUST_HAVE_XOP \ | |||||
| AVX * MUST_HAVE_AVX \ | |||||
| RDRAND * MUST_HAVE_RDRAND \ | |||||
| AVX2 * MUST_HAVE_AVX2 ) | |||||
#define MIGHT_HAVE(feature) ((MIGHT_MASK & feature) == feature) | |||||
#define MUST_HAVE(feature) ((MUST_MASK & feature) == feature) | |||||
/* extern_c expands to extern "C" under C++ and to nothing under C. */
#ifdef __cplusplus | |||||
# define extern_c extern "C" | |||||
#else | |||||
# define extern_c | |||||
#endif | |||||
/*
 * Feature probe, defined elsewhere; returns a mask of feature bits
 * (its result is stored in crandom_features and tested by HAVE()).
 */
extern_c | |||||
unsigned int crandom_detect_features(); | |||||
/* Branch-prediction hints, defined only if the includer has not already done so. */
#ifndef likely | |||||
# define likely(x) __builtin_expect((x),1) | |||||
# define unlikely(x) __builtin_expect((x),0) | |||||
#endif | |||||
/* Cached result of crandom_detect_features(); HAVE() treats 0 as "not probed yet". */
extern volatile unsigned int crandom_features; | |||||
INTRINSIC int HAVE(unsigned int feature) { | |||||
unsigned int features; | |||||
if (!MIGHT_HAVE(feature)) return 0; | |||||
if (MUST_HAVE(feature)) return 1; | |||||
features = crandom_features; | |||||
if (unlikely(!features)) | |||||
crandom_features = features = crandom_detect_features(); | |||||
return likely((features & feature) == feature); | |||||
} | |||||
#endif /* __CRANDOM_INTRINSICS_H__ */ |
@@ -1,446 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#include "p448.h" | |||||
#include "x86-64-arith.h" | |||||
/*
 * p448_mul: store the product of *as and *bs in *cs.
 *
 * Operands are 8 limbs; every result limb is masked to 56 bits before it
 * is stored.  aa[] and bb[] hold the limb-wise sums of the low half
 * (limbs 0..3) and high half (limbs 4..7) of each operand, and products
 * of those sums are combined with products of the plain halves.
 * NOTE(review): this looks like a Karatsuba-style split for the modulus
 * handled by p448_strong_reduce -- confirm.
 * widemul/mac/msb come from x86-64-arith.h; presumably 64x64->128
 * multiply, multiply-accumulate and multiply-subtract -- confirm there.
 *
 * Result limbs are produced pairwise in the order (3,7), (0,4), (1,5),
 * (2,6); the carries of the last pair are then added to limbs 3 and 7,
 * and the final carries are folded into limbs 4 and 0.
 *
 * cs is declared __restrict__.
 */
void | |||||
p448_mul ( | |||||
p448_t *__restrict__ cs, | |||||
const p448_t *as, | |||||
const p448_t *bs | |||||
) { | |||||
const uint64_t *a = as->limb, *b = bs->limb; | |||||
uint64_t *c = cs->limb; | |||||
__uint128_t accum0 = 0, accum1 = 0, accum2; | |||||
uint64_t mask = (1ull<<56) - 1; | |||||
uint64_t aa[4], bb[4]; | |||||
/* For some reason clang doesn't vectorize this without prompting? */ | |||||
unsigned int i; | |||||
/* aa = a_lo + a_hi, bb = b_lo + b_hi, computed one vector at a time. */
for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) { | |||||
((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i]; | |||||
((uint64xn_t*)bb)[i] = ((const uint64xn_t*)b)[i] + ((const uint64xn_t*)(&b[4]))[i]; | |||||
} | |||||
/* | |||||
for (int i=0; i<4; i++) { | |||||
aa[i] = a[i] + a[i+4]; | |||||
bb[i] = b[i] + b[i+4]; | |||||
} | |||||
*/ | |||||
/* Limbs 3 and 7. */
accum2 = widemul(&a[0],&b[3]); | |||||
accum1 = widemul(&aa[0],&bb[3]); | |||||
accum0 = widemul(&a[4],&b[7]); | |||||
mac(&accum2, &a[1], &b[2]); | |||||
mac(&accum1, &aa[1], &bb[2]); | |||||
mac(&accum0, &a[5], &b[6]); | |||||
mac(&accum2, &a[2], &b[1]); | |||||
mac(&accum1, &aa[2], &bb[1]); | |||||
mac(&accum0, &a[6], &b[5]); | |||||
mac(&accum2, &a[3], &b[0]); | |||||
mac(&accum1, &aa[3], &bb[0]); | |||||
mac(&accum0, &a[7], &b[4]); | |||||
accum1 -= accum2; | |||||
accum0 += accum2; | |||||
c[3] = ((uint64_t)(accum0)) & mask; | |||||
c[7] = ((uint64_t)(accum1)) & mask; | |||||
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
/* Carry into the next pair: accum1 becomes accum1+accum0, accum0 becomes the old accum1. */
{ | |||||
accum2 = accum1; | |||||
accum1 += accum0; | |||||
accum0 = accum2; | |||||
} | |||||
/* Limbs 0 and 4. */
accum2 = widemul(&a[0],&b[0]); | |||||
accum1 -= accum2; | |||||
accum0 += accum2; | |||||
accum2 = widemul(&aa[1],&bb[3]); | |||||
msb(&accum0, &a[1], &b[3]); | |||||
mac(&accum1, &a[5], &b[7]); | |||||
msb(&accum0, &a[2], &b[2]); | |||||
mac(&accum2, &aa[2], &bb[2]); | |||||
mac(&accum1, &a[6], &b[6]); | |||||
msb(&accum0, &a[3], &b[1]); | |||||
mac(&accum1, &a[7], &b[5]); | |||||
mac(&accum2, &aa[3], &bb[1]); | |||||
accum0 += accum2; | |||||
accum1 += accum2; | |||||
mac(&accum0, &a[4], &b[4]); | |||||
mac(&accum1, &aa[0], &bb[0]); | |||||
c[0] = ((uint64_t)(accum0)) & mask; | |||||
c[4] = ((uint64_t)(accum1)) & mask; | |||||
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
/* Limbs 1 and 5. */
accum2 = widemul(&aa[2],&bb[3]); | |||||
msb(&accum0, &a[2], &b[3]); | |||||
mac(&accum1, &a[6], &b[7]); | |||||
mac(&accum2, &aa[3], &bb[2]); | |||||
msb(&accum0, &a[3], &b[2]); | |||||
mac(&accum1, &a[7], &b[6]); | |||||
accum1 += accum2; | |||||
accum0 += accum2; | |||||
accum2 = widemul(&a[0],&b[1]); | |||||
mac(&accum1, &aa[0], &bb[1]); | |||||
mac(&accum0, &a[4], &b[5]); | |||||
mac(&accum2, &a[1], &b[0]); | |||||
mac(&accum1, &aa[1], &bb[0]); | |||||
mac(&accum0, &a[5], &b[4]); | |||||
accum1 -= accum2; | |||||
accum0 += accum2; | |||||
c[1] = ((uint64_t)(accum0)) & mask; | |||||
c[5] = ((uint64_t)(accum1)) & mask; | |||||
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
/* Limbs 2 and 6. */
accum2 = widemul(&aa[3],&bb[3]); | |||||
msb(&accum0, &a[3], &b[3]); | |||||
mac(&accum1, &a[7], &b[7]); | |||||
accum1 += accum2; | |||||
accum0 += accum2; | |||||
accum2 = widemul(&a[0],&b[2]); | |||||
mac(&accum1, &aa[0], &bb[2]); | |||||
mac(&accum0, &a[4], &b[6]); | |||||
mac(&accum2, &a[1], &b[1]); | |||||
mac(&accum1, &aa[1], &bb[1]); | |||||
mac(&accum0, &a[5], &b[5]); | |||||
mac(&accum2, &a[2], &b[0]); | |||||
mac(&accum1, &aa[2], &bb[0]); | |||||
mac(&accum0, &a[6], &b[4]); | |||||
accum1 -= accum2; | |||||
accum0 += accum2; | |||||
c[2] = ((uint64_t)(accum0)) & mask; | |||||
c[6] = ((uint64_t)(accum1)) & mask; | |||||
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
/* Add the carries out of limbs 2 and 6 to the already-stored limbs 3 and 7. */
accum0 += c[3]; | |||||
accum1 += c[7]; | |||||
c[3] = ((uint64_t)(accum0)) & mask; | |||||
c[7] = ((uint64_t)(accum1)) & mask; | |||||
/* we could almost stop here, but it wouldn't be stable, so... */ | |||||
/* Fold the last carries back in: both go to limb 4, the limb-7 carry also to limb 0. */
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | |||||
c[0] += ((uint64_t)(accum1)); | |||||
} | |||||
void | |||||
p448_mulw ( | |||||
p448_t *__restrict__ cs, | |||||
const p448_t *as, | |||||
uint64_t b | |||||
) { | |||||
const uint64_t *a = as->limb; | |||||
uint64_t *c = cs->limb; | |||||
__uint128_t accum0, accum4; | |||||
uint64_t mask = (1ull<<56) - 1; | |||||
accum0 = widemul_rm(b, &a[0]); | |||||
accum4 = widemul_rm(b, &a[4]); | |||||
c[0] = accum0 & mask; accum0 >>= 56; | |||||
c[4] = accum4 & mask; accum4 >>= 56; | |||||
mac_rm(&accum0, b, &a[1]); | |||||
mac_rm(&accum4, b, &a[5]); | |||||
c[1] = accum0 & mask; accum0 >>= 56; | |||||
c[5] = accum4 & mask; accum4 >>= 56; | |||||
mac_rm(&accum0, b, &a[2]); | |||||
mac_rm(&accum4, b, &a[6]); | |||||
c[2] = accum0 & mask; accum0 >>= 56; | |||||
c[6] = accum4 & mask; accum4 >>= 56; | |||||
mac_rm(&accum0, b, &a[3]); | |||||
mac_rm(&accum4, b, &a[7]); | |||||
c[3] = accum0 & mask; accum0 >>= 56; | |||||
c[7] = accum4 & mask; accum4 >>= 56; | |||||
c[4] += accum0 + accum4; | |||||
c[0] += accum4; | |||||
} | |||||
/*
 * p448_sqr: store the square of *as in *cs.
 *
 * Same limb layout and output order as p448_mul: 8 limbs masked to 56
 * bits, produced pairwise as (3,7), (0,4), (1,5), (2,6), followed by the
 * carry into limbs 3 and 7 and the final fold into limbs 4 and 0.
 * aa[] holds the limb-wise sum of the low and high halves of the input.
 *
 * For the first pair only half of the cross terms are accumulated; the
 * result is doubled by the <<1 on store and the matching >>55 carry shift.
 * widemul2/mac2/msb2 come from x86-64-arith.h; presumably the doubled
 * variants of widemul/mac/msb -- confirm there.
 *
 * cs is declared __restrict__.
 */
void | |||||
p448_sqr ( | |||||
p448_t *__restrict__ cs, | |||||
const p448_t *as | |||||
) { | |||||
const uint64_t *a = as->limb; | |||||
uint64_t *c = cs->limb; | |||||
__uint128_t accum0 = 0, accum1 = 0, accum2; | |||||
uint64_t mask = (1ull<<56) - 1; | |||||
uint64_t aa[4]; | |||||
/* For some reason clang doesn't vectorize this without prompting? */ | |||||
unsigned int i; | |||||
/* aa = a_lo + a_hi, computed one vector at a time. */
for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) { | |||||
((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i]; | |||||
} | |||||
/* Limbs 3 and 7 (half of the cross terms; doubled when stored). */
accum2 = widemul(&a[0],&a[3]); | |||||
accum1 = widemul(&aa[0],&aa[3]); | |||||
accum0 = widemul(&a[4],&a[7]); | |||||
mac(&accum2, &a[1], &a[2]); | |||||
mac(&accum1, &aa[1], &aa[2]); | |||||
mac(&accum0, &a[5], &a[6]); | |||||
accum1 -= accum2; | |||||
accum0 += accum2; | |||||
c[3] = ((uint64_t)(accum0))<<1 & mask; | |||||
c[7] = ((uint64_t)(accum1))<<1 & mask; | |||||
accum0 >>= 55; | |||||
accum1 >>= 55; | |||||
/* Carry into the next pair: accum1 becomes accum1+accum0, accum0 becomes the old accum1. */
{ | |||||
accum2 = accum1; | |||||
accum1 += accum0; | |||||
accum0 = accum2; | |||||
} | |||||
/* Limbs 0 and 4. */
accum2 = widemul(&a[0],&a[0]); | |||||
accum1 -= accum2; | |||||
accum0 += accum2; | |||||
accum2 = widemul2(&aa[1],&aa[3]); | |||||
msb2(&accum0, &a[1], &a[3]); | |||||
mac2(&accum1, &a[5], &a[7]); | |||||
msb(&accum0, &a[2], &a[2]); | |||||
mac(&accum2, &aa[2], &aa[2]); | |||||
mac(&accum1, &a[6], &a[6]); | |||||
accum0 += accum2; | |||||
accum1 += accum2; | |||||
mac(&accum0, &a[4], &a[4]); | |||||
mac(&accum1, &aa[0], &aa[0]); | |||||
c[0] = ((uint64_t)(accum0)) & mask; | |||||
c[4] = ((uint64_t)(accum1)) & mask; | |||||
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
/* Limbs 1 and 5. */
accum2 = widemul2(&aa[2],&aa[3]); | |||||
msb2(&accum0, &a[2], &a[3]); | |||||
mac2(&accum1, &a[6], &a[7]); | |||||
accum1 += accum2; | |||||
accum0 += accum2; | |||||
accum2 = widemul2(&a[0],&a[1]); | |||||
mac2(&accum1, &aa[0], &aa[1]); | |||||
mac2(&accum0, &a[4], &a[5]); | |||||
accum1 -= accum2; | |||||
accum0 += accum2; | |||||
c[1] = ((uint64_t)(accum0)) & mask; | |||||
c[5] = ((uint64_t)(accum1)) & mask; | |||||
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
/* Limbs 2 and 6. */
accum2 = widemul(&aa[3],&aa[3]); | |||||
msb(&accum0, &a[3], &a[3]); | |||||
mac(&accum1, &a[7], &a[7]); | |||||
accum1 += accum2; | |||||
accum0 += accum2; | |||||
accum2 = widemul2(&a[0],&a[2]); | |||||
mac2(&accum1, &aa[0], &aa[2]); | |||||
mac2(&accum0, &a[4], &a[6]); | |||||
mac(&accum2, &a[1], &a[1]); | |||||
mac(&accum1, &aa[1], &aa[1]); | |||||
mac(&accum0, &a[5], &a[5]); | |||||
accum1 -= accum2; | |||||
accum0 += accum2; | |||||
c[2] = ((uint64_t)(accum0)) & mask; | |||||
c[6] = ((uint64_t)(accum1)) & mask; | |||||
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
/* Add the carries out of limbs 2 and 6 to the already-stored limbs 3 and 7. */
accum0 += c[3]; | |||||
accum1 += c[7]; | |||||
c[3] = ((uint64_t)(accum0)) & mask; | |||||
c[7] = ((uint64_t)(accum1)) & mask; | |||||
/* we could almost stop here, but it wouldn't be stable, so... */ | |||||
/* Fold the last carries back in: both go to limb 4, the limb-7 carry also to limb 0. */
accum0 >>= 56; | |||||
accum1 >>= 56; | |||||
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | |||||
c[0] += ((uint64_t)(accum1)); | |||||
} | |||||
void | |||||
p448_strong_reduce ( | |||||
p448_t *a | |||||
) { | |||||
uint64_t mask = (1ull<<56)-1; | |||||
/* first, clear high */ | |||||
a->limb[4] += a->limb[7]>>56; | |||||
a->limb[0] += a->limb[7]>>56; | |||||
a->limb[7] &= mask; | |||||
/* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */ | |||||
/* compute total_value - p. No need to reduce mod p. */ | |||||
__int128_t scarry = 0; | |||||
int i; | |||||
for (i=0; i<8; i++) { | |||||
scarry = scarry + a->limb[i] - ((i==4)?mask-1:mask); | |||||
a->limb[i] = scarry & mask; | |||||
scarry >>= 56; | |||||
} | |||||
/* uncommon case: it was >= p, so now scarry = 0 and this = x | |||||
* common case: it was < p, so now scarry = -1 and this = x - p + 2^448 | |||||
* so let's add back in p. will carry back off the top for 2^448. | |||||
*/ | |||||
assert(is_zero(scarry) | is_zero(scarry+1)); | |||||
uint64_t scarry_mask = scarry & mask; | |||||
__uint128_t carry = 0; | |||||
/* add it back */ | |||||
for (i=0; i<8; i++) { | |||||
carry = carry + a->limb[i] + ((i==4)?(scarry_mask&~1):scarry_mask); | |||||
a->limb[i] = carry & mask; | |||||
carry >>= 56; | |||||
} | |||||
assert(is_zero(carry + scarry)); | |||||
} | |||||
mask_t | |||||
p448_is_zero ( | |||||
const struct p448_t *a | |||||
) { | |||||
struct p448_t b; | |||||
p448_copy(&b,a); | |||||
p448_strong_reduce(&b); | |||||
uint64_t any = 0; | |||||
int i; | |||||
for (i=0; i<8; i++) { | |||||
any |= b.limb[i]; | |||||
} | |||||
return is_zero(any); | |||||
} | |||||
void | |||||
p448_serialize ( | |||||
uint8_t *serial, | |||||
const struct p448_t *x | |||||
) { | |||||
int i,j; | |||||
p448_t red; | |||||
p448_copy(&red, x); | |||||
p448_strong_reduce(&red); | |||||
for (i=0; i<8; i++) { | |||||
for (j=0; j<7; j++) { | |||||
serial[7*i+j] = red.limb[i]; | |||||
red.limb[i] >>= 8; | |||||
} | |||||
assert(red.limb[i] == 0); | |||||
} | |||||
} | |||||
void | |||||
q448_serialize ( | |||||
uint8_t *serial, | |||||
const word_t x[7] | |||||
) { | |||||
int i,j; | |||||
for (i=0; i<7; i++) { | |||||
for (j=0; j<8; j++) { | |||||
serial[8*i+j] = x[i]>>(8*j); | |||||
} | |||||
} | |||||
} | |||||
mask_t | |||||
q448_deserialize ( | |||||
word_t x[7], | |||||
const uint8_t serial[56] | |||||
) { | |||||
int i,j; | |||||
for (i=0; i<7; i++) { | |||||
word_t out = 0; | |||||
for (j=0; j<8; j++) { | |||||
out |= ((word_t)serial[8*i+j])<<(8*j); | |||||
} | |||||
x[i] = out; | |||||
} | |||||
/* TODO: check for reduction */ | |||||
return MASK_SUCCESS; | |||||
} | |||||
mask_t | |||||
p448_deserialize ( | |||||
p448_t *x, | |||||
const uint8_t serial[56] | |||||
) { | |||||
int i,j; | |||||
for (i=0; i<8; i++) { | |||||
word_t out = 0; | |||||
for (j=0; j<7; j++) { | |||||
out |= ((word_t)serial[7*i+j])<<(8*j); | |||||
} | |||||
x->limb[i] = out; | |||||
} | |||||
/* TODO: check for reduction */ | |||||
return MASK_SUCCESS; | |||||
} |
@@ -1,330 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#ifndef __P448_H__ | |||||
#define __P448_H__ 1 | |||||
#include <stdint.h> | |||||
#include <assert.h> | |||||
#include "word.h" | |||||
/* A field element: 8 limbs of 64-bit storage, aligned to 32 bytes. */
typedef struct p448_t {
    uint64_t limb[8];
} __attribute__((aligned(32))) p448_t;
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
static __inline__ void | |||||
p448_set_ui ( | |||||
p448_t *out, | |||||
uint64_t x | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_cond_swap ( | |||||
p448_t *a, | |||||
p448_t *b, | |||||
mask_t do_swap | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_add ( | |||||
p448_t *out, | |||||
const p448_t *a, | |||||
const p448_t *b | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_sub ( | |||||
p448_t *out, | |||||
const p448_t *a, | |||||
const p448_t *b | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_neg ( | |||||
p448_t *out, | |||||
const p448_t *a | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_cond_neg ( | |||||
p448_t *a, | |||||
mask_t doNegate | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_addw ( | |||||
p448_t *a, | |||||
uint64_t x | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_subw ( | |||||
p448_t *a, | |||||
uint64_t x | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_copy ( | |||||
p448_t *out, | |||||
const p448_t *a | |||||
) __attribute__((unused,always_inline)); | |||||
static __inline__ void | |||||
p448_weak_reduce ( | |||||
p448_t *inout | |||||
) __attribute__((unused,always_inline)); | |||||
void | |||||
p448_strong_reduce ( | |||||
p448_t *inout | |||||
); | |||||
mask_t | |||||
p448_is_zero ( | |||||
const p448_t *in | |||||
); | |||||
static __inline__ void | |||||
p448_bias ( | |||||
p448_t *inout, | |||||
int amount | |||||
) __attribute__((unused,always_inline)); | |||||
void | |||||
p448_mul ( | |||||
p448_t *__restrict__ out, | |||||
const p448_t *a, | |||||
const p448_t *b | |||||
); | |||||
void | |||||
p448_mulw ( | |||||
p448_t *__restrict__ out, | |||||
const p448_t *a, | |||||
uint64_t b | |||||
); | |||||
void | |||||
p448_sqr ( | |||||
p448_t *__restrict__ out, | |||||
const p448_t *a | |||||
); | |||||
static __inline__ void | |||||
p448_sqrn ( | |||||
p448_t *__restrict__ y, | |||||
const p448_t *x, | |||||
int n | |||||
) __attribute__((unused,always_inline)); | |||||
void | |||||
p448_serialize ( | |||||
uint8_t *serial, | |||||
const struct p448_t *x | |||||
); | |||||
void | |||||
q448_serialize ( | |||||
uint8_t *serial, | |||||
const word_t x[7] | |||||
); | |||||
mask_t | |||||
q448_deserialize ( | |||||
word_t x[7], | |||||
const uint8_t serial[56] | |||||
); | |||||
mask_t | |||||
p448_deserialize ( | |||||
p448_t *x, | |||||
const uint8_t serial[56] | |||||
); | |||||
/* -------------- Inline functions begin here -------------- */ | |||||
void | |||||
p448_set_ui ( | |||||
p448_t *out, | |||||
uint64_t x | |||||
) { | |||||
int i; | |||||
out->limb[0] = x; | |||||
for (i=1; i<8; i++) { | |||||
out->limb[i] = 0; | |||||
} | |||||
} | |||||
void | |||||
p448_cond_swap ( | |||||
p448_t *a, | |||||
p448_t *b, | |||||
mask_t doswap | |||||
) { | |||||
big_register_t *aa = (big_register_t*)a; | |||||
big_register_t *bb = (big_register_t*)b; | |||||
big_register_t m = doswap; | |||||
unsigned int i; | |||||
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
big_register_t x = m & (aa[i]^bb[i]); | |||||
aa[i] ^= x; | |||||
bb[i] ^= x; | |||||
} | |||||
} | |||||
void | |||||
p448_add ( | |||||
p448_t *out, | |||||
const p448_t *a, | |||||
const p448_t *b | |||||
) { | |||||
unsigned int i; | |||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||||
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | |||||
} | |||||
/* | |||||
unsigned int i; | |||||
for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { | |||||
out->limb[i] = a->limb[i] + b->limb[i]; | |||||
} | |||||
*/ | |||||
} | |||||
void | |||||
p448_sub ( | |||||
p448_t *out, | |||||
const p448_t *a, | |||||
const p448_t *b | |||||
) { | |||||
unsigned int i; | |||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||||
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i]; | |||||
} | |||||
/* | |||||
unsigned int i; | |||||
for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { | |||||
out->limb[i] = a->limb[i] - b->limb[i]; | |||||
} | |||||
*/ | |||||
} | |||||
void | |||||
p448_neg ( | |||||
p448_t *out, | |||||
const p448_t *a | |||||
) { | |||||
unsigned int i; | |||||
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | |||||
((uint64xn_t*)out)[i] = -((const uint64xn_t*)a)[i]; | |||||
} | |||||
/* | |||||
unsigned int i; | |||||
for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) { | |||||
out->limb[i] = -a->limb[i]; | |||||
} | |||||
*/ | |||||
} | |||||
void | |||||
p448_cond_neg( | |||||
p448_t *a, | |||||
mask_t doNegate | |||||
) { | |||||
unsigned int i; | |||||
struct p448_t negated; | |||||
big_register_t *aa = (big_register_t *)a; | |||||
big_register_t *nn = (big_register_t*)&negated; | |||||
big_register_t m = doNegate; | |||||
p448_neg(&negated, a); | |||||
p448_bias(&negated, 2); | |||||
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||||
aa[i] = (aa[i] & ~m) | (nn[i] & m); | |||||
} | |||||
} | |||||
void | |||||
p448_addw ( | |||||
p448_t *a, | |||||
uint64_t x | |||||
) { | |||||
a->limb[0] += x; | |||||
} | |||||
void | |||||
p448_subw ( | |||||
p448_t *a, | |||||
uint64_t x | |||||
) { | |||||
a->limb[0] -= x; | |||||
} | |||||
void | |||||
p448_copy ( | |||||
p448_t *out, | |||||
const p448_t *a | |||||
) { | |||||
*out = *a; | |||||
} | |||||
void | |||||
p448_bias ( | |||||
p448_t *a, | |||||
int amt | |||||
) { | |||||
uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt; | |||||
uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1}; | |||||
uint64x4_t *aa = (uint64x4_t*) a; | |||||
aa[0] += lo; | |||||
aa[1] += hi; | |||||
} | |||||
void | |||||
p448_weak_reduce ( | |||||
p448_t *a | |||||
) { | |||||
/* PERF: use pshufb/palignr if anyone cares about speed of this */ | |||||
uint64_t mask = (1ull<<56) - 1; | |||||
uint64_t tmp = a->limb[7] >> 56; | |||||
int i; | |||||
a->limb[4] += tmp; | |||||
for (i=7; i>0; i--) { | |||||
a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56); | |||||
} | |||||
a->limb[0] = (a->limb[0] & mask) + tmp; | |||||
} | |||||
void | |||||
p448_sqrn ( | |||||
p448_t *__restrict__ y, | |||||
const p448_t *x, | |||||
int n | |||||
) { | |||||
p448_t tmp; | |||||
assert(n>0); | |||||
if (n&1) { | |||||
p448_sqr(y,x); | |||||
n--; | |||||
} else { | |||||
p448_sqr(&tmp,x); | |||||
p448_sqr(y,&tmp); | |||||
n-=2; | |||||
} | |||||
for (; n; n-=2) { | |||||
p448_sqr(&tmp,y); | |||||
p448_sqr(y,&tmp); | |||||
} | |||||
} | |||||
#ifdef __cplusplus | |||||
}; /* extern "C" */ | |||||
#endif | |||||
#endif /* __P448_H__ */ |
@@ -1,776 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#include <stdlib.h> | |||||
#include "scalarmul.h" | |||||
#include "string.h" | |||||
#include "barrett_field.h" | |||||
mask_t | |||||
p448_montgomery_ladder( | |||||
struct p448_t *out, | |||||
const struct p448_t *in, | |||||
const uint64_t *scalar, | |||||
int nbits, | |||||
int n_extra_doubles | |||||
) { | |||||
struct montgomery_t mont; | |||||
p448_sqr(&mont.z0,in); | |||||
p448_copy(&mont.za,&mont.z0); | |||||
p448_set_ui(&mont.xa,1); | |||||
p448_set_ui(&mont.zd,0); | |||||
p448_set_ui(&mont.xd,1); | |||||
int i,j,n=(nbits-1)&63; | |||||
mask_t pflip = 0; | |||||
for (j=(nbits+63)/64-1; j>=0; j--) { | |||||
uint64_t w = scalar[j]; | |||||
for (i=n; i>=0; i--) { | |||||
mask_t flip = -((w>>i)&1); | |||||
p448_cond_swap(&mont.xa,&mont.xd,flip^pflip); | |||||
p448_cond_swap(&mont.za,&mont.zd,flip^pflip); | |||||
montgomery_step(&mont); | |||||
pflip = flip; | |||||
} | |||||
n = 63; | |||||
} | |||||
p448_cond_swap(&mont.xa,&mont.xd,pflip); | |||||
p448_cond_swap(&mont.za,&mont.zd,pflip); | |||||
for (j=0; j<n_extra_doubles; j++) { | |||||
montgomery_step(&mont); | |||||
} | |||||
struct p448_t sign; | |||||
serialize_montgomery(&sign, out, &mont, in); | |||||
p448_addw(&sign,1); | |||||
return ~p448_is_zero(&sign); | |||||
} | |||||
static __inline__ void | |||||
cond_negate_tw_niels( | |||||
struct tw_niels_t *n, | |||||
mask_t doNegate | |||||
) { | |||||
p448_cond_swap(&n->a, &n->b, doNegate); | |||||
p448_cond_neg(&n->c, doNegate); | |||||
} | |||||
static __inline__ void | |||||
cond_negate_tw_pniels( | |||||
struct tw_pniels_t *n, | |||||
mask_t doNegate | |||||
) { | |||||
cond_negate_tw_niels(&n->n, doNegate); | |||||
} | |||||
void | |||||
constant_time_lookup_tw_pniels( | |||||
struct tw_pniels_t *out, | |||||
const struct tw_pniels_t *in, | |||||
int nin, | |||||
int idx | |||||
) { | |||||
big_register_t big_one = 1, big_i = idx; | |||||
big_register_t *o = (big_register_t *)out; | |||||
const big_register_t *i = (const big_register_t *)in; | |||||
int j; | |||||
unsigned int k; | |||||
memset(out, 0, sizeof(*out)); | |||||
for (j=0; j<nin; j++, big_i-=big_one) { | |||||
big_register_t mask = br_is_zero(big_i); | |||||
for (k=0; k<sizeof(*out)/sizeof(*o); k++) { | |||||
o[k] |= mask & i[k+j*sizeof(*out)/sizeof(*o)]; | |||||
} | |||||
} | |||||
} | |||||
static __inline__ void | |||||
constant_time_lookup_tw_niels( | |||||
struct tw_niels_t *out, | |||||
const struct tw_niels_t *in, | |||||
int nin, | |||||
int idx | |||||
) { | |||||
big_register_t big_one = 1, big_i = idx; | |||||
big_register_t *o = (big_register_t *)out; | |||||
const big_register_t *i = (const big_register_t *)in; | |||||
int j; | |||||
unsigned int k; | |||||
memset(out, 0, sizeof(*out)); | |||||
for (j=0; j<nin; j++, big_i-=big_one) { | |||||
big_register_t mask = br_is_zero(big_i); | |||||
for (k=0; k<sizeof(*out)/sizeof(*o); k++) { | |||||
o[k] |= mask & i[k+j*sizeof(*out)/sizeof(*o)]; | |||||
} | |||||
} | |||||
} | |||||
static void | |||||
convert_to_signed_window_form( | |||||
word_t *out, | |||||
const word_t *scalar, | |||||
const word_t *prepared_data, | |||||
int nwords | |||||
) { | |||||
mask_t mask = -(scalar[0]&1); | |||||
word_t carry = add_nr_ext_packed(out, scalar, nwords, prepared_data, nwords, ~mask); | |||||
carry += add_nr_ext_packed(out, out, nwords, prepared_data+nwords, nwords, mask); | |||||
assert(!(out[0]&1)); | |||||
int i; | |||||
for (i=0; i<nwords; i++) { | |||||
out[i] >>= 1; | |||||
if (i<nwords-1) { | |||||
out[i] |= out[i+1]<<(WORD_BITS-1); | |||||
} else { | |||||
out[i] |= carry<<(WORD_BITS-1); | |||||
} | |||||
} | |||||
} | |||||
void | |||||
edwards_scalar_multiply( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar[7] | |||||
) { | |||||
const int nbits=448; /* HACK? */ | |||||
word_t prepared_data[14] = { | |||||
0x9595b847fdf73126ull, | |||||
0x9bb9b8a856af5200ull, | |||||
0xb3136e22f37d5c4full, | |||||
0x0000000189a19442ull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull, | |||||
0x4000000000000000ull, | |||||
0x721cf5b5529eec33ull, | |||||
0x7a4cf635c8e9c2abull, | |||||
0xeec492d944a725bfull, | |||||
0x000000020cd77058ull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull | |||||
}; /* TODO: split off */ | |||||
uint64_t scalar2[7]; | |||||
convert_to_signed_window_form(scalar2,scalar,prepared_data,7); | |||||
struct tw_extensible_t tabulator; | |||||
copy_tw_extensible(&tabulator, working); | |||||
double_tw_extensible(&tabulator); | |||||
struct tw_pniels_t pn, multiples[8]; | |||||
convert_tw_extensible_to_tw_pniels(&pn, &tabulator); | |||||
convert_tw_extensible_to_tw_pniels(&multiples[0], working); | |||||
int i; | |||||
for (i=1; i<8; i++) { | |||||
add_tw_pniels_to_tw_extensible(working, &pn); | |||||
convert_tw_extensible_to_tw_pniels(&multiples[i], working); | |||||
} | |||||
i = nbits - 4; | |||||
int bits = scalar2[i/64] >> (i%64) & 0xF, | |||||
inv = (bits>>3)-1; | |||||
bits ^= inv; | |||||
constant_time_lookup_tw_pniels(&pn, multiples, 8, bits&7); | |||||
cond_negate_tw_pniels(&pn, inv); | |||||
convert_tw_pniels_to_tw_extensible(working, &pn); | |||||
for (i-=4; i>=0; i-=4) { | |||||
double_tw_extensible(working); | |||||
double_tw_extensible(working); | |||||
double_tw_extensible(working); | |||||
double_tw_extensible(working); | |||||
bits = scalar2[i/64] >> (i%64) & 0xF; | |||||
inv = (bits>>3)-1; | |||||
bits ^= inv; | |||||
constant_time_lookup_tw_pniels(&pn, multiples, 8, bits&7); | |||||
cond_negate_tw_pniels(&pn, inv); | |||||
add_tw_pniels_to_tw_extensible(working, &pn); | |||||
} | |||||
} | |||||
void | |||||
edwards_scalar_multiply_vlook( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar[7] | |||||
) { | |||||
const int nbits=448; /* HACK? */ | |||||
word_t prepared_data[14] = { | |||||
0x9595b847fdf73126ull, | |||||
0x9bb9b8a856af5200ull, | |||||
0xb3136e22f37d5c4full, | |||||
0x0000000189a19442ull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull, | |||||
0x4000000000000000ull, | |||||
0x721cf5b5529eec33ull, | |||||
0x7a4cf635c8e9c2abull, | |||||
0xeec492d944a725bfull, | |||||
0x000000020cd77058ull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull | |||||
}; /* TODO: split off */ | |||||
uint64_t scalar2[7]; | |||||
convert_to_signed_window_form(scalar2,scalar,prepared_data,7); | |||||
struct tw_extensible_t tabulator; | |||||
copy_tw_extensible(&tabulator, working); | |||||
double_tw_extensible(&tabulator); | |||||
struct tw_pniels_t pn, multiples[8]; | |||||
convert_tw_extensible_to_tw_pniels(&pn, &tabulator); | |||||
convert_tw_extensible_to_tw_pniels(&multiples[0], working); | |||||
int i; | |||||
for (i=1; i<8; i++) { | |||||
add_tw_pniels_to_tw_extensible(working, &pn); | |||||
convert_tw_extensible_to_tw_pniels(&multiples[i], working); | |||||
} | |||||
i = nbits - 4; | |||||
int bits = scalar2[i/64] >> (i%64) & 0xF, | |||||
inv = (bits>>3)-1; | |||||
bits ^= inv; | |||||
copy_tw_pniels(&pn, &multiples[bits&7]); | |||||
cond_negate_tw_pniels(&pn, inv); | |||||
convert_tw_pniels_to_tw_extensible(working, &pn); | |||||
for (i-=4; i>=0; i-=4) { | |||||
double_tw_extensible(working); | |||||
double_tw_extensible(working); | |||||
double_tw_extensible(working); | |||||
double_tw_extensible(working); | |||||
bits = scalar2[i/64] >> (i%64) & 0xF; | |||||
inv = (bits>>3)-1; | |||||
bits ^= inv; | |||||
copy_tw_pniels(&pn, &multiples[bits&7]); | |||||
cond_negate_tw_pniels(&pn, inv); | |||||
add_tw_pniels_to_tw_extensible(working, &pn); | |||||
} | |||||
} | |||||
void | |||||
edwards_comb( | |||||
struct tw_extensible_t *working, | |||||
const word_t scalar[7], | |||||
const struct tw_niels_t *table, | |||||
int n, | |||||
int t, | |||||
int s | |||||
) { | |||||
word_t prepared_data[14] = { | |||||
0xebec9967f5d3f5c2ull, | |||||
0x0aa09b49b16c9a02ull, | |||||
0x7f6126aec172cd8eull, | |||||
0x00000007b027e54dull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull, | |||||
0x4000000000000000ull, | |||||
0xc873d6d54a7bb0cfull, | |||||
0xe933d8d723a70aadull, | |||||
0xbb124b65129c96fdull, | |||||
0x00000008335dc163ull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull, | |||||
0x0000000000000000ull | |||||
}; /* TODO: split off. Above is for 450 bits */ | |||||
word_t scalar2[7]; | |||||
convert_to_signed_window_form(scalar2,scalar,prepared_data,7); | |||||
/* const int n=3, t=5, s=30; */ | |||||
int i,j,k; | |||||
struct tw_niels_t ni; | |||||
for (i=0; i<s; i++) { | |||||
if (i) double_tw_extensible(working); | |||||
for (j=0; j<n; j++) { | |||||
int tab = 0; | |||||
/* | |||||
* PERF: This computation takes about 1.5µs on SBR, i.e. 2-3% of the | |||||
* time of a keygen or sign op. Surely it is possible to speed it up. | |||||
*/ | |||||
for (k=0; k<t; k++) { | |||||
int bit = (s-1-i) + k*s + j*(s*t); | |||||
if (bit < 7*WORD_BITS) { | |||||
tab |= (scalar2[bit/WORD_BITS] >> (bit%WORD_BITS) & 1) << k; | |||||
} | |||||
} | |||||
mask_t invert = (tab>>(t-1))-1; | |||||
tab ^= invert; | |||||
tab &= (1<<(t-1)) - 1; | |||||
constant_time_lookup_tw_niels(&ni, table + (j<<(t-1)), 1<<(t-1), tab); | |||||
cond_negate_tw_niels(&ni, invert); | |||||
if (i||j) { | |||||
add_tw_niels_to_tw_extensible(working, &ni); | |||||
} else { | |||||
convert_tw_niels_to_tw_extensible(working, &ni); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
void | |||||
simultaneous_invert_p448( | |||||
struct p448_t *out, | |||||
const struct p448_t *in, | |||||
int n | |||||
) { | |||||
if (!n) return; | |||||
p448_copy(&out[1], &in[0]); | |||||
int i; | |||||
for (i=1; i<n-1; i++) { | |||||
p448_mul(&out[i+1], &out[i], &in[i]); | |||||
} | |||||
p448_mul(&out[0], &out[n-1], &in[n-1]); | |||||
struct p448_t tmp; | |||||
p448_inverse(&tmp, &out[0]); | |||||
p448_copy(&out[0], &tmp); | |||||
/* at this point, out[0] = product(in[i]) ^ -1 | |||||
* out[i] = product(in[0]..in[i-1]) if i != 0 | |||||
*/ | |||||
for (i=n-1; i>0; i--) { | |||||
p448_mul(&tmp, &out[i], &out[0]); | |||||
p448_copy(&out[i], &tmp); | |||||
p448_mul(&tmp, &out[0], &in[i]); | |||||
p448_copy(&out[0], &tmp); | |||||
} | |||||
} | |||||
mask_t | |||||
precompute_for_combs( | |||||
struct tw_niels_t *out, | |||||
const struct tw_extensible_t *const_base, | |||||
int n, | |||||
int t, | |||||
int s | |||||
) { | |||||
if (s < 1) return 0; | |||||
struct tw_extensible_t working, start; | |||||
copy_tw_extensible(&working, const_base); | |||||
struct tw_pniels_t pn_tmp; | |||||
struct tw_pniels_t *doubles = (struct tw_pniels_t *) malloc(sizeof(*doubles) * (t-1)); | |||||
struct p448_t *zs = (struct p448_t *) malloc(sizeof(*zs) * (n<<(t-1))); | |||||
struct p448_t *zis = (struct p448_t *) malloc(sizeof(*zis) * (n<<(t-1))); | |||||
if (!doubles || !zs || !zis) { | |||||
free(doubles); | |||||
free(zs); | |||||
free(zis); | |||||
return 0; | |||||
} | |||||
int i,j,k; | |||||
for (i=0; i<n; i++) { | |||||
/* doubling phase */ | |||||
for (j=0; j<t; j++) { | |||||
if (j) { | |||||
convert_tw_extensible_to_tw_pniels(&pn_tmp, &working); | |||||
add_tw_pniels_to_tw_extensible(&start, &pn_tmp); | |||||
} else { | |||||
copy_tw_extensible(&start, &working); | |||||
} | |||||
if (j==t-1 && i==n-1) { | |||||
break; | |||||
} | |||||
double_tw_extensible(&working); | |||||
if (j<t-1) { | |||||
convert_tw_extensible_to_tw_pniels(&doubles[j], &working); | |||||
} | |||||
for (k=0; k<s-1; k++) { | |||||
double_tw_extensible(&working); | |||||
} | |||||
} | |||||
/* Gray-code phase */ | |||||
for (j=0;; j++) { | |||||
int gray = j ^ (j>>1); | |||||
int idx = ((i+1)<<(t-1))-1 ^ gray; | |||||
convert_tw_extensible_to_tw_pniels(&pn_tmp, &start); | |||||
copy_tw_niels(&out[idx], &pn_tmp.n); | |||||
p448_copy(&zs[idx], &pn_tmp.z); | |||||
if (j >= (1<<(t-1)) - 1) break; | |||||
int delta = (j+1) ^ ((j+1)>>1) ^ gray; | |||||
for (k=0; delta>1; k++) | |||||
delta >>=1; | |||||
if (gray & (1<<k)) { | |||||
/* start += doubles[k] */ | |||||
add_tw_pniels_to_tw_extensible(&start, &doubles[k]); | |||||
} else { | |||||
/* start -= doubles[k] */ | |||||
sub_tw_pniels_from_tw_extensible(&start, &doubles[k]); | |||||
} | |||||
} | |||||
} | |||||
simultaneous_invert_p448(zis, zs, n<<(t-1)); | |||||
p448_t product; | |||||
for (i=0; i<n<<(t-1); i++) { | |||||
p448_mul(&product, &out[i].a, &zis[i]); | |||||
p448_strong_reduce(&product); | |||||
p448_copy(&out[i].a, &product); | |||||
p448_mul(&product, &out[i].b, &zis[i]); | |||||
p448_strong_reduce(&product); | |||||
p448_copy(&out[i].b, &product); | |||||
p448_mul(&product, &out[i].c, &zis[i]); | |||||
p448_strong_reduce(&product); | |||||
p448_copy(&out[i].c, &product); | |||||
} | |||||
mask_t ret = ~p448_is_zero(&zis[0]); | |||||
free(doubles); | |||||
free(zs); | |||||
free(zis); | |||||
return ret; | |||||
} | |||||
mask_t | |||||
precompute_for_wnaf( | |||||
struct tw_niels_t *out, | |||||
const struct tw_extensible_t *const_base, | |||||
int tbits | |||||
) { | |||||
int i; | |||||
struct p448_t *zs = (struct p448_t *) malloc(sizeof(*zs)<<tbits); | |||||
struct p448_t *zis = (struct p448_t *) malloc(sizeof(*zis)<<tbits); | |||||
if (!zs || !zis) { | |||||
free(zs); | |||||
free(zis); | |||||
return 0; | |||||
} | |||||
struct tw_extensible_t base; | |||||
copy_tw_extensible(&base,const_base); | |||||
struct tw_pniels_t twop, tmp; | |||||
convert_tw_extensible_to_tw_pniels(&tmp, &base); | |||||
p448_copy(&zs[0], &tmp.z); | |||||
copy_tw_niels(&out[0], &tmp.n); | |||||
if (tbits > 0) { | |||||
double_tw_extensible(&base); | |||||
convert_tw_extensible_to_tw_pniels(&twop, &base); | |||||
add_tw_pniels_to_tw_extensible(&base, &tmp); | |||||
convert_tw_extensible_to_tw_pniels(&tmp, &base); | |||||
p448_copy(&zs[1], &tmp.z); | |||||
copy_tw_niels(&out[1], &tmp.n); | |||||
for (i=2; i < 1<<tbits; i++) { | |||||
add_tw_pniels_to_tw_extensible(&base, &twop); | |||||
convert_tw_extensible_to_tw_pniels(&tmp, &base); | |||||
p448_copy(&zs[i], &tmp.z); | |||||
copy_tw_niels(&out[i], &tmp.n); | |||||
} | |||||
} | |||||
simultaneous_invert_p448(zis, zs, 1<<tbits); | |||||
p448_t product; | |||||
for (i=0; i<1<<tbits; i++) { | |||||
p448_mul(&product, &out[i].a, &zis[i]); | |||||
p448_strong_reduce(&product); | |||||
p448_copy(&out[i].a, &product); | |||||
p448_mul(&product, &out[i].b, &zis[i]); | |||||
p448_strong_reduce(&product); | |||||
p448_copy(&out[i].b, &product); | |||||
p448_mul(&product, &out[i].c, &zis[i]); | |||||
p448_strong_reduce(&product); | |||||
p448_copy(&out[i].c, &product); | |||||
} | |||||
free(zs); | |||||
free(zis); | |||||
return -1; | |||||
} | |||||
/**
 * @cond internal
 * Control for variable-time scalar multiply algorithms.
 *
 * One entry per nonzero digit produced by recode_wnaf, in order of
 * decreasing power.  The list is terminated by an entry with power == -1
 * and addend == 0.
 */
struct smvt_control {
    /* Doubling index at which the digit is applied; -1 marks the end. */
    int power;
    /* Signed odd digit; |addend| >> 1 indexes the table of odd multiples. */
    int addend;
};
static int | |||||
recode_wnaf( | |||||
struct smvt_control *control, /* [nbits/(tableBits+1) + 3] */ | |||||
const word_t *scalar, | |||||
int nbits, | |||||
int tableBits) | |||||
{ | |||||
int current = 0, position=0, i; | |||||
/* PERF: negate scalar if it's large | |||||
* PERF: this is a pretty simplistic algorithm. I'm sure there's a faster one... | |||||
*/ | |||||
for (i=nbits-1; i >= -2 - tableBits; i--) { | |||||
int bit = (i >= 0) | |||||
? (scalar[i/WORD_BITS] >> (i%WORD_BITS)) & 1 | |||||
: 0; | |||||
current = 2*current + bit; | |||||
/* | |||||
* Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0 | |||||
* So current loses (tableBits+1) bits every time. It otherwise gains | |||||
* 1 bit per iteration. The number of iterations is | |||||
* (nbits + 2 + tableBits), and an additional control word is added at | |||||
* the end. So the total number of control words is at most | |||||
* ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2. | |||||
* There's also the stopper with power -1, for a total of +3. | |||||
*/ | |||||
if (current >= (2<<tableBits) || current <= -1 - (2<<tableBits)) { | |||||
int delta = (current + 1) >> 1; | |||||
current = -(current & 1); | |||||
int j; | |||||
for (j=i; (delta & 1) == 0; j++) { | |||||
delta >>= 1; | |||||
} | |||||
control[position].power = j+1; | |||||
control[position].addend = delta; | |||||
position++; | |||||
assert(position <= nbits/(tableBits+1) + 2); | |||||
} | |||||
} | |||||
control[position].power = -1; | |||||
control[position].addend = 0; | |||||
return position; | |||||
} | |||||
static void | |||||
prepare_wnaf_table( | |||||
struct tw_pniels_t *output, | |||||
struct tw_extensible_t *working, | |||||
int tbits | |||||
) { | |||||
convert_tw_extensible_to_tw_pniels(&output[0], working); | |||||
if (tbits == 0) return; | |||||
double_tw_extensible(working); | |||||
struct tw_pniels_t twop; | |||||
convert_tw_extensible_to_tw_pniels(&twop, working); | |||||
add_tw_pniels_to_tw_extensible(working, &output[0]); | |||||
convert_tw_extensible_to_tw_pniels(&output[1], working); | |||||
for (int i=2; i < 1<<tbits; i++) { | |||||
add_tw_pniels_to_tw_extensible(working, &twop); | |||||
convert_tw_extensible_to_tw_pniels(&output[i], working); | |||||
} | |||||
} | |||||
void | |||||
edwards_scalar_multiply_vt( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar[7] | |||||
) { | |||||
/* HACK: not 448? */ | |||||
const int nbits=448, table_bits = 3; | |||||
struct smvt_control control[nbits/(table_bits+1)+3]; | |||||
int control_bits = recode_wnaf(control, scalar, nbits, table_bits); | |||||
struct tw_pniels_t precmp[1<<table_bits]; | |||||
prepare_wnaf_table(precmp, working, table_bits); | |||||
if (control_bits > 0) { | |||||
assert(control[0].addend > 0); | |||||
assert(control[0].power >= 0); | |||||
convert_tw_pniels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); | |||||
} else { | |||||
set_identity_tw_extensible(working); | |||||
return; | |||||
} | |||||
int conti = 1, i; | |||||
for (i = control[0].power - 1; i >= 0; i--) { | |||||
double_tw_extensible(working); | |||||
if (i == control[conti].power) { | |||||
assert(control[conti].addend); | |||||
if (control[conti].addend > 0) { | |||||
add_tw_pniels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); | |||||
} else { | |||||
sub_tw_pniels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); | |||||
} | |||||
conti++; | |||||
assert(conti <= control_bits); | |||||
} | |||||
} | |||||
} | |||||
void | |||||
edwards_scalar_multiply_vt_pre( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar[7], | |||||
const struct tw_niels_t *precmp, | |||||
int table_bits | |||||
) { | |||||
/* HACK: not 448? */ | |||||
const int nbits=448; | |||||
struct smvt_control control[nbits/(table_bits+1)+3]; | |||||
int control_bits = recode_wnaf(control, scalar, nbits, table_bits); | |||||
if (control_bits > 0) { | |||||
assert(control[0].addend > 0); | |||||
assert(control[0].power >= 0); | |||||
convert_tw_niels_to_tw_extensible(working, &precmp[control[0].addend >> 1]); | |||||
} else { | |||||
set_identity_tw_extensible(working); | |||||
return; | |||||
} | |||||
int conti = 1, i; | |||||
for (i = control[0].power - 1; i >= 0; i--) { | |||||
double_tw_extensible(working); | |||||
if (i == control[conti].power) { | |||||
assert(control[conti].addend); | |||||
if (control[conti].addend > 0) { | |||||
add_tw_niels_to_tw_extensible(working, &precmp[control[conti].addend >> 1]); | |||||
} else { | |||||
sub_tw_niels_from_tw_extensible(working, &precmp[(-control[conti].addend) >> 1]); | |||||
} | |||||
conti++; | |||||
assert(conti <= control_bits); | |||||
} | |||||
} | |||||
} | |||||
void | |||||
edwards_combo_var_fixed_vt( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar_var[7], | |||||
const uint64_t scalar_pre[7], | |||||
const struct tw_niels_t *precmp, | |||||
int table_bits_pre | |||||
) { | |||||
/* HACK: not 448? */ | |||||
const int nbits_var=448, nbits_pre=448, table_bits_var = 3; | |||||
struct smvt_control control_var[nbits_var/(table_bits_var+1)+3]; | |||||
struct smvt_control control_pre[nbits_pre/(table_bits_pre+1)+3]; | |||||
int ncb_var = recode_wnaf(control_var, scalar_var, nbits_var, table_bits_var); | |||||
int ncb_pre = recode_wnaf(control_pre, scalar_pre, nbits_pre, table_bits_pre); | |||||
(void)ncb_var; | |||||
(void)ncb_pre; | |||||
struct tw_pniels_t precmp_var[1<<table_bits_var]; | |||||
prepare_wnaf_table(precmp_var, working, table_bits_var); | |||||
int contp=0, contv=0, i; | |||||
i = control_var[0].power; | |||||
if (i > control_pre[0].power) { | |||||
convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); | |||||
contv++; | |||||
} else if (i == control_pre[0].power && i >=0 ) { | |||||
convert_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[0].addend >> 1]); | |||||
add_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); | |||||
contv++; contp++; | |||||
} else { | |||||
i = control_pre[0].power; | |||||
convert_tw_niels_to_tw_extensible(working, &precmp[control_pre[0].addend >> 1]); | |||||
contp++; | |||||
} | |||||
if (i < 0) { | |||||
set_identity_tw_extensible(working); | |||||
return; | |||||
} | |||||
for (i--; i >= 0; i--) { | |||||
double_tw_extensible(working); | |||||
if (i == control_var[contv].power) { | |||||
assert(control_var[contv].addend); | |||||
if (control_var[contv].addend > 0) { | |||||
add_tw_pniels_to_tw_extensible(working, &precmp_var[control_var[contv].addend >> 1]); | |||||
} else { | |||||
sub_tw_pniels_from_tw_extensible(working, &precmp_var[(-control_var[contv].addend) >> 1]); | |||||
} | |||||
contv++; | |||||
} | |||||
if (i == control_pre[contp].power) { | |||||
assert(control_pre[contp].addend); | |||||
if (control_pre[contp].addend > 0) { | |||||
add_tw_niels_to_tw_extensible(working, &precmp[control_pre[contp].addend >> 1]); | |||||
} else { | |||||
sub_tw_niels_from_tw_extensible(working, &precmp[(-control_pre[contp].addend) >> 1]); | |||||
} | |||||
contp++; | |||||
} | |||||
} | |||||
assert(contv == ncb_var); | |||||
assert(contp == ncb_pre); | |||||
} | |||||
@@ -1,117 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#ifndef __P448_ALGO_H__ | |||||
#define __P448_ALGO_H__ 1 | |||||
#include "ec_point.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
/* | |||||
* Out = scalar * in, encoded in inverse square root | |||||
* format. | |||||
* | |||||
* nbits is the number of bits in scalar. | |||||
* | |||||
* The scalar is to be presented in little-endian form, | |||||
* meaning that scalar[0] contains the least significant | |||||
* word of the scalar. | |||||
* | |||||
* If the point "in" is on the curve, the return | |||||
* value will be set (to -1). | |||||
* | |||||
* If the point "in" is not on the curve, then the | |||||
* output will be incorrect. If the scalar is even, | |||||
* this condition will be detected by returning 0, | |||||
* unless the output is the identity point (0; TODO). | |||||
* If the scalar is odd, the value returned will be | |||||
* set (to -1; TODO). | |||||
* | |||||
* The input and output points are always even. | |||||
* Therefore on a cofactor-4 curve like Goldilocks, | |||||
* it is sufficient for security to make the scalar | |||||
* even. (TODO: detect when i/o has cofactor?) | |||||
* | |||||
* This function takes constant time, depending on | |||||
* nbits but not on in or scalar. | |||||
*/ | |||||
/* Parameter notes (supplementing the description above):
 *   out              receives the serialized result.
 *   in               input point encoding.
 *   scalar           (nbits+63)/64 words, least significant word first.
 *   nbits            number of scalar bits processed.
 *   n_extra_doubles  number of additional ladder steps applied after the
 *                    last scalar bit has been processed.
 */
mask_t | |||||
p448_montgomery_ladder( | |||||
struct p448_t *out, | |||||
const struct p448_t *in, | |||||
const uint64_t *scalar, | |||||
int nbits, | |||||
int n_extra_doubles | |||||
); | |||||
/*
 * Multiply the point in *working by scalar, in place.
 *
 * The scalar is 7 words, least significant word first.  The implementation
 * uses a signed 4-bit window and selects table entries through a lookup
 * that scans the whole table.
 */
void | |||||
edwards_scalar_multiply( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar[7] | |||||
/* TODO? int nbits */ | |||||
); | |||||
/*
 * Multiply the point in *working by scalar, in place.
 *
 * Same windowed algorithm as edwards_scalar_multiply, except that table
 * entries are fetched by direct indexing, so the memory access pattern
 * depends on the scalar.
 */
void | |||||
edwards_scalar_multiply_vlook( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar[7] | |||||
/* TODO? int nbits */ | |||||
); | |||||
/*
 * Build the comb table used by edwards_comb for the point const_base.
 *
 * out receives n<<(t-1) entries.  Returns 0 if s < 1 or if an allocation
 * fails; otherwise returns ~p448_is_zero() of the first inverted z value.
 */
mask_t | |||||
precompute_for_combs( | |||||
struct tw_niels_t *out, | |||||
const struct tw_extensible_t *const_base, | |||||
int n, | |||||
int t, | |||||
int s | |||||
); | |||||
/*
 * Fixed-base scalar multiplication using a table from
 * precompute_for_combs.
 *
 * *working is overwritten with the result (its input value is not read
 * when n >= 1 and s >= 1).  n, t and s must match the values used to build
 * table; table holds 1<<(t-1) entries for each of the n combs.
 */
void | |||||
edwards_comb( | |||||
struct tw_extensible_t *working, | |||||
const word_t scalar[7], | |||||
const struct tw_niels_t *table, | |||||
int n, | |||||
int t, | |||||
int s | |||||
); | |||||
/*
 * Variable-time scalar multiplication: multiply the point in *working by
 * scalar, in place.  Which additions are performed depends on the scalar.
 */
void | |||||
edwards_scalar_multiply_vt( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar[7] | |||||
); | |||||
/*
 * Variable-time scalar multiplication against a precomputed table.
 *
 * *working is overwritten with the result; its input value is not read.
 * precmp is indexed by |digit| >> 1 of the recoded scalar, and table_bits
 * is the table size parameter used for the recoding (presumably the tbits
 * given to precompute_for_wnaf -- confirm with callers).
 */
void | |||||
edwards_scalar_multiply_vt_pre( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar[7], | |||||
const struct tw_niels_t *precmp, | |||||
int table_bits | |||||
); | |||||
/*
 * Build a table of 1<<tbits odd multiples of const_base in out.
 *
 * Returns 0 if an allocation fails and -1 (all ones) otherwise.
 */
mask_t | |||||
precompute_for_wnaf( | |||||
struct tw_niels_t *out, | |||||
const struct tw_extensible_t *const_base, | |||||
int tbits | |||||
); /* TODO: attr don't ignore... */ | |||||
/*
 * Variable-time double scalar multiplication.
 *
 * Combines scalar_var applied to the point in *working with scalar_pre
 * applied through the precomputed table precmp, and stores the result in
 * *working.  table_bits_pre is the table size parameter used to recode
 * scalar_pre.
 */
void | |||||
edwards_combo_var_fixed_vt( | |||||
struct tw_extensible_t *working, | |||||
const uint64_t scalar_var[7], | |||||
const uint64_t scalar_pre[7], | |||||
const struct tw_niels_t *precmp, | |||||
int table_bits_pre | |||||
); | |||||
#ifdef __cplusplus | |||||
}; | |||||
#endif | |||||
#endif /* __P448_ALGO_H__ */ |
@@ -1,182 +0,0 @@ | |||||
/* Copyright (c) 2011 Stanford University. | |||||
* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#include "sha512.h" | |||||
#include <string.h> | |||||
#include <assert.h> | |||||
/**
 * Rotate a 64-bit value right by d bits.
 *
 * The rotation count is taken modulo 64, so d == 0 (or any multiple of 64)
 * returns x unchanged.  Both shift counts are masked into 0..63 because
 * shifting a 64-bit value by 64 is undefined behaviour in C.
 */
static inline uint64_t
rotate_r (
    uint64_t x,
    int d
) {
    const unsigned int r = (unsigned int)d & 63u;
    return (x >> r) | (x << ((64u - r) & 63u));
}
/* TODO: get from headers */
/**
 * Convert a 64-bit value from host byte order to big-endian byte order.
 *
 * The big-endian byte sequence is written out explicitly and reinterpreted
 * as a uint64_t, so the result is correct on any host endianness and needs
 * no architecture-specific inline assembly.  On a little-endian host this
 * is a byte swap; on a big-endian host it is the identity.  Applying it
 * twice returns the original value, so it also converts big-endian to host
 * order.
 */
static inline uint64_t
htobe64 (uint64_t x) {
    unsigned char be[8];
    uint64_t out;
    int i;

    for (i=0; i<8; i++) {
        be[i] = (unsigned char)(x >> (56 - 8*i));
    }
    memcpy(&out, be, sizeof(out));
    return out;
}
/* Initial chaining value: sha512_init copies these eight words into
 * ctx->chain. */
static const uint64_t | |||||
sha512_init_state[8] = { | |||||
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, | |||||
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 | |||||
}; | |||||
/* Round constants: sha512_process_block adds sha512_k[i] in round i,
 * for i = 0..79. */
static const uint64_t | |||||
sha512_k[80] = { | |||||
0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, | |||||
0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, | |||||
0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, | |||||
0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694, | |||||
0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, | |||||
0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, | |||||
0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, | |||||
0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70, | |||||
0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, | |||||
0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, | |||||
0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, | |||||
0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, | |||||
0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, | |||||
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, | |||||
0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, | |||||
0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, | |||||
0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, | |||||
0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, | |||||
0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, | |||||
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 | |||||
}; | |||||
/* Round function term applied to h1: XOR of its right-rotations by 28, 34
 * and 39 bits. */
static inline uint64_t S0 (uint64_t h1) {
    const uint64_t r28 = rotate_r(h1, 28);
    const uint64_t r34 = rotate_r(h1, 34);
    const uint64_t r39 = rotate_r(h1, 39);
    return r28 ^ r34 ^ r39;
}
/* Round function term applied to h4: XOR of its right-rotations by 14, 18
 * and 41 bits. */
static inline uint64_t S1 (uint64_t h4) {
    const uint64_t r14 = rotate_r(h4,14);
    const uint64_t r18 = rotate_r(h4,18);
    const uint64_t r41 = rotate_r(h4,41);
    return r14 ^ r18 ^ r41;
}
/* Message schedule term: right-rotations of a by 1 and 8 bits, XORed with
 * a shifted right by 7 bits. */
static inline uint64_t s0 (uint64_t a) {
    const uint64_t r1 = rotate_r(a,1);
    const uint64_t r8 = rotate_r(a,8);
    const uint64_t shifted = a >> 7;
    return r1 ^ r8 ^ shifted;
}
/* Message schedule term: right-rotations of b by 19 and 61 bits, XORed
 * with b shifted right by 6 bits. */
static inline uint64_t s1 (uint64_t b) {
    const uint64_t r19 = rotate_r(b,19);
    const uint64_t r61 = rotate_r(b,61);
    const uint64_t shifted = b >> 6;
    return r19 ^ r61 ^ shifted;
}
/* Bitwise choice: each result bit comes from h5 where the matching bit of
 * h4 is set, and from h6 where it is clear. */
static inline uint64_t ch (uint64_t h4, uint64_t h5, uint64_t h6) {
    const uint64_t from_h5 = h4 & h5;
    const uint64_t from_h6 = ~h4 & h6;
    return from_h5 ^ from_h6;
}
/* Bitwise majority: each result bit is set when at least two of the
 * matching bits of h1, h2 and h3 are set. */
static inline uint64_t maj(uint64_t h1, uint64_t h2, uint64_t h3) {
    const uint64_t both = h1 & h2;
    const uint64_t either = h1 | h2;
    return both | (h3 & either);
}
/*
 * Compress the 128-byte block held in ctx->block into ctx->chain.
 *
 * The block buffer is viewed as sixteen 64-bit words and is overwritten:
 * the first 16 rounds convert each word with htobe64 in place, and rounds
 * 16..79 reuse the buffer as a 16-word circular message schedule.  After
 * the 80 rounds the working variables h0..h7 are added into the chaining
 * value.
 *
 * NOTE(review): the uint8_t block is accessed through a uint64_t pointer;
 * this assumes suitable alignment and that the compiler tolerates the
 * type pun -- confirm.
 */
static void | |||||
sha512_process_block ( | |||||
struct sha512_ctx_t *ctx | |||||
) { | |||||
uint64_t i, tmp, a, b, | |||||
*w = (uint64_t *) ctx->block, | |||||
*state = ctx->chain, | |||||
h0 = state[0], h1 = state[1], h2 = state[2], h3 = state[3], | |||||
h4 = state[4], h5 = state[5], h6 = state[6], h7 = state[7]; | |||||
/* Clang doesn't unswitch this automatically */ | |||||
/* Rounds 0..15: the message words are used directly. */
for (i=0; i<16; i++) { | |||||
/* load up the input word for this round */ | |||||
tmp = w[i] = htobe64(w[i]); | |||||
tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; | |||||
/* shift register */ | |||||
h7 = h6; h6 = h5; h5 = h4; | |||||
h4 = h3 + tmp; | |||||
h3 = h2; h2 = h1; h1 = h0; | |||||
/* h1..h3 already hold the pre-round h0..h2 at this point. */
h0 = tmp + maj(h1,h2,h3) + S0(h1); | |||||
} | |||||
/* Rounds 16..79: each word is derived from four earlier schedule words,
 * indexed modulo 16. */
for (; i<80; i++) { | |||||
/* load up the input word for this round */ | |||||
a = w[(i+1 ) & 15]; | |||||
b = w[(i+14) & 15]; | |||||
tmp = w[i&15] = s0(a) + s1(b) + w[i&15] + w[(i+9) & 15]; | |||||
tmp = tmp + h7 + S1(h4) + ch(h4,h5,h6) + sha512_k[i]; | |||||
/* shift register */ | |||||
h7 = h6; h6 = h5; h5 = h4; | |||||
h4 = h3 + tmp; | |||||
h3 = h2; h2 = h1; h1 = h0; | |||||
h0 = tmp + maj(h1,h2,h3) + S0(h1); | |||||
} | |||||
/* Feed-forward: add the working variables into the chaining value. */
state[0] += h0; | |||||
state[1] += h1; | |||||
state[2] += h2; | |||||
state[3] += h3; | |||||
state[4] += h4; | |||||
state[5] += h5; | |||||
state[6] += h6; | |||||
state[7] += h7; | |||||
} | |||||
void | |||||
sha512_init ( | |||||
struct sha512_ctx_t *ctx | |||||
) { | |||||
ctx->nbytes = 0; | |||||
memcpy(ctx->chain, sha512_init_state, sizeof(sha512_init_state)); | |||||
memset(ctx->block, 0, sizeof(ctx->block)); | |||||
} | |||||
void | |||||
sha512_update ( | |||||
struct sha512_ctx_t *ctx, | |||||
const unsigned char *data, | |||||
uint64_t bytes | |||||
) { | |||||
assert(ctx->nbytes < 1ull<<56); | |||||
assert(bytes < 1ull<<56); | |||||
while (bytes) { | |||||
uint64_t fill = ctx->nbytes % 128, accept = 128 - fill; | |||||
if (accept > bytes) accept = bytes; | |||||
ctx->nbytes += accept; | |||||
memcpy(ctx->block + fill, data, accept); | |||||
if (fill+accept == 128) | |||||
sha512_process_block(ctx); | |||||
bytes -= accept; | |||||
data += accept; | |||||
} | |||||
assert(ctx->nbytes < 1ull<<56); | |||||
} | |||||
void | |||||
sha512_final ( | |||||
struct sha512_ctx_t *ctx, | |||||
uint8_t result[64] | |||||
) { | |||||
uint64_t fill = ctx->nbytes % 128, i; | |||||
ctx->block[fill++] = 0x80; | |||||
if (fill > 112) { | |||||
memset(ctx->block + fill, 0, 128-fill); | |||||
sha512_process_block(ctx); | |||||
fill = 0; | |||||
} | |||||
memset(ctx->block + fill, 0, 112-fill); | |||||
*((uint64_t *)&ctx->block[112]) = 0; | |||||
*((uint64_t *)&ctx->block[120]) = htobe64((ctx->nbytes * 8)); | |||||
sha512_process_block(ctx); | |||||
for (i=0; i<8; i++) { | |||||
ctx->chain[i] = htobe64(ctx->chain[i]); | |||||
} | |||||
memcpy(result, ctx->chain, sizeof(ctx->chain)); | |||||
sha512_init(ctx); | |||||
} |
@@ -1,49 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#ifndef __GOLDI_SHA512_H__ | |||||
#define __GOLDI_SHA512_H__ 1 | |||||
#include <stdint.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
/* TODO: KAT */ | |||||
/** | |||||
* SHA512 hashing context. | |||||
* | |||||
* This structure is opaque. | |||||
*/ | |||||
struct sha512_ctx_t { | |||||
/** @privatesection */ | |||||
/* Chaining value, updated after every processed block. */
uint64_t chain[8]; | |||||
/* Buffer for the current, partially filled 128-byte input block. */
uint8_t block[128]; | |||||
/* Total number of input bytes absorbed so far. */
uint64_t nbytes; | |||||
}; | |||||
/**
 * Initialize (or reset) a SHA512 context for hashing a new message.
 *
 * @param ctx Context to initialize.
 */
void
sha512_init (
    struct sha512_ctx_t *ctx
);
/**
 * Feed more message data into a SHA512 context.
 *
 * @param ctx   Context previously set up with sha512_init().
 * @param data  Bytes to hash.
 * @param bytes Number of bytes at data.
 */
void
sha512_update (
    struct sha512_ctx_t *ctx,
    const unsigned char *data,
    uint64_t bytes
);
/**
 * Finish hashing and output the digest.
 *
 * @param ctx    Context holding the absorbed message.
 * @param result Receives the 64-byte SHA512 digest.
 */
void
sha512_final (
    struct sha512_ctx_t *ctx,
    uint8_t result[64]
);
#ifdef __cplusplus | |||||
}; /* extern "C" */ | |||||
#endif | |||||
#endif /* __GOLDI_SHA512_H__ */ |
@@ -1,55 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#ifndef __WORD_H__ | |||||
#define __WORD_H__ | |||||
#include <stdint.h> | |||||
/* Machine word and double-width word types, unsigned and signed. */
typedef uint64_t word_t;
typedef __uint128_t dword_t;
typedef int64_t sword_t;
typedef __int128_t dsword_t;

/* Number of bits in a word_t. */
static const int WORD_BITS = sizeof(word_t) * 8;
/* TODO: vector width for procs like ARM; gcc support */

/* Scalar mask: all bits clear means failure, all bits set means success. */
typedef uint64_t mask_t;
/* Four-lane vector of masks; the vector attribute applies to this type only. */
typedef uint64_t vecmask_t __attribute__((ext_vector_type(4)));

static const mask_t MASK_FAILURE = 0, MASK_SUCCESS = -1;
/*
 * Fixed-width vector types, named <element type>x<lane count>.
 *
 * FIXME this only works on clang: ext_vector_type is a clang extension.
 */
typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
typedef int64_t  int64x2_t  __attribute__((ext_vector_type(2)));
typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
typedef int64_t  int64x4_t  __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
typedef int32_t  int32x4_t  __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
typedef int32_t  int32x8_t  __attribute__((ext_vector_type(8)));
#if __AVX2__ | |||||
typedef uint32x8_t big_register_t; | |||||
typedef uint64x4_t uint64xn_t; | |||||
#elif __SSE2__ || __ARM_NEON__ | |||||
typedef uint32x4_t big_register_t; | |||||
typedef uint64x2_t uint64xn_t; | |||||
#elif _WIN64 || __amd64__ || __X86_64__ || __aarch64__ | |||||
typedef uint64_t big_register_t, uint64xn_t; | |||||
#else | |||||
typedef uint64_t uint64xn_t; | |||||
typedef uint32_t big_register_t; | |||||
#endif | |||||
#if __AVX2__ || __SSE2__ || __ARM_NEON__ | |||||
static __inline__ big_register_t | |||||
br_is_zero(big_register_t x) { | |||||
return (big_register_t)(x == (big_register_t)0); | |||||
} | |||||
#else | |||||
#error "TODO: constant-time equality on vectorless platforms" | |||||
#endif | |||||
#endif /* __WORD_H__ */ |
@@ -1,246 +0,0 @@ | |||||
/* Copyright (c) 2014 Cryptography Research, Inc. | |||||
* Released under the MIT License. See LICENSE.txt for license information. | |||||
*/ | |||||
#ifndef __X86_64_ARITH_H__ | |||||
#define __X86_64_ARITH_H__ | |||||
#include <stdint.h> | |||||
/* TODO: non x86-64 versions of these. | |||||
* TODO: autogenerate | |||||
*/ | |||||
/**
 * Full 64x64 -> 128-bit unsigned multiply of two words read from memory.
 *
 * @param a Pointer to the first factor.
 * @param b Pointer to the second factor.
 * @return (*a) * (*b) as a 128-bit value.
 */
static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) {
    uint64_t c, d;
#ifndef __BMI2__
    /*
     * rax is overwritten by the movq before *b is read, so the rax output
     * must be early-clobber ("=&a"); otherwise the compiler is free to keep
     * the address of *b in rax.  rdx is only written by mulq after its
     * memory operand has been read, so "=d" is sufficient.
     */
    __asm__ volatile
        ("movq %[a], %%rax;"
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=d"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rdx;"
         "mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx");
#endif
    return (((__uint128_t)(d))<<64) | c;
}
/**
 * Full 64x64 -> 128-bit unsigned multiply, register operand times memory
 * operand.
 *
 * @param a First factor, passed by value.
 * @param b Pointer to the second factor.
 * @return a * (*b) as a 128-bit value.
 */
static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) {
    uint64_t c, d;
#ifndef __BMI2__
    /*
     * rax is overwritten by the movq before *b is read, so the rax output
     * must be early-clobber ("=&a") to keep the address of *b out of rax.
     */
    __asm__ volatile
        ("movq %[a], %%rax;"
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=d"(d)
         : [b]"m"(*b), [a]"r"(a)
         : "cc");
#else
    /* mulx takes its implicit factor from rdx, hence the "d" constraint. */
    __asm__ volatile
        ("mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"d"(a));
#endif
    return (((__uint128_t)(d))<<64) | c;
}
/**
 * Doubled wide multiply: (2 * *a) * (*b) as a 128-bit value.
 *
 * The doubling is done in 64 bits, so the top bit of *a is discarded
 * before the multiplication.
 *
 * @param a Pointer to the factor that is doubled.
 * @param b Pointer to the other factor.
 * @return ((2 * *a) mod 2^64) * (*b).
 */
static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) {
    uint64_t c, d;
#ifndef __BMI2__
    /*
     * rax is overwritten before *b is read, so the rax output must be
     * early-clobber ("=&a") to keep the address of *b out of rax.
     */
    __asm__ volatile
        ("movq %[a], %%rax; "
         "addq %%rax, %%rax; "
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=d"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rdx;"
         "leaq (,%%rdx,2), %%rdx;"
         "mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx");
#endif
    return (((__uint128_t)(d))<<64) | c;
}
/**
 * Multiply-accumulate: *acc += (*a) * (*b), modulo 2^128.
 *
 * @param acc 128-bit accumulator, updated in place.
 * @param a   Pointer to the first factor.
 * @param b   Pointer to the second factor.
 */
static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
    /* Split the accumulator into halves so the asm can add with carry. */
    uint64_t lo = *acc, hi = *acc>>64;

#ifdef __BMI2__
    uint64_t c,d;
    __asm__ volatile
        ("movq %[a], %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi))<<64) | lo;
}
/**
 * Multiply-accumulate with a by-value factor: *acc += a * (*b),
 * modulo 2^128.
 *
 * @param acc 128-bit accumulator, updated in place.
 * @param a   First factor, passed by value.
 * @param b   Pointer to the second factor.
 */
static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) {
    /* Split the accumulator into halves so the asm can add with carry. */
    uint64_t lo = *acc, hi = *acc>>64;

#ifdef __BMI2__
    uint64_t c,d;
    /* mulx takes its implicit factor from rdx, hence the "d" constraint. */
    __asm__ volatile
        ("mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"d"(a)
         : "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"r"(a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi))<<64) | lo;
}
/**
 * Doubled multiply-accumulate: *acc += (2 * *a) * (*b), modulo 2^128.
 *
 * The doubling is done in 64 bits, so the top bit of *a is discarded
 * before the multiplication.
 *
 * @param acc 128-bit accumulator, updated in place.
 * @param a   Pointer to the factor that is doubled.
 * @param b   Pointer to the other factor.
 */
static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
    /* Split the accumulator into halves so the asm can add with carry. */
    uint64_t lo = *acc, hi = *acc>>64;

#ifdef __BMI2__
    uint64_t c,d;
    __asm__ volatile
        ("movq %[a], %%rdx; "
         "addq %%rdx, %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "addq %%rax, %%rax; "
         "mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi))<<64) | lo;
}
/**
 * Multiply-subtract: *acc -= (*a) * (*b), modulo 2^128.
 *
 * @param acc 128-bit accumulator, updated in place.
 * @param a   Pointer to the first factor.
 * @param b   Pointer to the second factor.
 */
static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
    /* Split the accumulator into halves so the asm can subtract with borrow. */
    uint64_t lo = *acc, hi = *acc>>64;

#ifdef __BMI2__
    uint64_t c,d;
    __asm__ volatile
        ("movq %[a], %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "subq %[c], %[lo]; "
         "sbbq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "subq %%rax, %[lo]; "
         "sbbq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi))<<64) | lo;
}
/**
 * Doubled multiply-subtract: *acc -= (2 * *a) * (*b), modulo 2^128.
 *
 * The doubling is done in 64 bits, so the top bit of *a is discarded
 * before the multiplication.
 *
 * @param acc 128-bit accumulator, updated in place.
 * @param a   Pointer to the factor that is doubled.
 * @param b   Pointer to the other factor.
 */
static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
    /* Split the accumulator into halves so the asm can subtract with borrow. */
    uint64_t lo = *acc, hi = *acc>>64;

#ifdef __BMI2__
    uint64_t c,d;
    __asm__ volatile
        ("movq %[a], %%rdx; "
         "addq %%rdx, %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "subq %[c], %[lo]; "
         "sbbq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    __asm__ volatile
        ("movq %[a], %%rax; "
         "addq %%rax, %%rax; "
         "mulq %[b]; "
         "subq %%rax, %[lo]; "
         "sbbq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rax", "rdx", "cc");
#endif

    *acc = (((__uint128_t)(hi))<<64) | lo;
}
/**
 * Multiply, reverse-subtract: *acc = (*a) * (*b) - *acc, modulo 2^128.
 *
 * @param acc 128-bit accumulator, updated in place.
 * @param a   Pointer to the first factor.
 * @param b   Pointer to the second factor.
 */
static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
    uint64_t c,d, lo = *acc, hi = *acc>>64;

#ifdef __BMI2__
    __asm__ volatile
        ("movq %[a], %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "subq %[lo], %[c]; "
         "sbbq %[hi], %[d]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    /*
     * Fallback for targets without BMI2 (no mulx), matching the other
     * helpers in this header.  rax and rdx are written before lo/hi are
     * read, so both outputs are early-clobber to keep every input,
     * including the address registers of *a and *b, out of them.
     */
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "subq %[lo], %%rax; "
         "sbbq %[hi], %%rdx; "
         : [c]"=&a"(c), [d]"=&d"(d)
         : [b]"m"(*b), [a]"m"(*a), [lo]"r"(lo), [hi]"r"(hi)
         : "cc");
#endif

    *acc = (((__uint128_t)(d))<<64) | c;
}
/**
 * Unsigned 64x64 -> 128-bit multiply of two by-value operands.
 *
 * @return a * b computed in 128 bits.
 */
static __inline__ __uint128_t widemulu(uint64_t a, uint64_t b) {
    return ((__uint128_t)(a)) * b;
}
/**
 * Signed 64x64 -> 128-bit multiply of two by-value operands.
 *
 * @return a * b computed in 128 bits.
 */
static __inline__ __int128_t widemuls(int64_t a, int64_t b) {
    return ((__int128_t)(a)) * b;
}
/**
 * Return x unchanged while hiding its value from the optimizer.
 *
 * The empty asm statement names x as a read-write register operand, so
 * the compiler must treat the value as possibly modified at this point.
 *
 * @param x Value to pass through.
 * @return x.
 */
static __inline__ uint64_t opacify(uint64_t x) {
    __asm__ volatile("" : "+r"(x));
    return x;
}
/**
 * Branch-free zero test.
 *
 * neg sets the carry flag exactly when x is nonzero; sbb x,x then yields
 * 0 - carry, i.e. all-ones for nonzero x and 0 for zero x.  The result is
 * inverted on return.
 *
 * @param x Value to test.
 * @return All-ones if x is zero, 0 otherwise.
 */
static __inline__ mask_t is_zero(uint64_t x) {
    /* Both instructions modify the flags, so declare "cc" like the other helpers. */
    __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x) : : "cc");
    return ~x;
}
#endif /* __X86_64_ARITH_H__ */ |