|
- /**
- * @file constant_time.h
- * @copyright
- * Copyright (c) 2014 Cryptography Research, Inc. \n
- * Released under the MIT License. See LICENSE.txt for license information.
- * @author Mike Hamburg
- *
- * @brief Constant-time routines.
- */
-
- #ifndef __CONSTANT_TIME_H__
- #define __CONSTANT_TIME_H__ 1
-
- #include "word.h"
-
- /*
- * Constant-time operations on hopefully-compile-time-sized memory
- * regions. Needed for flexibility / demagication: not all fields
- * have sizes which are multiples of the vector width, necessitating
- * a change from the Ed448 versions.
- *
- * These routines would be much simpler to define at the byte level,
- * but if not vectorized they would be a significant fraction of the
- * runtime. E.g., on NEON-less ARM, constant_time_lookup is about 15% of
- * signing time, vs. 6% on Haswell with its fancy AVX2 vectors.
- *
- * If the compiler could do a good job of autovectorizing the code,
- * we could just leave it with the byte definition. But that's unlikely
- * on most deployed compilers, especially if you consider that pcmpeq[size]
- * is much faster than moving a scalar to the vector unit (which is what
- * a naive autovectorizer will do with constant_time_lookup on Intel).
- *
- * Instead, we're putting our trust in the loop unroller and unswitcher.
- *
- * TODO: verify correctness and performance on each platform, to make sure
- * that there are no regressions.
- */
-
-
- /**
- * Unaligned big (vector?) register.
- */
- typedef struct {
- big_register_t unaligned;
- } __attribute__((packed)) unaligned_br_t;
-
- /**
- * Unaligned word register, for architectures where that matters.
- */
- typedef struct {
- word_t unaligned;
- } __attribute__((packed)) unaligned_word_t;
-
- /**
- * @brief Constant-time conditional swap.
- *
- * If doswap, then swap elem_bytes between *a and *b.
- *
- * *a and *b must not alias. Also, they must be at least as aligned
- * as their sizes, if the CPU cares about that sort of thing.
- */
- static __inline__ void
- __attribute__((unused,always_inline))
- constant_time_cond_swap (
- void *__restrict__ a_,
- void *__restrict__ b_,
- word_t elem_bytes,
- mask_t doswap
- ) {
- word_t k;
- unsigned char *a = (unsigned char *)a_;
- unsigned char *b = (unsigned char *)b_;
-
- big_register_t br_mask = br_set_to_mask(doswap);
- for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
- if (elem_bytes % sizeof(big_register_t)) {
- /* unaligned */
- big_register_t xor =
- ((unaligned_br_t*)(&a[k]))->unaligned
- ^ ((unaligned_br_t*)(&b[k]))->unaligned;
- xor &= br_mask;
- ((unaligned_br_t*)(&a[k]))->unaligned ^= xor;
- ((unaligned_br_t*)(&b[k]))->unaligned ^= xor;
- } else {
- /* aligned */
- big_register_t xor =
- *((big_register_t*)(&a[k]))
- ^ *((big_register_t*)(&b[k]));
- xor &= br_mask;
- *((big_register_t*)(&a[k])) ^= xor;
- *((big_register_t*)(&b[k])) ^= xor;
- }
- }
-
- if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
- for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
- if (elem_bytes % sizeof(word_t)) {
- /* unaligned */
- word_t xor =
- ((unaligned_word_t*)(&a[k]))->unaligned
- ^ ((unaligned_word_t*)(&b[k]))->unaligned;
- xor &= doswap;
- ((unaligned_word_t*)(&a[k]))->unaligned ^= xor;
- ((unaligned_word_t*)(&b[k]))->unaligned ^= xor;
- } else {
- /* aligned */
- word_t xor =
- *((word_t*)(&a[k]))
- ^ *((word_t*)(&b[k]));
- xor &= doswap;
- *((word_t*)(&a[k])) ^= xor;
- *((word_t*)(&b[k])) ^= xor;
- }
- }
- }
-
- if (elem_bytes % sizeof(word_t)) {
- for (; k<elem_bytes; k+=1) {
- unsigned char xor = a[k] ^ b[k];
- xor &= doswap;
- a[k] ^= xor;
- b[k] ^= xor;
- }
- }
- }
-
- /**
- * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
- *
- * The table must be at least as aligned as elem_bytes. The output must be word aligned,
- * and if the input size is vector aligned it must also be vector aligned.
- *
- * The table and output must not alias.
- */
- static __inline__ void
- __attribute__((unused,always_inline))
- constant_time_lookup (
- void *__restrict__ out_,
- const void *table_,
- word_t elem_bytes,
- word_t n_table,
- word_t idx
- ) {
- big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
-
- /* Can't do pointer arithmetic on void* */
- unsigned char *out = (unsigned char *)out_;
- const unsigned char *table = (const unsigned char *)table_;
- word_t j,k;
-
- really_memset(out, 0, elem_bytes);
- for (j=0; j<n_table; j++, big_i-=big_one) {
- big_register_t br_mask = br_is_zero(big_i);
- for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
- if (elem_bytes % sizeof(big_register_t)) {
- /* unaligned */
- ((unaligned_br_t *)(out+k))->unaligned
- |= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned;
- } else {
- /* aligned */
- *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]);
- }
- }
-
- word_t mask = word_is_zero(idx^j);
- if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
- for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
- if (elem_bytes % sizeof(word_t)) {
- /* input unaligned, output aligned */
- *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned;
- } else {
- /* aligned */
- *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]);
- }
- }
- }
-
- if (elem_bytes % sizeof(word_t)) {
- for (; k<elem_bytes; k+=1) {
- out[k] |= mask & table[k+j*elem_bytes];
- }
- }
- }
- }
-
- /**
- * @brief Constant-time a = b&mask.
- *
- * The input and output must be at least as aligned as elem_bytes.
- */
- static __inline__ void
- __attribute__((unused,always_inline))
- constant_time_mask (
- void *__restrict__ a_,
- const void *b_,
- word_t elem_bytes,
- mask_t mask
- ) {
- unsigned char *a = (unsigned char *)a_;
- const unsigned char *b = (const unsigned char *)b_;
-
- word_t k;
- big_register_t br_mask = br_set_to_mask(mask);
- for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
- if (elem_bytes % sizeof(big_register_t)) {
- /* unaligned */
- ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned;
- } else {
- /* aligned */
- *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]);
- }
- }
-
- if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
- for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
- if (elem_bytes % sizeof(word_t)) {
- /* unaligned */
- ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned;
- } else {
- /* aligned */
- *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]);
- }
- }
- }
-
- if (elem_bytes % sizeof(word_t)) {
- for (; k<elem_bytes; k+=1) {
- a[k] = mask & b[k];
- }
- }
- }
-
- /**
- * @brief Constant-time a = mask ? bTrue : bFalse.
- *
- * The input and output must be at least as aligned as elem_bytes.
- *
- * Note that the output is not __restrict__, but if it overlaps either
- * input, it must be equal and not partially overlap.
- */
- static __inline__ void
- __attribute__((unused,always_inline))
- constant_time_select (
- void *a_,
- const void *bTrue_,
- const void *bFalse_,
- word_t elem_bytes,
- mask_t mask
- ) {
- unsigned char *a = (unsigned char *)a_;
- const unsigned char *bTrue = (const unsigned char *)bTrue_;
- const unsigned char *bFalse = (const unsigned char *)bFalse_;
-
- word_t k;
- big_register_t br_mask = br_set_to_mask(mask);
- for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
- if (elem_bytes % sizeof(big_register_t)) {
- /* unaligned */
- ((unaligned_br_t*)(&a[k]))->unaligned =
- ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
- | (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
- } else {
- /* aligned */
- *(big_register_t *)(a+k) =
- ( br_mask & *(const big_register_t*)(&bTrue [k]))
- | (~br_mask & *(const big_register_t*)(&bFalse[k]));
- }
- }
-
- if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
- for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
- if (elem_bytes % sizeof(word_t)) {
- /* unaligned */
- ((unaligned_word_t*)(&a[k]))->unaligned =
- ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
- | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
- } else {
- /* aligned */
- *(word_t *)(a+k) =
- ( mask & *(const word_t*)(&bTrue [k]))
- | (~mask & *(const word_t*)(&bFalse[k]));
- }
- }
- }
-
- if (elem_bytes % sizeof(word_t)) {
- for (; k<elem_bytes; k+=1) {
- a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
- }
- }
- }
-
- #endif /* __CONSTANT_TIME_H__ */
|