You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

294 lines
9.5 KiB

  1. /**
  2. * @file constant_time.h
  3. * @copyright
  4. * Copyright (c) 2014 Cryptography Research, Inc. \n
  5. * Released under the MIT License. See LICENSE.txt for license information.
  6. * @author Mike Hamburg
  7. *
  8. * @brief Constant-time routines.
  9. */
  10. #ifndef __CONSTANT_TIME_H__
  11. #define __CONSTANT_TIME_H__ 1
  12. #include "word.h"
  13. /*
  14. * Constant-time operations on hopefully-compile-time-sized memory
  15. * regions. Needed for flexibility / demagication: not all fields
  16. * have sizes which are multiples of the vector width, necessitating
  17. * a change from the Ed448 versions.
  18. *
  19. * These routines would be much simpler to define at the byte level,
  20. * but if not vectorized they would be a significant fraction of the
  21. * runtime. E.g., on NEON-less ARM, constant_time_lookup is about 15% of
  22. * signing time, vs. 6% on Haswell with its fancy AVX2 vectors.
  23. *
  24. * If the compiler could do a good job of autovectorizing the code,
  25. * we could just leave it with the byte definition. But that's unlikely
  26. * on most deployed compilers, especially if you consider that pcmpeq[size]
  27. * is much faster than moving a scalar to the vector unit (which is what
  28. * a naive autovectorizer will do with constant_time_lookup on Intel).
  29. *
  30. * Instead, we're putting our trust in the loop unroller and unswitcher.
  31. *
  32. * TODO: verify correctness and performance on each platform, to make sure
  33. * that there are no regressions.
  34. */
  35. /**
  36. * Unaligned big (vector?) register.
  37. */
  38. typedef struct {
  39. big_register_t unaligned;
  40. } __attribute__((packed)) unaligned_br_t;
  41. /**
  42. * Unaligned word register, for architectures where that matters.
  43. */
  44. typedef struct {
  45. word_t unaligned;
  46. } __attribute__((packed)) unaligned_word_t;
  47. /**
  48. * @brief Constant-time conditional swap.
  49. *
  50. * If doswap, then swap elem_bytes between *a and *b.
  51. *
  52. * *a and *b must not alias. Also, they must be at least as aligned
  53. * as their sizes, if the CPU cares about that sort of thing.
  54. */
  55. static __inline__ void
  56. __attribute__((unused,always_inline))
  57. constant_time_cond_swap (
  58. void *__restrict__ a_,
  59. void *__restrict__ b_,
  60. word_t elem_bytes,
  61. mask_t doswap
  62. ) {
  63. word_t k;
  64. unsigned char *a = (unsigned char *)a_;
  65. unsigned char *b = (unsigned char *)b_;
  66. big_register_t br_mask = br_set_to_mask(doswap);
  67. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  68. if (elem_bytes % sizeof(big_register_t)) {
  69. /* unaligned */
  70. big_register_t xor =
  71. ((unaligned_br_t*)(&a[k]))->unaligned
  72. ^ ((unaligned_br_t*)(&b[k]))->unaligned;
  73. xor &= br_mask;
  74. ((unaligned_br_t*)(&a[k]))->unaligned ^= xor;
  75. ((unaligned_br_t*)(&b[k]))->unaligned ^= xor;
  76. } else {
  77. /* aligned */
  78. big_register_t xor =
  79. *((big_register_t*)(&a[k]))
  80. ^ *((big_register_t*)(&b[k]));
  81. xor &= br_mask;
  82. *((big_register_t*)(&a[k])) ^= xor;
  83. *((big_register_t*)(&b[k])) ^= xor;
  84. }
  85. }
  86. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  87. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  88. if (elem_bytes % sizeof(word_t)) {
  89. /* unaligned */
  90. word_t xor =
  91. ((unaligned_word_t*)(&a[k]))->unaligned
  92. ^ ((unaligned_word_t*)(&b[k]))->unaligned;
  93. xor &= doswap;
  94. ((unaligned_word_t*)(&a[k]))->unaligned ^= xor;
  95. ((unaligned_word_t*)(&b[k]))->unaligned ^= xor;
  96. } else {
  97. /* aligned */
  98. word_t xor =
  99. *((word_t*)(&a[k]))
  100. ^ *((word_t*)(&b[k]));
  101. xor &= doswap;
  102. *((word_t*)(&a[k])) ^= xor;
  103. *((word_t*)(&b[k])) ^= xor;
  104. }
  105. }
  106. }
  107. if (elem_bytes % sizeof(word_t)) {
  108. for (; k<elem_bytes; k+=1) {
  109. unsigned char xor = a[k] ^ b[k];
  110. xor &= doswap;
  111. a[k] ^= xor;
  112. b[k] ^= xor;
  113. }
  114. }
  115. }
  116. /**
  117. * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
  118. *
  119. * The table must be at least as aligned as elem_bytes. The output must be word aligned,
  120. * and if the input size is vector aligned it must also be vector aligned.
  121. *
  122. * The table and output must not alias.
  123. */
  124. static __inline__ void
  125. __attribute__((unused,always_inline))
  126. constant_time_lookup (
  127. void *__restrict__ out_,
  128. const void *table_,
  129. word_t elem_bytes,
  130. word_t n_table,
  131. word_t idx
  132. ) {
  133. big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
  134. /* Can't do pointer arithmetic on void* */
  135. unsigned char *out = (unsigned char *)out_;
  136. const unsigned char *table = (const unsigned char *)table_;
  137. word_t j,k;
  138. really_memset(out, 0, elem_bytes);
  139. for (j=0; j<n_table; j++, big_i-=big_one) {
  140. big_register_t br_mask = br_is_zero(big_i);
  141. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  142. if (elem_bytes % sizeof(big_register_t)) {
  143. /* unaligned */
  144. ((unaligned_br_t *)(out+k))->unaligned
  145. |= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned;
  146. } else {
  147. /* aligned */
  148. *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]);
  149. }
  150. }
  151. word_t mask = word_is_zero(idx^j);
  152. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  153. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  154. if (elem_bytes % sizeof(word_t)) {
  155. /* input unaligned, output aligned */
  156. *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned;
  157. } else {
  158. /* aligned */
  159. *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]);
  160. }
  161. }
  162. }
  163. if (elem_bytes % sizeof(word_t)) {
  164. for (; k<elem_bytes; k+=1) {
  165. out[k] |= mask & table[k+j*elem_bytes];
  166. }
  167. }
  168. }
  169. }
  170. /**
  171. * @brief Constant-time a = b&mask.
  172. *
  173. * The input and output must be at least as aligned as elem_bytes.
  174. */
  175. static __inline__ void
  176. __attribute__((unused,always_inline))
  177. constant_time_mask (
  178. void *__restrict__ a_,
  179. const void *b_,
  180. word_t elem_bytes,
  181. mask_t mask
  182. ) {
  183. unsigned char *a = (unsigned char *)a_;
  184. const unsigned char *b = (const unsigned char *)b_;
  185. word_t k;
  186. big_register_t br_mask = br_set_to_mask(mask);
  187. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  188. if (elem_bytes % sizeof(big_register_t)) {
  189. /* unaligned */
  190. ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned;
  191. } else {
  192. /* aligned */
  193. *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]);
  194. }
  195. }
  196. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  197. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  198. if (elem_bytes % sizeof(word_t)) {
  199. /* unaligned */
  200. ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned;
  201. } else {
  202. /* aligned */
  203. *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]);
  204. }
  205. }
  206. }
  207. if (elem_bytes % sizeof(word_t)) {
  208. for (; k<elem_bytes; k+=1) {
  209. a[k] = mask & b[k];
  210. }
  211. }
  212. }
  213. /**
  214. * @brief Constant-time a = mask ? bTrue : bFalse.
  215. *
  216. * The input and output must be at least as aligned as elem_bytes.
  217. *
  218. * Note that the output is not __restrict__, but if it overlaps either
  219. * input, it must be equal and not partially overlap.
  220. */
  221. static __inline__ void
  222. __attribute__((unused,always_inline))
  223. constant_time_select (
  224. void *a_,
  225. const void *bTrue_,
  226. const void *bFalse_,
  227. word_t elem_bytes,
  228. mask_t mask
  229. ) {
  230. unsigned char *a = (unsigned char *)a_;
  231. const unsigned char *bTrue = (const unsigned char *)bTrue_;
  232. const unsigned char *bFalse = (const unsigned char *)bFalse_;
  233. word_t k;
  234. big_register_t br_mask = br_set_to_mask(mask);
  235. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  236. if (elem_bytes % sizeof(big_register_t)) {
  237. /* unaligned */
  238. ((unaligned_br_t*)(&a[k]))->unaligned =
  239. ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
  240. | (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
  241. } else {
  242. /* aligned */
  243. *(big_register_t *)(a+k) =
  244. ( br_mask & *(const big_register_t*)(&bTrue [k]))
  245. | (~br_mask & *(const big_register_t*)(&bFalse[k]));
  246. }
  247. }
  248. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  249. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  250. if (elem_bytes % sizeof(word_t)) {
  251. /* unaligned */
  252. ((unaligned_word_t*)(&a[k]))->unaligned =
  253. ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
  254. | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
  255. } else {
  256. /* aligned */
  257. *(word_t *)(a+k) =
  258. ( mask & *(const word_t*)(&bTrue [k]))
  259. | (~mask & *(const word_t*)(&bFalse[k]));
  260. }
  261. }
  262. }
  263. if (elem_bytes % sizeof(word_t)) {
  264. for (; k<elem_bytes; k+=1) {
  265. a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
  266. }
  267. }
  268. }
  269. #endif /* __CONSTANT_TIME_H__ */