You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

363 lines
12 KiB

  1. /**
  2. * @file constant_time.h
  3. * @copyright
  4. * Copyright (c) 2014 Cryptography Research, Inc. \n
  5. * Released under the MIT License. See LICENSE.txt for license information.
  6. * @author Mike Hamburg
  7. *
  8. * @brief Constant-time routines.
  9. */
  10. #ifndef __CONSTANT_TIME_H__
  11. #define __CONSTANT_TIME_H__ 1
  12. #include "word.h"
  13. #include <string.h>
  14. /*
  15. * Constant-time operations on hopefully-compile-time-sized memory
  16. * regions. Needed for flexibility / demagication: not all fields
  17. * have sizes which are multiples of the vector width, necessitating
  18. * a change from the Ed448 versions.
  19. *
  20. * These routines would be much simpler to define at the byte level,
  21. * but if not vectorized they would be a significant fraction of the
  22. * runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of
  23. * signing time, vs 6% on Haswell with its fancy AVX2 vectors.
  24. *
  25. * If the compiler could do a good job of autovectorizing the code,
  26. * we could just leave it with the byte definition. But that's unlikely
  27. * on most deployed compilers, especially if you consider that pcmpeq[size]
  28. * is much faster than moving a scalar to the vector unit (which is what
  29. * a naive autovectorizer will do with constant_time_lookup on Intel).
  30. *
  31. * Instead, we're putting our trust in the loop unroller and unswitcher.
  32. */
  33. /**
  34. * Unaligned big (vector?) register.
  35. */
  36. typedef struct {
  37. big_register_t unaligned;
  38. } __attribute__((packed)) unaligned_br_t;
  39. /**
  40. * Unaligned word register, for architectures where that matters.
  41. */
  42. typedef struct {
  43. word_t unaligned;
  44. } __attribute__((packed)) unaligned_word_t;
  45. /**
  46. * @brief Constant-time conditional swap.
  47. *
  48. * If doswap, then swap elem_bytes between *a and *b.
  49. *
  50. * *a and *b must not alias. Also, they must be at least as aligned
  51. * as their sizes, if the CPU cares about that sort of thing.
  52. */
  53. static __inline__ void
  54. __attribute__((unused,always_inline))
  55. constant_time_cond_swap (
  56. void *__restrict__ a_,
  57. void *__restrict__ b_,
  58. word_t elem_bytes,
  59. mask_t doswap
  60. ) {
  61. word_t k;
  62. unsigned char *a = (unsigned char *)a_;
  63. unsigned char *b = (unsigned char *)b_;
  64. big_register_t br_mask = br_set_to_mask(doswap);
  65. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  66. if (elem_bytes % sizeof(big_register_t)) {
  67. /* unaligned */
  68. big_register_t xor =
  69. ((unaligned_br_t*)(&a[k]))->unaligned
  70. ^ ((unaligned_br_t*)(&b[k]))->unaligned;
  71. xor &= br_mask;
  72. ((unaligned_br_t*)(&a[k]))->unaligned ^= xor;
  73. ((unaligned_br_t*)(&b[k]))->unaligned ^= xor;
  74. } else {
  75. /* aligned */
  76. big_register_t xor =
  77. *((big_register_t*)(&a[k]))
  78. ^ *((big_register_t*)(&b[k]));
  79. xor &= br_mask;
  80. *((big_register_t*)(&a[k])) ^= xor;
  81. *((big_register_t*)(&b[k])) ^= xor;
  82. }
  83. }
  84. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  85. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  86. if (elem_bytes % sizeof(word_t)) {
  87. /* unaligned */
  88. word_t xor =
  89. ((unaligned_word_t*)(&a[k]))->unaligned
  90. ^ ((unaligned_word_t*)(&b[k]))->unaligned;
  91. xor &= doswap;
  92. ((unaligned_word_t*)(&a[k]))->unaligned ^= xor;
  93. ((unaligned_word_t*)(&b[k]))->unaligned ^= xor;
  94. } else {
  95. /* aligned */
  96. word_t xor =
  97. *((word_t*)(&a[k]))
  98. ^ *((word_t*)(&b[k]));
  99. xor &= doswap;
  100. *((word_t*)(&a[k])) ^= xor;
  101. *((word_t*)(&b[k])) ^= xor;
  102. }
  103. }
  104. }
  105. if (elem_bytes % sizeof(word_t)) {
  106. for (; k<elem_bytes; k+=1) {
  107. unsigned char xor = a[k] ^ b[k];
  108. xor &= doswap;
  109. a[k] ^= xor;
  110. b[k] ^= xor;
  111. }
  112. }
  113. }
  114. /**
  115. * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
  116. *
  117. * The table must be at least as aligned as elem_bytes. The output must be word aligned,
  118. * and if the input size is vector aligned it must also be vector aligned.
  119. *
  120. * The table and output must not alias.
  121. */
  122. static __inline__ void
  123. __attribute__((unused,always_inline))
  124. constant_time_lookup (
  125. void *__restrict__ out_,
  126. const void *table_,
  127. word_t elem_bytes,
  128. word_t n_table,
  129. word_t idx
  130. ) {
  131. big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
  132. /* Can't do pointer arithmetic on void* */
  133. unsigned char *out = (unsigned char *)out_;
  134. const unsigned char *table = (const unsigned char *)table_;
  135. word_t j,k;
  136. memset(out, 0, elem_bytes);
  137. for (j=0; j<n_table; j++, big_i-=big_one) {
  138. big_register_t br_mask = br_is_zero(big_i);
  139. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  140. if (elem_bytes % sizeof(big_register_t)) {
  141. /* unaligned */
  142. ((unaligned_br_t *)(out+k))->unaligned
  143. |= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned;
  144. } else {
  145. /* aligned */
  146. *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]);
  147. }
  148. }
  149. word_t mask = word_is_zero(idx^j);
  150. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  151. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  152. if (elem_bytes % sizeof(word_t)) {
  153. /* input unaligned, output aligned */
  154. *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned;
  155. } else {
  156. /* aligned */
  157. *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]);
  158. }
  159. }
  160. }
  161. if (elem_bytes % sizeof(word_t)) {
  162. for (; k<elem_bytes; k+=1) {
  163. out[k] |= mask & table[k+j*elem_bytes];
  164. }
  165. }
  166. }
  167. }
  168. /**
  169. * @brief Constant-time equivalent of memcpy(table + elem_bytes*idx, in, elem_bytes);
  170. *
  171. * The table must be at least as aligned as elem_bytes. The input must be word aligned,
  172. * and if the output size is vector aligned it must also be vector aligned.
  173. *
  174. * The table and input must not alias.
  175. */
  176. static __inline__ void
  177. __attribute__((unused,always_inline))
  178. constant_time_insert (
  179. void *__restrict__ table_,
  180. const void *in_,
  181. word_t elem_bytes,
  182. word_t n_table,
  183. word_t idx
  184. ) {
  185. big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
  186. /* Can't do pointer arithmetic on void* */
  187. const unsigned char *in = (const unsigned char *)in_;
  188. unsigned char *table = (unsigned char *)table_;
  189. word_t j,k;
  190. for (j=0; j<n_table; j++, big_i-=big_one) {
  191. big_register_t br_mask = br_is_zero(big_i);
  192. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  193. if (elem_bytes % sizeof(big_register_t)) {
  194. /* unaligned */
  195. ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned
  196. = ( ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned & ~br_mask )
  197. | ( ((const unaligned_br_t *)(in+k))->unaligned & br_mask );
  198. } else {
  199. /* aligned */
  200. *(big_register_t*)(&table[k+j*elem_bytes])
  201. = ( *(big_register_t*)(&table[k+j*elem_bytes]) & ~br_mask )
  202. | ( *(const big_register_t *)(in+k) & br_mask );
  203. }
  204. }
  205. word_t mask = word_is_zero(idx^j);
  206. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  207. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  208. if (elem_bytes % sizeof(word_t)) {
  209. /* output unaligned, input aligned */
  210. ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned
  211. = ( ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned & ~mask )
  212. | ( *(const word_t *)(in+k) & mask );
  213. } else {
  214. /* aligned */
  215. *(word_t*)(&table[k+j*elem_bytes])
  216. = ( *(word_t*)(&table[k+j*elem_bytes]) & ~mask )
  217. | ( *(const word_t *)(in+k) & mask );
  218. }
  219. }
  220. }
  221. if (elem_bytes % sizeof(word_t)) {
  222. for (; k<elem_bytes; k+=1) {
  223. table[k+j*elem_bytes]
  224. = ( table[k+j*elem_bytes] & ~mask )
  225. | ( in[k] & mask );
  226. }
  227. }
  228. }
  229. }
  230. /**
  231. * @brief Constant-time a = b&mask.
  232. *
  233. * The input and output must be at least as aligned as elem_bytes.
  234. */
  235. static __inline__ void
  236. __attribute__((unused,always_inline))
  237. constant_time_mask (
  238. void * a_,
  239. const void *b_,
  240. word_t elem_bytes,
  241. mask_t mask
  242. ) {
  243. unsigned char *a = (unsigned char *)a_;
  244. const unsigned char *b = (const unsigned char *)b_;
  245. word_t k;
  246. big_register_t br_mask = br_set_to_mask(mask);
  247. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  248. if (elem_bytes % sizeof(big_register_t)) {
  249. /* unaligned */
  250. ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned;
  251. } else {
  252. /* aligned */
  253. *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]);
  254. }
  255. }
  256. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  257. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  258. if (elem_bytes % sizeof(word_t)) {
  259. /* unaligned */
  260. ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned;
  261. } else {
  262. /* aligned */
  263. *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]);
  264. }
  265. }
  266. }
  267. if (elem_bytes % sizeof(word_t)) {
  268. for (; k<elem_bytes; k+=1) {
  269. a[k] = mask & b[k];
  270. }
  271. }
  272. }
  273. /**
  274. * @brief Constant-time a = mask ? bTrue : bFalse.
  275. *
  276. * The input and output must be at least as aligned as alignment_bytes
  277. * or their size, whichever is smaller.
  278. *
  279. * Note that the output is not __restrict__, but if it overlaps either
  280. * input, it must be equal and not partially overlap.
  281. */
  282. static __inline__ void
  283. __attribute__((unused,always_inline))
  284. constant_time_select (
  285. void *a_,
  286. const void *bFalse_,
  287. const void *bTrue_,
  288. word_t elem_bytes,
  289. mask_t mask,
  290. size_t alignment_bytes
  291. ) {
  292. unsigned char *a = (unsigned char *)a_;
  293. const unsigned char *bTrue = (const unsigned char *)bTrue_;
  294. const unsigned char *bFalse = (const unsigned char *)bFalse_;
  295. alignment_bytes |= elem_bytes;
  296. word_t k;
  297. big_register_t br_mask = br_set_to_mask(mask);
  298. for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
  299. if (alignment_bytes % sizeof(big_register_t)) {
  300. /* unaligned */
  301. ((unaligned_br_t*)(&a[k]))->unaligned =
  302. ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
  303. | (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
  304. } else {
  305. /* aligned */
  306. *(big_register_t *)(a+k) =
  307. ( br_mask & *(const big_register_t*)(&bTrue [k]))
  308. | (~br_mask & *(const big_register_t*)(&bFalse[k]));
  309. }
  310. }
  311. if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
  312. for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
  313. if (alignment_bytes % sizeof(word_t)) {
  314. /* unaligned */
  315. ((unaligned_word_t*)(&a[k]))->unaligned =
  316. ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
  317. | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
  318. } else {
  319. /* aligned */
  320. *(word_t *)(a+k) =
  321. ( mask & *(const word_t*)(&bTrue [k]))
  322. | (~mask & *(const word_t*)(&bFalse[k]));
  323. }
  324. }
  325. }
  326. if (elem_bytes % sizeof(word_t)) {
  327. for (; k<elem_bytes; k+=1) {
  328. a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
  329. }
  330. }
  331. }
  332. #endif /* __CONSTANT_TIME_H__ */