You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

489 lines
15 KiB

  1. /* Copyright (c) 2011 Stanford University.
  2. * Copyright (c) 2014 Cryptography Research, Inc.
  3. * Released under the MIT License. See LICENSE.txt for license information.
  4. */
  5. /* Chacha random number generator code copied from crandom */
  6. #include "crandom.h"
  7. #include "intrinsics.h"
  8. #include "config.h"
  9. #include "magic.h"
  10. #include <stdio.h>
  11. volatile unsigned int crandom_features = 0;
  12. unsigned int crandom_detect_features(void) {
  13. unsigned int out = GEN;
  14. # if (defined(__i386__) || defined(__x86_64__))
  15. u_int32_t a,b,c,d;
  16. a=1; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
  17. out |= GEN;
  18. if (d & 1<<26) out |= SSE2;
  19. if (d & 1<< 9) out |= SSSE3;
  20. if (c & 1<<25) out |= AESNI;
  21. if (c & 1<<28) out |= AVX;
  22. if (b & 1<<5) out |= AVX2;
  23. if (c & 1<<30) out |= RDRAND;
  24. a=0x80000001; __asm__("cpuid" : "+a"(a), "=b"(b), "=c"(c), "=d"(d));
  25. if (c & 1<<11) out |= XOP;
  26. # endif
  27. return out;
  28. }
  29. INTRINSIC u_int64_t rdrand(int abort_on_fail) {
  30. uint64_t out = 0;
  31. int tries = 1000;
  32. if (HAVE(RDRAND)) {
  33. # if defined(__x86_64__)
  34. u_int64_t out, a=0;
  35. for (; tries && !a; tries--) {
  36. __asm__ __volatile__ (
  37. "rdrand %0\n\tsetc %%al"
  38. : "=r"(out), "+a"(a) :: "cc"
  39. );
  40. }
  41. # elif (defined(__i386__))
  42. u_int32_t reg, a=0;
  43. uint64_t out;
  44. for (; tries && !a; tries--) {
  45. __asm__ __volatile__ (
  46. "rdrand %0\n\tsetc %%al"
  47. : "=r"(reg), "+a"(a) :: "cc"
  48. );
  49. }
  50. out = reg; a = 0;
  51. for (; tries && !a; tries--) {
  52. __asm__ __volatile__ (
  53. "rdrand %0\n\tsetc %%al"
  54. : "=r"(reg), "+a"(a) :: "cc"
  55. );
  56. }
  57. out = out << 32 | reg;
  58. return out;
  59. # else
  60. abort(); /* whut */
  61. # endif
  62. } else {
  63. tries = 0;
  64. }
  65. if (abort_on_fail && !tries) {
  66. abort();
  67. }
  68. return out;
  69. }
  /* ------------------------------- Vectorized code ------------------------------- */

  /*
   * Rotate the four 32-bit lanes of x: result lane k takes source lane
   * (k + i) & 3.  i must be a compile-time constant because it becomes the
   * immediate operand of _mm_shuffle_epi32.  Arguments are parenthesized so
   * that expression arguments expand correctly.
   */
  #define shuffle(x,i) _mm_shuffle_epi32((x), \
      ((i) + (((i)+1)&3)*4 + (((i)+2)&3)*16 + (((i)+3)&3)*64))

  /* Lane-wise 32-bit and 64-bit additions. */
  #define add _mm_add_epi32
  #define add64 _mm_add_epi64

  /*
   * Which quarter-round variants to compile.  Each one is built when its
   * feature might be present and the next-better feature is not guaranteed.
   * NOTE(review): MIGHT_HAVE / MUST_HAVE are defined outside this file.
   */
  #define NEED_XOP (MIGHT_HAVE(XOP))
  #define NEED_SSSE3 (MIGHT_HAVE(SSSE3) && !MUST_HAVE(XOP))
  #define NEED_SSE2 (MIGHT_HAVE(SSE2) && !MUST_HAVE(SSSE3))
  #define NEED_CONV (!MUST_HAVE(SSE2))
  #if NEED_XOP
  /*
   * ChaCha quarter round on four vector rows, using xop_rotate for all four
   * rotations (16, 12, 8, 7).  The rows are updated in place.
   */
  static __inline__ void
  quarter_round_xop(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
      *a = add(*a, *b);
      *d = xop_rotate(16, *d ^ *a);

      *c = add(*c, *d);
      *b = xop_rotate(12, *b ^ *c);

      *a = add(*a, *b);
      *d = xop_rotate(8, *d ^ *a);

      *c = add(*c, *d);
      *b = xop_rotate(7, *b ^ *c);
  }
  #endif /* NEED_XOP */
  #if NEED_SSSE3
  /*
   * Byte-shuffle control masks for _mm_shuffle_epi8.  Within every 32-bit
   * lane, shuffle8 moves the top byte to the bottom (rotate left by 8) and
   * shuffle16 swaps the two 16-bit halves (rotate by 16).
   */
  static const ssereg shuffle8 = { 0x0605040702010003ull, 0x0E0D0C0F0A09080Bull };
  static const ssereg shuffle16 = { 0x0504070601000302ull, 0x0D0C0F0E09080B0Aull };

  /* Rotate each 32-bit lane left by 8 bits with a single byte shuffle. */
  INTRINSIC ssereg ssse3_rotate_8(ssereg a) {
      return _mm_shuffle_epi8(a, shuffle8);
  }

  /* Rotate each 32-bit lane by 16 bits with a single byte shuffle. */
  INTRINSIC ssereg ssse3_rotate_16(ssereg a) {
      return _mm_shuffle_epi8(a, shuffle16);
  }

  /*
   * ChaCha quarter round on four vector rows.  The rotations by 16 and 8 use
   * the byte shuffles above; the rotations by 12 and 7 use sse2_rotate.
   */
  static __inline__ void
  quarter_round_ssse3(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
      *a = add(*a, *b);
      *d = ssse3_rotate_16(*d ^ *a);

      *c = add(*c, *d);
      *b = sse2_rotate(12, *b ^ *c);

      *a = add(*a, *b);
      *d = ssse3_rotate_8(*d ^ *a);

      *c = add(*c, *d);
      *b = sse2_rotate(7, *b ^ *c);
  }
  #endif /* NEED_SSSE3 */
  #if NEED_SSE2
  /*
   * ChaCha quarter round on four vector rows, using sse2_rotate for all four
   * rotations (16, 12, 8, 7).  The rows are updated in place.
   */
  static __inline__ void
  quarter_round_sse2(ssereg *a, ssereg *b, ssereg *c, ssereg *d) {
      *a = add(*a, *b);
      *d = sse2_rotate(16, *d ^ *a);

      *c = add(*c, *d);
      *b = sse2_rotate(12, *b ^ *c);

      *a = add(*a, *b);
      *d = sse2_rotate(8, *d ^ *a);

      *c = add(*c, *d);
      *b = sse2_rotate(7, *b ^ *c);
  }
  #endif /* NEED_SSE2 */
  /*
   * One ChaCha double round over two interleaved blocks whose rows are the
   * in-scope variables (a1,b1,c1,d1) and (a2,b2,c2,d2).
   *
   * qrf is the quarter-round routine to apply.  The first pair of calls is
   * followed by lane rotations of b/c/d by 1/2/3, the second pair by
   * rotations of 3/2/1, which undo the first set.
   *
   * Wrapped in do { } while (0) so the macro behaves as a single statement,
   * including inside an unbraced if/else.
   */
  #define DOUBLE_ROUND(qrf) do { \
      qrf(&a1,&b1,&c1,&d1); \
      qrf(&a2,&b2,&c2,&d2); \
      b1 = shuffle(b1,1); \
      c1 = shuffle(c1,2); \
      d1 = shuffle(d1,3); \
      b2 = shuffle(b2,1); \
      c2 = shuffle(c2,2); \
      d2 = shuffle(d2,3); \
      \
      qrf(&a1,&b1,&c1,&d1); \
      qrf(&a2,&b2,&c2,&d2); \
      b1 = shuffle(b1,3); \
      c1 = shuffle(c1,2); \
      d1 = shuffle(d1,1); \
      b2 = shuffle(b2,3); \
      c2 = shuffle(c2,2); \
      d2 = shuffle(d2,1); \
  } while (0)
  /*
   * Finish two interleaved blocks: add the saved input rows (aa, bb, cc, dd)
   * back into the worked rows, store the eight result rows through `output`,
   * advance `output` by eight rows, step the counter row cc forward by two
   * blocks and reload the working rows for the next pair.
   *
   * p is the per-block counter increment.  The second block's input counter
   * row is add64(cc,p), so the same 64-bit add is used for its feed-forward;
   * a 32-bit lane add would differ when the low counter word carries.
   *
   * Wrapped in do { } while (0) so the macro behaves as a single statement.
   */
  #define OUTPUT_FUNCTION do { \
      output[0] = add(a1,aa); \
      output[1] = add(b1,bb); \
      output[2] = add(c1,cc); \
      output[3] = add(d1,dd); \
      output[4] = add(a2,aa); \
      output[5] = add(b2,bb); \
      output[6] = add(c2,add64(cc,p)); \
      output[7] = add(d2,dd); \
      \
      output += 8; \
      \
      cc = add64(add64(cc,p), p); \
      a1 = a2 = aa; \
      b1 = b2 = bb; \
      c1 = cc; c2 = add64(cc,p); \
      d1 = d2 = dd; \
  } while (0)
  166. /* ------------------------------------------------------------------------------- */
  167. INTRINSIC u_int32_t rotate(int r, u_int32_t a) {
  168. return a<<r ^ a>>(32-r);
  169. }
  /*
   * Scalar ChaCha quarter round: four add / xor / rotate steps with rotation
   * distances 16, 12, 8 and 7, applied in place to the four words.
   */
  static __inline__ __attribute__((unused)) void
  quarter_round(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d) {
      *a += *b;
      *d = rotate(16, *d ^ *a);

      *c += *d;
      *b = rotate(12, *b ^ *c);

      *a += *b;
      *d = rotate(8, *d ^ *a);

      *c += *d;
      *b = rotate(7, *b ^ *c);
  }
  /**
   * Expand a 32-byte key into ChaCha-style keystream.
   *
   * State layout (four rows of four 32-bit words): rows 0-1 hold the key,
   * row 2 holds iv (words 8-9) and the 64-bit block counter (words 10-11),
   * row 3 holds the constants.  Each 64-byte block uses the next counter
   * value.
   *
   * The vector paths produce 128 bytes (two blocks) per iteration and the
   * scalar path 64 bytes, so the number of bytes written is output_size
   * rounded up to that granularity.
   *
   * The key is fully loaded before the first output store, so key_ and
   * output_ may refer to the same buffer (crandom_generate relies on this).
   *
   * @param iv           64-bit value placed in state words 8-9.
   * @param ctr          Counter for the first block.
   * @param nr           Number of rounds; consumed two at a time.
   * @param output_size  Number of bytes requested.
   * @param key_         32-byte key.
   * @param output_      Destination buffer.
   *
   * NOTE(review): the vector path is guarded by the bare macro
   * MIGHT_HAVE_SSE2, whereas the rest of the file spells this
   * MIGHT_HAVE(SSE2).  Confirm the bare macro is defined by the intrinsics
   * header; otherwise the vector path is never compiled.
   */
  static void
  crandom_chacha_expand(u_int64_t iv,
                        u_int64_t ctr,
                        int nr,
                        int output_size,
                        const unsigned char *key_,
                        unsigned char *output_) {
  # if MIGHT_HAVE_SSE2
      if (HAVE(SSE2)) {
          const ssereg *key = (const ssereg *)key_;
          ssereg *output = (ssereg *)output_;

          /* Working rows for two blocks (x1, x2) plus the saved inputs (xx). */
          ssereg a1 = key[0], a2 = a1, aa = a1,
                 b1 = key[1], b2 = b1, bb = b1,
                 c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1,
                 d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull},
                 d2 = d1, dd = d1,
                 p = {0, 1};  /* +1 on the counter lane */

          int i,r;
  #   if (NEED_XOP)
          if (HAVE(XOP)) {
              for (i=0; i<output_size; i+=128) {
                  for (r=nr; r>0; r-=2) {
                      DOUBLE_ROUND(quarter_round_xop);
                  }
                  OUTPUT_FUNCTION;
              }
              return;
          }
  #   endif
  #   if (NEED_SSSE3)
          if (HAVE(SSSE3)) {
              for (i=0; i<output_size; i+=128) {
                  for (r=nr; r>0; r-=2) {
                      DOUBLE_ROUND(quarter_round_ssse3);
                  }
                  OUTPUT_FUNCTION;
              }
              return;
          }
  #   endif
  #   if (NEED_SSE2)
          if (HAVE(SSE2)) {
              for (i=0; i<output_size; i+=128) {
                  for (r=nr; r>0; r-=2) {
                      DOUBLE_ROUND(quarter_round_sse2);
                  }
                  OUTPUT_FUNCTION;
              }
              return;
          }
  #   endif
      }
  # endif

  # if NEED_CONV
      {
          const u_int32_t *key = (const u_int32_t *)key_;
          u_int32_t
              x[16],
              input[16] = {
                  key[0], key[1], key[2], key[3],
                  key[4], key[5], key[6], key[7],
                  iv, iv>>32, ctr, ctr>>32,
                  0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
              },
              *output = (u_int32_t *)output_;
          int i, r;

          for (i=0; i<output_size; i+= 64) {
              for (r=0; r<16; r++) {
                  x[r] = input[r];
              }
              for (r=nr; r>0; r-=2) {
                  /* column round */
                  quarter_round(&x[0], &x[4], &x[8], &x[12]);
                  quarter_round(&x[1], &x[5], &x[9], &x[13]);
                  quarter_round(&x[2], &x[6], &x[10], &x[14]);
                  quarter_round(&x[3], &x[7], &x[11], &x[15]);

                  /* diagonal round */
                  quarter_round(&x[0], &x[5], &x[10], &x[15]);
                  quarter_round(&x[1], &x[6], &x[11], &x[12]);
                  quarter_round(&x[2], &x[7], &x[8], &x[13]);
                  quarter_round(&x[3], &x[4], &x[9], &x[14]);
              }
              for (r=0; r<16; r++) {
                  output[r] = x[r] + input[r];
              }

              output += 16;

              /* Step the 64-bit block counter: word 10 is the low half and
               * word 11 the high half.  Words 12-15 are constants and must
               * never be touched.  This matches the vector path's ctr+1. */
              input[10] ++;
              if (!input[10]) input[11]++;
          }
      }
  #endif /* NEED_CONV */
  }
  264. int
  265. crandom_init_from_file(
  266. crandom_state_a_t state,
  267. const char *filename,
  268. int reseed_interval,
  269. int reseeds_mandatory
  270. ) {
  271. state->fill = 0;
  272. state->reseed_countdown = reseed_interval;
  273. state->reseed_interval = reseed_interval;
  274. state->ctr = 0;
  275. state->randomfd = open(filename, O_RDONLY);
  276. if (state->randomfd == -1) {
  277. int err = errno;
  278. return err ? err : -1;
  279. }
  280. ssize_t offset = 0, red;
  281. do {
  282. red = read(state->randomfd, state->seedBuffer + offset, 32 - offset);
  283. if (red > 0) offset += red;
  284. } while (red > 0 && offset < 32);
  285. if (offset < 32) {
  286. int err = errno;
  287. return err ? err : -1;
  288. }
  289. memset(state->seedBuffer+32, 0, 96);
  290. state->magic = CRANDOM_MAGIC;
  291. state->reseeds_mandatory = reseeds_mandatory;
  292. return 0;
  293. }
  294. void
  295. crandom_init_from_buffer(
  296. crandom_state_a_t state,
  297. const char initial_seed[32]
  298. ) {
  299. memcpy(state->seedBuffer, initial_seed, 32);
  300. memset(state->seedBuffer+32, 0, 96);
  301. state->reseed_countdown = state->reseed_interval = state->fill = state->ctr = state->reseeds_mandatory = 0;
  302. state->randomfd = -1;
  303. state->magic = CRANDOM_MAGIC;
  304. }
  305. int
  306. crandom_generate(
  307. crandom_state_a_t state,
  308. unsigned char *output,
  309. unsigned long long length
  310. ) {
  311. /* the generator isn't seeded; maybe they ignored the return value of init_from_file */
  312. if (unlikely(state->magic != CRANDOM_MAGIC)) {
  313. abort();
  314. }
  315. int ret = 0;
  316. /*
  317. * Addition 5/21/2014.
  318. *
  319. * If this is used in an application inside a VM, and the VM
  320. * is snapshotted and restored, then crandom_generate() would
  321. * produce the same output.
  322. *
  323. * Of course, the real defense against this is "don't do that",
  324. * but we mitigate it by the RDRAND and/or rdtsc() in the refilling
  325. * code. Since chacha is pseudorandom, when the attacker doesn't
  326. * know the state, it's good enough if RDRAND/rdtsc() return
  327. * different results. However, if (part of) the request is filled
  328. * from the buffer, this won't help.
  329. *
  330. * So, add a flag EXPERIMENT_CRANDOM_BUFFER_CUTOFF_BYTES which
  331. * disables the buffer for requests larger than this size.
  332. *
  333. * Suggest EXPERIMENT_CRANDOM_BUFFER_CUTOFF_BYTES = 0, which
  334. * disables the buffer. But instead you can set it to say 16,
  335. * so that pulls of at least 128 bits will be stirred. This
  336. * could still be a problem for eg 64-bit nonces, but those
  337. * aren't entirely collision-resistant anyway.
  338. *
  339. * Heuristic: large requests are more likely to be
  340. * cryptographically important, and the buffer doesn't impact
  341. * their performance as much. So if the request is bigger
  342. * than a certain size, just drop the buffer on the floor.
  343. *
  344. * This code isn't activated if state->reseed_interval == 0,
  345. * because then the PRNG is deterministic anyway.
  346. *
  347. * TODO: sample 128 bits out of RDRAND() instead of 64 bits.
  348. * TODO: option to completely remove the buffer and fill?
  349. * FUTURE: come up with a less band-aid-y solution to this problem.
  350. */
  351. #ifdef EXPERIMENT_CRANDOM_BUFFER_CUTOFF_BYTES
  352. if (state->reseed_interval
  353. #if EXPERIMENT_CRANDOM_CUTOFF_BYTES > 0
  354. /* #if'd to a warning from -Wtype-limits in GCC when it's zero */
  355. && length >= EXPERIMENT_CRANDOM_BUFFER_CUTOFF_BYTES
  356. #endif
  357. ) {
  358. state->fill = 0;
  359. }
  360. #endif
  361. while (length) {
  362. if (unlikely(state->fill <= 0)) {
  363. uint64_t iv = 0;
  364. if (state->reseed_interval) {
  365. /* it's nondeterministic, stir in some rdrand() or rdtsc() */
  366. if (HAVE(RDRAND)) {
  367. iv = rdrand(0);
  368. if (!iv) iv = rdtsc();
  369. } else {
  370. iv = rdtsc();
  371. }
  372. state->reseed_countdown--;
  373. if (unlikely(state->reseed_countdown <= 0)) {
  374. /* reseed by xoring in random state */
  375. state->reseed_countdown = state->reseed_interval;
  376. ssize_t offset = 0, red;
  377. do {
  378. red = read(state->randomfd, state->seedBuffer + 32 + offset, 32 - offset);
  379. if (red > 0) offset += red;
  380. } while (red > 0 && offset < 32);
  381. if (offset < 32) {
  382. /* The read failed. Signal an error with the return code.
  383. *
  384. * If reseeds are mandatory, crash.
  385. *
  386. * If not, the generator is still probably safe to use, because reseeding
  387. * is basically over-engineering for caution. Also, the user might ignore
  388. * the return code, so we still need to fill the request.
  389. *
  390. * Set reseed_countdown = 1 so we'll try again later. If the user's
  391. * performance sucks as a result of ignoring the error code while calling
  392. * us in a loop, well, that's life.
  393. */
  394. if (state->reseeds_mandatory) {
  395. abort();
  396. }
  397. ret = errno;
  398. if (ret == 0) ret = -1;
  399. state->reseed_countdown = 1;
  400. }
  401. int i;
  402. for (i=0; i<32; i++) {
  403. /* Stir in the buffer. If somehow the read failed, it'll be zeros. */
  404. state->seedBuffer[i] ^= state->seedBuffer[i+32];
  405. }
  406. }
  407. }
  408. crandom_chacha_expand(iv,state->ctr,20,128,state->seedBuffer,state->seedBuffer);
  409. state->ctr++;
  410. state->fill = sizeof(state->seedBuffer)-32;
  411. }
  412. unsigned long long copy = (length > state->fill) ? state->fill : length;
  413. state->fill -= copy;
  414. memcpy(output, state->seedBuffer + 32 + state->fill, copy);
  415. really_memset(state->seedBuffer + 32 + state->fill, 0, copy);
  416. output += copy; length -= copy;
  417. }
  418. return ret;
  419. }
  420. void
  421. crandom_destroy(
  422. crandom_state_a_t state
  423. ) {
  424. if (state->magic == CRANDOM_MAGIC && state->randomfd) {
  425. (void) close(state->randomfd);
  426. /* Ignore the return value from close(), because what would it mean?
  427. * "Your random device, which you were reading over NFS, lost some data"?
  428. */
  429. }
  430. really_memset(state, 0, sizeof(*state));
  431. }