MetaData Sharing
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

218 lines
4.9 KiB

  1. from . import bencode
  2. from functools import reduce
  3. from hashlib import sha1
  4. import importlib.resources
  5. import itertools
  6. import os
  7. import pathlib
  8. import shutil
  9. import sys
  10. import tempfile
  11. import unittest
  12. class Storage:
  13. def __init__(self, rootpath, files, piecelen, encoding='us-ascii'):
  14. self._rootpath = pathlib.Path(rootpath)
  15. self._files = files
  16. self._piecelen = piecelen
  17. self._encoding = encoding
  18. self._buildindex()
  19. def _filepaths(self):
  20. '''Iterates over all the files in the torrent.
  21. Each item is a tuple of:
  22. array of file path components (undecoded)
  23. a pathlib.PurePath for the file
  24. a pathlib.Path for file on disk
  25. '''
  26. for curfile in self._files:
  27. fname = pathlib.PurePath(
  28. *(x.decode(self._encoding) for x in
  29. curfile['path']))
  30. curfilepath = self._rootpath / fname
  31. yield curfile, fname, curfilepath
  32. def allfiles(self):
  33. for x, y, curfilepath in self._filepaths():
  34. yield curfilepath
  35. def _buildindex(self):
  36. self._pieceindex = []
  37. self._fileindex = {}
  38. files = self._filepaths()
  39. left = 0
  40. curfile = None
  41. while True:
  42. if curfile is None or curfileoff == curfile['length']:
  43. # next file
  44. try:
  45. curfile, fname, curfilepath = next(files)
  46. except StopIteration:
  47. break
  48. curfileoff = 0
  49. if left == 0:
  50. current = []
  51. self._fileindex.setdefault(fname,
  52. []).append(len(self._pieceindex))
  53. self._pieceindex.append(current)
  54. left = self._piecelen
  55. sz = min(curfile['length'] - curfileoff, left)
  56. current.append(dict(file=curfilepath, fname=fname,
  57. offset=curfileoff, size=sz))
  58. curfileoff += sz
  59. left -= sz
  60. def filepieces(self):
  61. return self._fileindex.items()
  62. def filesforpiece(self, idx):
  63. for x in self._pieceindex[idx]:
  64. yield x['file']
  65. def apply_piece(self, idx, fun):
  66. for i in self._pieceindex[idx]:
  67. with open(i['file'], 'rb') as fp:
  68. fp.seek(i['offset'])
  69. fun(fp.read(i['size']))
  70. def validate(torrent, basedir):
  71. info = torrent['info']
  72. basedir = pathlib.Path(basedir)
  73. try:
  74. encoding = torrent['encoding'].decode('us-ascii')
  75. except KeyError:
  76. encoding = 'us-ascii'
  77. torrentdir = basedir / info['name'].decode(encoding)
  78. stor = Storage(torrentdir, info['files'], info['piece length'],
  79. encoding)
  80. pieces = info['pieces']
  81. piecescnt = len(pieces) // 20
  82. valid = [ None ] * piecescnt
  83. for num, i in enumerate(pieces[x:x+20] for x in range(0, len(pieces),
  84. 20)):
  85. hash = sha1()
  86. stor.apply_piece(num, hash.update)
  87. if hash.digest() == i:
  88. valid[num] = True
  89. else:
  90. valid[num] = False
  91. # if any piece of a file is bad, it's bad
  92. allfiles = set(stor.allfiles())
  93. badfiles = { torrentdir / x for x, y in stor.filepieces() if
  94. not all(valid[i] for i in y) }
  95. return allfiles - badfiles, badfiles
  96. class _TestCases(unittest.TestCase):
  97. dirname = 'somedir'
  98. origfiledata = {
  99. 'filea.txt': b'foo\n',
  100. 'fileb.txt': b'bar\n',
  101. 'filec.txt': b'bleha\n',
  102. 'filed.txt': b'somehow\n',
  103. 'filee.txt': b'nowab\n',
  104. 'filef/filef.txt': b'\n',
  105. }
  106. def setUp(self):
  107. d = pathlib.Path(tempfile.mkdtemp()).resolve()
  108. tor = importlib.resources.files(__name__)
  109. tor = tor / 'fixtures' / 'somedir.torrent'
  110. with tor.open('rb') as fp:
  111. self.torrent = bencode.bdecode(fp.read())
  112. self.basetempdir = d
  113. self.oldcwd = os.getcwd()
  114. os.chdir(d)
  115. def tearDown(self):
  116. shutil.rmtree(self.basetempdir)
  117. os.chdir(self.oldcwd)
  118. @staticmethod
  119. def make_files(dname, fdict):
  120. dname = pathlib.Path(dname)
  121. for k, v in fdict.items():
  122. k = dname / pathlib.PurePosixPath(k)
  123. k.parent.mkdir(parents=True, exist_ok=True)
  124. with open(k, 'wb') as fp:
  125. fp.write(v)
  126. def test_completeverif(self):
  127. sd = self.basetempdir / self.dirname
  128. sd.mkdir()
  129. self.make_files(sd, self.origfiledata)
  130. validate(self.torrent, self.basetempdir)
  131. # encoded names
  132. sd = self.basetempdir / 'thai'
  133. sd.mkdir()
  134. self.make_files(sd, { 'thai - สวัสดี.txt': b'hello\n'
  135. })
  136. tor = importlib.resources.files(__name__)
  137. tor = tor / 'fixtures' / 'thai.torrent'
  138. with tor.open('rb') as fp:
  139. torrent = bencode.bdecode(fp.read())
  140. validate(torrent, self.basetempdir)
  141. def test_verification(self):
  142. # Testing for "missing" files
  143. # piece size 2 (aka 4 bytes)
  144. # empty file of 4 bytes 'foo\n'
  145. # complete file of 4 bytes 'bar\n'
  146. # partial missing file, 6 bytes, last two correct 'bleha\n'
  147. # complete file of 8 bytes (multiple pieces) 'somehow\n'
  148. # partial missing file, starting w/ 2 bytes, length 6 'nowab\n'
  149. # complete file (length 1) '\n'
  150. missingfiles = self.origfiledata.copy()
  151. badfiles = {
  152. 'filea.txt': b'',
  153. 'filec.txt': b'\x00\x00\x00\x00a\n',
  154. 'filee.txt': b'no',
  155. }
  156. missingfiles.update(badfiles)
  157. sd = self.basetempdir / self.dirname
  158. sd.mkdir()
  159. self.make_files(sd, missingfiles)
  160. val, inval = validate(self.torrent, self.basetempdir)
  161. self.assertEqual(set(val), { sd / x for x in
  162. missingfiles.keys() if x not in badfiles })
  163. self.assertEqual(set(inval), { sd / x for x in
  164. badfiles.keys() })