| @@ -2,7 +2,7 @@ | |||
| from . import bencode | |||
| import fnmatch | |||
| from functools import reduce | |||
| from hashlib import sha1 | |||
| from hashlib import sha1, sha512 | |||
| import importlib.resources | |||
| import itertools | |||
| import os | |||
| @@ -56,12 +56,12 @@ class Storage: | |||
| Each item is a tuple of: | |||
| array of file path components (undecoded) | |||
| a pathlib.PurePath for the file | |||
| a pathlib.PurePosixPath for the file | |||
| a pathlib.Path for file on disk | |||
| ''' | |||
| for curfile in self._files: | |||
| fname = pathlib.PurePath( | |||
| fname = pathlib.PurePosixPath( | |||
| *(x.decode(_encoding) for x in | |||
| curfile['path'])) | |||
| curfilepath = self._rootpath / fname | |||
| @@ -134,9 +134,21 @@ class Storage: | |||
| for i in self._pieceindex[idx]: | |||
| with open(i['file'], 'rb') as fp: | |||
| fp.seek(i['offset']) | |||
| fun(fp.read(i['size'])) | |||
| fun(fp.read(i['size']), i.get('fname'), | |||
| i['offset']) | |||
| def validate_file(fname, with_file_hashes=None): | |||
| '''Take a torrent file fname, find the stored data (searching | |||
| subdirectories), and verify the torrent. Returns a pair of sets: the | |||
| first is all the files that are valid, the second is all the | |||
| invalid files. | |||
| The arg with_file_hashes, if specified, must be a hashlib-like | |||
| factory function. It will be processed on a per-file basis, and | |||
| a third value will be returned: a dict with the file name as | |||
| key and the digest of each file as the value. | |||
| ''' | |||
| def validate_file(fname): | |||
| fname = pathlib.Path(fname) | |||
| with open(fname, 'rb') as fp: | |||
| @@ -148,16 +160,22 @@ def validate_file(fname): | |||
| tordir = dirname.parent | |||
| try: | |||
| return validate(torrent, tordir) | |||
| return validate(torrent, tordir, with_file_hashes) | |||
| except FileNotFoundError as e: | |||
| continue | |||
| else: | |||
| raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name))) | |||
| def validate(torrent, basedir): | |||
| def validate(torrent, basedir, with_file_hashes=None): | |||
| '''Take a decode torrent file, where it was stored in basedir, | |||
| verify the torrent. Returns a pair of set, the first is all the | |||
| files that are valid, the second are all the invalid files.''' | |||
| files that are valid, the second are all the invalid files. | |||
| The arg with_file_hashes, if specified, must be a hashlib-like | |||
| factory function. It will be processed on a per-file basis, and | |||
| a third value will be returned: a dict with the file name as | |||
| key and the digest as the value. | |||
| ''' | |||
| info = torrent['info'] | |||
| @@ -168,6 +186,20 @@ def validate(torrent, basedir): | |||
| files = info.get('files', None) | |||
| stor = Storage(torrentdir, files, info['piece length']) | |||
| file_hashes = dict() | |||
| def apply_fun(data, fname, offset): | |||
| if with_file_hashes is not None: | |||
| hashobj, curoff = file_hashes.setdefault(fname, | |||
| (with_file_hashes(), 0)) | |||
| if curoff == offset: | |||
| hashobj.update(data) | |||
| file_hashes[fname] = (hashobj, offset + | |||
| len(data)) | |||
| hash.update(data) | |||
| pieces = info['pieces'] | |||
| piecescnt = len(pieces) // 20 | |||
| valid = [ None ] * piecescnt | |||
| @@ -175,13 +207,20 @@ def validate(torrent, basedir): | |||
| 20)): | |||
| hash = sha1() | |||
| stor.apply_piece(num, hash.update) | |||
| stor.apply_piece(num, apply_fun) | |||
| if hash.digest() == i: | |||
| valid[num] = True | |||
| else: | |||
| valid[num] = False | |||
| if files is None: | |||
| filesizes = { pathlib.PurePosixPath(info['name'].decode( | |||
| _encoding)): info['length'] } | |||
| else: | |||
| filesizes = { pathlib.PurePosixPath(*(x.decode(_encoding) for | |||
| x in o['path'])): o['length'] for o in files } | |||
| if files is None: | |||
| # single file | |||
| f, e = set([ torrentdir ]), set() | |||
| @@ -189,7 +228,13 @@ def validate(torrent, basedir): | |||
| if not all(valid): | |||
| f, e = e, f | |||
| return f,e | |||
| if with_file_hashes: | |||
| file_hashes = { torrentdir: hashobj.digest() for fname, (hashobj, | |||
| off) in file_hashes.items() if info['length'] == off and | |||
| torrentdir in f } | |||
| return f, e, file_hashes | |||
| return f, e | |||
| # if any piece of a file is bad, it's bad | |||
| allfiles = set(stor.allfiles()) | |||
| @@ -197,7 +242,15 @@ def validate(torrent, basedir): | |||
| badfiles = { torrentdir / x for x, y in stor.filepieces() if | |||
| not all(valid[i] for i in y) } | |||
| return allfiles - badfiles, badfiles | |||
| r = (allfiles - badfiles, badfiles,) | |||
| file_hashes = { torrentdir / fname: hashobj.digest() for fname, (hashobj, | |||
| off) in file_hashes.items() if filesizes[fname] == off and | |||
| (torrentdir / fname) in r[0] } | |||
| if with_file_hashes is not None: | |||
| r += (file_hashes, ) | |||
| return r | |||
| class _TestCases(unittest.TestCase): | |||
| dirname = 'somedir' | |||
| @@ -320,6 +373,21 @@ class _TestCases(unittest.TestCase): | |||
| self.assertFalse(bad) | |||
| self.assertEqual(good, { sd / 'filed.txt' }) | |||
| good, bad, hashes = validate_file(tor, with_file_hashes=sha512) | |||
| self.assertFalse(bad) | |||
| self.assertEqual(good, { sd / 'filed.txt' }) | |||
| self.assertEqual(hashes, { sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), }) | |||
| with open(sd / 'filed.txt', 'w') as fp: | |||
| fp.write('weoifj') | |||
| good, bad, hashes = validate_file(tor, with_file_hashes=sha512) | |||
| self.assertEqual(bad, { sd / 'filed.txt' }) | |||
| self.assertFalse(good) | |||
| self.assertEqual(hashes, {}) | |||
| def test_verification(self): | |||
| # Testing for "missing" files | |||
| # piece size 2 (aka 4 bytes) | |||
| @@ -345,3 +413,16 @@ class _TestCases(unittest.TestCase): | |||
| missingfiles.keys() if x not in self.badfiles }) | |||
| self.assertEqual(set(inval), { sd / x for x in | |||
| self.badfiles.keys() }) | |||
| val, inval, hashdict = validate(self.torrent, self.basetempdir, | |||
| with_file_hashes=sha512) | |||
| self.assertEqual(set(val), { sd / x for x in | |||
| missingfiles.keys() if x not in self.badfiles }) | |||
| self.assertEqual(set(inval), { sd / x for x in | |||
| self.badfiles.keys() }) | |||
| self.assertEqual(hashdict, { | |||
| sd / 'fileb.txt': bytes.fromhex('cc06808cbbee0510331aa97974132e8dc296aeb795be229d064bae784b0a87a5cf4281d82e8c99271b75db2148f08a026c1a60ed9cabdb8cac6d24242dac4063'), | |||
| sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), | |||
| sd / 'filef/filef.txt': bytes.fromhex('be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09'), | |||
| }) | |||