| @@ -2,7 +2,7 @@ | |||||
| from . import bencode | from . import bencode | ||||
| import fnmatch | import fnmatch | ||||
| from functools import reduce | from functools import reduce | ||||
| from hashlib import sha1 | |||||
| from hashlib import sha1, sha512 | |||||
| import importlib.resources | import importlib.resources | ||||
| import itertools | import itertools | ||||
| import os | import os | ||||
| @@ -56,12 +56,12 @@ class Storage: | |||||
| Each item is a tuple of: | Each item is a tuple of: | ||||
| array of file path components (undecoded) | array of file path components (undecoded) | ||||
| a pathlib.PurePath for the file | |||||
| a pathlib.PurePosixPath for the file | |||||
| a pathlib.Path for file on disk | a pathlib.Path for file on disk | ||||
| ''' | ''' | ||||
| for curfile in self._files: | for curfile in self._files: | ||||
| fname = pathlib.PurePath( | |||||
| fname = pathlib.PurePosixPath( | |||||
| *(x.decode(_encoding) for x in | *(x.decode(_encoding) for x in | ||||
| curfile['path'])) | curfile['path'])) | ||||
| curfilepath = self._rootpath / fname | curfilepath = self._rootpath / fname | ||||
| @@ -134,9 +134,21 @@ class Storage: | |||||
| for i in self._pieceindex[idx]: | for i in self._pieceindex[idx]: | ||||
| with open(i['file'], 'rb') as fp: | with open(i['file'], 'rb') as fp: | ||||
| fp.seek(i['offset']) | fp.seek(i['offset']) | ||||
| fun(fp.read(i['size'])) | |||||
| fun(fp.read(i['size']), i.get('fname'), | |||||
| i['offset']) | |||||
| def validate_file(fname, with_file_hashes=None): | |||||
| '''Take a torrent file fname, find the stored data (searching | |||||
| subdirectories) and verify the torrent. Returns a pair of sets: the | |||||
| first is all the files that are valid, the second is all the | |||||
| invalid files. | |||||
| The arg with_file_hashes, if specified, must be a hashlib-like | |||||
| factory function. It will be applied on a per-file basis, and | |||||
| a third element will be returned: a dict with each file's path as | |||||
| key and that file's digest as the value. | |||||
| ''' | |||||
| def validate_file(fname): | |||||
| fname = pathlib.Path(fname) | fname = pathlib.Path(fname) | ||||
| with open(fname, 'rb') as fp: | with open(fname, 'rb') as fp: | ||||
| @@ -148,16 +160,22 @@ def validate_file(fname): | |||||
| tordir = dirname.parent | tordir = dirname.parent | ||||
| try: | try: | ||||
| return validate(torrent, tordir) | |||||
| return validate(torrent, tordir, with_file_hashes) | |||||
| except FileNotFoundError as e: | except FileNotFoundError as e: | ||||
| continue | continue | ||||
| else: | else: | ||||
| raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name))) | raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name))) | ||||
| def validate(torrent, basedir): | |||||
| def validate(torrent, basedir, with_file_hashes=None): | |||||
| '''Take a decoded torrent file, where it was stored in basedir, | '''Take a decoded torrent file, where it was stored in basedir, | ||||
| verify the torrent. Returns a pair of sets, the first is all the | verify the torrent. Returns a pair of sets, the first is all the | ||||
| files that are valid, the second are all the invalid files.''' | |||||
| files that are valid, the second are all the invalid files. | |||||
| The arg with_file_hashes, if specified, must be a hashlib-like | |||||
| factory function. It will be applied on a per-file basis, and | |||||
| a third element will be returned: a dict with each file's path as | |||||
| key and that file's digest as the value. | |||||
| ''' | |||||
| info = torrent['info'] | info = torrent['info'] | ||||
| @@ -168,6 +186,20 @@ def validate(torrent, basedir): | |||||
| files = info.get('files', None) | files = info.get('files', None) | ||||
| stor = Storage(torrentdir, files, info['piece length']) | stor = Storage(torrentdir, files, info['piece length']) | ||||
| file_hashes = dict() | |||||
| def apply_fun(data, fname, offset): | |||||
| if with_file_hashes is not None: | |||||
| hashobj, curoff = file_hashes.setdefault(fname, | |||||
| (with_file_hashes(), 0)) | |||||
| if curoff == offset: | |||||
| hashobj.update(data) | |||||
| file_hashes[fname] = (hashobj, offset + | |||||
| len(data)) | |||||
| hash.update(data) | |||||
| pieces = info['pieces'] | pieces = info['pieces'] | ||||
| piecescnt = len(pieces) // 20 | piecescnt = len(pieces) // 20 | ||||
| valid = [ None ] * piecescnt | valid = [ None ] * piecescnt | ||||
| @@ -175,13 +207,20 @@ def validate(torrent, basedir): | |||||
| 20)): | 20)): | ||||
| hash = sha1() | hash = sha1() | ||||
| stor.apply_piece(num, hash.update) | |||||
| stor.apply_piece(num, apply_fun) | |||||
| if hash.digest() == i: | if hash.digest() == i: | ||||
| valid[num] = True | valid[num] = True | ||||
| else: | else: | ||||
| valid[num] = False | valid[num] = False | ||||
| if files is None: | |||||
| filesizes = { pathlib.PurePosixPath(info['name'].decode( | |||||
| _encoding)): info['length'] } | |||||
| else: | |||||
| filesizes = { pathlib.PurePosixPath(*(x.decode(_encoding) for | |||||
| x in o['path'])): o['length'] for o in files } | |||||
| if files is None: | if files is None: | ||||
| # single file | # single file | ||||
| f, e = set([ torrentdir ]), set() | f, e = set([ torrentdir ]), set() | ||||
| @@ -189,7 +228,13 @@ def validate(torrent, basedir): | |||||
| if not all(valid): | if not all(valid): | ||||
| f, e = e, f | f, e = e, f | ||||
| return f,e | |||||
| if with_file_hashes: | |||||
| file_hashes = { torrentdir: hashobj.digest() for fname, (hashobj, | |||||
| off) in file_hashes.items() if info['length'] == off and | |||||
| torrentdir in f } | |||||
| return f, e, file_hashes | |||||
| return f, e | |||||
| # if any piece of a file is bad, it's bad | # if any piece of a file is bad, it's bad | ||||
| allfiles = set(stor.allfiles()) | allfiles = set(stor.allfiles()) | ||||
| @@ -197,7 +242,15 @@ def validate(torrent, basedir): | |||||
| badfiles = { torrentdir / x for x, y in stor.filepieces() if | badfiles = { torrentdir / x for x, y in stor.filepieces() if | ||||
| not all(valid[i] for i in y) } | not all(valid[i] for i in y) } | ||||
| return allfiles - badfiles, badfiles | |||||
| r = (allfiles - badfiles, badfiles,) | |||||
| file_hashes = { torrentdir / fname: hashobj.digest() for fname, (hashobj, | |||||
| off) in file_hashes.items() if filesizes[fname] == off and | |||||
| (torrentdir / fname) in r[0] } | |||||
| if with_file_hashes is not None: | |||||
| r += (file_hashes, ) | |||||
| return r | |||||
| class _TestCases(unittest.TestCase): | class _TestCases(unittest.TestCase): | ||||
| dirname = 'somedir' | dirname = 'somedir' | ||||
| @@ -320,6 +373,21 @@ class _TestCases(unittest.TestCase): | |||||
| self.assertFalse(bad) | self.assertFalse(bad) | ||||
| self.assertEqual(good, { sd / 'filed.txt' }) | self.assertEqual(good, { sd / 'filed.txt' }) | ||||
| good, bad, hashes = validate_file(tor, with_file_hashes=sha512) | |||||
| self.assertFalse(bad) | |||||
| self.assertEqual(good, { sd / 'filed.txt' }) | |||||
| self.assertEqual(hashes, { sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), }) | |||||
| with open(sd / 'filed.txt', 'w') as fp: | |||||
| fp.write('weoifj') | |||||
| good, bad, hashes = validate_file(tor, with_file_hashes=sha512) | |||||
| self.assertEqual(bad, { sd / 'filed.txt' }) | |||||
| self.assertFalse(good) | |||||
| self.assertEqual(hashes, {}) | |||||
| def test_verification(self): | def test_verification(self): | ||||
| # Testing for "missing" files | # Testing for "missing" files | ||||
| # piece size 2 (aka 4 bytes) | # piece size 2 (aka 4 bytes) | ||||
| @@ -345,3 +413,16 @@ class _TestCases(unittest.TestCase): | |||||
| missingfiles.keys() if x not in self.badfiles }) | missingfiles.keys() if x not in self.badfiles }) | ||||
| self.assertEqual(set(inval), { sd / x for x in | self.assertEqual(set(inval), { sd / x for x in | ||||
| self.badfiles.keys() }) | self.badfiles.keys() }) | ||||
| val, inval, hashdict = validate(self.torrent, self.basetempdir, | |||||
| with_file_hashes=sha512) | |||||
| self.assertEqual(set(val), { sd / x for x in | |||||
| missingfiles.keys() if x not in self.badfiles }) | |||||
| self.assertEqual(set(inval), { sd / x for x in | |||||
| self.badfiles.keys() }) | |||||
| self.assertEqual(hashdict, { | |||||
| sd / 'fileb.txt': bytes.fromhex('cc06808cbbee0510331aa97974132e8dc296aeb795be229d064bae784b0a87a5cf4281d82e8c99271b75db2148f08a026c1a60ed9cabdb8cac6d24242dac4063'), | |||||
| sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), | |||||
| sd / 'filef/filef.txt': bytes.fromhex('be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09'), | |||||
| }) | |||||