@@ -0,0 +1,31 @@
+[
+    {
+        "title": "gen ident",
+        "cmd": [ "genident", "name=A Test User" ],
+        "exit": 0
+    },
+    {
+        "special": "setup tar file"
+    },
+    {
+        "title": "import tar.gz container",
+        "cmd": [ "container", "testfile.tar.gz" ]
+    },
+    {
+        "special": "verify store object cnt",
+        "comment": "should have one container and one file",
+        "count": 2
+    },
+    {
+        "title": "verify correct files imported",
+        "cmd": [ "dump" ],
+        "stdout_check": [
+            { "type": "identity" },
+            { "files": [ "testfiles/newfile.txt", "testfiles/test.txt" ],
+              "hashes": [ "sha512:90f8342520f0ac57fb5a779f5d331c2fa87aa40f8799940257f9ba619940951e67143a8d746535ed0284924b2b7bc1478f095198800ba96d01847d7b56ca465c", "sha512:7d5768d47b6bc27dc4fa7e9732cfa2de506ca262a2749cb108923e5dddffde842bbfee6cb8d692fb43aca0f12946c521cce2633887914ca1f96898478d10ad3f" ],
+              "type": "container",
+              "uri": "hash://sha512/79fab684ca73e25994c1b739dcf8f03acf27dff74d63b4b3affd9aa69fbb37d23794b723802cad131969225403846f8f8c470bc2432c32de34d39f044a360073" },
+            { "type": "file", "hashes": [ "sha512:79fab684ca73e25994c1b739dcf8f03acf27dff74d63b4b3affd9aa69fbb37d23794b723802cad131969225403846f8f8c470bc2432c32de34d39f044a360073" ] }
+        ]
+    }
+]
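
Note: the checks above assume a testfile.tar.gz fixture whose members are testfiles/newfile.txt and testfiles/test.txt; the "hashes" entries are the sha512 digests of the member contents, and the container "uri" is the digest of the archive itself (see _makehashuri below). A minimal sketch of how such a fixture could be regenerated — the member contents here are hypothetical, so the real digests will differ:

    import hashlib
    import tarfile

    # pack the two members; their sha512s populate "hashes", while the
    # archive's own sha512 becomes the "uri" and the trailing "file" hash
    with tarfile.open('testfile.tar.gz', 'w:gz') as tf:
        tf.add('testfiles/newfile.txt')
        tf.add('testfiles/test.txt')

    for fname in ('testfiles/newfile.txt', 'testfiles/test.txt',
            'testfile.tar.gz'):
        with open(fname, 'rb') as fp:
            print(fname, 'sha512:' + hashlib.sha512(fp.read()).hexdigest())
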
@@ -18,17 +18,7 @@ if False:
     logging.getLogger('sqlalchemy').addHandler(_handler)
     logging.getLogger('sqlalchemy.engine').setLevel(lvl)
 
-def _debprint(*args): # pragma: no cover
-    import traceback, sys, os.path
-
-    st = traceback.extract_stack(limit=2)[0]
-
-    sep = ''
-    if args:
-        sep = ':'
-
-    print('%s:%d%s' % (os.path.basename(st.filename), st.lineno, sep),
-        *args, file=_real_stderr)
-    sys.stderr.flush()
+from .utils import _debprint
 
 #import pdb, sys; mypdb = pdb.Pdb(stdout=sys.stderr); mypdb.set_trace()
@@ -38,6 +28,7 @@ from unittest import mock
 from .hostid import hostuuid
 from .tags import TagCache
 from . import orm
+from .magic_wrap import detect_from_filename
 from .btv import _TestCases as bttestcase, validate_file
@@ -52,6 +43,7 @@ import importlib
 import io
 import itertools
 import json
+import libarchive
 import magic
 import os.path
 import pathlib
@@ -78,6 +70,11 @@ _validhashes = set([ 'sha256', 'sha512' ])
 _hashlengths = { len(getattr(hashlib, x)().hexdigest()): x for x in
     _validhashes }
 
+def _makehashuri(hashstr):
+    hash, value = ObjectStore.makehash(hashstr).split(':')
+
+    return f'hash://{hash}/{value}'
+
 def _keyordering(x):
     k, v = x
     try:
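
Note: _makehashuri converts any hash string ObjectStore.makehash accepts into the hash:// URI form used below for plain-archive Containers. A quick sketch of the expected round trip (digest computed on the fly; this assumes makehash passes a well-formed 'algo:hexdigest' through unchanged):

    import hashlib

    digest = hashlib.sha512(b'example').hexdigest()
    assert _makehashuri('sha512:' + digest) == 'hash://sha512/' + digest
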
@@ -651,16 +648,20 @@ class ObjectStore(object):
 def _readfp(fp):
     while True:
         r = fp.read(64*1024)
-        if r == b'':
+        # libarchive returns None on EOF
+        if r == b'' or r is None:
             return
 
         yield r
 
 def _hashfile(fname):
-    hash = getattr(hashlib, _defaulthash)()
     with open(fname, 'rb') as fp:
-        for r in _readfp(fp):
-            hash.update(r)
+        return _hashfp(fp)
+
+def _hashfp(fp):
+    hash = getattr(hashlib, _defaulthash)()
+    for r in _readfp(fp):
+        hash.update(r)
 
     return '%s:%s' % (_defaulthash, hash.hexdigest())
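
Note: splitting _hashfp out of _hashfile lets the same loop hash libarchive entry streams, whose read() may yield None at end-of-stream instead of the b'' a regular file returns — hence the extra check in _readfp. A minimal sketch of _hashfp on an in-memory file (assuming _defaulthash is 'sha512'):

    import hashlib
    import io

    fp = io.BytesIO(b'hello world')
    # _hashfp drains fp through _readfp and prefixes the algorithm name
    assert _hashfp(fp) == 'sha512:' + hashlib.sha512(b'hello world').hexdigest()
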
@@ -1219,7 +1220,7 @@ def cmd_dump(options, persona, objstr, cache):
 def cmd_auto(options):
     for i in options.files:
-        mf = magic.detect_from_filename(i)
+        mf = detect_from_filename(i)
 
         primary = mf[0].split('/', 1)[0]
         mt = mf[0]
@@ -1261,68 +1262,113 @@ def cmd_list(options, persona, objstr, cache):
 # This is needed so that if it creates a FileObj, which may be
 # expensive (hashing large file), that it gets saved.
-@init_datastructs
-def cmd_container(options, persona, objstr, cache):
-    for i in options.files:
-        with open(i, 'rb') as fp:
-            torrent = bencode.bdecode(fp.read())
-        bencodedinfo = bencode.bencode(torrent['info'])
-        infohash = hashlib.sha1(bencodedinfo).hexdigest()
-
-        # XXX - not entirely happy w/ URI
-        uri = 'magnet:?xt=urn:btih:%s&dn=%s' % (infohash,
-            torrent['info']['name'].decode('utf-8'))
-
-        try:
-            cont = objstr.by_id(Container.make_id(uri))
-        except KeyError:
-            pass
-        else:
-            if not 'incomplete' in cont:
-                print('Warning, container already complete, skipping %s.' % repr(i), file=sys.stderr)
-                continue
-
-        good, bad = validate_file(i)
-
-        if bad:
-            print('Warning, incomplete/invalid files, not added for %s:' % repr(i),
-                file=sys.stderr)
-            print('\n'.join('\t%s' %
-                repr(str(pathlib.Path(*x.parts[1:]))) for x in
-                sorted(bad)), file=sys.stderr)
-
-        files = []
-        hashes = []
-        for j in sorted(good):
-            files.append(str(pathlib.PosixPath(*j.parts[1:])))
-            try:
-                fobj = objstr.by_file(j, ('file',))[0]
-            except:
-                fobj = persona.by_file(j)
-                objstr.loadobj(fobj)
-
-            # XXX - ensure only one is added?
-            hashes.extend(fobj.hashes)
-
-        kwargs = dict(files=files, hashes=hashes,
-            uri=uri)
-
-        if bad:
-            kwargs['incomplete'] = True
-
-        # XXX - doesn't combine files/hashes, that is if a
-        # Container has one set of good files, and then the
-        # next scan has a different set, only the second set
-        # will be present, not any from the first set.
-        try:
-            cont = objstr.by_id(Container.make_id(uri))
-            cont = cont.new_version(dels=() if bad
-                else ('incomplete',), replaces=kwargs.items())
-        except KeyError:
-            cont = persona.Container(**kwargs)
-
-        objstr.loadobj(cont)
+def handle_bittorrent(fname, persona, objstr):
+    with open(fname, 'rb') as fp:
+        torrent = bencode.bdecode(fp.read())
+    bencodedinfo = bencode.bencode(torrent['info'])
+    infohash = hashlib.sha1(bencodedinfo).hexdigest()
+
+    # XXX - not entirely happy w/ URI
+    uri = 'magnet:?xt=urn:btih:%s&dn=%s' % (infohash,
+        torrent['info']['name'].decode('utf-8'))
+
+    try:
+        cont = objstr.by_id(Container.make_id(uri))
+    except KeyError:
+        pass
+    else:
+        if not 'incomplete' in cont:
+            print('Warning, container already complete, skipping %s.' % repr(fname), file=sys.stderr)
+            return
+
+    good, bad = validate_file(fname)
+
+    if bad:
+        print('Warning, incomplete/invalid files, not added for %s:' %
+            repr(fname), file=sys.stderr)
+        print('\n'.join('\t%s' %
+            repr(str(pathlib.Path(*x.parts[1:]))) for x in
+            sorted(bad)), file=sys.stderr)
+
+    files = []
+    hashes = []
+    for j in sorted(good):
+        files.append(str(pathlib.PosixPath(*j.parts[1:])))
+        try:
+            fobj = objstr.by_file(j, ('file',))[0]
+        except:
+            fobj = persona.by_file(j)
+            objstr.loadobj(fobj)
+
+        # XXX - ensure only one is added?
+        hashes.extend(fobj.hashes)
+
+    kwargs = dict(files=files, hashes=hashes,
+        uri=uri)
+
+    if bad:
+        kwargs['incomplete'] = True
+
+    # XXX - doesn't combine files/hashes, that is if a
+    # Container has one set of good files, and then the
+    # next scan has a different set, only the second set
+    # will be present, not any from the first set.
+    try:
+        cont = objstr.by_id(Container.make_id(uri))
+        cont = cont.new_version(dels=() if bad
+            else ('incomplete',), replaces=kwargs.items())
+    except KeyError:
+        cont = persona.Container(**kwargs)
+
+    objstr.loadobj(cont)
+
+def handle_archive(fname, persona, objstr):
+    with libarchive.Archive(fname) as arch:
+        files = []
+        hashes = []
+
+        for i in arch:
+            if not i.isfile():
+                continue
+
+            files.append(i.pathname)
+
+            with arch.readstream(i.size) as fp:
+                hashes.append(_hashfp(fp))
+
+    try:
+        fobj = objstr.by_file(fname, ('file',))[0]
+    except:
+        fobj = persona.by_file(fname)
+        objstr.loadobj(fobj)
+
+    uri = _makehashuri(fobj.hashes[0])
+
+    kwargs = dict(files=files, hashes=hashes,
+        uri=uri)
+
+    try:
+        cont = objstr.by_id(Container.make_id(uri))
+        # XXX - only update when different, check uri
+        cont = cont.new_version(replaces=kwargs.items())
+    except KeyError:
+        cont = persona.Container(**kwargs)
+
+    objstr.loadobj(cont)
+
+_container_mapping = {
+    'application/x-bittorrent': handle_bittorrent,
+    'application/x-tar': handle_archive,
+}
+
+@init_datastructs
+def cmd_container(options, persona, objstr, cache):
+    for i in options.files:
+        mf = detect_from_filename(i)
+        #_debprint('mf:', repr(mf))
+        fun = _container_mapping[mf.mime_type]
+
+        fun(i, persona, objstr)
 
 def _json_objstream(fp):
     inp = fp.read()
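
Note: cmd_container now only sniffs the MIME type and dispatches through _container_mapping; a type without a handler raises KeyError. A sketch of the dispatch path for the tar.gz fixture (persona and objstr as set up by init_datastructs; the detected type assumes the wrapped magic database from magic_wrap.py):

    mf = detect_from_filename('testfile.tar.gz')
    # MAGIC_COMPRESS makes mf.mime_type 'application/x-tar' for a .tar.gz,
    # so this picks handle_archive; a .torrent would pick handle_bittorrent
    fun = _container_mapping[mf.mime_type]
    fun('testfile.tar.gz', persona, objstr)
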
@@ -2135,6 +2181,9 @@ class _TestCases(unittest.TestCase):
             elif special == 'delete files':
                 for i in cmd['files']:
                     os.unlink(i)
+            elif special == 'setup tar file':
+                shutil.copy(self.fixtures /
+                    'testfile.tar.gz', self.tempdir)
             else: # pragma: no cover
                 raise ValueError('unhandled special: %s' % repr(special))
@@ -2181,7 +2230,6 @@ class _TestCases(unittest.TestCase):
             if outcheck:
                 stdout.seek(0)
                 self.objcompare(_json_objstream(stdout), outcheck)
-
             self.assertEqual(stderr.getvalue(), cmd.get('stderr', ''))
@@ -2218,7 +2266,8 @@ class _TestCases(unittest.TestCase):
             self.setUp()
             os.chdir(self.tempdir)
-            self.run_command_file(i)
+            with self.subTest(file=i):
+                self.run_command_file(i)
 
     # XXX - the following test may no longer be needed
     def test_main(self):
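
Note: running each command file under subTest means a failing fixture no longer hides the remaining files, and the failure report names the file. The pattern in isolation (a minimal sketch with hypothetical file names):

    import unittest

    class Example(unittest.TestCase):
        def test_command_files(self):
            for i in [ 'cmd.ident.json', 'cmd.container.json' ]:
                with self.subTest(file=i):
                    # each iteration is reported separately on failure
                    self.assertTrue(i.endswith('.json'))
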
@@ -0,0 +1,12 @@
+# This file contains magic entries that are used by the tests and the
+# code; it must be present for detection to work properly.
+
+# Transmission adds this
+0	string	d10:created\ by	BitTorrent file
+!:mime	application/x-bittorrent
+!:ext	torrent
+
+# BitTornado adds this
+0	string	d13:creation\ date	BitTorrent file
+!:mime	application/x-bittorrent
+!:ext	torrent
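
Note: these entries match torrents whose bencoded dict begins with the 'created by' (10 bytes) or 'creation date' (13 bytes) key, which appears to be how Transmission and BitTornado lay out their output. A sketch of why the magic strings look the way they do:

    def bencode_str(s):
        # bencoded string: '<length>:<bytes>'
        b = s.encode()
        return b'%d:%s' % (len(b), b)

    # a bencoded dict is 'd' + key/value pairs + 'e', so such a torrent
    # file starts with these exact bytes at offset 0
    assert b'd' + bencode_str('created by') == b'd10:created by'
    assert b'd' + bencode_str('creation date') == b'd13:creation date'
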
@@ -0,0 +1,155 @@
+import functools
+import importlib
+import magic
+import os
+import pathlib
+import shutil
+import tempfile
+import unittest
+
+from .utils import _debprint
+
+__doc__ = '''
+This is a number of hacks to the Python magic module so that it works
+better.  These bugs should be fixed in the module, but I don't want to
+deal w/ forking and getting the fixes upstreamed.
+'''
+
+magic.FileMagic = magic.namedtuple('FileMagic', ('mime_type', 'encoding',
+    'name', 'compressed_type'), defaults=[ '' ])
+
+from magic import *
+
+__all__ = [
+    'detect_from_filename',
+    'detect_from_content',
+]
+
+_mgc_data = {}
+_lcl_mgc_data = None
+
+# Wrap magic.open so that we look past compression
+_real_magic_open = magic.open
+
+@functools.wraps(magic.open)
+def open(flags):
+    return _real_magic_open(flags|magic.MAGIC_COMPRESS)
+
+magic.open = open
+
+def _create_filemagic(mime_detected, type_detected):
+    try:
+        mime_type, mime_encoding = mime_detected.split('; ', 1)
+    except ValueError:
+        raise ValueError(mime_detected)
+
+    kwargs = {}
+    try:
+        mime_encoding, compressed_type = mime_encoding.split(' compressed-encoding=')
+    except ValueError:
+        pass
+    else:
+        compressed_type, _ = compressed_type.split('; ', 1)
+        kwargs['compressed_type'] = compressed_type
+
+    return FileMagic(name=type_detected, mime_type=mime_type,
+        encoding=mime_encoding.replace('charset=', ''), **kwargs)
+
+magic._create_filemagic = _create_filemagic
+
+def _get_mgc_data(fname):
+    try:
+        return _mgc_data[fname]
+    except KeyError:
+        data = pathlib.Path(fname).read_bytes()
+        _mgc_data[fname] = data
+        return data
+
+def _compile_file(inp, out, tempdir):
+    oldcwd = pathlib.Path.cwd()
+    try:
+        os.chdir(tempdir)
+
+        mag = magic.open(magic.MAGIC_NONE)
+        mag.compile(str(inp))
+
+        inp.with_suffix('.mgc').rename(out)
+    finally:
+        os.chdir(oldcwd)
+
+def _compile_lcl():
+    magicfile = importlib.resources.files('medashare') / 'magic'
+
+    try:
+        d = pathlib.Path(tempfile.mkdtemp()).resolve()
+
+        # write out data
+        inpfile = d / 'magic'
+        inpfile.write_bytes(magicfile.read_bytes())
+
+        # where it'll go
+        outfile = d / 'someotherfile'
+
+        _compile_file(inpfile, outfile, tempdir=d)
+
+        return outfile.read_bytes()
+    finally:
+        shutil.rmtree(d)
+
+def _get_mgc_res():
+    global _lcl_mgc_data
+
+    if _lcl_mgc_data is None:
+        try:
+            mgcfile = importlib.resources.files('medashare') / 'magic.mgc'
+            _lcl_mgc_data = mgcfile.read_bytes()
+        except FileNotFoundError:
+            _lcl_mgc_data = _compile_lcl()
+
+    _debprint(repr(_lcl_mgc_data))
+
+    return _lcl_mgc_data
+
+# patch magic to load custom magic file
+_mgp = magic._libraries['magic'].magic_getpath
+_mgp.restype = magic.c_char_p
+_mgp.argtypes = [ magic.c_char_p, magic.c_int ]
+
+_mlb = magic._libraries['magic'].magic_load_buffers
+_mlb.restype = magic.c_int
+_mlb.argtypes = [ magic.magic_t, magic.POINTER(magic.c_void_p), magic.POINTER(magic.c_size_t), magic.c_size_t ]
+
+def _new_magic_load(self, fname=None):
+    files = _mgp(None, 0).decode('utf-8') + '.mgc' + ':' + str(pathlib.Path(__file__).parent / 'magic')
+    return magic._load(self._magic_t, files.encode('utf-8'))
+
+    # XXX - for some reason this code isn't working
+    files = [ _mgp(None, 0).decode('utf-8') + '.mgc' ]
+    buffers = [ _get_mgc_data(x) for x in files ] + [ _get_mgc_res() ]
+    #buffers.reverse()
+    del buffers[0]
+    cnt = len(buffers)
+
+    mgcdatas = [ (magic.c_char * len(x))(*x) for x in buffers ]
+
+    bufs = (magic.c_void_p * cnt)(*(magic.cast(magic.pointer(x), magic.c_void_p) for x in mgcdatas))
+    sizes = (magic.c_size_t * cnt)(*(len(x) for x in buffers))
+
+    _debprint('mg:', cnt, repr([len(x) for x in buffers]))
+
+    r = _mlb(self._magic_t, bufs, sizes, cnt)
+
+    return r
+
+magic.Magic.load = _new_magic_load
+
+class _TestMagic(unittest.TestCase):
+    def test_create_filemagic(self):
+        a = _create_filemagic('application/x-tar; charset=binary compressed-encoding=application/gzip; charset=binary', 'foobar')
+
+        self.assertEqual(a.mime_type, 'application/x-tar')
+        self.assertEqual(a.encoding, 'binary')
+        self.assertEqual(a.compressed_type, 'application/gzip')
+        self.assertEqual(a.name, 'foobar')
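
Note: with MAGIC_COMPRESS forced on and the extra compressed_type field on FileMagic, callers can see both the inner type and the compression wrapper. A usage sketch (the exact values assume the magic database identifies the fixture as gzip-compressed tar):

    from medashare.magic_wrap import detect_from_filename

    mf = detect_from_filename('testfile.tar.gz')
    print(mf.mime_type)        # 'application/x-tar'
    print(mf.compressed_type)  # 'application/gzip'
    print(mf.encoding)         # 'binary'
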
@@ -6,3 +6,4 @@ from .cli import _TestMigrations
 from .tags import _TestTagCache
 from .mtree import Test
 from .server import _TestCases, _TestPostConfig
+from .magic_wrap import _TestMagic
@@ -1,8 +1,23 @@
 import base64
 import datetime
 import pasn1
+import sys
 import uuid
 
+_real_stderr = sys.stderr
+
+def _debprint(*args): # pragma: no cover
+    import traceback, sys, os.path
+
+    st = traceback.extract_stack(limit=2)[0]
+
+    sep = ''
+    if args:
+        sep = ':'
+
+    print('%s:%d%s' % (os.path.basename(st.filename), st.lineno, sep),
+        *args, file=_real_stderr)
+    sys.stderr.flush()
+
 def _makeuuid(s):
     if isinstance(s, uuid.UUID):
         return s
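
Note: _debprint moves to utils so cli.py and magic_wrap.py can share one copy; it prefixes each message with the caller's file name and line, writing to the real stderr captured at import time. A sketch of the output (line number hypothetical):

    # suppose this call sits on line 42 of example.py:
    _debprint('mf:', 'application/x-tar')
    # prints: example.py:42: mf: application/x-tar
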
@@ -2,13 +2,43 @@
 # python setup.py --dry-run --verbose install
 
 import os.path
-from setuptools import setup, find_packages
+import pathlib
+import shutil
+import subprocess
+from setuptools import setup, find_packages, Command, Extension
+from setuptools.command.build_ext import build_ext
+#from medashare.magic_wrap import compile_file
 
 from distutils.core import setup
 
+class file_ext(build_ext):
+    def __init__(self, dist):
+        super().__init__(dist)
+
+    def run(self):
+        # do the building
+        #print(repr(self.distribution))
+        fnames = [ (x, pathlib.Path(self.build_lib) / 'medashare' / x) for x in self.get_source_files() ]
+        oldcwd = os.getcwd()
+        for src, dst in fnames:
+            os.chdir(oldcwd)
+            shutil.copyfile(src, dst)
+            os.chdir(dst.parent)
+
+        cmd = [ 'file', '-C', '-m' ] + [ str(dst) for src, dst in fnames ]
+        #print('running:', cmd)
+        r = subprocess.run(cmd)
+        os.chdir(oldcwd)
+        r.check_returncode()
+
+    def get_outputs(self):
+        return [ '%s.mgc' % i for i in self.get_source_files() ]
+
+    # method build_extension not needed, in run
+
 setup(
     name='medashare',
-    version='0.1.0',
+    version='0.1.1',
     author='John-Mark Gurney',
     author_email='jmg@funkthat.com',
     packages=find_packages(),
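
Note: file_ext copies each magic source into the build tree and runs file(1) with -C to compile it into a .mgc alongside. The equivalent standalone step, as a sketch (the path is hypothetical):

    import subprocess

    # 'file -C -m <src>' writes <src>.mgc next to the source magic file
    subprocess.run([ 'file', '-C', '-m', 'build/lib/medashare/magic' ],
        check=True)
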
@@ -18,6 +48,9 @@ setup(
     #download_url='',
     long_description=open('README.md').read(),
     python_requires='>=3.8',
+    # This isn't needed till magic_wrap.py can use it
+    #cmdclass=dict(build_ext=file_ext),
+    #ext_modules=[ Extension(name='magic', sources=['medashare/magic']) ],
     install_requires=[
         'alembic',
         'base58',
@@ -30,9 +63,14 @@ setup(
         'hypercorn', # option, for server only?
         'orm',
         'pasn1 @ git+https://www.funkthat.com/gitea/jmg/pasn1.git@c6c64510b42292557ace2b77272eb32cb647399d#egg=pasn1',
+        'python-libarchive @ git+https://www.funkthat.com/gitea/jmg/python-libarchive.git#egg=python-libarchive',
+        'file-magic @ git+https://github.com/file/file.git#egg=file-magic&subdirectory=python',
         'pydantic[dotenv]',
     ],
+    include_package_data=True,
+    package_data={
+        'medashare': [ 'alembic/**/*.py', 'alembic.ini', ],
+    },
     extras_require = {
         # requests needed for fastapi.testclient.TestClient
         'dev': [ 'coverage', 'requests' ],