| @@ -0,0 +1,217 @@ | |||
| #!/usr/bin/env python | |||
| import hashlib | |||
| import pasn1 | |||
| import os.path | |||
| import shutil | |||
| import string | |||
| import tempfile | |||
| import unittest | |||
| import uuid | |||
# Names of the hashlib algorithms accepted in '<algorithm>:<hexdigest>' strings.
_validhashes = {'sha256', 'sha512'}

# Reverse lookup: length of an algorithm's hex digest -> algorithm name.
# Used to guess the algorithm when only a bare hex digest is supplied.
_hashlengths = dict(
    (len(getattr(hashlib, name)().hexdigest()), name)
    for name in _validhashes
)
| # XXX - add validation | |||
class ObjWrap(object):
    '''A thin wrapper that turns a decoded JSON-style object (a mapping)
    into a pythonesque object whose keys can also be read as attributes.

    obj.key and obj['key'] both return the wrapped mapping's value for
    'key'.  A missing key raises AttributeError for attribute access (so
    hasattr() and copy work) and KeyError for item access.'''

    def __init__(self, obj):
        self._obj = obj

    def __getattr__(self, k):
        # __getattr__ only runs when normal lookup fails.  If _obj itself
        # is missing (e.g. an instance made by copy/pickle before its state
        # is restored), looking it up below would recurse forever.
        if k == '_obj':
            raise AttributeError(k)

        try:
            return self._obj[k]
        except KeyError:
            # The attribute protocol requires AttributeError here.
            raise AttributeError(k)

    def __getitem__(self, k):
        return self._obj[k]

    def __to_dict__(self):
        '''Return the wrapped mapping itself (not a copy).'''

        return self._obj

    def __eq__(self, o):
        '''Compare the wrapped mapping with o, which may be either
        another ObjWrap or a plain mapping.'''

        # cmp() does not exist on Python 3; compare the mappings directly.
        if isinstance(o, ObjWrap):
            o = o._obj

        return self._obj == o

    def __ne__(self, o):
        # Python 2 does not derive != from ==.
        return not self == o
def _trytodict(o):
    '''Coerce hook: convert o through its __to_dict__ method.

    Returns the pair ('dict', <result of o.__to_dict__()>).  Any failure
    while looking up or calling __to_dict__ is reported as a TypeError
    naming the type of o.'''

    try:
        converted = o.__to_dict__()
    except Exception:
        raise TypeError('unable to find __to_dict__ on %s' % type(o))

    return 'dict', converted
# Module-wide coder used to serialize and parse the object store files.
# NOTE(review): coerce=_trytodict presumably lets the coder encode objects
# it does not handle natively by converting them via __to_dict__ -- confirm
# against the pasn1 documentation.
_asn1coder = pasn1.ASN1DictCoder(coerce=_trytodict)
class ObjectStore(object):
    '''A container to store the various Metadata objects.

    Objects are indexed by UUID (one object per UUID) and by each of
    their normalized '<algorithm>:<hexdigest>' hash strings (several
    objects may share a hash).'''

    def __init__(self):
        self._uuids = {}
        self._hashes = {}

    @staticmethod
    def makehash(hashstr, strict=True):
        '''Take a hash string, and return a valid hash string from it.

        In strict mode hashstr must be '<algorithm>:<hexdigest>' where the
        algorithm is a supported one, the digest is lower case hex, and
        the digest has the length that algorithm produces.

        If strict is False, a bare hex digest is also accepted: the
        algorithm is detected from the digest length, and the digest
        itself is not validated.

        Raises ValueError if the string is malformed or fails validation.
        In non-strict mode a bare digest whose length matches no supported
        algorithm raises KeyError.'''

        try:
            algo, value = hashstr.split(':')
        except ValueError:
            if strict:
                raise

            # No (or too many) separators: treat as a bare digest and
            # infer the algorithm from its length.
            algo = _hashlengths[len(hashstr)]
            value = hashstr

        if algo not in _validhashes:
            raise ValueError('unsupported hash algorithm', algo)

        if strict:
            # str.translate(None, ...) only exists on Python 2, so use a
            # set difference to find characters that are not lower case hex.
            if set(value) - set(string.hexdigits.lower()):
                raise ValueError('value has invalid hex digits (must be lower case)', value)

            if _hashlengths.get(len(value)) != algo:
                raise ValueError('value has the wrong length for hash', value)

        return ':'.join((algo, value))

    def __len__(self):
        return len(self._uuids)

    def store(self, fname):
        '''Write out the objects in the store to the file named
        fname.'''

        # The encoded data is binary, so do not let text mode touch it.
        # list() because Python 3's dict.values() is a view, not a list.
        with open(fname, 'wb') as fp:
            fp.write(_asn1coder.dumps(list(self._uuids.values())))

    def _unindex(self, obj):
        '''Remove obj from every hash bucket it was indexed under.'''

        for j in obj.hashes:
            key = self.makehash(j)
            bucket = self._hashes.get(key)
            if bucket is None:
                continue

            # Compare by identity: ObjWrap equality compares contents.
            bucket[:] = [ o for o in bucket if o is not obj ]
            if not bucket:
                del self._hashes[key]

    def loadobj(self, obj):
        '''Load obj into the data store.

        obj is either an ObjWrap or a plain mapping, which gets wrapped.
        It must provide 'uuid' and 'hashes'.  Loading an object whose
        UUID is already present replaces the earlier object.'''

        if not isinstance(obj, ObjWrap):
            obj = ObjWrap(obj)

        # Validate everything before touching the indexes so that a bad
        # object does not leave the store half updated.
        uid = uuid.UUID(obj.uuid)
        hashes = [ self.makehash(j) for j in obj.hashes ]

        previous = self._uuids.get(uid)
        if previous is not None:
            # Drop the replaced object from the hash index, otherwise
            # by_hash would keep returning stale or duplicate entries.
            self._unindex(previous)

        self._uuids[uid] = obj
        for h in hashes:
            self._hashes.setdefault(h, []).append(obj)

    def load(self, fname):
        '''Load objects from the provided file name.

        Basic validation will be done on the objects in the file.

        The objects will be accessible via other methods.'''

        with open(fname, 'rb') as fp:
            objs = _asn1coder.loads(fp.read())

        for i in objs:
            self.loadobj(i)

    def by_id(self, id):
        '''Look up an object by its UUID (given as a string).

        Raises KeyError if no object with that UUID is loaded.'''

        uid = uuid.UUID(id)

        return self._uuids[uid]

    def by_hash(self, hash):
        '''Look up the list of objects by their hash value.

        hash may be either '<algorithm>:<hexdigest>' or a bare hex
        digest.  Raises KeyError if no object has that hash.'''

        h = self.makehash(hash, strict=False)

        return self._hashes[h]
class FileObject(object):
    '''A file, described by the directory it lives in and its name.'''

    def __init__(self, _dir, filename):
        self._dir, self._fname = _dir, filename

    @property
    def filename(self):
        '''The file's name, as given to the constructor.'''

        return self._fname

    @property
    def dir(self):
        '''The directory holding the file, as given to the constructor.'''

        return self._dir

    @property
    def id(self):
        '''A version 5 (URL namespace) UUID derived from the file's path.'''

        # XXX make sure this is correct
        head, tail = os.path.split(self._dir)
        path = '/'.join((head, tail, self._fname))

        return uuid.uuid5(uuid.NAMESPACE_URL, 'someurl' + path)
def enumeratedir(_dir):
    '''Enumerate all the files and directories (not recursive) in _dir.

    Returned is a list of FileObjects, ordered by file name.'''

    # Build a real list: on Python 3 map() returns a one-shot iterator that
    # cannot be indexed.  os.listdir() order is arbitrary, so sort to give
    # callers a reproducible order.
    return [ FileObject(_dir, name) for name in sorted(os.listdir(_dir)) ]
class _TestCases(unittest.TestCase):
    '''Unit tests for the hash helpers, directory enumeration and the
    object store.  They expect a fixtures directory in the current
    working directory.'''

    def setUp(self):
        # Work on a private copy of the fixture files so that tests never
        # modify the originals.
        d = tempfile.mkdtemp()
        self.basetempdir = d
        self.tempdir = os.path.join(d, 'subdir')

        shutil.copytree(os.path.join('fixtures', 'testfiles'),
            self.tempdir)

    def tearDown(self):
        shutil.rmtree(self.basetempdir)
        self.tempdir = None

    def test_makehash(self):
        # malformed strings and upper case hex digits are rejected
        self.assertRaises(ValueError, ObjectStore.makehash, 'slkj')
        self.assertRaises(ValueError, ObjectStore.makehash, 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ADA')

        # in non-strict mode the algorithm is detected from the length
        self.assertEqual(ObjectStore.makehash('cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e', strict=False), 'sha512:cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e')
        self.assertEqual(ObjectStore.makehash('e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', strict=False), 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')

    def test_enumeratedir(self):
        # list() so the test also works if an iterator is returned
        files = list(enumeratedir(self.tempdir))
        fname = 'test.txt'

        # Look the file up by name instead of relying on directory order.
        matches = [ f for f in files if f.filename == fname ]
        self.assertEqual(len(matches), 1)
        ftest = matches[0]

        self.assertEqual(ftest.filename, fname)
        self.assertEqual(ftest.dir, self.tempdir)
        self.assertEqual(ftest.id, uuid.uuid5(uuid.NAMESPACE_URL,
            'someurl' + '/'.join(os.path.split(self.tempdir) +
            ( fname, ))))

    def test_objectstore(self):
        objst = ObjectStore()

        objst.load(os.path.join('fixtures', 'sample.data.pasn1'))

        objst.loadobj({
            'type': 'metadata',
            'uuid': 'c9a1d1e2-3109-4efd-8948-577dc15e44e7',
            'hashes': [ 'sha256:91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada' ],
            'lang': 'en',
        })

        lst = objst.by_hash('91751cee0a1ab8414400238a761411daa29643ab4b8243e9a91649e25be53ada')
        self.assertEqual(len(lst), 2)

        byid = objst.by_id('3e466e06-45de-4ecc-84ba-2d2a3d970e96')

        self.assertIn(byid, lst)

        r = byid

        self.assertEqual(r.uuid, '3e466e06-45de-4ecc-84ba-2d2a3d970e96')
        self.assertEqual(r['dc:author'], 'John-Mark Gurney')

        # Write inside the temporary directory so that tearDown removes
        # the file instead of leaving it in the current directory.
        outname = os.path.join(self.basetempdir, 'testfile.pasn1')
        objst.store(outname)

        # the stored data is binary
        with open(outname, 'rb') as fp:
            objs = _asn1coder.loads(fp.read())

        self.assertEqual(len(objs), len(objst))

        for i in objs:
            self.assertEqual(objst.by_id(i['uuid']), i)