Import Python modules by their hash.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

278 lines
7.5 KiB

  1. # Copyright 2020 John-Mark Gurney.
  2. # All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions
  6. # are met:
  7. # 1. Redistributions of source code must retain the above copyright
  8. # notice, this list of conditions and the following disclaimer.
  9. # 2. Redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  14. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  15. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  16. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  17. # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  18. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  19. # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  20. # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  21. # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  22. # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  23. # SUCH DAMAGE.
  24. import contextlib
  25. import glob
  26. import hashlib
  27. import importlib
  28. import os.path
  29. import pathlib
  30. import shutil
  31. import sys
  32. import tempfile
  33. from importlib.abc import MetaPathFinder, Loader
  34. from importlib.machinery import ModuleSpec
  35. @contextlib.contextmanager
  36. def tempset(obj, key, value):
  37. '''A context (with) manager for changing the value of an item in a
  38. dictionary, and restoring it after the with block.
  39. Example usage:
  40. ```
  41. d = dict(a=5, b=10)
  42. with tempset(d, 'a', 15):
  43. print(repr(d['a'])
  44. print(repr(d['a'])
  45. ```
  46. '''
  47. try:
  48. oldvalue = obj[key]
  49. obj[key] = value
  50. yield
  51. finally:
  52. obj[key] = oldvalue
  53. class FileDirCAS(object):
  54. '''A file loader for CAS that operates on a directory. It looks
  55. at files, caches their hash, and loads them upon request.'''
  56. def __init__(self, path):
  57. self._path = pathlib.Path(path)
  58. self._hashes = {}
  59. def refresh_dir(self):
  60. '''Internal method to refresh the internal cache of
  61. hashes.'''
  62. for i in glob.glob(os.path.join(self._path, '*.py')):
  63. _, hash = self.read_hash_file(i)
  64. self._hashes[hash] = i
  65. @staticmethod
  66. def read_hash_file(fname):
  67. '''Helper function that will read the file at fname, and
  68. return the tuple of it's contents and it's hash.'''
  69. with open(fname, 'rb') as fp:
  70. data = fp.read()
  71. hash = hashlib.sha256(data).hexdigest()
  72. return data, hash
  73. def is_package(self, hash):
  74. '''Decode the provided hash, and decide if it's a package
  75. or not.'''
  76. return False
  77. def exec_module(self, hash, module):
  78. '''Give the hash and module, load the code associated
  79. with the hash, and exec it in the module's context.'''
  80. self.refresh_dir()
  81. parts = hash.split('_', 2)
  82. fname = self._hashes[parts[2]]
  83. data, fhash = self.read_hash_file(fname)
  84. if fhash != parts[2]:
  85. raise ValueError('file no longer matches hash on disk')
  86. exec(data, module.__dict__)
  87. class CASFinder(MetaPathFinder, Loader):
  88. '''Overall class for using Content Addressable Storage to load
  89. Python modules into your code. It contains code to dispatch to
  90. the various loaders to attempt to load the hash.'''
  91. def __init__(self):
  92. self._loaders = []
  93. if [ x for x in sys.meta_path if isinstance(x, self.__class__) ]:
  94. raise RuntimeError('cannot register more than on CASFinder')
  95. sys.meta_path.append(self)
  96. def __enter__(self):
  97. return self
  98. def __exit__(self, exc_type, exc_value, traceback):
  99. self.disconnect()
  100. def disconnect(self):
  101. '''Disconnect this Finder from being used to load modules.
  102. As this claims an entire namespace, only the first loaded
  103. one will work, and any others will be hidden until the
  104. first one is disconnected.
  105. This can be used w/ a with block to automatically
  106. disconnect when no longer needed. This is mostly useful
  107. for testing.'''
  108. try:
  109. sys.meta_path.remove(self)
  110. except ValueError:
  111. pass
  112. def register(self, loader):
  113. '''Register a loader w/ this finder. This will attempt
  114. to load the hash passed to it. It is also (currently)
  115. responsible for executing the code in the module.'''
  116. self._loaders.append(loader)
  117. # MetaPathFinder methods
  118. def find_spec(self, fullname, path, target=None):
  119. if path is None:
  120. ms = ModuleSpec(fullname, self, is_package=True)
  121. else:
  122. parts = fullname.split('.')
  123. for l in self._loaders:
  124. ispkg = l.is_package(parts[1])
  125. break
  126. else:
  127. return None
  128. ms = ModuleSpec(fullname, self, is_package=True, loader_state=(parts[1], l))
  129. return ms
  130. def invalidate_caches(self):
  131. return None
  132. # Loader methods
  133. def exec_module(self, module):
  134. if module.__name__ == 'cas':
  135. pass
  136. else:
  137. hash, load = module.__spec__.loader_state
  138. load.exec_module(hash, module)
  139. def defaultinit(casf):
  140. cachedir = pathlib.Path.home() / '.casimport_cache'
  141. cachedir.mkdir(exist_ok=True)
  142. casf.register(FileDirCAS(cachedir))
# The global version: a finder that is created, installed in sys.meta_path
# and given the default loader as a side effect of importing this module.
_casfinder = CASFinder()
defaultinit(_casfinder)
  146. import unittest
  147. class Test(unittest.TestCase):
  148. def setUp(self):
  149. # clear out the default casfinder if there is one
  150. self.old_meta_path = sys.meta_path
  151. sys.meta_path = [ x for x in sys.meta_path if not isinstance(x, CASFinder) ]
  152. # setup temporary directory
  153. d = pathlib.Path(os.path.realpath(tempfile.mkdtemp()))
  154. self.basetempdir = d
  155. self.tempdir = d / 'subdir'
  156. self.tempdir.mkdir()
  157. self.fixtures = pathlib.Path(__file__).parent.parent / 'fixtures'
  158. def tearDown(self):
  159. # restore environment
  160. sys.meta_path = self.old_meta_path
  161. importlib.invalidate_caches()
  162. # clean up sys.modules
  163. [ sys.modules.pop(x) for x in list(sys.modules.keys()) if
  164. x == 'cas' or x.startswith('cas.') ]
  165. shutil.rmtree(self.basetempdir)
  166. self.tempdir = None
  167. def test_filedircas_limit_refresh(self):
  168. # XXX - only refresh when the dir has changed, and each
  169. # file has changed
  170. pass
  171. def test_casimport(self):
  172. # That a CASFinder
  173. f = CASFinder()
  174. # make sure that we can't import anything at first
  175. with self.assertRaises(ImportError):
  176. import cas.v1_f_2398472398
  177. # when registering the fixtures directory
  178. f.register(FileDirCAS(self.fixtures))
  179. # can import the function
  180. from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello
  181. name = 'Olof'
  182. # and run the code
  183. self.assertEqual(hello(name), 'hello ' + name)
  184. # and when finished, can disconnect
  185. f.disconnect()
  186. # and is no longer in the meta_path
  187. self.assertNotIn(f, sys.meta_path)
  188. # and when disconnected as second time, nothing happens
  189. f.disconnect()
  190. def test_defaultinit(self):
  191. temphome = self.tempdir / 'home'
  192. temphome.mkdir()
  193. cachedir = temphome / '.casimport_cache'
  194. with tempset(os.environ, 'HOME', str(temphome)):
  195. with CASFinder() as f:
  196. # Setup the defaults
  197. defaultinit(f)
  198. # that the cache got created
  199. self.assertTrue(cachedir.is_dir())
  200. # and that when hello.py is copied to the cache
  201. shutil.copy(self.fixtures / 'hello.py', cachedir)
  202. # it can be imported
  203. from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello
  204. with CASFinder() as f:
  205. defaultinit(f)
  206. # and that a new CASFinder can still find it
  207. from cas.v1_f_330884aa2febb5e19fb7194ec6a69ed11dd3d77122f1a5175ee93e73cf0161c3 import hello
  208. def test_multiplecas(self):
  209. # that once we have one
  210. with CASFinder() as f:
  211. # if we try to create a second, it fails
  212. self.assertRaises(RuntimeError, CASFinder)