From 009cc105907987a1afab22b769685eb7c89b0e82 Mon Sep 17 00:00:00 2001
From: MegaIng1
Date: Sun, 27 Sep 2020 16:03:39 +0200
Subject: [PATCH] Added `FromPackageLoader` and `open_from_package`

---
 lark-stubs/lark.pyi  | 12 +++++++
 lark/lark.py         | 38 +++++++++++++++++++---
 lark/load_grammar.py | 75 ++++++++++++++++++++++++++++++++------------
 tests/test_parser.py | 26 +++++++++++----
 4 files changed, 120 insertions(+), 31 deletions(-)

diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi
index deb8849..5cb94b2 100644
--- a/lark-stubs/lark.pyi
+++ b/lark-stubs/lark.pyi
@@ -33,6 +33,13 @@ class LarkOptions:
     g_regex_flags: int
     use_bytes: bool
     import_sources: List[Union[str, Callable[[str, str], str]]]
+    source: Optional[str]
+
+
+class FromPackageLoader:
+    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...
+
+    def __call__(self, base_paths: List[str], grammar_path: str) -> Tuple[str, str]: ...


class Lark:
@@ -62,6 +69,7 @@ class Lark:
         g_regex_flags: int = ...,
         use_bytes: bool = False,
         import_sources: List[Union[str, Callable[[List[str], str], Tuple[str, str]]]] = ...,
+        source: Optional[str] = None,
     ):
         ...

@@ -71,6 +79,10 @@ class Lark:
    @classmethod
    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
        ...
+
+    @classmethod
+    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
+        ...

    def lex(self, text: str) -> Iterator[Token]:
        ...
diff --git a/lark/lark.py b/lark/lark.py
index 9877b00..9f53841 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -5,7 +5,7 @@
 from io import open

 from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger
-from .load_grammar import load_grammar
+from .load_grammar import load_grammar, FromPackageLoader
 from .tree import Tree
 from .common import LexerConf, ParserConf

@@ -92,6 +92,8 @@ class LarkOptions(Serialize):
            A callback for editing the terminals before parse.
        import_sources
            A List of either paths or loader functions to specify from where grammars are imported
+        source
+            Override the source from where the grammar was loaded. Useful for relative imports and unconventional grammar loading.

    **=== End Options ===**
    """
@@ -118,6 +120,7 @@
        'g_regex_flags': 0,
        'use_bytes': False,
        'import_sources': [],
+        'source': None,
    }

    def __init__(self, options_dict):
@@ -193,10 +196,13 @@ class Lark(Serialize):
            re_module = re

        # Some, but not all file-like objects have a 'name' attribute
-        try:
-            self.source = grammar.name
-        except AttributeError:
-            self.source = '<string>'
+        if self.options.source is None:
+            try:
+                self.source = grammar.name
+            except AttributeError:
+                self.source = '<string>'
+        else:
+            self.source = self.options.source

        # Drain file-like objects to get their contents
        try:
@@ -404,6 +410,28 @@ class Lark(Serialize):
            grammar_filename = os.path.join(basepath, grammar_filename)
        with open(grammar_filename, encoding='utf8') as f:
            return cls(f, **options)
+
+    @classmethod
+    def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
+        """Create an instance of Lark with the grammar loaded from within the package `package`.
+        This allows grammar loading from zipapps.
+
+        Will also create a `FromPackageLoader` instance and add it to the `import_sources`, to simplify importing.
+
+        ``search_paths`` is passed to `FromPackageLoader`.
+
+        Example:
+
+            Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
+ """ + package = FromPackageLoader(package, search_paths) + full_path, text = package([], grammar_path) + options.setdefault('source', full_path) + if 'import_sources' in options: + options['import_sources'].append(package) + else: + options['import_sources'] = [package] + return cls(text, **options) def __repr__(self): return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 8849f76..ba1f0f2 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -4,6 +4,7 @@ import os.path import sys from copy import copy, deepcopy from io import open +import pkgutil from .utils import bfs, eval_escaping, Py36, logger, classify_bool from .lexer import Token, TerminalDef, PatternStr, PatternRE @@ -648,35 +649,69 @@ class Grammar: return terminals, compiled_rules, self.ignore -def stdlib_loader(base_paths, grammar_path): - import pkgutil - for path in IMPORT_PATHS: - text = pkgutil.get_data('lark', path + '/' + grammar_path) - if text is None: - continue - return '', text.decode() - raise FileNotFoundError() +class FromPackageLoader(object): + """ + Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`. + This allows them to be compatible even from within zip files. + + Relative imports are handled, so you can just freely use them. + + pkg_name: The name of the package. You can probably provide `__name__` most of the time + search_paths: All the path that will be search on absolute imports. + """ + def __init__(self, pkg_name, search_paths=("", )): + self.pkg_name = pkg_name + self.search_paths = search_paths + + def __repr__(self): + return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) + + def __call__(self, base_paths, grammar_path): + if len(base_paths) == 0: + to_try = self.search_paths + else: + assert len(base_paths) == 1 + if not base_paths[0].startswith('<%s:' % (self.pkg_name,)): + # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway + raise IOError() + base_path = base_paths[0].partition(':')[2] + if base_path and base_path[0] == '/': + base_path = base_path[1:] + to_try = [base_path] + for path in to_try: + full_path = os.path.join(path, grammar_path) + text = None + with suppress(IOError): + text = pkgutil.get_data(self.pkg_name, full_path) + if text is None: + continue + return '<%s:/%s>' % (self.pkg_name, full_path), text.decode() + raise IOError() + +stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) _imported_grammars = {} -def import_grammar(grammar_path, re_, base_paths=(), import_sources=()): +def import_grammar(grammar_path, re_, base_paths=[], import_sources=[]): if grammar_path not in _imported_grammars: - import_paths = import_sources + base_paths + [stdlib_loader] + # import_sources take priority over base_paths since they should handle relative imports and ignore everthing else. + # Question: should the stdlib_loader really be pushed to the end? 
+        import_paths = import_sources + base_paths + [stdlib_loader]
        for source in import_paths:
-            if callable(source):
-                with suppress(IOError):
+            text = None
+            with suppress(IOError):
+                if callable(source):
                    joined_path, text = source(base_paths, grammar_path)
-                    grammar = load_grammar(text, joined_path, re_, import_sources)
-                    _imported_grammars[grammar_path] = grammar
-                    break
-            else:
-                with suppress(IOError):
+                else:
                    joined_path = os.path.join(source, grammar_path)
                    with open(joined_path, encoding='utf8') as f:
                        text = f.read()
-                    grammar = load_grammar(text, joined_path, re_, import_sources)
-                    _imported_grammars[grammar_path] = grammar
-                    break
+            if text is not None:
+                # Don't load the grammar from within the suppress statement. Otherwise the underlying error message will be swallowed,
+                # and the wrong file will be reported as missing
+                grammar = load_grammar(text, joined_path, re_, import_sources)
+                _imported_grammars[grammar_path] = grammar
+                break
        else:
            open(grammar_path, encoding='utf8')
            assert False
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 6779f64..0406f46 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -11,6 +11,7 @@ from copy import copy, deepcopy
 from lark.utils import Py36, isascii

 from lark import Token
+from lark.load_grammar import FromPackageLoader

 try:
     from cStringIO import StringIO as cStringIO
@@ -1783,12 +1784,7 @@ def _make_parser_test(LEXER, PARSER):
        self.assertRaises(IOError, _Lark, grammar)

    def test_import_custom_sources(self):
-        def custom_loader(base_paths, grammar_path):
-            import pkgutil
-            text = pkgutil.get_data('tests', 'grammars/' + grammar_path)
-            if text is None:
-                raise FileNotFoundError()
-            return '', text.decode()
+        custom_loader = FromPackageLoader('tests', ('grammars', ))

        grammar = """
        start: startab
@@ -1800,6 +1796,24 @@ def _make_parser_test(LEXER, PARSER):

        self.assertEqual(p.parse('ab'), Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])]))

+        grammar = """
+        start: rule_to_import
+
+        %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
+        """
+        p = _Lark(grammar, import_sources=[custom_loader])
+        x = p.parse('N')
+        self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])
+
+        custom_loader2 = FromPackageLoader('tests')
+        grammar = """
+        %import .test_relative_import (start, WS)
+        %ignore WS
+        """
+        p = _Lark(grammar, import_sources=[custom_loader2])
+        x = p.parse('12 capybaras')
+        self.assertEqual(x.children, ['12', 'capybaras'])
+
    @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
    def test_earley_prioritization(self):
        "Tests effect of priority on result"
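
Usage sketch (not part of the patch): the package name `myapp` and the grammar files below are hypothetical; only `Lark`, `FromPackageLoader` and `Lark.open_from_package` come from this change.

    from lark import Lark
    from lark.load_grammar import FromPackageLoader

    # One step: the grammar is read via pkgutil.get_data, so this also works
    # when myapp ships inside a zipapp. open_from_package builds a
    # FromPackageLoader internally and appends it to import_sources, so
    # %import statements inside example.lark resolve from the same package.
    parser = Lark.open_from_package('myapp', 'example.lark', ('grammars',), parser='lalr')

    # Explicit: pass the loader yourself when the grammar text comes from
    # somewhere else but its %imports should still resolve from the package.
    loader = FromPackageLoader('myapp', ('grammars',))
    parser = Lark('''
        start: greeting
        %import shared.greeting    // found by `loader` in myapp/grammars/shared.lark
    ''', import_sources=[loader])

Note the `<myapp:/grammars/example.lark>` string the loader returns as `joined_path`: it is stored as the new `source` option, and it is how `FromPackageLoader.__call__` later recognizes its own grammars when resolving relative imports.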
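Since `import_sources` still accepts plain callables, the protocol that `FromPackageLoader` implements can also be satisfied by hand. A minimal sketch, assuming only the stub signature above (`(base_paths, grammar_path) -> (joined_path, text)`, raising `IOError` for "not found"); the in-memory dict is a made-up stand-in:

    # Any callable with this signature works as an entry in import_sources.
    # Raising IOError makes import_grammar fall through to the next source.
    GRAMMARS = {
        'ab.lark': '''
            startab: expr
            expr: A B
            A: "a"
            B: "b"
        ''',
    }

    def dict_loader(base_paths, grammar_path):
        try:
            # The first element names the source; it shows up in error messages.
            return '<dict:/%s>' % grammar_path, GRAMMARS[grammar_path]
        except KeyError:
            raise IOError(grammar_path)

A loader like this is then passed as `import_sources=[dict_loader]`, exactly like the `FromPackageLoader` instances in the tests.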