From 5930e4ba6ff28df59aa2cdc0196373a1c37d1970 Mon Sep 17 00:00:00 2001
From: MegaIng1
Date: Fri, 25 Dec 2020 01:26:41 +0100
Subject: [PATCH 01/10] Added Terminal support for `%override`

---
 docs/grammar.md       |  2 +-
 lark/load_grammar.py  | 25 +++++++++++++++++++++----
 tests/test_grammar.py | 18 +++++++++++++++---
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/docs/grammar.md b/docs/grammar.md
index b899b3f..d6d4b3b 100644
--- a/docs/grammar.md
+++ b/docs/grammar.md
@@ -291,7 +291,7 @@ Declare a terminal without defining it. Useful for plugins.

 ### %override

-Override a rule, affecting all the rules that refer to it.
+Override a rule or terminal, affecting all references to it, even in imported grammars.

 Useful for implementing an inheritance pattern when importing grammars.

diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 0fafc1c..7383c17 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -149,8 +149,8 @@ RULES = {
     'term': ['TERMINAL _COLON expansions _NL',
              'TERMINAL _DOT NUMBER _COLON expansions _NL'],

-    'statement': ['ignore', 'import', 'declare', 'override_rule'],
-    'override_rule': ['_OVERRIDE rule'],
+    'statement': ['ignore', 'import', 'declare', 'override'],
+    'override': ['_OVERRIDE rule', '_OVERRIDE term'],
     'ignore': ['_IGNORE expansions _NL'],
     'declare': ['_DECLARE _declare_args _NL'],
     'import': ['_IMPORT _import_path _NL',
@@ -950,6 +950,7 @@ class GrammarLoader:
         # Execute statements
         ignore, imports = [], {}
         overriding_rules = []
+        overriding_terms = []
         for (stmt,) in statements:
             if stmt.data == 'ignore':
                 t ,= stmt.children
@@ -998,9 +999,15 @@ class GrammarLoader:
             elif stmt.data == 'declare':
                 for t in stmt.children:
                     term_defs.append([t.value, (None, None)])
-            elif stmt.data == 'override_rule':
+            elif stmt.data == 'override':
                 r ,= stmt.children
-                overriding_rules.append(options_from_rule(*r.children))
+                if r.data == 'rule':
+                    overriding_rules.append(options_from_rule(*r.children))
+                else:
+                    if len(r.children) == 2:
+                        overriding_terms.append((r.children[0].value, (r.children[1], 1)))
+                    else:
+                        overriding_terms.append((r.children[0].value, (r.children[2], int(r.children[1]))))
             else:
                 assert False, stmt
@@ -1022,6 +1029,16 @@ class GrammarLoader:
                 raise GrammarError("Cannot override a nonexisting rule: %s" % name)
             rule_defs.append(r)

+        # Same for terminals
+        for t in overriding_terms:
+            name = t[0]
+            # remove the overridden terminal from term_defs
+            overridden, term_defs = classify_bool(term_defs, lambda t: t[0] == name)  # FIXME inefficient
+            if not overridden:
+                raise GrammarError("Cannot override a nonexisting terminal: %s" % name)
+            term_defs.append(t)
+
+
         ## Handle terminals

         # Verify correctness 1
diff --git a/tests/test_grammar.py b/tests/test_grammar.py
index 3ce76f6..760d563 100644
--- a/tests/test_grammar.py
+++ b/tests/test_grammar.py
@@ -3,7 +3,7 @@ from __future__ import absolute_import
 import sys
 from unittest import TestCase, main

-from lark import Lark
+from lark import Lark, Token
 from lark.load_grammar import GrammarLoader, GrammarError


@@ -21,7 +21,7 @@ class TestGrammar(TestCase):
         else:
             assert False, "example did not raise an error"

-    def test_override(self):
+    def test_override_rule(self):
         # Overrides the 'sep' template in existing grammar to add an optional terminating delimiter
         # Thus extending it beyond its original capacity
         p = Lark("""
@@ -29,12 +29,24 @@ class TestGrammar(TestCase):

             %override sep{item, delim}: item (delim item)* delim?
             %ignore " "
-        """)
+        """, source_path=__file__)

         a = p.parse('[1, 2, 3]')
         b = p.parse('[1, 2, 3, ]')

         assert a == b

+    def test_override_terminal(self):
+        p = Lark("""
+
+            %import .grammars.ab (startab, A, B)
+
+            %override A: "C"
+            %override B: "D"
+        """, start='startab', source_path=__file__)
+
+        a = p.parse('CD')
+        self.assertEqual(a.children[0].children, [Token('A', 'C'), Token('B', 'D')])
+

 if __name__ == '__main__':

From be979f2e634655e9840dc19ab29aad0f66f8e473 Mon Sep 17 00:00:00 2001
From: MegaIng1
Date: Fri, 25 Dec 2020 20:11:50 +0100
Subject: [PATCH 02/10] Added %extend for both rules and terminals.

---
 docs/grammar.md       | 19 ++++++++++++++
 lark/load_grammar.py  | 53 +++++++++++++++++++++++++++++++++++++++----
 tests/test_grammar.py | 28 +++++++++++++++++++----
 3 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/docs/grammar.md b/docs/grammar.md
index d6d4b3b..0d77420 100644
--- a/docs/grammar.md
+++ b/docs/grammar.md
@@ -302,3 +302,22 @@ Useful for implementing an inheritance pattern when importing grammars.
 // Add hex support to my_grammar
 %override number: NUMBER | /0x\w+/
 ```
+
+### %extend
+
+Extend the definition of a rule or terminal, i.e. add new alternatives to what it can match, as if they were appended with `|`.
+
+Useful for splitting the definition of a complex rule with many different options across multiple files.
+
+Can also be used to implement a plugin system where a core grammar is extended by others.
+
+
+**Example:**
+```perl
+%import my_grammar (start, NUMBER)
+
+// Add hex support to my_grammar
+%extend NUMBER: /0x\w+/
+```
+
+For both `%extend` and `%override`, there is no requirement for the rule/terminal to come from another file, but that is probably the most common use case.
\ No newline at end of file
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 7383c17..0915c33 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -95,6 +95,7 @@ TERMINALS = {
     '_IGNORE': r'%ignore',
     '_OVERRIDE': r'%override',
     '_DECLARE': r'%declare',
+    '_EXTEND': r'%extend',
     '_IMPORT': r'%import',
     'NUMBER': r'[+-]?\d+',
 }
@@ -149,8 +150,11 @@ RULES = {
     'term': ['TERMINAL _COLON expansions _NL',
              'TERMINAL _DOT NUMBER _COLON expansions _NL'],

-    'statement': ['ignore', 'import', 'declare', 'override'],
-    'override': ['_OVERRIDE rule', '_OVERRIDE term'],
+    'statement': ['ignore', 'import', 'declare', 'override', 'extend'],
+    'override': ['_OVERRIDE rule',
+                 '_OVERRIDE term'],
+    'extend': ['_EXTEND rule',
+               '_EXTEND term'],
     'ignore': ['_IGNORE expansions _NL'],
     'declare': ['_DECLARE _declare_args _NL'],
     'import': ['_IMPORT _import_path _NL',
@@ -744,8 +748,8 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
     with a 'namespace' prefix, except for those which are aliased.
""" - imported_terms = dict(grammar.term_defs) - imported_rules = {n:(n,p,deepcopy(t),o) for n,p,t,o in grammar.rule_defs} + imported_terms = {n: (deepcopy(e), p) for n, (e, p) in grammar.term_defs} + imported_rules = {n: (n, p, deepcopy(t), o) for n, p, t, o in grammar.rule_defs} term_defs = [] rule_defs = [] @@ -858,6 +862,14 @@ def _find_used_symbols(tree): return {t for x in tree.find_data('expansion') for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} +def extend_expansions(tree, new): + assert isinstance(tree, Tree) and tree.data == 'expansions' + assert isinstance(new, Tree) and new.data == 'expansions' + while len(tree.children) == 2: + assert isinstance(tree.children[0], Tree) and tree.children[0].data == 'expansions', tree + tree = tree.children[0] + tree.children.insert(0, new) + class GrammarLoader: ERRORS = [ @@ -951,6 +963,8 @@ class GrammarLoader: ignore, imports = [], {} overriding_rules = [] overriding_terms = [] + extend_rules = [] + extend_terms = [] for (stmt,) in statements: if stmt.data == 'ignore': t ,= stmt.children @@ -1008,6 +1022,15 @@ class GrammarLoader: overriding_terms.append((r.children[0].value, (r.children[1], 1))) else: overriding_terms.append((r.children[0].value, (r.children[2], int(r.children[1])))) + elif stmt.data == 'extend': + r ,= stmt.children + if r.data == 'rule': + extend_rules.append(options_from_rule(*r.children)) + else: + if len(r.children) == 2: + extend_terms.append((r.children[0].value, (r.children[1], 1))) + else: + extend_terms.append((r.children[0].value, (r.children[2], int(r.children[1])))) else: assert False, stmt @@ -1037,8 +1060,30 @@ class GrammarLoader: if not overridden: raise GrammarError("Cannot override a nonexisting terminal: %s" % name) term_defs.append(t) + + # Extend the definition of rules + for r in extend_rules: + name = r[0] + # remove overridden rule from rule_defs + for old in rule_defs: + if old[0] == name: + if len(old[1]) != len(r[1]): + raise GrammarError("Cannot extend templates with different parameters: %s" % name) + extend_expansions(old[2], r[2]) + break + else: + raise GrammarError("Can't extend rule %s as it wasn't defined before" % name) + # Same for terminals + + for name, (e, _) in extend_terms: + for old in term_defs: + if old[0] == name: + extend_expansions(old[1][0], e) + break + else: + raise GrammarError("Can't extend terminal %s as it wasn't defined before" % name) ## Handle terminals # Verify correctness 1 diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 760d563..ad29c75 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -3,7 +3,7 @@ from __future__ import absolute_import import sys from unittest import TestCase, main -from lark import Lark, Token +from lark import Lark, Token, Tree from lark.load_grammar import GrammarLoader, GrammarError @@ -40,12 +40,30 @@ class TestGrammar(TestCase): %import .grammars.ab (startab, A, B) - %override A: "C" - %override B: "D" + %override A: "c" + %override B: "d" """, start='startab', source_path=__file__) - a = p.parse('CD') - self.assertEqual(a.children[0].children, [Token('A', 'C'), Token('B', 'D')]) + a = p.parse('cd') + self.assertEqual(a.children[0].children, [Token('A', 'c'), Token('B', 'd')]) + + def test_extend_rule(self): + p = Lark(""" + %import .grammars.ab (startab, A, B, expr) + + %extend expr: B A + """, start='startab', source_path=__file__) + a = p.parse('abab') + self.assertEqual(a.children[0].children, ['a', Tree('expr', ['b', 'a']), 'b']) + + def test_extend_term(self): + p = Lark(""" + 
%import .grammars.ab (startab, A, B, expr) + + %extend A: "c" + """, start='startab', source_path=__file__) + a = p.parse('acbb') + self.assertEqual(a.children[0].children, ['a', Tree('expr', ['c', 'b']), 'b']) From 9e545f88253824f0afbdc41aa816599bf624be71 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sat, 26 Dec 2020 00:14:26 +0100 Subject: [PATCH 03/10] Basics for GrammarBuilder --- lark/lark.py | 83 +++++++++-------- lark/load_grammar.py | 212 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 216 insertions(+), 79 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 3e0a51f..b1b9270 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -7,7 +7,7 @@ import tempfile from warnings import warn from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger -from .load_grammar import load_grammar, FromPackageLoader +from .load_grammar import load_grammar, FromPackageLoader, Grammar from .tree import Tree from .common import LexerConf, ParserConf @@ -234,42 +234,50 @@ class Lark(Serialize): else: grammar = read() - assert isinstance(grammar, STRING_TYPE) - self.source_grammar = grammar - if self.options.use_bytes: - if not isascii(grammar): - raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") - if sys.version_info[0] == 2 and self.options.use_bytes != 'force': - raise ConfigurationError("`use_bytes=True` may have issues on python2." - "Use `use_bytes='force'` to use it at your own risk.") - cache_fn = None - if self.options.cache: - if self.options.parser != 'lalr': - raise ConfigurationError("cache only works with parser='lalr' for now") - if isinstance(self.options.cache, STRING_TYPE): - cache_fn = self.options.cache - else: - if self.options.cache is not True: - raise ConfigurationError("cache argument must be bool or str") - unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') - from . import __version__ - options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) - s = grammar + options_str + __version__ - md5 = hashlib.md5(s.encode()).hexdigest() - cache_fn = tempfile.gettempdir() + '/.lark_cache_%s.tmp' % md5 - - if FS.exists(cache_fn): - logger.debug('Loading grammar from cache: %s', cache_fn) - # Remove options that aren't relevant for loading from cache - for name in (set(options) - _LOAD_ALLOWED_OPTIONS): - del options[name] - with FS.open(cache_fn, 'rb') as f: - try: - self._load(f, **options) - except Exception: - raise RuntimeError("Failed to load Lark from cache: %r. Try to delete the file and run again." % cache_fn) - return + if isinstance(grammar, STRING_TYPE): + self.source_grammar = grammar + if self.options.use_bytes: + if not isascii(grammar): + raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") + if sys.version_info[0] == 2 and self.options.use_bytes != 'force': + raise ConfigurationError("`use_bytes=True` may have issues on python2." + "Use `use_bytes='force'` to use it at your own risk.") + + if self.options.cache: + if self.options.parser != 'lalr': + raise ConfigurationError("cache only works with parser='lalr' for now") + if isinstance(self.options.cache, STRING_TYPE): + cache_fn = self.options.cache + else: + if self.options.cache is not True: + raise ConfigurationError("cache argument must be bool or str") + unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') + from . 
import __version__ + options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) + s = grammar + options_str + __version__ + md5 = hashlib.md5(s.encode()).hexdigest() + cache_fn = tempfile.gettempdir() + '/.lark_cache_%s.tmp' % md5 + + if FS.exists(cache_fn): + logger.debug('Loading grammar from cache: %s', cache_fn) + # Remove options that aren't relevant for loading from cache + for name in (set(options) - _LOAD_ALLOWED_OPTIONS): + del options[name] + with FS.open(cache_fn, 'rb') as f: + try: + self._load(f, **options) + except Exception: + raise RuntimeError("Failed to load Lark from cache: %r. Try to delete the file and run again." % cache_fn) + return + + + # Parse the grammar file and compose the grammars + self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) + else: + assert isinstance(grammar, Grammar) + self.grammar = grammar + if self.options.lexer == 'auto': if self.options.parser == 'lalr': @@ -301,9 +309,6 @@ class Lark(Serialize): if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) - # Parse the grammar file and compose the grammars - self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) - if self.options.postlex is not None: terminals_to_keep = set(self.options.postlex.always_accept) else: diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 0915c33..78f8fee 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -302,15 +302,6 @@ class RuleTreeToText(Transformer): return expansion, alias.value -@inline_args -class CanonizeTree(Transformer_InPlace): - def tokenmods(self, *args): - if len(args) == 1: - return list(args) - tokenmods, value = args - return tokenmods + [value] - - class PrepareAnonTerminals(Transformer_InPlace): """Create a unique list of anonymous terminals. 
Attempt to give meaningful names to them when we add them""" @@ -871,8 +862,26 @@ def extend_expansions(tree, new): tree.children.insert(0, new) -class GrammarLoader: - ERRORS = [ + +def _grammar_parser(): + try: + return _grammar_parser.cache + except AttributeError: + terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] + + rules = [options_from_rule(name, None, x) for name, x in RULES.items()] + rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) + for r, _p, xs, o in rules for i, x in enumerate(xs)] + callback = ParseTreeBuilder(rules, ST).create_callback() + import re + lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) + parser_conf = ParserConf(rules, callback, ['start']) + lexer_conf.lexer_type = 'standard' + parser_conf.parser_type = 'lalr' + _grammar_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {}) + return _grammar_parser.cache + +_GRAMMAR_ERRORS = [ ('Unclosed parenthesis', ['a: (\n']), ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), @@ -886,21 +895,39 @@ class GrammarLoader: ('%ignore expects a value', ['%ignore %import\n']), ] - def __init__(self, global_keep_all_tokens): - terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] +def _parse_grammar(text, name, start='start'): + try: + return _grammar_parser().parse(text + '\n', start) + except UnexpectedCharacters as e: + context = e.get_context(text) + raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" % + (e.line, e.column, name, context)) + except UnexpectedToken as e: + context = e.get_context(text) + error = e.match_examples(_grammar_parser().parse, _GRAMMAR_ERRORS, use_accepts=True) + if error: + raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context)) + elif 'STRING' in e.expected: + raise GrammarError("Expecting a value at line %s column %s\n\n%s" % (e.line, e.column, context)) + raise - rules = [options_from_rule(name, None, x) for name, x in RULES.items()] - rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) - for r, _p, xs, o in rules for i, x in enumerate(xs)] - callback = ParseTreeBuilder(rules, ST).create_callback() - import re - lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) - parser_conf = ParserConf(rules, callback, ['start']) - lexer_conf.lexer_type = 'standard' - parser_conf.parser_type = 'lalr' - self.parser = ParsingFrontend(lexer_conf, parser_conf, {}) - self.canonize_tree = CanonizeTree() +class GrammarLoader: + ERRORS = [ + ('Unclosed parenthesis', ['a: (\n']), + ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), + ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), + ('Illegal name for rules or terminals', ['Aa:\n']), + ('Alias expects lowercase name', ['a: -> "a"\n']), + ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']), + ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']), + ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']), + ('Terminal names cannot contain dots', ['A.B\n']), + ('%import expects a name', ['%import "a"\n']), + ('%ignore expects a value', ['%ignore %import\n']), + ] + + def __init__(self, global_keep_all_tokens=False): self.global_keep_all_tokens = global_keep_all_tokens def import_grammar(self, grammar_path, base_path=None, 
import_paths=[]): @@ -931,21 +958,7 @@ class GrammarLoader: def load_grammar(self, grammar_text, grammar_name='', import_paths=[]): """Parse grammar_text, verify, and create Grammar object. Display nice messages on error.""" - try: - tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n')) - except UnexpectedCharacters as e: - context = e.get_context(grammar_text) - raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" % - (e.line, e.column, grammar_name, context)) - except UnexpectedToken as e: - context = e.get_context(grammar_text) - error = e.match_examples(self.parser.parse, self.ERRORS, use_accepts=True) - if error: - raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context)) - elif 'STRING' in e.expected: - raise GrammarError("Expecting a value at line %s column %s\n\n%s" % (e.line, e.column, context)) - raise - + tree = _parse_grammar(grammar_text+'\n', grammar_name) tree = PrepareGrammar().transform(tree) # Extract grammar items @@ -1061,7 +1074,7 @@ class GrammarLoader: raise GrammarError("Cannot override a nonexisting terminal: %s" % name) term_defs.append(t) - # Extend the definition of rules + # Extend the definition of rules by adding new entries to the `expansions` node for r in extend_rules: name = r[0] @@ -1162,5 +1175,124 @@ class GrammarLoader: return Grammar(rule_defs, term_defs, ignore_names) +class GrammarBuilder: + def __init__(self, global_keep_all_tokens=False, import_paths=None): + self.global_keep_all_tokens = global_keep_all_tokens + self.import_paths = import_paths or [] + + self._term_defs = {} + self._rule_defs = {} + self._ignore_names = [] + + def define_term(self, name, exp, priority=1, override=False): + if (name in self._term_defs) ^ override: + if override: + raise GrammarError("Cannot override a nonexisting terminal" % name) + else: + raise GrammarError("Terminal '%s' defined more than once" % name) + if name.startswith('__'): + raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) + self._term_defs[name] = (exp, priority) + + def define_rule(self, name, params, exp, options, override=False): + if (name in self._rule_defs) ^ override: + if override: + raise GrammarError("Cannot override a nonexisting rule: %s" % name) + else: + raise GrammarError("Rule '%s' defined more than once" % name) + if name.startswith('__'): + raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) + self._rule_defs[name] = (params, exp, options) + + def extend_term(self, name, exp, priority=1): + if name not in self._term_defs: + raise GrammarError("Can't extend terminal %s as it wasn't defined before" % name) + old_expansions = self._term_defs[name][0] + extend_expansions(old_expansions, exp) + + def extend_rule(self, name, params, exp, options): + if name not in self._rule_defs: + raise GrammarError("Can't extend rule %s as it wasn't defined before" % name) + if params != self._rule_defs[name][0]: + raise GrammarError("Cannot extend templates with different parameters: %s" % name) + # TODO: think about what to do with RuleOptions + old_expansions = self._rule_defs[name][1] + extend_expansions(old_expansions, exp) + + def ignore(self, exp_or_name): + if isinstance(exp_or_name, str): + self._ignore_names.append(exp_or_name) + else: + assert isinstance(exp_or_name, Tree) + t = exp_or_name + if t.data=='expansions' and len(t.children) == 1: + t2 ,= t.children + if t2.data=='expansion' and len(t2.children) == 1: + item ,= t2.children + 
if item.data == 'value': + item ,= item.children + if isinstance(item, Token) and item.type == 'TERMINAL': + self._ignore_names.append(item.value) + return + + name = '__IGNORE_%d'% len(self._ignore_names) + self._ignore_names.append(name) + self._term_defs[name] = (t, 1) + + def declare(self, *names): + for name in names: + self.define_term(name, None, None) + + def _unpack_term_def(self, tree): + name = tree.children[0].value + exp = tree.children[-1] + p = int(tree.children[1]) if len(tree.children) == 3 else 1 + return name, exp, p + + def _unpack_rule_def(self, tree): + # FIXME: A little pointless at the moment, but I want to rework this (e.g. move the work from `options_from_rule` to here) + r = options_from_rule(*tree.children) + return r + + def load_grammar(self, grammar_text, grammar_source=""): + tree = _parse_grammar(grammar_text, grammar_source) + for stmt in tree.children: + if stmt.data == 'term': + self.define_term(*self._unpack_term_def(stmt)) + continue + elif stmt.data == 'rule': + self.define_rule(*self._unpack_rule_def(stmt)) + continue + assert stmt.data == 'statement', stmt.data + stmt ,= stmt.children + if stmt.data == 'ignore': + self.ignore(*stmt.children) + elif stmt.data == 'declare': + self.declare(*(t.value for t in stmt.children)) + elif stmt.data == 'override': + r ,= stmt.children + if r.data == 'rule': + self.define_rule(*self._unpack_rule_def(r), override=True) + else: + assert r.data == 'term' + self.define_term(*self._unpack_term_def(r), override=True) + elif stmt.data == 'extend': + r ,= stmt.children + if r.data == 'rule': + self.extend_rule(*self._unpack_rule_def(r)) + else: + assert r.data == 'term' + self.extend_term(*self._unpack_term_def(r)) + else: + assert False, stmt + + def check(self): + pass + + def build(self) -> Grammar: + return Grammar([(n, *r) for n, r in self._rule_defs.items()], + [(n, t) for n, t in self._term_defs], + self._ignore_names) + def load_grammar(grammar, source, import_paths, global_keep_all_tokens): return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths) From 225c9e16a51bef87af1260a2b308a3809ac9c75f Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 27 Dec 2020 19:10:12 +0100 Subject: [PATCH 04/10] Added `%import` to GrammarLoader & fixed tests --- lark/load_grammar.py | 293 +++++++++++++++++++++++++++++------------- tests/test_grammar.py | 4 +- tests/test_parser.py | 2 +- 3 files changed, 207 insertions(+), 92 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 78f8fee..750695c 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -881,7 +881,7 @@ def _grammar_parser(): _grammar_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {}) return _grammar_parser.cache -_GRAMMAR_ERRORS = [ +GRAMMAR_ERRORS = [ ('Unclosed parenthesis', ['a: (\n']), ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), @@ -897,14 +897,14 @@ _GRAMMAR_ERRORS = [ def _parse_grammar(text, name, start='start'): try: - return _grammar_parser().parse(text + '\n', start) + return PrepareGrammar().transform(_grammar_parser().parse(text + '\n', start)) except UnexpectedCharacters as e: context = e.get_context(text) raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" % (e.line, e.column, name, context)) except UnexpectedToken as e: context = e.get_context(text) - error = e.match_examples(_grammar_parser().parse, _GRAMMAR_ERRORS, use_accepts=True) + 
error = e.match_examples(_grammar_parser().parse, GRAMMAR_ERRORS, use_accepts=True) if error: raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context)) elif 'STRING' in e.expected: @@ -913,19 +913,6 @@ def _parse_grammar(text, name, start='start'): class GrammarLoader: - ERRORS = [ - ('Unclosed parenthesis', ['a: (\n']), - ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), - ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), - ('Illegal name for rules or terminals', ['Aa:\n']), - ('Alias expects lowercase name', ['a: -> "a"\n']), - ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']), - ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']), - ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']), - ('Terminal names cannot contain dots', ['A.B\n']), - ('%import expects a name', ['%import "a"\n']), - ('%ignore expects a value', ['%ignore %import\n']), - ] def __init__(self, global_keep_all_tokens=False): self.global_keep_all_tokens = global_keep_all_tokens @@ -959,7 +946,7 @@ class GrammarLoader: """Parse grammar_text, verify, and create Grammar object. Display nice messages on error.""" tree = _parse_grammar(grammar_text+'\n', grammar_name) - tree = PrepareGrammar().transform(tree) + # tree = PrepareGrammar().transform(tree) # Extract grammar items defs = classify(tree.children, lambda c: c.data, lambda c: c.children) @@ -1167,7 +1154,7 @@ class GrammarLoader: for sym in _find_used_symbols(expansions): if sym.type == 'TERMINAL': if sym not in terminal_names: - raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) + raise GrammarError("Terminal '%s' used but not defined (in rule %s)" % (sym, name)) else: if sym not in rule_names and sym not in params: raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) @@ -1180,52 +1167,63 @@ class GrammarBuilder: self.global_keep_all_tokens = global_keep_all_tokens self.import_paths = import_paths or [] - self._term_defs = {} - self._rule_defs = {} + self._definitions = {} + self._extend = {} + self._override = {} self._ignore_names = [] + self._import_set = {} + + def _is_term(self, name): + return name.isupper() - def define_term(self, name, exp, priority=1, override=False): - if (name in self._term_defs) ^ override: - if override: - raise GrammarError("Cannot override a nonexisting terminal" % name) - else: - raise GrammarError("Terminal '%s' defined more than once" % name) - if name.startswith('__'): - raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) - self._term_defs[name] = (exp, priority) + def _grammer_error(self, msg, name): + low_type = ("rule", "terminal")[self._is_term(name)] + up_type = low_type.title() + raise GrammarError(msg.format(name=name, type=low_type, Type=up_type)) - def define_rule(self, name, params, exp, options, override=False): - if (name in self._rule_defs) ^ override: + def _check_options(self, name, options): + if self._is_term(name): + if options is None: + options = 1 + elif not isinstance(options, int): + raise GrammarError("Terminal require a single int as 'options' (e.g. 
priority)") + else: + if options is None: + options = RuleOptions() + elif not isinstance(options, RuleOptions): + raise GrammarError("Rules require a RuleOptions instance as 'options'") + if self.global_keep_all_tokens: + options.keep_all_tokens = True + return options + + + def define(self, name, exp, params=(), options=None, override=False): + if (name in self._definitions) ^ override: if override: - raise GrammarError("Cannot override a nonexisting rule: %s" % name) + self._grammer_error("Cannot override a nonexisting {type} {name}", name) else: - raise GrammarError("Rule '%s' defined more than once" % name) + self._grammer_error("{Type} '{name}' defined more than once", name) if name.startswith('__'): - raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) - self._rule_defs[name] = (params, exp, options) - - def extend_term(self, name, exp, priority=1): - if name not in self._term_defs: - raise GrammarError("Can't extend terminal %s as it wasn't defined before" % name) - old_expansions = self._term_defs[name][0] - extend_expansions(old_expansions, exp) - - def extend_rule(self, name, params, exp, options): - if name not in self._rule_defs: - raise GrammarError("Can't extend rule %s as it wasn't defined before" % name) - if params != self._rule_defs[name][0]: - raise GrammarError("Cannot extend templates with different parameters: %s" % name) - # TODO: think about what to do with RuleOptions - old_expansions = self._rule_defs[name][1] + self._grammer_error('Names starting with double-underscore are reserved (Error at {name})', name) + self._definitions[name] = (params, exp, self._check_options(name, options)) + + def extend(self, name, exp, params=(), options=None): + if name not in self._definitions: + self._grammer_error("Can't extend {type} {name} as it wasn't defined before", name) + if tuple(params) != tuple(self._definitions[name][0]): + print(params, self._definitions[name][0]) + self._grammer_error("Cannot extend {type} with different parameters: {name}", name) + # TODO: think about what to do with 'options' + old_expansions = self._definitions[name][1] extend_expansions(old_expansions, exp) - + def ignore(self, exp_or_name): if isinstance(exp_or_name, str): self._ignore_names.append(exp_or_name) else: assert isinstance(exp_or_name, Tree) t = exp_or_name - if t.data=='expansions' and len(t.children) == 1: + if t.data == 'expansions' and len(t.children) == 1: t2 ,= t.children if t2.data=='expansion' and len(t2.children) == 1: item ,= t2.children @@ -1237,62 +1235,179 @@ class GrammarBuilder: name = '__IGNORE_%d'% len(self._ignore_names) self._ignore_names.append(name) - self._term_defs[name] = (t, 1) + self._definitions[name] = ((), t, 1) def declare(self, *names): for name in names: - self.define_term(name, None, None) + self.define(name, None) + # TODO: options/priority gets filled by this. 
We have to make sure that this doesn't break anything + + def _mangle_exp(self, exp, mangle): + if mangle is None: + return exp + exp = deepcopy(exp) # TODO: is this needed + for t in exp.iter_subtrees(): + for i, c in enumerate(t.children): + if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): + t.children[i] = Token(c.type, mangle(c.value)) + return exp + - def _unpack_term_def(self, tree): - name = tree.children[0].value - exp = tree.children[-1] - p = int(tree.children[1]) if len(tree.children) == 3 else 1 - return name, exp, p + def _unpack_definition(self, tree, mangle): + if tree.data == 'rule': + name, params, exp, opts = options_from_rule(*tree.children) + else: + name = tree.children[0].value + params = () + opts = int(tree.children[1]) if len(tree.children) == 3 else 1 # priority + exp = tree.children[-1] + if mangle is not None: + params = tuple(mangle(p) for p in params) + name = mangle(name) + exp = self._mangle_exp(exp, mangle) + return name, exp, params, opts - def _unpack_rule_def(self, tree): - # FIXME: A little pointless at the moment, but I want to rework this (e.g. move the work from `options_from_rule` to here) - r = options_from_rule(*tree.children) - return r + def _unpack_import(self, stmt, grammar_name): + if len(stmt.children) > 1: + path_node, arg1 = stmt.children + else: + path_node, = stmt.children + arg1 = None + + if isinstance(arg1, Tree): # Multi import + dotted_path = tuple(path_node.children) + names = arg1.children + aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names + else: # Single import + dotted_path = tuple(path_node.children[:-1]) + name = path_node.children[-1] # Get name from dotted path + aliases = {name.value: (arg1 or name).value} # Aliases if exist + + if path_node.data == 'import_lib': # Import from library + base_path = None + else: # Relative import + if grammar_name == '': # Import relative to script file path if grammar is coded in script + try: + base_file = os.path.abspath(sys.modules['__main__'].__file__) + except AttributeError: + base_file = None + else: + base_file = grammar_name # Import relative to grammar file path if external grammar file + if base_file: + if isinstance(base_file, PackageResource): + base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0]) + else: + base_path = os.path.split(base_file)[0] + else: + base_path = os.path.abspath(os.path.curdir) + + return dotted_path, base_path, aliases - def load_grammar(self, grammar_text, grammar_source=""): - tree = _parse_grammar(grammar_text, grammar_source) + def load_grammar(self, grammar_text, grammar_name="", mangle=None): + tree = _parse_grammar(grammar_text, grammar_name) + imports = {} # imports are collect over the whole file to prevent duplications + actions = [] # Some statements need to be delayed (override and extend) till after imports are handled for stmt in tree.children: - if stmt.data == 'term': - self.define_term(*self._unpack_term_def(stmt)) - continue - elif stmt.data == 'rule': - self.define_rule(*self._unpack_rule_def(stmt)) + if stmt.data in ('term', 'rule'): + self.define(*self._unpack_definition(stmt, mangle)) continue assert stmt.data == 'statement', stmt.data stmt ,= stmt.children - if stmt.data == 'ignore': - self.ignore(*stmt.children) + if stmt.data == 'import': + dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name) + try: + import_base_path, import_aliases = imports[dotted_path] + assert base_path == import_base_path, 'Inconsistent 
base_path for %s.' % '.'.join(dotted_path) + import_aliases.update(aliases) + except KeyError: + imports[dotted_path] = base_path, aliases + elif stmt.data == 'ignore': + # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar + if mangle is None: + self.ignore(*stmt.children) elif stmt.data == 'declare': - self.declare(*(t.value for t in stmt.children)) + if mangle is None: + self.declare(*(t.value for t in stmt.children)) + else: + self.declare(*(mangle(t.value) for t in stmt.children)) elif stmt.data == 'override': r ,= stmt.children - if r.data == 'rule': - self.define_rule(*self._unpack_rule_def(r), override=True) - else: - assert r.data == 'term' - self.define_term(*self._unpack_term_def(r), override=True) + actions.append((self.define, self._unpack_definition(r, mangle)+ (True,))) elif stmt.data == 'extend': r ,= stmt.children - if r.data == 'rule': - self.extend_rule(*self._unpack_rule_def(r)) - else: - assert r.data == 'term' - self.extend_term(*self._unpack_term_def(r)) + actions.append((self.extend, self._unpack_definition(r, mangle))) else: assert False, stmt + + for dotted_path, (base_path, aliases) in imports.items(): + self.do_import(dotted_path, base_path, aliases, mangle) + + for f, args in actions: + f(*args) + def do_import(self, dotted_path, base_path, aliases, base_mangle=None): + mangle = self.get_mangle('__'.join(dotted_path), aliases, base_mangle) + grammar_path = os.path.join(*dotted_path) + EXT + to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader] + for source in to_try: + try: + if callable(source): + joined_path, text = source(base_path, grammar_path) + else: + joined_path = os.path.join(source, grammar_path) + with open(joined_path, encoding='utf8') as f: + text = f.read() + except IOError: + continue + else: + self.load_grammar(text, joined_path, mangle) + break + else: + # Search failed. Make Python throw a nice error. + open(grammar_path, encoding='utf8') + assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,) + + def get_mangle(self, prefix, aliases, base_mangle=None): + prefixes = (prefix, prefix.upper()) + def mangle(s): + if s in aliases: + s = aliases[s] + else: + ns = prefixes[self._is_term(s)] + if s[0] == '_': + s = '_%s__%s' % (ns, s[1:]) + else: + s = '%s__%s' % (ns, s) + if base_mangle is not None: + s = base_mangle(s) + return s + return mangle + def check(self): - pass - - def build(self) -> Grammar: - return Grammar([(n, *r) for n, r in self._rule_defs.items()], - [(n, t) for n, t in self._term_defs], - self._ignore_names) + for name, (params, exp, options) in self._definitions.items(): + if self._is_term(name): + assert isinstance(options, int) + if exp is not None: + for sym in _find_used_symbols(exp): + if sym not in self._definitions and sym not in params: + self._grammer_error("{Type} '{name}' used but not defined (in rule %s)" % name, sym.value) + if not set(self._definitions).issuperset(self._ignore_names): + raise GrammarError("Terminals %s were marked to ignore but were not defined!" 
% (set(self._ignore_names) - set(self._definitions))) + + def build(self): + self.check() + rule_defs = [] + term_defs = [] + for name, (params, exp, options) in self._definitions.items(): + if self._is_term(name): + term_defs.append((name, (exp, options))) + else: + rule_defs.append((name, params, exp, options)) + resolve_term_references(term_defs) + return Grammar(rule_defs, term_defs, self._ignore_names) def load_grammar(grammar, source, import_paths, global_keep_all_tokens): - return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths) + builder = GrammarBuilder(global_keep_all_tokens, import_paths) + builder.load_grammar(grammar, source) + return builder.build() + # return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths) diff --git a/tests/test_grammar.py b/tests/test_grammar.py index ad29c75..221fbc0 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -4,7 +4,7 @@ import sys from unittest import TestCase, main from lark import Lark, Token, Tree -from lark.load_grammar import GrammarLoader, GrammarError +from lark.load_grammar import GrammarError, GRAMMAR_ERRORS class TestGrammar(TestCase): @@ -12,7 +12,7 @@ class TestGrammar(TestCase): pass def test_errors(self): - for msg, examples in GrammarLoader.ERRORS: + for msg, examples in GRAMMAR_ERRORS: for example in examples: try: p = Lark(example) diff --git a/tests/test_parser.py b/tests/test_parser.py index 9b011f7..9be348e 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1961,7 +1961,7 @@ def _make_parser_test(LEXER, PARSER): p = _Lark(grammar, import_paths=[custom_loader]) self.assertEqual(p.parse('ab'), - Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) + Tree('start', [Tree('startab', [Tree('ab__expr', [Token('AB__A', 'a'), Token('AB__B', 'b')])])])) grammar = """ start: rule_to_import From 3bf7c6a19a0a6035154a5df7ce907c7a74d6f0f4 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Thu, 31 Dec 2020 15:54:16 +0100 Subject: [PATCH 05/10] Completed `GrammarBuilder.check` --- lark/load_grammar.py | 79 ++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 750695c..9107e24 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -1168,18 +1168,19 @@ class GrammarBuilder: self.import_paths = import_paths or [] self._definitions = {} - self._extend = {} - self._override = {} self._ignore_names = [] - self._import_set = {} def _is_term(self, name): return name.isupper() - def _grammer_error(self, msg, name): - low_type = ("rule", "terminal")[self._is_term(name)] - up_type = low_type.title() - raise GrammarError(msg.format(name=name, type=low_type, Type=up_type)) + def _grammar_error(self, msg, *names): + args = {} + for i, name in enumerate(names, start=1): + postfix = '' if i == 1 else str(i) + args['name'+ postfix] = name + args['type' + postfix] = lowercase_type = ("rule", "terminal")[self._is_term(name)] + args['Type' + postfix] = lowercase_type.title() + raise GrammarError(msg.format(**args)) def _check_options(self, name, options): if self._is_term(name): @@ -1197,27 +1198,26 @@ class GrammarBuilder: return options - def define(self, name, exp, params=(), options=None, override=False): + def _define(self, name, exp, params=(), options=None, override=False): if (name in self._definitions) ^ override: if override: - self._grammer_error("Cannot override a nonexisting {type} {name}", name) + 
self._grammar_error("Cannot override a nonexisting {type} {name}", name) else: - self._grammer_error("{Type} '{name}' defined more than once", name) + self._grammar_error("{Type} '{name}' defined more than once", name) if name.startswith('__'): - self._grammer_error('Names starting with double-underscore are reserved (Error at {name})', name) + self._grammar_error('Names starting with double-underscore are reserved (Error at {name})', name) self._definitions[name] = (params, exp, self._check_options(name, options)) - def extend(self, name, exp, params=(), options=None): + def _extend(self, name, exp, params=(), options=None): if name not in self._definitions: - self._grammer_error("Can't extend {type} {name} as it wasn't defined before", name) + self._grammar_error("Can't extend {type} {name} as it wasn't defined before", name) if tuple(params) != tuple(self._definitions[name][0]): - print(params, self._definitions[name][0]) - self._grammer_error("Cannot extend {type} with different parameters: {name}", name) + self._grammar_error("Cannot extend {type} with different parameters: {name}", name) # TODO: think about what to do with 'options' old_expansions = self._definitions[name][1] extend_expansions(old_expansions, exp) - def ignore(self, exp_or_name): + def _ignore(self, exp_or_name): if isinstance(exp_or_name, str): self._ignore_names.append(exp_or_name) else: @@ -1237,10 +1237,9 @@ class GrammarBuilder: self._ignore_names.append(name) self._definitions[name] = ((), t, 1) - def declare(self, *names): + def _declare(self, *names): for name in names: - self.define(name, None) - # TODO: options/priority gets filled by this. We have to make sure that this doesn't break anything + self._define(name, None) def _mangle_exp(self, exp, mangle): if mangle is None: @@ -1309,7 +1308,7 @@ class GrammarBuilder: actions = [] # Some statements need to be delayed (override and extend) till after imports are handled for stmt in tree.children: if stmt.data in ('term', 'rule'): - self.define(*self._unpack_definition(stmt, mangle)) + self._define(*self._unpack_definition(stmt, mangle)) continue assert stmt.data == 'statement', stmt.data stmt ,= stmt.children @@ -1324,18 +1323,18 @@ class GrammarBuilder: elif stmt.data == 'ignore': # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar if mangle is None: - self.ignore(*stmt.children) + self._ignore(*stmt.children) elif stmt.data == 'declare': if mangle is None: - self.declare(*(t.value for t in stmt.children)) + self._declare(*(t.value for t in stmt.children)) else: - self.declare(*(mangle(t.value) for t in stmt.children)) + self._declare(*(mangle(t.value) for t in stmt.children)) elif stmt.data == 'override': r ,= stmt.children - actions.append((self.define, self._unpack_definition(r, mangle)+ (True,))) + actions.append((self._define, self._unpack_definition(r, mangle) + (True,))) elif stmt.data == 'extend': r ,= stmt.children - actions.append((self.extend, self._unpack_definition(r, mangle))) + actions.append((self._extend, self._unpack_definition(r, mangle))) else: assert False, stmt @@ -1387,10 +1386,31 @@ class GrammarBuilder: for name, (params, exp, options) in self._definitions.items(): if self._is_term(name): assert isinstance(options, int) - if exp is not None: - for sym in _find_used_symbols(exp): - if sym not in self._definitions and sym not in params: - self._grammer_error("{Type} '{name}' used but not defined (in rule %s)" % name, sym.value) + + for i, p in enumerate(params): + if p in self._definitions: + raise 
GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) + if p in params[:i]: + raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name)) + + if exp is None: # Remaining checks don't work for abstract rules/terminals + continue + + for temp in exp.find_data('template_usage'): + sym = temp.children[0] + args = temp.children[1:] + if sym not in params: + if sym not in self._definitions: + self._grammar_error("Template '%s' used but not defined (in {type} {name})" % sym, name) + if len(args) != len(self._definitions[sym][0]): + expected, actual = len(self._definitions[sym][0]), len(args) + self._grammar_error("Wrong number of template arguments used for {name} " + "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name) + + for sym in _find_used_symbols(exp): + if sym not in self._definitions and sym not in params: + self._grammar_error("{Type} '{name}' used but not defined (in {type2} {name2})", sym, name) + if not set(self._definitions).issuperset(self._ignore_names): raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions))) @@ -1400,6 +1420,7 @@ class GrammarBuilder: term_defs = [] for name, (params, exp, options) in self._definitions.items(): if self._is_term(name): + assert len(params) == 0 term_defs.append((name, (exp, options))) else: rule_defs.append((name, params, exp, options)) From d676df9b888ead42daffd31c035d95241bff0920 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Thu, 31 Dec 2020 17:46:09 +0100 Subject: [PATCH 06/10] Python2.7 compatibility --- lark/grammars/common.lark | 2 +- lark/load_grammar.py | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lark/grammars/common.lark b/lark/grammars/common.lark index 1158026..d2e86d1 100644 --- a/lark/grammars/common.lark +++ b/lark/grammars/common.lark @@ -55,5 +55,5 @@ NEWLINE: (CR? LF)+ // Comments SH_COMMENT: /#[^\n]*/ CPP_COMMENT: /\/\/[^\n]*/ -C_COMMENT: "/*" /.*?/s "*/" +C_COMMENT: "/*" /(.|\n)*?/ "*/" SQL_COMMENT: /--[^\n]*/ diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 9107e24..5f18a4e 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -6,6 +6,7 @@ from copy import copy, deepcopy from io import open import pkgutil from ast import literal_eval +from numbers import Integral from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start from .lexer import Token, TerminalDef, PatternStr, PatternRE @@ -1177,7 +1178,7 @@ class GrammarBuilder: args = {} for i, name in enumerate(names, start=1): postfix = '' if i == 1 else str(i) - args['name'+ postfix] = name + args['name' + postfix] = name args['type' + postfix] = lowercase_type = ("rule", "terminal")[self._is_term(name)] args['Type' + postfix] = lowercase_type.title() raise GrammarError(msg.format(**args)) @@ -1186,8 +1187,9 @@ class GrammarBuilder: if self._is_term(name): if options is None: options = 1 - elif not isinstance(options, int): - raise GrammarError("Terminal require a single int as 'options' (e.g. priority)") + # if we don't use Integral here, we run into python2.7/python3 problems with long vs int + elif not isinstance(options, Integral): + raise GrammarError("Terminal require a single int as 'options' (e.g. 
priority), got %s" % (type(options),)) else: if options is None: options = RuleOptions() @@ -1384,9 +1386,6 @@ class GrammarBuilder: def check(self): for name, (params, exp, options) in self._definitions.items(): - if self._is_term(name): - assert isinstance(options, int) - for i, p in enumerate(params): if p in self._definitions: raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) From 9f3ef97bd7983039ae34fd6be3fea76743d6a7cd Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Thu, 31 Dec 2020 17:50:48 +0100 Subject: [PATCH 07/10] removed GrammarLoader --- lark/load_grammar.py | 268 +------------------------------------------ 1 file changed, 6 insertions(+), 262 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 5f18a4e..ff35052 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -854,15 +854,6 @@ def _find_used_symbols(tree): return {t for x in tree.find_data('expansion') for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} -def extend_expansions(tree, new): - assert isinstance(tree, Tree) and tree.data == 'expansions' - assert isinstance(new, Tree) and new.data == 'expansions' - while len(tree.children) == 2: - assert isinstance(tree.children[0], Tree) and tree.children[0].data == 'expansions', tree - tree = tree.children[0] - tree.children.insert(0, new) - - def _grammar_parser(): try: @@ -913,256 +904,6 @@ def _parse_grammar(text, name, start='start'): raise -class GrammarLoader: - - def __init__(self, global_keep_all_tokens=False): - self.global_keep_all_tokens = global_keep_all_tokens - - def import_grammar(self, grammar_path, base_path=None, import_paths=[]): - if grammar_path not in _imported_grammars: - # import_paths take priority over base_path since they should handle relative imports and ignore everything else. - to_try = import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader] - for source in to_try: - try: - if callable(source): - joined_path, text = source(base_path, grammar_path) - else: - joined_path = os.path.join(source, grammar_path) - with open(joined_path, encoding='utf8') as f: - text = f.read() - except IOError: - continue - else: - grammar = self.load_grammar(text, joined_path, import_paths) - _imported_grammars[grammar_path] = grammar - break - else: - # Search failed. Make Python throw a nice error. - open(grammar_path, encoding='utf8') - assert False - - return _imported_grammars[grammar_path] - - def load_grammar(self, grammar_text, grammar_name='', import_paths=[]): - """Parse grammar_text, verify, and create Grammar object. 
Display nice messages on error.""" - - tree = _parse_grammar(grammar_text+'\n', grammar_name) - # tree = PrepareGrammar().transform(tree) - - # Extract grammar items - defs = classify(tree.children, lambda c: c.data, lambda c: c.children) - term_defs = defs.pop('term', []) - rule_defs = defs.pop('rule', []) - statements = defs.pop('statement', []) - assert not defs - - term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs] - term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs] - rule_defs = [options_from_rule(*x) for x in rule_defs] - - # Execute statements - ignore, imports = [], {} - overriding_rules = [] - overriding_terms = [] - extend_rules = [] - extend_terms = [] - for (stmt,) in statements: - if stmt.data == 'ignore': - t ,= stmt.children - ignore.append(t) - elif stmt.data == 'import': - if len(stmt.children) > 1: - path_node, arg1 = stmt.children - else: - path_node ,= stmt.children - arg1 = None - - if isinstance(arg1, Tree): # Multi import - dotted_path = tuple(path_node.children) - names = arg1.children - aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names - else: # Single import - dotted_path = tuple(path_node.children[:-1]) - name = path_node.children[-1] # Get name from dotted path - aliases = {name: arg1 or name} # Aliases if exist - - if path_node.data == 'import_lib': # Import from library - base_path = None - else: # Relative import - if grammar_name == '': # Import relative to script file path if grammar is coded in script - try: - base_file = os.path.abspath(sys.modules['__main__'].__file__) - except AttributeError: - base_file = None - else: - base_file = grammar_name # Import relative to grammar file path if external grammar file - if base_file: - if isinstance(base_file, PackageResource): - base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0]) - else: - base_path = os.path.split(base_file)[0] - else: - base_path = os.path.abspath(os.path.curdir) - - try: - import_base_path, import_aliases = imports[dotted_path] - assert base_path == import_base_path, 'Inconsistent base_path for %s.' 
% '.'.join(dotted_path) - import_aliases.update(aliases) - except KeyError: - imports[dotted_path] = base_path, aliases - - elif stmt.data == 'declare': - for t in stmt.children: - term_defs.append([t.value, (None, None)]) - elif stmt.data == 'override': - r ,= stmt.children - if r.data == 'rule': - overriding_rules.append(options_from_rule(*r.children)) - else: - if len(r.children) == 2: - overriding_terms.append((r.children[0].value, (r.children[1], 1))) - else: - overriding_terms.append((r.children[0].value, (r.children[2], int(r.children[1])))) - elif stmt.data == 'extend': - r ,= stmt.children - if r.data == 'rule': - extend_rules.append(options_from_rule(*r.children)) - else: - if len(r.children) == 2: - extend_terms.append((r.children[0].value, (r.children[1], 1))) - else: - extend_terms.append((r.children[0].value, (r.children[2], int(r.children[1])))) - else: - assert False, stmt - - # import grammars - for dotted_path, (base_path, aliases) in imports.items(): - grammar_path = os.path.join(*dotted_path) + EXT - g = self.import_grammar(grammar_path, base_path=base_path, import_paths=import_paths) - new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) - - term_defs += new_td - rule_defs += new_rd - - # replace rules by overridding rules, according to name - for r in overriding_rules: - name = r[0] - # remove overridden rule from rule_defs - overridden, rule_defs = classify_bool(rule_defs, lambda r: r[0] == name) # FIXME inefficient - if not overridden: - raise GrammarError("Cannot override a nonexisting rule: %s" % name) - rule_defs.append(r) - - # Same for terminals - for t in overriding_terms: - name = t[0] - # remove overridden rule from rule_defs - overridden, term_defs = classify_bool(term_defs, lambda t: t[0] == name) # FIXME inefficient - if not overridden: - raise GrammarError("Cannot override a nonexisting terminal: %s" % name) - term_defs.append(t) - - # Extend the definition of rules by adding new entries to the `expansions` node - - for r in extend_rules: - name = r[0] - # remove overridden rule from rule_defs - for old in rule_defs: - if old[0] == name: - if len(old[1]) != len(r[1]): - raise GrammarError("Cannot extend templates with different parameters: %s" % name) - extend_expansions(old[2], r[2]) - break - else: - raise GrammarError("Can't extend rule %s as it wasn't defined before" % name) - - # Same for terminals - - for name, (e, _) in extend_terms: - for old in term_defs: - if old[0] == name: - extend_expansions(old[1][0], e) - break - else: - raise GrammarError("Can't extend terminal %s as it wasn't defined before" % name) - ## Handle terminals - - # Verify correctness 1 - for name, _ in term_defs: - if name.startswith('__'): - raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) - - # Handle ignore tokens - # XXX A slightly hacky solution. 
Recognition of %ignore TERMINAL as separate comes from the lexer's - # inability to handle duplicate terminals (two names, one value) - ignore_names = [] - for t in ignore: - if t.data=='expansions' and len(t.children) == 1: - t2 ,= t.children - if t2.data=='expansion' and len(t2.children) == 1: - item ,= t2.children - if item.data == 'value': - item ,= item.children - if isinstance(item, Token) and item.type == 'TERMINAL': - ignore_names.append(item.value) - continue - - name = '__IGNORE_%d'% len(ignore_names) - ignore_names.append(name) - term_defs.append((name, (t, 1))) - - # Verify correctness 2 - terminal_names = set() - for name, _ in term_defs: - if name in terminal_names: - raise GrammarError("Terminal '%s' defined more than once" % name) - terminal_names.add(name) - - if set(ignore_names) > terminal_names: - raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names)) - - resolve_term_references(term_defs) - - ## Handle rules - - rule_names = {} - for name, params, _x, option in rule_defs: - # We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders - if self.global_keep_all_tokens: - option.keep_all_tokens = True - - if name.startswith('__'): - raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) - if name in rule_names: - raise GrammarError("Rule '%s' defined more than once" % name) - rule_names[name] = len(params) - - for name, params , expansions, _o in rule_defs: - for i, p in enumerate(params): - if p in rule_names: - raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) - if p in params[:i]: - raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name)) - for temp in expansions.find_data('template_usage'): - sym = temp.children[0] - args = temp.children[1:] - if sym not in params: - if sym not in rule_names: - raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name)) - if len(args) != rule_names[sym]: - raise GrammarError("Wrong number of template arguments used for %s " - "(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name)) - for sym in _find_used_symbols(expansions): - if sym.type == 'TERMINAL': - if sym not in terminal_names: - raise GrammarError("Terminal '%s' used but not defined (in rule %s)" % (sym, name)) - else: - if sym not in rule_names and sym not in params: - raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) - - return Grammar(rule_defs, term_defs, ignore_names) - - class GrammarBuilder: def __init__(self, global_keep_all_tokens=False, import_paths=None): self.global_keep_all_tokens = global_keep_all_tokens @@ -1216,8 +957,12 @@ class GrammarBuilder: if tuple(params) != tuple(self._definitions[name][0]): self._grammar_error("Cannot extend {type} with different parameters: {name}", name) # TODO: think about what to do with 'options' - old_expansions = self._definitions[name][1] - extend_expansions(old_expansions, exp) + base = self._definitions[name][1] + + while len(base.children) == 2: + assert isinstance(base.children[0], Tree) and base.children[0].data == 'expansions', tree + base = base.children[0] + base.children.insert(0, exp) def _ignore(self, exp_or_name): if isinstance(exp_or_name, str): @@ -1430,4 +1175,3 @@ def load_grammar(grammar, source, import_paths, global_keep_all_tokens): builder = GrammarBuilder(global_keep_all_tokens, import_paths) 
builder.load_grammar(grammar, source) return builder.build() - # return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths) From 5db3003edb17717960feeeab6b0f23ca7984897d Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Fri, 1 Jan 2021 14:54:12 +0100 Subject: [PATCH 08/10] Updated stubs --- lark-stubs/__init__.pyi | 1 + lark-stubs/grammar.pyi | 9 +++++++++ lark-stubs/lark.pyi | 4 +++- lark-stubs/load_grammar.pyi | 28 ++++++++++++++++++++++++++++ lark/load_grammar.py | 2 +- 5 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 lark-stubs/grammar.pyi create mode 100644 lark-stubs/load_grammar.pyi diff --git a/lark-stubs/__init__.pyi b/lark-stubs/__init__.pyi index c010a93..c79a6ef 100644 --- a/lark-stubs/__init__.pyi +++ b/lark-stubs/__init__.pyi @@ -4,6 +4,7 @@ from .tree import * from .visitors import * from .exceptions import * from .lexer import * +from .load_grammar import * from .lark import * from logging import Logger as _Logger diff --git a/lark-stubs/grammar.pyi b/lark-stubs/grammar.pyi new file mode 100644 index 0000000..379d7a9 --- /dev/null +++ b/lark-stubs/grammar.pyi @@ -0,0 +1,9 @@ +from typing import Optional, Tuple + + +class RuleOptions: + keep_all_tokens: bool + expand1: bool + priority: int + template_source: Optional[str] + empty_indices: Tuple[bool, ...] \ No newline at end of file diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi index ecbbb09..9246938 100644 --- a/lark-stubs/lark.pyi +++ b/lark-stubs/lark.pyi @@ -8,6 +8,7 @@ from .visitors import Transformer from .lexer import Token, Lexer, TerminalDef from .tree import Tree from .exceptions import UnexpectedInput +from .load_grammar import Grammar _T = TypeVar('_T') @@ -54,13 +55,14 @@ class FromPackageLoader: class Lark: source_path: str source_grammar: str + grammar: Grammar options: LarkOptions lexer: Lexer terminals: List[TerminalDef] def __init__( self, - grammar: Union[str, IO[str]], + grammar: Union[Grammar, str, IO[str]], *, start: Union[None, str, List[str]] = "start", parser: Literal["earley", "lalr", "cyk"] = "auto", diff --git a/lark-stubs/load_grammar.pyi b/lark-stubs/load_grammar.pyi new file mode 100644 index 0000000..cadd657 --- /dev/null +++ b/lark-stubs/load_grammar.pyi @@ -0,0 +1,28 @@ +from typing import List, Tuple, Union, Callable, Dict, Optional + +from lark import Tree +from lark.grammar import RuleOptions + + +class Grammar: + rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] + term_defs: List[Tuple[str, Tuple[Tree, int]]] + ignore: List[str] + + +class GrammarBuilder: + global_keep_all_tokens: bool + import_paths: List[Union[str, Callable]] + + def __init__(self, global_keep_all_tokens=..., import_paths=...): ... + + def load_grammar(self, grammar_text: str, grammar_name: str = ..., mangle: Callable[[str], str] = None): ... + + def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], + base_mangle: Callable[[str], str] = None): ... + + def get_mangle(self, prefix: str, aliases: Dict[str, str], base_mangle: Callable[[str], str] = None): ... + + def check(self): ... + + def build(self) -> Grammar: ... 
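Taken together, the new stubs describe a small programmatic API: `GrammarBuilder.load_grammar` may be called repeatedly to merge several grammar texts into one namespace, `build()` returns a `Grammar`, and per the `lark.pyi` change above, `Lark` now accepts that `Grammar` object directly in place of grammar text. Below is a minimal sketch of that flow, assuming default builder options; the grammar texts and the 'base'/'plugin' names are illustrative, not taken from the patch:

from lark.lark import Lark
from lark.load_grammar import GrammarBuilder

# Merge two grammar texts into one namespace; no mangle function is passed,
# so the plugin text sees (and may %extend) the base definitions directly.
builder = GrammarBuilder()
builder.load_grammar('start: NAME+\nNAME: /[a-z]+/\n%ignore " "\n', 'base')
builder.load_grammar('%extend NAME: /[A-Z]+/\n', 'plugin')  # NAME now also matches uppercase
grammar = builder.build()

# Per the updated lark.pyi, the Grammar object is passed to Lark directly.
parser = Lark(grammar, parser='lalr')
print(parser.parse('foo BAR').pretty())

The grammar_building.py example added in the next patch shows the same flow with `get_mangle`, which namespaces the extension grammar while leaving the explicitly listed names shared.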
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index ff35052..df4d028 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -960,7 +960,7 @@ class GrammarBuilder:
         base = self._definitions[name][1]
 
         while len(base.children) == 2:
-            assert isinstance(base.children[0], Tree) and base.children[0].data == 'expansions', tree
+            assert isinstance(base.children[0], Tree) and base.children[0].data == 'expansions', base
             base = base.children[0]
         base.children.insert(0, exp)
 

From 8b4f874d3d99d1381dd196df5ae0a31da19364c3 Mon Sep 17 00:00:00 2001
From: MegaIng1
Date: Fri, 1 Jan 2021 15:44:08 +0100
Subject: [PATCH 09/10] Added example grammar_building.py

---
 examples/advanced/grammar_building.py | 59 +++++++++++++++++++++++++++
 lark-stubs/reconstruct.pyi            |  4 +-
 2 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100644 examples/advanced/grammar_building.py

diff --git a/examples/advanced/grammar_building.py b/examples/advanced/grammar_building.py
new file mode 100644
index 0000000..0967045
--- /dev/null
+++ b/examples/advanced/grammar_building.py
@@ -0,0 +1,59 @@
+from pathlib import Path
+
+from lark.indenter import Indenter
+from lark.lark import Lark
+from lark.load_grammar import GrammarBuilder
+
+MATCH_GRAMMAR = ('match', """
+
+%extend compound_stmt: match_stmt
+
+match_stmt: "match" test ":" cases
+
+cases: _NEWLINE _INDENT case+ _DEDENT
+
+case: "case" test ":" suite // test is not quite correct.
+
+""", ('compound_stmt', 'test', 'suite', '_DEDENT', '_INDENT', '_NEWLINE'))
+
+EXTENSIONS = (MATCH_GRAMMAR,)
+
+builder = GrammarBuilder()
+
+builder.load_grammar((Path(__file__).with_name('python3.lark')).read_text(), 'python3')
+
+for name, ext_grammar, needed_names in EXTENSIONS:
+    mangle = builder.get_mangle(name, dict(zip(needed_names, needed_names)))
+    builder.load_grammar(ext_grammar, name, mangle)
+
+grammar = builder.build()
+
+
+class PythonIndenter(Indenter):
+    NL_type = '_NEWLINE'
+    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
+    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
+    INDENT_type = '_INDENT'
+    DEDENT_type = '_DEDENT'
+    tab_len = 8
+
+
+parser = Lark(grammar, parser='lalr', start=['single_input', 'file_input', 'eval_input'], postlex=PythonIndenter())
+
+tree = parser.parse(r"""
+
+a = 5
+
+def name(n):
+    match n:
+        case 1:
+            print("one")
+        case 2:
+            print("two")
+        case _:
+            print("number is too big")
+
+name(a)
+""", start='file_input')
+
+print(tree.pretty())
diff --git a/lark-stubs/reconstruct.pyi b/lark-stubs/reconstruct.pyi
index 2220c46..5a4aede 100644
--- a/lark-stubs/reconstruct.pyi
+++ b/lark-stubs/reconstruct.pyi
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-from typing import List, Dict, Union
+from typing import List, Dict, Union, Callable
 from .lark import Lark
 from .tree import Tree
 from .visitors import Transformer_InPlace
@@ -30,7 +30,7 @@ class MakeMatchTree:
 
 
 class Reconstructor:
-    def __init__(self, parser: Lark, term_subs: Dict[str, str] = ...):
+    def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[str], str]] = ...):
         ...
def reconstruct(self, tree: Tree) -> str: From b6b31548ca5fd0a6528fbb31c0877f00c28303d9 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 3 Jan 2021 14:30:06 +0100 Subject: [PATCH 10/10] Fix for review + Removed unused functions --- lark/load_grammar.py | 66 ++++---------------------------------------- tests/test_parser.py | 2 +- 2 files changed, 6 insertions(+), 62 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index df4d028..58cb7e3 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -542,10 +542,6 @@ class PrepareSymbols(Transformer_InPlace): assert False -def _choice_of_rules(rules): - return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) - - def nr_deepcopy_tree(t): """Deepcopy tree `t` without recursion""" return Transformer_NonRecursive(False).transform(t) @@ -732,58 +728,6 @@ class FromPackageLoader(object): stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) -_imported_grammars = {} - - -def import_from_grammar_into_namespace(grammar, namespace, aliases): - """Returns all rules and terminals of grammar, prepended - with a 'namespace' prefix, except for those which are aliased. - """ - - imported_terms = {n: (deepcopy(e), p) for n, (e, p) in grammar.term_defs} - imported_rules = {n: (n, p, deepcopy(t), o) for n, p, t, o in grammar.rule_defs} - - term_defs = [] - rule_defs = [] - - def rule_dependencies(symbol): - if symbol.type != 'RULE': - return [] - try: - _, params, tree,_ = imported_rules[symbol] - except KeyError: - raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) - return _find_used_symbols(tree) - set(params) - - def get_namespace_name(name, params): - if params is not None: - try: - return params[name] - except KeyError: - pass - try: - return aliases[name].value - except KeyError: - if name[0] == '_': - return '_%s__%s' % (namespace, name[1:]) - return '%s__%s' % (namespace, name) - - to_import = list(bfs(aliases, rule_dependencies)) - for symbol in to_import: - if symbol.type == 'TERMINAL': - term_defs.append([get_namespace_name(symbol, None), imported_terms[symbol]]) - else: - assert symbol.type == 'RULE' - _, params, tree, options = imported_rules[symbol] - params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params} - for t in tree.iter_subtrees(): - for i, c in enumerate(t.children): - if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): - t.children[i] = Token(c.type, get_namespace_name(c, params_map)) - params = [params_map[p] for p in params] # We can not rely on ordered dictionaries - rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options)) - - return term_defs, rule_defs def resolve_term_references(term_defs): @@ -913,7 +857,9 @@ class GrammarBuilder: self._ignore_names = [] def _is_term(self, name): - return name.isupper() + # Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME` + # Only the last part is the actual name, and the rest might contain mixed case + return name.rpartition('__')[-1].isupper() def _grammar_error(self, msg, *names): args = {} @@ -1114,16 +1060,14 @@ class GrammarBuilder: assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,) def get_mangle(self, prefix, aliases, base_mangle=None): - prefixes = (prefix, prefix.upper()) def mangle(s): if s in aliases: s = aliases[s] else: - ns = prefixes[self._is_term(s)] if s[0] == '_': - s = '_%s__%s' % (ns, s[1:]) + s = '_%s__%s' % 
(prefix, s[1:]) else: - s = '%s__%s' % (ns, s) + s = '%s__%s' % (prefix, s) if base_mangle is not None: s = base_mangle(s) return s diff --git a/tests/test_parser.py b/tests/test_parser.py index 9be348e..9b011f7 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1961,7 +1961,7 @@ def _make_parser_test(LEXER, PARSER): p = _Lark(grammar, import_paths=[custom_loader]) self.assertEqual(p.parse('ab'), - Tree('start', [Tree('startab', [Tree('ab__expr', [Token('AB__A', 'a'), Token('AB__B', 'b')])])])) + Tree('start', [Tree('startab', [Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')])])])) grammar = """ start: rule_to_import