| @@ -47,12 +47,12 @@ class TreeToJson(Transformer): | |||
| true = lambda self, _: True | |||
| false = lambda self, _: False | |||
| # json_parser = Lark(json_grammar, parser='earley', lexer='standard') | |||
| # def parse(x): | |||
| # return TreeToJson().transform(json_parser.parse(x)) | |||
# Parse JSON with the Earley parser and a dynamic (scanner-less) lexer:
# terminals are matched against the raw text by the parser itself, so no
# standalone tokenization pass is needed.
json_parser = Lark(json_grammar, parser='earley', lexer='dynamic')

def parse(x):
    """Parse the JSON string *x* and return the transformed value."""
    return TreeToJson().transform(json_parser.parse(x))

# Alternative: LALR(1) with an embedded transformer (faster), kept for reference.
# NOTE(review): reconstructed from a mangled patch — the earley/dynamic variant
# is the post-patch active configuration; confirm against upstream history.
# json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson())
# parse = json_parser.parse
| def test(): | |||
| test_json = ''' | |||
| @@ -130,8 +130,10 @@ class Lark: | |||
| self.options.lexer = 'standard' | |||
| elif self.options.parser == 'earley': | |||
| self.options.lexer = None | |||
| else: | |||
| assert False, self.options.parser | |||
| lexer = self.options.lexer | |||
| assert lexer in ('standard', 'contextual', None) | |||
| assert lexer in ('standard', 'contextual', 'dynamic', None) | |||
| if self.options.ambiguity == 'auto': | |||
| if self.options.parser == 'earley': | |||
| @@ -7,6 +7,8 @@ from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Term | |||
| from .parsers import lalr_parser, old_earley, nearley, earley | |||
| from .tree import Transformer | |||
| from .parsers import xearley | |||
| class WithLexer: | |||
| def __init__(self, lexer_conf): | |||
| self.lexer_conf = lexer_conf | |||
| @@ -171,6 +173,31 @@ class Earley(WithLexer): | |||
| tokens = self.lex(text) | |||
| return self.parser.parse(tokens) | |||
class XEarley:
    """Earley parser frontend using a dynamic (scanner-less) lexer.

    Instead of tokenizing the input up front, each terminal is compiled
    to a Terminal_Regexp so the xearley parser can match terminals
    directly against the raw input text.
    """

    def __init__(self, lexer_conf, parser_conf, options=None):
        """Build the parser.

        lexer_conf  -- supplies the token definitions (``.tokens``).
        parser_conf -- supplies ``rules``, ``start`` and ``callback``.
        options     -- optional; only ``options.ambiguity`` is consulted.
        """
        self.token_by_name = {t.name: t for t in lexer_conf.tokens}

        rules = [(name, list(self._prepare_expansion(expansion)), alias)
                 for name, expansion, alias in parser_conf.rules]

        # Default to resolving ambiguity when no options object is given.
        resolve_ambiguity = (options.ambiguity == 'resolve') if options else True
        self.parser = xearley.Parser(rules,
                                     parser_conf.start,
                                     parser_conf.callback,
                                     resolve_ambiguity=resolve_ambiguity)

    def _prepare_expansion(self, expansion):
        """Yield the expansion with terminal names replaced by
        Terminal_Regexp instances; non-terminals pass through unchanged."""
        for sym in expansion:
            if is_terminal(sym):
                regexp = self.token_by_name[sym].pattern.to_regexp()
                # Removed a dead `width = sre_parse.parse(regexp).getwidth()`
                # computation: the value was never used.
                yield Terminal_Regexp(regexp)
            else:
                yield sym

    def parse(self, text):
        """Parse raw *text* (no pre-lexing) and return the parse result."""
        return self.parser.parse(text)
| def get_frontend(parser, lexer): | |||
| if parser=='lalr': | |||
| if lexer is None: | |||
| @@ -186,6 +213,8 @@ def get_frontend(parser, lexer): | |||
| return Earley_NoLex | |||
| elif lexer=='standard': | |||
| return Earley | |||
| elif lexer=='dynamic': | |||
| return XEarley | |||
| elif lexer=='contextual': | |||
| raise ValueError('The Earley parser does not support the contextual parser') | |||
| else: | |||
| @@ -5,7 +5,7 @@ import logging | |||
| from .test_trees import TestTrees | |||
| # from .test_selectors import TestSelectors | |||
| from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley | |||
| from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley, TestEarleyDynamic | |||
| # from .test_grammars import TestPythonG, TestConfigG | |||
| logging.basicConfig(level=logging.INFO) | |||
| @@ -57,7 +57,7 @@ class TestEarley(unittest.TestCase): | |||
| # or re-processing of already completed rules. | |||
| g = Lark(r"""start: B | |||
| B: ("ab"|/[^b]/)* | |||
| """, lexer=None) | |||
| """, lexer='dynamic') | |||
| self.assertEqual( g.parse('abc').children[0], 'abc') | |||
| @@ -65,7 +65,7 @@ class TestEarley(unittest.TestCase): | |||
| g = Lark("""start: A "b" c | |||
| A: "a"+ | |||
| c: "abc" | |||
| """, parser="earley", lexer=None) | |||
| """, parser="earley", lexer='dynamic') | |||
| x = g.parse('aaaababc') | |||
| def test_earley_scanless2(self): | |||
| @@ -80,7 +80,7 @@ class TestEarley(unittest.TestCase): | |||
| program = """c b r""" | |||
| l = Lark(grammar, parser='earley', lexer=None) | |||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||
| l.parse(program) | |||
| def test_earley_scanless3(self): | |||
| @@ -91,7 +91,7 @@ class TestEarley(unittest.TestCase): | |||
| A: "a"+ | |||
| """ | |||
| l = Lark(grammar, parser='earley', lexer=None) | |||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||
| res = l.parse("aaa") | |||
| self.assertEqual(res.children, ['aa', 'a']) | |||
| @@ -101,7 +101,7 @@ class TestEarley(unittest.TestCase): | |||
| A: "a"+ | |||
| """ | |||
| l = Lark(grammar, parser='earley', lexer=None) | |||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||
| res = l.parse("aaa") | |||
| self.assertEqual(res.children, ['aaa']) | |||
| @@ -114,7 +114,7 @@ class TestEarley(unittest.TestCase): | |||
| empty2: | |||
| """ | |||
| parser = Lark(grammar, parser='earley', lexer=None) | |||
| parser = Lark(grammar, parser='earley', lexer='dynamic') | |||
| res = parser.parse('ab') | |||
| empty_tree = Tree('empty', [Tree('empty2', [])]) | |||
| @@ -130,7 +130,7 @@ class TestEarley(unittest.TestCase): | |||
| ab: "ab" | |||
| """ | |||
| parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit') | |||
| parser = Lark(grammar, parser='earley', lexer='dynamic', ambiguity='explicit') | |||
| res = parser.parse('ab') | |||
| self.assertEqual( res.data, '_ambig') | |||
| @@ -146,6 +146,7 @@ def _make_parser_test(LEXER, PARSER): | |||
| b: "b" | |||
| a: "a" | |||
| """) | |||
| r = g.parse('aaabaab') | |||
| self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' ) | |||
| r = g.parse('aaabaaba') | |||
| @@ -583,15 +584,17 @@ def _make_parser_test(LEXER, PARSER): | |||
| _TestParser.__name__ = _NAME | |||
| globals()[_NAME] = _TestParser | |||
| # Note: You still have to import them in __main__ for the tests to run | |||
| _TO_TEST = [ | |||
| ('standard', 'earley'), | |||
| ('dynamic', 'earley'), | |||
| ('standard', 'lalr'), | |||
| ('contextual', 'lalr'), | |||
| (None, 'earley'), | |||
| ] | |||
# Generate one test class per (lexer, parser) combination.
# The mangled patch contained both the old `for LEXER, PARSER ...` loop and
# its replacement; only the post-patch form is kept. Underscored loop
# variables avoid clobbering the module-level LEXER/PARSER names captured
# by the generated test classes.
for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)
| if __name__ == '__main__': | |||