| @@ -3,4 +3,4 @@ from .common import ParseError, GrammarError | |||||
| from .lark import Lark | from .lark import Lark | ||||
| from .utils import inline_args | from .utils import inline_args | ||||
| __version__ = "0.3.4" | |||||
| __version__ = "0.3.5" | |||||
| @@ -28,10 +28,14 @@ class LarkOptions(object): | |||||
| "auto" (default): Choose for me based on grammar and parser | "auto" (default): Choose for me based on grammar and parser | ||||
| ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley" | ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley" | ||||
| "resolve": The parser will automatically choose the simplest derivation | |||||
| "resolve": The parser will automatically choose the simplest derivation | |||||
| (it chooses consistently: greedy for tokens, non-greedy for rules) | (it chooses consistently: greedy for tokens, non-greedy for rules) | ||||
| "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). | "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). | ||||
| earley__all_derivations - If True, try every possible derivation of each rule. If False, keep only the first | |||||
| derivation found for each rule. Both settings can parse every valid grammar & input, | |||||
| but when False, some ambiguities won't be reported (default: True) | |||||
| transformer - Applies the transformer to every parse tree | transformer - Applies the transformer to every parse tree | ||||
| debug - Affects verbosity (default: False) | debug - Affects verbosity (default: False) | ||||
| keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) | keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) | ||||
| @@ -57,6 +61,7 @@ class LarkOptions(object): | |||||
| self.profile = o.pop('profile', False) | self.profile = o.pop('profile', False) | ||||
| self.ambiguity = o.pop('ambiguity', 'auto') | self.ambiguity = o.pop('ambiguity', 'auto') | ||||
| self.propagate_positions = o.pop('propagate_positions', False) | self.propagate_positions = o.pop('propagate_positions', False) | ||||
| self.earley__all_derivations = o.pop('earley__all_derivations', True) | |||||
| assert self.parser in ('earley', 'lalr', None) | assert self.parser in ('earley', 'lalr', None) | ||||
| @@ -77,7 +77,8 @@ class Earley_NoLex: | |||||
| self.parser = earley.Parser(rules, | self.parser = earley.Parser(rules, | ||||
| parser_conf.start, | parser_conf.start, | ||||
| parser_conf.callback, | parser_conf.callback, | ||||
| resolve_ambiguity=get_ambiguity_resolver(options)) | |||||
| resolve_ambiguity=get_ambiguity_resolver(options), | |||||
| all_derivations = options.earley__all_derivations if options else True) | |||||
| def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
| for sym in expansion: | for sym in expansion: | ||||
| @@ -100,10 +101,11 @@ class Earley(WithLexer): | |||||
| rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules] | rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules] | ||||
| self.parser = earley.Parser(rules, | |||||
| self.parser = earley.Parser(rules, | |||||
| parser_conf.start, | parser_conf.start, | ||||
| parser_conf.callback, | parser_conf.callback, | ||||
| resolve_ambiguity=get_ambiguity_resolver(options)) | |||||
| resolve_ambiguity=get_ambiguity_resolver(options), | |||||
| all_derivations = options.earley__all_derivations if options else True) | |||||
| def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
| return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | ||||
| @@ -51,7 +51,7 @@ class Item(object): | |||||
| def advance(self, tree): | def advance(self, tree): | ||||
| assert self.tree.data == 'drv' | assert self.tree.data == 'drv' | ||||
| new_tree = Derivation(self.rule, self.tree.children + [tree]) | new_tree = Derivation(self.rule, self.tree.children + [tree]) | ||||
| return Item(self.rule, self.ptr+1, self.start, new_tree) | |||||
| return self.__class__(self.rule, self.ptr+1, self.start, new_tree) | |||||
| def similar(self, other): | def similar(self, other): | ||||
| return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | ||||
| @@ -67,6 +67,9 @@ class Item(object): | |||||
| after = list(map(str, self.rule.expansion[self.ptr:])) | after = list(map(str, self.rule.expansion[self.ptr:])) | ||||
| return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after)) | return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after)) | ||||
| class Item_JoinDerivations(Item): | |||||
| __eq__ = Item.similar | |||||
| class NewsList(list): | class NewsList(list): | ||||
| "Keeps track of newly added items (append-only)" | "Keeps track of newly added items (append-only)" | ||||
| @@ -133,10 +136,16 @@ class Column: | |||||
| return bool(self.item_count) | return bool(self.item_count) | ||||
| class Parser: | class Parser: | ||||
| def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None): | |||||
| def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, all_derivations=True): | |||||
| """ | |||||
| all_derivations: | |||||
| True = Try every rule combination, and every possible derivation of each rule. (default) | |||||
| False = Try every rule combination, but not every derivation of the same rule. | |||||
| """ | |||||
| self.analysis = GrammarAnalyzer(rules, start_symbol) | self.analysis = GrammarAnalyzer(rules, start_symbol) | ||||
| self.start_symbol = start_symbol | self.start_symbol = start_symbol | ||||
| self.resolve_ambiguity = resolve_ambiguity | self.resolve_ambiguity = resolve_ambiguity | ||||
| self.all_derivations = all_derivations | |||||
| self.postprocess = {} | self.postprocess = {} | ||||
| self.predictions = {} | self.predictions = {} | ||||
| @@ -150,9 +159,11 @@ class Parser: | |||||
| # Define parser functions | # Define parser functions | ||||
| start_symbol = start_symbol or self.start_symbol | start_symbol = start_symbol or self.start_symbol | ||||
| _Item = Item if self.all_derivations else Item_JoinDerivations | |||||
| def predict(nonterm, column): | def predict(nonterm, column): | ||||
| assert not isinstance(nonterm, Terminal), nonterm | assert not isinstance(nonterm, Terminal), nonterm | ||||
| return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||||
| return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||||
| def complete(item): | def complete(item): | ||||
| name = item.rule.origin | name = item.rule.origin | ||||
| @@ -140,6 +140,35 @@ def _make_full_earley_test(LEXER): | |||||
| self.assertEqual( res.data, '_ambig') | self.assertEqual( res.data, '_ambig') | ||||
| self.assertEqual( len(res.children), 2) | self.assertEqual( len(res.children), 2) | ||||
| def test_ambiguity1(self): | |||||
| grammar = """ | |||||
| start: cd+ "e" | |||||
| !cd: "c" | |||||
| | "d" | |||||
| | "cd" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
| x = l.parse('cde') | |||||
| assert x.data == '_ambig', x | |||||
| assert len(x.children) == 2 | |||||
| @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO | |||||
| def test_not_all_derivations(self): | |||||
| grammar = """ | |||||
| start: cd+ "e" | |||||
| !cd: "c" | |||||
| | "d" | |||||
| | "cd" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False) | |||||
| x = l.parse('cde') | |||||
| assert x.data != '_ambig', x | |||||
| assert len(x.children) == 1 | |||||
| _NAME = "TestFullEarley" + (LEXER or 'Scanless').capitalize() | _NAME = "TestFullEarley" + (LEXER or 'Scanless').capitalize() | ||||
| _TestFullEarley.__name__ = _NAME | _TestFullEarley.__name__ = _NAME | ||||
| globals()[_NAME] = _TestFullEarley | globals()[_NAME] = _TestFullEarley | ||||
| @@ -400,6 +429,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertSequenceEqual(x.children, ['HelloWorld']) | self.assertSequenceEqual(x.children, ['HelloWorld']) | ||||
| @unittest.skipIf(LEXER is None, "Known bug with scanless parsing") # TODO | |||||
| def test_token_collision2(self): | def test_token_collision2(self): | ||||
| # NOTE: This test reveals a bug in token reconstruction in Scanless Earley | # NOTE: This test reveals a bug in token reconstruction in Scanless Earley | ||||
| # I probably need to re-write grammar transformation | # I probably need to re-write grammar transformation | ||||
| @@ -625,32 +655,6 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertEqual(len(tree.children), 2) | self.assertEqual(len(tree.children), 2) | ||||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | |||||
| def test_earley_prioritization(self): | |||||
| "Tests effect of priority on result" | |||||
| grammar = """ | |||||
| start: a | b | |||||
| a.1: "a" | |||||
| b.2: "a" | |||||
| """ | |||||
| # l = Lark(grammar, parser='earley', lexer='standard') | |||||
| l = _Lark(grammar) | |||||
| res = l.parse("a") | |||||
| self.assertEqual(res.children[0].data, 'b') | |||||
| grammar = """ | |||||
| start: a | b | |||||
| a.2: "a" | |||||
| b.1: "a" | |||||
| """ | |||||
| l = _Lark(grammar) | |||||
| # l = Lark(grammar, parser='earley', lexer='standard') | |||||
| res = l.parse("a") | |||||
| self.assertEqual(res.children[0].data, 'a') | |||||
| @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority") | @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority") | ||||
| def test_lexer_prioritization(self): | def test_lexer_prioritization(self): | ||||
| "Tests effect of priority on result" | "Tests effect of priority on result" | ||||
| @@ -680,22 +684,6 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertEqual(res.children, ['ab']) | self.assertEqual(res.children, ['ab']) | ||||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports ambiguity") | |||||
| def test_ambiguity1(self): | |||||
| grammar = """ | |||||
| start: cd+ "e" | |||||
| !cd: "c" | |||||
| | "d" | |||||
| | "cd" | |||||
| """ | |||||
| # l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=None) | |||||
| l = _Lark(grammar, ambiguity='explicit') | |||||
| x = l.parse('cde') | |||||
| assert x.data == '_ambig' | |||||
| assert len(x.children) == 2 | |||||
| def test_import(self): | def test_import(self): | ||||
| grammar = """ | grammar = """ | ||||
| @@ -711,6 +699,33 @@ def _make_parser_test(LEXER, PARSER): | |||||
| x = l.parse('12 elephants') | x = l.parse('12 elephants') | ||||
| self.assertEqual(x.children, ['12', 'elephants']) | self.assertEqual(x.children, ['12', 'elephants']) | ||||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | |||||
| def test_earley_prioritization(self): | |||||
| "Tests effect of priority on result" | |||||
| grammar = """ | |||||
| start: a | b | |||||
| a.1: "a" | |||||
| b.2: "a" | |||||
| """ | |||||
| # l = Lark(grammar, parser='earley', lexer='standard') | |||||
| l = _Lark(grammar) | |||||
| res = l.parse("a") | |||||
| self.assertEqual(res.children[0].data, 'b') | |||||
| grammar = """ | |||||
| start: a | b | |||||
| a.2: "a" | |||||
| b.1: "a" | |||||
| """ | |||||
| l = _Lark(grammar) | |||||
| # l = Lark(grammar, parser='earley', lexer='standard') | |||||
| res = l.parse("a") | |||||
| self.assertEqual(res.children[0].data, 'a') | |||||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | ||||
| def test_earley_prioritization_sum(self): | def test_earley_prioritization_sum(self): | ||||
| "Tests effect of priority on result" | "Tests effect of priority on result" | ||||