| @@ -140,7 +140,7 @@ class Lark: | |||||
| self.options.ambiguity = 'resolve' | self.options.ambiguity = 'resolve' | ||||
| else: | else: | ||||
| assert self.options.parser == 'earley', "Only Earley supports disambiguation right now" | assert self.options.parser == 'earley', "Only Earley supports disambiguation right now" | ||||
| assert self.options.ambiguity in ('resolve', 'explicit', 'auto') | |||||
| assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum') | |||||
| # Parse the grammar file and compose the grammars (TODO) | # Parse the grammar file and compose the grammars (TODO) | ||||
| self.grammar = load_grammar(grammar, source) | self.grammar = load_grammar(grammar, source) | ||||
| @@ -50,7 +50,9 @@ class LALR_ContextualLexer: | |||||
def get_ambiguity_resolver(options):
    """Pick the ambiguity-resolution function selected by ``options.ambiguity``.

    :param options: parser options object exposing an ``ambiguity`` attribute,
        or a falsy value (e.g. ``None``) to get the standard resolver.
    :return: ``resolve_ambig.standard_resolve_ambig`` for ``'resolve'`` (or when
        ``options`` is falsy), ``resolve_ambig.antiscore_sum_resolve_ambig`` for
        ``'resolve__antiscore_sum'``, or ``None`` for ``'explicit'``.
    :raises ValueError: for any other ``ambiguity`` value; the options object is
        passed as the exception argument.
    """
    if not options or options.ambiguity == 'resolve':
        return resolve_ambig.standard_resolve_ambig
    elif options.ambiguity == 'resolve__antiscore_sum':
        return resolve_ambig.antiscore_sum_resolve_ambig
    elif options.ambiguity == 'explicit':
        # No resolver is returned for explicit mode.
        return None
    raise ValueError(options)
| @@ -3,6 +3,11 @@ from functools import cmp_to_key | |||||
| from ..tree import Tree, Visitor_NoRecurse | from ..tree import Tree, Visitor_NoRecurse | ||||
| # Standard ambiguity resolver (uses comparison) | |||||
| # | |||||
| # Author: Erez Sh | |||||
| def _compare_rules(rule1, rule2): | def _compare_rules(rule1, rule2): | ||||
| if rule1.origin != rule2.origin: | if rule1.origin != rule2.origin: | ||||
| if rule1.options and rule2.options: | if rule1.options and rule2.options: | ||||
| @@ -31,9 +36,9 @@ def _compare_drv(tree1, tree2): | |||||
| # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be | # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be | ||||
| # computationally inefficient. So we handle it here. | # computationally inefficient. So we handle it here. | ||||
| if tree1.data == '_ambig': | if tree1.data == '_ambig': | ||||
| _resolve_ambig(tree1) | |||||
| _standard_resolve_ambig(tree1) | |||||
| if tree2.data == '_ambig': | if tree2.data == '_ambig': | ||||
| _resolve_ambig(tree2) | |||||
| _standard_resolve_ambig(tree2) | |||||
| c = _compare_rules(tree1.rule, tree2.rule) | c = _compare_rules(tree1.rule, tree2.rule) | ||||
| if c: | if c: | ||||
| @@ -48,21 +53,54 @@ def _compare_drv(tree1, tree2): | |||||
| return compare(len(tree1.children), len(tree2.children)) | return compare(len(tree1.children), len(tree2.children)) | ||||
def _standard_resolve_ambig(tree):
    """Collapse a single ``_ambig`` node, in place, into its preferred derivation.

    The preferred derivation is the minimum of ``tree.children`` under the
    ``_compare_drv`` ordering. Note that ``_compare_drv`` may itself call back
    into this function for nested ``_ambig`` nodes.

    :param tree: a node whose ``data`` is ``'_ambig'`` and whose children are
        ``'drv'`` nodes (both conditions are asserted).
    :return: ``None``; ``tree`` is modified in place.
    """
    assert tree.data == '_ambig'
    best = min(tree.children, key=cmp_to_key(_compare_drv))
    assert best.data == 'drv'
    tree.set('drv', best.children)
    tree.rule = best.rule   # needed for applying callbacks
    # Post-condition: the node must no longer be marked ambiguous.
    assert tree.data != '_ambig'
def standard_resolve_ambig(tree):
    """Resolve every ``_ambig`` node found in ``tree`` using the standard comparison.

    :param tree: tree to search with ``find_data('_ambig')``; matching nodes are
        rewritten in place.
    :return: the same ``tree`` object that was passed in.
    """
    ambiguous_nodes = tree.find_data('_ambig')
    for node in ambiguous_nodes:
        _standard_resolve_ambig(node)
    return tree
| class ResolveAmbig(Visitor_NoRecurse): | |||||
| def _ambig(self, tree): | |||||
| _resolve_ambig(tree) | |||||
| # Anti-score Sum | |||||
| # | |||||
| # Author: Uriva (https://github.com/uriva) | |||||
def _antiscore_sum_drv(tree):
    """Return the anti-score of a derivation: its rule priority plus its children's.

    Anything that is not a ``Tree`` scores 0. A node without a reachable
    ``rule.options.priority``, or with a falsy priority, contributes 0 itself.

    :param tree: a ``Tree`` node or any other child value.
    :return: the summed priority of ``tree`` and all of its descendants.
    """
    if not isinstance(tree, Tree):
        return 0

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    if tree.data == '_ambig':
        # Resolved in place first, so the children summed below are the winner's.
        _antiscore_sum_resolve_ambig(tree)

    try:
        own_score = tree.rule.options.priority or 0
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        own_score = 0

    return own_score + sum(_antiscore_sum_drv(child) for child in tree.children)
def _antiscore_sum_resolve_ambig(tree):
    """Collapse a single ``_ambig`` node, in place, into its lowest anti-score derivation.

    :param tree: a node whose ``data`` is ``'_ambig'`` and whose children are
        ``'drv'`` nodes (both conditions are asserted).
    :return: ``None``; ``tree`` is modified in place.
    """
    assert tree.data == '_ambig'
    # The smallest summed priority wins.
    winner = min(tree.children, key=_antiscore_sum_drv)
    assert winner.data == 'drv'
    tree.set('drv', winner.children)
    tree.rule = winner.rule   # needed for applying callbacks
def antiscore_sum_resolve_ambig(tree):
    """Resolve every ``_ambig`` node found in ``tree`` by lowest anti-score sum.

    :param tree: tree to search with ``find_data('_ambig')``; matching nodes are
        rewritten in place.
    :return: the same ``tree`` object that was passed in.
    """
    for ambig in tree.find_data('_ambig'):
        _antiscore_sum_resolve_ambig(ambig)
    return tree
| @@ -1,3 +1,8 @@ | |||||
| try: | |||||
| from future_builtins import filter | |||||
| except ImportError: | |||||
| pass | |||||
| from copy import deepcopy | from copy import deepcopy | ||||
| from .utils import inline_args | from .utils import inline_args | ||||
| @@ -44,13 +49,7 @@ class Tree(object): | |||||
| return hash((self.data, tuple(self.children))) | return hash((self.data, tuple(self.children))) | ||||
def find_pred(self, pred):
    """Lazily iterate over the subtrees of this tree that satisfy ``pred``.

    :param pred: callable taking a subtree and returning a truthy value for
        subtrees that should be yielded.
    :return: an iterator over the matching items of ``self.iter_subtrees()``,
        in the order that method produces them.
    """
    # A generator expression is lazy on every interpreter, whereas the builtin
    # ``filter`` returns a list on Python 2 unless it has been shadowed by
    # ``future_builtins.filter``.
    return (subtree for subtree in self.iter_subtrees() if pred(subtree))
| def find_data(self, data): | def find_data(self, data): | ||||
| return self.find_pred(lambda t: t.data == data) | return self.find_pred(lambda t: t.data == data) | ||||
| @@ -650,6 +650,62 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertEqual(res.children[0].data, 'a') | self.assertEqual(res.children[0].data, 'a') | ||||
@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization_sum(self):
    """Tests effect of priority on result

    The input 'abba' can be derived either as ``ab_ b_ a_`` or, through
    ``indirection``, as ``a_ bb_ a_``. With ambiguity='resolve__antiscore_sum'
    the derivation whose rule priorities add up to the smaller total is
    expected to be chosen.
    """
    # Only the priority suffix of each terminal-like rule varies between cases.
    grammar_template = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_%(a)s: "a"
    b_%(b)s: "b"
    ab_%(ab)s: "ab"
    bb_%(bb)s: "bb"
    """

    cases = [
        # ab_ b_ a_ sums to 0, a_ bb_ a_ sums to 1
        (dict(a='', b='', ab='', bb='.1'), 'ab_b_a_'),
        # ab_ b_ a_ sums to 1, a_ bb_ a_ sums to 0
        (dict(a='', b='', ab='.1', bb=''), 'indirection'),
        # ab_ b_ a_ sums to 3+1+2 = 6, a_ bb_ a_ sums to 2+3+2 = 7
        (dict(a='.2', b='.1', ab='.3', bb='.3'), 'ab_b_a_'),
        # ab_ b_ a_ sums to 4+1+1 = 6, a_ bb_ a_ sums to 1+3+1 = 5
        (dict(a='.1', b='.1', ab='.4', bb='.3'), 'indirection'),
    ]

    for priorities, expected in cases:
        l = Lark(grammar_template % priorities, parser='earley', ambiguity='resolve__antiscore_sum')
        res = l.parse('abba')
        self.assertEqual(''.join(child.data for child in res.children), expected,
                         "priorities=%r" % (priorities,))
| _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | ||||
| _TestParser.__name__ = _NAME | _TestParser.__name__ = _NAME | ||||