| @@ -140,7 +140,7 @@ class Lark: | |||
| self.options.ambiguity = 'resolve' | |||
| else: | |||
| assert self.options.parser == 'earley', "Only Earley supports disambiguation right now" | |||
| assert self.options.ambiguity in ('resolve', 'explicit', 'auto') | |||
| assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum') | |||
| # Parse the grammar file and compose the grammars (TODO) | |||
| self.grammar = load_grammar(grammar, source) | |||
| @@ -50,7 +50,9 @@ class LALR_ContextualLexer: | |||
def get_ambiguity_resolver(options):
    """Select the ambiguity-resolution callable for ``options.ambiguity``.

    Returns:
        * ``resolve_ambig.standard_resolve_ambig`` when ``options`` is falsy
          or ``options.ambiguity == 'resolve'``;
        * ``resolve_ambig.antiscore_sum_resolve_ambig`` for
          ``'resolve__antiscore_sum'``;
        * ``None`` for ``'explicit'`` (no resolver is applied).

    Raises:
        ValueError: for any other ``options.ambiguity`` value.
    """
    # Exactly one return per branch: the previous text carried two
    # consecutive returns here, which left the second one unreachable.
    if not options or options.ambiguity == 'resolve':
        return resolve_ambig.standard_resolve_ambig
    elif options.ambiguity == 'resolve__antiscore_sum':
        return resolve_ambig.antiscore_sum_resolve_ambig
    elif options.ambiguity == 'explicit':
        return None
    raise ValueError(options)
| @@ -3,6 +3,11 @@ from functools import cmp_to_key | |||
| from ..tree import Tree, Visitor_NoRecurse | |||
| # Standard ambiguity resolver (uses comparison) | |||
| # | |||
| # Author: Erez Sh | |||
| def _compare_rules(rule1, rule2): | |||
| if rule1.origin != rule2.origin: | |||
| if rule1.options and rule2.options: | |||
| @@ -31,9 +36,9 @@ def _compare_drv(tree1, tree2): | |||
| # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be | |||
| # computationally inefficient. So we handle it here. | |||
| if tree1.data == '_ambig': | |||
| _resolve_ambig(tree1) | |||
| _standard_resolve_ambig(tree1) | |||
| if tree2.data == '_ambig': | |||
| _resolve_ambig(tree2) | |||
| _standard_resolve_ambig(tree2) | |||
| c = _compare_rules(tree1.rule, tree2.rule) | |||
| if c: | |||
| @@ -48,21 +53,54 @@ def _compare_drv(tree1, tree2): | |||
| return compare(len(tree1.children), len(tree2.children)) | |||
def _standard_resolve_ambig(tree):
    """Collapse a single ``_ambig`` node, in place, into its preferred derivation.

    The preferred child is the one that ``_compare_drv`` orders first
    (``min`` over the children using ``cmp_to_key(_compare_drv)``).
    The node is then rewritten with ``tree.set('drv', ...)`` and takes over
    the chosen child's ``rule``.

    ``tree`` must have ``data == '_ambig'`` and the chosen child must have
    ``data == 'drv'``; both are checked with assertions.
    """
    assert tree.data == '_ambig'

    best = min(tree.children, key=cmp_to_key(_compare_drv))

    assert best.data == 'drv'
    tree.set('drv', best.children)
    tree.rule = best.rule   # needed for applying callbacks

    # Sanity check: the node must no longer be marked as ambiguous.
    assert tree.data != '_ambig'


# Other code in this file still refers to the resolver by its previous name,
# so keep that name bound to the same function.
_resolve_ambig = _standard_resolve_ambig
def standard_resolve_ambig(tree):
    """Resolve every ``_ambig`` node found under ``tree`` and return ``tree``.

    Each node reported by ``tree.find_data('_ambig')`` is handed to
    ``_standard_resolve_ambig``, which rewrites it in place.
    """
    ambiguous_nodes = tree.find_data('_ambig')
    for node in ambiguous_nodes:
        _standard_resolve_ambig(node)

    return tree
class ResolveAmbig(Visitor_NoRecurse):
    """Visitor whose ``_ambig`` handler resolves the visited node in place.

    NOTE(review): presumably ``Visitor_NoRecurse`` dispatches on ``tree.data``
    so that ``_ambig`` is called for every ``_ambig`` node -- confirm against
    the visitor implementation.
    """

    def _ambig(self, tree):
        """Delegate resolution of one ``_ambig`` node to ``_resolve_ambig``."""
        _resolve_ambig(tree)
| # Anti-score Sum | |||
| # | |||
| # Author: Uriva (https://github.com/uriva) | |||
def _antiscore_sum_drv(tree):
    """Return the anti-score of ``tree``: its own rule priority plus its children's.

    Anything that is not a ``Tree`` scores 0. A missing or falsy priority
    also counts as 0. If ``tree`` is itself an ``_ambig`` node it is resolved
    first, so the score is that of the derivation that was chosen for it.
    """
    if not isinstance(tree, Tree):
        return 0

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    if tree.data == '_ambig':
        _antiscore_sum_resolve_ambig(tree)

    try:
        own_priority = tree.rule.options.priority
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        own_priority = None

    children_total = sum(_antiscore_sum_drv(child) for child in tree.children)
    return (own_priority or 0) + children_total
def _antiscore_sum_resolve_ambig(tree):
    """Collapse one ``_ambig`` node, in place, into its lowest anti-score child.

    The child with the smallest ``_antiscore_sum_drv`` value is selected.
    The node is rewritten with ``tree.set('drv', ...)`` and inherits that
    child's ``rule``. ``tree`` must be an ``_ambig`` node and the selected
    child a ``drv`` node; both are asserted.
    """
    assert tree.data == '_ambig'

    lowest = min(tree.children, key=_antiscore_sum_drv)
    assert lowest.data == 'drv'

    tree.set('drv', lowest.children)
    # The rule is needed later for applying callbacks.
    tree.rule = lowest.rule
def antiscore_sum_resolve_ambig(tree):
    """Resolve every ``_ambig`` node under ``tree`` by anti-score sum; return ``tree``.

    Each node reported by ``tree.find_data('_ambig')`` is passed to
    ``_antiscore_sum_resolve_ambig``, which rewrites it in place.
    """
    for ambig in tree.find_data('_ambig'):
        _antiscore_sum_resolve_ambig(ambig)

    # Hand the tree back, as the other resolvers in this module do; without
    # this the caller would receive None.
    return tree
def resolve_ambig(tree):
    """Run a ``ResolveAmbig`` visitor over ``tree`` and return ``tree``."""
    visitor = ResolveAmbig()
    visitor.visit(tree)
    return tree
| @@ -1,3 +1,8 @@ | |||
| try: | |||
| from future_builtins import filter | |||
| except ImportError: | |||
| pass | |||
| from copy import deepcopy | |||
| from .utils import inline_args | |||
| @@ -44,13 +49,7 @@ class Tree(object): | |||
| return hash((self.data, tuple(self.children))) | |||
def find_pred(self, pred):
    """Return an iterator over the subtrees for which ``pred(subtree)`` is true.

    Candidates come from ``self.iter_subtrees()``, so results appear in
    whatever order that method produces them. The result is the built-in
    ``filter`` object; on Python 2 it is lazy only when ``filter`` is the
    ``future_builtins`` version imported at the top of the module.
    """
    # Single code path: mixing the old recursive ``yield`` walk with this
    # ``return`` would turn the function into a generator and silently
    # discard the filter (and is a syntax error on Python 2).
    return filter(pred, self.iter_subtrees())
def find_data(self, data):
    """Return the subtrees whose ``data`` attribute equals ``data``.

    Thin wrapper around ``self.find_pred`` with an equality predicate.
    """
    def has_matching_data(subtree):
        return subtree.data == data

    return self.find_pred(has_matching_data)
| @@ -650,6 +650,62 @@ def _make_parser_test(LEXER, PARSER): | |||
| self.assertEqual(res.children[0].data, 'a') | |||
@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization_sum(self):
    """Check which derivation of 'abba' wins under 'resolve__antiscore_sum'.

    Every grammar offers two ways to parse 'abba': ``ab_ b_ a_`` directly
    under ``start``, or ``a_ bb_ a_`` wrapped in ``indirection``. Only the
    rule priorities change from case to case. The cases run in order and the
    first failing assertion aborts the test.
    """
    cases = [
        # Only bb_ carries a priority (1): the direct derivation is expected.
        ("""
start: ab_ b_ a_ | indirection
indirection: a_ bb_ a_
a_: "a"
b_: "b"
ab_: "ab"
bb_.1: "bb"
""", 'ab_b_a_'),
        # Only ab_ carries a priority (1): the indirection is expected.
        ("""
start: ab_ b_ a_ | indirection
indirection: a_ bb_ a_
a_: "a"
b_: "b"
ab_.1: "ab"
bb_: "bb"
""", 'indirection'),
        # Priority sums: direct 3 + 1 + 2 = 6, indirection 2 + 3 + 2 = 7.
        ("""
start: ab_ b_ a_ | indirection
indirection: a_ bb_ a_
a_.2: "a"
b_.1: "b"
ab_.3: "ab"
bb_.3: "bb"
""", 'ab_b_a_'),
        # Priority sums: direct 4 + 1 + 1 = 6, indirection 1 + 3 + 1 = 5.
        ("""
start: ab_ b_ a_ | indirection
indirection: a_ bb_ a_
a_.1: "a"
b_.1: "b"
ab_.4: "ab"
bb_.3: "bb"
""", 'indirection'),
    ]

    for grammar, expected in cases:
        parser = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
        res = parser.parse('abba')
        self.assertEqual(''.join(child.data for child in res.children), expected)
| _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | |||
| _TestParser.__name__ = _NAME | |||