Added resolve__antiscore_sum

8 years ago · 5d41371fb3
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -140,7 +140,7 @@ class Lark:
                self.options.ambiguity = 'resolve'
        else:
            assert self.options.parser == 'earley', "Only Earley supports disambiguation right now"
        assert self.options.ambiguity in ('resolve', 'explicit', 'auto')
        assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum')

        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, source)
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -50,7 +50,9 @@ class LALR_ContextualLexer:

 def get_ambiguity_resolver(options):
    """Pick the ambiguity-resolution callable that matches ``options.ambiguity``.

    A falsy ``options`` or an ambiguity of ``'resolve'`` selects a resolver
    from the ``resolve_ambig`` module, ``'resolve__antiscore_sum'`` selects
    ``resolve_ambig.antiscore_sum_resolve_ambig``, and ``'explicit'`` yields
    ``None``.

    Raises:
        ValueError: if ``options.ambiguity`` is none of the values above.
    """
    if not options or options.ambiguity == 'resolve':
        return resolve_ambig.resolve_ambig
        # NOTE(review): the next statement is unreachable as written, because the
        #     return above always exits first. This text looks like a rendered diff
        #     in which the line above is the removed version and the line below is
        #     its replacement -- confirm which one is intended.
        return resolve_ambig.standard_resolve_ambig
    elif options.ambiguity == 'resolve__antiscore_sum':
        return resolve_ambig.antiscore_sum_resolve_ambig
    elif options.ambiguity == 'explicit':
        # No resolver is returned for 'explicit'.
        return None
    raise ValueError(options)
--- a/lark/parsers/resolve_ambig.py
+++ b/lark/parsers/resolve_ambig.py
@@ -3,6 +3,11 @@ from functools import cmp_to_key

 from ..tree import Tree, Visitor_NoRecurse


 # Standard ambiguity resolver (uses comparison)
 #
 # Author: Erez Sh

 def _compare_rules(rule1, rule2):
    if rule1.origin != rule2.origin:
        if rule1.options and rule2.options:
@@ -31,9 +36,9 @@ def _compare_drv(tree1, tree2):
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    if tree1.data == '_ambig':
        _resolve_ambig(tree1)
        _standard_resolve_ambig(tree1)
    if tree2.data == '_ambig':
        _resolve_ambig(tree2)
        _standard_resolve_ambig(tree2)

    c = _compare_rules(tree1.rule, tree2.rule)
    if c:
@@ -48,21 +53,54 @@ def _compare_drv(tree1, tree2):
    return compare(len(tree1.children), len(tree2.children))


 def _resolve_ambig(tree):
 # NOTE(review): the ``def`` line above has no body of its own here; it looks like the
 #     pre-rename header of the function below (rendered diff) -- confirm.
 def _standard_resolve_ambig(tree):
    """Collapse one ``_ambig`` node in place onto its best-ranked derivation.

    ``tree`` must have ``data == '_ambig'``. The child that sorts lowest under
    ``_compare_drv`` is chosen; it must be a ``'drv'`` node. The node is then
    rewritten through ``tree.set('drv', ...)`` with that child's children, and
    the child's ``rule`` is copied onto it.
    """
    assert tree.data == '_ambig'

    # cmp_to_key adapts the two-argument comparison function for use as min()'s key.
    best = min(tree.children, key=cmp_to_key(_compare_drv))
    assert best.data == 'drv'
    tree.set('drv', best.children)
    tree.rule = best.rule   # needed for applying callbacks

    # Sanity check: after the rewrite the node must no longer be marked ambiguous.
    assert tree.data != '_ambig'
 def standard_resolve_ambig(tree):
    """Resolve every ``_ambig`` node reported by ``tree.find_data`` and return ``tree``.

    Each reported node is handed to ``_standard_resolve_ambig``, which rewrites
    it in place. The same ``tree`` object that was passed in is returned.
    """
    # Iterate the result of find_data directly: nodes are mutated while we walk,
    # so the traversal is deliberately not materialised up front.
    ambiguous_nodes = tree.find_data('_ambig')
    for node in ambiguous_nodes:
        _standard_resolve_ambig(node)
    return tree



 class ResolveAmbig(Visitor_NoRecurse):
    """Visitor whose ``_ambig`` handler resolves the visited node with ``_resolve_ambig``.

    NOTE(review): ``_resolve_ambig`` appears in this text only as a bare ``def``
    line, so this class looks like the pre-change implementation shown by a
    rendered diff -- confirm whether it is still meant to exist.
    """
    def _ambig(self, tree):
        # Handler for nodes whose data is '_ambig' (presumably dispatched by name
        # by Visitor_NoRecurse -- TODO confirm).
        _resolve_ambig(tree)

 # Anti-score Sum
 #
 # Author: Uriva (https://github.com/uriva)

 def _antiscore_sum_drv(tree):
    """Return the anti-score of ``tree``: its rule priority plus that of all descendants.

    Anything that is not a ``Tree`` scores 0. A node without a reachable
    ``rule.options.priority`` (or with a falsy priority) contributes 0 itself,
    but its children are still counted.
    """
    if not isinstance(tree, Tree):
        return 0

    # XXX Nested '_ambig' nodes can still show up here because Visitor_NoRecurse orders
    #     duplicate (same-id) nodes imperfectly. Fixing that ordering is possible but would
    #     be computationally inefficient, so such nodes are resolved on the spot instead.
    if tree.data == '_ambig':
        _antiscore_sum_resolve_ambig(tree)

    try:
        own_priority = tree.rule.options.priority
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        own_priority = None

    own_score = own_priority or 0

    # Accumulate the children's scores separately, starting from 0, then add the
    # node's own score on the left.
    children_score = 0
    for child in tree.children:
        children_score = children_score + _antiscore_sum_drv(child)

    return own_score + children_score

 def _antiscore_sum_resolve_ambig(tree):
    """Collapse one ``_ambig`` node in place onto the derivation with the lowest anti-score.

    ``tree`` must have ``data == '_ambig'``. Candidates are ranked with
    ``_antiscore_sum_drv``; on a tie ``min`` keeps the earliest child. The
    winner must be a ``'drv'`` node.
    """
    assert tree.data == '_ambig'

    lowest = min(tree.children, key=_antiscore_sum_drv)
    assert lowest.data == 'drv'

    tree.set('drv', lowest.children)
    # The rule is needed later for applying callbacks.
    tree.rule = lowest.rule

 def antiscore_sum_resolve_ambig(tree):
    """Resolve every ``_ambig`` node under ``tree`` by anti-score sum and return ``tree``.

    Each node reported by ``tree.find_data('_ambig')`` is rewritten in place by
    ``_antiscore_sum_resolve_ambig``. The tree is returned so that this resolver
    has the same contract as ``standard_resolve_ambig``, which is handed out by
    the same factory and also returns its argument.
    """
    # Iterate find_data's result directly: nodes are mutated during the walk.
    for ambig in tree.find_data('_ambig'):
        _antiscore_sum_resolve_ambig(ambig)

    return tree

 def resolve_ambig(tree):
    """Run a ``ResolveAmbig`` visitor over ``tree`` and return ``tree``.

    NOTE(review): this looks like the pre-change entry point shown by a rendered
    diff; the trailing ``return tree`` may belong to the function above it in
    the final file -- confirm.
    """
    ResolveAmbig().visit(tree)
    return tree
--- a/lark/tree.py
+++ b/lark/tree.py
@@ -1,3 +1,8 @@
 try:
    from future_builtins import filter
 except ImportError:
    pass

 from copy import deepcopy

 from .utils import inline_args
@@ -44,13 +49,7 @@ class Tree(object):
        return hash((self.data, tuple(self.children)))

    def find_pred(self, pred):
        """Find the subtrees of this tree (itself included) for which ``pred`` is true.

        NOTE(review): two implementations appear back to back here -- a recursive
        generator and a ``filter`` over ``iter_subtrees()``. This looks like a
        rendered diff with the old body followed by its replacement; confirm
        which one is intended.
        """
        # Recursive version: test this node first, then descend into the children.
        if pred(self):
            yield self

        for c in self.children:
            # Only Tree children are searched; other children are skipped.
            if isinstance(c, Tree):
                for t in c.find_pred(pred):
                    yield t
        # NOTE(review): because the body above contains ``yield``, this function is a
        #     generator, so the value returned here is not what callers iterate over.
        return filter(pred, self.iter_subtrees())

    def find_data(self, data):
        """Return whatever ``find_pred`` yields for subtrees whose ``data`` equals ``data``."""
        def has_requested_data(subtree):
            return subtree.data == data

        return self.find_pred(has_requested_data)
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -650,6 +650,62 @@ def _make_parser_test(LEXER, PARSER):
            self.assertEqual(res.children[0].data, 'a')


        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            """Check how rule priorities steer 'resolve__antiscore_sum' on the input 'abba'.

            Every grammar can derive 'abba' either as ``ab_ b_ a_`` or through
            ``indirection``. In each case the expected derivation is the one whose
            rule priorities add up to the smaller total.
            """
            # (grammar, expected concatenation of the names of start's children)
            cases = [
                ("""
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """, 'ab_b_a_'),
                ("""
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """, 'indirection'),
                ("""
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """, 'ab_b_a_'),
                ("""
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """, 'indirection'),
            ]

            for grammar, expected in cases:
                parser = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
                parsed = parser.parse('abba')
                child_names = ''.join(child.data for child in parsed.children)
                self.assertEqual(child_names, expected)


    _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()
    _TestParser.__name__ = _NAME