| @@ -195,6 +195,86 @@ def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): | |||
| if to_expand: | |||
| return partial(AmbiguousExpander, to_expand, tree_class) | |||
class AmbiguousIntermediateExpander:
    """
    Lift ambiguous intermediate nodes (and every derivation they carry) up
    to the rule currently being built.

    In general, converts

    rule
      _iambig
        _inter
          someChildren1
          ...
        _inter
          someChildren2
          ...
      someChildren3
      ...

    to

    _ambig
      rule
        someChildren1
        ...
        someChildren3
        ...
      rule
        someChildren2
        ...
        someChildren3
        ...
      rule
        childrenFromNestedIambigs
        ...
        someChildren3
        ...
      ...

    propagating up any nested '_iambig' nodes along the way.
    """

    def __init__(self, tree_class, node_builder):
        # tree_class(data, children) builds the '_inter' / '_ambig' wrappers;
        # node_builder(children) builds the node for the current rule.
        self.tree_class = tree_class
        self.node_builder = node_builder

    @staticmethod
    def _is_iambig_tree(child):
        """Return True when ``child`` carries a ``data`` attribute equal to '_iambig'."""
        return getattr(child, 'data', None) == '_iambig'

    def _collapse_iambig(self, children):
        """
        Recursively flatten the derivations hanging off a leading '_iambig'
        child.

        Returns a list of '_inter' trees that contain no nested '_iambig'
        nodes, or None when ``children`` does not start with an '_iambig'
        node.
        """
        # Due to the structure of the SPPF, an '_iambig' node can only
        # appear as the first child, so only that position is inspected.
        if not children or not self._is_iambig_tree(children[0]):
            return None

        trailing = children[1:]
        flattened = []
        for derivation in children[0].children:
            nested = self._collapse_iambig(derivation.children)
            if not nested:
                # Plain derivation: wrap its children plus the siblings that
                # followed the '_iambig' node in a fresh '_inter' tree.
                flattened.append(
                    self.tree_class('_inter', derivation.children + trailing)
                )
                continue
            # Derivation started with its own '_iambig': extend each of the
            # already-flattened alternatives with the trailing siblings.
            for inter in nested:
                inter.children += trailing
            flattened += nested
        return flattened

    def __call__(self, children):
        """
        Build the node for ``children``; if they start with an '_iambig'
        node, build one node per flattened derivation and wrap them all in
        an '_ambig' tree instead.
        """
        derivations = self._collapse_iambig(children)
        if not derivations:
            return self.node_builder(children)
        built = [self.node_builder(inter.children) for inter in derivations]
        return self.tree_class('_ambig', built)
| def ptb_inline_args(func): | |||
| @wraps(func) | |||
| def f(children): | |||
| @@ -239,6 +319,7 @@ class ParseTreeBuilder: | |||
| maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), | |||
| self.propagate_positions and PropagatePositions, | |||
| self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | |||
| self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) | |||
| ])) | |||
| yield rule, wrapper_chain | |||
| @@ -18,7 +18,7 @@ from ..utils import logger | |||
| from .grammar_analysis import GrammarAnalyzer | |||
| from ..grammar import NonTerminal | |||
| from .earley_common import Item, TransitiveItem | |||
| from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor | |||
| from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, CompleteForestToAmbiguousTreeVisitor | |||
| class Parser: | |||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False): | |||
| @@ -313,7 +313,7 @@ class Parser: | |||
| assert False, 'Earley should not generate multiple start symbol items!' | |||
| # Perform our SPPF -> AST conversion using the right ForestVisitor. | |||
| forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else ForestToAmbiguousTreeVisitor | |||
| forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else CompleteForestToAmbiguousTreeVisitor | |||
| forest_tree_visitor = forest_tree_visitor_cls(self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor()) | |||
| return forest_tree_visitor.visit(solutions[0]) | |||
| @@ -363,6 +363,75 @@ class ForestToAmbiguousTreeVisitor(ForestToTreeVisitor): | |||
| else: | |||
| self.result = result | |||
class CompleteForestToAmbiguousTreeVisitor(ForestToTreeVisitor):
    """
    An augmented version of ForestToAmbiguousTreeVisitor that is designed to
    handle ambiguous intermediate nodes as well as ambiguous symbol nodes.

    On the way down:

    - When an ambiguous intermediate node is encountered, an '_iambig' node
      is inserted into the tree.
    - Each possible derivation of an ambiguous intermediate node is represented
      by an '_inter' node added as a child of the corresponding '_iambig' node.

    On the way up, these nodes are propagated up the tree and collapsed
    into a single '_ambig' node for the nearest symbol node ancestor.
    This is achieved by the AmbiguousIntermediateExpander contained in
    the callbacks.

    NOTE(review): ``output_stack``, ``callbacks``, ``forest_sum_visitor`` and
    ``result`` are used here but not defined in this class; presumably they
    are set up by ForestToTreeVisitor -- confirm against the base class.
    """

    def _collapse_ambig(self, children):
        """
        Return a new list in which every child that is an '_ambig' tree is
        replaced, in place, by that tree's own children.

        Children without a ``data`` attribute (e.g. a bare token produced
        when a rule callback returns its single child directly) are kept
        as-is instead of raising AttributeError.
        """
        new_children = []
        for child in children:
            # Guard with hasattr: a derivation is not necessarily a tree.
            if hasattr(child, 'data') and child.data == '_ambig':
                new_children += child.children
            else:
                new_children.append(child)
        return new_children

    def visit_token_node(self, node):
        """Append the token node to the children of the tree on top of the stack."""
        self.output_stack[-1].children.append(node)

    def visit_symbol_node_in(self, node):
        """
        Open a placeholder tree for an ambiguous node and return an iterator
        over all of the node's children.
        """
        if node.is_ambiguous:
            # Run the sum visitor only when the priority is still infinite.
            if self.forest_sum_visitor and isinf(node.priority):
                self.forest_sum_visitor.visit(node)
            if node.is_intermediate:
                self.output_stack.append(Tree('_iambig', []))
            else:
                self.output_stack.append(Tree('_ambig', []))
        return iter(node.children)

    def visit_symbol_node_out(self, node):
        """
        Close the placeholder opened for an ambiguous node and hand it to
        the parent tree on the stack, or store it as the final result when
        the stack is empty. Non-ambiguous nodes need no work here.
        """
        if node.is_ambiguous:
            result = self.output_stack.pop()
            if not node.is_intermediate:
                # Splice nested '_ambig' children into this node's '_ambig'.
                result = Tree('_ambig', self._collapse_ambig(result.children))
            if self.output_stack:
                self.output_stack[-1].children.append(result)
            else:
                self.result = result

    def visit_packed_node_in(self, node):
        """
        Open a collector tree for this derivation where one is needed and
        return an iterator over the packed node's left and right children.
        """
        if not node.parent.is_intermediate:
            # Derivation of a symbol node: collect children for the callback.
            self.output_stack.append(Tree('drv', []))
        elif node.parent.is_ambiguous:
            # One alternative of an ambiguous intermediate node.
            self.output_stack.append(Tree('_inter', []))
        # Unambiguous intermediate parent: nothing is pushed, so children
        # land directly in whatever tree is already on top of the stack.
        return iter([node.left, node.right])

    def visit_packed_node_out(self, node):
        """
        Finish the derivation opened in ``visit_packed_node_in`` and attach
        the outcome to the parent tree (or store it as the final result).
        """
        if not node.parent.is_intermediate:
            # Symbol-node derivation: run the rule callback on the children.
            result = self.callbacks[node.rule](self.output_stack.pop().children)
        elif node.parent.is_ambiguous:
            # Keep the raw '_inter' tree for the enclosing '_iambig' node.
            result = self.output_stack.pop()
        else:
            # Nothing was pushed for this node, so nothing to pop or attach.
            return

        if self.output_stack:
            self.output_stack[-1].children.append(result)
        else:
            self.result = result
| class ForestToPyDotVisitor(ForestVisitor): | |||
| """ | |||
| A Forest visitor which writes the SPPF to a PNG. | |||
| @@ -460,6 +460,221 @@ def _make_full_earley_test(LEXER): | |||
| ]) | |||
| self.assertEqual(res, expected) | |||
def test_ambiguous_intermediate_node(self):
    """An ambiguous intermediate node yields one 'start' tree per derivation."""
    grammar = """
    start: ab bc d?
    !ab: "A" "B"?
    !bc: "B"? "C"
    !d: "D"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # Each derivation is a sequence of (rule name, tokens) pairs.
    derivations = (
        (('ab', ['A']), ('bc', ['B', 'C']), ('d', ['D'])),
        (('ab', ['A', 'B']), ('bc', ['C']), ('d', ['D'])),
    )
    expected = {
        Tree('start', [Tree(name, tokens) for name, tokens in derivation])
        for derivation in derivations
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_ambiguous_symbol_and_intermediate_nodes(self):
    """Ambiguity in both symbol and intermediate nodes is fully enumerated."""
    grammar = """
    start: ab bc cd
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # Each derivation is a sequence of (rule name, tokens) pairs.
    derivations = (
        (('ab', ['A', 'B']), ('bc', ['C']), ('cd', ['D'])),
        (('ab', ['A', 'B']), ('bc', []), ('cd', ['C', 'D'])),
        (('ab', ['A']), ('bc', ['B', 'C']), ('cd', ['D'])),
        (('ab', ['A']), ('bc', ['B']), ('cd', ['C', 'D'])),
    )
    expected = {
        Tree('start', [Tree(name, tokens) for name, tokens in derivation])
        for derivation in derivations
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_nested_ambiguous_intermediate_nodes(self):
    """Nested ambiguous intermediate nodes collapse into one flat '_ambig'."""
    grammar = """
    start: ab bc cd e?
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"
    !e: "E"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCDE")

    # Each derivation is a sequence of (rule name, tokens) pairs.
    derivations = (
        (('ab', ['A', 'B']), ('bc', ['C']), ('cd', ['D']), ('e', ['E'])),
        (('ab', ['A']), ('bc', ['B', 'C']), ('cd', ['D']), ('e', ['E'])),
        (('ab', ['A']), ('bc', ['B']), ('cd', ['C', 'D']), ('e', ['E'])),
        (('ab', ['A', 'B']), ('bc', []), ('cd', ['C', 'D']), ('e', ['E'])),
    )
    expected = {
        Tree('start', [Tree(name, tokens) for name, tokens in derivation])
        for derivation in derivations
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_nested_ambiguous_intermediate_nodes2(self):
    """Deeper nesting of ambiguous intermediate nodes is still flattened."""
    grammar = """
    start: ab bc cd de f
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"?
    !de: "D"? "E"
    !f: "F"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCDEF")

    # Token lists for ab, bc, cd and de; the trailing 'f' rule always
    # matches the single token 'F'.
    splits = (
        (['A', 'B'], ['C'], ['D'], ['E']),
        (['A'], ['B', 'C'], ['D'], ['E']),
        (['A'], ['B'], ['C', 'D'], ['E']),
        (['A'], ['B'], ['C'], ['D', 'E']),
        (['A', 'B'], [], ['C'], ['D', 'E']),
        (['A'], ['B', 'C'], [], ['D', 'E']),
        (['A', 'B'], [], ['C', 'D'], ['E']),
        (['A', 'B'], ['C'], [], ['D', 'E']),
    )
    expected = {
        Tree('start', [
            Tree('ab', ab),
            Tree('bc', bc),
            Tree('cd', cd),
            Tree('de', de),
            Tree('f', ['F']),
        ])
        for ab, bc, cd, de in splits
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_ambiguous_intermediate_node_unnamed_token(self):
    """A filtered-out anonymous token after the ambiguity leaves no trace."""
    grammar = """
    start: ab bc "D"
    !ab: "A" "B"?
    !bc: "B"? "C"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # Token lists for ab and bc in each derivation.
    splits = (
        (['A'], ['B', 'C']),
        (['A', 'B'], ['C']),
    )
    expected = {
        Tree('start', [Tree('ab', ab), Tree('bc', bc)])
        for ab, bc in splits
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_ambiguous_intermediate_node_inlined_rule(self):
    """An inlined rule (``_bc``) in the ambiguous span contributes no children."""
    grammar = """
    start: ab _bc d?
    !ab: "A" "B"?
    _bc: "B"? "C"
    !d: "D"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # Only the tokens captured by 'ab' differ between the derivations.
    expected = {
        Tree('start', [Tree('ab', ab), Tree('d', ['D'])])
        for ab in (['A'], ['A', 'B'])
    }

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
def test_ambiguous_intermediate_node_conditionally_inlined_rule(self):
    """A ``?rule`` is inlined to its bare token only in the one-child derivation."""
    grammar = """
    start: ab bc d?
    !ab: "A" "B"?
    !?bc: "B"? "C"
    !d: "D"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    tree = parser.parse("ABCD")

    # 'bc' keeps its own tree when it has two children ...
    kept = Tree('start', [
        Tree('ab', ['A']),
        Tree('bc', ['B', 'C']),
        Tree('d', ['D']),
    ])
    # ... and is replaced by its single child 'C' otherwise.
    inlined = Tree('start', [
        Tree('ab', ['A', 'B']),
        'C',
        Tree('d', ['D']),
    ])
    expected = {kept, inlined}

    self.assertEqual(tree.data, '_ambig')
    self.assertEqual(set(tree.children), expected)
| def test_fruitflies_ambig(self): | |||
| grammar = """ | |||
| start: noun verb noun -> simple | |||