| @@ -195,6 +195,86 @@ def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): | |||||
| if to_expand: | if to_expand: | ||||
| return partial(AmbiguousExpander, to_expand, tree_class) | return partial(AmbiguousExpander, to_expand, tree_class) | ||||
class AmbiguousIntermediateExpander:
    """
    Lift ambiguous intermediate nodes, together with all of their
    derivations, up into the rule currently being built.

    In general, converts

    rule
      _iambig
        _inter
          someChildren1
          ...
        _inter
          someChildren2
          ...
      someChildren3
      ...

    to

    _ambig
      rule
        someChildren1
        ...
        someChildren3
        ...
      rule
        someChildren2
        ...
        someChildren3
        ...
      rule
        childrenFromNestedIambigs
        ...
        someChildren3
        ...
      ...

    Any '_iambig' nodes nested inside the derivations are propagated up
    in the same pass.
    """

    def __init__(self, tree_class, node_builder):
        # node_builder: the wrapped callback that turns a child list into
        # the node for this rule.
        self.node_builder = node_builder
        # tree_class: called as tree_class(data, children) to create the
        # temporary '_inter' trees and the final '_ambig' tree.
        self.tree_class = tree_class

    def __call__(self, children):
        """
        Build the node for `children`.

        Returns `node_builder(children)` when no '_iambig' node leads the
        child list; otherwise returns an '_ambig' tree holding one
        `node_builder` result per flattened derivation.
        """
        make_tree = self.tree_class

        def _flatten(nodes):
            """
            Recursively expand a leading '_iambig' node in `nodes`.

            Returns a list of '_inter' trees, none of which starts with
            an '_iambig' node, each already extended with the siblings
            that followed the '_iambig' node. Returns None when `nodes`
            is empty or does not start with an '_iambig' node.
            """
            if not nodes:
                return None
            # Due to the structure of the SPPF, an '_iambig' node can
            # only appear as the first child, so only that slot is checked.
            head = nodes[0]
            if not (hasattr(head, 'data') and head.data == '_iambig'):
                return None

            trailing = nodes[1:]
            derivations = []
            for inter in head.children:
                nested = _flatten(inter.children)
                if nested:
                    # The nested derivations are freshly created trees,
                    # so they can be extended in place with our siblings.
                    for tree in nested:
                        tree.children += trailing
                    derivations.extend(nested)
                else:
                    derivations.append(make_tree('_inter', inter.children + trailing))
            return derivations

        derivations = _flatten(children)
        if not derivations:
            return self.node_builder(children)
        built = [self.node_builder(d.children) for d in derivations]
        return make_tree('_ambig', built)
| def ptb_inline_args(func): | def ptb_inline_args(func): | ||||
| @wraps(func) | @wraps(func) | ||||
| def f(children): | def f(children): | ||||
| @@ -239,6 +319,7 @@ class ParseTreeBuilder: | |||||
| maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), | maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), | ||||
| self.propagate_positions and PropagatePositions, | self.propagate_positions and PropagatePositions, | ||||
| self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | ||||
| self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) | |||||
| ])) | ])) | ||||
| yield rule, wrapper_chain | yield rule, wrapper_chain | ||||
| @@ -18,7 +18,7 @@ from ..utils import logger | |||||
| from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
| from ..grammar import NonTerminal | from ..grammar import NonTerminal | ||||
| from .earley_common import Item, TransitiveItem | from .earley_common import Item, TransitiveItem | ||||
| from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor | |||||
| from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, CompleteForestToAmbiguousTreeVisitor | |||||
| class Parser: | class Parser: | ||||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False): | def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False): | ||||
| @@ -313,7 +313,7 @@ class Parser: | |||||
| assert False, 'Earley should not generate multiple start symbol items!' | assert False, 'Earley should not generate multiple start symbol items!' | ||||
| # Perform our SPPF -> AST conversion using the right ForestVisitor. | # Perform our SPPF -> AST conversion using the right ForestVisitor. | ||||
| forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else ForestToAmbiguousTreeVisitor | |||||
| forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else CompleteForestToAmbiguousTreeVisitor | |||||
| forest_tree_visitor = forest_tree_visitor_cls(self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor()) | forest_tree_visitor = forest_tree_visitor_cls(self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor()) | ||||
| return forest_tree_visitor.visit(solutions[0]) | return forest_tree_visitor.visit(solutions[0]) | ||||
| @@ -363,6 +363,75 @@ class ForestToAmbiguousTreeVisitor(ForestToTreeVisitor): | |||||
| else: | else: | ||||
| self.result = result | self.result = result | ||||
class CompleteForestToAmbiguousTreeVisitor(ForestToTreeVisitor):
    """
    An augmented version of ForestToAmbiguousTreeVisitor that is designed to
    handle ambiguous intermediate nodes as well as ambiguous symbol nodes.

    On the way down:

    - When an ambiguous intermediate node is encountered, an '_iambig' node
      is inserted into the tree.
    - Each possible derivation of an ambiguous intermediate node is represented
      by an '_inter' node added as a child of the corresponding '_iambig' node.

    On the way up, these nodes are propagated up the tree and collapsed
    into a single '_ambig' node for the nearest symbol node ancestor.
    This is achieved by the AmbiguousIntermediateExpander contained in
    the callbacks.
    """

    def _collapse_ambig(self, children):
        """
        Flatten one level of nested '_ambig' trees.

        Returns a new list in which every child that is an '_ambig' tree is
        replaced by its own children; all other children are kept as they are.
        """
        new_children = []
        for child in children:
            # Children are callback results and are not guaranteed to be
            # trees (a callback may hand back a bare token), so check for
            # the `data` attribute before reading it.
            if hasattr(child, 'data') and child.data == '_ambig':
                new_children += child.children
            else:
                new_children.append(child)
        return new_children

    def visit_token_node(self, node):
        """Append the token to the collector currently on top of the stack."""
        self.output_stack[-1].children.append(node)

    def visit_symbol_node_in(self, node):
        """
        Open a collector for an ambiguous node and return its children.

        Ambiguous intermediate nodes get an '_iambig' collector, ambiguous
        symbol nodes an '_ambig' collector. Unambiguous nodes push nothing.
        """
        if node.is_ambiguous:
            # NOTE(review): an infinite priority presumably means priorities
            # have not been computed for this node yet - confirm.
            if self.forest_sum_visitor and isinf(node.priority):
                self.forest_sum_visitor.visit(node)
            if node.is_intermediate:
                self.output_stack.append(Tree('_iambig', []))
            else:
                self.output_stack.append(Tree('_ambig', []))
        return iter(node.children)

    def visit_symbol_node_out(self, node):
        """
        Close the collector opened for an ambiguous node.

        The collected tree is attached to the enclosing collector, or stored
        in `self.result` when the stack is empty. For symbol (non-intermediate)
        nodes, nested '_ambig' children are flattened first.
        """
        if node.is_ambiguous:
            result = self.output_stack.pop()
            if not node.is_intermediate:
                result = Tree('_ambig', self._collapse_ambig(result.children))
            if self.output_stack:
                self.output_stack[-1].children.append(result)
            else:
                self.result = result

    def visit_packed_node_in(self, node):
        """
        Open a collector for one derivation and return its two sides.

        A derivation of a symbol node collects into a temporary 'drv' tree;
        a derivation of an ambiguous intermediate node collects into an
        '_inter' tree. For an unambiguous intermediate parent nothing is
        pushed, so the children land in the enclosing collector.
        """
        if not node.parent.is_intermediate:
            self.output_stack.append(Tree('drv', []))
        elif node.parent.is_ambiguous:
            self.output_stack.append(Tree('_inter', []))
        return iter([node.left, node.right])

    def visit_packed_node_out(self, node):
        """
        Close the collector opened by `visit_packed_node_in`, if any.

        Symbol-node derivations are passed through the rule's callback;
        '_inter' trees are forwarded unchanged. The result is attached to the
        enclosing collector, or stored in `self.result` when the stack is empty.
        """
        if not node.parent.is_intermediate:
            result = self.callbacks[node.rule](self.output_stack.pop().children)
        elif node.parent.is_ambiguous:
            result = self.output_stack.pop()
        else:
            # Unambiguous intermediate node: nothing was pushed on the way in.
            return
        if self.output_stack:
            self.output_stack[-1].children.append(result)
        else:
            self.result = result
| class ForestToPyDotVisitor(ForestVisitor): | class ForestToPyDotVisitor(ForestVisitor): | ||||
| """ | """ | ||||
| A Forest visitor which writes the SPPF to a PNG. | A Forest visitor which writes the SPPF to a PNG. | ||||
| @@ -460,6 +460,221 @@ def _make_full_earley_test(LEXER): | |||||
| ]) | ]) | ||||
| self.assertEqual(res, expected) | self.assertEqual(res, expected) | ||||
| def test_ambiguous_intermediate_node(self): | |||||
| grammar = """ | |||||
| start: ab bc d? | |||||
| !ab: "A" "B"? | |||||
| !bc: "B"? "C" | |||||
| !d: "D" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
| ambig_tree = l.parse("ABCD") | |||||
| expected = { | |||||
| Tree('start', [Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('d', ['D'])]), | |||||
| Tree('start', [Tree('ab', ['A', 'B']), Tree('bc', ['C']), Tree('d', ['D'])]) | |||||
| } | |||||
| self.assertEqual(ambig_tree.data, '_ambig') | |||||
| self.assertEqual(set(ambig_tree.children), expected) | |||||
| def test_ambiguous_symbol_and_intermediate_nodes(self): | |||||
| grammar = """ | |||||
| start: ab bc cd | |||||
| !ab: "A" "B"? | |||||
| !bc: "B"? "C"? | |||||
| !cd: "C"? "D" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
| ambig_tree = l.parse("ABCD") | |||||
| expected = { | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A', 'B']), | |||||
| Tree('bc', ['C']), | |||||
| Tree('cd', ['D']) | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A', 'B']), | |||||
| Tree('bc', []), | |||||
| Tree('cd', ['C', 'D']) | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A']), | |||||
| Tree('bc', ['B', 'C']), | |||||
| Tree('cd', ['D']) | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A']), | |||||
| Tree('bc', ['B']), | |||||
| Tree('cd', ['C', 'D']) | |||||
| ]), | |||||
| } | |||||
| self.assertEqual(ambig_tree.data, '_ambig') | |||||
| self.assertEqual(set(ambig_tree.children), expected) | |||||
| def test_nested_ambiguous_intermediate_nodes(self): | |||||
| grammar = """ | |||||
| start: ab bc cd e? | |||||
| !ab: "A" "B"? | |||||
| !bc: "B"? "C"? | |||||
| !cd: "C"? "D" | |||||
| !e: "E" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
| ambig_tree = l.parse("ABCDE") | |||||
| expected = { | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A', 'B']), | |||||
| Tree('bc', ['C']), | |||||
| Tree('cd', ['D']), | |||||
| Tree('e', ['E']) | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A']), | |||||
| Tree('bc', ['B', 'C']), | |||||
| Tree('cd', ['D']), | |||||
| Tree('e', ['E']) | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A']), | |||||
| Tree('bc', ['B']), | |||||
| Tree('cd', ['C', 'D']), | |||||
| Tree('e', ['E']) | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A', 'B']), | |||||
| Tree('bc', []), | |||||
| Tree('cd', ['C', 'D']), | |||||
| Tree('e', ['E']) | |||||
| ]), | |||||
| } | |||||
| self.assertEqual(ambig_tree.data, '_ambig') | |||||
| self.assertEqual(set(ambig_tree.children), expected) | |||||
| def test_nested_ambiguous_intermediate_nodes2(self): | |||||
| grammar = """ | |||||
| start: ab bc cd de f | |||||
| !ab: "A" "B"? | |||||
| !bc: "B"? "C"? | |||||
| !cd: "C"? "D"? | |||||
| !de: "D"? "E" | |||||
| !f: "F" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
| ambig_tree = l.parse("ABCDEF") | |||||
| expected = { | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A', 'B']), | |||||
| Tree('bc', ['C']), | |||||
| Tree('cd', ['D']), | |||||
| Tree('de', ['E']), | |||||
| Tree('f', ['F']), | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A']), | |||||
| Tree('bc', ['B', 'C']), | |||||
| Tree('cd', ['D']), | |||||
| Tree('de', ['E']), | |||||
| Tree('f', ['F']), | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A']), | |||||
| Tree('bc', ['B']), | |||||
| Tree('cd', ['C', 'D']), | |||||
| Tree('de', ['E']), | |||||
| Tree('f', ['F']), | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A']), | |||||
| Tree('bc', ['B']), | |||||
| Tree('cd', ['C']), | |||||
| Tree('de', ['D', 'E']), | |||||
| Tree('f', ['F']), | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A', "B"]), | |||||
| Tree('bc', []), | |||||
| Tree('cd', ['C']), | |||||
| Tree('de', ['D', 'E']), | |||||
| Tree('f', ['F']), | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A']), | |||||
| Tree('bc', ['B', 'C']), | |||||
| Tree('cd', []), | |||||
| Tree('de', ['D', 'E']), | |||||
| Tree('f', ['F']), | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A', 'B']), | |||||
| Tree('bc', []), | |||||
| Tree('cd', ['C', 'D']), | |||||
| Tree('de', ['E']), | |||||
| Tree('f', ['F']), | |||||
| ]), | |||||
| Tree('start', [ | |||||
| Tree('ab', ['A', 'B']), | |||||
| Tree('bc', ['C']), | |||||
| Tree('cd', []), | |||||
| Tree('de', ['D', 'E']), | |||||
| Tree('f', ['F']), | |||||
| ]), | |||||
| } | |||||
| self.assertEqual(ambig_tree.data, '_ambig') | |||||
| self.assertEqual(set(ambig_tree.children), expected) | |||||
| def test_ambiguous_intermediate_node_unnamed_token(self): | |||||
| grammar = """ | |||||
| start: ab bc "D" | |||||
| !ab: "A" "B"? | |||||
| !bc: "B"? "C" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
| ambig_tree = l.parse("ABCD") | |||||
| expected = { | |||||
| Tree('start', [Tree('ab', ['A']), Tree('bc', ['B', 'C'])]), | |||||
| Tree('start', [Tree('ab', ['A', 'B']), Tree('bc', ['C'])]) | |||||
| } | |||||
| self.assertEqual(ambig_tree.data, '_ambig') | |||||
| self.assertEqual(set(ambig_tree.children), expected) | |||||
| def test_ambiguous_intermediate_node_inlined_rule(self): | |||||
| grammar = """ | |||||
| start: ab _bc d? | |||||
| !ab: "A" "B"? | |||||
| _bc: "B"? "C" | |||||
| !d: "D" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
| ambig_tree = l.parse("ABCD") | |||||
| expected = { | |||||
| Tree('start', [Tree('ab', ['A']), Tree('d', ['D'])]), | |||||
| Tree('start', [Tree('ab', ['A', 'B']), Tree('d', ['D'])]) | |||||
| } | |||||
| self.assertEqual(ambig_tree.data, '_ambig') | |||||
| self.assertEqual(set(ambig_tree.children), expected) | |||||
| def test_ambiguous_intermediate_node_conditionally_inlined_rule(self): | |||||
| grammar = """ | |||||
| start: ab bc d? | |||||
| !ab: "A" "B"? | |||||
| !?bc: "B"? "C" | |||||
| !d: "D" | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
| ambig_tree = l.parse("ABCD") | |||||
| expected = { | |||||
| Tree('start', [Tree('ab', ['A']), Tree('bc', ['B', 'C']), Tree('d', ['D'])]), | |||||
| Tree('start', [Tree('ab', ['A', 'B']), 'C', Tree('d', ['D'])]) | |||||
| } | |||||
| self.assertEqual(ambig_tree.data, '_ambig') | |||||
| self.assertEqual(set(ambig_tree.children), expected) | |||||
| def test_fruitflies_ambig(self): | def test_fruitflies_ambig(self): | ||||
| grammar = """ | grammar = """ | ||||
| start: noun verb noun -> simple | start: noun verb noun -> simple | ||||