| @@ -64,6 +64,7 @@ class RuleOptions: | |||||
| self.keep_all_tokens = keep_all_tokens | self.keep_all_tokens = keep_all_tokens | ||||
| self.expand1 = expand1 | self.expand1 = expand1 | ||||
| self.priority = priority | self.priority = priority | ||||
| self.empty_indices = () | |||||
| def __repr__(self): | def __repr__(self): | ||||
| return 'RuleOptions(%r, %r, %r)' % ( | return 'RuleOptions(%r, %r, %r)' % ( | ||||
| @@ -45,6 +45,7 @@ class LarkOptions(object): | |||||
| profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False) | profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False) | ||||
| propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. | propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. | ||||
| lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. | lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. | ||||
| maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None | |||||
| """ | """ | ||||
| if __doc__: | if __doc__: | ||||
| __doc__ += OPTIONS_DOC | __doc__ += OPTIONS_DOC | ||||
| @@ -66,6 +67,7 @@ class LarkOptions(object): | |||||
| self.propagate_positions = o.pop('propagate_positions', False) | self.propagate_positions = o.pop('propagate_positions', False) | ||||
| self.earley__predict_all = o.pop('earley__predict_all', False) | self.earley__predict_all = o.pop('earley__predict_all', False) | ||||
| self.lexer_callbacks = o.pop('lexer_callbacks', {}) | self.lexer_callbacks = o.pop('lexer_callbacks', {}) | ||||
| self.maybe_placeholders = o.pop('maybe_placeholders', False) | |||||
| assert self.parser in ('earley', 'lalr', 'cyk', None) | assert self.parser in ('earley', 'lalr', 'cyk', None) | ||||
| @@ -179,7 +181,7 @@ class Lark: | |||||
| def _build_parser(self): | def _build_parser(self): | ||||
| self.parser_class = get_frontend(self.options.parser, self.options.lexer) | self.parser_class = get_frontend(self.options.parser, self.options.lexer) | ||||
| self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr') | |||||
| self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr', self.options.maybe_placeholders) | |||||
| callback = self._parse_tree_builder.create_callback(self.options.transformer) | callback = self._parse_tree_builder.create_callback(self.options.transformer) | ||||
| if self.profiler: | if self.profiler: | ||||
| for f in dir(callback): | for f in dir(callback): | ||||
| @@ -3,7 +3,7 @@ | |||||
| import os.path | import os.path | ||||
| import sys | import sys | ||||
| from ast import literal_eval | from ast import literal_eval | ||||
| from copy import deepcopy | |||||
| from copy import copy, deepcopy | |||||
| from .utils import bfs | from .utils import bfs | ||||
| from .lexer import Token, TerminalDef, PatternStr, PatternRE | from .lexer import Token, TerminalDef, PatternStr, PatternRE | ||||
| @@ -26,6 +26,8 @@ EXT = '.lark' | |||||
| _RE_FLAGS = 'imslux' | _RE_FLAGS = 'imslux' | ||||
| _EMPTY = Symbol('__empty__') | |||||
| _TERMINAL_NAMES = { | _TERMINAL_NAMES = { | ||||
| '.' : 'DOT', | '.' : 'DOT', | ||||
| ',' : 'COMMA', | ',' : 'COMMA', | ||||
| @@ -151,7 +153,6 @@ RULES = { | |||||
| 'literal': ['REGEXP', 'STRING'], | 'literal': ['REGEXP', 'STRING'], | ||||
| } | } | ||||
| @inline_args | @inline_args | ||||
| class EBNF_to_BNF(Transformer_InPlace): | class EBNF_to_BNF(Transformer_InPlace): | ||||
| def __init__(self): | def __init__(self): | ||||
| @@ -175,7 +176,7 @@ class EBNF_to_BNF(Transformer_InPlace): | |||||
| def expr(self, rule, op, *args): | def expr(self, rule, op, *args): | ||||
| if op.value == '?': | if op.value == '?': | ||||
| return ST('expansions', [rule, ST('expansion', [])]) | |||||
| return ST('expansions', [rule, _EMPTY]) | |||||
| elif op.value == '+': | elif op.value == '+': | ||||
| # a : b c+ d | # a : b c+ d | ||||
| # --> | # --> | ||||
| @@ -481,7 +482,8 @@ class Grammar: | |||||
| for name, rule_tree, options in rule_defs: | for name, rule_tree, options in rule_defs: | ||||
| ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None | ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None | ||||
| tree = transformer.transform(rule_tree) | tree = transformer.transform(rule_tree) | ||||
| rules.append((name, ebnf_to_bnf.transform(tree), options)) | |||||
| res = ebnf_to_bnf.transform(tree) | |||||
| rules.append((name, res, options)) | |||||
| rules += ebnf_to_bnf.new_rules | rules += ebnf_to_bnf.new_rules | ||||
| assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" | assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" | ||||
| @@ -499,9 +501,17 @@ class Grammar: | |||||
| if alias and name.startswith('_'): | if alias and name.startswith('_'): | ||||
| raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | ||||
| assert all(isinstance(x, Symbol) for x in expansion), expansion | |||||
| empty_indices = [i for i, x in enumerate(expansion) if x==_EMPTY] | |||||
| if empty_indices: | |||||
| assert options | |||||
| exp_options = copy(options) | |||||
| exp_options.empty_indices = len(expansion), empty_indices | |||||
| expansion = [x for x in expansion if x!=_EMPTY] | |||||
| else: | |||||
| exp_options = options | |||||
| rule = Rule(NonTerminal(name), expansion, alias, options) | |||||
| assert all(isinstance(x, Symbol) for x in expansion), expansion | |||||
| rule = Rule(NonTerminal(name), expansion, alias, exp_options) | |||||
| compiled_rules.append(rule) | compiled_rules.append(rule) | ||||
| return terminals, compiled_rules, self.ignore | return terminals, compiled_rules, self.ignore | ||||
| @@ -1,7 +1,5 @@ | |||||
| from .exceptions import GrammarError | from .exceptions import GrammarError | ||||
| from .utils import suppress | |||||
| from .lexer import Token | from .lexer import Token | ||||
| from .grammar import Rule | |||||
| from .tree import Tree | from .tree import Tree | ||||
| from .visitors import InlineTransformer # XXX Deprecated | from .visitors import InlineTransformer # XXX Deprecated | ||||
| @@ -19,6 +17,23 @@ class ExpandSingleChild: | |||||
| else: | else: | ||||
| return self.node_builder(children) | return self.node_builder(children) | ||||
| class AddMaybePlaceholder: | |||||
| def __init__(self, empty_indices, node_builder): | |||||
| self.node_builder = node_builder | |||||
| self.empty_indices = empty_indices | |||||
| def __call__(self, children): | |||||
| t = self.node_builder(children) | |||||
| if self.empty_indices: | |||||
| exp_len, empty_indices = self.empty_indices | |||||
| # Calculate offset to handle repetition correctly | |||||
| # e.g. ("a" "b"?)+ | |||||
| # For non-repetitive rules, offset should be 0 | |||||
| offset = len(t.children) - (exp_len - len(empty_indices)) | |||||
| for i in empty_indices: | |||||
| t.children.insert(i + offset, None) | |||||
| return t | |||||
| class PropagatePositions: | class PropagatePositions: | ||||
| def __init__(self, node_builder): | def __init__(self, node_builder): | ||||
| @@ -116,11 +131,12 @@ def ptb_inline_args(func): | |||||
| class ParseTreeBuilder: | class ParseTreeBuilder: | ||||
| def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False): | |||||
| def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False): | |||||
| self.tree_class = tree_class | self.tree_class = tree_class | ||||
| self.propagate_positions = propagate_positions | self.propagate_positions = propagate_positions | ||||
| self.always_keep_all_tokens = keep_all_tokens | self.always_keep_all_tokens = keep_all_tokens | ||||
| self.ambiguous = ambiguous | self.ambiguous = ambiguous | ||||
| self.maybe_placeholders = maybe_placeholders | |||||
| self.rule_builders = list(self._init_builders(rules)) | self.rule_builders = list(self._init_builders(rules)) | ||||
| @@ -135,6 +151,7 @@ class ParseTreeBuilder: | |||||
| wrapper_chain = filter(None, [ | wrapper_chain = filter(None, [ | ||||
| (expand_single_child and not rule.alias) and ExpandSingleChild, | (expand_single_child and not rule.alias) and ExpandSingleChild, | ||||
| maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous), | maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous), | ||||
| self.maybe_placeholders and partial(AddMaybePlaceholder, options.empty_indices), | |||||
| self.propagate_positions and PropagatePositions, | self.propagate_positions and PropagatePositions, | ||||
| ]) | ]) | ||||
| @@ -1248,6 +1248,28 @@ def _make_parser_test(LEXER, PARSER): | |||||
| res = p.parse('B') | res = p.parse('B') | ||||
| self.assertEqual(len(res.children), 3) | self.assertEqual(len(res.children), 3) | ||||
| def test_maybe_placeholders(self): | |||||
| p = Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True) | |||||
| self.assertEqual(p.parse("").children, [None, None, None]) | |||||
| self.assertEqual(p.parse("a").children, ['a', None, None]) | |||||
| self.assertEqual(p.parse("b").children, [None, 'b', None]) | |||||
| self.assertEqual(p.parse("c").children, [None, None, 'c']) | |||||
| self.assertEqual(p.parse("ab").children, ['a', 'b', None]) | |||||
| self.assertEqual(p.parse("ac").children, ['a', None, 'c']) | |||||
| self.assertEqual(p.parse("bc").children, [None, 'b', 'c']) | |||||
| self.assertEqual(p.parse("abc").children, ['a', 'b', 'c']) | |||||
| p = Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True) | |||||
| self.assertEqual(p.parse("b").children, [None, 'b', None]) | |||||
| self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None]) | |||||
| self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c']) | |||||
| self.assertEqual(p.parse("babbcabcb").children, | |||||
| [None, 'b', None, | |||||
| 'a', 'b', None, | |||||
| None, 'b', 'c', | |||||
| 'a', 'b', 'c', | |||||
| None, 'b', None]) | |||||
| _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | ||||