@@ -267,7 +267,7 @@ class Lark(Serialize):
         assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )

         # Parse the grammar file and compose the grammars (TODO)
-        self.grammar = load_grammar(grammar, self.source, re_module)
+        self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)

         # Compile the EBNF grammar into BNF
         self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)

@@ -322,7 +322,7 @@ class Lark(Serialize):
         self._callbacks = None
         # we don't need these callbacks if we aren't building a tree
         if self.options.ambiguity != 'forest':
-            self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
+            self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
             self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)

     def _build_parser(self):
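
The user-visible effect of these two hunks: `keep_all_tokens` is now applied while the grammar is loaded, per rule, instead of globally in `ParseTreeBuilder`, so it composes with `maybe_placeholders`. A minimal usage sketch (expected output taken from the test case added at the end of this diff):

```python
from lark import Lark

# Same grammar as the new test below: three optional anonymous tokens.
p = Lark('start: ["a"] ["b"] ["c"]', maybe_placeholders=True, keep_all_tokens=True)
print(p.parse('').children)  # -> [None, None, None]
```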
@@ -650,7 +650,7 @@ class Grammar:

 _imported_grammars = {}
-def import_grammar(grammar_path, re_, base_paths=[]):
+def import_grammar(grammar_path, loader, base_paths=[]):
     if grammar_path not in _imported_grammars:
         import_paths = base_paths + IMPORT_PATHS
         for import_path in import_paths:

@@ -658,7 +658,7 @@ def import_grammar(grammar_path, re_, base_paths=[]):
             joined_path = os.path.join(import_path, grammar_path)
             with open(joined_path, encoding='utf8') as f:
                 text = f.read()
-            grammar = load_grammar(text, joined_path, re_)
+            grammar = loader.load_grammar(text, joined_path)
             _imported_grammars[grammar_path] = grammar
             break
         else:
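
Passing the whole loader instead of just the `re` module means grammars pulled in via `%import` are parsed with the same loader state, `always_keep_all_tokens` included. The contract the `loader` argument has to satisfy is just a `load_grammar(text, name)` method; a hypothetical stand-in for illustration:

```python
# _MiniLoader is a made-up name; in the patch the real argument is the
# GrammarLoader instance itself, which carries re_module and
# always_keep_all_tokens along implicitly.
class _MiniLoader:
    def load_grammar(self, grammar_text, grammar_name):
        return ('grammar-for', grammar_name)  # stand-in for a Grammar object

print(_MiniLoader().load_grammar('start: "a"', 'demo.lark'))
```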
@@ -803,7 +803,7 @@ class GrammarLoader:
         ('%ignore expects a value', ['%ignore %import\n']),
     ]

-    def __init__(self, re_module):
+    def __init__(self, re_module, always_keep_all_tokens):
         terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

         rules = [options_from_rule(name, None, x) for name, x in RULES.items()]

@@ -816,6 +816,7 @@ class GrammarLoader:
         self.canonize_tree = CanonizeTree()
         self.re_module = re_module
+        self.always_keep_all_tokens = always_keep_all_tokens

     def load_grammar(self, grammar_text, grammar_name='<?>'):
         "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
@@ -901,7 +902,7 @@ class GrammarLoader:
         # import grammars
         for dotted_path, (base_paths, aliases) in imports.items():
             grammar_path = os.path.join(*dotted_path) + EXT
-            g = import_grammar(grammar_path, self.re_module, base_paths=base_paths)
+            g = import_grammar(grammar_path, self, base_paths=base_paths)
             new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)

             term_defs += new_td
@@ -946,7 +947,10 @@ class GrammarLoader:
         rules = rule_defs

         rule_names = {}
-        for name, params, _x, _o in rules:
+        for name, params, _x, option in rules:
+            if self.always_keep_all_tokens:  # This has to happen somewhere; doing it here avoids an extra pass over the rules
+                option.keep_all_tokens = True
+
             if name.startswith('__'):
                 raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name)
             if name in rule_names:
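
This loop is where the global option becomes per-rule state. A minimal sketch of the mechanism, using a simplified stand-in for Lark's `RuleOptions` (the real class lives in `lark.grammar`):

```python
class RuleOptions:  # simplified stand-in, illustration only
    def __init__(self, keep_all_tokens=False):
        self.keep_all_tokens = keep_all_tokens

# Tuples mirror the (name, params, expansions, options) shape iterated above;
# the expansions are elided.
rule_defs = [
    ('start', [], None, RuleOptions()),
    ('item',  [], None, RuleOptions(keep_all_tokens=True)),
]

always_keep_all_tokens = True
for _name, _params, _x, option in rule_defs:
    if always_keep_all_tokens:
        option.keep_all_tokens = True

assert all(opt.keep_all_tokens for *_, opt in rule_defs)
```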
@@ -981,5 +985,5 @@ class GrammarLoader:


-def load_grammar(grammar, source, re_):
-    return GrammarLoader(re_).load_grammar(grammar, source)
+def load_grammar(grammar, source, re_, always_keep_all_tokens):
+    return GrammarLoader(re_, always_keep_all_tokens).load_grammar(grammar, source)
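
The new parameter is mandatory, so every caller of this module-level helper must now pass the flag explicitly (the `Lark` constructor hunk at the top does). A hedged call-site example, assuming `load_grammar` is importable from `lark.load_grammar` as in the current tree:

```python
import re
from lark.load_grammar import load_grammar

g = load_grammar('start: "a"', '<demo>', re, always_keep_all_tokens=False)
```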
@@ -299,10 +299,9 @@ def apply_visit_wrapper(func, name, wrapper):

 class ParseTreeBuilder:
-    def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
+    def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
         self.tree_class = tree_class
         self.propagate_positions = propagate_positions
-        self.always_keep_all_tokens = keep_all_tokens
         self.ambiguous = ambiguous
         self.maybe_placeholders = maybe_placeholders

@@ -311,7 +310,7 @@ class ParseTreeBuilder:
     def _init_builders(self, rules):
         for rule in rules:
             options = rule.options
-            keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens
+            keep_all_tokens = options.keep_all_tokens
             expand_single_child = options.expand1

             wrapper_chain = list(filter(None, [
@@ -2041,6 +2041,10 @@ def _make_parser_test(LEXER, PARSER):
             # Anonymous tokens shouldn't count
             p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
             self.assertEqual(p.parse("").children, [])

+            # Unless keep_all_tokens=True
+            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
+            self.assertEqual(p.parse("").children, [None, None, None])
+
             # All invisible constructs shouldn't count
             p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]