@@ -200,7 +200,7 @@ class Lark(Serialize):
         self.grammar = load_grammar(grammar, self.source)
 
         # Compile the EBNF grammar into BNF
-        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()
+        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
 
         # If the user asked to invert the priorities, negate them all here.
         # This replaces the old 'resolve__antiscore_sum' option.
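This first hunk threads the user-facing `start` option through to grammar compilation, so rules can be pruned relative to the actual entry point. A minimal usage sketch of the effect (the grammar is illustrative; `start` is an existing Lark constructor option):

    from lark import Lark

    # 'b' is not reachable from the chosen start symbol 'a', so after this
    # change it is pruned at compile time instead of reaching the parser.
    grammar = '''
        a: "a"
        b: "b"
    '''
    parser = Lark(grammar, start='a', parser='lalr')
    print(parser.parse("a"))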
@@ -205,7 +205,7 @@ class EBNF_to_BNF(Transformer_InPlace):
         keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
 
         def will_not_get_removed(sym):
-            if isinstance(sym, NonTerminal):
+            if isinstance(sym, NonTerminal):
                 return not sym.name.startswith('_')
             if isinstance(sym, Terminal):
                 return keep_all_tokens or not sym.filter_out
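For context, `will_not_get_removed` is the predicate lark uses to decide which symbols survive tree filtering: rules named with a leading underscore are inlined away, and filtered-out (typically anonymous) terminals are dropped unless `keep_all_tokens` is set. A self-contained sketch of the same decision table, with minimal stand-ins for lark's internal symbol classes:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class NonTerminal:
        name: str

    @dataclass(frozen=True)
    class Terminal:
        name: str
        filter_out: bool = False

    def will_not_get_removed(sym, keep_all_tokens=False):
        if isinstance(sym, NonTerminal):
            return not sym.name.startswith('_')       # '_foo' rules are inlined
        return keep_all_tokens or not sym.filter_out  # filtered tokens vanish

    assert will_not_get_removed(NonTerminal('item'))
    assert not will_not_get_removed(NonTerminal('_sep'))
    assert not will_not_get_removed(Terminal('COMMA', filter_out=True))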
@@ -465,7 +465,7 @@ class Grammar:
         self.rule_defs = rule_defs
         self.ignore = ignore
 
-    def compile(self):
+    def compile(self, start):
         # We change the trees in-place (to support huge grammars)
         # So deepcopy allows calling compile more than once.
         term_defs = deepcopy(list(self.term_defs))
@@ -546,6 +546,18 @@ class Grammar:
         # Remove duplicates
         compiled_rules = list(set(compiled_rules))
 
+        # Filter out unused rules
+        while True:
+            c = len(compiled_rules)
+            used_rules = {s for r in compiled_rules
+                            for s in r.expansion
+                            if isinstance(s, NonTerminal)
+                            and s != r.origin}
+            compiled_rules = [r for r in compiled_rules if r.origin.name==start or r.origin in used_rules]
+            if len(compiled_rules) == c:
+                break
+
         # Filter out unused terminals
         used_terms = {t.name for r in compiled_rules
                       for t in r.expansion
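The core of the patch is this fixed-point pass: a rule survives when its head is the start symbol or is referenced from some other rule's body, and self-references deliberately don't count (`s != r.origin`), so a directly self-recursive orphan is pruned too. Because dropping one rule can orphan a chain of others, the loop repeats until a pass removes nothing. A self-contained sketch of the same idea, with plain strings standing in for lark's NonTerminal symbols:

    def prune_unused(rules, start):
        # rules: list of (origin, expansion) pairs, e.g. ('x', ['EKS'])
        while True:
            before = len(rules)
            used = {sym for origin, expansion in rules
                        for sym in expansion
                        if sym != origin}        # self-recursion is not a "use"
            rules = [(o, e) for o, e in rules if o == start or o in used]
            if len(rules) == before:             # fixed point: nothing removed
                return rules

    rules = [('start', ['x']), ('x', ['EKS']), ('unused', ['x'])]
    assert prune_unused(rules, 'start') == [('start', ['x']), ('x', ['EKS'])]

One limitation worth noting: mutually-recursive orphans (a: b, b: a, with neither reachable from start) keep each other in the used set and survive this pass; a full reachability walk from start would remove those as well.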
@@ -69,7 +69,7 @@ class MakeMatchTree:
 class Reconstructor:
     def __init__(self, parser):
         # XXX TODO calling compile twice returns different results!
-        tokens, rules, _grammar_extra = parser.grammar.compile()
+        tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)
 
         self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
         self.rules = list(self._build_recons_rules(rules))
@@ -1493,6 +1493,19 @@ def _make_parser_test(LEXER, PARSER):
             parser.parse(r'"That" "And a \"b"')
 
+        def test_meddling_unused(self):
+            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"
+            grammar = """
+                start: EKS* x
+                x: EKS
+                unused: x*
+                EKS: "x"
+            """
+            parser = _Lark(grammar)
+
         @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)")
         def test_serialize(self):
             grammar = """