@@ -324,7 +324,7 @@ class TokenTreeToPattern(Transformer):
     def expansion(self, items):
         if len(items) == 1:
             return items[0]
-        if len(set(i.flags for i in items)) > 1:
+        if len({i.flags for i in items}) > 1:
             raise GrammarError("Lark doesn't support joining tokens with conflicting flags!")
         return PatternRE(''.join(i.to_regexp() for i in items), items[0].flags)
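The hunk above is a pure style cleanup: a set comprehension replaces set() over a
generator expression and builds the same set. A minimal sketch of the flag-conflict
check it feeds, using a hypothetical Pat stand-in rather than Lark's real pattern
classes:

    from collections import namedtuple

    # Hypothetical stand-in for a pattern object; only .flags matters here.
    Pat = namedtuple('Pat', 'regexp flags')

    items = [Pat('a+', 'i'), Pat('b*', '')]

    # Both spellings collect the distinct flag values; more than one distinct
    # value means the joined token would carry conflicting regexp flags.
    assert {i.flags for i in items} == set(i.flags for i in items)
    if len({i.flags for i in items}) > 1:
        print('conflicting flags:', {i.flags for i in items})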
@@ -348,60 +348,64 @@ def _interleave(l, item):
         elif is_terminal(e):
             yield item
 
+def _choice_of_rules(rules):
+    return T('expansions', [T('expansion', [Token('RULE', name)]) for name in rules])
+
+
 class Grammar:
     def __init__(self, rule_defs, token_defs, extra):
         self.token_defs = token_defs
         self.rule_defs = rule_defs
         self.extra = extra
 
-    def compile(self, lexer=False, start=None):
-        if not lexer:
-            rule_defs = deepcopy(self.rule_defs)
-
-            # XXX VERY HACKY!! There must be a better way..
-            ignore_tokens = [('_'+name, t) for name, t in self.token_defs if name in self.extra['ignore']]
-            if ignore_tokens:
-                self.token_defs = [('_'+name if name in self.extra['ignore'] else name,t) for name,t in self.token_defs]
-                ignore_names = [t[0] for t in ignore_tokens]
-
-                expr = Token('RULE', '__ignore')
-                for r, tree, _o in rule_defs:
-                    for exp in tree.find_data('expansion'):
-                        exp.children = list(_interleave(exp.children, expr))
-                        if r == start:
-                            exp.children = [expr] + exp.children
-                    for exp in tree.find_data('expr'):
-                        exp.children[0] = T('expansion', list(_interleave(exp.children[:1], expr)))
-
-                x = [T('expansion', [Token('RULE', x)]) for x in ignore_names]
-                _ignore_tree = T('expr', [T('expansions', x), Token('OP', '?')])
-                rule_defs.append(('__ignore', _ignore_tree, None))
-            # End of "ignore" section
-
-            rule_defs += [(name, tree, RuleOptions(keep_all_tokens=True)) for name, tree in self.token_defs]
-            token_defs = []
-
-            tokens_to_convert = {name: '__token_'+name for name, tree, _ in rule_defs if is_terminal(name)}
-            new_rule_defs = []
-            for name, tree, options in rule_defs:
-                if name in tokens_to_convert:
-                    if name.startswith('_'):
-                        options = RuleOptions.new_from(options, filter_out=True)
-                    else:
-                        options = RuleOptions.new_from(options, create_token=name)
-                    name = tokens_to_convert[name]
-                    inner = Token('RULE', name + '_inner')
-                    new_rule_defs.append((name, T('expansions', [T('expansion', [inner])]), None))
-                    name = inner
-                else:
-                    for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ):
-                        for i, sym in enumerate(exp.children):
-                            if sym in tokens_to_convert:
-                                exp.children[i] = Token(sym.type, tokens_to_convert[sym])
-                new_rule_defs.append((name, tree, options))
-
-            rule_defs = new_rule_defs
+    def _prepare_scanless_grammar(self, start):
+        # XXX Pretty hacky! There should be a better way to write this method..
+        rule_defs = deepcopy(self.rule_defs)
+        term_defs = self.token_defs
+
+        # Implement the "%ignore" feature without a lexer..
+        terms_to_ignore = {name:'__'+name for name in self.extra['ignore']}
+        if terms_to_ignore:
+            assert set(terms_to_ignore) <= {name for name, t in term_defs}
+            term_defs = [(terms_to_ignore.get(name,name),t) for name,t in term_defs]
+
+            expr = Token('RULE', '__ignore')
+            for r, tree, _o in rule_defs:
+                for exp in tree.find_data('expansion'):
+                    exp.children = list(_interleave(exp.children, expr))
+                    if r == start:
+                        exp.children = [expr] + exp.children
+                for exp in tree.find_data('expr'):
+                    exp.children[0] = T('expansion', list(_interleave(exp.children[:1], expr)))
+
+            _ignore_tree = T('expr', [_choice_of_rules(terms_to_ignore.values()), Token('OP', '?')])
+            rule_defs.append(('__ignore', _ignore_tree, None))
+
+        # Convert all tokens to rules
+        new_terminal_names = {name: '__token_'+name for name, tree in term_defs}
+
+        for name, tree, options in rule_defs:
+            for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ):
+                for i, sym in enumerate(exp.children):
+                    if sym in new_terminal_names:
+                        exp.children[i] = Token(sym.type, new_terminal_names[sym])
+
+        for name, tree in term_defs:
+            if name.startswith('_'):
+                options = RuleOptions(filter_out=True)
+            else:
+                options = RuleOptions(keep_all_tokens=True, create_token=name)
+
+            name = new_terminal_names[name]
+            inner_name = name + '_inner'
+            rule_defs.append((name, _choice_of_rules([inner_name]), None))
+            rule_defs.append((inner_name, tree, options))
+
+        return [], rule_defs
+
+    def compile(self, lexer=False, start=None):
+        if not lexer:
+            token_defs, rule_defs = self._prepare_scanless_grammar(start)
         else:
             token_defs = list(self.token_defs)
             rule_defs = self.rule_defs
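What _prepare_scanless_grammar assembles, in short: each %ignore terminal is renamed
to __NAME, an optional __ignore rule is interleaved after the symbols of every
expansion, and each terminal then becomes a pair of rules (__token_NAME delegating to
__token_NAME_inner) so the parser can match tokens without a lexer. Below is a rough,
self-contained sketch of the two tree helpers on toy input; the T, Token and
is_terminal definitions are minimal stand-ins assumed for illustration, not Lark's
real classes:

    class Token(str):
        # Stand-in: a string that remembers its token type.
        def __new__(cls, type_, value):
            inst = super(Token, cls).__new__(cls, value)
            inst.type = type_
            return inst

    class T(object):
        # Stand-in for Lark's Tree: a node label plus a list of children.
        def __init__(self, data, children):
            self.data, self.children = data, children
        def __repr__(self):
            return 'T(%s, %r)' % (self.data, self.children)

    def is_terminal(sym):
        return sym.isupper()    # crude heuristic, good enough for this demo

    def _interleave(l, item):
        # After every terminal, allow an optional ignorable. (Simplified:
        # the real version also yields after literal/range subtrees.)
        for e in l:
            yield e
            if is_terminal(e):
                yield item

    def _choice_of_rules(rules):
        return T('expansions', [T('expansion', [Token('RULE', name)]) for name in rules])

    ignore = Token('RULE', '__ignore')
    print(list(_interleave(['a', 'PLUS', 'b'], ignore)))
    # ['a', 'PLUS', '__ignore', 'b']   (whitespace/comments may follow PLUS)

    print(_choice_of_rules(['__WS', '__COMMENT']))
    # T(expansions, [T(expansion, ['__WS']), T(expansion, ['__COMMENT'])])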
@@ -473,14 +477,6 @@ class RuleOptions:
         self.filter_out = filter_out    # remove this rule from the tree
                                         # used for "token"-rules in scanless
 
-    @classmethod
-    def new_from(cls, options, **kw):
-        return cls(
-            keep_all_tokens=options and options.keep_all_tokens,
-            expand1=options and options.expand1,
-            **kw)
-
     @classmethod
     def from_rule(cls, name, expansions):
         keep_all_tokens = name.startswith('!')
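RuleOptions.new_from can be dropped because _prepare_scanless_grammar no longer merges
options from an existing object: every converted token gets a fresh RuleOptions. The
replacement pattern, roughly, assuming the constructor keywords used above:

    # One fresh options object per converted token: '_'-prefixed helper
    # tokens are filtered out of the tree, visible ones keep all tokens.
    options = (RuleOptions(filter_out=True) if name.startswith('_')
               else RuleOptions(keep_all_tokens=True, create_token=name))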