| @@ -99,13 +99,14 @@ TERMINALS = { | |||
| RULES = { | |||
| 'start': ['_list'], | |||
| '_list': ['_item', '_list _item'], | |||
| '_item': ['rule', 'rule_template', 'term', 'statement', '_NL'], | |||
| '_item': ['rule', 'template', 'term', 'statement', '_NL'], | |||
| 'template': ['RULE _LBRACE template_params _RBRACE _COLON expansions _NL', | |||
| 'RULE _LBRACE template_params _RBRACE _DOT NUMBER _COLON expansions _NL'], | |||
| 'template_params': ['RULE', | |||
| 'template_params _COMMA RULE'], | |||
| 'template_params': ['_template_params'], | |||
| '_template_params': ['RULE', | |||
| '_template_params _COMMA RULE'], | |||
| 'rule': ['RULE _COLON expansions _NL', | |||
| 'RULE _DOT NUMBER _COLON expansions _NL'], | |||
| @@ -142,9 +143,9 @@ RULES = { | |||
| 'maybe': ['_LBRA expansions _RBRA'], | |||
| 'range': ['STRING _DOTDOT STRING'], | |||
| 'template_usage': ['RULE _LBRACE template_args _RBRACE'], | |||
| 'template_args': ['atom', | |||
| 'template_args _COMMA atom'], | |||
| 'template_usage': ['RULE _LBRACE _template_args _RBRACE'], | |||
| '_template_args': ['value', | |||
| '_template_args _COMMA value'], | |||
| 'term': ['TERMINAL _COLON expansions _NL', | |||
| 'TERMINAL _DOT NUMBER _COLON expansions _NL'], | |||
| @@ -353,6 +354,44 @@ class PrepareAnonTerminals(Transformer_InPlace): | |||
| return Terminal(term_name, filter_out=isinstance(p, PatternStr)) | |||
class _ReplaceSymbols(Transformer_InPlace):
    """Helper for ApplyTemplates.

    Substitutes ``value`` nodes that consist of a single RULE token whose
    text is a key of ``self.names`` with the object stored under that key.
    """

    def __init__(self):
        super(_ReplaceSymbols, self).__init__()
        # Mapping from a name to the replacement returned by ``value``;
        # callers are expected to assign it before transforming a tree.
        self.names = {}

    def value(self, c):
        # Only a lone child can be a candidate for substitution.
        if len(c) != 1:
            return self.__default__('value', c, None)

        only_child = c[0]
        is_known_rule_token = (
            isinstance(only_child, Token)
            and only_child.type == 'RULE'
            and only_child.value in self.names
        )
        if is_known_rule_token:
            return self.names[only_child.value]

        # Anything else is rebuilt by the transformer's default handler.
        return self.__default__('value', c, None)
class ApplyTemplates(Transformer_InPlace):
    """Apply the templates, creating new rules that represent the used templates.

    Each ``template_usage`` node is replaced by a ``NonTerminal`` that names a
    concrete rule. The first time a given (template, arguments) combination
    is seen, that rule is built from a copy of the template's tree and
    appended to ``rule_defs``.
    """

    def __init__(self, temp_defs, rule_defs):
        """
        temp_defs: iterable of ``(name, params, tree, options)`` tuples.
        rule_defs: list of ``(name, tree, options)`` tuples; instantiated
            templates are appended to it in place.
        """
        super(ApplyTemplates, self).__init__()
        self.temp_defs = temp_defs
        self.rule_defs = rule_defs
        self.replacer = _ReplaceSymbols()
        # Names of the rules already generated from templates.
        self.created_templates = set()

    def _get_template_name(self, name, args):
        """Return the generated rule name for template `name` applied to `args`.

        Every element of `args` must expose a ``name`` attribute.
        """
        return "_%s{%s}" % (name, ",".join(a.name for a in args))

    def template_usage(self, c):
        """Instantiate the referenced template if needed and return its NonTerminal.

        ``c[0]`` is the template name token and ``c[1:]`` are the arguments.
        """
        name = c[0]
        args = c[1:]
        result_name = self._get_template_name(name.value, args)
        if result_name not in self.created_templates:
            # Remember the instantiation so that a later usage with the same
            # arguments reuses it instead of appending a second rule with the
            # same name to rule_defs.
            self.created_templates.add(result_name)
            # Exactly one template definition must match the used name;
            # the single-element unpacking fails otherwise.
            (_n, params, tree, options) ,= (t for t in self.temp_defs if t[0] == name)
            assert len(params) == len(args), args
            # Work on a copy so the template definition itself stays reusable.
            result_tree = deepcopy(tree)
            self.replacer.names = dict(zip(params, args))
            self.replacer.transform(result_tree)
            self.rule_defs.append((result_name, result_tree, deepcopy(options)))
        return NonTerminal(result_name)
| def _rfind(s, choices): | |||
| return max(s.rfind(c) for c in choices) | |||
| @@ -452,9 +491,10 @@ def _choice_of_rules(rules): | |||
| return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) | |||
| class Grammar: | |||
| def __init__(self, rule_defs, term_defs, ignore): | |||
| def __init__(self, rule_defs, term_defs, temp_defs, ignore): | |||
| self.term_defs = term_defs | |||
| self.rule_defs = rule_defs | |||
| self.temp_defs = temp_defs | |||
| self.ignore = ignore | |||
| def compile(self, start): | |||
| @@ -462,6 +502,7 @@ class Grammar: | |||
| # So deepcopy allows calling compile more than once. | |||
| term_defs = deepcopy(list(self.term_defs)) | |||
| rule_defs = deepcopy(self.rule_defs) | |||
| temp_defs = deepcopy(self.temp_defs) | |||
| # =================== | |||
| # Compile Terminals | |||
| @@ -478,29 +519,38 @@ class Grammar: | |||
| transformer = PrepareLiterals() * TerminalTreeToPattern() | |||
| terminals = [TerminalDef(name, transformer.transform( term_tree ), priority) | |||
| for name, (term_tree, priority) in term_defs if term_tree] | |||
| for name, (term_tree, priority) in term_defs if term_tree] | |||
| # ================= | |||
| # Compile Rules | |||
| # ================= | |||
| # TODO: add templates | |||
| # 1. Pre-process terminals | |||
| transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals | |||
| transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals | |||
| # 2. Inline Templates | |||
| # 2. Convert EBNF to BNF (and apply step 1) | |||
| transformer *= ApplyTemplates(temp_defs, rule_defs) | |||
| # 3. Convert EBNF to BNF (and apply step 1 & 2) | |||
| ebnf_to_bnf = EBNF_to_BNF() | |||
| rules = [] | |||
| for name, rule_tree, options in rule_defs: | |||
| i = 0 | |||
| while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates | |||
| name, rule_tree, options = rule_defs[i] | |||
| ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options.keep_all_tokens else None | |||
| ebnf_to_bnf.prefix = name | |||
| tree = transformer.transform(rule_tree) | |||
| res = ebnf_to_bnf.transform(tree) | |||
| rules.append((name, res, options)) | |||
| i += 1 | |||
| rules += ebnf_to_bnf.new_rules | |||
| assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" | |||
| # 3. Compile tree to Rule objects | |||
| # 4. Compile tree to Rule objects | |||
| rule_tree_to_text = RuleTreeToText() | |||
| simplify_rule = SimplifyRule_Visitor() | |||
| @@ -589,9 +639,11 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): | |||
| imported_terms = dict(grammar.term_defs) | |||
| imported_rules = {n:(n,deepcopy(t),o) for n,t,o in grammar.rule_defs} | |||
| imported_temps = {n:(n,deepcopy(t),o) for n,t,o in grammar.temp_defs} | |||
| term_defs = [] | |||
| rule_defs = [] | |||
| temp_defs = [] | |||
| def rule_dependencies(symbol): | |||
| if symbol.type != 'RULE': | |||
| @@ -661,8 +713,8 @@ def resolve_term_references(term_defs): | |||
| raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) | |||
| def options_from_rule(name, *x,is_template=False): | |||
| if len(x) > (1+is_template): | |||
| def options_from_rule(name, *x): | |||
| if len(x) > 1: | |||
| priority, expansions = x | |||
| priority = int(priority) | |||
| else: | |||
| @@ -676,6 +728,22 @@ def options_from_rule(name, *x,is_template=False): | |||
| return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) | |||
def options_from_template(name, params, *x):
    """Split a parsed template definition into its parts.

    `x` holds either ``(expansions,)`` or ``(priority, expansions)``.
    Leading ``!`` characters on `name` set keep_all_tokens and leading ``?``
    characters (after the ``!`` ones) set expand1; both are stripped from
    the returned name.

    Returns ``(name, params, expansions, RuleOptions)`` where ``params`` is
    the list of ``.value`` attributes of ``params.children``.
    """
    if len(x) <= 1:
        # No explicit priority was given.
        (expansions,) = x
        priority = None
    else:
        raw_priority, expansions = x
        priority = int(raw_priority)

    param_names = [child.value for child in params.children]

    keep_all_tokens = name.startswith('!')
    bare_name = name.lstrip('!')
    expand1 = bare_name.startswith('?')
    bare_name = bare_name.lstrip('?')

    options = RuleOptions(keep_all_tokens, expand1, priority=priority)
    return bare_name, param_names, expansions, options
def symbols_from_strcase(expansion):
    """Turn a sequence of symbol names into Terminal / NonTerminal objects.

    An upper-case name becomes a Terminal (filtered out when it starts with
    an underscore); any other name becomes a NonTerminal.
    """
    symbols = []
    for sym_name in expansion:
        if sym_name.isupper():
            symbol = Terminal(sym_name, filter_out=sym_name.startswith('_'))
        else:
            symbol = NonTerminal(sym_name)
        symbols.append(symbol)
    return symbols
| @@ -741,14 +809,14 @@ class GrammarLoader: | |||
| defs = classify(tree.children, lambda c: c.data, lambda c: c.children) | |||
| term_defs = defs.pop('term', []) | |||
| rule_defs = defs.pop('rule', []) | |||
| template_defs = defs.pop('template', []) | |||
| temp_defs = defs.pop('template', []) | |||
| statements = defs.pop('statement', []) | |||
| assert not defs | |||
| term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs] | |||
| term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs] | |||
| rule_defs = [options_from_rule(*x) for x in rule_defs] | |||
| template_defs = [options_from_rule(*x, is_template=True) for x in rule_defs] | |||
| temp_defs = [options_from_template(*x) for x in temp_defs] | |||
| # Execute statements | |||
| ignore, imports = [], {} | |||
| @@ -804,10 +872,11 @@ class GrammarLoader: | |||
| for dotted_path, (base_paths, aliases) in imports.items(): | |||
| grammar_path = os.path.join(*dotted_path) + EXT | |||
| g = import_grammar(grammar_path, base_paths=base_paths) | |||
| new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | |||
| new_td, new_rd, new_tp = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | |||
| term_defs += new_td | |||
| rule_defs += new_rd | |||
| temp_defs += new_tp | |||
| # Verify correctness 1 | |||
| for name, _ in term_defs: | |||
| @@ -854,6 +923,17 @@ class GrammarLoader: | |||
| if name in rule_names: | |||
| raise GrammarError("Rule '%s' defined more than once" % name) | |||
| rule_names.add(name) | |||
| temp_names = set() | |||
| for name, _p, _x, _o in temp_defs: | |||
| if name.startswith('__'): | |||
| raise GrammarError('Names starting with double-underscore are reserved (Error at %s (template))' % name) | |||
| if name.startswith('_'): # TODO: rethink this decision (not the error msg) | |||
| raise GrammarError('Templates are always inline, they should not start with a underscore (Error ar %s)' % name) | |||
| if name in temp_names: | |||
| raise GrammarError("Template '%s' defined more than once" % name) | |||
| temp_names.add(name) | |||
| if name in rule_names: | |||
| raise GrammarError("Template '%s' conflicts with rule of same name" % name) | |||
| for name, expansions, _o in rules: | |||
| for sym in _find_used_symbols(expansions): | |||
| @@ -861,10 +941,28 @@ class GrammarLoader: | |||
| if sym not in terminal_names: | |||
| raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) | |||
| else: | |||
| if sym not in rule_names: | |||
| if sym not in rule_names and sym not in temp_names: # TODO: check that sym is actually used as template | |||
| raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) | |||
| return Grammar(rules, term_defs, ignore_names) | |||
| for name, params, expansions, _o in temp_defs: | |||
| for i, p in enumerate(params): | |||
| if p in rule_names: | |||
| raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) | |||
| if p in temp_names: | |||
| raise GrammarError("Template Parameter conflicts with template %s (in template %s)" % (p, name)) | |||
| if p in params[:i]: | |||
| raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name)) | |||
| for sym in _find_used_symbols(expansions): | |||
| if sym.type == 'TERMINAL': | |||
| if sym not in terminal_names: | |||
| raise GrammarError("Token '%s' used but not defined (in template %s)" % (sym, name)) | |||
| else: | |||
| if sym not in rule_names and sym not in temp_names and sym not in params: | |||
| raise GrammarError("Rule '%s' used but not defined (in template %s)" % (sym, name)) | |||
| # TODO: check that sym is actually used as template | |||
| # TODO: number of template arguments matches requirement | |||
| return Grammar(rules, term_defs, temp_defs, ignore_names) | |||