@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from types import ModuleType
from typing import (
    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional,
    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
    Pattern as REPattern,
)
from abc import abstractmethod, ABC
@@ -100,10 +100,22 @@ class Lexer(ABC):
    lex: Callable[..., Iterator[Token]]

class LexerConf:
    tokens: Collection[TerminalDef]
    re_module: ModuleType
    ignore: Collection[str] = ()
    postlex: Any = None
    callbacks: Optional[Dict[str, _Callback]] = None
    g_regex_flags: int = 0
    skip_validation: bool = False
    use_bytes: bool = False

class TraditionalLexer(Lexer):
    terminals: Collection[TerminalDef]
    ignore_types: List[str]
    newline_types: List[str]
    ignore_types: FrozenSet[str]
    newline_types: FrozenSet[str]
    user_callbacks: Dict[str, _Callback]
    callback: Dict[str, _Callback]
    mres: List[Tuple[REPattern, Dict[int, str]]]
@@ -111,11 +123,7 @@ class TraditionalLexer(Lexer):
    def __init__(
        self,
        terminals: Collection[TerminalDef],
        re_: ModuleType,
        ignore: Collection[str] = ...,
        user_callbacks: Dict[str, _Callback] = ...,
        g_regex_flags: int = ...
        conf: LexerConf
    ):
        ...
@@ -128,6 +136,8 @@ class TraditionalLexer(Lexer):
    def lex(self, stream: str) -> Iterator[Token]:
        ...

    def next_token(self, lex_state: Any) -> Token:
        ...

class ContextualLexer(Lexer):
    lexers: Dict[str, TraditionalLexer]
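The stub above captures the central API change in this diff: TraditionalLexer no longer takes terminals, re_, ignore, user_callbacks and g_regex_flags individually, but a single LexerConf that carries all of them. A minimal sketch of the new call shape (the positional order follows the `LexerConf(terminals, re, ['WS', 'COMMENT'])` call later in this diff; anything beyond that is an assumption):

```python
import re
from lark.lexer import TerminalDef, PatternStr, TraditionalLexer
from lark.common import LexerConf

terminals = [TerminalDef('PLUS', PatternStr('+')),
             TerminalDef('WS', PatternStr(' '))]

# Old: TraditionalLexer(terminals, re, ignore=['WS'], ...)
# New: one config object carries everything.
conf = LexerConf(terminals, re, ['WS'])   # (tokens, re_module, ignore)
lexer = TraditionalLexer(conf)
```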
@@ -3,6 +3,7 @@ from .lexer import TerminalDef
###{standalone

class LexerConf(Serialize):
    __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes'
    __serialize_namespace__ = TerminalDef,
@@ -19,11 +20,10 @@ class LexerConf(Serialize):
###}

class ParserConf:
    def __init__(self, rules, callbacks, start):
        assert isinstance(start, list)
        self.rules = rules
        self.callbacks = callbacks
        self.start = start
@@ -6,15 +6,19 @@ from .utils import STRING_TYPE, logger
class LarkError(Exception):
    pass

class GrammarError(LarkError):
    pass

class ParseError(LarkError):
    pass

class LexError(LarkError):
    pass

class UnexpectedEOF(ParseError):
    def __init__(self, expected):
        self.expected = expected
@@ -40,14 +40,12 @@ class Terminal(Symbol):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)

class NonTerminal(Symbol):
    __serialize_fields__ = 'name',

    is_term = False

class RuleOptions(Serialize):
    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
@@ -104,5 +102,4 @@ class Rule(Serialize):
        return self.origin == other.origin and self.expansion == other.expansion

###}
@@ -24,6 +24,7 @@ except ImportError:
###{standalone

class LarkOptions(Serialize):
    """Specifies the options for Lark
@@ -36,7 +37,7 @@ class LarkOptions(Serialize):
    debug
        Display debug information, such as warnings (default: False)
    transformer
        Applies the transformer to every parse tree (equivlent to applying it after the parse, but faster)
        Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
    propagate_positions
        Propagates (line, column, end_line, end_column) attributes into all tree branches.
    maybe_placeholders
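As the transformer entry documents, a transformer passed to Lark with the LALR parser is applied while the tree is being built, so parse() returns the transformed result directly. A small illustrative sketch (the grammar and transformer are invented for the example):

```python
from lark import Lark, Transformer

grammar = r"""
start: INT ("+" INT)*

%import common.INT
%import common.WS
%ignore WS
"""

class SumTransformer(Transformer):   # illustrative
    def start(self, children):
        return sum(int(tok) for tok in children)

parser = Lark(grammar, parser='lalr', transformer=SumTransformer())
print(parser.parse("1 + 2 + 3"))   # -> 6
```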
@@ -320,7 +321,7 @@ class Lark(Serialize):
        # Else, if the user asked to disable priorities, strip them from the
        # rules. This allows the Earley parsers to skip an extra forest walk
        # for improved performance, if you don't need them (or didn't specify any).
        elif self.options.priority == None:
        elif self.options.priority is None:
            for rule in self.rules:
                if rule.options.priority is not None:
                    rule.options.priority = None
@@ -360,7 +361,7 @@ class Lark(Serialize):
            self.rules,
            self.options.tree_class or Tree,
            self.options.propagate_positions,
            self.options.parser!='lalr' and self.options.ambiguity=='explicit',
            self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
            self.options.maybe_placeholders
        )
        self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
@@ -410,7 +411,7 @@ class Lark(Serialize):
            data['parser'],
            memo,
            self._callbacks,
            self.options, # Not all, but multiple attributes are used
            self.options,  # Not all, but multiple attributes are used
        )
        self.terminals = self.parser.lexer_conf.tokens
        self._terminals_dict = {t.name: t for t in self.terminals}
@@ -1,4 +1,4 @@
## Lexer Implementation
# Lexer Implementation

import re
@@ -8,6 +8,7 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
###{standalone
from copy import copy

class Pattern(Serialize):
    def __init__(self, value, flags=()):
@@ -20,6 +21,7 @@ class Pattern(Serialize):
    # Pattern Hashing assumes all subclasses have a different priority!
    def __hash__(self):
        return hash((type(self), self.value, self.flags))

    def __eq__(self, other):
        return type(self) == type(other) and self.value == other.value and self.flags == other.flags
@@ -53,6 +55,7 @@ class PatternStr(Pattern):
        return len(self.value)
    max_width = min_width

class PatternRE(Pattern):
    __serialize_fields__ = 'value', 'flags', '_width'
@@ -70,6 +73,7 @@ class PatternRE(Pattern):
    @property
    def min_width(self):
        return self._get_width()[0]

    @property
    def max_width(self):
        return self._get_width()[1]
@@ -139,7 +143,7 @@ class Token(Str):
        return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

    def __reduce__(self):
        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
        return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column))

    def __repr__(self):
        return 'Token(%r, %r)' % (self.type, self.value)
@@ -193,6 +197,7 @@ class UnlessCallback:
                break
        return t

class CallChain:
    def __init__(self, callback1, callback2, cond):
        self.callback1 = callback1
@@ -204,16 +209,13 @@ class CallChain:
        return self.callback2(t) if self.cond(t2) else t2

def _create_unless(terminals, g_regex_flags, re_, use_bytes):
    tokens_by_type = classify(terminals, lambda t: type(t.pattern))
    assert len(tokens_by_type) <= 2, tokens_by_type.keys()
    embedded_strs = set()
    callback = {}
    for retok in tokens_by_type.get(PatternRE, []):
        unless = [] # {}
        unless = []
        for strtok in tokens_by_type.get(PatternStr, []):
            if strtok.priority > retok.priority:
                continue
@@ -245,13 +247,15 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes)
        except AssertionError: # Yes, this is what Python provides us.. :/
            return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes)

        mres.append((mre, {i:n for n,i in mre.groupindex.items()} ))
        mres.append((mre, {i: n for n, i in mre.groupindex.items()}))
        terminals = terminals[max_size:]
    return mres

def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False):
    return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes)
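Context for the max_size halving above: each terminal becomes a named group in one big alternation, and Python's re (before 3.11) refuses patterns with more than 100 groups, which surfaces as the AssertionError the comment laments, so _build_mres compiles the terminals in batches and halves the batch size until re accepts it. A simplified sketch of what one batch looks like (not the exact helper):

```python
import re

# One named group per terminal, joined into a single alternation:
terminals = [('INT', '[0-9]+'), ('NAME', '[a-z]+'), ('PLUS', r'\+')]
mre = re.compile('|'.join('(?P<%s>%s)' % (name, pattern)
                          for name, pattern in terminals))

m = mre.match('foo')
print(m.lastgroup, m.group(0))   # -> NAME foo
```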
def _regexp_has_newline(r):
    r"""Expressions that may indicate newlines in a regexp:
        - newlines (\n)
@@ -262,6 +266,7 @@ def _regexp_has_newline(r):
    """
    return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
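A quick check of the heuristic, using the predicate exactly as defined above:

```python
for pattern in (r'\n', r'[^"]+', r'(?s).', r'[a-z]+'):
    print(pattern, _regexp_has_newline(pattern))
# \n      True   (escaped newline)
# [^"]+   True   (negated class can match a newline)
# (?s).   True   (dot with DOTALL)
# [a-z]+  False
```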
class Lexer(object):
    """Lexer interface
@@ -300,7 +305,7 @@ class TraditionalLexer(Lexer):
        self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
        self.ignore_types = frozenset(conf.ignore)

        terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
        terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
        self.terminals = terminals
        self.user_callbacks = conf.callbacks
        self.g_regex_flags = conf.g_regex_flags
@@ -309,7 +314,7 @@ class TraditionalLexer(Lexer):
        self._mres = None

    def _build(self):
        terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes)
        terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
        assert all(self.callback.values())

        for type_, f in self.user_callbacks.items():
@@ -333,7 +338,7 @@ class TraditionalLexer(Lexer):
        if m:
            return m.group(0), type_from_index[m.lastindex]

    def lex(self, state, parser_state):
    def lex(self, state, _parser_state):
        with suppress(EOFError):
            while True:
                yield self.next_token(state)
@@ -372,6 +377,7 @@ class TraditionalLexer(Lexer):
        # EOF
        raise EOFError(self)

class LexerState:
    __slots__ = 'text', 'line_ctr', 'last_token'
@@ -383,6 +389,7 @@ class LexerState:
    def __copy__(self):
        return type(self)(self.text, copy(self.line_ctr), self.last_token)

class ContextualLexer(Lexer):
    def __init__(self, conf, states, always_accept=()):
@@ -430,8 +437,9 @@ class ContextualLexer(Lexer):
            token = self.root_lexer.next_token(lexer_state)
            raise UnexpectedToken(token, e.allowed, state=parser_state.position)

class LexerThread:
    "A thread that ties a lexer instance and a lexer state, to be used by the parser"
    """A thread that ties a lexer instance and a lexer state, to be used by the parser"""

    def __init__(self, lexer, text):
        self.lexer = lexer
@@ -1,4 +1,4 @@
"Parses and creates Grammar objects"
"""Parses and creates Grammar objects"""

import os.path
import sys
@@ -166,6 +166,7 @@ RULES = {
    'literal': ['REGEXP', 'STRING'],
}

@inline_args
class EBNF_to_BNF(Transformer_InPlace):
    def __init__(self):
@@ -259,9 +260,9 @@ class SimplifyRule_Visitor(Visitor):
        for i, child in enumerate(tree.children):
            if isinstance(child, Tree) and child.data == 'expansions':
                tree.data = 'expansions'
                tree.children = [self.visit(ST('expansion', [option if i==j else other
                                                            for j, other in enumerate(tree.children)]))
                    for option in dedup_list(child.children)]
                tree.children = [self.visit(ST('expansion', [option if i == j else other
                                                             for j, other in enumerate(tree.children)]))
                                 for option in dedup_list(child.children)]
                self._flatten(tree)
                break
@@ -284,8 +285,10 @@ class SimplifyRule_Visitor(Visitor):

class RuleTreeToText(Transformer):
    def expansions(self, x):
        return x

    def expansion(self, symbols):
        return symbols, None

    def alias(self, x):
        (expansion, _alias), alias = x
        assert _alias is None, (alias, expansion, '-', _alias)  # Double alias not allowed
@@ -300,8 +303,9 @@ class CanonizeTree(Transformer_InPlace):
        tokenmods, value = args
        return tokenmods + [value]

class PrepareAnonTerminals(Transformer_InPlace):
    "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
    """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""

    def __init__(self, terminals):
        self.terminals = terminals
@@ -310,7 +314,6 @@ class PrepareAnonTerminals(Transformer_InPlace):
        self.i = 0
        self.rule_options = None

    @inline_args
    def pattern(self, p):
        value = p.value
@@ -330,14 +333,14 @@ class PrepareAnonTerminals(Transformer_InPlace):
            except KeyError:
                if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set:
                    with suppress(UnicodeEncodeError):
                        value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names
                        value.upper().encode('ascii')  # Make sure we don't have unicode in our terminal names
                        term_name = value.upper()

            if term_name in self.term_set:
                term_name = None

        elif isinstance(p, PatternRE):
            if p in self.term_reverse: # Kind of a weird placement.name
            if p in self.term_reverse:  # Kind of a weird placement.name
                term_name = self.term_reverse[p].name
        else:
            assert False, p
@@ -359,7 +362,7 @@ class PrepareAnonTerminals(Transformer_InPlace):

class _ReplaceSymbols(Transformer_InPlace):
    " Helper for ApplyTemplates "
    """Helper for ApplyTemplates"""

    def __init__(self):
        self.names = {}
@@ -374,8 +377,9 @@ class _ReplaceSymbols(Transformer_InPlace):
            return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
        return self.__default__('template_usage', c, None)

class ApplyTemplates(Transformer_InPlace):
    " Apply the templates, creating new rules that represent the used templates "
    """Apply the templates, creating new rules that represent the used templates"""

    def __init__(self, rule_defs):
        self.rule_defs = rule_defs
@@ -401,8 +405,6 @@ def _rfind(s, choices):
    return max(s.rfind(c) for c in choices)

def _literal_to_pattern(literal):
    v = literal.value
    flag_start = _rfind(v, '/"')+1
@@ -441,7 +443,7 @@ class PrepareLiterals(Transformer_InPlace):
        assert start.type == end.type == 'STRING'
        start = start.value[1:-1]
        end = end.value[1:-1]
        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end)))
        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
        regexp = '[%s-%s]' % (start, end)
        return ST('pattern', [PatternRE(regexp)])
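literal_range above is what compiles the grammar's range syntax, so a terminal like LOWER: "a".."z" ends up as the regexp [a-z]. A tiny end-to-end check:

```python
from lark import Lark

parser = Lark(r'''start: LOWER+
                  LOWER: "a".."z"''')
print(parser.parse("abc"))   # Tree('start', [Token('LOWER', 'a'), ...])
```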
@@ -460,6 +462,7 @@ def _make_joined_pattern(regexp, flags_set):
    return PatternRE(regexp, flags)

class TerminalTreeToPattern(Transformer):
    def pattern(self, ps):
        p ,= ps
@@ -503,6 +506,7 @@ class TerminalTreeToPattern(Transformer):
    def value(self, v):
        return v[0]

class PrepareSymbols(Transformer_InPlace):
    def value(self, v):
        v ,= v
@@ -514,13 +518,16 @@ class PrepareSymbols(Transformer_InPlace):
            return Terminal(Str(v.value), filter_out=v.startswith('_'))
        assert False

def _choice_of_rules(rules):
    return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])

def nr_deepcopy_tree(t):
    "Deepcopy tree `t` without recursion"
    """Deepcopy tree `t` without recursion"""
    return Transformer_NonRecursive(False).transform(t)
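The reason nr_deepcopy_tree avoids recursion (an assumption worth stating plainly): copy.deepcopy descends once per tree level, so very deep parse trees can exceed the interpreter's recursion limit, while Transformer_NonRecursive walks the tree with an explicit stack. A sketch of the failure mode, using the helper as defined above:

```python
from lark import Tree

t = Tree('start', [])
for _ in range(5000):          # deeper than the default recursion limit
    t = Tree('wrap', [t])

copied = nr_deepcopy_tree(t)   # iterative: fine
# copy.deepcopy(t) would raise RecursionError on a tree this deep
```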
class Grammar:
    def __init__(self, rule_defs, term_defs, ignore):
        self.term_defs = term_defs
@@ -547,7 +554,7 @@ class Grammar:
                raise GrammarError("Terminals cannot be empty (%s)" % name)

        transformer = PrepareLiterals() * TerminalTreeToPattern()
        terminals = [TerminalDef(name, transformer.transform( term_tree ), priority)
        terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
                     for name, (term_tree, priority) in term_defs if term_tree]

        # =================
@@ -566,10 +573,10 @@ class Grammar:
        ebnf_to_bnf = EBNF_to_BNF()
        rules = []
        i = 0
        while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
        while i < len(rule_defs):  # We have to do it like this because rule_defs might grow due to templates
            name, params, rule_tree, options = rule_defs[i]
            i += 1
            if len(params) != 0: # Dont transform templates
            if len(params) != 0:  # Don't transform templates
                continue
            rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
            ebnf_to_bnf.rule_options = rule_options
@@ -594,7 +601,7 @@ class Grammar:
            for i, (expansion, alias) in enumerate(expansions):
                if alias and name.startswith('_'):
| raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | |||
| raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias)) | |||
                empty_indices = [x==_EMPTY for x in expansion]
                if any(empty_indices):
@@ -623,14 +630,13 @@ class Grammar:
        # Remove duplicates
        compiled_rules = list(set(compiled_rules))

        # Filter out unused rules
        while True:
            c = len(compiled_rules)
            used_rules = {s for r in compiled_rules
                                for s in r.expansion
                                if isinstance(s, NonTerminal)
                                and s != r.origin}
                          for s in r.expansion
                          if isinstance(s, NonTerminal)
                          and s != r.origin}
            used_rules |= {NonTerminal(s) for s in start}
            compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
            for r in unused:
@@ -663,6 +669,7 @@ class PackageResource(object):
    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path)

class FromPackageLoader(object):
    """
    Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
@@ -699,11 +706,12 @@ class FromPackageLoader(object):
                return PackageResource(self.pkg_name, full_path), text.decode()
        raise IOError()

stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)

_imported_grammars = {}
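stdlib_loader above is the loader lark uses for its own bundled grammars; the same class lets users point %import at their package's data files. A hedged usage sketch (my_package and its grammars/ directory are hypothetical, and grammar_text stands in for your grammar source):

```python
from lark import Lark
from lark.load_grammar import FromPackageLoader

grammar_text = '...'   # your grammar, containing %import statements

# Resolve %import from my_package/grammars/*.lark instead of the filesystem:
custom_loader = FromPackageLoader('my_package', ('grammars',))
parser = Lark(grammar_text, import_paths=[custom_loader])
```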
def import_from_grammar_into_namespace(grammar, namespace, aliases):
    """Returns all rules and terminals of grammar, prepended
    with a 'namespace' prefix, except for those which are aliased.
@@ -724,8 +732,6 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
            raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace))
        return _find_used_symbols(tree) - set(params)

    def get_namespace_name(name, params):
        if params is not None:
            try:
@@ -746,19 +752,17 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
        else:
            assert symbol.type == 'RULE'
            _, params, tree, options = imported_rules[symbol]
            params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params}
            params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params}
            for t in tree.iter_subtrees():
                for i, c in enumerate(t.children):
                    if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
                        t.children[i] = Token(c.type, get_namespace_name(c, params_map))

            params = [params_map[p] for p in params] # We can not rely on ordered dictionaries
            params = [params_map[p] for p in params]  # We can not rely on ordered dictionaries
            rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options))

    return term_defs, rule_defs

def resolve_term_references(term_defs):
    # TODO Solve with transitive closure (maybe)
@@ -798,7 +802,7 @@ def options_from_rule(name, params, *x):
    else:
        expansions ,= x
        priority = None
    params = [t.value for t in params.children] if params is not None else [] # For the grammar parser
    params = [t.value for t in params.children] if params is not None else []  # For the grammar parser

    keep_all_tokens = name.startswith('!')
    name = name.lstrip('!')
@@ -812,10 +816,12 @@ def options_from_rule(name, params, *x):

def symbols_from_strcase(expansion):
    return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]

@inline_args
class PrepareGrammar(Transformer_InPlace):
    def terminal(self, name):
        return name

    def nonterminal(self, name):
        return name
@@ -825,10 +831,11 @@ def _find_used_symbols(tree):
    return {t for x in tree.find_data('expansion')
            for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}

class GrammarLoader:
    ERRORS = [
        ('Unclosed parenthesis', ['a: (\n']),
        ('Umatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
        ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
        ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
        ('Illegal name for rules or terminals', ['Aa:\n']),
        ('Alias expects lowercase name', ['a: -> "a"\n']),
@@ -843,8 +850,9 @@ class GrammarLoader:
    def __init__(self, global_keep_all_tokens):
        terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

        rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)]
        rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
        rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
                 for r, _p, xs, o in rules for i, x in enumerate(xs)]
        callback = ParseTreeBuilder(rules, ST).create_callback()
        import re
        lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT'])
@@ -881,10 +889,10 @@ class GrammarLoader:
        return _imported_grammars[grammar_path]

    def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]):
        "Parse grammar_text, verify, and create Grammar object. Display nice messages on error."
        """Parse grammar_text, verify, and create Grammar object. Display nice messages on error."""
        try:
            tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') )
            tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n'))
        except UnexpectedCharacters as e:
            context = e.get_context(grammar_text)
            raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
@@ -1037,7 +1045,7 @@ class GrammarLoader:
                    raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name))
                if len(args) != rule_names[sym]:
                    raise GrammarError("Wrong number of template arguments used for %s "
                        "(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name))
                        "(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name))
            for sym in _find_used_symbols(expansions):
                if sym.type == 'TERMINAL':
                    if sym not in terminal_names:
@@ -1046,10 +1054,8 @@ class GrammarLoader:
                    if sym not in rule_names and sym not in params:
                        raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))

        return Grammar(rules, term_defs, ignore_names)

def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
    return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths)
@@ -1,7 +1,7 @@
from .exceptions import GrammarError
from .lexer import Token
from .tree import Tree
from .visitors import InlineTransformer # XXX Deprecated
from .visitors import InlineTransformer  # XXX Deprecated
from .visitors import Transformer_InPlace
from .visitors import _vargs_meta, _vargs_meta_inline
@@ -20,6 +20,7 @@ class ExpandSingleChild:
        else:
            return self.node_builder(children)

class PropagatePositions:
    def __init__(self, node_builder):
        self.node_builder = node_builder
@@ -87,8 +88,9 @@ class ChildFilter:
        return self.node_builder(filtered)

class ChildFilterLALR(ChildFilter):
    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
    """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""

    def __call__(self, children):
        filtered = []
@@ -108,6 +110,7 @@ class ChildFilterLALR(ChildFilter):
        return self.node_builder(filtered)

class ChildFilterLALR_NoPlaceholders(ChildFilter):
    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
    def __init__(self, to_include, node_builder):
@@ -126,9 +129,11 @@ class ChildFilterLALR_NoPlaceholders(ChildFilter):
                filtered.append(children[i])
        return self.node_builder(filtered)

def _should_expand(sym):
    return not sym.is_term and sym.name.startswith('_')
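_should_expand encodes lark's underscore convention: a nonterminal whose name starts with _ is inlined into its parent instead of appearing as a node. For example:

```python
from lark import Lark

parser = Lark(r'''start: _pair
                  _pair: NAME "=" NAME
                  NAME: /[a-z]+/

                  %import common.WS
                  %ignore WS''')
print(parser.parse("a = b"))
# Tree('start', [Token('NAME', 'a'), Token('NAME', 'b')]) -- no _pair node
```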
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
    # Prepare empty_indices as: How many Nones to insert at each index?
    if _empty_indices:
@@ -156,6 +161,7 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indi
            # LALR without placeholders
            return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])

class AmbiguousExpander:
    """Deal with the case where we're expanding children ('_rule') into a parent but the children
       are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
@@ -167,10 +173,10 @@ class AmbiguousExpander:
        self.to_expand = to_expand

    def __call__(self, children):
        def _is_ambig_tree(child):
            return hasattr(child, 'data') and child.data == '_ambig'
        def _is_ambig_tree(t):
            return hasattr(t, 'data') and t.data == '_ambig'

        #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
        # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
        #    All children of an _ambig node should be a derivation of that ambig node, hence
        #    it is safe to assume that if we see an _ambig node nested within an ambig node
        #    it is safe to simply expand it into the parent _ambig node as an alternative derivation.
@@ -186,15 +192,17 @@ class AmbiguousExpander:
        if not ambiguous:
            return self.node_builder(children)

        expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ]
        expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
        return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])

def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
    to_expand = [i for i, sym in enumerate(expansion)
                 if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
    if to_expand:
        return partial(AmbiguousExpander, to_expand, tree_class)

class AmbiguousIntermediateExpander:
    """
    Propagate ambiguous intermediate nodes and their derivations up to the
@@ -275,12 +283,14 @@ class AmbiguousIntermediateExpander:
        return self.node_builder(children)

def ptb_inline_args(func):
    @wraps(func)
    def f(children):
        return func(*children)
    return f

def inplace_transformer(func):
    @wraps(func)
    def f(children):
@@ -289,9 +299,11 @@ def inplace_transformer(func):
        return func(tree)
    return f

def apply_visit_wrapper(func, name, wrapper):
    if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
        raise NotImplementedError("Meta args not supported for internal transformer")

    @wraps(func)
    def f(children):
        return wrapper(func, name, children, None)
@@ -323,7 +335,6 @@ class ParseTreeBuilder:
            yield rule, wrapper_chain

    def create_callback(self, transformer=None):
        callbacks = {}
@@ -298,8 +298,8 @@ class Parser:
        # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
        solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
        if not solutions:
            expected_tokens = [t.expect for t in to_scan]
            raise UnexpectedEOF(expected_tokens)
            expected_terminals = [t.expect for t in to_scan]
            raise UnexpectedEOF(expected_terminals)

        if self.debug:
            from .earley_forest import ForestToPyDotVisitor
@@ -46,14 +46,14 @@ class Tree(object):
    def _pretty(self, level, indent_str):
        if len(self.children) == 1 and not isinstance(self.children[0], Tree):
            return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
            return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']

        l = [ indent_str*level, self._pretty_label(), '\n' ]
        l = [indent_str*level, self._pretty_label(), '\n']
        for n in self.children:
            if isinstance(n, Tree):
                l += n._pretty(level+1, indent_str)
            else:
                l += [ indent_str*(level+1), '%s' % (n,), '\n' ]
                l += [indent_str*(level+1), '%s' % (n,), '\n']

        return l
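The cleanup above doesn't change what _pretty emits; for reference:

```python
from lark import Tree, Token

t = Tree('start', [Tree('word', [Token('WORD', 'hi')]), Token('BANG', '!')])
print(t.pretty())
# start
#   word  hi
#   !
```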
@@ -102,8 +102,8 @@ class Tree(object):
###}

    def expand_kids_by_index(self, *indices):
        "Expand (inline) children at the given indices"
        for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
        """Expand (inline) children at the given indices"""
        for i in sorted(indices, reverse=True):  # reverse so that changing tail won't affect indices
            kid = self.children[i]
            self.children[i:i+1] = kid.children
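A tiny illustration of expand_kids_by_index, and of why the indices are walked in reverse (earlier indices stay valid while the list is being spliced):

```python
from lark import Tree, Token

t = Tree('a', [Tree('_b', [Token('X', 'x')]), Tree('_c', [Token('Y', 'y')])])
t.expand_kids_by_index(0, 1)   # processed as 1, then 0
print(t.children)              # [Token('X', 'x'), Token('Y', 'y')]
```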
@@ -144,12 +144,15 @@ class Tree(object):
    @property
    def line(self):
        return self.meta.line

    @property
    def column(self):
        return self.meta.column

    @property
    def end_line(self):
        return self.meta.end_line

    @property
    def end_column(self):
        return self.meta.end_column
@@ -168,6 +171,7 @@ def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs):
    graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
    graph.write(filename)

def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
    """Creates a colorful image that represents the tree (data+children, without meta)
@@ -196,7 +200,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
        subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
                    for child in subtree.children]
        node = pydot.Node(i[0], style="filled", fillcolor="#%x"%color, label=subtree.data)
        node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
        i[0] += 1
        graph.add_node(node)
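Usage of the function being touched, for reference (requires the optional pydot dependency; the filename is arbitrary):

```python
from lark import Lark
from lark.tree import pydot__tree_to_dot

tree = Lark(r'''start: WORD "!"
                WORD: /\w+/''').parse("hi!")
pydot__tree_to_dot(tree, 'parse_tree.dot', rankdir="TB")
```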
@@ -1,10 +1,10 @@
import sys
import os
from functools import reduce
from ast import literal_eval
from collections import deque

###{standalone
import sys, re
import logging
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
@@ -12,6 +12,8 @@ logger.addHandler(logging.StreamHandler())
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

Py36 = (sys.version_info[:2] >= (3, 6))

def classify(seq, key=None, value=None):
    d = {}
@@ -27,7 +29,7 @@ def classify(seq, key=None, value=None):

def _deserialize(data, namespace, memo):
    if isinstance(data, dict):
        if '__type__' in data: # Object
        if '__type__' in data:  # Object
            class_ = namespace[data['__type__']]
            return class_.deserialize(data, memo)
        elif '@' in data:
@@ -105,7 +107,6 @@ class SerializeMemoizer(Serialize):
        return _deserialize(data, namespace, memo)

try:
    STRING_TYPE = basestring
except NameError:   # Python 3
@@ -118,10 +119,11 @@ from contextlib import contextmanager

Str = type(u'')
try:
    classtype = types.ClassType # Python2
    classtype = types.ClassType  # Python2
except AttributeError:
    classtype = type    # Python3

def smart_decorator(f, create_decorator):
    if isinstance(f, types.FunctionType):
        return wraps(f)(create_decorator(f, True))
@@ -139,17 +141,16 @@ def smart_decorator(f, create_decorator):
    else:
        return create_decorator(f.__func__.__call__, True)

try:
    import regex
except ImportError:
    regex = None

import sys, re
Py36 = (sys.version_info[:2] >= (3, 6))

import sre_parse
import sre_constants
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')

def get_regexp_width(expr):
    if regex:
        # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
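For reference, get_regexp_width reports the (min, max) match width that sre_parse computes for a pattern:

```python
from lark.utils import get_regexp_width

print(get_regexp_width('ab?'))   # (1, 2)
print(get_regexp_width('a+'))    # (1, <large sentinel>), i.e. effectively unbounded
```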
@@ -173,9 +174,7 @@ def dedup_list(l):
       preserving the original order of the list. Assumes that
       the list entries are hashable."""
    dedup = set()
    return [ x for x in l if not (x in dedup or dedup.add(x))]
    return [x for x in l if not (x in dedup or dedup.add(x))]
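The one-liner reads oddly but is a standard trick: set.add returns None, so `x in dedup or dedup.add(x)` both tests and records membership in a single expression. E.g.:

```python
from lark.utils import dedup_list

print(dedup_list([3, 1, 3, 2, 1]))   # -> [3, 1, 2]
```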
try:
@@ -197,8 +196,6 @@ except ImportError:
    pass

try:
    compare = cmp
except NameError:
@@ -210,7 +207,6 @@ except NameError:
            return -1

class Enumerator(Serialize):
    def __init__(self):
        self.enums = {}
@@ -8,6 +8,7 @@ from .lexer import Token
###{standalone
from inspect import getmembers, getmro

class Discard(Exception):
    """When raising the Discard exception in a transformer callback,
    that node is discarded and won't appear in the parent.
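In this version Discard is an exception to raise from inside a callback, not a value to return. A minimal sketch with an invented transformer:

```python
from lark import Transformer
from lark.visitors import Discard

class StripComments(Transformer):    # illustrative
    def comment(self, children):
        raise Discard                # the 'comment' node vanishes from its parent
```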
@@ -16,6 +17,7 @@ class Discard(Exception):
# Transformers

class _Decoratable:
    "Provides support for decorating methods with @v_args"
@@ -107,7 +109,6 @@ class Transformer(_Decoratable):
        except Exception as e:
            raise VisitError(token.type, token, e)

    def _transform_children(self, children):
        for c in children:
            try:
@@ -148,7 +149,6 @@ class Transformer(_Decoratable):
        return token

class InlineTransformer(Transformer):   # XXX Deprecated
    def _call_userfunc(self, tree, new_children=None):
        # Assumes tree is already transformed
@@ -203,7 +203,7 @@ class Transformer_NonRecursive(Transformer):
        q = [tree]
        while q:
            t = q.pop()
            rev_postfix.append( t )
            rev_postfix.append(t)
            if isinstance(t, Tree):
                q += t.children
@@ -225,7 +225,6 @@ class Transformer_NonRecursive(Transformer):
        return t

class Transformer_InPlaceRecursive(Transformer):
    "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances"
    def _transform_tree(self, tree):
@@ -297,7 +296,6 @@ class Visitor_Recursive(VisitorBase):
        return tree

def visit_children_decor(func):
    "See Interpreter"
    @wraps(func)
@@ -338,8 +336,6 @@ class Interpreter(_Decoratable):
        return self.visit_children(tree)

# Decorators

def _apply_decorator(obj, decorator, **kwargs):
@@ -351,7 +347,6 @@ def _apply_decorator(obj, decorator, **kwargs):
    return _apply(decorator, **kwargs)

def _inline_args__func(func):
    @wraps(func)
    def create_decorator(_f, with_self):
@@ -370,7 +365,6 @@ def inline_args(obj):   # XXX Deprecated
    return _apply_decorator(obj, _inline_args__func)

def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
    def create_decorator(_f, with_self):
        if with_self:
@@ -390,11 +384,11 @@ def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
    return f

def _vargs_inline(f, data, children, meta):
def _vargs_inline(f, _data, children, _meta):
    return f(*children)

def _vargs_meta_inline(f, data, children, meta):
def _vargs_meta_inline(f, _data, children, meta):
    return f(meta, *children)

def _vargs_meta(f, data, children, meta):
def _vargs_meta(f, _data, children, meta):
    return f(children, meta)   # TODO swap these for consistency? Backwards incompatible!

def _vargs_tree(f, data, children, meta):
    return f(Tree(data, children, meta))
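These _vargs_* shims implement the calling conventions that v_args (documented just below) selects between. A minimal use of the public decorator:

```python
from lark import Transformer, v_args

@v_args(inline=True)       # children arrive as positional arguments
class Calc(Transformer):
    def add(self, left, right):
        return left + right
```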
@@ -415,6 +409,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
        inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists).
        meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first)
        tree (bool, optional): Provides the entire tree as the argument, instead of the children.
        wrapper (function, optional): Provide a function to decorate all methods.

    Example:
        ::
@@ -457,7 +452,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None):
###}

#--- Visitor Utilities ---
# --- Visitor Utilities ---

class CollapseAmbiguities(Transformer):
    """
@@ -471,7 +466,9 @@ class CollapseAmbiguities(Transformer):
    """
    def _ambig(self, options):
        return sum(options, [])

    def __default__(self, data, children_lists, meta):
        return [Tree(data, children, meta) for children in combine_alternatives(children_lists)]

    def __default_token__(self, t):
        return [t]
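CollapseAmbiguities pairs with ambiguity='explicit': it expands a single tree containing _ambig nodes into a flat list of unambiguous alternatives. A sketch:

```python
from lark import Lark
from lark.visitors import CollapseAmbiguities

parser = Lark(r'''start: ab | a b
                  ab: "ab"
                  a: "a"
                  b: "b"''', ambiguity='explicit')

for option in CollapseAmbiguities().transform(parser.parse("ab")):
    print(option)
```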