| @@ -1,7 +1,7 @@ | |||||
| # -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
| from types import ModuleType | from types import ModuleType | ||||
| from typing import ( | from typing import ( | ||||
| TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, | |||||
| TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, | |||||
| Pattern as REPattern, | Pattern as REPattern, | ||||
| ) | ) | ||||
| from abc import abstractmethod, ABC | from abc import abstractmethod, ABC | ||||
| @@ -100,10 +100,22 @@ class Lexer(ABC): | |||||
| lex: Callable[..., Iterator[Token]] | lex: Callable[..., Iterator[Token]] | ||||
| class LexerConf: | |||||
| tokens: Collection[TerminalDef] | |||||
| re_module: ModuleType | |||||
| ignore: Collection[str] = () | |||||
| postlex: Any =None | |||||
| callbacks: Optional[Dict[str, _Callback]] = None | |||||
| g_regex_flags: int = 0 | |||||
| skip_validation: bool = False | |||||
| use_bytes: bool = False | |||||
| class TraditionalLexer(Lexer): | class TraditionalLexer(Lexer): | ||||
| terminals: Collection[TerminalDef] | terminals: Collection[TerminalDef] | ||||
| ignore_types: List[str] | |||||
| newline_types: List[str] | |||||
| ignore_types: FrozenSet[str] | |||||
| newline_types: FrozenSet[str] | |||||
| user_callbacks: Dict[str, _Callback] | user_callbacks: Dict[str, _Callback] | ||||
| callback: Dict[str, _Callback] | callback: Dict[str, _Callback] | ||||
| mres: List[Tuple[REPattern, Dict[int, str]]] | mres: List[Tuple[REPattern, Dict[int, str]]] | ||||
| @@ -111,11 +123,7 @@ class TraditionalLexer(Lexer): | |||||
| def __init__( | def __init__( | ||||
| self, | self, | ||||
| terminals: Collection[TerminalDef], | |||||
| re_: ModuleType, | |||||
| ignore: Collection[str] = ..., | |||||
| user_callbacks: Dict[str, _Callback] = ..., | |||||
| g_regex_flags: int = ... | |||||
| conf: LexerConf | |||||
| ): | ): | ||||
| ... | ... | ||||
| @@ -128,6 +136,8 @@ class TraditionalLexer(Lexer): | |||||
| def lex(self, stream: str) -> Iterator[Token]: | def lex(self, stream: str) -> Iterator[Token]: | ||||
| ... | ... | ||||
| def next_token(self, lex_state: Any) -> Token: | |||||
| ... | |||||
| class ContextualLexer(Lexer): | class ContextualLexer(Lexer): | ||||
| lexers: Dict[str, TraditionalLexer] | lexers: Dict[str, TraditionalLexer] | ||||
| @@ -3,6 +3,7 @@ from .lexer import TerminalDef | |||||
| ###{standalone | ###{standalone | ||||
| class LexerConf(Serialize): | class LexerConf(Serialize): | ||||
| __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes' | __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes' | ||||
| __serialize_namespace__ = TerminalDef, | __serialize_namespace__ = TerminalDef, | ||||
| @@ -19,11 +20,10 @@ class LexerConf(Serialize): | |||||
| ###} | ###} | ||||
| class ParserConf: | class ParserConf: | ||||
| def __init__(self, rules, callbacks, start): | def __init__(self, rules, callbacks, start): | ||||
| assert isinstance(start, list) | assert isinstance(start, list) | ||||
| self.rules = rules | self.rules = rules | ||||
| self.callbacks = callbacks | self.callbacks = callbacks | ||||
| self.start = start | self.start = start | ||||
| @@ -6,15 +6,19 @@ from .utils import STRING_TYPE, logger | |||||
| class LarkError(Exception): | class LarkError(Exception): | ||||
| pass | pass | ||||
| class GrammarError(LarkError): | class GrammarError(LarkError): | ||||
| pass | pass | ||||
| class ParseError(LarkError): | class ParseError(LarkError): | ||||
| pass | pass | ||||
| class LexError(LarkError): | class LexError(LarkError): | ||||
| pass | pass | ||||
| class UnexpectedEOF(ParseError): | class UnexpectedEOF(ParseError): | ||||
| def __init__(self, expected): | def __init__(self, expected): | ||||
| self.expected = expected | self.expected = expected | ||||
| @@ -40,14 +40,12 @@ class Terminal(Symbol): | |||||
| return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) | return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) | ||||
| class NonTerminal(Symbol): | class NonTerminal(Symbol): | ||||
| __serialize_fields__ = 'name', | __serialize_fields__ = 'name', | ||||
| is_term = False | is_term = False | ||||
| class RuleOptions(Serialize): | class RuleOptions(Serialize): | ||||
| __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' | __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' | ||||
| @@ -104,5 +102,4 @@ class Rule(Serialize): | |||||
| return self.origin == other.origin and self.expansion == other.expansion | return self.origin == other.origin and self.expansion == other.expansion | ||||
| ###} | ###} | ||||
| @@ -24,6 +24,7 @@ except ImportError: | |||||
| ###{standalone | ###{standalone | ||||
| class LarkOptions(Serialize): | class LarkOptions(Serialize): | ||||
| """Specifies the options for Lark | """Specifies the options for Lark | ||||
| @@ -36,7 +37,7 @@ class LarkOptions(Serialize): | |||||
| debug | debug | ||||
| Display debug information, such as warnings (default: False) | Display debug information, such as warnings (default: False) | ||||
| transformer | transformer | ||||
| Applies the transformer to every parse tree (equivlent to applying it after the parse, but faster) | |||||
| Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | |||||
| propagate_positions | propagate_positions | ||||
| Propagates (line, column, end_line, end_column) attributes into all tree branches. | Propagates (line, column, end_line, end_column) attributes into all tree branches. | ||||
| maybe_placeholders | maybe_placeholders | ||||
| @@ -320,7 +321,7 @@ class Lark(Serialize): | |||||
| # Else, if the user asked to disable priorities, strip them from the | # Else, if the user asked to disable priorities, strip them from the | ||||
| # rules. This allows the Earley parsers to skip an extra forest walk | # rules. This allows the Earley parsers to skip an extra forest walk | ||||
| # for improved performance, if you don't need them (or didn't specify any). | # for improved performance, if you don't need them (or didn't specify any). | ||||
| elif self.options.priority == None: | |||||
| elif self.options.priority is None: | |||||
| for rule in self.rules: | for rule in self.rules: | ||||
| if rule.options.priority is not None: | if rule.options.priority is not None: | ||||
| rule.options.priority = None | rule.options.priority = None | ||||
| @@ -360,7 +361,7 @@ class Lark(Serialize): | |||||
| self.rules, | self.rules, | ||||
| self.options.tree_class or Tree, | self.options.tree_class or Tree, | ||||
| self.options.propagate_positions, | self.options.propagate_positions, | ||||
| self.options.parser!='lalr' and self.options.ambiguity=='explicit', | |||||
| self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', | |||||
| self.options.maybe_placeholders | self.options.maybe_placeholders | ||||
| ) | ) | ||||
| self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) | self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) | ||||
| @@ -410,7 +411,7 @@ class Lark(Serialize): | |||||
| data['parser'], | data['parser'], | ||||
| memo, | memo, | ||||
| self._callbacks, | self._callbacks, | ||||
| self.options, # Not all, but multiple attributes are used | |||||
| self.options, # Not all, but multiple attributes are used | |||||
| ) | ) | ||||
| self.terminals = self.parser.lexer_conf.tokens | self.terminals = self.parser.lexer_conf.tokens | ||||
| self._terminals_dict = {t.name: t for t in self.terminals} | self._terminals_dict = {t.name: t for t in self.terminals} | ||||
| @@ -1,4 +1,4 @@ | |||||
| ## Lexer Implementation | |||||
| # Lexer Implementation | |||||
| import re | import re | ||||
| @@ -8,6 +8,7 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | |||||
| ###{standalone | ###{standalone | ||||
| from copy import copy | from copy import copy | ||||
| class Pattern(Serialize): | class Pattern(Serialize): | ||||
| def __init__(self, value, flags=()): | def __init__(self, value, flags=()): | ||||
| @@ -20,6 +21,7 @@ class Pattern(Serialize): | |||||
| # Pattern Hashing assumes all subclasses have a different priority! | # Pattern Hashing assumes all subclasses have a different priority! | ||||
| def __hash__(self): | def __hash__(self): | ||||
| return hash((type(self), self.value, self.flags)) | return hash((type(self), self.value, self.flags)) | ||||
| def __eq__(self, other): | def __eq__(self, other): | ||||
| return type(self) == type(other) and self.value == other.value and self.flags == other.flags | return type(self) == type(other) and self.value == other.value and self.flags == other.flags | ||||
| @@ -53,6 +55,7 @@ class PatternStr(Pattern): | |||||
| return len(self.value) | return len(self.value) | ||||
| max_width = min_width | max_width = min_width | ||||
| class PatternRE(Pattern): | class PatternRE(Pattern): | ||||
| __serialize_fields__ = 'value', 'flags', '_width' | __serialize_fields__ = 'value', 'flags', '_width' | ||||
| @@ -70,6 +73,7 @@ class PatternRE(Pattern): | |||||
| @property | @property | ||||
| def min_width(self): | def min_width(self): | ||||
| return self._get_width()[0] | return self._get_width()[0] | ||||
| @property | @property | ||||
| def max_width(self): | def max_width(self): | ||||
| return self._get_width()[1] | return self._get_width()[1] | ||||
| @@ -139,7 +143,7 @@ class Token(Str): | |||||
| return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) | return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) | ||||
| def __reduce__(self): | def __reduce__(self): | ||||
| return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, )) | |||||
| return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column)) | |||||
| def __repr__(self): | def __repr__(self): | ||||
| return 'Token(%r, %r)' % (self.type, self.value) | return 'Token(%r, %r)' % (self.type, self.value) | ||||
| @@ -193,6 +197,7 @@ class UnlessCallback: | |||||
| break | break | ||||
| return t | return t | ||||
| class CallChain: | class CallChain: | ||||
| def __init__(self, callback1, callback2, cond): | def __init__(self, callback1, callback2, cond): | ||||
| self.callback1 = callback1 | self.callback1 = callback1 | ||||
| @@ -204,16 +209,13 @@ class CallChain: | |||||
| return self.callback2(t) if self.cond(t2) else t2 | return self.callback2(t) if self.cond(t2) else t2 | ||||
| def _create_unless(terminals, g_regex_flags, re_, use_bytes): | def _create_unless(terminals, g_regex_flags, re_, use_bytes): | ||||
| tokens_by_type = classify(terminals, lambda t: type(t.pattern)) | tokens_by_type = classify(terminals, lambda t: type(t.pattern)) | ||||
| assert len(tokens_by_type) <= 2, tokens_by_type.keys() | assert len(tokens_by_type) <= 2, tokens_by_type.keys() | ||||
| embedded_strs = set() | embedded_strs = set() | ||||
| callback = {} | callback = {} | ||||
| for retok in tokens_by_type.get(PatternRE, []): | for retok in tokens_by_type.get(PatternRE, []): | ||||
| unless = [] # {} | |||||
| unless = [] | |||||
| for strtok in tokens_by_type.get(PatternStr, []): | for strtok in tokens_by_type.get(PatternStr, []): | ||||
| if strtok.priority > retok.priority: | if strtok.priority > retok.priority: | ||||
| continue | continue | ||||
| @@ -245,13 +247,15 @@ def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes) | |||||
| except AssertionError: # Yes, this is what Python provides us.. :/ | except AssertionError: # Yes, this is what Python provides us.. :/ | ||||
| return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes) | return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes) | ||||
| mres.append((mre, {i:n for n,i in mre.groupindex.items()} )) | |||||
| mres.append((mre, {i: n for n, i in mre.groupindex.items()})) | |||||
| terminals = terminals[max_size:] | terminals = terminals[max_size:] | ||||
| return mres | return mres | ||||
| def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False): | def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False): | ||||
| return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes) | return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes) | ||||
| def _regexp_has_newline(r): | def _regexp_has_newline(r): | ||||
| r"""Expressions that may indicate newlines in a regexp: | r"""Expressions that may indicate newlines in a regexp: | ||||
| - newlines (\n) | - newlines (\n) | ||||
| @@ -262,6 +266,7 @@ def _regexp_has_newline(r): | |||||
| """ | """ | ||||
| return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) | return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) | ||||
| class Lexer(object): | class Lexer(object): | ||||
| """Lexer interface | """Lexer interface | ||||
| @@ -300,7 +305,7 @@ class TraditionalLexer(Lexer): | |||||
| self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) | self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) | ||||
| self.ignore_types = frozenset(conf.ignore) | self.ignore_types = frozenset(conf.ignore) | ||||
| terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) | |||||
| terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) | |||||
| self.terminals = terminals | self.terminals = terminals | ||||
| self.user_callbacks = conf.callbacks | self.user_callbacks = conf.callbacks | ||||
| self.g_regex_flags = conf.g_regex_flags | self.g_regex_flags = conf.g_regex_flags | ||||
| @@ -309,7 +314,7 @@ class TraditionalLexer(Lexer): | |||||
| self._mres = None | self._mres = None | ||||
| def _build(self): | def _build(self): | ||||
| terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes) | |||||
| terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes) | |||||
| assert all(self.callback.values()) | assert all(self.callback.values()) | ||||
| for type_, f in self.user_callbacks.items(): | for type_, f in self.user_callbacks.items(): | ||||
| @@ -333,7 +338,7 @@ class TraditionalLexer(Lexer): | |||||
| if m: | if m: | ||||
| return m.group(0), type_from_index[m.lastindex] | return m.group(0), type_from_index[m.lastindex] | ||||
| def lex(self, state, parser_state): | |||||
| def lex(self, state, _parser_state): | |||||
| with suppress(EOFError): | with suppress(EOFError): | ||||
| while True: | while True: | ||||
| yield self.next_token(state) | yield self.next_token(state) | ||||
| @@ -372,6 +377,7 @@ class TraditionalLexer(Lexer): | |||||
| # EOF | # EOF | ||||
| raise EOFError(self) | raise EOFError(self) | ||||
| class LexerState: | class LexerState: | ||||
| __slots__ = 'text', 'line_ctr', 'last_token' | __slots__ = 'text', 'line_ctr', 'last_token' | ||||
| @@ -383,6 +389,7 @@ class LexerState: | |||||
| def __copy__(self): | def __copy__(self): | ||||
| return type(self)(self.text, copy(self.line_ctr), self.last_token) | return type(self)(self.text, copy(self.line_ctr), self.last_token) | ||||
| class ContextualLexer(Lexer): | class ContextualLexer(Lexer): | ||||
| def __init__(self, conf, states, always_accept=()): | def __init__(self, conf, states, always_accept=()): | ||||
| @@ -430,8 +437,9 @@ class ContextualLexer(Lexer): | |||||
| token = self.root_lexer.next_token(lexer_state) | token = self.root_lexer.next_token(lexer_state) | ||||
| raise UnexpectedToken(token, e.allowed, state=parser_state.position) | raise UnexpectedToken(token, e.allowed, state=parser_state.position) | ||||
| class LexerThread: | class LexerThread: | ||||
| "A thread that ties a lexer instance and a lexer state, to be used by the parser" | |||||
| """A thread that ties a lexer instance and a lexer state, to be used by the parser""" | |||||
| def __init__(self, lexer, text): | def __init__(self, lexer, text): | ||||
| self.lexer = lexer | self.lexer = lexer | ||||
| @@ -1,4 +1,4 @@ | |||||
| "Parses and creates Grammar objects" | |||||
| """Parses and creates Grammar objects""" | |||||
| import os.path | import os.path | ||||
| import sys | import sys | ||||
| @@ -166,6 +166,7 @@ RULES = { | |||||
| 'literal': ['REGEXP', 'STRING'], | 'literal': ['REGEXP', 'STRING'], | ||||
| } | } | ||||
| @inline_args | @inline_args | ||||
| class EBNF_to_BNF(Transformer_InPlace): | class EBNF_to_BNF(Transformer_InPlace): | ||||
| def __init__(self): | def __init__(self): | ||||
| @@ -259,9 +260,9 @@ class SimplifyRule_Visitor(Visitor): | |||||
| for i, child in enumerate(tree.children): | for i, child in enumerate(tree.children): | ||||
| if isinstance(child, Tree) and child.data == 'expansions': | if isinstance(child, Tree) and child.data == 'expansions': | ||||
| tree.data = 'expansions' | tree.data = 'expansions' | ||||
| tree.children = [self.visit(ST('expansion', [option if i==j else other | |||||
| for j, other in enumerate(tree.children)])) | |||||
| for option in dedup_list(child.children)] | |||||
| tree.children = [self.visit(ST('expansion', [option if i == j else other | |||||
| for j, other in enumerate(tree.children)])) | |||||
| for option in dedup_list(child.children)] | |||||
| self._flatten(tree) | self._flatten(tree) | ||||
| break | break | ||||
| @@ -284,8 +285,10 @@ class SimplifyRule_Visitor(Visitor): | |||||
| class RuleTreeToText(Transformer): | class RuleTreeToText(Transformer): | ||||
| def expansions(self, x): | def expansions(self, x): | ||||
| return x | return x | ||||
| def expansion(self, symbols): | def expansion(self, symbols): | ||||
| return symbols, None | return symbols, None | ||||
| def alias(self, x): | def alias(self, x): | ||||
| (expansion, _alias), alias = x | (expansion, _alias), alias = x | ||||
| assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed | assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed | ||||
| @@ -300,8 +303,9 @@ class CanonizeTree(Transformer_InPlace): | |||||
| tokenmods, value = args | tokenmods, value = args | ||||
| return tokenmods + [value] | return tokenmods + [value] | ||||
| class PrepareAnonTerminals(Transformer_InPlace): | class PrepareAnonTerminals(Transformer_InPlace): | ||||
| "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them" | |||||
| """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them""" | |||||
| def __init__(self, terminals): | def __init__(self, terminals): | ||||
| self.terminals = terminals | self.terminals = terminals | ||||
| @@ -310,7 +314,6 @@ class PrepareAnonTerminals(Transformer_InPlace): | |||||
| self.i = 0 | self.i = 0 | ||||
| self.rule_options = None | self.rule_options = None | ||||
| @inline_args | @inline_args | ||||
| def pattern(self, p): | def pattern(self, p): | ||||
| value = p.value | value = p.value | ||||
| @@ -330,14 +333,14 @@ class PrepareAnonTerminals(Transformer_InPlace): | |||||
| except KeyError: | except KeyError: | ||||
| if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set: | if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set: | ||||
| with suppress(UnicodeEncodeError): | with suppress(UnicodeEncodeError): | ||||
| value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names | |||||
| value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names | |||||
| term_name = value.upper() | term_name = value.upper() | ||||
| if term_name in self.term_set: | if term_name in self.term_set: | ||||
| term_name = None | term_name = None | ||||
| elif isinstance(p, PatternRE): | elif isinstance(p, PatternRE): | ||||
| if p in self.term_reverse: # Kind of a weird placement.name | |||||
| if p in self.term_reverse: # Kind of a weird placement.name | |||||
| term_name = self.term_reverse[p].name | term_name = self.term_reverse[p].name | ||||
| else: | else: | ||||
| assert False, p | assert False, p | ||||
| @@ -359,7 +362,7 @@ class PrepareAnonTerminals(Transformer_InPlace): | |||||
| class _ReplaceSymbols(Transformer_InPlace): | class _ReplaceSymbols(Transformer_InPlace): | ||||
| " Helper for ApplyTemplates " | |||||
| """Helper for ApplyTemplates""" | |||||
| def __init__(self): | def __init__(self): | ||||
| self.names = {} | self.names = {} | ||||
| @@ -374,8 +377,9 @@ class _ReplaceSymbols(Transformer_InPlace): | |||||
| return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None) | return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None) | ||||
| return self.__default__('template_usage', c, None) | return self.__default__('template_usage', c, None) | ||||
| class ApplyTemplates(Transformer_InPlace): | class ApplyTemplates(Transformer_InPlace): | ||||
| " Apply the templates, creating new rules that represent the used templates " | |||||
| """Apply the templates, creating new rules that represent the used templates""" | |||||
| def __init__(self, rule_defs): | def __init__(self, rule_defs): | ||||
| self.rule_defs = rule_defs | self.rule_defs = rule_defs | ||||
| @@ -401,8 +405,6 @@ def _rfind(s, choices): | |||||
| return max(s.rfind(c) for c in choices) | return max(s.rfind(c) for c in choices) | ||||
| def _literal_to_pattern(literal): | def _literal_to_pattern(literal): | ||||
| v = literal.value | v = literal.value | ||||
| flag_start = _rfind(v, '/"')+1 | flag_start = _rfind(v, '/"')+1 | ||||
| @@ -441,7 +443,7 @@ class PrepareLiterals(Transformer_InPlace): | |||||
| assert start.type == end.type == 'STRING' | assert start.type == end.type == 'STRING' | ||||
| start = start.value[1:-1] | start = start.value[1:-1] | ||||
| end = end.value[1:-1] | end = end.value[1:-1] | ||||
| assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end))) | |||||
| assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1 | |||||
| regexp = '[%s-%s]' % (start, end) | regexp = '[%s-%s]' % (start, end) | ||||
| return ST('pattern', [PatternRE(regexp)]) | return ST('pattern', [PatternRE(regexp)]) | ||||
| @@ -460,6 +462,7 @@ def _make_joined_pattern(regexp, flags_set): | |||||
| return PatternRE(regexp, flags) | return PatternRE(regexp, flags) | ||||
| class TerminalTreeToPattern(Transformer): | class TerminalTreeToPattern(Transformer): | ||||
| def pattern(self, ps): | def pattern(self, ps): | ||||
| p ,= ps | p ,= ps | ||||
| @@ -503,6 +506,7 @@ class TerminalTreeToPattern(Transformer): | |||||
| def value(self, v): | def value(self, v): | ||||
| return v[0] | return v[0] | ||||
| class PrepareSymbols(Transformer_InPlace): | class PrepareSymbols(Transformer_InPlace): | ||||
| def value(self, v): | def value(self, v): | ||||
| v ,= v | v ,= v | ||||
| @@ -514,13 +518,16 @@ class PrepareSymbols(Transformer_InPlace): | |||||
| return Terminal(Str(v.value), filter_out=v.startswith('_')) | return Terminal(Str(v.value), filter_out=v.startswith('_')) | ||||
| assert False | assert False | ||||
| def _choice_of_rules(rules): | def _choice_of_rules(rules): | ||||
| return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) | return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) | ||||
| def nr_deepcopy_tree(t): | def nr_deepcopy_tree(t): | ||||
| "Deepcopy tree `t` without recursion" | |||||
| """Deepcopy tree `t` without recursion""" | |||||
| return Transformer_NonRecursive(False).transform(t) | return Transformer_NonRecursive(False).transform(t) | ||||
| class Grammar: | class Grammar: | ||||
| def __init__(self, rule_defs, term_defs, ignore): | def __init__(self, rule_defs, term_defs, ignore): | ||||
| self.term_defs = term_defs | self.term_defs = term_defs | ||||
| @@ -547,7 +554,7 @@ class Grammar: | |||||
| raise GrammarError("Terminals cannot be empty (%s)" % name) | raise GrammarError("Terminals cannot be empty (%s)" % name) | ||||
| transformer = PrepareLiterals() * TerminalTreeToPattern() | transformer = PrepareLiterals() * TerminalTreeToPattern() | ||||
| terminals = [TerminalDef(name, transformer.transform( term_tree ), priority) | |||||
| terminals = [TerminalDef(name, transformer.transform(term_tree), priority) | |||||
| for name, (term_tree, priority) in term_defs if term_tree] | for name, (term_tree, priority) in term_defs if term_tree] | ||||
| # ================= | # ================= | ||||
| @@ -566,10 +573,10 @@ class Grammar: | |||||
| ebnf_to_bnf = EBNF_to_BNF() | ebnf_to_bnf = EBNF_to_BNF() | ||||
| rules = [] | rules = [] | ||||
| i = 0 | i = 0 | ||||
| while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates | |||||
| while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates | |||||
| name, params, rule_tree, options = rule_defs[i] | name, params, rule_tree, options = rule_defs[i] | ||||
| i += 1 | i += 1 | ||||
| if len(params) != 0: # Dont transform templates | |||||
| if len(params) != 0: # Dont transform templates | |||||
| continue | continue | ||||
| rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None | rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None | ||||
| ebnf_to_bnf.rule_options = rule_options | ebnf_to_bnf.rule_options = rule_options | ||||
| @@ -594,7 +601,7 @@ class Grammar: | |||||
| for i, (expansion, alias) in enumerate(expansions): | for i, (expansion, alias) in enumerate(expansions): | ||||
| if alias and name.startswith('_'): | if alias and name.startswith('_'): | ||||
| raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | |||||
| raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias)) | |||||
| empty_indices = [x==_EMPTY for x in expansion] | empty_indices = [x==_EMPTY for x in expansion] | ||||
| if any(empty_indices): | if any(empty_indices): | ||||
| @@ -623,14 +630,13 @@ class Grammar: | |||||
| # Remove duplicates | # Remove duplicates | ||||
| compiled_rules = list(set(compiled_rules)) | compiled_rules = list(set(compiled_rules)) | ||||
| # Filter out unused rules | # Filter out unused rules | ||||
| while True: | while True: | ||||
| c = len(compiled_rules) | c = len(compiled_rules) | ||||
| used_rules = {s for r in compiled_rules | used_rules = {s for r in compiled_rules | ||||
| for s in r.expansion | |||||
| if isinstance(s, NonTerminal) | |||||
| and s != r.origin} | |||||
| for s in r.expansion | |||||
| if isinstance(s, NonTerminal) | |||||
| and s != r.origin} | |||||
| used_rules |= {NonTerminal(s) for s in start} | used_rules |= {NonTerminal(s) for s in start} | ||||
| compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules) | compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules) | ||||
| for r in unused: | for r in unused: | ||||
| @@ -663,6 +669,7 @@ class PackageResource(object): | |||||
| def __repr__(self): | def __repr__(self): | ||||
| return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path) | return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.path) | ||||
| class FromPackageLoader(object): | class FromPackageLoader(object): | ||||
| """ | """ | ||||
| Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`. | Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`. | ||||
| @@ -699,11 +706,12 @@ class FromPackageLoader(object): | |||||
| return PackageResource(self.pkg_name, full_path), text.decode() | return PackageResource(self.pkg_name, full_path), text.decode() | ||||
| raise IOError() | raise IOError() | ||||
| stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) | |||||
| stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) | |||||
| _imported_grammars = {} | _imported_grammars = {} | ||||
| def import_from_grammar_into_namespace(grammar, namespace, aliases): | def import_from_grammar_into_namespace(grammar, namespace, aliases): | ||||
| """Returns all rules and terminals of grammar, prepended | """Returns all rules and terminals of grammar, prepended | ||||
| with a 'namespace' prefix, except for those which are aliased. | with a 'namespace' prefix, except for those which are aliased. | ||||
| @@ -724,8 +732,6 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): | |||||
| raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) | raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) | ||||
| return _find_used_symbols(tree) - set(params) | return _find_used_symbols(tree) - set(params) | ||||
| def get_namespace_name(name, params): | def get_namespace_name(name, params): | ||||
| if params is not None: | if params is not None: | ||||
| try: | try: | ||||
| @@ -746,19 +752,17 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): | |||||
| else: | else: | ||||
| assert symbol.type == 'RULE' | assert symbol.type == 'RULE' | ||||
| _, params, tree, options = imported_rules[symbol] | _, params, tree, options = imported_rules[symbol] | ||||
| params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params} | |||||
| params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s') % (namespace, p) for p in params} | |||||
| for t in tree.iter_subtrees(): | for t in tree.iter_subtrees(): | ||||
| for i, c in enumerate(t.children): | for i, c in enumerate(t.children): | ||||
| if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): | if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): | ||||
| t.children[i] = Token(c.type, get_namespace_name(c, params_map)) | t.children[i] = Token(c.type, get_namespace_name(c, params_map)) | ||||
| params = [params_map[p] for p in params] # We can not rely on ordered dictionaries | |||||
| params = [params_map[p] for p in params] # We can not rely on ordered dictionaries | |||||
| rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options)) | rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options)) | ||||
| return term_defs, rule_defs | return term_defs, rule_defs | ||||
| def resolve_term_references(term_defs): | def resolve_term_references(term_defs): | ||||
| # TODO Solve with transitive closure (maybe) | # TODO Solve with transitive closure (maybe) | ||||
| @@ -798,7 +802,7 @@ def options_from_rule(name, params, *x): | |||||
| else: | else: | ||||
| expansions ,= x | expansions ,= x | ||||
| priority = None | priority = None | ||||
| params = [t.value for t in params.children] if params is not None else [] # For the grammar parser | |||||
| params = [t.value for t in params.children] if params is not None else [] # For the grammar parser | |||||
| keep_all_tokens = name.startswith('!') | keep_all_tokens = name.startswith('!') | ||||
| name = name.lstrip('!') | name = name.lstrip('!') | ||||
| @@ -812,10 +816,12 @@ def options_from_rule(name, params, *x): | |||||
| def symbols_from_strcase(expansion): | def symbols_from_strcase(expansion): | ||||
| return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion] | return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion] | ||||
| @inline_args | @inline_args | ||||
| class PrepareGrammar(Transformer_InPlace): | class PrepareGrammar(Transformer_InPlace): | ||||
| def terminal(self, name): | def terminal(self, name): | ||||
| return name | return name | ||||
| def nonterminal(self, name): | def nonterminal(self, name): | ||||
| return name | return name | ||||
| @@ -825,10 +831,11 @@ def _find_used_symbols(tree): | |||||
| return {t for x in tree.find_data('expansion') | return {t for x in tree.find_data('expansion') | ||||
| for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} | for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} | ||||
| class GrammarLoader: | class GrammarLoader: | ||||
| ERRORS = [ | ERRORS = [ | ||||
| ('Unclosed parenthesis', ['a: (\n']), | ('Unclosed parenthesis', ['a: (\n']), | ||||
| ('Umatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), | |||||
| ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']), | |||||
| ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), | ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']), | ||||
| ('Illegal name for rules or terminals', ['Aa:\n']), | ('Illegal name for rules or terminals', ['Aa:\n']), | ||||
| ('Alias expects lowercase name', ['a: -> "a"\n']), | ('Alias expects lowercase name', ['a: -> "a"\n']), | ||||
| @@ -843,8 +850,9 @@ class GrammarLoader: | |||||
| def __init__(self, global_keep_all_tokens): | def __init__(self, global_keep_all_tokens): | ||||
| terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | ||||
| rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | |||||
| rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] | |||||
| rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | |||||
| rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) | |||||
| for r, _p, xs, o in rules for i, x in enumerate(xs)] | |||||
| callback = ParseTreeBuilder(rules, ST).create_callback() | callback = ParseTreeBuilder(rules, ST).create_callback() | ||||
| import re | import re | ||||
| lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) | lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT']) | ||||
| @@ -881,10 +889,10 @@ class GrammarLoader: | |||||
| return _imported_grammars[grammar_path] | return _imported_grammars[grammar_path] | ||||
| def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]): | def load_grammar(self, grammar_text, grammar_name='<?>', import_paths=[]): | ||||
| "Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | |||||
| """Parse grammar_text, verify, and create Grammar object. Display nice messages on error.""" | |||||
| try: | try: | ||||
| tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') ) | |||||
| tree = self.canonize_tree.transform(self.parser.parse(grammar_text+'\n')) | |||||
| except UnexpectedCharacters as e: | except UnexpectedCharacters as e: | ||||
| context = e.get_context(grammar_text) | context = e.get_context(grammar_text) | ||||
| raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" % | raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" % | ||||
| @@ -1037,7 +1045,7 @@ class GrammarLoader: | |||||
| raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name)) | raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name)) | ||||
| if len(args) != rule_names[sym]: | if len(args) != rule_names[sym]: | ||||
| raise GrammarError("Wrong number of template arguments used for %s " | raise GrammarError("Wrong number of template arguments used for %s " | ||||
| "(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name)) | |||||
| "(expected %s, got %s) (in rule %s)" % (sym, rule_names[sym], len(args), name)) | |||||
| for sym in _find_used_symbols(expansions): | for sym in _find_used_symbols(expansions): | ||||
| if sym.type == 'TERMINAL': | if sym.type == 'TERMINAL': | ||||
| if sym not in terminal_names: | if sym not in terminal_names: | ||||
| @@ -1046,10 +1054,8 @@ class GrammarLoader: | |||||
| if sym not in rule_names and sym not in params: | if sym not in rule_names and sym not in params: | ||||
| raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) | raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) | ||||
| return Grammar(rules, term_defs, ignore_names) | return Grammar(rules, term_defs, ignore_names) | ||||
| def load_grammar(grammar, source, import_paths, global_keep_all_tokens): | def load_grammar(grammar, source, import_paths, global_keep_all_tokens): | ||||
| return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths) | return GrammarLoader(global_keep_all_tokens).load_grammar(grammar, source, import_paths) | ||||
| @@ -1,7 +1,7 @@ | |||||
| from .exceptions import GrammarError | from .exceptions import GrammarError | ||||
| from .lexer import Token | from .lexer import Token | ||||
| from .tree import Tree | from .tree import Tree | ||||
| from .visitors import InlineTransformer # XXX Deprecated | |||||
| from .visitors import InlineTransformer # XXX Deprecated | |||||
| from .visitors import Transformer_InPlace | from .visitors import Transformer_InPlace | ||||
| from .visitors import _vargs_meta, _vargs_meta_inline | from .visitors import _vargs_meta, _vargs_meta_inline | ||||
| @@ -20,6 +20,7 @@ class ExpandSingleChild: | |||||
| else: | else: | ||||
| return self.node_builder(children) | return self.node_builder(children) | ||||
| class PropagatePositions: | class PropagatePositions: | ||||
| def __init__(self, node_builder): | def __init__(self, node_builder): | ||||
| self.node_builder = node_builder | self.node_builder = node_builder | ||||
| @@ -87,8 +88,9 @@ class ChildFilter: | |||||
| return self.node_builder(filtered) | return self.node_builder(filtered) | ||||
| class ChildFilterLALR(ChildFilter): | class ChildFilterLALR(ChildFilter): | ||||
| "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" | |||||
| """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)""" | |||||
| def __call__(self, children): | def __call__(self, children): | ||||
| filtered = [] | filtered = [] | ||||
| @@ -108,6 +110,7 @@ class ChildFilterLALR(ChildFilter): | |||||
| return self.node_builder(filtered) | return self.node_builder(filtered) | ||||
| class ChildFilterLALR_NoPlaceholders(ChildFilter): | class ChildFilterLALR_NoPlaceholders(ChildFilter): | ||||
| "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" | "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" | ||||
| def __init__(self, to_include, node_builder): | def __init__(self, to_include, node_builder): | ||||
| @@ -126,9 +129,11 @@ class ChildFilterLALR_NoPlaceholders(ChildFilter): | |||||
| filtered.append(children[i]) | filtered.append(children[i]) | ||||
| return self.node_builder(filtered) | return self.node_builder(filtered) | ||||
| def _should_expand(sym): | def _should_expand(sym): | ||||
| return not sym.is_term and sym.name.startswith('_') | return not sym.is_term and sym.name.startswith('_') | ||||
| def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): | def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): | ||||
| # Prepare empty_indices as: How many Nones to insert at each index? | # Prepare empty_indices as: How many Nones to insert at each index? | ||||
| if _empty_indices: | if _empty_indices: | ||||
| @@ -156,6 +161,7 @@ def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indi | |||||
| # LALR without placeholders | # LALR without placeholders | ||||
| return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) | return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) | ||||
| class AmbiguousExpander: | class AmbiguousExpander: | ||||
| """Deal with the case where we're expanding children ('_rule') into a parent but the children | """Deal with the case where we're expanding children ('_rule') into a parent but the children | ||||
| are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself | are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself | ||||
| @@ -167,10 +173,10 @@ class AmbiguousExpander: | |||||
| self.to_expand = to_expand | self.to_expand = to_expand | ||||
| def __call__(self, children): | def __call__(self, children): | ||||
| def _is_ambig_tree(child): | |||||
| return hasattr(child, 'data') and child.data == '_ambig' | |||||
| def _is_ambig_tree(t): | |||||
| return hasattr(t, 'data') and t.data == '_ambig' | |||||
| #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities. | |||||
| # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities. | |||||
| # All children of an _ambig node should be a derivation of that ambig node, hence | # All children of an _ambig node should be a derivation of that ambig node, hence | ||||
| # it is safe to assume that if we see an _ambig node nested within an ambig node | # it is safe to assume that if we see an _ambig node nested within an ambig node | ||||
| # it is safe to simply expand it into the parent _ambig node as an alternative derivation. | # it is safe to simply expand it into the parent _ambig node as an alternative derivation. | ||||
| @@ -186,15 +192,17 @@ class AmbiguousExpander: | |||||
| if not ambiguous: | if not ambiguous: | ||||
| return self.node_builder(children) | return self.node_builder(children) | ||||
| expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ] | |||||
| expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)] | |||||
| return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))]) | return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))]) | ||||
| def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): | def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): | ||||
| to_expand = [i for i, sym in enumerate(expansion) | to_expand = [i for i, sym in enumerate(expansion) | ||||
| if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] | if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] | ||||
| if to_expand: | if to_expand: | ||||
| return partial(AmbiguousExpander, to_expand, tree_class) | return partial(AmbiguousExpander, to_expand, tree_class) | ||||
| class AmbiguousIntermediateExpander: | class AmbiguousIntermediateExpander: | ||||
| """ | """ | ||||
| Propagate ambiguous intermediate nodes and their derivations up to the | Propagate ambiguous intermediate nodes and their derivations up to the | ||||
| @@ -275,12 +283,14 @@ class AmbiguousIntermediateExpander: | |||||
| return self.node_builder(children) | return self.node_builder(children) | ||||
| def ptb_inline_args(func): | def ptb_inline_args(func): | ||||
| @wraps(func) | @wraps(func) | ||||
| def f(children): | def f(children): | ||||
| return func(*children) | return func(*children) | ||||
| return f | return f | ||||
| def inplace_transformer(func): | def inplace_transformer(func): | ||||
| @wraps(func) | @wraps(func) | ||||
| def f(children): | def f(children): | ||||
| @@ -289,9 +299,11 @@ def inplace_transformer(func): | |||||
| return func(tree) | return func(tree) | ||||
| return f | return f | ||||
| def apply_visit_wrapper(func, name, wrapper): | def apply_visit_wrapper(func, name, wrapper): | ||||
| if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: | if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: | ||||
| raise NotImplementedError("Meta args not supported for internal transformer") | raise NotImplementedError("Meta args not supported for internal transformer") | ||||
| @wraps(func) | @wraps(func) | ||||
| def f(children): | def f(children): | ||||
| return wrapper(func, name, children, None) | return wrapper(func, name, children, None) | ||||
| @@ -323,7 +335,6 @@ class ParseTreeBuilder: | |||||
| yield rule, wrapper_chain | yield rule, wrapper_chain | ||||
| def create_callback(self, transformer=None): | def create_callback(self, transformer=None): | ||||
| callbacks = {} | callbacks = {} | ||||
| @@ -298,8 +298,8 @@ class Parser: | |||||
| # this column. Find the item for the start_symbol, which is the root of the SPPF tree. | # this column. Find the item for the start_symbol, which is the root of the SPPF tree. | ||||
| solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] | solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] | ||||
| if not solutions: | if not solutions: | ||||
| expected_tokens = [t.expect for t in to_scan] | |||||
| raise UnexpectedEOF(expected_tokens) | |||||
| expected_terminals = [t.expect for t in to_scan] | |||||
| raise UnexpectedEOF(expected_terminals) | |||||
| if self.debug: | if self.debug: | ||||
| from .earley_forest import ForestToPyDotVisitor | from .earley_forest import ForestToPyDotVisitor | ||||
| @@ -46,14 +46,14 @@ class Tree(object): | |||||
| def _pretty(self, level, indent_str): | def _pretty(self, level, indent_str): | ||||
| if len(self.children) == 1 and not isinstance(self.children[0], Tree): | if len(self.children) == 1 and not isinstance(self.children[0], Tree): | ||||
| return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] | |||||
| return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] | |||||
| l = [ indent_str*level, self._pretty_label(), '\n' ] | |||||
| l = [indent_str*level, self._pretty_label(), '\n'] | |||||
| for n in self.children: | for n in self.children: | ||||
| if isinstance(n, Tree): | if isinstance(n, Tree): | ||||
| l += n._pretty(level+1, indent_str) | l += n._pretty(level+1, indent_str) | ||||
| else: | else: | ||||
| l += [ indent_str*(level+1), '%s' % (n,), '\n' ] | |||||
| l += [indent_str*(level+1), '%s' % (n,), '\n'] | |||||
| return l | return l | ||||
| @@ -102,8 +102,8 @@ class Tree(object): | |||||
| ###} | ###} | ||||
| def expand_kids_by_index(self, *indices): | def expand_kids_by_index(self, *indices): | ||||
| "Expand (inline) children at the given indices" | |||||
| for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||||
| """Expand (inline) children at the given indices""" | |||||
| for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||||
| kid = self.children[i] | kid = self.children[i] | ||||
| self.children[i:i+1] = kid.children | self.children[i:i+1] = kid.children | ||||
| @@ -144,12 +144,15 @@ class Tree(object): | |||||
| @property | @property | ||||
| def line(self): | def line(self): | ||||
| return self.meta.line | return self.meta.line | ||||
| @property | @property | ||||
| def column(self): | def column(self): | ||||
| return self.meta.column | return self.meta.column | ||||
| @property | @property | ||||
| def end_line(self): | def end_line(self): | ||||
| return self.meta.end_line | return self.meta.end_line | ||||
| @property | @property | ||||
| def end_column(self): | def end_column(self): | ||||
| return self.meta.end_column | return self.meta.end_column | ||||
| @@ -168,6 +171,7 @@ def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs): | |||||
| graph = pydot__tree_to_graph(tree, rankdir, **kwargs) | graph = pydot__tree_to_graph(tree, rankdir, **kwargs) | ||||
| graph.write(filename) | graph.write(filename) | ||||
| def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): | def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): | ||||
| """Creates a colorful image that represents the tree (data+children, without meta) | """Creates a colorful image that represents the tree (data+children, without meta) | ||||
| @@ -196,7 +200,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): | |||||
| subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child) | subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child) | ||||
| for child in subtree.children] | for child in subtree.children] | ||||
| node = pydot.Node(i[0], style="filled", fillcolor="#%x"%color, label=subtree.data) | |||||
| node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data) | |||||
| i[0] += 1 | i[0] += 1 | ||||
| graph.add_node(node) | graph.add_node(node) | ||||
| @@ -1,10 +1,10 @@ | |||||
| import sys | |||||
| import os | import os | ||||
| from functools import reduce | from functools import reduce | ||||
| from ast import literal_eval | from ast import literal_eval | ||||
| from collections import deque | from collections import deque | ||||
| ###{standalone | ###{standalone | ||||
| import sys, re | |||||
| import logging | import logging | ||||
| logger = logging.getLogger("lark") | logger = logging.getLogger("lark") | ||||
| logger.addHandler(logging.StreamHandler()) | logger.addHandler(logging.StreamHandler()) | ||||
| @@ -12,6 +12,8 @@ logger.addHandler(logging.StreamHandler()) | |||||
| # By default, we should not output any log messages | # By default, we should not output any log messages | ||||
| logger.setLevel(logging.CRITICAL) | logger.setLevel(logging.CRITICAL) | ||||
| Py36 = (sys.version_info[:2] >= (3, 6)) | |||||
| def classify(seq, key=None, value=None): | def classify(seq, key=None, value=None): | ||||
| d = {} | d = {} | ||||
| @@ -27,7 +29,7 @@ def classify(seq, key=None, value=None): | |||||
| def _deserialize(data, namespace, memo): | def _deserialize(data, namespace, memo): | ||||
| if isinstance(data, dict): | if isinstance(data, dict): | ||||
| if '__type__' in data: # Object | |||||
| if '__type__' in data: # Object | |||||
| class_ = namespace[data['__type__']] | class_ = namespace[data['__type__']] | ||||
| return class_.deserialize(data, memo) | return class_.deserialize(data, memo) | ||||
| elif '@' in data: | elif '@' in data: | ||||
| @@ -105,7 +107,6 @@ class SerializeMemoizer(Serialize): | |||||
| return _deserialize(data, namespace, memo) | return _deserialize(data, namespace, memo) | ||||
| try: | try: | ||||
| STRING_TYPE = basestring | STRING_TYPE = basestring | ||||
| except NameError: # Python 3 | except NameError: # Python 3 | ||||
| @@ -118,10 +119,11 @@ from contextlib import contextmanager | |||||
| Str = type(u'') | Str = type(u'') | ||||
| try: | try: | ||||
| classtype = types.ClassType # Python2 | |||||
| classtype = types.ClassType # Python2 | |||||
| except AttributeError: | except AttributeError: | ||||
| classtype = type # Python3 | classtype = type # Python3 | ||||
| def smart_decorator(f, create_decorator): | def smart_decorator(f, create_decorator): | ||||
| if isinstance(f, types.FunctionType): | if isinstance(f, types.FunctionType): | ||||
| return wraps(f)(create_decorator(f, True)) | return wraps(f)(create_decorator(f, True)) | ||||
| @@ -139,17 +141,16 @@ def smart_decorator(f, create_decorator): | |||||
| else: | else: | ||||
| return create_decorator(f.__func__.__call__, True) | return create_decorator(f.__func__.__call__, True) | ||||
| try: | try: | ||||
| import regex | import regex | ||||
| except ImportError: | except ImportError: | ||||
| regex = None | regex = None | ||||
| import sys, re | |||||
| Py36 = (sys.version_info[:2] >= (3, 6)) | |||||
| import sre_parse | import sre_parse | ||||
| import sre_constants | import sre_constants | ||||
| categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') | categ_pattern = re.compile(r'\\p{[A-Za-z_]+}') | ||||
| def get_regexp_width(expr): | def get_regexp_width(expr): | ||||
| if regex: | if regex: | ||||
| # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with | # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with | ||||
| @@ -173,9 +174,7 @@ def dedup_list(l): | |||||
| preserving the original order of the list. Assumes that | preserving the original order of the list. Assumes that | ||||
| the list entries are hashable.""" | the list entries are hashable.""" | ||||
| dedup = set() | dedup = set() | ||||
| return [ x for x in l if not (x in dedup or dedup.add(x))] | |||||
| return [x for x in l if not (x in dedup or dedup.add(x))] | |||||
| try: | try: | ||||
| @@ -197,8 +196,6 @@ except ImportError: | |||||
| pass | pass | ||||
| try: | try: | ||||
| compare = cmp | compare = cmp | ||||
| except NameError: | except NameError: | ||||
| @@ -210,7 +207,6 @@ except NameError: | |||||
| return -1 | return -1 | ||||
| class Enumerator(Serialize): | class Enumerator(Serialize): | ||||
| def __init__(self): | def __init__(self): | ||||
| self.enums = {} | self.enums = {} | ||||
| @@ -8,6 +8,7 @@ from .lexer import Token | |||||
| ###{standalone | ###{standalone | ||||
| from inspect import getmembers, getmro | from inspect import getmembers, getmro | ||||
| class Discard(Exception): | class Discard(Exception): | ||||
| """When raising the Discard exception in a transformer callback, | """When raising the Discard exception in a transformer callback, | ||||
| that node is discarded and won't appear in the parent. | that node is discarded and won't appear in the parent. | ||||
| @@ -16,6 +17,7 @@ class Discard(Exception): | |||||
| # Transformers | # Transformers | ||||
| class _Decoratable: | class _Decoratable: | ||||
| "Provides support for decorating methods with @v_args" | "Provides support for decorating methods with @v_args" | ||||
| @@ -107,7 +109,6 @@ class Transformer(_Decoratable): | |||||
| except Exception as e: | except Exception as e: | ||||
| raise VisitError(token.type, token, e) | raise VisitError(token.type, token, e) | ||||
| def _transform_children(self, children): | def _transform_children(self, children): | ||||
| for c in children: | for c in children: | ||||
| try: | try: | ||||
| @@ -148,7 +149,6 @@ class Transformer(_Decoratable): | |||||
| return token | return token | ||||
| class InlineTransformer(Transformer): # XXX Deprecated | class InlineTransformer(Transformer): # XXX Deprecated | ||||
| def _call_userfunc(self, tree, new_children=None): | def _call_userfunc(self, tree, new_children=None): | ||||
| # Assumes tree is already transformed | # Assumes tree is already transformed | ||||
| @@ -203,7 +203,7 @@ class Transformer_NonRecursive(Transformer): | |||||
| q = [tree] | q = [tree] | ||||
| while q: | while q: | ||||
| t = q.pop() | t = q.pop() | ||||
| rev_postfix.append( t ) | |||||
| rev_postfix.append(t) | |||||
| if isinstance(t, Tree): | if isinstance(t, Tree): | ||||
| q += t.children | q += t.children | ||||
| @@ -225,7 +225,6 @@ class Transformer_NonRecursive(Transformer): | |||||
| return t | return t | ||||
| class Transformer_InPlaceRecursive(Transformer): | class Transformer_InPlaceRecursive(Transformer): | ||||
| "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances" | "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances" | ||||
| def _transform_tree(self, tree): | def _transform_tree(self, tree): | ||||
| @@ -297,7 +296,6 @@ class Visitor_Recursive(VisitorBase): | |||||
| return tree | return tree | ||||
| def visit_children_decor(func): | def visit_children_decor(func): | ||||
| "See Interpreter" | "See Interpreter" | ||||
| @wraps(func) | @wraps(func) | ||||
| @@ -338,8 +336,6 @@ class Interpreter(_Decoratable): | |||||
| return self.visit_children(tree) | return self.visit_children(tree) | ||||
| # Decorators | # Decorators | ||||
| def _apply_decorator(obj, decorator, **kwargs): | def _apply_decorator(obj, decorator, **kwargs): | ||||
| @@ -351,7 +347,6 @@ def _apply_decorator(obj, decorator, **kwargs): | |||||
| return _apply(decorator, **kwargs) | return _apply(decorator, **kwargs) | ||||
| def _inline_args__func(func): | def _inline_args__func(func): | ||||
| @wraps(func) | @wraps(func) | ||||
| def create_decorator(_f, with_self): | def create_decorator(_f, with_self): | ||||
| @@ -370,7 +365,6 @@ def inline_args(obj): # XXX Deprecated | |||||
| return _apply_decorator(obj, _inline_args__func) | return _apply_decorator(obj, _inline_args__func) | ||||
| def _visitor_args_func_dec(func, visit_wrapper=None, static=False): | def _visitor_args_func_dec(func, visit_wrapper=None, static=False): | ||||
| def create_decorator(_f, with_self): | def create_decorator(_f, with_self): | ||||
| if with_self: | if with_self: | ||||
| @@ -390,11 +384,11 @@ def _visitor_args_func_dec(func, visit_wrapper=None, static=False): | |||||
| return f | return f | ||||
| def _vargs_inline(f, data, children, meta): | |||||
| def _vargs_inline(f, _data, children, _meta): | |||||
| return f(*children) | return f(*children) | ||||
| def _vargs_meta_inline(f, data, children, meta): | |||||
| def _vargs_meta_inline(f, _data, children, meta): | |||||
| return f(meta, *children) | return f(meta, *children) | ||||
| def _vargs_meta(f, data, children, meta): | |||||
| def _vargs_meta(f, _data, children, meta): | |||||
| return f(children, meta) # TODO swap these for consistency? Backwards incompatible! | return f(children, meta) # TODO swap these for consistency? Backwards incompatible! | ||||
| def _vargs_tree(f, data, children, meta): | def _vargs_tree(f, data, children, meta): | ||||
| return f(Tree(data, children, meta)) | return f(Tree(data, children, meta)) | ||||
| @@ -415,6 +409,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None): | |||||
| inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists). | inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists). | ||||
| meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first) | meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first) | ||||
| tree (bool, optional): Provides the entire tree as the argument, instead of the children. | tree (bool, optional): Provides the entire tree as the argument, instead of the children. | ||||
| wrapper (function, optional): Provide a function to decorate all methods. | |||||
| Example: | Example: | ||||
| :: | :: | ||||
| @@ -457,7 +452,7 @@ def v_args(inline=False, meta=False, tree=False, wrapper=None): | |||||
| ###} | ###} | ||||
| #--- Visitor Utilities --- | |||||
| # --- Visitor Utilities --- | |||||
| class CollapseAmbiguities(Transformer): | class CollapseAmbiguities(Transformer): | ||||
| """ | """ | ||||
| @@ -471,7 +466,9 @@ class CollapseAmbiguities(Transformer): | |||||
| """ | """ | ||||
| def _ambig(self, options): | def _ambig(self, options): | ||||
| return sum(options, []) | return sum(options, []) | ||||
| def __default__(self, data, children_lists, meta): | def __default__(self, data, children_lists, meta): | ||||
| return [Tree(data, children, meta) for children in combine_alternatives(children_lists)] | return [Tree(data, children, meta) for children in combine_alternatives(children_lists)] | ||||
| def __default_token__(self, t): | def __default_token__(self, t): | ||||
| return [t] | return [t] | ||||