@@ -16,4 +16,4 @@ jobs:
         python -m pip install --upgrade pip
         pip install mypy
     - name: Lint with mypy
-      run: mypy -p lark-stubs || true
+      run: mypy -p lark || true
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from .tree import *
-from .visitors import *
-from .exceptions import *
-from .lexer import *
-from .load_grammar import *
-
-from logging import Logger as _Logger
-
-logger: _Logger
-__version__: str = ...
@@ -1,17 +0,0 @@
-import types
-from typing import Optional
-
-from .visitors import Transformer
-
-
-class Ast(object):
-    pass
-
-class AsList(object):
-    pass
-
-
-def create_transformer(
-    ast_module: types.ModuleType,
-    transformer: Optional[Transformer]=None
-) -> Transformer:
-    ...
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
-from .tree import Tree
-from .lexer import Token
-from .parsers.lalr_interactive_parser import InteractiveParser
-
-
-class LarkError(Exception):
-    pass
-
-
-class ConfigurationError(LarkError, ValueError):
-    pass
-
-
-class GrammarError(LarkError):
-    pass
-
-
-class ParseError(LarkError):
-    pass
-
-
-class LexError(LarkError):
-    pass
-
-
-T = TypeVar('T')
-
-
-class UnexpectedEOF(ParseError):
-    expected: List[Token]
-
-
-class UnexpectedInput(LarkError):
-    line: int
-    column: int
-    pos_in_stream: int
-    state: Any
-
-    def get_context(self, text: str, span: int = ...) -> str:
-        ...
-
-    def match_examples(
-        self,
-        parse_fn: Callable[[str], Tree],
-        examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
-        token_type_match_fallback: bool = False,
-        use_accepts: bool = False,
-    ) -> T:
-        ...
-
-
-class UnexpectedToken(ParseError, UnexpectedInput):
-    expected: Set[str]
-    considered_rules: Set[str]
-    interactive_parser: InteractiveParser
-    accepts: Set[str]
-
-
-class UnexpectedCharacters(LexError, UnexpectedInput):
-    allowed: Set[str]
-    considered_tokens: Set[Any]
-
-
-class VisitError(LarkError):
-    obj: Union[Tree, Token]
-    orig_exc: Exception
@@ -1,14 +0,0 @@
-from typing import Optional, Tuple
-
-
-class RuleOptions:
-    keep_all_tokens: bool
-    expand1: bool
-    priority: int
-    template_source: Optional[str]
-    empty_indices: Tuple[bool, ...]
-
-
-class Symbol:
-    name: str
-    is_term: bool
@@ -1,47 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from typing import Tuple, List, Iterator, Optional
-from abc import ABC, abstractmethod
-from .lexer import Token
-from .lark import PostLex
-
-
-class Indenter(PostLex, ABC):
-    paren_level: Optional[int]
-    indent_level: Optional[List[int]]
-
-    def __init__(self) -> None:
-        ...
-
-    def handle_NL(self, token: Token) -> Iterator[Token]:
-        ...
-
-    @property
-    @abstractmethod
-    def NL_type(self) -> str:
-        ...
-
-    @property
-    @abstractmethod
-    def OPEN_PAREN_types(self) -> List[str]:
-        ...
-
-    @property
-    @abstractmethod
-    def CLOSE_PAREN_types(self) -> List[str]:
-        ...
-
-    @property
-    @abstractmethod
-    def INDENT_type(self) -> str:
-        ...
-
-    @property
-    @abstractmethod
-    def DEDENT_type(self) -> str:
-        ...
-
-    @property
-    @abstractmethod
-    def tab_len(self) -> int:
-        ...
@@ -1,109 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from typing import (
-    TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
-    Literal, Protocol, Tuple, Iterable,
-)
-
-from .parsers.lalr_interactive_parser import InteractiveParser
-from .visitors import Transformer
-from .lexer import Token, Lexer, TerminalDef
-from .tree import Tree
-from .exceptions import UnexpectedInput
-from .load_grammar import Grammar
-
-_T = TypeVar('_T')
-
-
-class PostLex(Protocol):
-
-    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
-        ...
-
-    always_accept: Iterable[str]
-
-
-class LarkOptions:
-    start: List[str]
-    parser: str
-    lexer: str
-    transformer: Optional[Transformer]
-    postlex: Optional[PostLex]
-    ambiguity: str
-    regex: bool
-    debug: bool
-    keep_all_tokens: bool
-    propagate_positions: Union[bool, str]
-    maybe_placeholders: bool
-    lexer_callbacks: Dict[str, Callable[[Token], Token]]
-    cache: Union[bool, str]
-    g_regex_flags: int
-    use_bytes: bool
-    import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]
-    source_path: Optional[str]
-
-
-class PackageResource(object):
-    pkg_name: str
-    path: str
-
-    def __init__(self, pkg_name: str, path: str): ...
-
-
-class FromPackageLoader:
-    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...
-
-    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ...
-
-
-class Lark:
-    source_path: str
-    source_grammar: str
-    grammar: Grammar
-    options: LarkOptions
-    lexer: Lexer
-    terminals: List[TerminalDef]
-
-    def __init__(
-        self,
-        grammar: Union[Grammar, str, IO[str]],
-        *,
-        start: Union[None, str, List[str]] = "start",
-        parser: Literal["earley", "lalr", "cyk", "auto"] = "auto",
-        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
-        transformer: Optional[Transformer] = None,
-        postlex: Optional[PostLex] = None,
-        ambiguity: Literal["explicit", "resolve"] = "resolve",
-        regex: bool = False,
-        debug: bool = False,
-        keep_all_tokens: bool = False,
-        propagate_positions: Union[bool, str] = False,
-        maybe_placeholders: bool = False,
-        lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
-        cache: Union[bool, str] = False,
-        g_regex_flags: int = ...,
-        use_bytes: bool = False,
-        import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ...,
-        source_path: Optional[str]=None,
-    ):
-        ...
-
-    def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
-        ...
-
-    def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser:
-        ...
-
-    @classmethod
-    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
-        ...
-
-    @classmethod
-    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
-        ...
-
-    def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
-        ...
-
-    def get_terminal(self, name: str) -> TerminalDef:
-        ...
@@ -1,161 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from types import ModuleType
-from typing import (
-    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
-    Pattern as REPattern,
-)
-from abc import abstractmethod, ABC
-
-_T = TypeVar('_T')
-
-
-class Pattern(ABC):
-    value: str
-    flags: Collection[str]
-    raw: str
-    type: str
-
-    def __init__(self, value: str, flags: Collection[str] = (), raw: str = None) -> None:
-        ...
-
-    @abstractmethod
-    def to_regexp(self) -> str:
-        ...
-
-    @property
-    @abstractmethod
-    def min_width(self) -> int:
-        ...
-
-    @property
-    @abstractmethod
-    def max_width(self) -> int:
-        ...
-
-
-class PatternStr(Pattern):
-    type: str = ...
-
-    def to_regexp(self) -> str:
-        ...
-
-    @property
-    def min_width(self) -> int:
-        ...
-
-    @property
-    def max_width(self) -> int:
-        ...
-
-
-class PatternRE(Pattern):
-    type: str = ...
-
-    def to_regexp(self) -> str:
-        ...
-
-    @property
-    def min_width(self) -> int:
-        ...
-
-    @property
-    def max_width(self) -> int:
-        ...
-
-
-class TerminalDef:
-    name: str
-    pattern: Pattern
-    priority: int
-
-    def __init__(self, name: str, pattern: Pattern, priority: int = ...) -> None:
-        ...
-
-    def user_repr(self) -> str: ...
-
-
-class Token(str):
-    type: str
-    start_pos: int
-    value: Any
-    line: int
-    column: int
-    end_line: int
-    end_column: int
-    end_pos: int
-
-    def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
-        ...
-
-    def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token:
-        ...
-
-    @classmethod
-    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: Token) -> _T:
-        ...
-
-
-_Callback = Callable[[Token], Token]
-
-
-class Lexer(ABC):
-    lex: Callable[..., Iterator[Token]]
-
-
-class LexerConf:
-    tokens: Collection[TerminalDef]
-    re_module: ModuleType
-    ignore: Collection[str] = ()
-    postlex: Any = None
-    callbacks: Optional[Dict[str, _Callback]] = None
-    g_regex_flags: int = 0
-    skip_validation: bool = False
-    use_bytes: bool = False
-
-
-class TraditionalLexer(Lexer):
-    terminals: Collection[TerminalDef]
-    ignore_types: FrozenSet[str]
-    newline_types: FrozenSet[str]
-    user_callbacks: Dict[str, _Callback]
-    callback: Dict[str, _Callback]
-    mres: List[Tuple[REPattern, Dict[int, str]]]
-    re: ModuleType
-
-    def __init__(
-        self,
-        conf: LexerConf
-    ) -> None:
-        ...
-
-    def build(self) -> None:
-        ...
-
-    def match(self, stream: str, pos: int) -> Optional[Tuple[str, str]]:
-        ...
-
-    def lex(self, stream: str) -> Iterator[Token]:
-        ...
-
-    def next_token(self, lex_state: Any, parser_state: Any = None) -> Token:
-        ...
-
-
-class ContextualLexer(Lexer):
-    lexers: Dict[str, TraditionalLexer]
-    root_lexer: TraditionalLexer
-
-    def __init__(
-        self,
-        terminals: Collection[TerminalDef],
-        states: Dict[str, Collection[str]],
-        re_: ModuleType,
-        ignore: Collection[str] = ...,
-        always_accept: Collection[str] = ...,
-        user_callbacks: Dict[str, _Callback] = ...,
-        g_regex_flags: int = ...
-    ) -> None:
-        ...
-
-    def lex(self, stream: str, get_parser_state: Callable[[], str]) -> Iterator[Token]:
-        ...
@@ -1,31 +0,0 @@
-from typing import List, Tuple, Union, Callable, Dict, Optional
-
-from .tree import Tree
-from .grammar import RuleOptions
-from .exceptions import UnexpectedInput
-
-
-class Grammar:
-    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
-    term_defs: List[Tuple[str, Tuple[Tree, int]]]
-    ignore: List[str]
-
-
-class GrammarBuilder:
-    global_keep_all_tokens: bool
-    import_paths: List[Union[str, Callable]]
-    used_files: Dict[str, str]
-
-    def __init__(self, global_keep_all_tokens: bool = False, import_paths: List[Union[str, Callable]] = None, used_files: Dict[str, str]=None) -> None: ...
-
-    def load_grammar(self, grammar_text: str, grammar_name: str = ..., mangle: Callable[[str], str] = None) -> None: ...
-
-    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str],
-                  base_mangle: Callable[[str], str] = None) -> None: ...
-
-    def validate(self) -> None: ...
-
-    def build(self) -> Grammar: ...
-
-
-def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: ...
@@ -1,39 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from typing import List, Dict, Union, Callable, Iterable
-
-from .grammar import Symbol
-from .lark import Lark
-from .tree import Tree
-from .visitors import Transformer_InPlace
-from .lexer import TerminalDef
-
-
-class WriteTokensTransformer(Transformer_InPlace):
-
-    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ...
-
-
-class MatchTree(Tree):
-    pass
-
-
-class MakeMatchTree:
-    name: str
-    expansion: List[TerminalDef]
-
-    def __init__(self, name: str, expansion: List[TerminalDef]):
-        ...
-
-    def __call__(self, args: List[Union[str, Tree]]):
-        ...
-
-
-class Reconstructor:
-
-    def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[Symbol], str]] = ...):
-        ...
-
-    def reconstruct(self, tree: Tree, postproc: Callable[[Iterable[str]], Iterable[str]]=None,
-                    insert_spaces: bool = True) -> str:
-        ...
@@ -1,72 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from typing import List, Callable, Iterator, Union, Optional, Literal, Any
-from .lexer import TerminalDef
-
-
-class Meta:
-    empty: bool
-    line: int
-    column: int
-    start_pos: int
-    end_line: int
-    end_column: int
-    end_pos: int
-    orig_expansion: List[TerminalDef]
-    match_tree: bool
-
-
-class Tree:
-    data: str
-    children: List[Union[str, Tree]]
-    meta: Meta
-
-    def __init__(
-        self,
-        data: str,
-        children: List[Union[str, Tree]],
-        meta: Optional[Meta] = None
-    ) -> None:
-        ...
-
-    def pretty(self, indent_str: str = ...) -> str:
-        ...
-
-    def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]:
-        ...
-
-    def find_data(self, data: str) -> Iterator[Tree]:
-        ...
-
-    def expand_kids_by_index(self, *indices: int) -> None:
-        ...
-
-    def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]:
-        ...
-
-    def iter_subtrees(self) -> Iterator[Tree]:
-        ...
-
-    def iter_subtrees_topdown(self) -> Iterator[Tree]:
-        ...
-
-    def copy(self) -> Tree:
-        ...
-
-    def set(self, data: str, children: List[Union[str, Tree]]) -> None:
-        ...
-
-    def __hash__(self) -> int:
-        ...
-
-
-class SlottedTree(Tree):
-    pass
-
-
-def pydot__tree_to_png(
-    tree: Tree,
-    filename: str,
-    rankdir: Literal["TB", "LR", "BT", "RL"] = ...,
-    **kwargs
-) -> None:
-    ...
@@ -1,108 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union
-from abc import ABC
-from .tree import Tree
-
-_T = TypeVar('_T')
-_R = TypeVar('_R')
-_FUNC = Callable[..., _T]
-_DECORATED = Union[_FUNC, type]
-
-
-class Transformer(ABC, Generic[_T]):
-
-    def __init__(self, visit_tokens: bool = True) -> None:
-        ...
-
-    def transform(self, tree: Tree) -> _T:
-        ...
-
-    def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]:
-        ...
-
-
-class TransformerChain(Generic[_T]):
-    transformers: Tuple[Transformer[_T], ...]
-
-    def __init__(self, *transformers: Transformer[_T]) -> None:
-        ...
-
-    def transform(self, tree: Tree) -> _T:
-        ...
-
-    def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]:
-        ...
-
-
-class Transformer_InPlace(Transformer):
-    pass
-
-
-class Transformer_NonRecursive(Transformer):
-    pass
-
-
-class Transformer_InPlaceRecursive(Transformer):
-    pass
-
-
-class VisitorBase:
-    pass
-
-
-class Visitor(VisitorBase, ABC, Generic[_T]):
-
-    def visit(self, tree: Tree) -> Tree:
-        ...
-
-    def visit_topdown(self, tree: Tree) -> Tree:
-        ...
-
-
-class Visitor_Recursive(VisitorBase):
-
-    def visit(self, tree: Tree) -> Tree:
-        ...
-
-    def visit_topdown(self, tree: Tree) -> Tree:
-        ...
-
-
-class Interpreter(ABC, Generic[_T]):
-
-    def visit(self, tree: Tree) -> _T:
-        ...
-
-    def visit_children(self, tree: Tree) -> List[_T]:
-        ...
-
-
-_InterMethod = Callable[[Type[Interpreter], _T], _R]
-
-
-def v_args(
-    inline: bool = False,
-    meta: bool = False,
-    tree: bool = False,
-    wrapper: Callable = None
-) -> Callable[[_DECORATED], _DECORATED]:
-    ...
-
-
-def visit_children_decor(func: _InterMethod) -> _InterMethod:
-    ...
-
-
-class Discard(Exception):
-    pass
-
-
-# Deprecated
-class InlineTransformer:
-    pass
-
-
-# Deprecated
-def inline_args(obj: _FUNC) -> _FUNC:
-    ...
@@ -6,4 +6,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
 from .lexer import Token
 from .lark import Lark
 
-__version__ = "1.0.0a"
+__version__: str = "1.0.0a"
@@ -3,6 +3,8 @@
 """
 
 import inspect, re
+import types
+from typing import Optional
 
 from lark import Transformer, v_args
@@ -27,7 +29,7 @@ def _call(func, _data, children, _meta):
 
 inline = v_args(wrapper=_call)
 
-def create_transformer(ast_module, transformer=None):
+def create_transformer(ast_module: types.ModuleType, transformer: Optional[Transformer]=None) -> Transformer:
    """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.

    For each class, we create a corresponding rule in the transformer, with a matching name.
@@ -49,4 +51,4 @@ def create_transformer(ast_module, transformer=None):
                setattr(t, camel_to_snake(name), obj)
 
-    return t
\ No newline at end of file
+    return t
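
A minimal usage sketch of `create_transformer`; the grammar and the `Name` class below are illustrative, not part of this change:

    import sys
    from lark import Lark, ast_utils

    class Name(ast_utils.Ast):
        # create_transformer maps this class to the rule "name" (CamelCase -> snake_case)
        def __init__(self, token):
            self.value = str(token)

    parser = Lark(r'''
        start: name
        name: WORD
        %import common.WORD
    ''')
    transformer = ast_utils.create_transformer(sys.modules[__name__])
    tree = transformer.transform(parser.parse("hello"))  # the "name" subtree becomes a Name instance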
@@ -1,14 +1,30 @@
+from types import ModuleType
+
 from .utils import Serialize
-from .lexer import TerminalDef
+from .lexer import TerminalDef, Token
 
 ###{standalone
+from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .lark import PostLex
+
+_Callback = Callable[[Token], Token]
 
 class LexerConf(Serialize):
     __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
     __serialize_namespace__ = TerminalDef,
 
-    def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False):
+    terminals: Collection[TerminalDef]
+    re_module: ModuleType
+    ignore: Collection[str]
+    postlex: 'Optional[PostLex]'
+    callbacks: Dict[str, _Callback]
+    g_regex_flags: int
+    skip_validation: bool
+    use_bytes: bool
+
+    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
         self.terminals = terminals
         self.terminals_by_name = {t.name: t for t in self.terminals}
         assert len(self.terminals) == len(self.terminals_by_name)
@@ -3,6 +3,12 @@ from .utils import logger, NO_VALUE
 
 ###{standalone
+from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .lexer import Token
+    from .parsers.lalr_interactive_parser import InteractiveParser
+    from .tree import Tree
 
 class LarkError(Exception):
     pass
@@ -28,6 +34,7 @@ class ParseError(LarkError):
 class LexError(LarkError):
     pass
 
+T = TypeVar('T')
 
 class UnexpectedInput(LarkError):
    """UnexpectedInput Error.
@@ -39,10 +46,13 @@ class UnexpectedInput(LarkError):
     After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
     """
 
+    line: int
+    column: int
     pos_in_stream = None
+    state: Any
     _terminals_by_name = None
 
-    def get_context(self, text, span=40):
+    def get_context(self, text: str, span: int=40) -> str:
        """Returns a pretty string pinpointing the error in the text,
        with span amount of context characters around it.
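
A short sketch of `get_context` in use; the grammar and input are illustrative:

    from lark import Lark, UnexpectedInput

    parser = Lark('start: "a"+')
    text = 'aaa!'
    try:
        parser.parse(text)
    except UnexpectedInput as u:
        # prints the offending line with a caret under the error column
        print(u.get_context(text, span=40))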
@@ -63,7 +73,7 @@ class UnexpectedInput(LarkError):
             after = text[pos:end].split(b'\n', 1)[0]
             return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
 
-    def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
+    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]:
        """Allows you to detect what's wrong in the input text by matching
        against example errors.
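
A sketch of the flow the new signature describes; the labels and example strings are made up, and the return value is None when nothing matches (hence the `Optional[T]` return type):

    from lark import Lark, UnexpectedInput

    parser = Lark('start: "a" "b"', parser='lalr')

    def diagnose(text):
        try:
            parser.parse(text)
        except UnexpectedInput as u:
            return u.match_examples(parser.parse, {
                'missing b': ['a'],
                'bad first char': ['x', 'xb'],
            })

    print(diagnose('a'))  # expected to report 'missing b' when the error states match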
@@ -126,6 +136,9 @@ class UnexpectedInput(LarkError):
 
 class UnexpectedEOF(ParseError, UnexpectedInput):
+
+    expected: 'List[Token]'
+
     def __init__(self, expected, state=None, terminals_by_name=None):
         self.expected = expected
         self.state = state
@@ -145,6 +158,10 @@ class UnexpectedEOF(ParseError, UnexpectedInput):
 
 class UnexpectedCharacters(LexError, UnexpectedInput):
+
+    allowed: Set[str]
+    considered_tokens: Set[Any]
+
     def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
                  terminals_by_name=None, considered_rules=None):
         # TODO considered_tokens and allowed can be figured out using state
@@ -187,6 +204,10 @@ class UnexpectedToken(ParseError, UnexpectedInput):
     see: ``InteractiveParser``.
     """
 
+    expected: Set[str]
+    considered_rules: Set[str]
+    interactive_parser: 'InteractiveParser'
+
     def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
         # TODO considered_rules and expected can be figured out using state
         self.line = getattr(token, 'line', '?')
@@ -205,7 +226,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
         super(UnexpectedToken, self).__init__()
 
     @property
-    def accepts(self):
+    def accepts(self) -> Set[str]:
         if self._accepts is NO_VALUE:
             self._accepts = self.interactive_parser and self.interactive_parser.accepts()
         return self._accepts
@@ -228,6 +249,9 @@ class VisitError(LarkError):
     - orig_exc: the exception that caused it to fail
     """
 
+    obj: 'Union[Tree, Token]'
+    orig_exc: Exception
+
     def __init__(self, rule, obj, orig_exc):
         self.obj = obj
         self.orig_exc = orig_exc
@@ -1,13 +1,17 @@
 from .utils import Serialize
 
 ###{standalone
+from typing import Optional, Tuple, ClassVar
 
 class Symbol(Serialize):
     __slots__ = ('name',)
 
-    is_term = NotImplemented
+    name: str
+    is_term: ClassVar[bool] = NotImplemented
 
-    def __init__(self, name):
+    def __init__(self, name: str) -> None:
         self.name = name
 
     def __eq__(self, other):
@@ -29,7 +33,7 @@ class Symbol(Serialize):
 class Terminal(Symbol):
     __serialize_fields__ = 'name', 'filter_out'
 
-    is_term = True
+    is_term: ClassVar[bool] = True
 
     def __init__(self, name, filter_out=False):
         self.name = name
@@ -43,13 +47,19 @@ class Terminal(Symbol):
 class NonTerminal(Symbol):
     __serialize_fields__ = 'name',
 
-    is_term = False
+    is_term: ClassVar[bool] = False
 
 
 class RuleOptions(Serialize):
     __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
 
-    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()):
+    keep_all_tokens: bool
+    expand1: bool
+    priority: Optional[int]
+    template_source: Optional[str]
+    empty_indices: Tuple[bool, ...]
+
+    def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
         self.keep_all_tokens = keep_all_tokens
         self.expand1 = expand1
         self.priority = priority
@@ -1,20 +1,28 @@
 "Provides Indentation services for languages with indentation similar to Python"
 
+from abc import ABC, abstractmethod
+
 from .exceptions import LarkError
 from .lark import PostLex
 from .lexer import Token
 
 ###{standalone
+from typing import Tuple, List, Iterator, Optional
 
 class DedentError(LarkError):
     pass
 
-class Indenter(PostLex):
-    def __init__(self):
-        self.paren_level = None
-        self.indent_level = None
+class Indenter(PostLex, ABC):
+    paren_level: int
+    indent_level: List[int]
+
+    def __init__(self) -> None:
+        self.paren_level = 0
+        self.indent_level = [0]
         assert self.tab_len > 0
 
-    def handle_NL(self, token):
+    def handle_NL(self, token: Token) -> Iterator[Token]:
         if self.paren_level > 0:
             return
@@ -64,4 +72,34 @@ class Indenter(PostLex):
     def always_accept(self):
         return (self.NL_type,)
 
+    @property
+    @abstractmethod
+    def NL_type(self) -> str:
+        ...
+
+    @property
+    @abstractmethod
+    def OPEN_PAREN_types(self) -> List[str]:
+        ...
+
+    @property
+    @abstractmethod
+    def CLOSE_PAREN_types(self) -> List[str]:
+        ...
+
+    @property
+    @abstractmethod
+    def INDENT_type(self) -> str:
+        ...
+
+    @property
+    @abstractmethod
+    def DEDENT_type(self) -> str:
+        ...
+
+    @property
+    @abstractmethod
+    def tab_len(self) -> int:
+        ...
+
 ###}
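
With `Indenter` now abstract, a concrete subclass supplies the terminal names and tab width as plain class attributes. A sketch; the terminal names assume a grammar that declares `_NEWLINE`, `_INDENT` and `_DEDENT`, as in lark's indented-tree examples:

    from lark.indenter import Indenter

    class TreeIndenter(Indenter):
        NL_type = '_NEWLINE'
        OPEN_PAREN_types = []
        CLOSE_PAREN_types = []
        INDENT_type = '_INDENT'
        DEDENT_type = '_DEDENT'
        tab_len = 8

    # used as: Lark(grammar, parser='lalr', postlex=TreeIndenter())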
@@ -2,31 +2,71 @@ from abc import ABC, abstractmethod
 import sys, os, pickle, hashlib
 import tempfile
 
-from .exceptions import ConfigurationError, assert_config
+from .exceptions import ConfigurationError, assert_config, UnexpectedInput
 from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
-from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files
+from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource
 from .tree import Tree
 from .common import LexerConf, ParserConf
-from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread
+from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import get_frontend, _get_lexer_callbacks
 from .grammar import Rule
 
 import re
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
 
 ###{standalone
+from typing import (
+    TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional,
+    Tuple, Iterable, IO, Any, TYPE_CHECKING
+)
+
+if TYPE_CHECKING:
+    from .parsers.lalr_interactive_parser import InteractiveParser
+    from .visitors import Transformer
+    if sys.version_info >= (3, 8):
+        from typing import Literal
+    else:
+        from typing_extensions import Literal
+
+class PostLex(ABC):
+    @abstractmethod
+    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
+        return stream
+
+    always_accept: Iterable[str] = ()
 
 class LarkOptions(Serialize):
    """Specifies the options for Lark
 
    """
+
+    start: List[str]
+    debug: bool
+    transformer: 'Optional[Transformer]'
+    propagate_positions: Union[bool, str]
+    maybe_placeholders: bool
+    cache: Union[bool, str]
+    regex: bool
+    g_regex_flags: int
+    keep_all_tokens: bool
+    tree_class: Any
+    parser: 'Literal["earley", "lalr", "cyk", "auto"]'
+    lexer: 'Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
+    ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
+    postlex: Optional[PostLex]
+    priority: 'Optional[Literal["auto", "normal", "invert"]]'
+    lexer_callbacks: Dict[str, Callable[[Token], Token]]
+    use_bytes: bool
+    edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
+    import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
+    source_path: Optional[str]
 
     OPTIONS_DOC = """
     **=== General Options ===**
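
`PostLex` is now a runtime ABC in `lark/lark.py` rather than a stub-only Protocol. A minimal sketch of a custom post-lexer; the `NAME` token type is illustrative:

    from typing import Iterator
    from lark.lark import PostLex
    from lark.lexer import Token

    class Upcase(PostLex):
        always_accept = ()

        def process(self, stream: Iterator[Token]) -> Iterator[Token]:
            for tok in stream:
                # rewrite NAME tokens after lexing, pass everything else through
                yield tok.update(value=tok.upper()) if tok.type == 'NAME' else tok

    # an instance would be passed via Lark(..., postlex=Upcase())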
@@ -106,12 +146,10 @@ class LarkOptions(Serialize):
     # Adding a new option needs to be done in multiple places:
     # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
     # - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
-    # - In `lark-stubs/lark.pyi`:
-    #   - As attribute to `LarkOptions`
-    #   - As parameter to `Lark.__init__`
+    # - As an attribute of `LarkOptions` above
     # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
     # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
-    _defaults = {
+    _defaults: Dict[str, Any] = {
         'debug': False,
         'keep_all_tokens': False,
         'tree_class': None,
@@ -189,13 +227,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
 _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
 
-class PostLex(ABC):
-    @abstractmethod
-    def process(self, stream):
-        return stream
-
-    always_accept = ()
-
+_T = TypeVar('_T')
 
 class Lark(Serialize):
    """Main interface for the library.
@@ -210,7 +242,15 @@ class Lark(Serialize):
     >>> Lark(r'''start: "foo" ''')
     Lark(...)
     """
-    def __init__(self, grammar, **options):
+
+    source_path: str
+    source_grammar: str
+    grammar: 'Grammar'
+    options: LarkOptions
+    lexer: Lexer
+    terminals: List[TerminalDef]
+
+    def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
         self.options = LarkOptions(options)
 
         # Set regex or re module
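
The annotations above describe ordinary construction calls; for instance (the grammar is illustrative):

    from lark import Lark

    parser = Lark(r'''
        start: WORD ("," WORD)*
        %import common.WORD
        %ignore " "
    ''', parser='lalr', maybe_placeholders=False)

    tree = parser.parse('hello, world')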
@@ -374,6 +414,7 @@ class Lark(Serialize):
             if cache_fn:
                 logger.debug('Saving grammar to cache: %s', cache_fn)
                 with FS.open(cache_fn, 'wb') as f:
+                    assert cache_md5 is not None
                     f.write(cache_md5.encode('utf8') + b'\n')
                     pickle.dump(used_files, f)
                     self.save(f)
@@ -476,7 +517,7 @@ class Lark(Serialize):
         return inst._load({'data': data, 'memo': memo}, **kwargs)
 
     @classmethod
-    def open(cls, grammar_filename, rel_to=None, **options):
+    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
        """Create an instance of Lark with the grammar given by its filename
 
        If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
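
Per the docstring, typical use keeps the grammar next to the calling module (the filename is illustrative):

    parser = Lark.open('grammar.lark', rel_to=__file__, parser='lalr')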
| @@ -494,7 +535,7 @@ class Lark(Serialize): | |||||
| return cls(f, **options) | return cls(f, **options) | ||||
| @classmethod | @classmethod | ||||
| def open_from_package(cls, package, grammar_path, search_paths=("",), **options): | |||||
| def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T: | |||||
| """Create an instance of Lark with the grammar loaded from within the package `package`. | """Create an instance of Lark with the grammar loaded from within the package `package`. | ||||
| This allows grammar loading from zipapps. | This allows grammar loading from zipapps. | ||||
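
A sketch, assuming the grammar ships inside the calling package (paths are illustrative):

    parser = Lark.open_from_package(__name__, 'example.lark', ('grammars',), parser='lalr')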
@@ -515,7 +556,7 @@ class Lark(Serialize):
         return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
 
-    def lex(self, text, dont_ignore=False):
+    def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
        """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'
 
        When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
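
A sketch of the annotated return type in use; it assumes a parser built with lexer='standard', per the docstring:

    for tok in parser.lex('some input'):
        print(tok.type, repr(tok), tok.line, tok.column)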
@@ -530,11 +571,11 @@ class Lark(Serialize):
             return self.options.postlex.process(stream)
         return stream
 
-    def get_terminal(self, name):
+    def get_terminal(self, name: str) -> TerminalDef:
        """Get information about a terminal"""
         return self._terminals_dict[name]
 
-    def parse_interactive(self, text=None, start=None):
+    def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
        """Start an interactive parsing session.
 
        Parameters:
@@ -548,7 +589,7 @@ class Lark(Serialize):
        """
         return self.parser.parse_interactive(text, start=start)
 
-    def parse(self, text, start=None, on_error=None):
+    def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree:
        """Parse the given text, according to the options provided.
 
        Parameters:
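
The new `on_error` annotation matches usage like this sketch (LALR only; returning True resumes parsing):

    from lark import UnexpectedInput

    errors = []

    def collect(e: UnexpectedInput) -> bool:
        errors.append(e)
        return True  # keep parsing past the error

    tree = parser.parse(text, on_error=collect)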
@@ -1,5 +1,6 @@
 # Lexer Implementation
 
+from abc import abstractmethod, ABC
 import re
 from contextlib import suppress
@@ -9,12 +10,23 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
 ###{standalone
 from copy import copy
+from types import ModuleType
+from typing import (
+    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
+    Pattern as REPattern, ClassVar, TYPE_CHECKING
+)
 
-class Pattern(Serialize):
-    raw = None
-    type = None
+if TYPE_CHECKING:
+    from .common import LexerConf
 
-    def __init__(self, value, flags=(), raw=None):
+class Pattern(Serialize, ABC):
+
+    value: str
+    flags: Collection[str]
+    raw: Optional[str]
+    type: ClassVar[str]
+
+    def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
         self.value = value
         self.flags = frozenset(flags)
         self.raw = raw
@@ -29,13 +41,18 @@ class Pattern(Serialize):
     def __eq__(self, other):
         return type(self) == type(other) and self.value == other.value and self.flags == other.flags
 
-    def to_regexp(self):
+    @abstractmethod
+    def to_regexp(self) -> str:
         raise NotImplementedError()
 
-    def min_width(self):
+    @property
+    @abstractmethod
+    def min_width(self) -> int:
         raise NotImplementedError()
 
-    def max_width(self):
+    @property
+    @abstractmethod
+    def max_width(self) -> int:
         raise NotImplementedError()
 
     def _get_flags(self, value):
@@ -47,23 +64,26 @@ class Pattern(Serialize):
 class PatternStr(Pattern):
     __serialize_fields__ = 'value', 'flags'
 
-    type = "str"
+    type: ClassVar[str] = "str"
 
-    def to_regexp(self):
+    def to_regexp(self) -> str:
         return self._get_flags(re.escape(self.value))
 
     @property
-    def min_width(self):
+    def min_width(self) -> int:
+        return len(self.value)
+
+    @property
+    def max_width(self) -> int:
         return len(self.value)
-    max_width = min_width
 
 
 class PatternRE(Pattern):
     __serialize_fields__ = 'value', 'flags', '_width'
 
-    type = "re"
+    type: ClassVar[str] = "re"
 
-    def to_regexp(self):
+    def to_regexp(self) -> str:
         return self._get_flags(self.value)
 
     _width = None
@@ -73,11 +93,11 @@ class PatternRE(Pattern):
         return self._width
 
     @property
-    def min_width(self):
+    def min_width(self) -> int:
         return self._get_width()[0]
 
     @property
-    def max_width(self):
+    def max_width(self) -> int:
         return self._get_width()[1]
@@ -85,7 +105,11 @@ class TerminalDef(Serialize):
     __serialize_fields__ = 'name', 'pattern', 'priority'
     __serialize_namespace__ = PatternStr, PatternRE
 
-    def __init__(self, name, pattern, priority=1):
+    name: str
+    pattern: Pattern
+    priority: int
+
+    def __init__(self, name: str, pattern: Pattern, priority: int=1) -> None:
         assert isinstance(pattern, Pattern), pattern
         self.name = name
         self.pattern = pattern
@@ -94,12 +118,13 @@ class TerminalDef(Serialize):
     def __repr__(self):
         return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
 
-    def user_repr(self):
+    def user_repr(self) -> str:
         if self.name.startswith('__'): # We represent a generated terminal
             return self.pattern.raw or self.name
         else:
             return self.name
 
+_T = TypeVar('_T')
 
 class Token(str):
    """A string with meta-information, that is produced by the lexer.
@@ -122,6 +147,15 @@ class Token(str):
    """
     __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
 
+    type: str
+    start_pos: int
+    value: Any
+    line: int
+    column: int
+    end_line: int
+    end_column: int
+    end_pos: int
+
     def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
         try:
             self = super(Token, cls).__new__(cls, value)
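
Because `Token` subclasses `str`, the fields annotated above ride along on an otherwise ordinary string; a small sketch:

    from lark import Token

    tok = Token('NUMBER', '42', start_pos=0, line=1, column=1)
    assert tok == '42' and tok.type == 'NUMBER'
    tok2 = tok.update(value='43')  # new Token; position info is borrowed from tok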
@@ -139,7 +173,7 @@ class Token(str):
         self.end_pos = end_pos
         return self
 
-    def update(self, type_=None, value=None):
+    def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
         return Token.new_borrow_pos(
             type_ if type_ is not None else self.type,
             value if value is not None else self.value,
@@ -147,7 +181,7 @@ class Token(str):
         )
 
     @classmethod
-    def new_borrow_pos(cls, type_, value, borrow_t):
+    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
         return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
 
     def __reduce__(self):
@@ -281,13 +315,35 @@ def _regexp_has_newline(r):
     return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
 
-class Lexer(object):
+class LexerState(object):
+    __slots__ = 'text', 'line_ctr', 'last_token'
+
+    def __init__(self, text, line_ctr, last_token=None):
+        self.text = text
+        self.line_ctr = line_ctr
+        self.last_token = last_token
+
+    def __eq__(self, other):
+        if not isinstance(other, LexerState):
+            return NotImplemented
+
+        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
+
+    def __copy__(self):
+        return type(self)(self.text, copy(self.line_ctr), self.last_token)
+
+_Callback = Callable[[Token], Token]
+
+class Lexer(ABC):
    """Lexer interface
 
    Method Signatures:
-        lex(self, text) -> Iterator[Token]
+        lex(self, lexer_state, parser_state) -> Iterator[Token]
    """
-    lex = NotImplemented
+    @abstractmethod
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
+        ...
 
     def make_lexer_state(self, text):
         line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
@@ -296,7 +352,14 @@ class Lexer(object):
 
 class TraditionalLexer(Lexer):
 
-    def __init__(self, conf):
+    terminals: Collection[TerminalDef]
+    ignore_types: FrozenSet[str]
+    newline_types: FrozenSet[str]
+    user_callbacks: Dict[str, _Callback]
+    callback: Dict[str, _Callback]
+    re: ModuleType
+
+    def __init__(self, conf: 'LexerConf') -> None:
         terminals = list(conf.terminals)
         assert all(isinstance(t, TerminalDef) for t in terminals), terminals
@@ -329,7 +392,7 @@ class TraditionalLexer(Lexer):
 
         self._mres = None
 
-    def _build(self):
+    def _build(self) -> None:
         terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
         assert all(self.callback.values())
 
@@ -343,23 +406,24 @@ class TraditionalLexer(Lexer):
         self._mres = build_mres(terminals, self.g_regex_flags, self.re, self.use_bytes)
 
     @property
-    def mres(self):
+    def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]:
         if self._mres is None:
             self._build()
+            assert self._mres is not None
         return self._mres
 
-    def match(self, text, pos):
+    def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]:
         for mre, type_from_index in self.mres:
             m = mre.match(text, pos)
             if m:
                 return m.group(0), type_from_index[m.lastindex]
 
-    def lex(self, state, parser_state):
+    def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
         with suppress(EOFError):
             while True:
                 yield self.next_token(state, parser_state)
 
-    def next_token(self, lex_state, parser_state=None):
+    def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
         line_ctr = lex_state.line_ctr
         while line_ctr.char_pos < len(lex_state.text):
             res = self.match(lex_state.text, line_ctr.char_pos)
@@ -395,27 +459,12 @@ class TraditionalLexer(Lexer):
 
         raise EOFError(self)
 
-class LexerState(object):
-    __slots__ = 'text', 'line_ctr', 'last_token'
-
-    def __init__(self, text, line_ctr, last_token=None):
-        self.text = text
-        self.line_ctr = line_ctr
-        self.last_token = last_token
-
-    def __eq__(self, other):
-        if not isinstance(other, LexerState):
-            return NotImplemented
-
-        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
-
-    def __copy__(self):
-        return type(self)(self.text, copy(self.line_ctr), self.last_token)
-
 
 class ContextualLexer(Lexer):
 
-    def __init__(self, conf, states, always_accept=()):
+    lexers: Dict[str, TraditionalLexer]
+    root_lexer: TraditionalLexer
+
+    def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None:
         terminals = list(conf.terminals)
         terminals_by_name = conf.terminals_by_name
@@ -443,7 +492,7 @@ class ContextualLexer(Lexer):
     def make_lexer_state(self, text):
         return self.root_lexer.make_lexer_state(text)
 
-    def lex(self, lexer_state, parser_state):
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
         try:
             while True:
                 lexer = self.lexers[parser_state.position]
@@ -8,6 +8,7 @@ import pkgutil
 from ast import literal_eval
 from numbers import Integral
 from contextlib import suppress
+from typing import List, Tuple, Union, Callable, Dict, Optional
 
 from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique
 from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -17,7 +18,7 @@ from .parser_frontends import ParsingFrontend
 from .common import LexerConf, ParserConf
 from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
 from .utils import classify, dedup_list
-from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError
+from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput
 
 from .tree import Tree, SlottedTree as ST
 from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
| @@ -540,7 +541,12 @@ def nr_deepcopy_tree(t): | |||||
| class Grammar: | class Grammar: | ||||
| def __init__(self, rule_defs, term_defs, ignore): | |||||
| term_defs: List[Tuple[str, Tuple[Tree, int]]] | |||||
| rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] | |||||
| ignore: List[str] | |||||
| def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None: | |||||
| self.term_defs = term_defs | self.term_defs = term_defs | ||||
| self.rule_defs = rule_defs | self.rule_defs = rule_defs | ||||
| self.ignore = ignore | self.ignore = ignore | ||||
| @@ -679,14 +685,18 @@ class FromPackageLoader(object): | |||||
| pkg_name: The name of the package. You can probably provide `__name__` most of the time | pkg_name: The name of the package. You can probably provide `__name__` most of the time | ||||
| search_paths: All the path that will be search on absolute imports. | search_paths: All the path that will be search on absolute imports. | ||||
| """ | """ | ||||
| def __init__(self, pkg_name, search_paths=("", )): | |||||
| pkg_name: str | |||||
| search_paths: Tuple[str, ...] | |||||
| def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None: | |||||
| self.pkg_name = pkg_name | self.pkg_name = pkg_name | ||||
| self.search_paths = search_paths | self.search_paths = search_paths | ||||
| def __repr__(self): | def __repr__(self): | ||||
| return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) | return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) | ||||
| def __call__(self, base_path, grammar_path): | |||||
| def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: | |||||
| if base_path is None: | if base_path is None: | ||||
| to_try = self.search_paths | to_try = self.search_paths | ||||
| else: | else: | ||||
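Since `FromPackageLoader` is now fully annotated, its intended use is easier to see. A hedged usage sketch — `my_pkg` and the `base.lark` file it bundles are hypothetical, so this only runs against such a package layout:

```python
from lark import Lark
from lark.load_grammar import FromPackageLoader

# Resolve %import statements against data files bundled inside `my_pkg`,
# searching both its root and its `grammars/` subdirectory.
custom_loader = FromPackageLoader('my_pkg', ('', 'grammars'))

parser = Lark("""
    start: SOME_TERMINAL
    %import base.SOME_TERMINAL
""", import_paths=[custom_loader])  # loader is consulted for the %import
```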
@@ -863,7 +873,7 @@ def _search_interactive_parser(interactive_parser, predicate):
         if predicate(p):
             return path, p
-def find_grammar_errors(text, start='start'):
+def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]:
     errors = []
     def on_error(e):
         errors.append((e, _error_repr(e)))
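The new return annotation makes the error-collection contract explicit: a list of `(UnexpectedInput, message)` pairs rather than a raised exception. A sketch (the broken grammar is illustrative):

```python
from lark.load_grammar import find_grammar_errors

# The second definition is malformed (missing ':'); instead of failing on the
# first problem, find_grammar_errors() keeps parsing and reports each error
# together with a human-readable summary.
bad_grammar = """
start: a b
a "a"
b: "b"
"""
for exc, message in find_grammar_errors(bad_grammar):
    print(type(exc).__name__, '-', message)
```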
@@ -912,7 +922,12 @@ def _mangle_exp(exp, mangle):
 class GrammarBuilder:
-    def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None):
+    global_keep_all_tokens: bool
+    import_paths: List[Union[str, Callable]]
+    used_files: Dict[str, str]
+    def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
         self.global_keep_all_tokens = global_keep_all_tokens
         self.import_paths = import_paths or []
         self.used_files = used_files or {}
@@ -1056,7 +1071,7 @@ class GrammarBuilder:
         return name, exp, params, opts
-    def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
+    def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None:
         tree = _parse_grammar(grammar_text, grammar_name)
         imports = {}
@@ -1119,7 +1134,7 @@ class GrammarBuilder:
         self._definitions = {k: v for k, v in self._definitions.items() if k in _used}
-    def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
+    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
         assert dotted_path
         mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
         grammar_path = os.path.join(*dotted_path) + EXT
@@ -1155,7 +1170,7 @@ class GrammarBuilder:
             assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)
-    def validate(self):
+    def validate(self) -> None:
         for name, (params, exp, _options) in self._definitions.items():
             for i, p in enumerate(params):
                 if p in self._definitions:
@@ -1184,7 +1199,7 @@ class GrammarBuilder:
         if not set(self._definitions).issuperset(self._ignore_names):
             raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))
-    def build(self):
+    def build(self) -> Grammar:
         self.validate()
         rule_defs = []
         term_defs = []
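`GrammarBuilder` is an internal API, but the annotations above pin down the workflow it implements: load one or more grammars, `validate()`, then `build()` a `Grammar`. A sketch under that reading:

```python
from lark.load_grammar import GrammarBuilder

builder = GrammarBuilder()
builder.load_grammar('start: "a"')  # parses the text and registers definitions
grammar = builder.build()           # build() calls validate(), then returns a Grammar
```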
@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser
 from .tree import Tree
 from .common import LexerConf, ParserConf
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
 import re
@@ -1,11 +1,13 @@
 """Reconstruct text from a tree, based on Lark grammar"""
+from typing import List, Dict, Union, Callable, Iterable, Optional
 import unicodedata
+from .lark import Lark
 from .tree import Tree
 from .visitors import Transformer_InPlace
-from .lexer import Token, PatternStr
-from .grammar import Terminal, NonTerminal
+from .lexer import Token, PatternStr, TerminalDef
+from .grammar import Terminal, NonTerminal, Symbol
 from .tree_matcher import TreeMatcher, is_discarded_terminal
 from .utils import is_id_continue
@@ -21,7 +23,10 @@ def is_iter_empty(i):
 class WriteTokensTransformer(Transformer_InPlace):
     "Inserts discarded tokens into their correct place, according to the rules of grammar"
-    def __init__(self, tokens, term_subs):
+    tokens: Dict[str, TerminalDef]
+    term_subs: Dict[str, Callable[[Symbol], str]]
+    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
         self.tokens = tokens
         self.term_subs = term_subs
@@ -70,7 +75,9 @@ class Reconstructor(TreeMatcher):
         term_subs: a dictionary of [Terminal name as str] to [output text as str]
     """
-    def __init__(self, parser, term_subs=None):
+    write_tokens: WriteTokensTransformer
+    def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
         TreeMatcher.__init__(self, parser)
         self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
@@ -87,7 +94,7 @@ class Reconstructor(TreeMatcher):
             else:
                 yield item
-    def reconstruct(self, tree, postproc=None, insert_spaces=True):
+    def reconstruct(self, tree: Tree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
         x = self._reconstruct(tree)
         if postproc:
             x = postproc(x)
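With `reconstruct()` now typed as returning `str`, a round-trip example reads naturally. A sketch (grammar illustrative; `Reconstructor` needs a LALR parser, and ignored whitespace is regenerated rather than preserved):

```python
from lark import Lark
from lark.reconstruct import Reconstructor

parser = Lark(r"""
    start: NAME "=" NAME
    NAME: /\w+/
    %ignore " "
""", parser='lalr', maybe_placeholders=False)

tree = parser.parse("foo = bar")
new_text = Reconstructor(parser).reconstruct(tree)
print(new_text)  # e.g. "foo=bar" -- the ignored spaces are not restored
```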
| @@ -1,16 +1,35 @@ | |||||
| try: | try: | ||||
| from future_builtins import filter | |||||
| from future_builtins import filter # type: ignore | |||||
| except ImportError: | except ImportError: | ||||
| pass | pass | ||||
| import sys | |||||
| from copy import deepcopy | from copy import deepcopy | ||||
| ###{standalone | ###{standalone | ||||
| from collections import OrderedDict | from collections import OrderedDict | ||||
| from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING | |||||
| if TYPE_CHECKING: | |||||
| from .lexer import TerminalDef | |||||
| if sys.version_info >= (3, 8): | |||||
| from typing import Literal | |||||
| else: | |||||
| from typing_extensions import Literal | |||||
| class Meta: | class Meta: | ||||
| empty: bool | |||||
| line: int | |||||
| column: int | |||||
| start_pos: int | |||||
| end_line: int | |||||
| end_column: int | |||||
| end_pos: int | |||||
| orig_expansion: 'List[TerminalDef]' | |||||
| match_tree: bool | |||||
| def __init__(self): | def __init__(self): | ||||
| self.empty = True | self.empty = True | ||||
| @@ -27,13 +46,17 @@ class Tree(object): | |||||
| meta: Line & Column numbers (if ``propagate_positions`` is enabled). | meta: Line & Column numbers (if ``propagate_positions`` is enabled). | ||||
| meta attributes: line, column, start_pos, end_line, end_column, end_pos | meta attributes: line, column, start_pos, end_line, end_column, end_pos | ||||
| """ | """ | ||||
| def __init__(self, data, children, meta=None): | |||||
| data: str | |||||
| children: 'List[Union[str, Tree]]' | |||||
| def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None: | |||||
| self.data = data | self.data = data | ||||
| self.children = children | self.children = children | ||||
| self._meta = meta | self._meta = meta | ||||
| @property | @property | ||||
| def meta(self): | |||||
| def meta(self) -> Meta: | |||||
| if self._meta is None: | if self._meta is None: | ||||
| self._meta = Meta() | self._meta = Meta() | ||||
| return self._meta | return self._meta | ||||
| @@ -57,7 +80,7 @@ class Tree(object): | |||||
| return l | return l | ||||
| def pretty(self, indent_str=' '): | |||||
| def pretty(self, indent_str: str=' ') -> str: | |||||
| """Returns an indented string representation of the tree. | """Returns an indented string representation of the tree. | ||||
| Great for debugging. | Great for debugging. | ||||
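A quick illustration of the newly annotated constructor and `pretty()` (output shown approximately):

```python
from lark import Tree

# Children may be subtrees or plain leaf values, per the new annotation.
t = Tree('start', [Tree('greeting', ['hello']), Tree('name', ['world'])])
print(t.pretty())
# start
#   greeting  hello
#   name      world
```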
@@ -73,10 +96,10 @@ class Tree(object):
     def __ne__(self, other):
         return not (self == other)
-    def __hash__(self):
+    def __hash__(self) -> int:
         return hash((self.data, tuple(self.children)))
-    def iter_subtrees(self):
+    def iter_subtrees(self) -> 'Iterator[Tree]':
         """Depth-first iteration.
         Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
@@ -91,23 +114,23 @@ class Tree(object):
         del queue
         return reversed(list(subtrees.values()))
-    def find_pred(self, pred):
+    def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]':
         """Returns all nodes of the tree that evaluate pred(node) as true."""
         return filter(pred, self.iter_subtrees())
-    def find_data(self, data):
+    def find_data(self, data: str) -> 'Iterator[Tree]':
         """Returns all nodes of the tree whose data equals the given data."""
         return self.find_pred(lambda t: t.data == data)
 ###}
-    def expand_kids_by_index(self, *indices):
+    def expand_kids_by_index(self, *indices: int) -> None:
         """Expand (inline) children at the given indices"""
         for i in sorted(indices, reverse=True):  # reverse so that changing tail won't affect indices
             kid = self.children[i]
             self.children[i:i+1] = kid.children
-    def scan_values(self, pred):
+    def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]:
         """Return all values in the tree that evaluate pred(value) as true.
         This can be used to find all the tokens in the tree.
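Together, `find_data()` and `scan_values()` cover most read-only queries on a parse tree. A sketch (grammar illustrative):

```python
from lark import Lark, Token

tree = Lark(r"""
    start: pair+
    pair: WORD WORD
    %import common.WORD
    %ignore " "
""").parse("hello world foo bar")

pairs = list(tree.find_data('pair'))                            # the two `pair` subtrees
words = list(tree.scan_values(lambda v: isinstance(v, Token)))  # all four tokens
```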
@@ -140,10 +163,10 @@ class Tree(object):
     def __deepcopy__(self, memo):
         return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)
-    def copy(self):
+    def copy(self) -> 'Tree':
         return type(self)(self.data, self.children)
-    def set(self, data, children):
+    def set(self, data: str, children: 'List[Union[str, Tree]]') -> None:
         self.data = data
         self.children = children
@@ -153,7 +176,7 @@ class SlottedTree(Tree):
     __slots__ = 'data', 'children', 'rule', '_meta'
-def pydot__tree_to_png(tree, filename, rankdir="LR", **kwargs):
+def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None:
     graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
     graph.write_png(filename)
@@ -174,7 +197,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
         possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
     """
-    import pydot
+    import pydot  # type: ignore
     graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
     i = [0]
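The `Literal` annotation documents the four graphviz rank directions. A usage sketch (requires the optional `pydot` dependency; the grammar and filename are illustrative):

```python
from lark import Lark
from lark.tree import pydot__tree_to_png

tree = Lark(r"""
    start: NAME "=" NAME
    NAME: /[a-z]+/
    %ignore " "
""").parse("foo = bar")

pydot__tree_to_png(tree, 'parse_tree.png', rankdir="TB")  # top-to-bottom layout
```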
@@ -6,7 +6,7 @@ from collections import deque
 ###{standalone
 import sys, re
 import logging
-logger = logging.getLogger("lark")
+logger: logging.Logger = logging.getLogger("lark")
 logger.addHandler(logging.StreamHandler())
 # Set to highest level, since we have some warnings amongst the code
 # By default, we should not output any log messages
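Annotating the module-level `logger` also makes this debugging hook discoverable in IDEs. Enabling lark's log output is just:

```python
import logging
from lark import logger

logger.setLevel(logging.DEBUG)  # lark defaults to the highest level, i.e. silent
```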
@@ -132,7 +132,7 @@ def smart_decorator(f, create_decorator):
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
@@ -1,3 +1,4 @@
+from abc import ABC
 from functools import wraps
 from .utils import smart_decorator, combine_alternatives
@@ -7,7 +8,12 @@ from .lexer import Token
 ###{standalone
 from inspect import getmembers, getmro
+from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional
+_T = TypeVar('_T')
+_R = TypeVar('_R')
+_FUNC = Callable[..., _T]
+_DECORATED = Union[_FUNC, type]
 class Discard(Exception):
     """When raising the Discard exception in a transformer callback,
@@ -46,7 +52,7 @@ class _Decoratable:
         return cls
-class Transformer(_Decoratable):
+class Transformer(_Decoratable, ABC, Generic[_T]):
     """Transformers visit each node of the tree, and run the appropriate method on it according to the node's data.
     Methods are provided by the user via inheritance, and called according to ``tree.data``.
@@ -74,7 +80,7 @@ class Transformer(_Decoratable):
     """
     __visit_tokens__ = True  # For backwards compatibility
-    def __init__(self, visit_tokens=True):
+    def __init__(self, visit_tokens: bool=True) -> None:
         self.__visit_tokens__ = visit_tokens
     def _call_userfunc(self, tree, new_children=None):
@@ -125,11 +131,11 @@ class Transformer(_Decoratable):
         children = list(self._transform_children(tree.children))
         return self._call_userfunc(tree, children)
-    def transform(self, tree):
+    def transform(self, tree: Tree) -> _T:
         "Transform the given tree, and return the final result"
         return self._transform_tree(tree)
-    def __mul__(self, other):
+    def __mul__(self, other: 'Transformer[_T]') -> 'TransformerChain[_T]':
         """Chain two transformers together, returning a new transformer.
         """
         return TransformerChain(self, other)
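Making `Transformer` generic means a type checker can now relate the callbacks' return type to `transform()`. A sketch of what that buys (the grammar and class are illustrative):

```python
from lark import Lark, Transformer

class EvalSum(Transformer[int]):
    def number(self, children) -> int:
        return int(children[0])
    def start(self, children) -> int:
        return sum(children)     # children are already ints (bottom-up)

parser = Lark(r"""
    start: number ("+" number)*
    number: /[0-9]+/
""")
total: int = EvalSum().transform(parser.parse("1+2+3"))  # mypy verifies the int
```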
@@ -149,16 +155,19 @@ class Transformer(_Decoratable):
         return token
-class TransformerChain(object):
-    def __init__(self, *transformers):
+class TransformerChain(Generic[_T]):
+    transformers: Tuple[Transformer[_T], ...]
+    def __init__(self, *transformers: Transformer[_T]) -> None:
         self.transformers = transformers
-    def transform(self, tree):
+    def transform(self, tree: Tree) -> _T:
         for t in self.transformers:
             tree = t.transform(tree)
         return tree
-    def __mul__(self, other):
+    def __mul__(self, other: Transformer[_T]) -> 'TransformerChain[_T]':
         return TransformerChain(*self.transformers + (other,))
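`__mul__` applies left to right: `(a * b).transform(t)` equals `b.transform(a.transform(t))`. A runnable sketch (the class and grammar are illustrative):

```python
from lark import Lark, Transformer, Tree

class Doubler(Transformer[Tree]):
    def num(self, children):
        return Tree('num', [int(children[0]) * 2])  # doubles on every pass

parser = Lark('start: num+\nnum: /[0-9]+/\n%ignore " "')
chain = Doubler() * Doubler()                  # a TransformerChain[Tree]
print(chain.transform(parser.parse("1 2 3")))  # each num is doubled twice
```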
@@ -239,19 +248,19 @@ class VisitorBase:
         return cls
-class Visitor(VisitorBase):
+class Visitor(VisitorBase, ABC, Generic[_T]):
     """Tree visitor, non-recursive (can handle huge trees).
     Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
     """
-    def visit(self, tree):
+    def visit(self, tree: Tree) -> Tree:
         "Visits the tree, starting with the leaves and finally the root (bottom-up)"
         for subtree in tree.iter_subtrees():
             self._call_userfunc(subtree)
         return tree
-    def visit_topdown(self,tree):
+    def visit_topdown(self, tree: Tree) -> Tree:
         "Visit the tree, starting at the root, and ending at the leaves (top-down)"
         for subtree in tree.iter_subtrees_topdown():
             self._call_userfunc(subtree)
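`Visitor` subclasses read or mutate nodes in place, and `visit()` hands back the same tree, as the new `Tree -> Tree` signature states. A sketch (grammar illustrative):

```python
from lark import Lark, Tree, Visitor

class LabelCollector(Visitor):
    def __init__(self):
        self.labels = []
    def label(self, tree: Tree):          # called once per `label` node
        self.labels.append(str(tree.children[0]))

parser = Lark(r"""
    start: label+
    label: /[a-z]+/ ":"
    %ignore " "
""")
v = LabelCollector()
v.visit(parser.parse("foo: bar:"))
print(v.labels)  # ['foo', 'bar']
```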
@@ -266,7 +275,7 @@ class Visitor_Recursive(VisitorBase):
     Slightly faster than the non-recursive version.
     """
-    def visit(self, tree):
+    def visit(self, tree: Tree) -> Tree:
         "Visits the tree, starting with the leaves and finally the root (bottom-up)"
         for child in tree.children:
             if isinstance(child, Tree):
@@ -275,7 +284,7 @@ class Visitor_Recursive(VisitorBase):
         self._call_userfunc(tree)
         return tree
-    def visit_topdown(self,tree):
+    def visit_topdown(self,tree: Tree) -> Tree:
         "Visit the tree, starting at the root, and ending at the leaves (top-down)"
         self._call_userfunc(tree)
@@ -286,16 +295,7 @@ class Visitor_Recursive(VisitorBase):
         return tree
-def visit_children_decor(func):
-    "See Interpreter"
-    @wraps(func)
-    def inner(cls, tree):
-        values = cls.visit_children(tree)
-        return func(cls, values)
-    return inner
-class Interpreter(_Decoratable):
+class Interpreter(_Decoratable, ABC, Generic[_T]):
     """Interpreter walks the tree starting at the root.
     Visits the tree, starting with the root and finally the leaves (top-down)
@@ -307,7 +307,7 @@ class Interpreter(_Decoratable):
     This allows the user to implement branching and loops.
     """
-    def visit(self, tree):
+    def visit(self, tree: Tree) -> _T:
         f = getattr(self, tree.data)
         wrapper = getattr(f, 'visit_wrapper', None)
         if wrapper is not None:
@@ -315,7 +315,7 @@ class Interpreter(_Decoratable):
         else:
             return f(tree)
-    def visit_children(self, tree):
+    def visit_children(self, tree: Tree) -> List[_T]:
         return [self.visit(child) if isinstance(child, Tree) else child
                 for child in tree.children]
@@ -326,6 +326,16 @@ class Interpreter(_Decoratable):
         return self.visit_children(tree)
+_InterMethod = Callable[[Type[Interpreter], _T], _R]
+def visit_children_decor(func: _InterMethod) -> _InterMethod:
+    "See Interpreter"
+    @wraps(func)
+    def inner(cls, tree):
+        values = cls.visit_children(tree)
+        return func(cls, values)
+    return inner
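The relocated, typed `visit_children_decor` is easiest to read next to an example: it converts an `Interpreter` method from receiving the raw subtree to receiving the already-visited child values. A sketch (grammar illustrative):

```python
from lark import Lark
from lark.visitors import Interpreter, visit_children_decor

class Calc(Interpreter):
    @visit_children_decor
    def start(self, values):   # receives [2, 3] instead of the raw Tree
        return sum(values)
    def num(self, tree):       # undecorated methods still get the subtree
        return int(tree.children[0])

parser = Lark(r"""
    start: num "+" num
    num: /[0-9]+/
""")
print(Calc().visit(parser.parse("2+3")))  # 5
```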
 # Decorators
 def _apply_decorator(obj, decorator, **kwargs):
@@ -380,7 +390,7 @@ def _vargs_tree(f, data, children, meta):
     return f(Tree(data, children, meta))
-def v_args(inline=False, meta=False, tree=False, wrapper=None):
+def v_args(inline: bool=False, meta: bool=False, tree: bool=False, wrapper: Optional[Callable]=None) -> Callable[[_DECORATED], _DECORATED]:
     """A convenience decorator factory for modifying the behavior of user-supplied visitor methods.
     By default, callback methods of transformers/visitors accept one argument - a list of the node's children.