| @@ -0,0 +1,105 @@ | |||
| """ | |||
| This example demonstrates how to transform a parse-tree into an AST using `lark.ast_utils`. | |||
| This example only works with Python 3. | |||
| """ | |||
| import sys | |||
| from typing import List | |||
| from dataclasses import dataclass | |||
| from lark import Lark, ast_utils, Transformer, v_args | |||
| this_module = sys.modules[__name__] | |||
| # | |||
| # Define AST | |||
| # | |||
class _Ast(ast_utils.Ast):
    # Abstract base for all AST nodes in this example.  Subclasses of
    # ast_utils.Ast are collected by ast_utils.create_transformer(); the
    # leading underscore makes create_transformer() skip this class itself.
    pass
class _Statement(_Ast):
    # Abstract base for statement nodes (If, SetVar, Print).  The leading
    # underscore makes create_transformer() skip it.
    pass
@dataclass
class Value(_Ast):
    """A literal value (string or number, per the `value` grammar rule)."""
    value: object
@dataclass
class Name(_Ast):
    """A variable name (matches the `name` grammar rule)."""
    name: str
@dataclass
class CodeBlock(_Ast, ast_utils.AsList):
    """A sequence of statements.

    Inheriting ast_utils.AsList makes the transformer pass all matched
    children as one list argument instead of separate arguments.
    """
    statements: List[_Statement]
@dataclass
class If(_Statement):
    """An `if <value> { <code_block> }` statement."""
    cond: Value
    then: CodeBlock
@dataclass
class SetVar(_Statement):
    """A `<name> = <value>;` assignment statement."""
    name: str
    value: Value
@dataclass
class Print(_Statement):
    """A `print <value>;` statement."""
    value: Value
class ToAst(Transformer):
    """Handles terminal tokens and the `start` rule.

    The per-rule AST constructors themselves are attached later by
    ast_utils.create_transformer().
    """

    def STRING(self, token):
        # Strip the surrounding quote characters from the token text.
        return token[1:-1]

    def DEC_NUMBER(self, token):
        # Convert the decimal-number token text to a Python int.
        return int(token)

    @v_args(inline=True)
    def start(self, tree):
        # `start: code_block` — unwrap the single child.
        return tree
#
# Define Parser
#

# LALR parser for the toy language.  Terminals NAME/STRING/DEC_NUMBER are
# borrowed from lark's built-in Python grammar via `%import python`.
parser = Lark("""
    start: code_block
    code_block: statement+
    ?statement: if | set_var | print
    if: "if" value "{" code_block "}"
    set_var: NAME "=" value ";"
    print: "print" value ";"
    value: name | STRING | DEC_NUMBER
    name: NAME
    %import python (NAME, STRING, DEC_NUMBER)
    %import common.WS
    %ignore WS
    """,
    parser="lalr",
)
# Build the complete transformer: AST-node rules collected from this module,
# merged onto the terminal/start handlers defined by ToAst.
transformer = ast_utils.create_transformer(this_module, ToAst())


def parse(text):
    """Parse `text` and transform the resulting parse-tree into an AST."""
    tree = parser.parse(text)
    return transformer.transform(tree)
#
# Test
#

if __name__ == '__main__':
    sample_program = """
        a = 1;
        if a {
            print "a is 1";
            a = 2;
        }
    """
    print(parse(sample_program))
| @@ -0,0 +1,17 @@ | |||
| import types | |||
| from typing import Optional | |||
| from .visitors import Transformer | |||
class Ast(object):
    # Type-stub marker class mirroring lark.ast_utils.Ast.
    pass
class AsList(object):
    # Type-stub marker class mirroring lark.ast_utils.AsList.
    pass
def create_transformer(
    ast_module: types.ModuleType,
    transformer: Optional[Transformer]=None
) -> Transformer:
    # Stub: collects Ast subclasses from `ast_module` into a Transformer.
    ...
| @@ -6,4 +6,9 @@ class RuleOptions: | |||
| expand1: bool | |||
| priority: int | |||
| template_source: Optional[str] | |||
| empty_indices: Tuple[bool, ...] | |||
| empty_indices: Tuple[bool, ...] | |||
| class Symbol: | |||
| name: str | |||
| is_term: bool | |||
| @@ -3,9 +3,10 @@ | |||
| from typing import Tuple, List, Iterator, Optional | |||
| from abc import ABC, abstractmethod | |||
| from .lexer import Token | |||
| from .lark import PostLex | |||
| class Indenter(ABC): | |||
| class Indenter(PostLex, ABC): | |||
| paren_level: Optional[int] | |||
| indent_level: Optional[List[int]] | |||
| @@ -15,13 +16,6 @@ class Indenter(ABC): | |||
| def handle_NL(self, token: Token) -> Iterator[Token]: | |||
| ... | |||
| def process(self, stream: Iterator[Token]) -> Iterator[Token]: | |||
| ... | |||
| @property | |||
| def always_accept(self) -> Tuple[str]: | |||
| ... | |||
| @property | |||
| @abstractmethod | |||
| def NL_type(self) -> str: | |||
| @@ -65,7 +65,7 @@ class Lark: | |||
| grammar: Union[Grammar, str, IO[str]], | |||
| *, | |||
| start: Union[None, str, List[str]] = "start", | |||
| parser: Literal["earley", "lalr", "cyk"] = "auto", | |||
| parser: Literal["earley", "lalr", "cyk", "auto"] = "auto", | |||
| lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto", | |||
| transformer: Optional[Transformer] = None, | |||
| postlex: Optional[PostLex] = None, | |||
| @@ -1,8 +1,8 @@ | |||
| from typing import List, Tuple, Union, Callable, Dict, Optional | |||
| from lark import Tree | |||
| from lark.grammar import RuleOptions | |||
| from lark.exceptions import UnexpectedInput | |||
| from .tree import Tree | |||
| from .grammar import RuleOptions | |||
| from .exceptions import UnexpectedInput | |||
| class Grammar: | |||
| @@ -11,8 +11,7 @@ from .lexer import TerminalDef | |||
| class WriteTokensTransformer(Transformer_InPlace): | |||
| def __init__(self, tokens: Dict[str, TerminalDef], Dict[str, Callable[[Symbol], str]] = ...): | |||
| ... | |||
| def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ... | |||
| class MatchTree(Tree): | |||
| @@ -0,0 +1,51 @@ | |||
| """ | |||
| Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree | |||
| """ | |||
| import inspect, re | |||
| from lark import Transformer, v_args | |||
class Ast(object):
    """Abstract class.

    Subclasses will be collected by `create_transformer()`.
    """
    pass
class AsList(object):
    """Abstract class.

    Subclasses will be instantiated with the parse results as a single list,
    instead of as arguments.
    """
def camel_to_snake(name):
    """Convert a CamelCase identifier to snake_case, e.g. "CodeBlock" -> "code_block"."""
    # Insert '_' at every position immediately before a capital letter
    # (except at the start of the string), then lowercase the result.
    with_boundaries = re.sub(r'(?<!^)(?=[A-Z])', '_', name)
    return with_boundaries.lower()
| def _call(func, _data, children, _meta): | |||
| return func(*children) | |||
| inline = v_args(wrapper=_call) | |||
def create_transformer(ast_module, transformer=None):
    """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.

    For each class, we create a corresponding rule in the transformer, with a matching name.
    CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block".

    Parameters:
        ast_module - A Python module containing all the subclasses of `ast_utils.Ast`
            Classes starting with an underscore (`_`) will be skipped.
        transformer (Optional[Transformer]) - An initial transformer. Its attributes may be overwritten.
    """
    result = transformer or Transformer()
    for member_name, member in inspect.getmembers(ast_module):
        # Skip private names and anything that isn't an Ast subclass.
        if member_name.startswith('_') or not inspect.isclass(member):
            continue
        if not issubclass(member, Ast):
            continue
        callback = member
        if not issubclass(member, AsList):
            # Non-AsList nodes take the children as separate arguments, so
            # wrap the class with the inlining v_args wrapper, bound to the
            # transformer instance.
            callback = inline(member).__get__(result)
        setattr(result, camel_to_snake(member_name), callback)
    return result
| @@ -1,13 +1,14 @@ | |||
| "Provides Indentation services for languages with indentation similar to Python" | |||
| from .exceptions import LarkError | |||
| from .lark import PostLex | |||
| from .lexer import Token | |||
| ###{standalone | |||
class DedentError(LarkError):
    # Indentation-specific error type.  NOTE(review): presumably raised for
    # a dedent that doesn't match an open indent level — confirm against the
    # Indenter implementation (not visible in this chunk).
    pass
| class Indenter: | |||
| class Indenter(PostLex): | |||
    def __init__(self):
        # Both counters start as None; per the type stub they later hold
        # Optional[int] / Optional[List[int]].  NOTE(review): presumably
        # initialized for real when processing begins — confirm in process().
        self.paren_level = None
        self.indent_level = None
| @@ -1,4 +1,6 @@ | |||
| from __future__ import absolute_import | |||
| from lark.exceptions import ConfigurationError, assert_config | |||
| import sys, os, pickle, hashlib | |||
| @@ -6,7 +8,7 @@ from io import open | |||
| import tempfile | |||
| from warnings import warn | |||
| from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger | |||
| from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod | |||
| from .load_grammar import load_grammar, FromPackageLoader, Grammar | |||
| from .tree import Tree | |||
| from .common import LexerConf, ParserConf | |||
| @@ -191,6 +193,14 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) | |||
| _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | |||
class PostLex(ABC):
    """Interface for post-lexing processors of the token stream
    (implemented e.g. by the Indenter)."""

    # NOTE(review): appears to name terminal types the postlexer must always
    # receive — confirm against the lexer's usage (not visible here).
    always_accept = ()

    @abstractmethod
    def process(self, stream):
        """Transform the token stream; this default passes it through unchanged."""
        return stream
| class Lark(Serialize): | |||
| """Main interface for the library. | |||
| @@ -288,7 +298,12 @@ class Lark(Serialize): | |||
| if self.options.parser == 'lalr': | |||
| self.options.lexer = 'contextual' | |||
| elif self.options.parser == 'earley': | |||
| self.options.lexer = 'dynamic' | |||
| if self.options.postlex is not None: | |||
| logger.info("postlex can't be used with the dynamic lexer, so we use standard instead. " | |||
| "Consider using lalr with contextual instead of earley") | |||
| self.options.lexer = 'standard' | |||
| else: | |||
| self.options.lexer = 'dynamic' | |||
| elif self.options.parser == 'cyk': | |||
| self.options.lexer = 'standard' | |||
| else: | |||
| @@ -298,6 +313,8 @@ class Lark(Serialize): | |||
| assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance | |||
| else: | |||
| assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete')) | |||
| if self.options.postlex is not None and 'dynamic' in lexer: | |||
| raise ConfigurationError("Can't use postlex with a dynamic lexer. Use standard or contextual instead") | |||
| if self.options.ambiguity == 'auto': | |||
| if self.options.parser == 'earley': | |||
| @@ -72,6 +72,7 @@ class ParsingFrontend(Serialize): | |||
| lexer_type = lexer_conf.lexer_type | |||
| self.skip_lexer = False | |||
| if lexer_type in ('dynamic', 'dynamic_complete'): | |||
| assert lexer_conf.postlex is None | |||
| self.skip_lexer = True | |||
| return | |||
| @@ -56,7 +56,6 @@ EXTRACT_STANDALONE_FILES = [ | |||
| 'utils.py', | |||
| 'tree.py', | |||
| 'visitors.py', | |||
| 'indenter.py', | |||
| 'grammar.py', | |||
| 'lexer.py', | |||
| 'common.py', | |||
| @@ -65,6 +64,7 @@ EXTRACT_STANDALONE_FILES = [ | |||
| 'parsers/lalr_analysis.py', | |||
| 'parser_frontends.py', | |||
| 'lark.py', | |||
| 'indenter.py', | |||
| ] | |||
| def extract_sections(lines): | |||
| @@ -12,6 +12,15 @@ logger.addHandler(logging.StreamHandler()) | |||
| # By default, we should not output any log messages | |||
| logger.setLevel(logging.CRITICAL) | |||
# Provide ABC/abstractmethod on both Python 3 and Python 2.7.
if sys.version_info[0] > 2:
    from abc import ABC, abstractmethod
else:
    from abc import ABCMeta, abstractmethod

    class ABC(object):  # Provide Python 2.7 compatibility
        __slots__ = ()
        # Bug fix: was misspelled '__metclass__', so ABCMeta was never
        # actually applied and @abstractmethod had no effect on Python 2.
        __metaclass__ = ABCMeta
# True when running on Python 3.6 or newer.
Py36 = (sys.version_info[:2] >= (3, 6))

# Unique sentinel object, distinct from None.
NO_VALUE = object()