--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -1,5 +1,7 @@
 from .utils import Serialize
+###{standalone
 class Symbol(Serialize):
     is_term = NotImplemented
@@ -43,6 +45,24 @@ class NonTerminal(Symbol):
     is_term = False
+class RuleOptions(Serialize):
+    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices'
+    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
+        self.keep_all_tokens = keep_all_tokens
+        self.expand1 = expand1
+        self.priority = priority
+        self.empty_indices = empty_indices
+    def __repr__(self):
+        return 'RuleOptions(%r, %r, %r)' % (
+            self.keep_all_tokens,
+            self.expand1,
+            self.priority,
+        )
 class Rule(Serialize):
     """
         origin : a symbol
@@ -52,7 +72,7 @@ class Rule(Serialize):
     __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
     __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
-    __serialize_namespace__ = lambda: (Terminal, NonTerminal, RuleOptions)
+    __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
     def __init__(self, origin, expansion, order=0, alias=None, options=None):
         self.origin = origin
@@ -81,18 +101,4 @@ class Rule(Serialize):
-class RuleOptions(Serialize):
-    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices'
-    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
-        self.keep_all_tokens = keep_all_tokens
-        self.expand1 = expand1
-        self.priority = priority
-        self.empty_indices = empty_indices
-    def __repr__(self):
-        return 'RuleOptions(%r, %r, %r)' % (
-            self.keep_all_tokens,
-            self.expand1,
-            self.priority,
-        )
+###}
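The switch from a lambda to a plain tuple in __serialize_namespace__ lines up with the new Serialize.deserialize in the utils.py hunk further down, which now iterates the attribute instead of calling it. A rough, illustrative round-trip under that assumption — Symbol's own serialize fields aren't shown in this diff, so treat this as a sketch rather than guaranteed behaviour at this commit:

    from lark.grammar import Rule, RuleOptions, NonTerminal, Terminal

    r = Rule(NonTerminal('start'), [Terminal('WORD')],
             options=RuleOptions(keep_all_tokens=True))
    data = r.serialize(None)          # nested dicts, each tagged with '__type__'
    r2 = Rule.deserialize(data, {})   # the namespace tuple above resolves those tags
    assert r2.origin.name == 'start'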
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -15,6 +15,7 @@ from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import get_frontend
 from .grammar import Rule
+###{standalone
 class LarkOptions(Serialize):
     """Specifies the options for Lark
@@ -101,11 +102,11 @@ class LarkOptions(Serialize):
         assert name in self.options
         self.options[name] = value
-    def serialize(self):
+    def serialize(self, memo):
         return self.options
     @classmethod
-    def deserialize(cls, data):
+    def deserialize(cls, data, memo):
         return cls(data)
@@ -240,12 +241,12 @@ class Lark(Serialize):
         return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
     @classmethod
-    def deserialize(cls, data):
+    def deserialize(cls, data, memo):
         inst = cls.__new__(cls)
-        inst.options = LarkOptions.deserialize(data['options'])
-        inst.rules = [Rule.deserialize(r) for r in data['rules']]
+        inst.options = LarkOptions.deserialize(data['options'], memo)
+        inst.rules = [Rule.deserialize(r, memo) for r in data['rules']]
         inst._prepare_callbacks()
-        inst.parser = inst.parser_class.deserialize(data['parser'], inst._callbacks)
+        inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks)
         return inst
@@ -284,14 +285,4 @@ class Lark(Serialize):
         "Parse the given text, according to the options provided. Returns a tree, unless specified otherwise."
         return self.parser.parse(text)
-        # if self.profiler:
-        #     self.profiler.enter_section('lex')
-        #     l = list(self.lex(text))
-        #     self.profiler.enter_section('parse')
-        #     try:
-        #         return self.parser.parse(l)
-        #     finally:
-        #         self.profiler.enter_section('outside_lark')
-        # else:
-        #     l = list(self.lex(text))
-        #     return self.parser.parse(l)
+###}
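For orientation, this is roughly how the memo-aware pair introduced here is meant to be driven — it mirrors what the reworked main() in tools/standalone.py further down does. The grammar is a throwaway example, and the diff alone doesn't prove the full round-trip already works in this intermediate state:

    from lark import Lark
    from lark.lexer import TerminalDef

    parser = Lark('start: "a"+', parser='lalr', lexer='contextual')
    data, memo = parser.memo_serialize([TerminalDef])
    # `data` and `memo` are plain dicts/lists, so they can be printed as
    # literals (the standalone generator emits them as DATA and MEMO).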
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -5,6 +5,8 @@ import re
 from .utils import Str, classify, get_regexp_width, Py36, Serialize
 from .exceptions import UnexpectedCharacters, LexError
+###{standalone
 class Pattern(Serialize):
     __serialize_fields__ = 'value', 'flags'
@@ -61,7 +63,7 @@ class PatternRE(Pattern):
 class TerminalDef(Serialize):
     __serialize_fields__ = 'name', 'pattern', 'priority'
-    __serialize_namespace__ = lambda: (PatternStr, PatternRE)
+    __serialize_namespace__ = PatternStr, PatternRE
     def __init__(self, name, pattern, priority=1):
         assert isinstance(pattern, Pattern), pattern
@@ -74,7 +76,6 @@ class TerminalDef(Serialize):
-###{standalone
 class Token(Str):
     __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column')
@@ -205,7 +206,6 @@ class CallChain:
         return self.callback2(t) if self.cond(t2) else t2
-###}
@@ -275,7 +275,7 @@ class Lexer(Serialize):
 class TraditionalLexer(Lexer):
     __serialize_fields__ = 'terminals', 'ignore_types', 'newline_types'
-    __serialize_namespace__ = lambda: (TerminalDef,)
+    __serialize_namespace__ = TerminalDef,
     def _deserialize(self):
         self.mres = build_mres(self.terminals)
@@ -328,7 +328,7 @@ class TraditionalLexer(Lexer):
 class ContextualLexer(Lexer):
     __serialize_fields__ = 'root_lexer', 'lexers'
-    __serialize_namespace__ = lambda: (TraditionalLexer,)
+    __serialize_namespace__ = TraditionalLexer,
     def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
         tokens_by_name = {}
@@ -363,3 +363,5 @@ class ContextualLexer(Lexer):
             yield x
             l.lexer = self.lexers[self.parser_state]
             l.state = self.parser_state
+###}
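A small, hedged illustration of the plain (non-memoized) round-trip these lexer classes support; Pattern.__init__ isn't shown in the hunks above, so the constructor call is assumed from the fields it serializes:

    from lark.lexer import TerminalDef, PatternStr

    t = TerminalDef('PLUS', PatternStr('+'))
    data = t.serialize(None)
    # -> a plain dict tagged with '__type__', roughly
    #    {'name': 'PLUS', 'pattern': {'value': '+', ...}, 'priority': 1, ...}
    t2 = TerminalDef.deserialize(data, {})
    assert t2.name == 'PLUS' and t2.pattern.value == '+'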
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -4,26 +4,29 @@ from functools import partial
 from .utils import get_regexp_width, Serialize
 from .parsers.grammar_analysis import GrammarAnalyzer
 from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
-from .parsers import lalr_parser, earley, xearley, cyk
+from .parsers import earley, xearley, cyk
+from .parsers.lalr_parser import LALR_Parser
 from .grammar import Rule
 from .tree import Tree
+###{standalone
 class WithLexer(Serialize):
     lexer = None
     parser = None
     lexer_conf = None
     __serialize_fields__ = 'parser', 'lexer'
-    __serialize_namespace__ = lambda: (Rule, ContextualLexer, LALR_ContextualLexer)
+    __serialize_namespace__ = Rule, ContextualLexer
     @classmethod
-    def deserialize(cls, data, callbacks):
-        inst = super(WithLexer, cls).deserialize(data)
+    def deserialize(cls, data, memo, callbacks):
+        inst = super(WithLexer, cls).deserialize(data, memo)
         inst.postlex = None # TODO
-        inst.parser = lalr_parser.Parser.deserialize(inst.parser, callbacks)
+        inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
         return inst
-    def _serialize(self, data):
+    def _serialize(self, data, memo):
         data['parser'] = data['parser'].serialize()
     def init_traditional_lexer(self, lexer_conf):
@@ -54,18 +57,18 @@ class WithLexer(Serialize):
 class LALR_TraditionalLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
         debug = options.debug if options else False
-        self.parser = lalr_parser.Parser(parser_conf, debug=debug)
+        self.parser = LALR_Parser(parser_conf, debug=debug)
         self.init_traditional_lexer(lexer_conf)
 class LALR_ContextualLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
         debug = options.debug if options else False
-        self.parser = lalr_parser.Parser(parser_conf, debug=debug)
+        self.parser = LALR_Parser(parser_conf, debug=debug)
         self.init_contextual_lexer(lexer_conf)
 class LALR_CustomLexer(WithLexer):
     def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None):
-        self.parser = lalr_parser.Parser(parser_conf)
+        self.parser = LALR_Parser(parser_conf)
         self.lexer_conf = lexer_conf
         self.lexer = lexer_cls(lexer_conf)
@@ -190,3 +193,5 @@ def get_frontend(parser, lexer):
+###}
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -14,6 +14,8 @@ from ..exceptions import GrammarError
 from .grammar_analysis import GrammarAnalyzer, Terminal
+###{standalone
 class Action:
     def __init__(self, name):
         self.name = name
@@ -50,7 +52,7 @@ class ParseTable:
         }
     @classmethod
-    def deserialize(cls, data):
+    def deserialize(cls, data, memo):
         tokens = data['tokens']
         rules = data['rules']
         states = {
@@ -79,8 +81,7 @@ class IntParseTable(ParseTable):
         end_state = state_to_idx[parse_table.end_state]
         return cls(int_states, start_state, end_state)
+###}
 class LALR_Analyzer(GrammarAnalyzer):
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -9,7 +9,8 @@ from ..utils import Enumerator, Serialize
 from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable
-class Parser:
+###{standalone
+class LALR_Parser(object):
     def __init__(self, parser_conf, debug=False):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
@@ -22,9 +23,9 @@ class Parser:
         self.parser = _Parser(analysis.parse_table, callbacks)
     @classmethod
-    def deserialize(cls, data, callbacks):
+    def deserialize(cls, data, memo, callbacks):
         inst = cls.__new__(cls)
-        inst.parser = _Parser(IntParseTable.deserialize(data), callbacks)
+        inst.parser = _Parser(IntParseTable.deserialize(data, memo), callbacks)
         return inst
     def serialize(self):
@@ -33,7 +34,6 @@ class Parser:
     def parse(self, *args):
         return self.parser.parse(*args)
-###{standalone
 class _Parser:
     def __init__(self, parse_table, callbacks):
--- a/lark/tools/standalone.py
+++ b/lark/tools/standalone.py
@@ -36,6 +36,7 @@
 #
 ###}
+import pprint
 import codecs
 import sys
 import os
@@ -47,6 +48,10 @@ import lark
 from lark import Lark
 from lark.parsers.lalr_analysis import Reduce
+from lark.grammar import RuleOptions
+from lark.lexer import TerminalDef
 _dir = path.dirname(__file__)
 _larkdir = path.join(_dir, path.pardir)
@@ -61,9 +66,12 @@ EXTRACT_STANDALONE_FILES = [
     'lexer.py',
     'parse_tree_builder.py',
     'parsers/lalr_parser.py',
+    'parsers/lalr_analysis.py',
+    'parser_frontends.py',
+    'lark.py',
+    'grammar.py',
 ]
 def extract_sections(lines):
     section = None
     text = []
@@ -83,152 +91,34 @@ def extract_sections(lines):
     return {name:''.join(text) for name, text in sections.items()}
-def _prepare_mres(mres):
-    return [(p.pattern,{i: t for i, t in d.items()}) for p,d in mres]
-class TraditionalLexerAtoms:
-    def __init__(self, lexer):
-        self.mres = _prepare_mres(lexer.mres)
-        self.newline_types = lexer.newline_types
-        self.ignore_types = lexer.ignore_types
-        self.callback = {name:_prepare_mres(c.mres)
-                         for name, c in lexer.callback.items()}
-    def print_python(self):
-        print('import re')
-        print('class LexerRegexps: pass')
-        print('NEWLINE_TYPES = %s' % self.newline_types)
-        print('IGNORE_TYPES = %s' % self.ignore_types)
-        self._print_python('lexer')
-    def _print_python(self, var_name):
-        print('MRES = (')
-        pprint(self.mres)
-        print(')')
-        print('LEXER_CALLBACK = (')
-        pprint(self.callback)
-        print(')')
-        print('lexer_regexps = LexerRegexps()')
-        print('lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]')
-        print('lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres])')
-        print('                          for n, mres in LEXER_CALLBACK.items()}')
-        print('%s = (lexer_regexps)' % var_name)
-class ContextualLexerAtoms:
-    def __init__(self, lexer):
-        self.lexer_atoms = {state: TraditionalLexerAtoms(lexer) for state, lexer in lexer.lexers.items()}
-        self.root_lexer_atoms = TraditionalLexerAtoms(lexer.root_lexer)
-    def print_python(self):
-        print('import re')
-        print('class LexerRegexps: pass')
-        print('NEWLINE_TYPES = %s' % self.root_lexer_atoms.newline_types)
-        print('IGNORE_TYPES = %s' % self.root_lexer_atoms.ignore_types)
-        print('LEXERS = {}')
-        for state, lexer_atoms in self.lexer_atoms.items():
-            lexer_atoms._print_python('LEXERS[%d]' % state)
-        print('class ContextualLexer:')
-        print('    def __init__(self):')
-        print('        self.lexers = LEXERS')
-        print('        self.set_parser_state(None)')
-        print('    def set_parser_state(self, state):')
-        print('        self.parser_state = state')
-        print('    def lex(self, stream):')
-        print('        newline_types = NEWLINE_TYPES')
-        print('        ignore_types = IGNORE_TYPES')
-        print('        lexers = LEXERS')
-        print('        l = _Lex(lexers[self.parser_state], self.parser_state)')
-        print('        for x in l.lex(stream, newline_types, ignore_types):')
-        print('            yield x')
-        print('            l.lexer = lexers[self.parser_state]')
-        print('            l.state = self.parser_state')
-        print('CON_LEXER = ContextualLexer()')
-        print('def lex(stream):')
-        print('    return CON_LEXER.lex(stream)')
-class GetRule:
-    def __init__(self, rule_id):
-        self.rule_id = rule_id
-    def __repr__(self):
-        return 'RULES[%d]' % self.rule_id
-rule_ids = {}
-token_types = {}
-def _get_token_type(token_type):
-    if token_type not in token_types:
-        token_types[token_type] = len(token_types)
-    return token_types[token_type]
-class ParserAtoms:
-    def __init__(self, parser):
-        self.parse_table = parser._parse_table
-    def print_python(self):
-        print('class ParseTable: pass')
-        print('parse_table = ParseTable()')
-        print('STATES = {')
-        for state, actions in self.parse_table.states.items():
-            print('    %r: %r,' % (state, {_get_token_type(token): ((1, rule_ids[arg]) if action is Reduce else (0, arg))
-                                           for token, (action, arg) in actions.items()}))
-        print('}')
-        print('TOKEN_TYPES = (')
-        pprint({v:k for k, v in token_types.items()})
-        print(')')
-        print('parse_table.states = {s: {TOKEN_TYPES[t]: (a, RULES[x] if a is Reduce else x) for t, (a, x) in acts.items()}')
-        print('                      for s, acts in STATES.items()}')
-        print('parse_table.start_state = %s' % self.parse_table.start_state)
-        print('parse_table.end_state = %s' % self.parse_table.end_state)
-        print('class Lark_StandAlone:')
-        print('    def __init__(self, transformer=None, postlex=None):')
-        print('        callbacks = parse_tree_builder.create_callback(transformer=transformer)')
-        print('        self.parser = _Parser(parse_table, callbacks)')
-        print('        self.postlex = postlex')
-        print('    def parse(self, stream):')
-        print('        tokens = lex(stream)')
-        print('        sps = CON_LEXER.set_parser_state')
-        print('        if self.postlex: tokens = self.postlex.process(tokens)')
-        print('        return self.parser.parse(tokens, sps)')
-class TreeBuilderAtoms:
-    def __init__(self, lark):
-        self.rules = lark.rules
-    def print_python(self):
-        # print('class InlineTransformer: pass')
-        print('RULES = {')
-        for i, r in enumerate(self.rules):
-            rule_ids[r] = i
-            print('  %d: Rule(%r, [%s], alias=%r, options=%r),' % (i, r.origin, ', '.join(s.fullrepr for s in r.expansion), r.alias, r.options ))
-        print('}')
-        print('parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree)')
 def main(fobj, start):
     lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
-    lexer_atoms = ContextualLexerAtoms(lark_inst.parser.lexer)
-    parser_atoms = ParserAtoms(lark_inst.parser.parser)
-    tree_builder_atoms = TreeBuilderAtoms(lark_inst)
     print('# The file was automatically generated by Lark v%s' % lark.__version__)
     for pyfile in EXTRACT_STANDALONE_FILES:
         with open(os.path.join(_larkdir, pyfile)) as f:
             print (extract_sections(f)['standalone'])
-    with open(os.path.join(_larkdir, 'grammar.py')) as grammar_py:
-        print(grammar_py.read())
+    data, m = lark_inst.memo_serialize([TerminalDef])
+    print( 'DATA = (' )
+    # pprint(data, width=160)
+    print(data)
+    print(')')
+    print( 'MEMO = (')
+    print(m)
+    print(')')
     print('Shift = 0')
     print('Reduce = 1')
-    lexer_atoms.print_python()
-    tree_builder_atoms.print_python()
-    parser_atoms.print_python()
+    print("def load_parser():")
+    print("    return Lark.deserialize(DATA)")
 if __name__ == '__main__':
     if len(sys.argv) < 2:
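Hedged usage note, not part of the diff: the generator is still invoked as before, e.g. `python -m lark.tools.standalone my_grammar.lark start > my_parser.py` (file names here are placeholders), but the emitted module now carries the DATA/MEMO literals plus a load_parser() helper instead of the hand-printed lexer and parse-table atoms. The expected shape of use is roughly:

    import my_parser                  # hypothetical generated standalone module
    parser = my_parser.load_parser()  # rebuilds the parser from the embedded DATA
    tree = parser.parse('input text matching the grammar')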
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -1,8 +1,6 @@
 import sys
 from collections import deque
-Py36 = (sys.version_info[:2] >= (3, 6))
 class fzset(frozenset):
     def __repr__(self):
         return '{%s}' % ', '.join(map(repr, self))
@@ -44,56 +42,90 @@ def bfs(initial, expand):
-def _serialize(value):
+###{standalone
+import sys, re
+Py36 = (sys.version_info[:2] >= (3, 6))
+def _serialize(value, memo):
     if isinstance(value, Serialize):
-        return value.serialize()
+        return value.serialize(memo)
     elif isinstance(value, list):
-        return [_serialize(elem) for elem in value]
+        return [_serialize(elem, memo) for elem in value]
     elif isinstance(value, frozenset):
         return list(value)  # TODO reversible?
     elif isinstance(value, dict):
-        return {key:_serialize(elem) for key, elem in value.items()}
+        return {key:_serialize(elem, memo) for key, elem in value.items()}
     return value
-def _deserialize(data, namespace):
+def _deserialize(data, namespace, memo):
     if isinstance(data, dict):
         if '__type__' in data:  # Object
             class_ = namespace[data['__type__']]
-            return class_.deserialize(data)
-        return {key:_deserialize(value, namespace) for key, value in data.items()}
+            return class_.deserialize(data, memo)
+        return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
     elif isinstance(data, list):
-        return [_deserialize(value, namespace) for value in data]
+        return [_deserialize(value, namespace, memo) for value in data]
     return data
 class Serialize(object):
-    def serialize(self):
+    def memo_serialize(self, types_to_memoize):
+        memo = SerializeMemoizer(types_to_memoize)
+        return self.serialize(memo), memo.serialize()
+    def serialize(self, memo=None):
+        if memo and memo.in_types(self):
+            return {'__memo__': memo.memoized.get(self)}
         fields = getattr(self, '__serialize_fields__')
-        res = {f: _serialize(getattr(self, f)) for f in fields}
+        res = {f: _serialize(getattr(self, f), memo) for f in fields}
         res['__type__'] = type(self).__name__
         postprocess = getattr(self, '_serialize', None)
         if postprocess:
-            postprocess(res)
+            postprocess(res, memo)
         return res
     @classmethod
-    def deserialize(cls, data):
-        namespace = getattr(cls, '__serialize_namespace__', dict)
-        namespace = {c.__name__:c for c in namespace()}
+    def deserialize(cls, data, memo):
+        namespace = getattr(cls, '__serialize_namespace__', {})
+        namespace = {c.__name__:c for c in namespace}
         fields = getattr(cls, '__serialize_fields__')
+        if '__memo__' in data:
+            return memo[data['__memo__']]
         inst = cls.__new__(cls)
         for f in fields:
-            setattr(inst, f, _deserialize(data[f], namespace))
+            setattr(inst, f, _deserialize(data[f], namespace, memo))
         postprocess = getattr(inst, '_deserialize', None)
         if postprocess:
             postprocess()
         return inst
+class SerializeMemoizer(Serialize):
+    __serialize_fields__ = 'memoized',
+    def __init__(self, types_to_memoize):
+        self.types_to_memoize = tuple(types_to_memoize)
+        self.memoized = Enumerator()
+    def in_types(self, value):
+        return isinstance(value, self.types_to_memoize)
+    def serialize(self):
+        return _serialize(self.memoized.reversed(), None)
+    @classmethod
+    def deserialize(cls, data, namespace, memo):
+        return _deserialize(data, namespace, memo)
-###{standalone
 try:
     STRING_TYPE = basestring
 except NameError: # Python 3
@@ -178,7 +210,7 @@ def get_regexp_width(regexp):
         raise ValueError(regexp)
-class Enumerator:
+class Enumerator(Serialize):
     def __init__(self):
         self.enums = {}
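To make the memoization mechanics concrete, here is a toy sketch with invented Thing/Holder classes rather than Lark's real ones. It only exercises the serializing side, which follows directly from Serialize.serialize and SerializeMemoizer above, and it assumes Enumerator.get hands out sequential ids (not shown in this diff):

    from lark.utils import Serialize

    class Thing(Serialize):
        __serialize_fields__ = 'name',
        def __init__(self, name):
            self.name = name

    class Holder(Serialize):
        __serialize_fields__ = 'things',
        __serialize_namespace__ = Thing,
        def __init__(self, things):
            self.things = things

    shared = Thing('x')
    data, memo = Holder([shared, shared]).memo_serialize([Thing])
    # The shared Thing is emitted once and referenced by id:
    #   data['things'] == [{'__memo__': 0}, {'__memo__': 0}]
    #   memo == {0: {'name': 'x', '__type__': 'Thing'}}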