From 335206911d1cc7f9f6eee2bd7946cc2b5d525deb Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 7 Apr 2019 17:47:32 +0300 Subject: [PATCH] Basic serialize/deserialize working! --- lark/grammar.py | 34 +++++++++++++--- lark/lark.py | 79 ++++++++++++++++++++++++++++++------- lark/lexer.py | 35 +++++++++++++++- lark/parse_tree_builder.py | 4 +- lark/parser_frontends.py | 20 ++++++---- lark/parsers/lalr_parser.py | 7 +++- 6 files changed, 146 insertions(+), 33 deletions(-) diff --git a/lark/grammar.py b/lark/grammar.py index 8691f10..0dc1c21 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -19,6 +19,15 @@ class Symbol(object): fullrepr = property(__repr__) + @classmethod + def deserialize(cls, data): + class_ = { + 'T': Terminal, + 'NT': NonTerminal, + }[data[0]] + return class_(*data[1:]) + + class Terminal(Symbol): is_term = True @@ -71,17 +80,26 @@ class Rule(object): return self.origin == other.origin and self.expansion == other.expansion def serialize(self): - return [self.origin.serialize(), list(s.serialize() for s in self.expansion), self.alias, self.options.serialize() if self.options else None] - # def deserialize(self): - # return [self.origin.serialize(), list(s.serialize() for s in self.expansion), self.alias, self.options.serialize() if self.options else None] + return [self.origin.serialize(), list(s.serialize() for s in self.expansion), self.order, self.alias, self.options.serialize() if self.options else None] + + @classmethod + def deserialize(cls, data): + origin, expansion, order, alias, options = data + return cls( + Symbol.deserialize(origin), + [Symbol.deserialize(s) for s in expansion], + order, + alias, + RuleOptions.deserialize(options) if options else None + ) class RuleOptions: - def __init__(self, keep_all_tokens=False, expand1=False, priority=None): + def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()): self.keep_all_tokens = keep_all_tokens self.expand1 = expand1 self.priority = priority - self.empty_indices = () + self.empty_indices = empty_indices def __repr__(self): return 'RuleOptions(%r, %r, %r)' % ( @@ -91,4 +109,8 @@ class RuleOptions: ) def serialize(self): - return [self.keep_all_tokens, self.expand1, self.priority, list(self.empty_indices)] \ No newline at end of file + return [self.keep_all_tokens, self.expand1, self.priority, list(self.empty_indices)] + + @classmethod + def deserialize(cls, data): + return cls(*data) diff --git a/lark/lark.py b/lark/lark.py index eb73271..9fa5017 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -51,24 +51,39 @@ class LarkOptions(object): if __doc__: __doc__ += OPTIONS_DOC + _defaults = { + 'debug': False, + 'keep_all_tokens': False, + 'tree_class': Tree, + 'cache_grammar': False, + 'postlex': None, + 'parser': 'earley', + 'lexer': 'auto', + 'transformer': None, + 'start': 'start', + 'profile': False, + 'priority': 'auto', + 'ambiguity': 'auto', + 'propagate_positions': False, + 'lexer_callbacks': {}, + 'maybe_placeholders': False, + } + def __init__(self, options_dict): o = dict(options_dict) - self.debug = bool(o.pop('debug', False)) - self.keep_all_tokens = bool(o.pop('keep_all_tokens', False)) - self.tree_class = o.pop('tree_class', Tree) - self.cache_grammar = o.pop('cache_grammar', False) - self.postlex = o.pop('postlex', None) - self.parser = o.pop('parser', 'earley') - self.lexer = o.pop('lexer', 'auto') - self.transformer = o.pop('transformer', None) - self.start = o.pop('start', 'start') - self.profile = o.pop('profile', False) - self.priority = o.pop('priority', 'auto') - self.ambiguity = o.pop('ambiguity', 'auto') - self.propagate_positions = o.pop('propagate_positions', False) - self.lexer_callbacks = o.pop('lexer_callbacks', {}) - self.maybe_placeholders = o.pop('maybe_placeholders', False) + options = {} + for name, default in self._defaults.items(): + if name in o: + value = o.pop(name) + if isinstance(default, bool): + value = bool(value) + else: + value = default + + options[name] = value + + self.__dict__['options'] = options assert self.parser in ('earley', 'lalr', 'cyk', None) @@ -79,6 +94,18 @@ class LarkOptions(object): if o: raise ValueError("Unknown options: %s" % o.keys()) + def __getattr__(self, name): + return self.options[name] + def __setattr__(self, name, value): + self.options[name] = value + + def serialize(self): + return self.options + + @classmethod + def deserialize(cls, data): + return cls(data) + class Profiler: def __init__(self): @@ -208,6 +235,28 @@ class Lark: return self.parser_class(self.lexer_conf, parser_conf, options=self.options) + def serialize(self): + return { + 'parser': self.parser.serialize(), + 'rules': [r.serialize() for r in self.rules], + 'options': self.options.serialize(), + } + + @classmethod + def deserialize(cls, data): + from .grammar import Rule + inst = cls.__new__(cls) + + rules = [Rule.deserialize(r) for r in data['rules']] + options = LarkOptions.deserialize(data['options']) + + ptb = ParseTreeBuilder(rules, options.tree_class, options.propagate_positions, options.keep_all_tokens, options.parser!='lalr' and options.ambiguity=='explicit', options.maybe_placeholders) + callbacks = ptb.create_callback(None) + + parser_class = get_frontend(options.parser, options.lexer) + inst.parser = parser_class.deserialize(data['parser'], callbacks) + return inst + @classmethod def open(cls, grammar_filename, rel_to=None, **options): diff --git a/lark/lexer.py b/lark/lexer.py index 00ff35c..0a7eaed 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -68,6 +68,10 @@ class TerminalDef(object): def serialize(self): return [self.name, self.pattern, self.priority] + @classmethod + def deserialize(cls, data): + return cls(*data) + ###{standalone @@ -268,6 +272,14 @@ class Lexer: set_parser_state = NotImplemented lex = NotImplemented + @classmethod + def deserialize(cls, data): + class_ = { + 'traditional': TraditionalLexer, + 'contextual': ContextualLexer, + }[data['type']] + return class_.deserialize(data) + class TraditionalLexer(Lexer): def __init__(self, terminals, ignore=(), user_callbacks={}): assert all(isinstance(t, TerminalDef) for t in terminals), terminals @@ -312,11 +324,22 @@ class TraditionalLexer(Lexer): def serialize(self): return { + 'type': 'traditional', 'terminals': [t.serialize() for t in self.terminals], 'ignore_types': self.ignore_types, 'newline_types': self.newline_types, } + @classmethod + def deserialize(cls, data): + inst = cls.__new__(cls) + inst.terminals = [TerminalDef.deserialize(t) for t in data['terminals']] + inst.mres = build_mres(inst.terminals) + inst.ignore_types = data['ignore_types'] + inst.newline_types = data['newline_types'] + inst.callback = {} # TODO implement + return inst + class ContextualLexer(Lexer): def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}): @@ -354,5 +377,15 @@ class ContextualLexer(Lexer): l.state = self.parser_state def serialize(self): - return {state: lexer.serialize() for state, lexer in self.lexers.items()} + return { + 'type': 'contextual', + 'root_lexer': self.root_lexer.serialize(), + 'lexers': {state: lexer.serialize() for state, lexer in self.lexers.items()} + } + @classmethod + def deserialize(cls, data): + inst = cls.__new__(cls) + inst.lexers = {state:Lexer.deserialize(lexer) for state, lexer in data['lexers'].items()} + inst.root_lexer = TraditionalLexer.deserialize(data['root_lexer']) + return inst \ No newline at end of file diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index ca12d5f..977c371 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -209,12 +209,12 @@ class ParseTreeBuilder: keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) expand_single_child = options.expand1 if options else False - wrapper_chain = filter(None, [ + wrapper_chain = list(filter(None, [ (expand_single_child and not rule.alias) and ExpandSingleChild, maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None), self.propagate_positions and PropagatePositions, self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), - ]) + ])) yield rule, wrapper_chain diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index b93592c..e9d3b1b 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -15,11 +15,13 @@ class WithLexer(object): def init_traditional_lexer(self, lexer_conf): self.lexer_conf = lexer_conf self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks) + self.postlex = lexer_conf.postlex def init_contextual_lexer(self, lexer_conf): self.lexer_conf = lexer_conf + self.postlex = lexer_conf.postlex states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} - always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else () + always_accept = self.postlex.always_accept if self.postlex else () self.lexer = ContextualLexer(lexer_conf.tokens, states, ignore=lexer_conf.ignore, always_accept=always_accept, @@ -27,8 +29,8 @@ class WithLexer(object): def lex(self, text): stream = self.lexer.lex(text) - if self.lexer_conf.postlex: - return self.lexer_conf.postlex.process(stream) + if self.postlex: + return self.postlex.process(stream) return stream def parse(self, text): @@ -38,15 +40,19 @@ class WithLexer(object): def serialize(self): return { - # 'class': type(self).__name__, + 'type': type(self).__name__, 'parser': self.parser.serialize(), 'lexer': self.lexer.serialize(), } @classmethod - def deserialize(cls, data): - inst = cls.__new__(cls) - inst.parser = lalr_parser.Parser.deserialize(data['parser']) + def deserialize(cls, data, callbacks): + class_ = globals()[data['type']] # XXX unsafe + parser = lalr_parser.Parser.deserialize(data['parser'], callbacks) + assert parser + inst = class_.__new__(class_) + inst.parser = parser inst.lexer = Lexer.deserialize(data['lexer']) + inst.postlex = None # TODO return inst diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 4de9496..c943693 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -59,7 +59,7 @@ class Parser(object): } @classmethod - def deserialize(cls, data): + def deserialize(cls, data, callbacks): tokens = data['tokens'] rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()} states = { @@ -68,7 +68,10 @@ class Parser(object): for state, actions in data['states'].items() } parse_table = IntParseTable(states, data['start_state'], data['end_state']) - print(parse_table) + inst = cls.__new__(cls) + inst.parser = _Parser(parse_table, callbacks) + inst.parse = inst.parser.parse + return inst