From 96873d64ba8ef85fcad1daa2dd2e9bf931eb06ba Mon Sep 17 00:00:00 2001
From: Blank Spruce <32396809+BlankSpruce@users.noreply.github.com>
Date: Thu, 13 Aug 2020 18:09:05 +0200
Subject: [PATCH 1/2] Make transformer work with tokens in standalone parser,
 fixes #648

---
 lark/common.py           |  3 ---
 lark/lark.py             |  9 ++++++++-
 lark/parser_frontends.py | 16 +++++++++++++---
 tests/test_tools.py      | 27 +++++++++++++++++++++++++++
 4 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/lark/common.py b/lark/common.py
index cc8c73c..714399a 100644
--- a/lark/common.py
+++ b/lark/common.py
@@ -17,9 +17,6 @@ class LexerConf(Serialize):
         self.skip_validation = skip_validation
         self.use_bytes = use_bytes
 
-    def _deserialize(self):
-        self.callbacks = {} # TODO
-
 ###}
 
 class ParserConf:
diff --git a/lark/lark.py b/lark/lark.py
index daab45b..3ed96d7 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -344,7 +344,14 @@ class Lark(Serialize):
         self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
         self.source = ''
         self._prepare_callbacks()
-        self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex, re_module)
+        self.parser = self.parser_class.deserialize(
+            data['parser'],
+            memo,
+            self._callbacks,
+            self.options.postlex,
+            self.options.transformer,
+            re_module
+        )
         return self
 
     @classmethod
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 33ad9bc..a45bf9c 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -1,6 +1,6 @@
 from .utils import get_regexp_width, Serialize
 from .parsers.grammar_analysis import GrammarAnalyzer
-from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
+from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef
 from .parsers import earley, xearley, cyk
 from .parsers.lalr_parser import LALR_Parser
 from .grammar import Rule
@@ -58,6 +58,16 @@ class _ParserFrontend(Serialize):
         return self.parser.parse(input, start, *args)
 
 
+def _recreate_lexer_callbacks(memo, transformer):
+    result = {}
+    terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]
+    for terminal in terminals:
+        callback = getattr(transformer, terminal.name, None)
+        if callback is not None:
+            result[terminal.name] = callback
+    return result
+
+
 class WithLexer(_ParserFrontend):
     lexer = None
     parser = None
@@ -73,10 +83,11 @@ class WithLexer(_ParserFrontend):
         self.postlex = lexer_conf.postlex
 
     @classmethod
-    def deserialize(cls, data, memo, callbacks, postlex, re_module):
+    def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module):
         inst = super(WithLexer, cls).deserialize(data, memo)
         inst.postlex = postlex
         inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
+        inst.lexer_conf.callbacks = _recreate_lexer_callbacks(memo, transformer)
         inst.lexer_conf.re_module = re_module
         inst.lexer_conf.skip_validation=True
         inst.init_lexer()
@@ -229,4 +240,3 @@ class CYK(WithLexer):
 
     def _apply_callback(self, tree):
         return self.callbacks[tree.rule](tree.children)
-
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 1e0d78e..e691237 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -106,6 +106,33 @@ class TestStandalone(TestCase):
         x = l.parse('(\n)\n')
         self.assertEqual(x, Tree('start', []))
 
+    def test_transformer(self):
+        grammar = r"""
+        start: some_rule "(" SOME_TERMINAL ")"
+        some_rule: SOME_TERMINAL
+        SOME_TERMINAL: /[A-Za-z_][A-Za-z0-9_]*/
+        """
+        context = self._create_standalone(grammar)
+        _Lark = context["Lark_StandAlone"]
+
+        _Token = context["Token"]
+        _Tree = context["Tree"]
+
+        class MyTransformer(context["Transformer"]):
+            def SOME_TERMINAL(self, token):
+                return _Token("SOME_TERMINAL", "token is transformed")
+
+            def some_rule(self, children):
+                return _Tree("rule_is_transformed", [])
+
+        parser = _Lark(transformer=MyTransformer())
+        self.assertEqual(
+            parser.parse("FOO(BAR)"),
+            _Tree("start", [
+                _Tree("rule_is_transformed", []),
+                _Token("SOME_TERMINAL", "token is transformed")
+            ])
+        )
 
 
 if __name__ == '__main__':

From 2f4831f9b6dd857dcb3b8d53a8839474d3c5e5f7 Mon Sep 17 00:00:00 2001
From: Erez Sh
Date: Thu, 13 Aug 2020 21:13:42 +0300
Subject: [PATCH 2/2] Small refactor after PR

---
 lark/lark.py             | 12 +++++-------
 lark/parser_frontends.py |  9 ++++++---
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/lark/lark.py b/lark/lark.py
index 3ed96d7..8371943 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -11,7 +11,7 @@
 from .common import LexerConf, ParserConf
 from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken
 from .parse_tree_builder import ParseTreeBuilder
-from .parser_frontends import get_frontend
+from .parser_frontends import get_frontend, _get_lexer_callbacks
 from .grammar import Rule
 
 import re
@@ -278,12 +278,10 @@ class Lark(Serialize):
             rule.options.priority = None
 
         # TODO Deprecate lexer_callbacks?
-        lexer_callbacks = dict(self.options.lexer_callbacks)
-        if self.options.transformer:
-            t = self.options.transformer
-            for term in self.terminals:
-                if hasattr(t, term.name):
-                    lexer_callbacks[term.name] = getattr(t, term.name)
+        lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals)
+                           if self.options.transformer
+                           else {})
+        lexer_callbacks.update(self.options.lexer_callbacks)
 
         self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes)
 
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index a45bf9c..b993b9f 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -58,9 +58,8 @@ class _ParserFrontend(Serialize):
         return self.parser.parse(input, start, *args)
 
 
-def _recreate_lexer_callbacks(memo, transformer):
+def _get_lexer_callbacks(transformer, terminals):
     result = {}
-    terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]
     for terminal in terminals:
         callback = getattr(transformer, terminal.name, None)
         if callback is not None:
@@ -85,12 +84,16 @@ class WithLexer(_ParserFrontend):
     @classmethod
     def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module):
         inst = super(WithLexer, cls).deserialize(data, memo)
+
         inst.postlex = postlex
         inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
-        inst.lexer_conf.callbacks = _recreate_lexer_callbacks(memo, transformer)
+
+        terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]
+        inst.lexer_conf.callbacks = _get_lexer_callbacks(transformer, terminals)
+        inst.lexer_conf.re_module = re_module
+        inst.lexer_conf.skip_validation=True
+        inst.init_lexer()
+
+        return inst
 
     def _serialize(self, data, memo):