From fcaf10ac4d42c7f1017e2c82a51f224a810b2bcd Mon Sep 17 00:00:00 2001
From: Inky <47245667+Inky-developer@users.noreply.github.com>
Date: Sat, 18 Jul 2020 19:15:39 +0200
Subject: [PATCH] Fixes caching when custom lexers are used

---
 lark-stubs/lark.pyi      |  1 +
 lark/parser_frontends.py | 11 ++++++++---
 tests/test_cache.py      | 16 ++++++++++++++++
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi
index 511e0ad..d601fc2 100644
--- a/lark-stubs/lark.pyi
+++ b/lark-stubs/lark.pyi
@@ -55,6 +55,7 @@ class Lark:
         propagate_positions: bool = False,
         maybe_placeholders: bool = False,
         lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
+        cache: Union[bool, str] = False,
         g_regex_flags: int = ...
     ):
         ...
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index 08f4756..c05f235 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -1,5 +1,3 @@
-from functools import partial
-
 from .utils import get_regexp_width, Serialize
 from .parsers.grammar_analysis import GrammarAnalyzer
 from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
@@ -20,7 +18,14 @@ def get_frontend(parser, lexer):
         elif lexer == 'contextual':
             return LALR_ContextualLexer
         elif issubclass(lexer, Lexer):
-            return partial(LALR_CustomLexer, lexer)
+            class LALR_CustomLexerWrapper(LALR_CustomLexer):
+                def __init__(self, lexer_conf, parser_conf, options=None):
+                    super(LALR_CustomLexerWrapper, self).__init__(
+                        lexer, lexer_conf, parser_conf, options=options)
+                def init_lexer(self):
+                    self.lexer = lexer(self.lexer_conf)
+
+            return LALR_CustomLexerWrapper
         else:
             raise ValueError('Unknown lexer: %s' % lexer)
     elif parser=='earley':
diff --git a/tests/test_cache.py b/tests/test_cache.py
index 9436081..ca4d781 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -4,6 +4,7 @@ import sys
 from unittest import TestCase, main
 
 from lark import Lark, Tree
+from lark.lexer import Lexer, Token
 import lark.lark as lark_module
 
 try:
@@ -38,6 +39,15 @@ class MockFS:
         return name in self.files
 
 
+class CustomLexer(Lexer):
+    def __init__(self, lexer_conf):
+        pass
+
+    def lex(self, data):
+        for obj in data:
+            yield Token('A', obj)
+
+
 class TestCache(TestCase):
     def setUp(self):
         pass
@@ -70,6 +80,12 @@ class TestCache(TestCase):
             parser = Lark(g, parser='lalr', cache=True)
             assert parser.parse('a') == Tree('start', [])
 
+            # Test with custom lexer
+            mock_fs.files = {}
+            parser = Lark(g, parser='lalr', lexer=CustomLexer, cache=True)
+            parser = Lark(g, parser='lalr', lexer=CustomLexer, cache=True)
+            assert len(mock_fs.files) == 1
+            assert parser.parse('a') == Tree('start', [])
         finally:
             lark_module.FS = fs
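
Usage sketch (not part of the patch itself): get_frontend() previously returned functools.partial(LALR_CustomLexer, lexer) for custom lexer classes, and that partial object apparently could not stand in for a real frontend class when the parser is saved and restored by cache=True; the patch replaces it with a proper subclass, LALR_CustomLexerWrapper. The example below mirrors the new test and is only a sketch: the grammar string and the 'A' token type come from the test, and unlike the test (which swaps in MockFS) a real run of cache=True writes a cache file to disk.

    from lark import Lark, Tree
    from lark.lexer import Lexer, Token

    class CustomLexer(Lexer):
        def __init__(self, lexer_conf):
            pass

        def lex(self, data):
            # Toy lexer: emit one 'A' token per input character.
            for obj in data:
                yield Token('A', obj)

    # With this patch, a custom lexer class can be combined with cache=True;
    # the second construction is expected to load the cached parser instead
    # of rebuilding the grammar.
    parser = Lark('start: "a"', parser='lalr', lexer=CustomLexer, cache=True)
    parser = Lark('start: "a"', parser='lalr', lexer=CustomLexer, cache=True)
    assert parser.parse('a') == Tree('start', [])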