From 8842928963d265e35b9da2c1e2a2acadbee4151a Mon Sep 17 00:00:00 2001
From: Erez Sh
Date: Fri, 13 Dec 2019 09:30:41 +0200
Subject: [PATCH] Fixed multithreading bug in ContextualLexer (Issue #493)

---
 lark/lexer.py            | 19 +++++++------------
 lark/parser_frontends.py | 16 ++++++++++++----
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/lark/lexer.py b/lark/lexer.py
index 871b25e..9d26318 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -288,10 +288,7 @@ class Lexer(object):
 
     Method Signatures:
         lex(self, stream) -> Iterator[Token]
-
-        set_parser_state(self, state) # Optional
     """
-    set_parser_state = NotImplemented
     lex = NotImplemented
 
 
@@ -349,6 +346,7 @@ class TraditionalLexer(Lexer):
 
 
 class ContextualLexer(Lexer):
+
     def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
         tokens_by_name = {}
         for t in terminals:
@@ -371,18 +369,15 @@ class ContextualLexer(Lexer):
 
         self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks)
 
-        self.set_parser_state(None) # Needs to be set on the outside
-
-    def set_parser_state(self, state):
-        self.parser_state = state
-
-    def lex(self, stream):
-        l = _Lex(self.lexers[self.parser_state], self.parser_state)
+    def lex(self, stream, get_parser_state):
+        parser_state = get_parser_state()
+        l = _Lex(self.lexers[parser_state], parser_state)
         try:
             for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
                 yield x
-                l.lexer = self.lexers[self.parser_state]
-                l.state = self.parser_state
+                parser_state = get_parser_state()
+                l.lexer = self.lexers[parser_state]
+                l.state = parser_state # For debug only, no need to worry about multithreading
         except UnexpectedCharacters as e:
             # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined,
             # but not in the current context.
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index ec82299..8b42772 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -79,14 +79,13 @@ class WithLexer(_ParserFrontend):
     def _serialize(self, data, memo):
         data['parser'] = data['parser'].serialize(memo)
 
-    def lex(self, text):
-        stream = self.lexer.lex(text)
+    def lex(self, *args):
+        stream = self.lexer.lex(*args)
         return self.postlex.process(stream) if self.postlex else stream
 
     def parse(self, text, start=None):
         token_stream = self.lex(text)
-        sps = self.lexer.set_parser_state
-        return self._parse(token_stream, start, *[sps] if sps is not NotImplemented else [])
+        return self._parse(token_stream, start)
 
     def init_traditional_lexer(self):
         self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
@@ -114,6 +113,15 @@ class LALR_ContextualLexer(LALR_WithLexer):
                                      ignore=self.lexer_conf.ignore,
                                      always_accept=always_accept,
                                      user_callbacks=self.lexer_conf.callbacks)
+
+
+    def parse(self, text, start=None):
+        parser_state = [None]
+        def set_parser_state(s):
+            parser_state[0] = s
+
+        token_stream = self.lex(text, lambda: parser_state[0])
+        return self._parse(token_stream, start, set_parser_state)
 ###}
 
 class LALR_CustomLexer(LALR_WithLexer):
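
Note on the fix: before this patch, ContextualLexer stored the parser's
current state on the shared lexer object (self.parser_state), so two threads
parsing through the same Lark instance would overwrite each other's lexer
context. The patch moves that state into a closure created per parse() call,
and lex() re-reads it through the get_parser_state callable after every
token. A minimal self-contained sketch of the pattern, using toy classes
(not lark's real ones) for illustration:

    import threading

    class CallbackLexer:
        # Post-fix shape: the lexer holds no parser state of its own; it
        # asks a callable for the current state before emitting each token,
        # just as the patched ContextualLexer.lex does after every yield.
        def lex(self, text, get_parser_state):
            for ch in text:
                yield (get_parser_state(), ch)

    def parse(lexer, text, results, i):
        # Per-call state, as in the new LALR_ContextualLexer.parse: a
        # one-element list captured by a closure, not an attribute on the
        # shared lexer object. (In lark, the LALR parser updates it via
        # set_parser_state; here each call just uses its own id.)
        parser_state = [i]
        results[i] = list(lexer.lex(text, lambda: parser_state[0]))

    lexer = CallbackLexer()  # one lexer instance shared by every thread
    results = {}
    threads = [threading.Thread(target=parse, args=(lexer, "ab", results, i))
               for i in range(4)]
    for t in threads: t.start()
    for t in threads: t.join()
    # Each call saw only its own state, despite the shared lexer:
    assert all(s == i for i, toks in results.items() for s, _ in toks)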
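With the state handed through a closure, sharing one contextual-lexer parser
across threads becomes safe. A hedged usage sketch (toy grammar; assumed
representative of the multithreaded scenario behind issue #493):

    import threading
    from lark import Lark

    grammar = r"""
    start: WORD+
    WORD: /[a-z]+/
    %ignore " "
    """

    # A single shared instance, as before; no per-thread parsers needed.
    parser = Lark(grammar, parser='lalr', lexer='contextual')

    def work():
        for _ in range(100):
            parser.parse("hello world")  # the call this commit makes thread-safe

    threads = [threading.Thread(target=work) for _ in range(4)]
    for t in threads: t.start()
    for t in threads: t.join()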