Browse Source

Fixed multithreading bug in ContextualLexer (Issue #493)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.0
Erez Sh 6 years ago
parent
commit
8842928963
2 changed files with 19 additions and 16 deletions
  1. lark/lexer.py (+7, -12)
  2. lark/parser_frontends.py (+12, -4)

+ 7
- 12
lark/lexer.py View File

@@ -288,10 +288,7 @@ class Lexer(object):


Method Signatures: Method Signatures:
lex(self, stream) -> Iterator[Token] lex(self, stream) -> Iterator[Token]

set_parser_state(self, state) # Optional
""" """
set_parser_state = NotImplemented
lex = NotImplemented lex = NotImplemented




@@ -349,6 +346,7 @@ class TraditionalLexer(Lexer):




class ContextualLexer(Lexer): class ContextualLexer(Lexer):

def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}): def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
tokens_by_name = {} tokens_by_name = {}
for t in terminals: for t in terminals:
@@ -371,18 +369,15 @@ class ContextualLexer(Lexer):


self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks) self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks)


self.set_parser_state(None) # Needs to be set on the outside

def set_parser_state(self, state):
self.parser_state = state

def lex(self, stream):
l = _Lex(self.lexers[self.parser_state], self.parser_state)
def lex(self, stream, get_parser_state):
parser_state = get_parser_state()
l = _Lex(self.lexers[parser_state], parser_state)
try: try:
for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
yield x yield x
l.lexer = self.lexers[self.parser_state]
l.state = self.parser_state
parser_state = get_parser_state()
l.lexer = self.lexers[parser_state]
l.state = parser_state # For debug only, no need to worry about multithreading
except UnexpectedCharacters as e: except UnexpectedCharacters as e:
# In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined,
# but not in the current context. # but not in the current context.


+ 12
- 4
lark/parser_frontends.py View File

@@ -79,14 +79,13 @@ class WithLexer(_ParserFrontend):
def _serialize(self, data, memo): def _serialize(self, data, memo):
data['parser'] = data['parser'].serialize(memo) data['parser'] = data['parser'].serialize(memo)


def lex(self, text):
stream = self.lexer.lex(text)
def lex(self, *args):
stream = self.lexer.lex(*args)
return self.postlex.process(stream) if self.postlex else stream return self.postlex.process(stream) if self.postlex else stream


def parse(self, text, start=None): def parse(self, text, start=None):
token_stream = self.lex(text) token_stream = self.lex(text)
sps = self.lexer.set_parser_state
return self._parse(token_stream, start, *[sps] if sps is not NotImplemented else [])
return self._parse(token_stream, start)


def init_traditional_lexer(self): def init_traditional_lexer(self):
self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks) self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
@@ -114,6 +113,15 @@ class LALR_ContextualLexer(LALR_WithLexer):
ignore=self.lexer_conf.ignore, ignore=self.lexer_conf.ignore,
always_accept=always_accept, always_accept=always_accept,
user_callbacks=self.lexer_conf.callbacks) user_callbacks=self.lexer_conf.callbacks)


def parse(self, text, start=None):
parser_state = [None]
def set_parser_state(s):
parser_state[0] = s

token_stream = self.lex(text, lambda: parser_state[0])
return self._parse(token_stream, start, set_parser_state)
###} ###}


class LALR_CustomLexer(LALR_WithLexer): class LALR_CustomLexer(LALR_WithLexer):


Loading…
Cancel
Save