From 83ce0c7a001b412c7676405a8eff7ca164288d2a Mon Sep 17 00:00:00 2001 From: Gary Geng Date: Fri, 2 Apr 2021 14:43:49 -0500 Subject: [PATCH 1/5] Fix copying and comparing of some classes --- lark/lexer.py | 22 ++++++++++++++++++++++ lark/parsers/lalr_parser.py | 2 +- tests/test_parser.py | 9 +++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/lark/lexer.py b/lark/lexer.py index 5a8d21a..7b2fc8f 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -186,6 +186,13 @@ class LineCounter: self.column = 1 self.line_start_pos = 0 + def __eq__(self, other): + if not isinstance(other, LineCounter): + return False + return (self.newline_char == other.newline_char and self.char_pos == other.char_pos + and self.line == other.line and self.column == other.column + and self.line_start_pos == other.line_start_pos) + def feed(self, token, test_newline=True): """Consume a token and calculate the new line & column. @@ -405,6 +412,13 @@ class LexerState: self.line_ctr = line_ctr self.last_token = last_token + def __eq__(self, other): + if not isinstance(other, LexerState): + return False + + return (self.text == other.text and self.line_ctr == other.line_ctr + and self.last_token == other.last_token) + def __copy__(self): return type(self)(self.text, copy(self.line_ctr), self.last_token) @@ -465,4 +479,12 @@ class LexerThread: def lex(self, parser_state): return self.lexer.lex(self.state, parser_state) + + def __copy__(self): + copied = type(self)( + self.lexer, + '' + ) + copied.state = copy(self.state) + return copied ###} diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 6271bd5..2d3e559 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -95,7 +95,7 @@ class ParserState(object): def __eq__(self, other): if not isinstance(other, ParserState): return False - return self.position == other.position + return len(self.state_stack) == len(other.state_stack) and self.position == other.position def __copy__(self): return type(self)( diff --git a/tests/test_parser.py b/tests/test_parser.py index ef91a92..3d18b20 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2408,9 +2408,18 @@ def _make_parser_test(LEXER, PARSER): # Skip comma return True elif e.token.type == 'SIGNED_NUMBER': + # Make a copy and ensure it is properly made + puppet_copy = e.puppet.copy() + assert puppet_copy.parser_state == e.puppet.parser_state + assert puppet_copy.lexer_state.state == e.puppet.lexer_state.state + assert puppet_copy.parser_state is not e.puppet.parser_state + assert puppet_copy.lexer_state.state is not e.puppet.lexer_state.state + assert puppet_copy.lexer_state.state.line_ctr is not e.puppet.lexer_state.state.line_ctr + # Try to feed a comma and retry the number e.puppet.feed_token(Token('COMMA', ',')) e.puppet.feed_token(e.token) + return True # Unhandled error. Will stop parse and raise exception From 6d6c751a79f4504e51e3d58e46de55794395966d Mon Sep 17 00:00:00 2001 From: Gary Geng Date: Fri, 2 Apr 2021 14:59:59 -0500 Subject: [PATCH 2/5] Make LexerThread inherit from object --- lark/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/lexer.py b/lark/lexer.py index 7b2fc8f..20775c8 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -470,7 +470,7 @@ class ContextualLexer(Lexer): except UnexpectedCharacters: raise e # Raise the original UnexpectedCharacters. The root lexer raises it with the wrong expected set. -class LexerThread: +class LexerThread(object): """A thread that ties a lexer instance and a lexer state, to be used by the parser""" def __init__(self, lexer, text): From 38ab4232aeedd084e2c71228d552de698affcbe5 Mon Sep 17 00:00:00 2001 From: Gary Geng Date: Fri, 2 Apr 2021 15:04:58 -0500 Subject: [PATCH 3/5] make LexerState inherit from object --- lark/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/lexer.py b/lark/lexer.py index 20775c8..d69db93 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -404,7 +404,7 @@ class TraditionalLexer(Lexer): raise EOFError(self) -class LexerState: +class LexerState(object): __slots__ = 'text', 'line_ctr', 'last_token' def __init__(self, text, line_ctr, last_token=None): From 94e242f51a351996d48a9dc72daf184f7086bd4f Mon Sep 17 00:00:00 2001 From: Gary Geng Date: Fri, 2 Apr 2021 15:14:16 -0500 Subject: [PATCH 4/5] Use NotImplemented for __eq__ and use __new__ for LexerThread --- lark/lexer.py | 10 ++++------ lark/parsers/lalr_parser.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index d69db93..64dc33e 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -188,7 +188,7 @@ class LineCounter: def __eq__(self, other): if not isinstance(other, LineCounter): - return False + return NotImplemented return (self.newline_char == other.newline_char and self.char_pos == other.char_pos and self.line == other.line and self.column == other.column and self.line_start_pos == other.line_start_pos) @@ -414,7 +414,7 @@ class LexerState(object): def __eq__(self, other): if not isinstance(other, LexerState): - return False + return NotImplemented return (self.text == other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token) @@ -481,10 +481,8 @@ class LexerThread(object): return self.lexer.lex(self.state, parser_state) def __copy__(self): - copied = type(self)( - self.lexer, - '' - ) + copied = object.__new__(LexerThread) + copied.lexer = self.lexer copied.state = copy(self.state) return copied ###} diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 2d3e559..6fc76ea 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -94,7 +94,7 @@ class ParserState(object): # Necessary for match_examples() to work def __eq__(self, other): if not isinstance(other, ParserState): - return False + return NotImplemented return len(self.state_stack) == len(other.state_stack) and self.position == other.position def __copy__(self): From 5b0b20762adf61bdbe8e4632a5869c7668b560de Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 2 Apr 2021 15:27:27 -0600 Subject: [PATCH 5/5] Small correction to PR --- lark/lexer.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 64dc33e..4d18704 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -189,9 +189,8 @@ class LineCounter: def __eq__(self, other): if not isinstance(other, LineCounter): return NotImplemented - return (self.newline_char == other.newline_char and self.char_pos == other.char_pos - and self.line == other.line and self.column == other.column - and self.line_start_pos == other.line_start_pos) + + return self.char_pos == other.char_pos and self.newline_char == other.newline_char def feed(self, token, test_newline=True): """Consume a token and calculate the new line & column. @@ -416,8 +415,7 @@ class LexerState(object): if not isinstance(other, LexerState): return NotImplemented - return (self.text == other.text and self.line_ctr == other.line_ctr - and self.last_token == other.last_token) + return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token def __copy__(self): return type(self)(self.text, copy(self.line_ctr), self.last_token)