diff --git a/examples/error_puppet.py b/examples/error_puppet.py
new file mode 100644
index 0000000..a5e0857
--- /dev/null
+++ b/examples/error_puppet.py
@@ -0,0 +1,34 @@
+#
+# This example demonstrates error handling using a parsing puppet in LALR
+#
+# When the parser encounters an UnexpectedToken exception, it creates a
+# parsing puppet with the current parse-state, and lets you control how
+# to proceed step-by-step. When you've achieved the correct parse-state,
+# you can resume the run by returning True.
+#
+
+from lark import UnexpectedToken, Token
+
+from .json_parser import json_parser
+
+def ignore_errors(e):
+    if e.token.type == 'COMMA':
+        # Skip comma
+        return True
+    elif e.token.type == 'SIGNED_NUMBER':
+        # Try to feed a comma and retry the number
+        e.puppet.feed_token(Token('COMMA', ','))
+        e.puppet.feed_token(e.token)
+        return True
+
+    # Unhandled error. Will stop parse and raise exception
+    return False
+
+
+def main():
+    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
+    res = json_parser.parse(s, on_error=ignore_errors)
+    print(res)      # prints [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+
+main()
+
diff --git a/lark/lark.py b/lark/lark.py
index 3855191..f5d957e 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -9,7 +9,7 @@
 from .load_grammar import load_grammar
 from .tree import Tree
 from .common import LexerConf, ParserConf
-from .lexer import Lexer, TraditionalLexer, TerminalDef
+from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import get_frontend
 from .grammar import Rule
@@ -359,13 +359,28 @@ class Lark(Serialize):
         "Get information about a terminal"
         return self._terminals_dict[name]
 
-    def parse(self, text, start=None):
+    def parse(self, text, start=None, on_error=None):
         """Parse the given text, according to the options provided.
 
-        The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).
+        Parameters:
+            start: str - required if Lark was given multiple possible start symbols (using the start option).
+            on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing.
 
         Returns a tree, unless specified otherwise.
         """
-        return self.parser.parse(text, start=start)
+        try:
+            return self.parser.parse(text, start=start)
+        except UnexpectedToken as e:
+            if on_error is None:
+                raise
+
+            while True:
+                if not on_error(e):
+                    raise e
+                try:
+                    return e.puppet.resume_parse()
+                except UnexpectedToken as e2:
+                    e = e2
+
 
 ###}
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index 991789b..7d5cf3b 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -41,15 +41,15 @@ class _Parser:
         self.callbacks = callbacks
         self.debug = debug
 
-    def parse(self, seq, start, set_state=None):
+    def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None):
         token = None
         stream = iter(seq)
         states = self.parse_table.states
         start_state = self.parse_table.start_states[start]
         end_state = self.parse_table.end_states[start]
 
-        state_stack = [start_state]
-        value_stack = []
+        state_stack = state_stack or [start_state]
+        value_stack = value_stack or []
 
         if set_state: set_state(start_state)
 
@@ -59,7 +59,7 @@
                 return states[state][token.type]
             except KeyError:
                 expected = [s for s in states[state].keys() if s.isupper()]
-                raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start))
+                raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state))
 
         def reduce(rule):
             size = len(rule.expansion)
@@ -116,25 +116,24 @@ class _Parser:
 
 
 class _ParserPuppet:
-    def __init__(self, parser, state_stack, value_stack, start):
+    def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
         self.parser = parser
-        self.state_stack = state_stack
-        self.value_stack = value_stack
-        self.start = start
+        self._state_stack = state_stack
+        self._value_stack = value_stack
+        self._start = start
+        self._stream = stream
+        self._set_state = set_state
 
     def feed_token(self, token):
-        end_state = self.parser.parse_table.end_states[self.start]
-        state_stack = self.state_stack
-        value_stack = self.value_stack
+        end_state = self.parser.parse_table.end_states[self._start]
+        state_stack = self._state_stack
+        value_stack = self._value_stack
 
         state = state_stack[-1]
        action, arg = self.parser.parse_table.states[state][token.type]
         assert arg != end_state
-        if action is Shift:
-            state_stack.append(arg)
-            value_stack.append(token)
-        else:
+        while action is Reduce:
             rule = arg
             size = len(rule.expansion)
             if size:
@@ -151,8 +150,20 @@
             state_stack.append(new_state)
             value_stack.append(value)
 
-        if state_stack[-1] == end_state:
-            return value_stack[-1]
+            if state_stack[-1] == end_state:
+                return value_stack[-1]
+
+            state = state_stack[-1]
+            action, arg = self.parser.parse_table.states[state][token.type]
+            assert arg != end_state
+
+        assert action is Shift
+        state_stack.append(arg)
+        value_stack.append(token)
+
 
     def choices(self):
-        return self.parser.parse_table.states[self.state_stack[-1]]
\ No newline at end of file
+        return self.parser.parse_table.states[self._state_stack[-1]]
+
+    def resume_parse(self):
+        return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack)
\ No newline at end of file