@@ -25,12 +25,21 @@ Example:
 Lark(...)
 ```

-#### parse(self, text)
+#### parse(self, text, start=None, on_error=None)

-Return a complete parse tree for the text (of type Tree)
+Parse the given text, according to the options provided.
+
+Returns a complete parse tree for the text (of type Tree)

 If a transformer is supplied to `__init__`, returns whatever is the result of the transformation.

+Parameters:
+
+* start: str - required if Lark was given multiple possible start symbols (using the start option).
+* on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. LALR only.
+
+(See `examples/error_puppet.py` for an example of how to use `on_error`.)
+
 #### save(self, f) / load(cls, f)
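As a quick illustration of the `on_error` hook documented above, here is a minimal sketch of what a caller might write. The grammar and the recovery policy are invented for this sketch; only the `on_error` keyword and the LALR requirement come from this change. The standard lexer is used so the mismatch surfaces as an `UnexpectedToken` rather than a lexer error.

```python
from lark import Lark

# Hypothetical grammar: a bracketed, comma-separated list of integers.
list_parser = Lark(r"""
    start: "[" [INT ("," INT)*] "]"
    %import common.INT
    %import common.WS
    %ignore WS
""", parser='lalr', lexer='standard')

def skip_stray_commas(e):
    # e is the UnexpectedToken error; return True to resume parsing,
    # anything falsy to re-raise the original exception.
    return e.token.type == 'COMMA'

tree = list_parser.parse("[1,, 2, 3]", on_error=skip_stray_commas)
```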
@@ -160,6 +169,8 @@ See the [visitors page](visitors.md)

 ## UnexpectedToken

+TODO: Explain puppet mechanism (related to on_error)
+
 ## UnexpectedException

 - `UnexpectedInput`
@@ -6,6 +6,7 @@
 - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md))
 - Builds a parse-tree (AST) automagically based on the grammar
 - Stand-alone parser generator - create a small independent parser to embed in your project.
+- Flexible error handling by using a "puppet parser" mechanism (LALR only)
 - Automatic line & column tracking (for both tokens and matched rules)
 - Automatic terminal collision resolution
 - Standard library of terminals (strings, numbers, names, etc.)
@@ -0,0 +1,34 @@
+#
+# This example demonstrates error handling using a parsing puppet in LALR
+#
+# When the parser encounters an UnexpectedToken exception, it creates a
+# parsing puppet with the current parse-state, and lets you control how
+# to proceed step-by-step. When you've achieved the correct parse-state,
+# you can resume the run by returning True.
+#
+
+from lark import UnexpectedToken, Token
+
+from .json_parser import json_parser
+
+
+def ignore_errors(e):
+    if e.token.type == 'COMMA':
+        # Skip comma
+        return True
+    elif e.token.type == 'SIGNED_NUMBER':
+        # Try to feed a comma and retry the number
+        e.puppet.feed_token(Token('COMMA', ','))
+        e.puppet.feed_token(e.token)
+        return True
+
+    # Unhandled error. Will stop parse and raise exception
+    return False
+
+
+def main():
+    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
+    res = json_parser.parse(s, on_error=ignore_errors)
+    print(res)      # prints [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+
+main()
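A follow-up sketch (not part of the new file above): a handler can also inspect the puppet before deciding how to recover, using the `choices()` method this diff adds further down. Only `e.token`, `e.puppet`, and `choices()` come from the change; the rest is illustrative.

```python
def report_and_skip_commas(e):
    # choices() returns the parse table row for the current state; the
    # upper-case keys are terminal names (the same filter the parser uses
    # when it builds the 'expected' list for the error message).
    acceptable = [name for name in e.puppet.choices() if name.isupper()]
    print("Unexpected %r; parser would accept: %s" % (e.token, acceptable))
    return e.token.type == 'COMMA'   # only recover from stray commas

# usage (hypothetical): json_parser.parse(s, on_error=report_and_skip_commas)
```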
@@ -81,7 +81,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput):

 class UnexpectedToken(ParseError, UnexpectedInput):
-    def __init__(self, token, expected, considered_rules=None, state=None):
+    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
         self.token = token
         self.expected = expected    # XXX str shouldn't necessary
         self.line = getattr(token, 'line', '?')
@@ -89,6 +89,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
         self.considered_rules = considered_rules
         self.state = state
         self.pos_in_stream = getattr(token, 'pos_in_stream', None)
+        self.puppet = puppet

         message = ("Unexpected token %r at line %s, column %s.\n"
                    "Expected one of: \n\t* %s\n"
@@ -9,7 +9,7 @@ from .load_grammar import load_grammar
 from .tree import Tree
 from .common import LexerConf, ParserConf

-from .lexer import Lexer, TraditionalLexer, TerminalDef
+from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import get_frontend
 from .grammar import Rule
@@ -359,13 +359,28 @@ class Lark(Serialize):
         "Get information about a terminal"
         return self._terminals_dict[name]

-    def parse(self, text, start=None):
+    def parse(self, text, start=None, on_error=None):
         """Parse the given text, according to the options provided.

-        The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).
+        Parameters:
+            start: str - required if Lark was given multiple possible start symbols (using the start option).
+            on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. LALR only.

         Returns a tree, unless specified otherwise.
         """
-        return self.parser.parse(text, start=start)
+        try:
+            return self.parser.parse(text, start=start)
+        except UnexpectedToken as e:
+            if on_error is None:
+                raise
+
+            while True:
+                if not on_error(e):
+                    raise e
+                try:
+                    return e.puppet.resume_parse()
+                except UnexpectedToken as e2:
+                    e = e2

 ###}
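A note on the loop above: each failed resume produces a fresh `UnexpectedToken`, which is handed back to the same callback, so the handler may run many times during a single `parse()` call and can keep state between calls. As a sketch, a hypothetical handler factory that caps the number of recoveries (all names here are illustrative):

```python
def limited_recovery(max_errors=10):
    count = [0]
    def handler(e):
        count[0] += 1
        if count[0] > max_errors:
            return False                # False makes Lark.parse re-raise the error
        return e.token.type == 'COMMA'  # recover only from stray commas
    return handler

# usage (hypothetical): parser.parse(text, on_error=limited_recovery(3))
```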
@@ -41,15 +41,15 @@ class _Parser:
         self.callbacks = callbacks
         self.debug = debug

-    def parse(self, seq, start, set_state=None):
+    def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None):
         token = None
         stream = iter(seq)
         states = self.parse_table.states
         start_state = self.parse_table.start_states[start]
         end_state = self.parse_table.end_states[start]
-        state_stack = [start_state]
-        value_stack = []
+        state_stack = state_stack or [start_state]
+        value_stack = value_stack or []

         if set_state: set_state(start_state)
@@ -59,7 +59,7 @@
                 return states[state][token.type]
             except KeyError:
                 expected = [s for s in states[state].keys() if s.isupper()]
-                raise UnexpectedToken(token, expected, state=state)
+                raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state))

         def reduce(rule):
             size = len(rule.expansion)
@@ -111,3 +111,59 @@ class _Parser:
                     return value_stack[-1]

 ###}
+
+
+class _ParserPuppet:
+    def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
+        self.parser = parser
+        self._state_stack = state_stack
+        self._value_stack = value_stack
+        self._start = start
+        self._stream = stream
+        self._set_state = set_state
+
+    def feed_token(self, token):
+        end_state = self.parser.parse_table.end_states[self._start]
+        state_stack = self._state_stack
+        value_stack = self._value_stack
+
+        state = state_stack[-1]
+        action, arg = self.parser.parse_table.states[state][token.type]
+        assert arg != end_state
+
+        while action is Reduce:
+            rule = arg
+            size = len(rule.expansion)
+            if size:
+                s = value_stack[-size:]
+                del state_stack[-size:]
+                del value_stack[-size:]
+            else:
+                s = []
+
+            value = self.parser.callbacks[rule](s)
+
+            _action, new_state = self.parser.parse_table.states[state_stack[-1]][rule.origin.name]
+            assert _action is Shift
+            state_stack.append(new_state)
+            value_stack.append(value)
+
+            if state_stack[-1] == end_state:
+                return value_stack[-1]
+
+            state = state_stack[-1]
+            action, arg = self.parser.parse_table.states[state][token.type]
+            assert arg != end_state
+
+        assert action is Shift
+        state_stack.append(arg)
+        value_stack.append(token)
+
+    def choices(self):
+        return self.parser.parse_table.states[self._state_stack[-1]]
+
+    def resume_parse(self):
+        return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack)
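Tying the pieces together: a sketch of driving the puppet by hand from an except block. The toy grammar and parser are invented for illustration; `feed_token`, `choices`, and `resume_parse` are the methods defined above, and the standard lexer is used so the mismatch reaches the parser as an `UnexpectedToken`. Note that the offending token has already been taken off the token stream, so it has to be re-fed, just as in the example script.

```python
from lark import Lark, Token, UnexpectedToken

abc_parser = Lark("""
    start: A B C
    A: "a"
    B: "b"
    C: "c"
""", parser='lalr', lexer='standard')

try:
    abc_parser.parse("ac")                   # "b" is missing
except UnexpectedToken as e:
    print(sorted(e.puppet.choices()))        # names acceptable here, e.g. ['B']
    e.puppet.feed_token(Token('B', 'b'))     # splice in the missing token
    e.puppet.feed_token(e.token)             # re-feed the token that triggered the error
    tree = e.puppet.resume_parse()           # finish parsing from where it stopped
```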