Browse Source

Added support for error handling, using a puppet parser.

TODO: Add docs
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.9.0
Erez Sh 5 years ago
parent
commit
66a073d0aa
3 changed files with 82 additions and 22 deletions
  1. +34
    -0
      examples/error_puppet.py
  2. +19
    -4
      lark/lark.py
  3. +29
    -18
      lark/parsers/lalr_parser.py

+ 34
- 0
examples/error_puppet.py View File

@@ -0,0 +1,34 @@
#
# This example demonstrates error handling using a parsing puppet in LALR
#
# When the parser encounters an UnexpectedToken exception, it creates a
# parsing puppet with the current parse-state, and lets you control how
# to proceed step-by-step. When you've achieved the correct parse-state,
# you can resume the run by returning True.
#

from lark import UnexpectedToken, Token

from .json_parser import json_parser

def ignore_errors(e):
    """Error-recovery callback for ``parse(..., on_error=ignore_errors)``.

    Receives the ``UnexpectedToken`` error ``e`` and decides how to proceed:

    * a stray ``COMMA`` is simply skipped (nothing is fed to the puppet);
    * an unexpected ``SIGNED_NUMBER`` is treated as a missing separator: a
      ``COMMA`` token is fed to the puppet, followed by the number itself;
    * any other token is left unhandled.

    Returns True to resume parsing, or False to stop and let the error be
    raised.
    """
    if e.token.type == 'COMMA':
        # Skip comma
        return True
    elif e.token.type == 'SIGNED_NUMBER':
        # Try to feed a comma and retry the number
        try:
            e.puppet.feed_token(Token('COMMA', ','))
            e.puppet.feed_token(e.token)
        except KeyError:
            # The puppet looks each fed token up in the current state's
            # table; a token that is not acceptable there surfaces as a
            # KeyError. The recovery does not apply in that case, so report
            # the error as unhandled instead of crashing with KeyError.
            return False
        return True

    # Unhandled error. Will stop parse and raise exception
    return False


def main():
    """Parse a malformed list, recovering from errors via ``ignore_errors``."""
    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
    res = json_parser.parse(s, on_error=ignore_errors)
    # The leading 0 is a valid first element and ignore_errors never drops
    # numbers, so it is part of the result.
    print(res)  # expected: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]


# Only run the demo when executed as a script/module, not on import.
if __name__ == '__main__':
    main()


+ 19
- 4
lark/lark.py View File

@@ -9,7 +9,7 @@ from .load_grammar import load_grammar
from .tree import Tree from .tree import Tree
from .common import LexerConf, ParserConf from .common import LexerConf, ParserConf


from .lexer import Lexer, TraditionalLexer, TerminalDef
from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken
from .parse_tree_builder import ParseTreeBuilder from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend from .parser_frontends import get_frontend
from .grammar import Rule from .grammar import Rule
@@ -359,13 +359,28 @@ class Lark(Serialize):
"Get information about a terminal" "Get information about a terminal"
return self._terminals_dict[name] return self._terminals_dict[name]


def parse(self, text, start=None):
def parse(self, text, start=None, on_error=None):
"""Parse the given text, according to the options provided. """Parse the given text, according to the options provided.


The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).
Parameters:
start: str - required if Lark was given multiple possible start symbols (using the start option).
on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing.


Returns a tree, unless specified otherwise. Returns a tree, unless specified otherwise.
""" """
return self.parser.parse(text, start=start)
try:
return self.parser.parse(text, start=start)
except UnexpectedToken as e:
if on_error is None:
raise

while True:
if not on_error(e):
raise e
try:
return e.puppet.resume_parse()
except UnexpectedToken as e2:
e = e2



###} ###}

+ 29
- 18
lark/parsers/lalr_parser.py View File

@@ -41,15 +41,15 @@ class _Parser:
self.callbacks = callbacks self.callbacks = callbacks
self.debug = debug self.debug = debug


def parse(self, seq, start, set_state=None):
def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None):
token = None token = None
stream = iter(seq) stream = iter(seq)
states = self.parse_table.states states = self.parse_table.states
start_state = self.parse_table.start_states[start] start_state = self.parse_table.start_states[start]
end_state = self.parse_table.end_states[start] end_state = self.parse_table.end_states[start]


state_stack = [start_state]
value_stack = []
state_stack = state_stack or [start_state]
value_stack = value_stack or []


if set_state: set_state(start_state) if set_state: set_state(start_state)


@@ -59,7 +59,7 @@ class _Parser:
return states[state][token.type] return states[state][token.type]
except KeyError: except KeyError:
expected = [s for s in states[state].keys() if s.isupper()] expected = [s for s in states[state].keys() if s.isupper()]
raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start))
raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state))


def reduce(rule): def reduce(rule):
size = len(rule.expansion) size = len(rule.expansion)
@@ -116,25 +116,24 @@ class _Parser:




class _ParserPuppet: class _ParserPuppet:
def __init__(self, parser, state_stack, value_stack, start):
def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
self.parser = parser self.parser = parser
self.state_stack = state_stack
self.value_stack = value_stack
self.start = start
self._state_stack = state_stack
self._value_stack = value_stack
self._start = start
self._stream = stream
self._set_state = set_state


def feed_token(self, token): def feed_token(self, token):
end_state = self.parser.parse_table.end_states[self.start]
state_stack = self.state_stack
value_stack = self.value_stack
end_state = self.parser.parse_table.end_states[self._start]
state_stack = self._state_stack
value_stack = self._value_stack


state = state_stack[-1] state = state_stack[-1]
action, arg = self.parser.parse_table.states[state][token.type] action, arg = self.parser.parse_table.states[state][token.type]
assert arg != end_state assert arg != end_state


if action is Shift:
state_stack.append(arg)
value_stack.append(token)
else:
while action is Reduce:
rule = arg rule = arg
size = len(rule.expansion) size = len(rule.expansion)
if size: if size:
@@ -151,8 +150,20 @@ class _ParserPuppet:
state_stack.append(new_state) state_stack.append(new_state)
value_stack.append(value) value_stack.append(value)


if state_stack[-1] == end_state:
return value_stack[-1]
if state_stack[-1] == end_state:
return value_stack[-1]

state = state_stack[-1]
action, arg = self.parser.parse_table.states[state][token.type]
assert arg != end_state

assert action is Shift
state_stack.append(arg)
value_stack.append(token)



def choices(self): def choices(self):
return self.parser.parse_table.states[self.state_stack[-1]]
return self.parser.parse_table.states[self._state_stack[-1]]

def resume_parse(self):
return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack)

Loading…
Cancel
Save