Browse Source

Merge branch 'puppet'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.9.0
Erez Sh 5 years ago
parent
commit
5e82c668a4
6 changed files with 129 additions and 11 deletions
  1. +13
    -2
      docs/classes.md
  2. +1
    -0
      docs/features.md
  3. +34
    -0
      examples/error_puppet.py
  4. +2
    -1
      lark/exceptions.py
  5. +19
    -4
      lark/lark.py
  6. +60
    -4
      lark/parsers/lalr_parser.py

+ 13
- 2
docs/classes.md View File

@@ -25,12 +25,21 @@ Example:
Lark(...) Lark(...)
``` ```


#### parse(self, text)
#### parse(self, text, start=None, on_error=None)


Return a complete parse tree for the text (of type Tree)
Parse the given text, according to the options provided.

Returns a complete parse tree for the text (of type Tree)


If a transformer is supplied to `__init__`, returns whatever is the result of the transformation. If a transformer is supplied to `__init__`, returns whatever is the result of the transformation.


Parameters:

* start: str - required if Lark was given multiple possible start symbols (using the start option).

* on_error: function - if provided, it is called with the exception each time an UnexpectedToken error is raised. Return True to resume parsing, or a false value to re-raise the error. LALR only.

(See `examples/error_puppet.py` for an example of how to use `on_error`.)


#### save(self, f) / load(cls, f) #### save(self, f) / load(cls, f)


@@ -160,6 +169,8 @@ See the [visitors page](visitors.md)


## UnexpectedToken ## UnexpectedToken


TODO: Explain puppet mechanism (related to on_error)

## UnexpectedException ## UnexpectedException


- `UnexpectedInput` - `UnexpectedInput`


+ 1
- 0
docs/features.md View File

@@ -6,6 +6,7 @@
- EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md))
- Builds a parse-tree (AST) automagically based on the grammar - Builds a parse-tree (AST) automagically based on the grammar
- Stand-alone parser generator - create a small independent parser to embed in your project. - Stand-alone parser generator - create a small independent parser to embed in your project.
- Flexible error handling by using a "puppet parser" mechanism (LALR only)
- Automatic line & column tracking (for both tokens and matched rules) - Automatic line & column tracking (for both tokens and matched rules)
- Automatic terminal collision resolution - Automatic terminal collision resolution
- Standard library of terminals (strings, numbers, names, etc.) - Standard library of terminals (strings, numbers, names, etc.)


+ 34
- 0
examples/error_puppet.py View File

@@ -0,0 +1,34 @@
#
# This example demonstrates error handling using a parsing puppet in LALR
#
# When the parser encounters an UnexpectedToken exception, it creates a
# parsing puppet with the current parse-state, and lets you control how
# to proceed step-by-step. When you've achieved the correct parse-state,
# you can resume the run by returning True.
#

from lark import UnexpectedToken, Token

from .json_parser import json_parser

def ignore_errors(e):
    """Error handler for ``parse(..., on_error=...)``.

    Looks at the token that triggered the error and decides whether the
    parse can carry on:

    * a ``COMMA`` is skipped;
    * a ``SIGNED_NUMBER`` is retried after feeding a comma to the puppet.

    Returns True to resume parsing, or False to stop the parse and let the
    exception be raised.
    """
    offending = e.token
    kind = offending.type

    if kind == 'COMMA':
        # Nothing to repair: just skip the comma.
        return True

    if kind == 'SIGNED_NUMBER':
        # Feed a comma first, then retry the number that was rejected.
        puppet = e.puppet
        puppet.feed_token(Token('COMMA', ','))
        puppet.feed_token(offending)
        return True

    # Unhandled error. Will stop parse and raise exception
    return False


def main():
    """Parse a malformed JSON array, repairing it with ``ignore_errors``."""
    # The input is missing some commas and has some extra ones.
    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
    res = json_parser.parse(s, on_error=ignore_errors)
    # The leading 0 is parsed before the first error, so it is part of the result.
    print(res)  # prints [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

main()


+ 2
- 1
lark/exceptions.py View File

@@ -81,7 +81,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput):




class UnexpectedToken(ParseError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput):
def __init__(self, token, expected, considered_rules=None, state=None):
def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
self.token = token self.token = token
self.expected = expected # XXX str shouldn't necessary self.expected = expected # XXX str shouldn't necessary
self.line = getattr(token, 'line', '?') self.line = getattr(token, 'line', '?')
@@ -89,6 +89,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
self.considered_rules = considered_rules self.considered_rules = considered_rules
self.state = state self.state = state
self.pos_in_stream = getattr(token, 'pos_in_stream', None) self.pos_in_stream = getattr(token, 'pos_in_stream', None)
self.puppet = puppet


message = ("Unexpected token %r at line %s, column %s.\n" message = ("Unexpected token %r at line %s, column %s.\n"
"Expected one of: \n\t* %s\n" "Expected one of: \n\t* %s\n"


+ 19
- 4
lark/lark.py View File

@@ -9,7 +9,7 @@ from .load_grammar import load_grammar
from .tree import Tree from .tree import Tree
from .common import LexerConf, ParserConf from .common import LexerConf, ParserConf


from .lexer import Lexer, TraditionalLexer, TerminalDef
from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken
from .parse_tree_builder import ParseTreeBuilder from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend from .parser_frontends import get_frontend
from .grammar import Rule from .grammar import Rule
@@ -359,13 +359,28 @@ class Lark(Serialize):
"Get information about a terminal" "Get information about a terminal"
return self._terminals_dict[name] return self._terminals_dict[name]


def parse(self, text, start=None):
def parse(self, text, start=None, on_error=None):
"""Parse the given text, according to the options provided. """Parse the given text, according to the options provided.


The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).
Parameters:
start: str - required if Lark was given multiple possible start symbols (using the start option).
on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. LALR only.


Returns a tree, unless specified otherwise. Returns a tree, unless specified otherwise.
""" """
return self.parser.parse(text, start=start)
try:
return self.parser.parse(text, start=start)
except UnexpectedToken as e:
if on_error is None:
raise

while True:
if not on_error(e):
raise e
try:
return e.puppet.resume_parse()
except UnexpectedToken as e2:
e = e2



###} ###}

+ 60
- 4
lark/parsers/lalr_parser.py View File

@@ -41,15 +41,15 @@ class _Parser:
self.callbacks = callbacks self.callbacks = callbacks
self.debug = debug self.debug = debug


def parse(self, seq, start, set_state=None):
def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None):
token = None token = None
stream = iter(seq) stream = iter(seq)
states = self.parse_table.states states = self.parse_table.states
start_state = self.parse_table.start_states[start] start_state = self.parse_table.start_states[start]
end_state = self.parse_table.end_states[start] end_state = self.parse_table.end_states[start]


state_stack = [start_state]
value_stack = []
state_stack = state_stack or [start_state]
value_stack = value_stack or []


if set_state: set_state(start_state) if set_state: set_state(start_state)


@@ -59,7 +59,7 @@ class _Parser:
return states[state][token.type] return states[state][token.type]
except KeyError: except KeyError:
expected = [s for s in states[state].keys() if s.isupper()] expected = [s for s in states[state].keys() if s.isupper()]
raise UnexpectedToken(token, expected, state=state)
raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state))


def reduce(rule): def reduce(rule):
size = len(rule.expansion) size = len(rule.expansion)
@@ -111,3 +111,59 @@ class _Parser:
return value_stack[-1] return value_stack[-1]


###} ###}




class _ParserPuppet:
    """Handle on a paused LALR parse that lets the caller drive it by hand.

    Keeps the parser together with its state stack, value stack, start
    symbol, token stream and ``set_state`` callback. The two stacks are
    stored by reference and modified in place by ``feed_token``.
    """

    def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
        self.parser = parser
        self._start = start
        self._stream = stream
        self._set_state = set_state
        self._state_stack = state_stack
        self._value_stack = value_stack

    def feed_token(self, token):
        """Advance the parse by a single token.

        Performs every reduction the parse table lists for ``token.type``,
        then shifts the token onto the stacks. If a reduction lands on the
        end state, the value on top of the value stack is returned instead
        of shifting; otherwise nothing is returned.

        A token type with no entry for the current state raises KeyError.
        """
        table = self.parser.parse_table
        final_state = table.end_states[self._start]
        states = self._state_stack
        values = self._value_stack

        while True:
            # Look up what the table says for this token in the current state.
            action, arg = table.states[states[-1]][token.type]
            assert arg != final_state

            if action is not Reduce:
                break

            # Reduce: pop the rule's children and replace them with its value.
            rule = arg
            width = len(rule.expansion)
            if width == 0:
                children = []
            else:
                children = values[-width:]
                del states[-width:]
                del values[-width:]

            reduced = self.parser.callbacks[rule](children)

            # Goto on the rule's origin from the state uncovered by the pop.
            goto_action, goto_state = table.states[states[-1]][rule.origin.name]
            assert goto_action is Shift
            states.append(goto_state)
            values.append(reduced)

            if states[-1] == final_state:
                return values[-1]

        assert action is Shift
        states.append(arg)
        values.append(token)

    def choices(self):
        """Return the parse-table entry for the state on top of the state stack."""
        current = self._state_stack[-1]
        return self.parser.parse_table.states[current]

    def resume_parse(self):
        """Call the parser's ``parse`` with the stored stream, start symbol,
        ``set_state`` callback, value stack and state stack, and return its result.
        """
        return self.parser.parse(
            self._stream,
            self._start,
            self._set_state,
            self._value_stack,
            self._state_stack,
        )

Loading…
Cancel
Save