Browse Source

Earley error reporting - initial (Issue #760)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
Erez Sh 5 years ago
parent
commit
f285cda4f2
6 changed files with 98 additions and 13 deletions
  1. +79
    -0
      examples/advanced/error_reporting_earley.py
  2. +1
    -1
      examples/advanced/error_reporting_lalr.py
  3. +1
    -1
      lark/__init__.py
  4. +15
    -9
      lark/exceptions.py
  5. +1
    -1
      lark/parsers/earley.py
  6. +1
    -1
      lark/parsers/xearley.py

+ 79
- 0
examples/advanced/error_reporting_earley.py View File

@@ -0,0 +1,79 @@
"""
Example-Driven Error Reporting
==============================

A demonstration of example-driven error reporting with the Earley parser
(See also: error_reporting_lalr.py)
"""
from lark import Lark, UnexpectedInput

from _json_parser import json_grammar # Using the grammar from the json_parser example

# Parser shared by the whole example: built once at import time from the
# imported JSON grammar and reused by parse(). No extra options are passed
# to Lark here.
json_parser = Lark(json_grammar)

class JsonSyntaxError(SyntaxError):
    """Base class for the labelled JSON syntax errors raised by this example.

    Instances are constructed with three positional arguments,
    ``(context, line, column)``, which ``__str__`` reads back from
    ``self.args``. Subclasses override ``label`` with a short description
    of the mistake.
    """

    # Fallback label so that the base class itself can be rendered with
    # str(); without it, __str__ would raise AttributeError on the base.
    label = 'Syntax Error'

    def __str__(self):
        """Return ``'<label> at line <line>, column <column>.'`` followed by the context."""
        context, line, column = self.args
        return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context)


class JsonMissingValue(JsonSyntaxError):
    """Labelled error: a value is missing."""
    label = 'Missing Value'


class JsonMissingOpening(JsonSyntaxError):
    """Labelled error: an opening bracket/brace is missing."""
    label = 'Missing Opening'


class JsonMissingClosing(JsonSyntaxError):
    """Labelled error: a closing bracket/brace is missing."""
    label = 'Missing Closing'


class JsonMissingComma(JsonSyntaxError):
    """Labelled error: a separating comma is missing."""
    label = 'Missing Comma'


class JsonTrailingComma(JsonSyntaxError):
    """Labelled error: a comma is not followed by another item."""
    label = 'Trailing Comma'


def parse(json_text):
    """Parse ``json_text`` and return the parser's result.

    If the parser raises ``UnexpectedInput``, the error is compared (through
    ``match_examples``) against the known-bad inputs listed below. When one
    of them matches, the associated ``JsonSyntaxError`` subclass is raised,
    carrying the error's context, line and column. When none matches, the
    original parser error is re-raised unchanged.
    """
    try:
        # Return the result instead of dropping it, so that callers can
        # actually use a successful parse.
        return json_parser.parse(json_text)
    except UnexpectedInput as u:
        # Map each error class to malformed inputs that are representative
        # of that mistake.
        exc_class = u.match_examples(json_parser.parse, {
            JsonMissingOpening: ['{"foo": ]}',
                                 '{"foor": }}',
                                 '{"foo": }'],
            JsonMissingClosing: ['{"foo": [}',
                                 '{',
                                 '{"a": 1',
                                 '[1'],
            JsonMissingComma: ['[1 2]',
                               '[false 1]',
                               '["b" 1]',
                               '{"a":true 1:4}',
                               '{"a":1 1:4}',
                               '{"a":"b" 1:4}'],
            JsonTrailingComma: ['[,]',
                                '[1,]',
                                '[1,2,]',
                                '{"foo":1,}',
                                '{"foo":false,"bar":true,}']
        }, use_accepts=True)
        if not exc_class:
            # No example matched: propagate the parser's own error.
            raise
        raise exc_class(u.get_context(json_text), u.line, u.column)


def test():
    """Feed two malformed documents to parse() and print the labelled error each one raises."""
    samples = (
        ('{"example1": "value"', JsonMissingClosing),
        ('{"example2": ] ', JsonMissingOpening),
    )
    for text, expected_error in samples:
        try:
            parse(text)
        except expected_error as err:
            # Only the expected error class is caught; anything else propagates.
            print(err)


# Script entry point: run the demonstration only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    test()



+ 1
- 1
examples/advanced/error_reporting_lalr.py View File

@@ -3,7 +3,7 @@ Example-Driven Error Reporting
============================== ==============================


A demonstration of example-driven error reporting with the LALR parser A demonstration of example-driven error reporting with the LALR parser
(See also: error_reporting_earley.py)
""" """
from lark import Lark, UnexpectedInput from lark import Lark, UnexpectedInput




+ 1
- 1
lark/__init__.py View File

@@ -3,7 +3,7 @@ from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive
from .visitors import InlineTransformer, inline_args # XXX Deprecated from .visitors import InlineTransformer, inline_args # XXX Deprecated
from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
UnexpectedInput, UnexpectedCharacters, LarkError)
UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError)
from .lexer import Token from .lexer import Token
from .lark import Lark from .lark import Lark




+ 15
- 9
lark/exceptions.py View File

@@ -19,14 +19,6 @@ class LexError(LarkError):
pass pass




class UnexpectedEOF(ParseError):
    """Error raised when the input ends while terminals are still expected.

    ``expected`` is stored on the instance; the ``name`` of each of its
    items is listed in the error message.
    """

    def __init__(self, expected):
        self.expected = expected

        # One bullet per expected item.
        bullet_list = '\n\t* '.join(item.name for item in self.expected)
        super(UnexpectedEOF, self).__init__(
            "Unexpected end-of-input. Expected one of: \n\t* %s\n" % bullet_list)


class UnexpectedInput(LarkError): class UnexpectedInput(LarkError):
"""UnexpectedInput Error. """UnexpectedInput Error.


@@ -47,6 +39,7 @@ class UnexpectedInput(LarkError):
The parser doesn't hold a copy of the text it has to parse, The parser doesn't hold a copy of the text it has to parse,
so you have to provide it again so you have to provide it again
""" """
assert self.pos_in_stream is not None, self
pos = self.pos_in_stream pos = self.pos_in_stream
start = max(pos - span, 0) start = max(pos - span, 0)
end = pos + span end = pos + span
@@ -91,7 +84,7 @@ class UnexpectedInput(LarkError):
parse_fn(malformed) parse_fn(malformed)
except UnexpectedInput as ut: except UnexpectedInput as ut:
if ut.state == self.state: if ut.state == self.state:
if use_accepts and ut.accepts != self.accepts:
if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
(self.state, self.accepts, ut.accepts, i, j)) (self.state, self.accepts, ut.accepts, i, j))
continue continue
@@ -114,6 +107,19 @@ class UnexpectedInput(LarkError):


return candidate[0] return candidate[0]


class UnexpectedEOF(ParseError, UnexpectedInput):
    """Error raised when the input ends while terminals are still expected.

    Parameters:
        expected: items exposing a ``name`` attribute; their names are
            listed in the error message.
        state: optional value stored as ``self.state`` (defaults to None).
    """

    def __init__(self, expected, state=None):
        # Imported inside the method rather than at module level.
        from .lexer import Token

        self.state = state
        self.expected = expected

        # Placeholder token plus -1 sentinels for the position attributes.
        self.token = Token("<EOF>", "")
        self.pos_in_stream = self.line = self.column = -1

        # One bullet per expected item.
        bullet_list = '\n\t* '.join(item.name for item in self.expected)
        super(UnexpectedEOF, self).__init__(
            "Unexpected end-of-input. Expected one of: \n\t* %s\n" % bullet_list)



class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedCharacters(LexError, UnexpectedInput):
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):


+ 1
- 1
lark/parsers/earley.py View File

@@ -299,7 +299,7 @@ class Parser:
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
if not solutions: if not solutions:
expected_terminals = [t.expect for t in to_scan] expected_terminals = [t.expect for t in to_scan]
raise UnexpectedEOF(expected_terminals)
raise UnexpectedEOF(expected_terminals, state={i.s for i in to_scan})


if self.debug: if self.debug:
from .earley_forest import ForestToPyDotVisitor from .earley_forest import ForestToPyDotVisitor


+ 1
- 1
lark/parsers/xearley.py View File

@@ -113,7 +113,7 @@ class Parser(BaseParser):
del delayed_matches[i+1] # No longer needed, so unburden memory del delayed_matches[i+1] # No longer needed, so unburden memory


if not next_set and not delayed_matches and not next_to_scan: if not next_set and not delayed_matches and not next_to_scan:
raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan))
raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state={i.s for i in next_to_scan})


return next_to_scan return next_to_scan




Loading…
Cancel
Save