@@ -0,0 +1,79 @@
+"""
+Example-Driven Error Reporting
+==============================
+
+A demonstration of example-driven error reporting with the Earley parser
+(See also: error_reporting_lalr.py)
+"""
+from lark import Lark, UnexpectedInput
+
+from _json_parser import json_grammar   # Using the grammar from the json_parser example
+
+json_parser = Lark(json_grammar)
+
+
+class JsonSyntaxError(SyntaxError):
+    def __str__(self):
+        context, line, column = self.args
+        return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context)
+
+
+class JsonMissingValue(JsonSyntaxError):
+    label = 'Missing Value'
+
+class JsonMissingOpening(JsonSyntaxError):
+    label = 'Missing Opening'
+
+class JsonMissingClosing(JsonSyntaxError):
+    label = 'Missing Closing'
+
+class JsonMissingComma(JsonSyntaxError):
+    label = 'Missing Comma'
+
+class JsonTrailingComma(JsonSyntaxError):
+    label = 'Trailing Comma'
+
+
+def parse(json_text):
+    try:
+        j = json_parser.parse(json_text)
+    except UnexpectedInput as u:
+        exc_class = u.match_examples(json_parser.parse, {
+            JsonMissingOpening: ['{"foo": ]}',
+                                 '{"foor": }}',
+                                 '{"foo": }'],
+            JsonMissingClosing: ['{"foo": [}',
+                                 '{',
+                                 '{"a": 1',
+                                 '[1'],
+            JsonMissingComma: ['[1 2]',
+                               '[false 1]',
+                               '["b" 1]',
+                               '{"a":true 1:4}',
+                               '{"a":1 1:4}',
+                               '{"a":"b" 1:4}'],
+            JsonTrailingComma: ['[,]',
+                                '[1,]',
+                                '[1,2,]',
+                                '{"foo":1,}',
+                                '{"foo":false,"bar":true,}']
+        }, use_accepts=True)
+        if not exc_class:
+            raise
+        raise exc_class(u.get_context(json_text), u.line, u.column)
+
+
+def test():
+    try:
+        parse('{"example1": "value"')
+    except JsonMissingClosing as e:
+        print(e)
+
+    try:
+        parse('{"example2": ] ')
+    except JsonMissingOpening as e:
+        print(e)
+
+
+if __name__ == '__main__':
+    test()
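
Note on the API used above: `match_examples` replays each malformed example through the given parse callback and compares the resulting parser state with the state of the current error; with `use_accepts=True` it also compares the sets of acceptable tokens when both errors expose them. A minimal sketch of the same pattern on a toy grammar (the grammar, input, and exception name below are illustrative, not part of this change):

from lark import Lark, UnexpectedInput

pair_parser = Lark('start: "(" ")"', parser='earley')

class MissingClosingParen(SyntaxError):
    pass

try:
    pair_parser.parse('(')
except UnexpectedInput as u:
    # Real usage would list several representative malformed inputs per class,
    # as the JSON example above does.
    exc_class = u.match_examples(pair_parser.parse,
                                 {MissingClosingParen: ['(']},
                                 use_accepts=True)
    if exc_class:
        raise exc_class('input ended before ")"')
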
@@ -3,7 +3,7 @@ Example-Driven Error Reporting
 ==============================
 
 A demonstration of example-driven error reporting with the LALR parser
+(See also: error_reporting_earley.py)
 """
 from lark import Lark, UnexpectedInput
@@ -3,7 +3,7 @@ from .tree import Tree
 from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive
 from .visitors import InlineTransformer, inline_args   # XXX Deprecated
 from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
-                         UnexpectedInput, UnexpectedCharacters, LarkError)
+                         UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError)
 from .lexer import Token
 from .lark import Lark
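
With `UnexpectedEOF` re-exported here, callers can import it from the package root instead of reaching into `lark.exceptions`. A small sketch (grammar and input are illustrative):

from lark import Lark, UnexpectedEOF    # top-level import works after this change

p = Lark('start: "a" "b"', parser='earley')
try:
    p.parse('a')                        # input ends before the required "b"
except UnexpectedEOF as e:
    print(e.expected)                   # the terminals the parser was still waiting for
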
@@ -19,14 +19,6 @@ class LexError(LarkError):
     pass
 
 
-class UnexpectedEOF(ParseError):
-    def __init__(self, expected):
-        self.expected = expected
-
-        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
-        super(UnexpectedEOF, self).__init__(message)
-
-
 class UnexpectedInput(LarkError):
     """UnexpectedInput Error.
@@ -47,6 +39,7 @@ class UnexpectedInput(LarkError):
             The parser doesn't hold a copy of the text it has to parse,
             so you have to provide it again
         """
+        assert self.pos_in_stream is not None, self
         pos = self.pos_in_stream
         start = max(pos - span, 0)
         end = pos + span
@@ -91,7 +84,7 @@ class UnexpectedInput(LarkError):
                     parse_fn(malformed)
                 except UnexpectedInput as ut:
                     if ut.state == self.state:
-                        if use_accepts and ut.accepts != self.accepts:
+                        if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
                            logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                         (self.state, self.accepts, ut.accepts, i, j))
                            continue
@@ -114,6 +107,19 @@ class UnexpectedInput(LarkError):
         return candidate[0]
 
 
+class UnexpectedEOF(ParseError, UnexpectedInput):
+    def __init__(self, expected, state=None):
+        self.expected = expected
+        self.state = state
+        from .lexer import Token
+        self.token = Token("<EOF>", "")  #, line=-1, column=-1, pos_in_stream=-1)
+        self.pos_in_stream = -1
+        self.line = -1
+        self.column = -1
+
+        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
+        super(UnexpectedEOF, self).__init__(message)
+
+
 class UnexpectedCharacters(LexError, UnexpectedInput):
     def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
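
Since `UnexpectedEOF` now also derives from `UnexpectedInput`, a generic `except UnexpectedInput` handler catches premature end-of-input, and the error carries the synthetic `<EOF>` token and the placeholder positions assigned above. A sketch (grammar and input are illustrative):

from lark import Lark, UnexpectedInput

p = Lark('start: "{" "}"', parser='earley')
try:
    p.parse('{')                        # ends before the closing "}"
except UnexpectedInput as u:            # UnexpectedEOF is now caught here too
    print(type(u).__name__)             # UnexpectedEOF
    print(u.token.type)                 # <EOF>   (the synthetic token set in __init__)
    print(u.line, u.column)             # -1 -1   (placeholders until a real position is known)
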
@@ -299,7 +299,7 @@ class Parser:
         solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
         if not solutions:
             expected_terminals = [t.expect for t in to_scan]
-            raise UnexpectedEOF(expected_terminals)
+            raise UnexpectedEOF(expected_terminals, state={i.s for i in to_scan})
 
         if self.debug:
             from .earley_forest import ForestToPyDotVisitor
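
Passing `state={i.s for i in to_scan}` here is what lets `match_examples` classify inputs that end too early, as in the Earley example at the top of this diff. A quick sketch of the effect (grammar and input are illustrative):

from lark import Lark, UnexpectedEOF

p = Lark('''start: "(" WORD ")"
            %import common.WORD''', parser='earley')
try:
    p.parse('(hello')                   # input ends before ")"
except UnexpectedEOF as u:
    print(u.state is not None)          # True: the EOF error now carries a parser state,
                                        # so u.match_examples(...) has something to compare
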
@@ -113,7 +113,7 @@ class Parser(BaseParser):
             del delayed_matches[i+1]    # No longer needed, so unburden memory
 
             if not next_set and not delayed_matches and not next_to_scan:
-                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan))
+                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state={i.s for i in next_to_scan})
 
             return next_to_scan
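
The same applies to the dynamic-lexer Earley path: its `UnexpectedCharacters` now carries a `state`, so example matching works for mid-input errors as well. A sketch (grammar, explicit lexer choice, and input are illustrative):

from lark import Lark, UnexpectedInput

p = Lark('''start: "(" WORD ")"
            %import common.WORD''', parser='earley', lexer='dynamic')
try:
    p.parse('(hello]')                  # ']' cannot appear here
except UnexpectedInput as u:
    print(type(u).__name__)             # expected: UnexpectedCharacters
    print(u.state is not None)          # True: usable with match_examples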