From 5c6df8e82536afc066ba970c8319342192e07d14 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 18 Jun 2018 15:14:31 +0300 Subject: [PATCH] Moved and restructured exceptions * All exceptions are now under exceptions.py * UnexpectedInput is now superclass of UnexpectedToken and UnexpectedCharacters, all of which support the get_context() and match_examples() methods. --- examples/error_reporting_lalr.py | 17 +++---- lark/__init__.py | 3 +- lark/common.py | 57 --------------------- lark/exceptions.py | 85 ++++++++++++++++++++++++++++++++ lark/lexer.py | 27 +++------- lark/load_grammar.py | 9 ++-- lark/parse_tree_builder.py | 2 +- lark/parser_frontends.py | 2 +- lark/parsers/cyk.py | 2 +- lark/parsers/earley.py | 6 +-- lark/parsers/grammar_analysis.py | 2 +- lark/parsers/lalr_analysis.py | 2 +- lark/parsers/lalr_parser.py | 4 +- lark/parsers/xearley.py | 4 +- lark/tree.py | 15 ++++++ tests/test_parser.py | 3 +- 16 files changed, 131 insertions(+), 109 deletions(-) create mode 100644 lark/exceptions.py diff --git a/examples/error_reporting_lalr.py b/examples/error_reporting_lalr.py index a1055fd..0e355af 100644 --- a/examples/error_reporting_lalr.py +++ b/examples/error_reporting_lalr.py @@ -2,7 +2,7 @@ # This demonstrates example-driven error reporting with the LALR parser # -from lark import Lark, UnexpectedToken +from lark import Lark, UnexpectedInput from .json_parser import json_grammar # Using the grammar from the json_parser example @@ -32,11 +32,11 @@ class JsonTrailingComma(JsonSyntaxError): def parse(json_text): try: j = json_parser.parse(json_text) - except UnexpectedToken as ut: - exc_class = ut.match_examples(json_parser.parse, { - JsonMissingValue: ['{"foo": }'], + except UnexpectedInput as u: + exc_class = u.match_examples(json_parser.parse, { JsonMissingOpening: ['{"foo": ]}', - '{"foor": }}'], + '{"foor": }}', + '{"foo": }'], JsonMissingClosing: ['{"foo": [}', '{', '{"a": 1', @@ -55,15 +55,10 @@ def parse(json_text): }) if not exc_class: raise - raise exc_class(ut.get_context(json_text), ut.line, ut.column) + raise exc_class(u.get_context(json_text), u.line, u.column) def test(): - try: - parse('{"key":') - except JsonMissingValue: - pass - try: parse('{"key": "value"') except JsonMissingClosing: diff --git a/lark/__init__.py b/lark/__init__.py index 0146664..2ff54e5 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -1,8 +1,7 @@ from .tree import Tree from .visitors import Transformer, Visitor, v_args, Discard from .visitors import InlineTransformer, inline_args # XXX Deprecated -from .common import ParseError, GrammarError, UnexpectedToken -from .lexer import UnexpectedInput, LexError +from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from .lark import Lark __version__ = "0.5.6" diff --git a/lark/common.py b/lark/common.py index 78ef205..698a3ec 100644 --- a/lark/common.py +++ b/lark/common.py @@ -7,63 +7,6 @@ Py36 = (sys.version_info[:2] >= (3, 6)) ###{standalone -class GrammarError(Exception): - pass - -class ParseError(Exception): - pass - -class UnexpectedToken(ParseError): - def __init__(self, token, expected, seq, index, considered_rules=None, state=None): - self.token = token - self.expected = expected - self.line = getattr(token, 'line', '?') - self.column = getattr(token, 'column', '?') - self.considered_rules = considered_rules - self.state = state - - try: - context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]]) - except AttributeError: - context = seq[index:index+5] - except TypeError: - context = "" - message = ("Unexpected token %r at line %s, column %s.\n" - "Expected: %s\n" - "Context: %s" % (token, self.line, self.column, expected, context)) - - super(UnexpectedToken, self).__init__(message) - - def match_examples(self, parse_fn, examples): - """ Given a parser instance and a dictionary mapping some label with - some malformed syntax examples, it'll return the label for the - example that bests matches the current error. - """ - assert self.state, "Not supported for this exception" - - candidate = None - for label, example in examples.items(): - assert not isinstance(example, STRING_TYPE) - - for malformed in example: - try: - parse_fn(malformed) - except UnexpectedToken as ut: - if ut.state == self.state: - if ut.token == self.token: # Try exact match first - return label - elif not candidate: - candidate = label - - return candidate - - def get_context(self, text, span=10): - pos = self.token.pos_in_stream - start = max(pos - span, 0) - end = pos + span - before = text[start:pos].rsplit('\n', 1)[-1] - after = text[pos:end].split('\n', 1)[0] - return before + after + '\n' + ' ' * len(before) + '^\n' ###} diff --git a/lark/exceptions.py b/lark/exceptions.py new file mode 100644 index 0000000..7bf1a78 --- /dev/null +++ b/lark/exceptions.py @@ -0,0 +1,85 @@ +from .utils import STRING_TYPE + +class LarkError(Exception): + pass + +class GrammarError(LarkError): + pass + +class ParseError(LarkError): + pass + +class LexError(LarkError): + pass + +class UnexpectedInput(LarkError): + def get_context(self, text, span=10): + pos = self.pos_in_stream + start = max(pos - span, 0) + end = pos + span + before = text[start:pos].rsplit('\n', 1)[-1] + after = text[pos:end].split('\n', 1)[0] + return before + after + '\n' + ' ' * len(before) + '^\n' + + def match_examples(self, parse_fn, examples): + """ Given a parser instance and a dictionary mapping some label with + some malformed syntax examples, it'll return the label for the + example that bests matches the current error. + """ + assert self.state is not None, "Not supported for this exception" + + candidate = None + for label, example in examples.items(): + assert not isinstance(example, STRING_TYPE) + + for malformed in example: + try: + parse_fn(malformed) + except UnexpectedInput as ut: + if ut.state == self.state: + try: + if ut.token == self.token: # Try exact match first + return label + except AttributeError: + pass + if not candidate: + candidate = label + + return candidate + + +class UnexpectedCharacters(LexError, UnexpectedInput): + def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None): + context = seq[lex_pos:lex_pos+10] + message = "No token defined for '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column) + if allowed: + message += '\n\nExpecting: %s\n' % allowed + + super(UnexpectedCharacters, self).__init__(message) + + self.line = line + self.column = column + self.context = context + self.allowed = allowed + self.considered_tokens = considered_tokens + self.pos_in_stream = lex_pos + self.state = state + + +class UnexpectedToken(ParseError, UnexpectedInput): + def __init__(self, token, expected, considered_rules=None, state=None): + self.token = token + self.expected = expected # XXX str shouldn't necessary + self.line = getattr(token, 'line', '?') + self.column = getattr(token, 'column', '?') + self.considered_rules = considered_rules + self.state = state + self.pos_in_stream = token.pos_in_stream + + message = ("Unexpected token %r at line %s, column %s.\n" + "Expected: %s\n" + % (token, self.line, self.column, ', '.join(self.expected))) + + super(UnexpectedToken, self).__init__(message) + + diff --git a/lark/lexer.py b/lark/lexer.py index e332e22..ed81d37 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -4,26 +4,9 @@ import re from .utils import Str, classify from .common import PatternStr, PatternRE, TokenDef +from .exceptions import UnexpectedCharacters ###{standalone -class LexError(Exception): - pass - -class UnexpectedInput(LexError): - def __init__(self, seq, lex_pos, line, column, allowed=None, considered_rules=None): - context = seq[lex_pos:lex_pos+5] - message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column) - if allowed: - message += '\n\nExpecting: %s\n' % allowed - - super(UnexpectedInput, self).__init__(message) - - self.line = line - self.column = column - self.context = context - self.allowed = allowed - self.considered_rules = considered_rules - class Token(Str): __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column') @@ -84,8 +67,9 @@ class LineCounter: class _Lex: "Built to serve both Lexer and ContextualLexer" - def __init__(self, lexer): + def __init__(self, lexer, state=None): self.lexer = lexer + self.state = state def lex(self, stream, newline_types, ignore_types): newline_types = list(newline_types) @@ -118,7 +102,7 @@ class _Lex: break else: if line_ctr.char_pos < len(stream): - raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column) + raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state) break class UnlessCallback: @@ -251,9 +235,10 @@ class ContextualLexer: self.parser_state = state def lex(self, stream): - l = _Lex(self.lexers[self.parser_state]) + l = _Lex(self.lexers[self.parser_state], self.parser_state) for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): yield x l.lexer = self.lexers[self.parser_state] + l.state = self.parser_state diff --git a/lark/load_grammar.py b/lark/load_grammar.py index bd6fa36..56524d7 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -6,14 +6,15 @@ import re from ast import literal_eval from copy import deepcopy -from .lexer import Token, UnexpectedInput +from .lexer import Token + from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import LALR -from .parsers.lalr_parser import UnexpectedToken -from .common import GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef +from .common import LexerConf, ParserConf, PatternStr, PatternRE, TokenDef from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol from .utils import classify, suppress +from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken from .tree import Tree, SlottedTree as ST from .visitors import Transformer, Visitor, v_args @@ -576,7 +577,7 @@ class GrammarLoader: try: tree = self.canonize_tree.transform( self.parser.parse(grammar_text+'\n') ) - except UnexpectedInput as e: + except UnexpectedCharacters as e: raise GrammarError("Unexpected input %r at line %d column %d in %s" % (e.context, e.line, e.column, name)) except UnexpectedToken as e: context = e.get_context(grammar_text) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index abca756..e3e14ee 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -1,4 +1,4 @@ -from .common import GrammarError +from .exceptions import GrammarError from .utils import suppress from .lexer import Token from .grammar import Rule diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index f322524..08e2d0e 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -4,7 +4,7 @@ from .utils import get_regexp_width from .parsers.grammar_analysis import GrammarAnalyzer from .lexer import Lexer, ContextualLexer, Token -from .common import GrammarError +from .exceptions import GrammarError from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk from .tree import Tree diff --git a/lark/parsers/cyk.py b/lark/parsers/cyk.py index e2bcd83..d65d485 100644 --- a/lark/parsers/cyk.py +++ b/lark/parsers/cyk.py @@ -8,7 +8,7 @@ from collections import defaultdict import itertools -from ..common import ParseError +from ..exceptions import ParseError from ..lexer import Token from ..tree import Tree from ..grammar import Terminal as T, NonTerminal as NT, Symbol diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 65e0ea5..4ff26b2 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -15,7 +15,7 @@ from ..tree import Tree from ..visitors import Transformer_InPlace, v_args -from ..common import ParseError, UnexpectedToken +from ..exceptions import ParseError, UnexpectedToken from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal @@ -197,8 +197,8 @@ class Parser: next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token)) if not next_set: - expect = {i.expect for i in column.to_scan} - raise UnexpectedToken(token, expect, stream, set(column.to_scan)) + expect = {i.expect.name for i in column.to_scan} + raise UnexpectedToken(token, expect, considered_rules=set(column.to_scan)) return next_set diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index f49e4bc..3568414 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -1,6 +1,6 @@ from ..utils import bfs, fzset, classify -from ..common import GrammarError +from ..exceptions import GrammarError from ..grammar import Rule, Terminal, NonTerminal diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 6903be9..6eec0a1 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -10,7 +10,7 @@ import logging from collections import defaultdict from ..utils import classify, classify_bool, bfs, fzset -from ..common import GrammarError +from ..exceptions import GrammarError from .grammar_analysis import GrammarAnalyzer, Terminal diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 164a227..8fa56f5 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -2,7 +2,7 @@ """ # Author: Erez Shinan (2017) # Email : erezshin@gmail.com -from ..common import UnexpectedToken +from ..exceptions import UnexpectedToken from .lalr_analysis import LALR_Analyzer, Shift @@ -46,7 +46,7 @@ class _Parser: return states[state][key] except KeyError: expected = states[state].keys() - raise UnexpectedToken(token, expected, seq, i, state=state) + raise UnexpectedToken(token, expected, state=state) # TODO filter out rules from expected def reduce(rule): size = len(rule.expansion) diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index 5e8fb28..02698fb 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -20,8 +20,8 @@ from collections import defaultdict -from ..common import ParseError -from ..lexer import Token, UnexpectedInput +from ..exceptions import ParseError, UnexpectedInput +from ..lexer import Token from ..tree import Tree from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal, Terminal diff --git a/lark/tree.py b/lark/tree.py index 000823e..5a29c0f 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -110,6 +110,21 @@ class Tree(object): self.data = data self.children = children + # XXX Deprecated! Here for backwards compatibility <0.6.0 + @property + def line(self): + return self.meta.line + @property + def column(self): + return self.meta.column + @property + def end_line(self): + return self.meta.end_line + @property + def end_column(self): + return self.meta.end_column + + class SlottedTree(Tree): __slots__ = 'data', 'children', 'rule', '_meta' diff --git a/tests/test_parser.py b/tests/test_parser.py index f48f3bd..36cb142 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -18,8 +18,7 @@ from io import ( logging.basicConfig(level=logging.INFO) from lark.lark import Lark -from lark.common import GrammarError, ParseError, UnexpectedToken -from lark.lexer import LexError, UnexpectedInput +from lark.exceptions import GrammarError, ParseError, UnexpectedToken, LexError, UnexpectedInput from lark.tree import Tree from lark.visitors import Transformer