diff --git a/docs/how_to_use.md b/docs/how_to_use.md index 886b440..303098f 100644 --- a/docs/how_to_use.md +++ b/docs/how_to_use.md @@ -30,12 +30,13 @@ Use the reference pages for more in-depth explanations. (links in the [main page ## LALR usage -By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure `logging` framework beforehand. For example: +By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure the `logger` beforehand. For example: ```python -from lark import Lark import logging -logging.basicConfig(level=logging.DEBUG) +from lark import Lark, logger + +logger.setLevel(logging.DEBUG) collision_grammar = ''' start: as as diff --git a/lark/__init__.py b/lark/__init__.py index 8ddab96..1b5e7e3 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -1,3 +1,4 @@ +from .utils import logger from .tree import Tree from .visitors import Transformer, Visitor, v_args, Discard from .visitors import InlineTransformer, inline_args # XXX Deprecated diff --git a/lark/exceptions.py b/lark/exceptions.py index d1b956d..9d2d8dc 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,7 +1,6 @@ -from .utils import STRING_TYPE +from .utils import STRING_TYPE, logger ###{standalone -import logging class LarkError(Exception): @@ -62,24 +61,24 @@ class UnexpectedInput(LarkError): except UnexpectedInput as ut: if ut.state == self.state: if use_accepts and ut.accepts != self.accepts: - logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % + logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % (self.state, self.accepts, ut.accepts, i, j)) continue try: if ut.token == self.token: # Try exact match first - logging.debug("Exact Match at example [%s][%s]" % (i, j)) + logger.debug("Exact Match at example [%s][%s]" % (i, j)) 
return label if token_type_match_fallback: # Fallback to token types match if (ut.token.type == self.token.type) and not candidate[-1]: - logging.debug("Token Type Fallback at example [%s][%s]" % (i, j)) + logger.debug("Token Type Fallback at example [%s][%s]" % (i, j)) candidate = label, True except AttributeError: pass if not candidate[0]: - logging.debug("Same State match at example [%s][%s]" % (i, j)) + logger.debug("Same State match at example [%s][%s]" % (i, j)) candidate = label, False return candidate[0] diff --git a/lark/lark.py b/lark/lark.py index 8371943..9a4e001 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -1,10 +1,10 @@ from __future__ import absolute_import -import sys, os, pickle, hashlib, logging +import sys, os, pickle, hashlib from io import open -from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii +from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger from .load_grammar import load_grammar from .tree import Tree from .common import LexerConf, ParserConf @@ -214,7 +214,7 @@ class Lark(Serialize): cache_fn = '.lark_cache_%s.tmp' % md5 if FS.exists(cache_fn): - logging.debug('Loading grammar from cache: %s', cache_fn) + logger.debug('Loading grammar from cache: %s', cache_fn) with FS.open(cache_fn, 'rb') as f: self._load(f, self.options.transformer, self.options.postlex) return @@ -291,7 +291,7 @@ class Lark(Serialize): self.lexer = self._build_lexer() if cache_fn: - logging.debug('Saving grammar to cache: %s', cache_fn) + logger.debug('Saving grammar to cache: %s', cache_fn) with FS.open(cache_fn, 'wb') as f: self.save(f) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 59e9a06..098639d 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -10,11 +10,11 @@ is better documented here: http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ """ -import logging from collections import deque from ..visitors import Transformer_InPlace, v_args from 
..exceptions import UnexpectedEOF, UnexpectedToken +from ..utils import logger from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal from .earley_common import Item, TransitiveItem @@ -301,7 +301,7 @@ class Parser: try: debug_walker = ForestToPyDotVisitor() except ImportError: - logging.warning("Cannot find dependency 'pydot', will not generate sppf debug image") + logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image") else: debug_walker.visit(solutions[0], "sppf.png") diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 8890c3c..7a94b4d 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -6,10 +6,9 @@ For now, shift/reduce conflicts are automatically resolved as shifts. # Author: Erez Shinan (2017) # Email : erezshin@gmail.com -import logging -from collections import defaultdict, deque +from collections import defaultdict -from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator +from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger from ..exceptions import GrammarError from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet @@ -256,8 +255,8 @@ class LALR_Analyzer(GrammarAnalyzer): raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ]))) if la in actions: if self.debug: - logging.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) - logging.warning(' * %s', list(rules)[0]) + logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) + logger.warning(' * %s', list(rules)[0]) else: actions[la] = (Reduce, list(rules)[0]) m[state] = { k.name: v for k, v in actions.items() } diff --git a/lark/utils.py b/lark/utils.py index c70b947..0c41e6b 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -4,51 +4,15 @@ from functools import reduce from ast import literal_eval from 
collections import deque -class fzset(frozenset): - def __repr__(self): - return '{%s}' % ', '.join(map(repr, self)) - - -def classify_bool(seq, pred): - true_elems = [] - false_elems = [] - - for elem in seq: - if pred(elem): - true_elems.append(elem) - else: - false_elems.append(elem) - - return true_elems, false_elems - - - -def bfs(initial, expand): - open_q = deque(list(initial)) - visited = set(open_q) - while open_q: - node = open_q.popleft() - yield node - for next_node in expand(node): - if next_node not in visited: - visited.add(next_node) - open_q.append(next_node) - - +###{standalone +import logging +logger = logging.getLogger("lark") +logger.addHandler(logging.StreamHandler()) +# Set to highest level, since we have some warnings amongst the code +# By default, we should not output any log messages +logger.setLevel(logging.CRITICAL) -def _serialize(value, memo): - if isinstance(value, Serialize): - return value.serialize(memo) - elif isinstance(value, list): - return [_serialize(elem, memo) for elem in value] - elif isinstance(value, frozenset): - return list(value) # TODO reversible? 
- elif isinstance(value, dict): - return {key:_serialize(elem, memo) for key, elem in value.items()} - return value - -###{standalone def classify(seq, key=None, value=None): d = {} for item in seq: @@ -302,13 +266,11 @@ def combine_alternatives(lists): return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init) - class FS: open = open exists = os.path.exists - def isascii(s): """ str.isascii only exists in python3.7+ """ try: @@ -318,4 +280,46 @@ def isascii(s): s.encode('ascii') return True except (UnicodeDecodeError, UnicodeEncodeError): - return False \ No newline at end of file + return False + + +class fzset(frozenset): + def __repr__(self): + return '{%s}' % ', '.join(map(repr, self)) + + +def classify_bool(seq, pred): + true_elems = [] + false_elems = [] + + for elem in seq: + if pred(elem): + true_elems.append(elem) + else: + false_elems.append(elem) + + return true_elems, false_elems + + +def bfs(initial, expand): + open_q = deque(list(initial)) + visited = set(open_q) + while open_q: + node = open_q.popleft() + yield node + for next_node in expand(node): + if next_node not in visited: + visited.add(next_node) + open_q.append(next_node) + + +def _serialize(value, memo): + if isinstance(value, Serialize): + return value.serialize(memo) + elif isinstance(value, list): + return [_serialize(elem, memo) for elem in value] + elif isinstance(value, frozenset): + return list(value) # TODO reversible? 
+ elif isinstance(value, dict): + return {key:_serialize(elem, memo) for key, elem in value.items()} + return value \ No newline at end of file diff --git a/tests/__main__.py b/tests/__main__.py index cb26eb4..9ef9f1b 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function import unittest import logging +from lark import logger from .test_trees import TestTrees from .test_tools import TestStandalone @@ -11,11 +12,13 @@ from .test_reconstructor import TestReconstructor try: from .test_nearley.test_nearley import TestNearley except ImportError: - logging.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)") + logger.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)") # from .test_selectors import TestSelectors # from .test_grammars import TestPythonG, TestConfigG +from .test_logger import Testlogger + from .test_parser import ( TestLalrStandard, TestEarleyStandard, @@ -31,7 +34,7 @@ from .test_parser import ( TestParsers, ) -logging.basicConfig(level=logging.INFO) +logger.setLevel(logging.INFO) if __name__ == '__main__': unittest.main() diff --git a/tests/test_logger.py b/tests/test_logger.py new file mode 100644 index 0000000..93dc8ed --- /dev/null +++ b/tests/test_logger.py @@ -0,0 +1,65 @@ +import logging +from contextlib import contextmanager +from lark import Lark, logger +from unittest import TestCase, main + +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +@contextmanager +def capture_log(): + stream = StringIO() + orig_handler = logger.handlers[0] + del logger.handlers[:] + logger.addHandler(logging.StreamHandler(stream)) + yield stream + del logger.handlers[:] + logger.addHandler(orig_handler) + +class Testlogger(TestCase): + + def test_debug(self): + logger.setLevel(logging.DEBUG) + collision_grammar = ''' + start: as as + as: a* + a: "a" + ''' + with capture_log() as log: + 
Lark(collision_grammar, parser='lalr', debug=True) + + log = log.getvalue() + # the grammar has a Shift/Reduce conflict on terminal A, + # so A should be named in the logged warning + self.assertIn("A", log) + + def test_non_debug(self): + logger.setLevel(logging.DEBUG) + collision_grammar = ''' + start: as as + as: a* + a: "a" + ''' + with capture_log() as log: + Lark(collision_grammar, parser='lalr', debug=False) + log = log.getvalue() + # no log message + self.assertEqual(len(log), 0) + + def test_loglevel_higher(self): + logger.setLevel(logging.ERROR) + collision_grammar = ''' + start: as as + as: a* + a: "a" + ''' + with capture_log() as log: + Lark(collision_grammar, parser='lalr', debug=True) + log = log.getvalue() + # no log message + self.assertEqual(len(log), 0) + +if __name__ == '__main__': + main() diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py index 647f489..1ad6449 100644 --- a/tests/test_nearley/test_nearley.py +++ b/tests/test_nearley/test_nearley.py @@ -6,16 +6,17 @@ import logging import os import codecs -logging.basicConfig(level=logging.INFO) - +from lark import logger from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main +logger.setLevel(logging.INFO) + TEST_PATH = os.path.abspath(os.path.dirname(__file__)) NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley') BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') if not os.path.exists(NEARLEY_PATH): - logging.warn("Nearley not installed. Skipping Nearley tests!") + logger.warn("Nearley not installed. 
Skipping Nearley tests!") raise ImportError("Skipping Nearley tests!") import js2py # Ensures that js2py exists, to avoid failing tests diff --git a/tests/test_parser.py b/tests/test_parser.py index 48a4674..2f6a15e 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,13 +23,13 @@ from io import ( open, ) -logging.basicConfig(level=logging.INFO) try: import regex except ImportError: regex = None +from lark import logger from lark.lark import Lark from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from lark.tree import Tree @@ -37,6 +37,7 @@ from lark.visitors import Transformer, Transformer_InPlace, v_args from lark.grammar import Rule from lark.lexer import TerminalDef, Lexer, TraditionalLexer +logger.setLevel(logging.INFO) __path__ = os.path.dirname(__file__)