From 74c94bb3695453e13ba540587e6d308b5f9de827 Mon Sep 17 00:00:00 2001 From: julienmalard Date: Tue, 10 Nov 2020 10:34:06 -0500 Subject: [PATCH] Tests now pass! --- lark/load_grammar.py | 6 +++--- lark/reconstruct.py | 7 ++----- lark/utils.py | 12 ++++++++++++ tests/test_nearley/nearley | 2 +- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index eb0273c..dcf90dd 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -6,7 +6,7 @@ from copy import copy, deepcopy from io import open import pkgutil -from .utils import bfs, eval_escaping, Py36, logger, classify_bool +from .utils import bfs, eval_escaping, Py36, logger, classify_bool, isalnum, isalpha from .lexer import Token, TerminalDef, PatternStr, PatternRE from .parse_tree_builder import ParseTreeBuilder @@ -328,9 +328,9 @@ class PrepareAnonTerminals(Transformer_InPlace): try: term_name = _TERMINAL_NAMES[value] except KeyError: - if value.isalnum() and value[0].isalpha() and value.upper() not in self.term_set: + if isalnum(value) and isalpha(value[0]) and value.upper() not in self.term_set: with suppress(UnicodeEncodeError): - value.upper().encode('ascii') # Make sure we don't have unicode in our terminal names + value.upper().encode('utf8') # Why shouldn't we have unicode in our terminal names? term_name = value.upper() if term_name in self.term_set: diff --git a/lark/reconstruct.py b/lark/reconstruct.py index e7cff31..614fb5e 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -8,6 +8,7 @@ from .lexer import Token, PatternStr from .grammar import Terminal, NonTerminal from .tree_matcher import TreeMatcher, is_discarded_terminal +from .utils import isalnum def is_iter_empty(i): try: @@ -56,10 +57,6 @@ class WriteTokensTransformer(Transformer_InPlace): return to_write -def _isalnum(x): - # Categories defined here: https://www.python.org/dev/peps/pep-3131/ - return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'] - class Reconstructor(TreeMatcher): """ A Reconstructor that will, given a full parse Tree, generate source code. @@ -97,7 +94,7 @@ class Reconstructor(TreeMatcher): y = [] prev_item = '' for item in x: - if prev_item and item and _isalnum(prev_item[-1]) and _isalnum(item[0]): + if prev_item and item and isalnum(prev_item[-1]) and isalnum(item[0]): y.append(' ') y.append(item) prev_item = item diff --git a/lark/utils.py b/lark/utils.py index cfd4306..b0f0e22 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -1,4 +1,5 @@ import sys +import unicodedata import os from functools import reduce from ast import literal_eval @@ -12,6 +13,17 @@ logger.addHandler(logging.StreamHandler()) # By default, we should not output any log messages logger.setLevel(logging.CRITICAL) +def isalnum(x): + if len(x) != 1: + return all(isalnum(y) for y in x) + return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'] + + +def isalpha(x): + if len(x) != 1: + return all(isalpha(y) for y in x) + return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'] + def classify(seq, key=None, value=None): d = {} diff --git a/tests/test_nearley/nearley b/tests/test_nearley/nearley index a46b374..cf8925f 160000 --- a/tests/test_nearley/nearley +++ b/tests/test_nearley/nearley @@ -1 +1 @@ -Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44 +Subproject commit cf8925f729bde741a3076c5856c0c0862bc7f5de