Browse Source

Response to code review

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
julienmalard 5 years ago
parent
commit
364f9ae3a5
3 changed files with 11 additions and 9 deletions
  1. +3
    -5
      lark/load_grammar.py
  2. +2
    -2
      lark/reconstruct.py
  3. +6
    -2
      lark/utils.py

+ 3
- 5
lark/load_grammar.py View File

@@ -6,7 +6,7 @@ from copy import copy, deepcopy
from io import open from io import open
import pkgutil import pkgutil


from .utils import bfs, eval_escaping, Py36, logger, classify_bool, isalnum, isalpha
from .utils import bfs, eval_escaping, Py36, logger, classify_bool, is_id_continue, isalpha
from .lexer import Token, TerminalDef, PatternStr, PatternRE from .lexer import Token, TerminalDef, PatternStr, PatternRE


from .parse_tree_builder import ParseTreeBuilder from .parse_tree_builder import ParseTreeBuilder
@@ -328,10 +328,8 @@ class PrepareAnonTerminals(Transformer_InPlace):
try: try:
term_name = _TERMINAL_NAMES[value] term_name = _TERMINAL_NAMES[value]
except KeyError: except KeyError:
if isalnum(value) and isalpha(value[0]) and value.upper() not in self.term_set:
with suppress(UnicodeEncodeError):
value.upper().encode('utf8') # Why shouldn't we have unicode in our terminal names?
term_name = value.upper()
if is_id_continue(value) and isalpha(value[0]) and value.upper() not in self.term_set:
term_name = value.upper()


if term_name in self.term_set: if term_name in self.term_set:
term_name = None term_name = None


+ 2
- 2
lark/reconstruct.py View File

@@ -8,7 +8,7 @@ from .lexer import Token, PatternStr
from .grammar import Terminal, NonTerminal from .grammar import Terminal, NonTerminal


from .tree_matcher import TreeMatcher, is_discarded_terminal from .tree_matcher import TreeMatcher, is_discarded_terminal
from .utils import isalnum
from .utils import is_id_continue


def is_iter_empty(i): def is_iter_empty(i):
try: try:
@@ -94,7 +94,7 @@ class Reconstructor(TreeMatcher):
y = [] y = []
prev_item = '' prev_item = ''
for item in x: for item in x:
if prev_item and item and isalnum(prev_item[-1]) and isalnum(item[0]):
if prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
y.append(' ') y.append(' ')
y.append(item) y.append(item)
prev_item = item prev_item = item


+ 6
- 2
lark/utils.py View File

@@ -13,9 +13,13 @@ logger.addHandler(logging.StreamHandler())
# By default, we should not output any log messages # By default, we should not output any log messages
logger.setLevel(logging.CRITICAL) logger.setLevel(logging.CRITICAL)


def isalnum(x):
def is_id_continue(x):
"""
Checks if all characters in `x` are alphanumeric characters (Unicode standard, so diacritics, Indian vowels, non-Latin
numbers, etc. all pass). Corresponds to the characters Python accepts as the `ID_CONTINUE` part of an identifier.
"""
if len(x) != 1: if len(x) != 1:
return all(isalnum(y) for y in x)
return all(is_id_continue(y) for y in x)
return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc'] return unicodedata.category(x) in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Mn', 'Mc', 'Nd', 'Pc']






Loading…
Cancel
Save