# -*- coding: utf-8 -*-
from __future__ import absolute_import

import unittest
import logging
import os
import sys
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x; Python 3.x only has io.StringIO (imported below)
    cStringIO = None
from io import (
        StringIO as uStringIO,
        open,
    )

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()
class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())
    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )
    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)

        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )
    def test_comment_in_rule_definition(self):
        g = Lark("""start: a
                    a: "a"
                    // A comment
                    // Another
                     | "b"
                 """)
        r = g.parse('b')
        self.assertEqual( r.children[0].data, "a" )

    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"
            def A(self, tok):
                return tok.update(value=tok.upper())

        # Test regular
        g = """start: a
               a : A
               A: "x"
            """
        p = Lark(g, parser='lalr')
        r = T(False).transform(p.parse("x"))
        self.assertEqual( r.children, ["x!"] )
        r = T().transform(p.parse("x"))
        self.assertEqual( r.children, ["X!"] )

        # Test internal transformer
        p = Lark(g, parser='lalr', transformer=T())
        r = p.parse("x")
        self.assertEqual( r.children, ["X!"] )
    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )
    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)
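# The two factories below follow unittest's class-per-configuration pattern:
# each call builds a TestCase subclass closed over one lexer (and parser)
# choice, renames it, and registers it in globals() so that unittest.main()
# discovers every combination as a separate test class.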
def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+

            statement: "r"
                     | "c" /[a-z]/+

            %ignore " "
            """

            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)

        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete lexer")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])
        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])

        def test_earley_repeating_empty(self):
            # This was a sneaky bug!

            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])
        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!

            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"

            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                Tree('a', [
                    Tree('b', [])
                ]),
                Tree('c', [
                    'X'
                ])
            ])
            self.assertEqual(res, expected)
        def test_fruitflies_ambig(self):
            grammar = """
            start: noun verb noun        -> simple
                 | noun verb "like" noun -> comparative

            noun: adj? NOUN
            verb: VERB
            adj: ADJ

            NOUN: "flies" | "bananas" | "fruit"
            VERB: "like" | "flies"
            ADJ: "fruit"

            %import common.WS
            %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                Tree('comparative', [
                    Tree('noun', ['fruit']),
                    Tree('verb', ['flies']),
                    Tree('noun', ['bananas'])
                ]),
                Tree('simple', [
                    Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                    Tree('verb', ['like']),
                    Tree('noun', ['bananas'])
                ])
            ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))
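        # The dynamic_complete lexer keeps every possible way of slicing the
        # input into terminals, so even the single word "cat" is ambiguous:
        # NAME+ can consume it as one, two, or three tokens.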
        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete lexer")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a', 't')
            })
        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley
class CustomLexer(Lexer):
    """
    The purpose of this custom lexer is to test the integration,
    so it uses the TraditionalLexer as its implementation, without any custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)
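# Like _make_full_earley_test above, but parameterized over both the lexer
# and the parser. _Lark/_Lark_open are thin wrappers so every test in the
# matrix builds its parser with the same configuration.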
def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')
        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')
        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)

            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the number of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the number of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the number of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the number of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the number of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the number of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())
        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          #list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))
        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"

                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')
        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')
        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """

            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))
        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])
        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """)
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: ("A"?)?
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])

            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"

                %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)
        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                 """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)
        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term

                A: "a"
                """, start='term')
            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)
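        # Terminal priorities (the `.2` / `.-20` suffixes below) steer which
        # terminal the standard lexer picks when several could match at the
        # same position; dynamic lexers resolve such conflicts during parsing
        # instead, which is why this test is standard-lexer only.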
        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['ab'])
        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])
        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)
        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')
        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')
        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """

            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])
        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')
        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                 """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                 """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')
        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now")  # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)
        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)
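        # With maybe_placeholders=True, a []-optional whose contents would
        # appear in the tree leaves an explicit None when it doesn't match,
        # so children keep a fixed shape. Filtered-out items (anonymous
        # tokens, inlined `_` rules) and `?` optionals get no placeholder,
        # as the assertions below demonstrate.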
        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                        A: "a"
                        _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])
        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """

            parser = _Lark(grammar)

            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"

            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            parser = _Lark(grammar)
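        # serialize() captures the analyzed parser as plain data, and
        # deserialize() rebuilds it without re-running grammar analysis.
        # memo_serialize() additionally deduplicates shared objects (here
        # Rule and TerminalDef) into a memo that is passed back to
        # deserialize() alongside the namespace.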
        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
                _ANY: /./
                b: "B"
            """
            parser = _Lark(grammar)
            d = parser.serialize()
            parser2 = Lark.deserialize(d, {}, {})
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

            namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
            d, m = parser.memo_serialize(namespace.values())
            parser3 = Lark.deserialize(d, namespace, m)
            self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )
        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
                ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
                          !tail : SA "@" | SB "@" | SC "@" | SD "@"
                          SA : "a" /\n/
                          SB : /b./s
                          SC : "c" /[^a-z]/
                          SD : "d" /\s/
                       """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            for re in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(re))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)
    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    globals()[_NAME] = _TestParser

# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
]
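# Each (lexer, parser) pair becomes a generated TestCase class, e.g.
# TestLalrStandard or TestEarleyDynamic, so a failure report names the
# exact configuration that broke.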
for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)

if __name__ == '__main__':
    unittest.main()