This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

185 lines
4.5 KiB

# coding=utf-8
import json
import unittest
from unittest import TestCase

from lark import Lark
from lark.reconstruct import Reconstructor
  7. common = """
  8. %import common (WS_INLINE, NUMBER, WORD)
  9. %ignore WS_INLINE
  10. """
  11. def _remove_ws(s):
  12. return s.replace(' ', '').replace('\n','')
  13. class TestReconstructor(TestCase):
  14. def assert_reconstruct(self, grammar, code):
  15. parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
  16. tree = parser.parse(code)
  17. new = Reconstructor(parser).reconstruct(tree)
  18. self.assertEqual(_remove_ws(code), _remove_ws(new))
  19. def test_starred_rule(self):
  20. g = """
  21. start: item*
  22. item: NL
  23. | rule
  24. rule: WORD ":" NUMBER
  25. NL: /(\\r?\\n)+\\s*/
  26. """ + common
  27. code = """
  28. Elephants: 12
  29. """
  30. self.assert_reconstruct(g, code)
  31. def test_starred_group(self):
  32. g = """
  33. start: (rule | NL)*
  34. rule: WORD ":" NUMBER
  35. NL: /(\\r?\\n)+\\s*/
  36. """ + common
  37. code = """
  38. Elephants: 12
  39. """
  40. self.assert_reconstruct(g, code)
  41. def test_alias(self):
  42. g = """
  43. start: line*
  44. line: NL
  45. | rule
  46. | "hello" -> hi
  47. rule: WORD ":" NUMBER
  48. NL: /(\\r?\\n)+\\s*/
  49. """ + common
  50. code = """
  51. Elephants: 12
  52. hello
  53. """
  54. self.assert_reconstruct(g, code)
  55. def test_keep_tokens(self):
  56. g = """
  57. start: (NL | stmt)*
  58. stmt: var op var
  59. !op: ("+" | "-" | "*" | "/")
  60. var: WORD
  61. NL: /(\\r?\\n)+\s*/
  62. """ + common
  63. code = """
  64. a+b
  65. """
  66. self.assert_reconstruct(g, code)
  67. def test_expand_rule(self):
  68. g = """
  69. ?start: (NL | mult_stmt)*
  70. ?mult_stmt: sum_stmt ["*" sum_stmt]
  71. ?sum_stmt: var ["+" var]
  72. var: WORD
  73. NL: /(\\r?\\n)+\s*/
  74. """ + common
  75. code = ['a', 'a*b', 'a+b', 'a*b+c', 'a+b*c', 'a+b*c+d']
  76. for c in code:
  77. self.assert_reconstruct(g, c)
  78. def test_json_example(self):
  79. test_json = '''
  80. {
  81. "empty_object" : {},
  82. "empty_array" : [],
  83. "booleans" : { "YES" : true, "NO" : false },
  84. "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
  85. "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ],
  86. "nothing" : null
  87. }
  88. '''
  89. json_grammar = r"""
  90. ?start: value
  91. ?value: object
  92. | array
  93. | string
  94. | SIGNED_NUMBER -> number
  95. | "true" -> true
  96. | "false" -> false
  97. | "null" -> null
  98. array : "[" [value ("," value)*] "]"
  99. object : "{" [pair ("," pair)*] "}"
  100. pair : string ":" value
  101. string : ESCAPED_STRING
  102. %import common.ESCAPED_STRING
  103. %import common.SIGNED_NUMBER
  104. %import common.WS
  105. %ignore WS
  106. """
  107. json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
  108. tree = json_parser.parse(test_json)
  109. new_json = Reconstructor(json_parser).reconstruct(tree)
  110. self.assertEqual(json.loads(new_json), json.loads(test_json))
  111. def test_switch_grammar_unicode_terminal(self):
  112. """
  113. This test checks that a parse tree built with a grammar containing only ascii characters can be reconstructed
  114. with a grammar that has unicode rules (or vice versa). The original bug assigned ANON terminals to unicode
  115. keywords, which offsets the ANON terminal count in the unicode grammar and causes subsequent identical ANON
  116. tokens (e.g., `+=`) to mis-match between the two grammars.
  117. """
  118. g1 = """
  119. start: (NL | stmt)*
  120. stmt: "keyword" var op var
  121. !op: ("+=" | "-=" | "*=" | "/=")
  122. var: WORD
  123. NL: /(\\r?\\n)+\s*/
  124. """ + common
  125. g2 = """
  126. start: (NL | stmt)*
  127. stmt: "குறிப்பு" var op var
  128. !op: ("+=" | "-=" | "*=" | "/=")
  129. var: WORD
  130. NL: /(\\r?\\n)+\s*/
  131. """ + common
  132. code = """
  133. keyword x += y
  134. """
  135. l1 = Lark(g1, parser='lalr')
  136. l2 = Lark(g2, parser='lalr')
  137. r = Reconstructor(l2)
  138. tree = l1.parse(code)
  139. code2 = r.reconstruct(tree)
  140. assert l2.parse(code2) == tree
  141. if __name__ == '__main__':
  142. unittest.main()