This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

104 lines
2.9 KiB

  1. """
  2. Using lexer dynamic_complete
  3. ============================
  4. Demonstrates how to use ``lexer='dynamic_complete'`` and ``ambiguity='explicit'``
  5. Sometimes you have data that is highly ambiguous or 'broken' in some sense.
  6. When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be able
  7. to parse just about anything as long as there is a valid way to generate it from
  8. the Grammar, including looking 'into' the Regexes.
  9. This example shows how to parse a JSON input where all quotes have been
  10. replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}``
  11. Notice that underscores might still appear inside strings, so a potentially
  12. valid reading of the above in normal JSON might be:
  13. ``{"foo_:{}, _bar": [], "baz": ""}``
  14. """
  15. from pprint import pprint
  16. from lark import Lark, Tree, Transformer, v_args
  17. from lark.visitors import Transformer_InPlace
  18. GRAMMAR = r"""
  19. %import common.SIGNED_NUMBER
  20. %import common.WS_INLINE
  21. %import common.NEWLINE
  22. %ignore WS_INLINE
  23. ?start: value
  24. ?value: object
  25. | array
  26. | string
  27. | SIGNED_NUMBER -> number
  28. | "true" -> true
  29. | "false" -> false
  30. | "null" -> null
  31. array : "[" [value ("," value)*] "]"
  32. object : "{" [pair ("," pair)*] "}"
  33. pair : string ":" value
  34. string: STRING
  35. STRING : ESCAPED_STRING
  36. ESCAPED_STRING: QUOTE_CHAR _STRING_ESC_INNER QUOTE_CHAR
  37. QUOTE_CHAR: "_"
  38. _STRING_INNER: /.*/
  39. _STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/
  40. """
  41. def score(tree: Tree):
  42. return sum(len(t.children) for t in tree.iter_subtrees())
  43. class RemoveAmbiguities(Transformer_InPlace):
  44. def _ambig(self, options):
  45. return max(options, key=score)
  46. class TreeToJson(Transformer):
  47. @v_args(inline=True)
  48. def string(self, s):
  49. return s[1:-1].replace('\\"', '"')
  50. array = list
  51. pair = tuple
  52. object = dict
  53. number = v_args(inline=True)(float)
  54. null = lambda self, _: None
  55. true = lambda self, _: True
  56. false = lambda self, _: False
  57. parser = Lark(GRAMMAR, parser='earley', ambiguity="explicit", lexer='dynamic_complete')
  58. EXAMPLES = [
  59. r'{_array_:[1,2,3]}',
  60. r'{_abc_: _array must be of the following format [_1_, _2_, _3_]_}',
  61. r'{_foo_:{}, _bar_: [], _baz_: __}',
  62. r'{_error_:_invalid_client_, _error_description_:_AADSTS7000215: Invalid '
  63. r'client secret is provided.\r\nTrace ID: '
  64. r'a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00\r\nCorrelation ID: '
  65. r'aa0aaa00-0aaa-0000-00a0-00000aaaa0aa\r\nTimestamp: 1997-10-10 00:00:00Z_, '
  66. r'_error_codes_:[7000215], _timestamp_:_1997-10-10 00:00:00Z_, '
  67. r'_trace_id_:_a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00_, '
  68. r'_correlation_id_:_aa0aaa00-0aaa-0000-00a0-00000aaaa0aa_, '
  69. r'_error_uri_:_https://example.com_}',
  70. ]
  71. for example in EXAMPLES:
  72. tree = parser.parse(example)
  73. tree = RemoveAmbiguities().transform(tree)
  74. result = TreeToJson().transform(tree)
  75. print('-' * 100)
  76. pprint(result)