From ff588714c1184fb24d7a4325006240bb7e89b26d Mon Sep 17 00:00:00 2001 From: Rob Rose Date: Tue, 27 Mar 2018 15:56:39 -0400 Subject: [PATCH 1/6] Changed Lark standalone file extension - Reasoning addressed in Issue #116 - Renamed example and grammars file to use new extension. - Changed `.g` to new extension of `.lrk` in places where it is referenced. --- MANIFEST.in | 2 +- examples/{python2.g => python2.lrk} | 0 examples/{python3.g => python3.lrk} | 0 examples/python_parser.py | 4 ++-- examples/standalone/create_standalone.sh | 2 +- examples/standalone/{json.g => json.lrk} | 0 lark/grammars/{common.g => common.lrk} | 0 lark/load_grammar.py | 2 +- setup.py | 2 +- 9 files changed, 6 insertions(+), 6 deletions(-) rename examples/{python2.g => python2.lrk} (100%) rename examples/{python3.g => python3.lrk} (100%) rename examples/standalone/{json.g => json.lrk} (100%) rename lark/grammars/{common.g => common.lrk} (100%) diff --git a/MANIFEST.in b/MANIFEST.in index 5ee4903..8288fd6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.g tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* +include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.lrk tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* diff --git a/examples/python2.g b/examples/python2.lrk similarity index 100% rename from examples/python2.g rename to examples/python2.lrk diff --git a/examples/python3.g b/examples/python3.lrk similarity index 100% rename from examples/python3.g rename to examples/python3.lrk diff --git a/examples/python_parser.py b/examples/python_parser.py index d953a79..d14dacc 100644 --- a/examples/python_parser.py +++ b/examples/python_parser.py @@ -21,8 +21,8 @@ class PythonIndenter(Indenter): tab_len = 8 -grammar2_filename = os.path.join(__path__, 'python2.g') -grammar3_filename = os.path.join(__path__, 'python3.g') +grammar2_filename = os.path.join(__path__, 'python2.lrk') +grammar3_filename = os.path.join(__path__, 'python3.lrk') with open(grammar2_filename) as f: python_parser2 = Lark(f, parser='lalr', postlex=PythonIndenter(), start='file_input') with open(grammar3_filename) as f: diff --git a/examples/standalone/create_standalone.sh b/examples/standalone/create_standalone.sh index 1eba3a4..f5001fe 100755 --- a/examples/standalone/create_standalone.sh +++ b/examples/standalone/create_standalone.sh @@ -1 +1 @@ -python -m lark.tools.standalone json.g > json_parser.py +python -m lark.tools.standalone json.lrk > json_parser.py diff --git a/examples/standalone/json.g b/examples/standalone/json.lrk similarity index 100% rename from examples/standalone/json.g rename to examples/standalone/json.lrk diff --git a/lark/grammars/common.g b/lark/grammars/common.lrk similarity index 100% rename from lark/grammars/common.g rename to lark/grammars/common.lrk diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 1637514..cf74199 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -625,7 +625,7 @@ class GrammarLoader: elif stmt.data == 'import': dotted_path = stmt.children[0].children name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1] - grammar_path = os.path.join(*dotted_path[:-1]) + '.g' + grammar_path = os.path.join(*dotted_path[:-1]) + '.lrk' g = import_grammar(grammar_path) token_options = dict(g.token_defs)[dotted_path[-1]] assert isinstance(token_options, tuple) and len(token_options)==2 diff --git a/setup.py b/setup.py index 430ae5c..978b370 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( requires = [], install_requires = [], - package_data = { '': ['*.md', '*.g'] }, + package_data = { '': ['*.md', '*.lrk'] }, test_suite = 'tests.__main__', From ca3d4ca6f4d3da667df1f7f216414469cac66af1 Mon Sep 17 00:00:00 2001 From: Rob Rose Date: Sat, 14 Apr 2018 20:27:39 -0400 Subject: [PATCH 2/6] Changing changes from .lrk to .lark --- MANIFEST.in | 2 +- examples/{python2.lrk => python2.lark} | 0 examples/{python3.lrk => python3.lark} | 0 examples/python_parser.py | 4 ++-- examples/standalone/create_standalone.sh | 2 +- examples/standalone/{json.lrk => json.lark} | 0 lark/grammars/{common.lrk => common.lark} | 0 lark/load_grammar.py | 2 +- setup.py | 2 +- 9 files changed, 6 insertions(+), 6 deletions(-) rename examples/{python2.lrk => python2.lark} (100%) rename examples/{python3.lrk => python3.lark} (100%) rename examples/standalone/{json.lrk => json.lark} (100%) rename lark/grammars/{common.lrk => common.lark} (100%) diff --git a/MANIFEST.in b/MANIFEST.in index 8288fd6..019e37a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.lrk tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* +include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.lark tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* diff --git a/examples/python2.lrk b/examples/python2.lark similarity index 100% rename from examples/python2.lrk rename to examples/python2.lark diff --git a/examples/python3.lrk b/examples/python3.lark similarity index 100% rename from examples/python3.lrk rename to examples/python3.lark diff --git a/examples/python_parser.py b/examples/python_parser.py index d14dacc..9b3a978 100644 --- a/examples/python_parser.py +++ b/examples/python_parser.py @@ -21,8 +21,8 @@ class PythonIndenter(Indenter): tab_len = 8 -grammar2_filename = os.path.join(__path__, 'python2.lrk') -grammar3_filename = os.path.join(__path__, 'python3.lrk') +grammar2_filename = os.path.join(__path__, 'python2.lark') +grammar3_filename = os.path.join(__path__, 'python3.lark') with open(grammar2_filename) as f: python_parser2 = Lark(f, parser='lalr', postlex=PythonIndenter(), start='file_input') with open(grammar3_filename) as f: diff --git a/examples/standalone/create_standalone.sh b/examples/standalone/create_standalone.sh index f5001fe..a4fa879 100755 --- a/examples/standalone/create_standalone.sh +++ b/examples/standalone/create_standalone.sh @@ -1 +1 @@ -python -m lark.tools.standalone json.lrk > json_parser.py +python -m lark.tools.standalone json.lark > json_parser.py diff --git a/examples/standalone/json.lrk b/examples/standalone/json.lark similarity index 100% rename from examples/standalone/json.lrk rename to examples/standalone/json.lark diff --git a/lark/grammars/common.lrk b/lark/grammars/common.lark similarity index 100% rename from lark/grammars/common.lrk rename to lark/grammars/common.lark diff --git a/lark/load_grammar.py b/lark/load_grammar.py index cf74199..ebecc46 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -625,7 +625,7 @@ class GrammarLoader: elif stmt.data == 'import': dotted_path = stmt.children[0].children name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1] - grammar_path = os.path.join(*dotted_path[:-1]) + '.lrk' + grammar_path = os.path.join(*dotted_path[:-1]) + '.lark' g = import_grammar(grammar_path) token_options = dict(g.token_defs)[dotted_path[-1]] assert isinstance(token_options, tuple) and len(token_options)==2 diff --git a/setup.py b/setup.py index 978b370..8543fd4 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( requires = [], install_requires = [], - package_data = { '': ['*.md', '*.lrk'] }, + package_data = { '': ['*.md', '*.lark'] }, test_suite = 'tests.__main__', From 51644a6c584eb9833af71c40198fdc5d8a99c904 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 25 Apr 2018 19:06:33 +0300 Subject: [PATCH 3/6] Added examples/lark.g - Reference implementation of the Lark grammar (inspired by issue #116) --- examples/README.md | 1 + examples/lark.g | 49 ++++++++++++++++++++++++++++++++++++++++ examples/lark_grammar.py | 18 +++++++++++++++ lark/grammars/common.g | 1 + lark/load_grammar.py | 8 ++++++- 5 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 examples/lark.g create mode 100644 examples/lark_grammar.py diff --git a/examples/README.md b/examples/README.md index 3fbe3ea..25bf504 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,6 +7,7 @@ - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. +- [lark\_grammar.py](lark_grammar.py) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) ### Advanced diff --git a/examples/lark.g b/examples/lark.g new file mode 100644 index 0000000..1fbf592 --- /dev/null +++ b/examples/lark.g @@ -0,0 +1,49 @@ +start: (_item | _NL)* + +_item: rule + | token + | statement + +rule: RULE priority? ":" expansions _NL +token: TOKEN priority? ":" expansions _NL + +priority: "." NUMBER + +statement: "%ignore" expansions _NL -> ignore + | "%import" import_args ["->" TOKEN] _NL -> import + +import_args: name ("." name)* + +?expansions: alias (_VBAR alias)* + +?alias: expansion ["->" RULE] + +?expansion: expr* + +?expr: atom [OP | "~" NUMBER [".." NUMBER]] + +?atom: "(" expansions ")" + | "[" expansions "]" -> maybe + | STRING ".." STRING -> literal_range + | name + | (REGEXP | STRING) -> literal + +name: RULE + | TOKEN + +_VBAR: _NL? "|" +OP: /[+*][?]?|[?](?![a-z])/ +RULE: /!?[_?]?[a-z][_a-z0-9]*/ +TOKEN: /_?[A-Z][_A-Z0-9]*/ +STRING: _STRING "i"? +REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/ +_NL: /(\r?\n)+\s*/ + +%import common.ESCAPED_STRING -> _STRING +%import common.INT -> NUMBER +%import common.WS_INLINE + +COMMENT: "//" /[^\n]/* + +%ignore WS_INLINE +%ignore COMMENT diff --git a/examples/lark_grammar.py b/examples/lark_grammar.py new file mode 100644 index 0000000..88fc4cf --- /dev/null +++ b/examples/lark_grammar.py @@ -0,0 +1,18 @@ +from lark import Lark + +parser = Lark(open('examples/lark.g'), parser="lalr") + +grammar_files = [ + 'examples/python2.g', + 'examples/python3.g', + 'examples/lark.g', + 'lark/grammars/common.g', +] + +def test(): + for grammar_file in grammar_files: + tree = parser.parse(open(grammar_file).read()) + print("All grammars parsed successfully") + +if __name__ == '__main__': + test() diff --git a/lark/grammars/common.g b/lark/grammars/common.g index 2bd02d0..8bc8079 100644 --- a/lark/grammars/common.g +++ b/lark/grammars/common.g @@ -20,6 +20,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER // // Strings // +//STRING: /"(\\\"|\\\\|[^"\n])*?"i?/ STRING_INNER: ("\\\""|/[^"]/) ESCAPED_STRING: "\"" STRING_INNER* "\"" diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 43d1bf5..13aeff0 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -122,7 +122,7 @@ RULES = { 'statement': ['ignore', 'import'], 'ignore': ['_IGNORE expansions _NL'], 'import': ['_IMPORT import_args _NL', - '_IMPORT import_args _TO TOKEN'], + '_IMPORT import_args _TO TOKEN _NL'], 'import_args': ['_import_args'], '_import_args': ['name', '_import_args _DOT name'], @@ -375,6 +375,7 @@ class TokenTreeToPattern(Transformer): return p def expansion(self, items): + assert items if len(items) == 1: return items[0] if len({i.flags for i in items}) > 1: @@ -486,6 +487,11 @@ class Grammar: # Convert token-trees to strings/regexps transformer = PrepareLiterals() * TokenTreeToPattern() + for name, (token_tree, priority) in token_defs: + for t in token_tree.find_data('expansion'): + if not t.children: + raise GrammarError("Tokens cannot be empty (%s)" % name) + tokens = [TokenDef(name, transformer.transform(token_tree), priority) for name, (token_tree, priority) in token_defs] From 0a40137ac79d9bfbc477e2d4c443b8503d3e28da Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 25 Apr 2018 19:09:50 +0300 Subject: [PATCH 4/6] Update README.md --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index 25bf504..0951c86 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,7 +7,7 @@ - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. -- [lark\_grammar.py](lark_grammar.py) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) +- [lark\_grammar.py](lark_grammar.py) + [lark.g](lark.g) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) ### Advanced From 836735211fffa7e45015781aad2349f81dfe622c Mon Sep 17 00:00:00 2001 From: Rob Rose Date: Wed, 25 Apr 2018 14:28:12 -0400 Subject: [PATCH 5/6] Resolved upstream changes to use new file-extension - @erezsh added the new Lark reference and some other things using the current .g extension, so I wanted to resolve them for PR #117 - Renamed lark.g to lark.lark. - Changed lark_grammar.py to use .lark file extensions. - Changed urls that used .g to use .lark. --- docs/json_tutorial.md | 2 +- examples/README.md | 2 +- examples/{lark.g => lark.lark} | 0 examples/lark_grammar.py | 10 +++++----- 4 files changed, 7 insertions(+), 7 deletions(-) rename examples/{lark.g => lark.lark} (100%) diff --git a/docs/json_tutorial.md b/docs/json_tutorial.md index 9f3fbf1..96e76fb 100644 --- a/docs/json_tutorial.md +++ b/docs/json_tutorial.md @@ -79,7 +79,7 @@ By the way, if you're curious what these terminals signify, they are roughly equ Lark will accept this, if you really want to complicate your life :) -(You can find the original definitions in [common.g](/lark/grammars/common.g).) +(You can find the original definitions in [common.lark](/lark/grammars/common.lark).) Notice that terminals are written in UPPER-CASE, while rules are written in lower-case. I'll touch more on the differences between rules and terminals later. diff --git a/examples/README.md b/examples/README.md index 0951c86..ef150ad 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,7 +7,7 @@ - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. -- [lark\_grammar.py](lark_grammar.py) + [lark.g](lark.g) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) +- [lark\_grammar.py](lark_grammar.py) + [lark.lark](lark.lark) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) ### Advanced diff --git a/examples/lark.g b/examples/lark.lark similarity index 100% rename from examples/lark.g rename to examples/lark.lark diff --git a/examples/lark_grammar.py b/examples/lark_grammar.py index 88fc4cf..30ccc8b 100644 --- a/examples/lark_grammar.py +++ b/examples/lark_grammar.py @@ -1,12 +1,12 @@ from lark import Lark -parser = Lark(open('examples/lark.g'), parser="lalr") +parser = Lark(open('examples/lark.lark'), parser="lalr") grammar_files = [ - 'examples/python2.g', - 'examples/python3.g', - 'examples/lark.g', - 'lark/grammars/common.g', + 'examples/python2.lark', + 'examples/python3.lark', + 'examples/lark.lark', + 'lark/grammars/common.lark', ] def test(): From 5a6e60456026e9ab6a8feac967845170625a7001 Mon Sep 17 00:00:00 2001 From: Rob Rose Date: Sat, 5 May 2018 22:25:20 -0400 Subject: [PATCH 6/6] Added .gitignore for pyenv --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cdb93cd --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.python-version