| @@ -0,0 +1,20 @@ | |||||
| # Minimal makefile for Sphinx documentation | |||||
| # | |||||
| # You can set these variables from the command line. | |||||
| SPHINXOPTS = | |||||
| SPHINXBUILD = sphinx-build | |||||
| SPHINXPROJ = Lark | |||||
| SOURCEDIR = . | |||||
| BUILDDIR = _build | |||||
| # Put it first so that "make" without argument is like "make help". | |||||
| help: | |||||
| @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) | |||||
| .PHONY: help Makefile | |||||
| # Catch-all target: route all unknown targets to Sphinx using the new | |||||
| # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). | |||||
| %: Makefile | |||||
| @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) | |||||
| @@ -2,6 +2,8 @@ | |||||
| This page details the important classes in Lark. | This page details the important classes in Lark. | ||||
| **TODO** convert to sphinx autodoc! | |||||
| ---- | ---- | ||||
| ## lark.Lark | ## lark.Lark | ||||
| @@ -0,0 +1,177 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| # | |||||
| # Lark documentation build configuration file, created by | |||||
| # sphinx-quickstart on Sun Aug 16 13:09:41 2020. | |||||
| # | |||||
| # This file is execfile()d with the current directory set to its | |||||
| # containing dir. | |||||
| # | |||||
| # Note that not all possible configuration values are present in this | |||||
| # autogenerated file. | |||||
| # | |||||
| # All configuration values have a default; values that are commented out | |||||
| # serve to show the default. | |||||
| # If extensions (or modules to document with autodoc) are in another directory, | |||||
| # add these directories to sys.path here. If the directory is relative to the | |||||
| # documentation root, use os.path.abspath to make it absolute, like shown here. | |||||
| # | |||||
| # import os | |||||
| # import sys | |||||
| # sys.path.insert(0, os.path.abspath('.')) | |||||
| # -- General configuration ------------------------------------------------ | |||||
| # If your documentation needs a minimal Sphinx version, state it here. | |||||
| # | |||||
| # needs_sphinx = '1.0' | |||||
| # Add any Sphinx extension module names here, as strings. They can be | |||||
| # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom | |||||
| # ones. | |||||
| extensions = [ | |||||
| 'sphinx.ext.autodoc', | |||||
| 'sphinx.ext.coverage', | |||||
| 'recommonmark' | |||||
| ] | |||||
| # Add any paths that contain templates here, relative to this directory. | |||||
| templates_path = ['_templates'] | |||||
| # The suffix(es) of source filenames. | |||||
| # You can specify multiple suffix as a list of string: | |||||
| # | |||||
| # source_suffix = ['.rst', '.md'] | |||||
| source_suffix = { | |||||
| '.rst': 'restructuredtext', | |||||
| '.md': 'markdown' | |||||
| } | |||||
| # The master toctree document. | |||||
| master_doc = 'index' | |||||
| # General information about the project. | |||||
| project = 'Lark' | |||||
| copyright = '2020, Erez Shinan' | |||||
| author = 'Erez Shinan' | |||||
| # The version info for the project you're documenting, acts as replacement for | |||||
| # |version| and |release|, also used in various other places throughout the | |||||
| # built documents. | |||||
| # | |||||
| # The short X.Y version. | |||||
| version = '' | |||||
| # The full version, including alpha/beta/rc tags. | |||||
| release = '' | |||||
| # The language for content autogenerated by Sphinx. Refer to documentation | |||||
| # for a list of supported languages. | |||||
| # | |||||
| # This is also used if you do content translation via gettext catalogs. | |||||
| # Usually you set "language" from the command line for these cases. | |||||
| language = None | |||||
| # List of patterns, relative to source directory, that match files and | |||||
| # directories to ignore when looking for source files. | |||||
| # These patterns also affect html_static_path and html_extra_path | |||||
| exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] | |||||
| # The name of the Pygments (syntax highlighting) style to use. | |||||
| pygments_style = 'sphinx' | |||||
| # If true, `todo` and `todoList` produce output, else they produce nothing. | |||||
| todo_include_todos = False | |||||
| # -- Options for HTML output ---------------------------------------------- | |||||
| # The theme to use for HTML and HTML Help pages. See the documentation for | |||||
| # a list of builtin themes. | |||||
| # | |||||
| html_theme = 'sphinx_rtd_theme' | |||||
| # Theme options are theme-specific and customize the look and feel of a theme | |||||
| # further. For a list of options available for each theme, see the | |||||
| # documentation. | |||||
| # | |||||
| # html_theme_options = {} | |||||
| # Add any paths that contain custom static files (such as style sheets) here, | |||||
| # relative to this directory. They are copied after the builtin static files, | |||||
| # so a file named "default.css" will overwrite the builtin "default.css". | |||||
| html_static_path = ['_static'] | |||||
| # Custom sidebar templates, must be a dictionary that maps document names | |||||
| # to template names. | |||||
| # | |||||
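| # This is required for the alabaster theme (NOTE(review): html_theme is set to 'sphinx_rtd_theme' in this file -- confirm this setting is still needed) | |||||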
| # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars | |||||
| html_sidebars = { | |||||
| '**': [ | |||||
| 'relations.html', # needs 'show_related': True theme option to display | |||||
| 'searchbox.html', | |||||
| ] | |||||
| } | |||||
| # -- Options for HTMLHelp output ------------------------------------------ | |||||
| # Output file base name for HTML help builder. | |||||
| htmlhelp_basename = 'Larkdoc' | |||||
| # -- Options for LaTeX output --------------------------------------------- | |||||
| latex_elements = { | |||||
| # The paper size ('letterpaper' or 'a4paper'). | |||||
| # | |||||
| # 'papersize': 'letterpaper', | |||||
| # The font size ('10pt', '11pt' or '12pt'). | |||||
| # | |||||
| # 'pointsize': '10pt', | |||||
| # Additional stuff for the LaTeX preamble. | |||||
| # | |||||
| # 'preamble': '', | |||||
| # Latex figure (float) alignment | |||||
| # | |||||
| # 'figure_align': 'htbp', | |||||
| } | |||||
| # Grouping the document tree into LaTeX files. List of tuples | |||||
| # (source start file, target name, title, | |||||
| # author, documentclass [howto, manual, or own class]). | |||||
| latex_documents = [ | |||||
| (master_doc, 'Lark.tex', 'Lark Documentation', | |||||
| 'Erez Shinan', 'manual'), | |||||
| ] | |||||
| # -- Options for manual page output --------------------------------------- | |||||
| # One entry per manual page. List of tuples | |||||
| # (source start file, name, description, authors, manual section). | |||||
| man_pages = [ | |||||
| (master_doc, 'lark', 'Lark Documentation', | |||||
| [author], 1) | |||||
| ] | |||||
| # -- Options for Texinfo output ------------------------------------------- | |||||
| # Grouping the document tree into Texinfo files. List of tuples | |||||
| # (source start file, target name, title, author, | |||||
| # dir menu entry, description, category) | |||||
| texinfo_documents = [ | |||||
| (master_doc, 'Lark', 'Lark Documentation', | |||||
| author, 'Lark', 'One line description of project.', | |||||
| 'Miscellaneous'), | |||||
| ] | |||||
| @@ -1,4 +1,6 @@ | |||||
| # Main Features | |||||
| # Features | |||||
| ## Main Features | |||||
| - Earley parser, capable of parsing any context-free grammar | - Earley parser, capable of parsing any context-free grammar | ||||
| - Implements SPPF, for efficient parsing and storing of ambiguous grammars. | - Implements SPPF, for efficient parsing and storing of ambiguous grammars. | ||||
| - LALR(1) parser, limited in power of expression, but very efficient in space and performance (O(n)). | - LALR(1) parser, limited in power of expression, but very efficient in space and performance (O(n)). | ||||
| @@ -18,10 +20,10 @@ | |||||
| [Read more about the parsers](parsers.md) | [Read more about the parsers](parsers.md) | ||||
| # Extra features | |||||
| ## Extra features | |||||
| - Import rules and tokens from other Lark grammars, for code reuse and modularity. | - Import rules and tokens from other Lark grammars, for code reuse and modularity. | ||||
| - Import grammars from Nearley.js ([read more](/docs/nearley.md)) | |||||
| - Import grammars from Nearley.js ([read more](nearley.md)) | |||||
| - CYK parser | - CYK parser | ||||
| ### Experimental features | ### Experimental features | ||||
| @@ -1,13 +1,5 @@ | |||||
| # Grammar Reference | # Grammar Reference | ||||
| Table of contents: | |||||
| 1. [Definitions](#defs) | |||||
| 1. [Terminals](#terms) | |||||
| 1. [Rules](#rules) | |||||
| 1. [Directives](#dirs) | |||||
| <a name="defs"></a> | |||||
| ## Definitions | ## Definitions | ||||
| A **grammar** is a list of rules and terminals, that together define a language. | A **grammar** is a list of rules and terminals, that together define a language. | ||||
| @@ -20,7 +12,7 @@ Each rule is a list of terminals and rules, whose location and nesting define th | |||||
| A **parsing algorithm** is an algorithm that takes a grammar definition and a sequence of symbols (members of the alphabet), and matches the entirety of the sequence by searching for a structure that is allowed by the grammar. | A **parsing algorithm** is an algorithm that takes a grammar definition and a sequence of symbols (members of the alphabet), and matches the entirety of the sequence by searching for a structure that is allowed by the grammar. | ||||
| ## General Syntax and notes | |||||
| ### General Syntax and notes | |||||
| Grammars in Lark are based on [EBNF](https://en.wikipedia.org/wiki/Extended_Backus–Naur_form) syntax, with several enhancements. | Grammars in Lark are based on [EBNF](https://en.wikipedia.org/wiki/Extended_Backus–Naur_form) syntax, with several enhancements. | ||||
| @@ -58,7 +50,6 @@ Lark begins the parse with the rule 'start', unless specified otherwise in the o | |||||
| Names of rules are always in lowercase, while names of terminals are always in uppercase. This distinction has practical effects, for the shape of the generated parse-tree, and the automatic construction of the lexer (aka tokenizer, or scanner). | Names of rules are always in lowercase, while names of terminals are always in uppercase. This distinction has practical effects, for the shape of the generated parse-tree, and the automatic construction of the lexer (aka tokenizer, or scanner). | ||||
| <a name="terms"></a> | |||||
| ## Terminals | ## Terminals | ||||
| Terminals are used to match text into symbols. They can be defined as a combination of literals and other terminals. | Terminals are used to match text into symbols. They can be defined as a combination of literals and other terminals. | ||||
| @@ -190,7 +181,6 @@ _ambig | |||||
| ``` | ``` | ||||
| <a name="rules"></a> | |||||
| ## Rules | ## Rules | ||||
| **Syntax:** | **Syntax:** | ||||
| @@ -22,11 +22,11 @@ Of course, some specific use-cases may deviate from this process. Feel free to s | |||||
| Browse the [Examples](https://github.com/lark-parser/lark/tree/master/examples) to find a template that suits your purposes. | Browse the [Examples](https://github.com/lark-parser/lark/tree/master/examples) to find a template that suits your purposes. | ||||
| Read the tutorials to get a better understanding of how everything works. (links in the [main page](/)) | |||||
| Read the tutorials to get a better understanding of how everything works. (links in the [main page](/index)) | |||||
| Use the [Cheatsheet (PDF)](lark_cheatsheet.pdf) for quick reference. | |||||
| Use the [Cheatsheet (PDF)](/_static/lark_cheatsheet.pdf) for quick reference. | |||||
| Use the reference pages for more in-depth explanations. (links in the [main page](/)] | |||||
| Use the reference pages for more in-depth explanations. (links in the [main page](/index)) | |||||
| ## LALR usage | ## LALR usage | ||||
| @@ -1,55 +0,0 @@ | |||||
| # Lark | |||||
| A modern parsing library for Python | |||||
| ## Overview | |||||
| Lark can parse any context-free grammar. | |||||
| Lark provides: | |||||
| - Advanced grammar language, based on EBNF | |||||
| - Three parsing algorithms to choose from: Earley, LALR(1) and CYK | |||||
| - Automatic tree construction, inferred from your grammar | |||||
| - Fast unicode lexer with regexp support, and automatic line-counting | |||||
| Lark's code is hosted on Github: [https://github.com/lark-parser/lark](https://github.com/lark-parser/lark) | |||||
| ### Install | |||||
| ```bash | |||||
| $ pip install lark-parser | |||||
| ``` | |||||
| #### Syntax Highlighting | |||||
| - [Sublime Text & TextMate](https://github.com/lark-parser/lark_syntax) | |||||
| - [Visual Studio Code](https://github.com/lark-parser/vscode-lark) (Or install through the vscode plugin system) | |||||
| - [Intellij & PyCharm](https://github.com/lark-parser/intellij-syntax-highlighting) | |||||
| ----- | |||||
| ## Documentation Index | |||||
| * [Philosophy & Design Choices](philosophy.md) | |||||
| * [Full List of Features](features.md) | |||||
| * [Examples](https://github.com/lark-parser/lark/tree/master/examples) | |||||
| * [Online IDE](https://lark-parser.github.io/lark/ide/app.html) | |||||
| * Tutorials | |||||
| * [How to write a DSL](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/) - Implements a toy LOGO-like language with an interpreter | |||||
| * [How to write a JSON parser](json_tutorial.md) - Teaches you how to use Lark | |||||
| * Unofficial | |||||
| * [Program Synthesis is Possible](https://www.cs.cornell.edu/~asampson/blog/minisynth.html) - Creates a DSL for Z3 | |||||
| * Guides | |||||
| * [How to use Lark](how_to_use.md) | |||||
| * [How to develop Lark](how_to_develop.md) | |||||
| * Reference | |||||
| * [Grammar](grammar.md) | |||||
| * [Tree Construction](tree_construction.md) | |||||
| * [Visitors & Transformers](visitors.md) | |||||
| * [Classes](classes.md) | |||||
| * [Cheatsheet (PDF)](lark_cheatsheet.pdf) | |||||
| * [Importing grammars from Nearley](nearley.md) | |||||
| * Discussion | |||||
| * [Gitter](https://gitter.im/lark-parser/Lobby) | |||||
| * [Forum (Google Groups)](https://groups.google.com/forum/#!forum/lark-parser) | |||||
| @@ -0,0 +1,64 @@ | |||||
| .. Lark documentation master file, created by | |||||
| sphinx-quickstart on Sun Aug 16 13:09:41 2020. | |||||
| You can adapt this file completely to your liking, but it should at least | |||||
| contain the root `toctree` directive. | |||||
| Welcome to Lark's documentation! | |||||
| ================================ | |||||
| .. toctree:: | |||||
| :maxdepth: 2 | |||||
| :hidden: | |||||
| philosophy | |||||
| features | |||||
| parsers | |||||
| .. toctree:: | |||||
| :maxdepth: 2 | |||||
| :caption: Tutorials & Guides | |||||
| :hidden: | |||||
| json_tutorial | |||||
| how_to_use | |||||
| how_to_develop | |||||
| nearley | |||||
| recipes | |||||
| .. toctree:: | |||||
| :maxdepth: 2 | |||||
| :caption: Reference | |||||
| :hidden: | |||||
| grammar | |||||
| tree_construction | |||||
| visitors | |||||
| classes | |||||
| Lark is a modern parsing library for Python. Lark can parse any context-free grammar. | |||||
| Lark provides: | |||||
| - Advanced grammar language, based on EBNF | |||||
| - Three parsing algorithms to choose from: Earley, LALR(1) and CYK | |||||
| - Automatic tree construction, inferred from your grammar | |||||
| - Fast unicode lexer with regexp support, and automatic line-counting | |||||
| **Install Lark**: | |||||
| .. code:: bash | |||||
| $ pip install lark-parser | |||||
| **Syntax Highlighting**: | |||||
| - `Sublime Text & TextMate`_ | |||||
| - `Visual Studio Code`_ (Or install through the vscode plugin system) | |||||
| - `Intellij & PyCharm`_ | |||||
| .. _Sublime Text & TextMate: https://github.com/lark-parser/lark_syntax | |||||
| .. _Visual Studio Code: https://github.com/lark-parser/vscode-lark | |||||
| .. _Intellij & PyCharm: https://github.com/lark-parser/intellij-syntax-highlighting | |||||
| @@ -1,7 +1,6 @@ | |||||
| # Lark Tutorial - JSON parser | |||||
| # JSON parser - Tutorial | |||||
| Lark is a parser - a program that accepts a grammar and text, and produces a structured tree that represents that text. | Lark is a parser - a program that accepts a grammar and text, and produces a structured tree that represents that text. | ||||
| In this tutorial we will write a JSON parser in Lark, and explore Lark's various features in the process. | In this tutorial we will write a JSON parser in Lark, and explore Lark's various features in the process. | ||||
| It has 5 parts. | It has 5 parts. | ||||
| @@ -0,0 +1,36 @@ | |||||
| @ECHO OFF | |||||
| pushd %~dp0 | |||||
| REM Command file for Sphinx documentation | |||||
| if "%SPHINXBUILD%" == "" ( | |||||
| set SPHINXBUILD=sphinx-build | |||||
| ) | |||||
| set SOURCEDIR=. | |||||
| set BUILDDIR=_build | |||||
| set SPHINXPROJ=Lark | |||||
| if "%1" == "" goto help | |||||
| %SPHINXBUILD% >NUL 2>NUL | |||||
| if errorlevel 9009 ( | |||||
| echo. | |||||
| echo.The 'sphinx-build' command was not found. Make sure you have Sphinx | |||||
| echo.installed, then set the SPHINXBUILD environment variable to point | |||||
| echo.to the full path of the 'sphinx-build' executable. Alternatively you | |||||
| echo.may add the Sphinx directory to PATH. | |||||
| echo. | |||||
| echo.If you don't have Sphinx installed, grab it from | |||||
| echo.http://sphinx-doc.org/ | |||||
| exit /b 1 | |||||
| ) | |||||
| %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% | |||||
| goto end | |||||
| :help | |||||
| %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% | |||||
| :end | |||||
| popd | |||||
| @@ -1,7 +1,7 @@ | |||||
| # Parsers | |||||
| Lark implements the following parsing algorithms: Earley, LALR(1), and CYK | Lark implements the following parsing algorithms: Earley, LALR(1), and CYK | ||||
| # Earley | |||||
| ## Earley | |||||
| An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser capable of parsing any context-free grammar at O(n^3), and O(n^2) when the grammar is unambiguous. It can parse most LR grammars at O(n). Most programming languages are LR, and can be parsed in linear time. | An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser capable of parsing any context-free grammar at O(n^3), and O(n^2) when the grammar is unambiguous. It can parse most LR grammars at O(n). Most programming languages are LR, and can be parsed in linear time. | ||||
| @@ -30,7 +30,7 @@ Lark provides the following options to combat ambiguity: | |||||
| **TODO: Add documentation on dynamic_complete** | **TODO: Add documentation on dynamic_complete** | ||||
| # LALR(1) | |||||
| ## LALR(1) | |||||
| [LALR(1)](https://www.wikiwand.com/en/LALR_parser) is a very efficient, tried-and-tested parsing algorithm. It's incredibly fast and requires very little memory. It can parse most programming languages (For example: Python and Java). | [LALR(1)](https://www.wikiwand.com/en/LALR_parser) is a very efficient, tried-and-tested parsing algorithm. It's incredibly fast and requires very little memory. It can parse most programming languages (For example: Python and Java). | ||||
| @@ -42,7 +42,7 @@ The contextual lexer communicates with the parser, and uses the parser's lookahe | |||||
| This is an improvement to LALR(1) that is unique to Lark. | This is an improvement to LALR(1) that is unique to Lark. | ||||
| # CYK Parser | |||||
| ## CYK Parser | |||||
| A [CYK parser](https://www.wikiwand.com/en/CYK_algorithm) can parse any context-free grammar at O(n^3*|G|). | A [CYK parser](https://www.wikiwand.com/en/CYK_algorithm) can parse any context-free grammar at O(n^3*|G|). | ||||
| @@ -4,7 +4,7 @@ Parsers are innately complicated and confusing. They're difficult to understand, | |||||
| Lark's mission is to make the process of writing them as simple and abstract as possible, by following these design principles: | Lark's mission is to make the process of writing them as simple and abstract as possible, by following these design principles: | ||||
| ### Design Principles | |||||
| ## Design Principles | |||||
| 1. Readability matters | 1. Readability matters | ||||
| @@ -23,7 +23,7 @@ In accordance with these principles, I arrived at the following design choices: | |||||
| ----------- | ----------- | ||||
| # Design Choices | |||||
| ## Design Choices | |||||
| ### 1. Separation of code and grammar | ### 1. Separation of code and grammar | ||||
| @@ -1,4 +1,4 @@ | |||||
| # Automatic Tree Construction - Reference | |||||
| # Tree Construction Reference | |||||
| Lark builds a tree automatically based on the structure of the grammar, where each rule that is matched becomes a branch (node) in the tree, and its children are its matches, in the order of matching. | Lark builds a tree automatically based on the structure of the grammar, where each rule that is matched becomes a branch (node) in the tree, and its children are its matches, in the order of matching. | ||||
| @@ -13,7 +13,7 @@ If `maybe_placeholders=False` (the default), then `[]` behaves like `()?`. | |||||
| If `maybe_placeholders=True`, then using `[item]` will return the item if it matched, or the value `None`, if it didn't. | If `maybe_placeholders=True`, then using `[item]` will return the item if it matched, or the value `None`, if it didn't. | ||||
| ### Terminals | |||||
| ## Terminals | |||||
| Terminals are always values in the tree, never branches. | Terminals are always values in the tree, never branches. | ||||
| @@ -74,7 +74,7 @@ Lark will parse "((hello world))" as: | |||||
| The brackets do not appear in the tree by design. The words appear because they are matched by a named terminal. | The brackets do not appear in the tree by design. The words appear because they are matched by a named terminal. | ||||
| # Shaping the tree | |||||
| ## Shaping the tree | |||||
| Users can alter the automatic construction of the tree using a collection of grammar features. | Users can alter the automatic construction of the tree using a collection of grammar features. | ||||
| @@ -1,4 +1,4 @@ | |||||
| ## Transformers & Visitors | |||||
| # Transformers & Visitors | |||||
| Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. | Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. | ||||