Add 'vendored_parsers/tree-sitter-clojure/' from commit '421546c2547c74d1d9a0d8c296c412071d37e7ca'

Closes #448

git-subtree-dir: vendored_parsers/tree-sitter-clojure
git-subtree-mainline: ebfc043a4a
git-subtree-split: 421546c254
pull/502/head
Wilfred Hughes 2023-03-15 15:36:57 +07:00
commit 20ad284882
54 changed files with 35474 additions and 1 deletions

@ -8,6 +8,8 @@ Added support for Ada.
Improved parsing for TOML.
Updated grammar for Clojure.
### Display
Difftastic no longer uses purple to highlight regions that are missing

@ -198,7 +198,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_clojure() };
TreeSitterConfig {
language,
atom_nodes: vec![].into_iter().collect(),
atom_nodes: vec!["kwd_lit"].into_iter().collect(),
delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]")]
.into_iter()
.collect(),

@ -0,0 +1,5 @@
node_modules
bin
build
*.log

@ -0,0 +1,65 @@
## Changelog
Bits may be missing and/or inaccurate :)
### Upcoming?
* Update tree-sitter and friends to 0.19.5 or 0.20.x
* Add formatting docs and utilities
* Revise and enhance package.json scripts
* Revise and update docs
* Add some \_bare\_\* constructs to inline
### v0.0.10 - 2023-01-06
* Tokenize symbols and keywords further
([#31](https://github.com/sogaiu/tree-sitter-clojure/issues/31)) -
dannyfreeman
* Address symbols after metadata issue
([#21](https://github.com/sogaiu/tree-sitter-clojure/issues/21)) -
dannyfreeman
* Change formatting of grammar.js
### v0.0.9 - 2022-06-03
* Add corpus for light testing
* Add highlighting queries for difftastic
([#20](https://github.com/sogaiu/tree-sitter-clojure/issues/20))
* Remove web-tree-sitter dependency
([#19](https://github.com/sogaiu/tree-sitter-clojure/issues/19))
* Re-add binding.gyp
### v0.0.8 - 2021-03-17
* Upgrade tree-sitter to 0.19.3
* Add Cargo.toml
* Upgrade tree-sitter to 0.19.2
* Add generated bindings directory and content
### v0.0.7 - 2021-01-05
* CC0-1.0 ([#11](https://github.com/sogaiu/tree-sitter-clojure/issues/11))
* Tweak grammar.js
### v0.0.3 - 2020-10-30
* Add generated src directory and content
* Major renaming
* More fields
* Refine metadata bits
* Refine \_bare\_\* constructs usage
* Address discard / number parsing issue
([#7](https://github.com/sogaiu/tree-sitter-clojure/issues/7))
* Octal number tweak
* Restore visibility of comment nodes
* ?
### v0.0.2 - 2020-09-02
* Handle whitespace explicitly, no extras
* Start using some \_bare\_\* constructs
* ?
### v0.0.1 - 2020-07-05
* First tagging

@ -0,0 +1,121 @@
Creative Commons Legal Code
CC0 1.0 Universal
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
HEREUNDER.
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.

@ -0,0 +1,25 @@
[package]
name = "tree-sitter-clojure"
description = "clojure grammar for the tree-sitter parsing library"
version = "0.0.9"
keywords = ["incremental", "parsing", "clojure"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/sogaiu/tree-sitter-clojure"
edition = "2018"
build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "0.19.3"
[build-dependencies]
cc = "1.0"

@ -0,0 +1,156 @@
# tree-sitter-clojure
## Notice
Although no major changes are anticipated at this point, there are no
guarantees. To get a heads-up before such changes occur, please
consider subscribing to the [Potential Changes Announcements
issue](https://github.com/sogaiu/tree-sitter-clojure/issues/33) to be
notified beforehand. The hope is that by communicating early enough
about these sorts of things, unnecessary breakage can be avoided
and/or mitigated.
## Status
tree-sitter-clojure has been:
* [Tested in various ways](doc/testing.md)
* [Used in some ways](doc/use.md)
* [Scoped for better behavior](doc/scope.md)
* [Brought about through cooperation](doc/credits.md)
## Prerequisites
Unfortunately, the short of it is that it may be a bit complicated depending on what you want to do.
* If you don't use any of the wasm-related functionality (e.g. previewing parse results in your web browser or you want to build a `.wasm` file for use in a plugin or extension), you probably just need:
* an appropriate version of node (I've tested with various versions >= 12, 14) and
* other typical development-related bits (e.g. git, appropriate c compiler, etc.)
* If you want wasm-related functionality, you get to have fun figuring out which version of [emsdk](https://emscripten.org/docs/getting_started/downloads.html#installation-instructions) currently works with tree-sitter. At the time of this writing, [this file](https://github.com/tree-sitter/tree-sitter/blob/master/cli/emscripten-version) indicates a version that might be appropriate. That may depend on precisely what the versions of other bits (e.g. tree-sitter-cli, web-tree-sitter, etc.) might be though, so if something doesn't work right away, you might consider trying [different versions that have been recorded](https://github.com/tree-sitter/tree-sitter/commits/master/emscripten-version).
Note that there may be an upside to using emsdk though -- it may figure out and arrange for an appropriate version of node, making a separate installation of node unnecessary. I don't use such a setup on a day-to-day basis, but it did work for me at least once.
## Fine Print
* The instructions below assume emsdk has been installed, but `emcc` (tool that can be used to compile to wasm) is not necessarily on one's `PATH`. If an appropriate `emcc` is on one's `PATH` (e.g. emscripten installed via homebrew), the emsdk steps (e.g. `source ~/src/emsdk/emsdk_env.sh`) below may be ignored.
* `node-gyp` (tool for compiling native addon modules for Node.js) may fail on machines upgraded to macos Catalina. [This document](https://github.com/nodejs/node-gyp/blob/master/macOS_Catalina.md) may help cope with such a situation.
## Initial Setup
Suppose typical development sources are stored under `~/src`.
### Short Version
```
# clone repository
cd ~/src
git clone https://github.com/sogaiu/tree-sitter-clojure
cd tree-sitter-clojure
# install tree-sitter-cli and dependencies, then build
npm ci
```
### Long Version
```
# clone repository
cd ~/src
git clone https://github.com/sogaiu/tree-sitter-clojure
cd tree-sitter-clojure
# ensure tree-sitter-cli is available as a dev dependency
npm install --save-dev --save-exact tree-sitter-cli
# create `src` and populate with tree-sitter `.c` goodness
npx tree-sitter generate
# populate `node_modules` with dependencies
npm install
# create `build` and populate appropriately
npx node-gyp configure
# create `build/Release` and build `tree_sitter_clojure_binding.node`
npx node-gyp rebuild
```
## Grammar Development
Hack on grammar.
```
# edit grammar.js using some editor
# rebuild tree-sitter stuff
npx tree-sitter generate && \
npx node-gyp rebuild
```
Parse individual files.
```
# create and populate sample code file for parsing named `sample.clj`
# parse sample file
npx tree-sitter parse sample.clj
# if output has errors, figure out what's wrong
```
Interactively test in the browser (requires emsdk).
```
# prepare emsdk (specifically emcc) for building .wasm
source ~/src/emsdk/emsdk_env.sh
# build .wasm bits and invoke web-ui for interactive testing
npx tree-sitter build-wasm && \
npx tree-sitter web-ui
# in appropriate browser window, paste code in left pane
# examine results in right pane -- can even click on nodes
# if output has errors, figure out what's wrong
```
## Measure Performance
```
# single measurement
npx tree-sitter parse --time sample.clj
# multiple measurements with `multitime`
multitime -n10 -s1 npx tree-sitter parse --time --quiet sample.clj
```
## Build .wasm
Assuming emsdk is installed appropriately under `~/src/emsdk`.
```
# prepare emsdk (specifically emcc) for use
source ~/src/emsdk/emsdk_env.sh
# create `tree-sitter-clojure.wasm`
npx tree-sitter build-wasm
```
## Resources
* [Guide to your first Tree-sitter grammar](https://gist.github.com/Aerijo/df27228d70c633e088b0591b8857eeef)
* [sublime-clojure](https://github.com/tonsky/sublime-clojure)
* [syntax-highlighter](https://github.com/EvgeniyPeshkov/syntax-highlighter)
* [tree-sitter](http://tree-sitter.github.io/tree-sitter/)
* [tree-sitter-clojure.oakmac](https://github.com/oakmac/tree-sitter-clojure)
* [tree-sitter-clojure.SergeevPavel](https://github.com/SergeevPavel/tree-sitter-clojure)
* [tree-sitter-clojure.Tavistock](https://github.com/Tavistock/tree-sitter-clojure)
* [vscode-tree-sitter](https://github.com/georgewfraser/vscode-tree-sitter)
* [web-tree-sitter API](https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/tree-sitter-web.d.ts)
## Acknowledgments
Please see the [credits](doc/credits.md).

@ -0,0 +1,18 @@
{
"targets": [
{
"target_name": "tree_sitter_clojure_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
],
"sources": [
"src/parser.c",
"bindings/node/binding.cc"
],
"cflags_c": [
"-std=c99",
]
}
]
}

@ -0,0 +1,28 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
extern "C" TSLanguage * tree_sitter_clojure();
namespace {

// Constructor callback backing the "Language" function template.
// Its body is empty: the template is only used to obtain an object that
// carries one internal field.
NAN_METHOD(New) {}

// Module initializer. Creates an instance of a "Language" class, stores the
// pointer returned by tree_sitter_clojure() in its internal field 0, sets its
// "name" property to "clojure", and installs it as `module.exports`.
void Init(Local<Object> exports, Local<Object> module) {
  Local<FunctionTemplate> language_tpl = Nan::New<FunctionTemplate>(New);
  language_tpl->SetClassName(Nan::New("Language").ToLocalChecked());
  // Reserve the single internal field that will hold the language pointer.
  language_tpl->InstanceTemplate()->SetInternalFieldCount(1);

  Local<Object> language_object = Nan::GetFunction(language_tpl)
                                      .ToLocalChecked()
                                      ->NewInstance(Nan::GetCurrentContext())
                                      .ToLocalChecked();
  Nan::SetInternalFieldPointer(language_object, 0, tree_sitter_clojure());

  Local<String> name_key = Nan::New("name").ToLocalChecked();
  Local<String> name_value = Nan::New("clojure").ToLocalChecked();
  Nan::Set(language_object, name_key, name_value);

  // Replace the module's exports wholesale with the language object.
  Local<String> exports_key = Nan::New("exports").ToLocalChecked();
  Nan::Set(module, exports_key, language_object);
}

NODE_MODULE(tree_sitter_clojure_binding, Init)

}  // namespace

@ -0,0 +1,19 @@
// Resolve the compiled native binding, preferring the Release build and
// falling back to the Debug build only when the Release one is missing.
function loadBinding() {
  let releaseError;
  try {
    return require("../../build/Release/tree_sitter_clojure_binding");
  } catch (err) {
    // Anything other than "module not found" is a real failure: surface it.
    if (err.code !== 'MODULE_NOT_FOUND') {
      throw err;
    }
    releaseError = err;
  }

  try {
    return require("../../build/Debug/tree_sitter_clojure_binding");
  } catch (err) {
    // When neither build exists, report the original Release lookup error.
    throw err.code !== 'MODULE_NOT_FOUND' ? err : releaseError;
  }
}

module.exports = loadBinding();

// Best effort: attach the static node type description when it can be loaded.
try {
  const nodeTypes = require("../../src/node-types.json");
  module.exports.nodeTypeInfo = nodeTypes;
} catch (_) {}

@ -0,0 +1,40 @@
/// Build-script entry point.
///
/// Compiles `src/parser.c` with the `cc` crate under the output name
/// `parser`, and asks Cargo to rerun this script whenever that file changes.
fn main() {
    let grammar_dir = std::path::Path::new("src");
    let parser_source = grammar_dir.join("parser.c");

    let mut parser_build = cc::Build::new();
    parser_build
        .include(grammar_dir)
        // Each warning flag is only passed when the compiler accepts it.
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs")
        .file(&parser_source);

    // Template for a grammar with an external scanner written in C;
    // place it before the `println!` below when needed:
    /*
    let scanner_source = grammar_dir.join("scanner.c");
    parser_build.file(&scanner_source);
    println!("cargo:rerun-if-changed={}", scanner_source.display());
    */

    println!("cargo:rerun-if-changed={}", parser_source.display());
    parser_build.compile("parser");

    // Template for a grammar with an external scanner written in C++:
    /*
    let scanner_source = grammar_dir.join("scanner.cc");
    let mut scanner_build = cc::Build::new();
    scanner_build
        .cpp(true)
        .include(grammar_dir)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .file(&scanner_source);
    println!("cargo:rerun-if-changed={}", scanner_source.display());
    scanner_build.compile("scanner");
    */
}

@ -0,0 +1,52 @@
//! This crate provides clojure language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = "";
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(tree_sitter_clojure::language()).expect("Error loading clojure grammar");
//! let tree = parser.parse(code, None).unwrap();
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter::Language;
// Foreign declaration of the grammar's entry point, which returns a
// tree-sitter `Language`.
// NOTE(review): presumably defined by the generated C parser that is linked
// into this crate; confirm against the build script.
extern "C" {
fn tree_sitter_clojure() -> Language;
}
/// Get the tree-sitter [Language][] for this grammar.
///
/// This is a thin safe wrapper around the foreign `tree_sitter_clojure`
/// function declared in this file's `extern "C"` block.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language() -> Language {
// SAFETY (NOTE(review)): the call takes no arguments; soundness rests on the
// linked symbol matching the declared signature; confirm against the C side.
unsafe { tree_sitter_clojure() }
}
/// Text of this grammar's [`node-types.json`][] file, embedded into the crate
/// at compile time.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
// Uncomment these to include any queries that this grammar contains
// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
    use super::language;
    use tree_sitter::Parser;

    /// Smoke test: a fresh parser must accept the language returned by
    /// `language()`.
    #[test]
    fn test_can_load_grammar() {
        let grammar = language();
        let mut parser = Parser::new();
        let outcome = parser.set_language(grammar);
        outcome.expect("Error loading clojure language");
    }
}

@ -0,0 +1,24 @@
================================================================================
Empty Anonymous Function
================================================================================
#()
--------------------------------------------------------------------------------
(source
(anon_fn_lit))
================================================================================
Anonymous Function
================================================================================
#(+ % 8)
--------------------------------------------------------------------------------
(source
(anon_fn_lit
(sym_lit (sym_name))
(sym_lit (sym_name))
(num_lit)))

@ -0,0 +1,21 @@
================================================================================
True
================================================================================
true
--------------------------------------------------------------------------------
(source
(bool_lit))
================================================================================
False
================================================================================
false
--------------------------------------------------------------------------------
(source
(bool_lit))

@ -0,0 +1,43 @@
================================================================================
Simple Char
================================================================================
\a
--------------------------------------------------------------------------------
(source
(char_lit))
================================================================================
Octal Char
================================================================================
\o377
--------------------------------------------------------------------------------
(source
(char_lit))
================================================================================
Named Char
================================================================================
\backspace
--------------------------------------------------------------------------------
(source
(char_lit))
================================================================================
Unicode Char
================================================================================
\u611B
--------------------------------------------------------------------------------
(source
(char_lit))

@ -0,0 +1,34 @@
================================================================================
Simple Comment
================================================================================
; a comment
--------------------------------------------------------------------------------
(source
(comment))
================================================================================
Two semicolons
================================================================================
;; another comment
--------------------------------------------------------------------------------
(source
(comment))
================================================================================
Multiple lines
================================================================================
;; first line
;; second line
--------------------------------------------------------------------------------
(source
(comment)
(comment))

@ -0,0 +1,25 @@
================================================================================
Simple Deref
================================================================================
@x
--------------------------------------------------------------------------------
(source
(derefing_lit
(sym_lit (sym_name))))
================================================================================
Deref of Call
================================================================================
@(ping y)
--------------------------------------------------------------------------------
(source
(derefing_lit
(list_lit
(sym_lit (sym_name))
(sym_lit (sym_name)))))

@ -0,0 +1,68 @@
================================================================================
Discard Number
================================================================================
#_ 1
--------------------------------------------------------------------------------
(source
(dis_expr
(num_lit)))
================================================================================
Discard List
================================================================================
#_ (+ 1 1)
--------------------------------------------------------------------------------
(source
(dis_expr
(list_lit
(sym_lit (sym_name))
(num_lit)
(num_lit))))
================================================================================
Discard Map
================================================================================
#_ {:a 1
:b 2}
--------------------------------------------------------------------------------
(source
(dis_expr
(map_lit
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit))))
================================================================================
Discard Multiple
================================================================================
(let [x 1
#_ #_ y 2]
(+ x 2))
--------------------------------------------------------------------------------
(source
(list_lit
(sym_lit (sym_name))
(vec_lit
(sym_lit (sym_name))
(num_lit)
(dis_expr
(dis_expr
(sym_lit (sym_name)))
(num_lit)))
(list_lit
(sym_lit (sym_name))
(sym_lit (sym_name))
(num_lit))))

@ -0,0 +1,14 @@
================================================================================
Not Officially Supported
================================================================================
#=(+ 1 1)
--------------------------------------------------------------------------------
(source
(evaling_lit
(list_lit
(sym_lit (sym_name))
(num_lit)
(num_lit))))

@ -0,0 +1,102 @@
================================================================================
Keyword
================================================================================
:smile
--------------------------------------------------------------------------------
(source
(kwd_lit
(kwd_name)))
================================================================================
Keyword with Prefix
================================================================================
:fun/day
--------------------------------------------------------------------------------
(source
(kwd_lit
(kwd_ns)
(kwd_name)))
================================================================================
Autoresolving Keyword
================================================================================
::run
--------------------------------------------------------------------------------
(source
(kwd_lit
(kwd_name)))
================================================================================
Autoresolving Aliased Keyword
================================================================================
::slow/dance
--------------------------------------------------------------------------------
(source
(kwd_lit
(kwd_ns)
(kwd_name)))
================================================================================
Division Symbol Keyword
================================================================================
:/
--------------------------------------------------------------------------------
(source
(kwd_lit
(kwd_name)))
================================================================================
Namespaced Division Symbol Keyword
================================================================================
:clojure.core//
--------------------------------------------------------------------------------
(source
(kwd_lit
(kwd_ns)
(kwd_name)))
================================================================================
Autoresolving Division Symbol Keyword
================================================================================
::/
--------------------------------------------------------------------------------
(source
(kwd_lit
(kwd_name)))
================================================================================
Autoresolving Aliased Division Symbol Keyword
================================================================================
::clojure//
--------------------------------------------------------------------------------
(source
(kwd_lit
(kwd_ns)
(kwd_name)))

@ -0,0 +1,101 @@
================================================================================
Empty List
================================================================================
()
--------------------------------------------------------------------------------
(source
(list_lit))
================================================================================
List with Keywords
================================================================================
(:a :b :c)
--------------------------------------------------------------------------------
(source
(list_lit
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))))
================================================================================
Call with Anonymous Function
================================================================================
(#(+ % 1) 1)
--------------------------------------------------------------------------------
(source
(list_lit
(anon_fn_lit
(sym_lit (sym_name))
(sym_lit (sym_name))
(num_lit))
(num_lit)))
================================================================================
Map Lookup
================================================================================
({:a 1} :a)
--------------------------------------------------------------------------------
(source
(list_lit
(map_lit
(kwd_lit (kwd_name))
(num_lit))
(kwd_lit (kwd_name))))
================================================================================
Map Lookup Alternate
================================================================================
(:b {:b 2})
--------------------------------------------------------------------------------
(source
(list_lit
(kwd_lit (kwd_name))
(map_lit
(kwd_lit (kwd_name))
(num_lit))))
================================================================================
Set Lookup
================================================================================
(#{:c :e} :e)
--------------------------------------------------------------------------------
(source
(list_lit
(set_lit
(kwd_lit (kwd_name))
(kwd_lit (kwd_name)))
(kwd_lit (kwd_name))))
================================================================================
Call with Symbol with Metadata
================================================================================
(.get ^ByteBuffer b)
--------------------------------------------------------------------------------
(source
(list_lit
(sym_lit (sym_name))
(sym_lit
(meta_lit
(sym_lit (sym_name)))
(sym_name))))

@ -0,0 +1,66 @@
================================================================================
Empty Map
================================================================================
{}
--------------------------------------------------------------------------------
(source
(map_lit))
================================================================================
Simple Map
================================================================================
{:a 1 :b 2}
--------------------------------------------------------------------------------
(source
(map_lit
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit)))
================================================================================
Deeper Map
================================================================================
{:paths ["src"]
:deps {clj-kondo/clj-kondo {:mvn/version "2020.09.09"}}}
--------------------------------------------------------------------------------
(source
(map_lit
(kwd_lit (kwd_name))
(vec_lit
(str_lit))
(kwd_lit (kwd_name))
(map_lit
(sym_lit
(sym_ns)
(sym_name))
(map_lit
(kwd_lit
(kwd_ns)
(kwd_name))
(str_lit)))))
================================================================================
Map with Comma
================================================================================
{:x 1,
:y 2}
--------------------------------------------------------------------------------
(source
(map_lit
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit)))

@ -0,0 +1,91 @@
================================================================================
Symbol Metadata
================================================================================
^String []
--------------------------------------------------------------------------------
(source
(vec_lit
(meta_lit
(sym_lit (sym_name)))))
================================================================================
Keyword Metadata
================================================================================
^:private {}
--------------------------------------------------------------------------------
(source
(map_lit
(meta_lit
(kwd_lit (kwd_name)))))
================================================================================
String Metadata
================================================================================
^"gnarly" {}
--------------------------------------------------------------------------------
(source
(map_lit
(meta_lit
(str_lit))))
================================================================================
Map Metadata
================================================================================
^{:x 0 :y 1} #{}
--------------------------------------------------------------------------------
(source
(set_lit
(meta_lit
(map_lit
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit)))))
================================================================================
Reader Conditional Metadata
================================================================================
^#?(:clj "vanilla" :cljr "strawberry" :cljs "chocolate") []
--------------------------------------------------------------------------------
(source
(vec_lit
(meta_lit
(read_cond_lit
(kwd_lit (kwd_name))
(str_lit)
(kwd_lit (kwd_name))
(str_lit)
(kwd_lit (kwd_name))
(str_lit)))))
================================================================================
Multiple Bits of Metadata
================================================================================
^:wake ^:sit ^:sleep #{}
--------------------------------------------------------------------------------
(source
(set_lit
(meta_lit
(kwd_lit (kwd_name)))
(meta_lit
(kwd_lit (kwd_name)))
(meta_lit
(kwd_lit (kwd_name)))))

@ -0,0 +1,10 @@
================================================================================
Nil
================================================================================
nil
--------------------------------------------------------------------------------
(source
(nil_lit))

@ -0,0 +1,66 @@
================================================================================
Simple Namespace Map
================================================================================
#:prefix{:a 1 :b 2}
--------------------------------------------------------------------------------
(source
(ns_map_lit
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit)))
================================================================================
Nested Namespace Maps
================================================================================
#:outer{:first "Terence"
:last "Tao"
:area #:inner{:name "Mathematics"}}
--------------------------------------------------------------------------------
(source
(ns_map_lit
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(str_lit)
(kwd_lit (kwd_name))
(str_lit)
(kwd_lit (kwd_name))
(ns_map_lit
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(str_lit))))
================================================================================
Autoresolving Namespace Map
================================================================================
#::{}
--------------------------------------------------------------------------------
(source
(ns_map_lit
(auto_res_mark)))
================================================================================
Namespace Map that Autoresolves with Alias
================================================================================
#::s{:x 1 :y 2}
--------------------------------------------------------------------------------
(source
(ns_map_lit
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit)))

@ -0,0 +1,241 @@
================================================================================
Integer
================================================================================
1
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Negative Integer
================================================================================
-2
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
BigInt Integer
================================================================================
11N
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
BigDecimal Integer
================================================================================
99M
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Hex
================================================================================
0xaB
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Negative Hex
================================================================================
-0xFF
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Shouting Hex
================================================================================
0XA
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
BigInt Hex
================================================================================
0XeN
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Octal
================================================================================
013
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Negative Octal
================================================================================
-027
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
BigInt Octal
================================================================================
0377N
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Radix
================================================================================
2r0101010001
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Negative Radix
================================================================================
-10r256
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Shouting Radix
================================================================================
36RBREATHESL0WLY
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Ratio
================================================================================
22/7
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Negative Ratio
================================================================================
-1/2
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Double
================================================================================
1.0
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Negative Double
================================================================================
-2.71828
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Double with Exponent
================================================================================
3e8
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Shouting Double with Exponent
================================================================================
1E9
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
Double with Negative Exponent
================================================================================
2e-1
--------------------------------------------------------------------------------
(source
(num_lit))
================================================================================
BigDecimal Double with Exponent
================================================================================
3e1415926535M
--------------------------------------------------------------------------------
(source
(num_lit))

@ -0,0 +1,91 @@
================================================================================
Symbol Metadata
================================================================================
#^String []
--------------------------------------------------------------------------------
(source
(vec_lit
(old_meta_lit
(sym_lit (sym_name)))))
================================================================================
Keyword Metadata
================================================================================
#^:private {}
--------------------------------------------------------------------------------
(source
(map_lit
(old_meta_lit
(kwd_lit (kwd_name)))))
================================================================================
String Metadata
================================================================================
#^"gnarly" {}
--------------------------------------------------------------------------------
(source
(map_lit
(old_meta_lit
(str_lit))))
================================================================================
Map Metadata
================================================================================
#^{:x 0 :y 1} #{}
--------------------------------------------------------------------------------
(source
(set_lit
(old_meta_lit
(map_lit
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit)))))
================================================================================
Reader Conditional Metadata
================================================================================
#^#?(:clj "vanilla" :cljr "strawberry" :cljs "chocolate") []
--------------------------------------------------------------------------------
(source
(vec_lit
(old_meta_lit
(read_cond_lit
(kwd_lit (kwd_name))
(str_lit)
(kwd_lit (kwd_name))
(str_lit)
(kwd_lit (kwd_name))
(str_lit)))))
================================================================================
Multiple Bits of Metadata
================================================================================
#^:wake #^:sit #^:sleep #{}
--------------------------------------------------------------------------------
(source
(set_lit
(old_meta_lit
(kwd_lit (kwd_name)))
(old_meta_lit
(kwd_lit (kwd_name)))
(old_meta_lit
(kwd_lit (kwd_name)))))

@ -0,0 +1,26 @@
================================================================================
Quoted Symbol
================================================================================
'a-sym
--------------------------------------------------------------------------------
(source
(quoting_lit
(sym_lit (sym_name))))
================================================================================
Quoted List
================================================================================
'(1 2 3)
--------------------------------------------------------------------------------
(source
(quoting_lit
(list_lit
(num_lit)
(num_lit)
(num_lit))))

@ -0,0 +1,18 @@
================================================================================
Platform Reader Conditional
================================================================================
#?(:clj :clj
:cljr :cljr
:cljs :cljs)
--------------------------------------------------------------------------------
(source
(read_cond_lit
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))))

@ -0,0 +1,21 @@
================================================================================
Simple Regular Expression
================================================================================
#"."
--------------------------------------------------------------------------------
(source
(regex_lit))
================================================================================
Hex Digits Regular Expression
================================================================================
#"[0-9a-fA-F]+"
--------------------------------------------------------------------------------
(source
(regex_lit))

@ -0,0 +1,41 @@
================================================================================
Empty Set
================================================================================
#{}
--------------------------------------------------------------------------------
(source
(set_lit))
================================================================================
Simple Set
================================================================================
#{:i :j :k}
--------------------------------------------------------------------------------
(source
(set_lit
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))
(kwd_lit (kwd_name))))
================================================================================
Nested Sets
================================================================================
#{#{1} #{#{0} 2}}
--------------------------------------------------------------------------------
(source
(set_lit
(set_lit
(num_lit))
(set_lit
(set_lit
(num_lit))
(num_lit))))

@ -0,0 +1,27 @@
================================================================================
Splicing Reader Conditional
================================================================================
(list '*
#?@(:clj [x y] :cljr [i j] :cljs [a b]))
--------------------------------------------------------------------------------
(source
(list_lit
(sym_lit (sym_name))
(quoting_lit
(sym_lit (sym_name)))
(splicing_read_cond_lit
(kwd_lit (kwd_name))
(vec_lit
(sym_lit (sym_name))
(sym_lit (sym_name)))
(kwd_lit (kwd_name))
(vec_lit
(sym_lit (sym_name))
(sym_lit (sym_name)))
(kwd_lit (kwd_name))
(vec_lit
(sym_lit (sym_name))
(sym_lit (sym_name))))))

@ -0,0 +1,33 @@
================================================================================
Simple String
================================================================================
"hello there"
--------------------------------------------------------------------------------
(source
(str_lit))
================================================================================
String with Escapes
================================================================================
"first line\nsecond\tline"
--------------------------------------------------------------------------------
(source
(str_lit))
================================================================================
Multiline String
================================================================================
"this is the first line
and what is this one?"
--------------------------------------------------------------------------------
(source
(str_lit))

@ -0,0 +1,63 @@
================================================================================
Simple Symbol
================================================================================
def
--------------------------------------------------------------------------------
(source
(sym_lit (sym_name)))
================================================================================
Symbol with Prefix
================================================================================
clojure.string/blank?
--------------------------------------------------------------------------------
(source
(sym_lit
(sym_ns)
(sym_name)))
================================================================================
Division Symbol
================================================================================
/
--------------------------------------------------------------------------------
(source
(sym_lit
(sym_name)))
================================================================================
Namespaced Division Symbol
================================================================================
clojure.core//
--------------------------------------------------------------------------------
(source
(sym_lit
(sym_ns)
(sym_name)))
================================================================================
Division Symbol followed by delimiter
================================================================================
(+ - * /)
--------------------------------------------------------------------------------
(source
(list_lit
(sym_lit (sym_name))
(sym_lit (sym_name))
(sym_lit (sym_name))
(sym_lit (sym_name))))

@ -0,0 +1,35 @@
================================================================================
Inf
================================================================================
##Inf
--------------------------------------------------------------------------------
(source
(sym_val_lit
(sym_lit (sym_name))))
================================================================================
-Inf
================================================================================
##-Inf
--------------------------------------------------------------------------------
(source
(sym_val_lit
(sym_lit (sym_name))))
================================================================================
NaN
================================================================================
##NaN
--------------------------------------------------------------------------------
(source
(sym_val_lit
(sym_lit (sym_name))))

@ -0,0 +1,27 @@
================================================================================
Syntax Quoted Symbol
================================================================================
`a-sym
--------------------------------------------------------------------------------
(source
(syn_quoting_lit
(sym_lit (sym_name))))
================================================================================
Syntax Quoted List
================================================================================
`(+ ~a 1)
--------------------------------------------------------------------------------
(source
(syn_quoting_lit
(list_lit
(sym_lit (sym_name))
(unquoting_lit
(sym_lit (sym_name)))
(num_lit))))

@ -0,0 +1,44 @@
================================================================================
Tagged Literal
================================================================================
#uuid "00000000-0000-0000-0000-000000000000"
--------------------------------------------------------------------------------
(source
(tagged_or_ctor_lit
(sym_lit (sym_name))
(str_lit)))
================================================================================
Constructor
================================================================================
#user.Fun [1 2]
--------------------------------------------------------------------------------
(source
(tagged_or_ctor_lit
(sym_lit (sym_name))
(vec_lit
(num_lit)
(num_lit))))
================================================================================
Constructor Alternate
================================================================================
#user.Fun {:a 1 :b 2}
--------------------------------------------------------------------------------
(source
(tagged_or_ctor_lit
(sym_lit (sym_name))
(map_lit
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit))))

@ -0,0 +1,75 @@
================================================================================
Unquote Splicing into List
================================================================================
`(+ ~@(list 2 3))
--------------------------------------------------------------------------------
(source
(syn_quoting_lit
(list_lit
(sym_lit (sym_name))
(unquote_splicing_lit
(list_lit
(sym_lit (sym_name))
(num_lit)
(num_lit))))))
================================================================================
Unquote Splicing into Vector
================================================================================
`[:a ~@(list :b :c)]
--------------------------------------------------------------------------------
(source
(syn_quoting_lit
(vec_lit
(kwd_lit (kwd_name))
(unquote_splicing_lit
(list_lit
(sym_lit (sym_name))
(kwd_lit (kwd_name))
(kwd_lit (kwd_name)))))))
================================================================================
Unquote Splicing into Set
================================================================================
`#{:i ~@(list :j :k)}
--------------------------------------------------------------------------------
(source
(syn_quoting_lit
(set_lit
(kwd_lit (kwd_name))
(unquote_splicing_lit
(list_lit
(sym_lit (sym_name))
(kwd_lit (kwd_name))
(kwd_lit (kwd_name)))))))
================================================================================
Unquote Splicing into Map
================================================================================
`{~@(list :a 1) ~@(list :b 2)}
--------------------------------------------------------------------------------
(source
(syn_quoting_lit
(map_lit
(unquote_splicing_lit
(list_lit
(sym_lit (sym_name))
(kwd_lit (kwd_name))
(num_lit)))
(unquote_splicing_lit
(list_lit
(sym_lit (sym_name))
(kwd_lit (kwd_name))
(num_lit))))))

@ -0,0 +1,30 @@
================================================================================
Unquoting Symbol
================================================================================
`~a
--------------------------------------------------------------------------------
(source
(syn_quoting_lit
(unquoting_lit
(sym_lit (sym_name)))))
================================================================================
Unquoting List
================================================================================
`(dec ~(+ 1 a))
--------------------------------------------------------------------------------
(source
(syn_quoting_lit
(list_lit
(sym_lit (sym_name))
(unquoting_lit
(list_lit
(sym_lit (sym_name))
(num_lit)
(sym_lit (sym_name)))))))

@ -0,0 +1,29 @@
================================================================================
Var Quoting a Symbol
================================================================================
#'my-sym
--------------------------------------------------------------------------------
(source
(var_quoting_lit
(sym_lit (sym_name))))
================================================================================
Var Quoting with Reader Conditional
================================================================================
#'#?(:clj my-sym :cljr your-sym :cljs their-sym)
--------------------------------------------------------------------------------
(source
(var_quoting_lit
(read_cond_lit
(kwd_lit (kwd_name))
(sym_lit (sym_name))
(kwd_lit (kwd_name))
(sym_lit (sym_name))
(kwd_lit (kwd_name))
(sym_lit (sym_name)))))

@ -0,0 +1,48 @@
================================================================================
Empty Vector
================================================================================
[]
--------------------------------------------------------------------------------
(source
(vec_lit))
================================================================================
Vector with Numbers
================================================================================
[1 1 2 3 5 8]
--------------------------------------------------------------------------------
(source
(vec_lit
(num_lit)
(num_lit)
(num_lit)
(num_lit)
(num_lit)
(num_lit)))
================================================================================
Vector with Different Types
================================================================================
[:a 1 'fun {:x 1 :y 2} #{}]
--------------------------------------------------------------------------------
(source
(vec_lit
(kwd_lit (kwd_name))
(num_lit)
(quoting_lit
(sym_lit (sym_name)))
(map_lit
(kwd_lit (kwd_name))
(num_lit)
(kwd_lit (kwd_name))
(num_lit))
(set_lit)))

@ -0,0 +1,55 @@
## Credits
Many people were directly and indirectly involved in bringing about tree-sitter-clojure. I may have missed some people below; please let me know if I have. Thanks to all!
* Aerijo - Guide to your first Tree-sitter grammar
* ahlinc - tree-sitter work
* alehatsman - nvim-treesitter and related discussion
* alexmiller - clojure-related inquiries and docs
* andrewchambers - discussion
* bfredl - neovim and tree-sitter work
* borkdude - analyze-reify, babashka, clj-kondo, edamame, and more
* carocad - parcera and discussions
* cgrand - ClojureDart and related reader explanation
* clojars - including everyone who has uploaded there
* CoenraadS - Bracket-Pair-Colorizer-2
* dannyfreeman - grammar.js enhancements and fixes, clojure-ts-mode and discussions
* EvgeniyPeshkov - syntax-highlighter
* georgewfraser - vscode-tree-sitter
* gfredericks - test.check, generators, and discussions
* GrayJack - discussions and tree-sitter-janet
* hitode909 - vscode-perl-outline
* iarenaza - discussions
* IGJoshua - ClojureCLR investigations
* jafingerhut - clojure-related inquiries and haironfire research
* jeff-hykin - tree-sitter and VSCode related
* kolja - nrepl-alliance and tree-sitter question concerning Clojure on StackOverflow
* lread - rewrite-cljc and discussions
* mauricioszabo - clover and repl-tooling
* maxbrunsfeld - tree-sitter and related
* monnier - emacs-tree-sitter related
* NoahTheDuke - discussion and suggestions
* nwjsmith - tree-sitter upgrade
* oakmac - tree-sitter-clojure.oakmac, conj 2018 unsession, advice, etc.
* p00f - nvim-ts-rainbow
* pedrorgirardi - discussions, vscode and tree-sitter-clojure bits
* PEZ - calva, vscode tips, and general discussion
* pyrmont - review, error-spotting, fix, and discussions
* rewinfrey - helpful bits from tree-sitter-haskell
* richhickey - clojure, etc.
* Saikyun - discussions
* seancorfield - clojure-related inquiries
* SergeevPavel - tree-sitter-clojure.SergeevPavel (fork of tree-sitter-clojure.Tavistock with further work)
* SevereOverfl0w - tree-sitter and vim info
* shackra - tree-sitter-query.el
* SignSpice - discussion
* snoe - discussions
* Tavistock - tree-sitter-clojure.Tavistock
* th0rex - emacs-tree-sitter related
* theHamsta - neovim, nvim-treesitter, tree-sitter-commonlisp
* tobias - clojars work
* tonsky - sublime-clojure work with test data, clojure north talk, alabaster theme
* ubolonton - emacs-tree-sitter
* vigoux - nvim-treesitter and related
* Wilfred - difftastic, bug reporting, and discussion

@ -0,0 +1,56 @@
# Scope of tree-sitter-clojure
## TLDR
Only "primitives" (e.g. [symbols](https://github.com/sogaiu/tree-sitter-clojure/blob/c00293fb0cd5ce3a7005c0601e9b546c1ea73094/grammar.js#L280-L282), [lists](https://github.com/sogaiu/tree-sitter-clojure/blob/c00293fb0cd5ce3a7005c0601e9b546c1ea73094/grammar.js#L307-L309), etc.)
are supported, i.e. no higher level constructs like `defn`.
## The Details
### Why
For some background, Clojure (and other Lisps) have runtime extensible "syntax" via macros, but AFAIU tree-sitter's current design assumes a fixed syntax.
Keeping the above in mind, below are some of the factors that influenced the current stance on scope:
* Clojure has no language specification. This means it's unclear what to try to support in the grammar. For example, `defn` is defined in the `clojure.core` namespace, but then so are a lot of other things.
* Each additional item added to the grammar increases the chance of a conflict, which in turn may adversely impact correct parsing. It also makes the grammar harder to extend and maintain. In some cases this may lead to degraded performance (though it may be premature to be concerned about this point).
### Alternatives
It is possible to [use tree-sitter-clojure as a base](https://github.com/tree-sitter/tree-sitter/issues/645)
to add additional constructs to a "derived" grammar. For example, such a grammar
might be specialized to look for "definitions". At least in [emacs-tree-sitter](https://github.com/ubolonton/emacs-tree-sitter),
[it is technically possible to have multiple grammars be used on a single buffer](https://github.com/ubolonton/emacs-tree-sitter/discussions/129#discussioncomment-502836):
> If you want 2 parse trees in the same buffer instead, you would need to define an advice for tree-sitter--do-parse, as well as additional buffer-local variables for the secondary grammar.
Apparently it became possible in September of 2020 for [queries to match on any of a node's supertypes](https://github.com/tree-sitter/tree-sitter/pull/738). It may be possible to make a list supertype that is "composed of" `defn` and things that are not `defn`. [tree-sitter-clojure-def](https://github.com/sogaiu/tree-sitter-clojure-def) is an attempt at realizing this approach.
However, depending on one's goals, it might make more sense to consider leveraging
[clj-kondo's analysis capabilities](https://github.com/clj-kondo/clj-kondo/tree/master/analysis) as clj-kondo already understands Clojure pretty well. IIUC,
[clojure-lsp does this](https://github.com/clojure-lsp/clojure-lsp/blob/14724457f0d553795dfe16317d3ee6c5fc97d4ba/deps.edn#L21).
### Miscellaneous Points
* Earlier attempts at adding `def` and friends resulted in unacceptably high error rates [1]. The tests were conducted against code from [Clojars](https://clojars.org/) (uncontrived code). FWIW, two of the previous tree-sitter-clojure attempts (by [oakmac](https://github.com/oakmac/tree-sitter-clojure) and
[Tavistock](https://github.com/Tavistock/tree-sitter-clojure)) also had unacceptably high error rates [2] and they both attempted to support higher level constructs.
* For use cases like structural editing, it seems important to be able to distinguish between the following sorts of cases:
* `defn` used for defining a function, and
* [Using the symbol `defn` within a macro to construct code to define a function](https://github.com/Raynes/conch/blob/685f2c73138f376f2aa0623053dfdaba350a04f4/src/me/raynes/conch.clj#L251-L252)
AFAICT, the approach taken in tree-sitter-clojure-def does not make telling these sorts of things apart possible.
* It doesn't seem possible to support all "defining" macros like `defsomething`
(e.g. https://github.com/redplanetlabs/specter/blob/efaf35558a2c0068f5b6a8ef1dbbd0912702bdbd/src/clj/com/rpl/specter.cljc#L57-L60) since a user's Clojure code can define these.
## Footnotes
* [1] Author's opinion :)
* [2] Author's opinion again :)
## References
* https://www.reddit.com/r/Clojure/comments/fkc6uv/is_anyone_working_on_a_treesitter_parser_for/fksmf67/
* https://github.com/sogaiu/tree-sitter-clojure/issues/15#issuecomment-880729889

@ -0,0 +1,162 @@
# Testing tree-sitter-clojure
## TLDR
[tree-sitter-clojure](https://github.com/sogaiu/tree-sitter-clojure) has been tested using a variety of methods.
## The Details
This document will touch on some of those methods and why they were attempted:
1. Using corpus data from other tree-sitter-clojure attempts
2. Using Clojure source from [Clojars](https://clojars.org/)
3. Generative testing via [Hypothesis](https://github.com/HypothesisWorks/hypothesis)
Other employed methods that won't be covered (in much, if any, detail) here:
1. Sporadic manual invocations
2. Using [tonsky's sublime-clojure](https://github.com/tonsky/sublime-clojure) test data
3. Generative testing via [test.check](https://github.com/clojure/test.check/)
4. [Manual inspection of the grammar](https://github.com/sogaiu/tree-sitter-clojure/issues/3)
## Using corpus data from other tree-sitter-clojure attempts
There were at least two previous attempts at implementing tree-sitter-clojure,
[one by oakmac](https://github.com/oakmac/tree-sitter-clojure) and [another by Tavistock](https://github.com/Tavistock/tree-sitter-clojure). Important things
were learned by trying to make these attempts work, but for reasons not covered
here, a separate attempt was started.
Both earlier attempts had [corpus](https://github.com/oakmac/tree-sitter-clojure/tree/master/corpus) [data](https://github.com/Tavistock/tree-sitter-clojure/tree/master/corpus) that could be adapted for testing. Consequently,
[tsclj-tests-parser](https://gitlab.com/sogaiu/tsclj-tests-parser)
was created to extract [the relevant data as plain files](https://gitlab.com/sogaiu/tsclj-tests-parser/-/tree/master/test-files). These were in turn fed to
tree-sitter's `parse` command using the tree-sitter-clojure grammar to check
for parsing errors.
If changes are made to tree-sitter-clojure's grammar, this method can be used
to quickly check for some forms of undesirable breakage. (This could be taken
a bit further by adapting the content as corpus data for tree-sitter-clojure.)
### But...
One issue with this approach is that it relies on manually identifying and
spelling out appropriate test cases, which in the case of Clojure, is
complicated by the lack of a language specification.
Apart from detailed research, this was partially addressed by testing against
a large sample of Clojure source code written by the community.
## Using Clojure source from Clojars
The most fruitful method of testing was working with Clojure source written
by humans for purposes other than for testing tree-sitter-clojure.
### Where to get samples of Clojure source
Initially, repositories were cloned from a variety of locations, but before
long a decision was made to switch to using "release" jars from Clojars.
The latter decision was motivated by wanting source that was less likely to
be "broken" in various ways. Compared to "release" jar content from Clojars,
the default branch of a repository seemed to have a higher probability of
"not quite working". Although the Clojars "release" idea was an improvement,
weeding out inappropriate Clojure source was still necessary.
A variety of approaches were used to come up with a specific list of jars from
Clojars, but the most recent attempt is [gen-clru-list](https://gitlab.com/sogaiu/gen-clru-list). This is basically a [babashka](https://github.com/babashka/babashka) script that fetches [Clojars' feed.clj](https://github.com/clojars/clojars-web/wiki/Data#useful-extracts-from-the-poms), does some processing, and
writes out a list of urls. For reference, this approach currently yields a number
of urls in the neighborhood of 19,000.
### How to check retrieved Clojure samples
The retrieved content was initially checked using [a-tsclj-checker](https://github.com/sogaiu/a-tsclj-checker) (an adaptation of
[analyze-reify](https://github.com/borkdude/analyze-reify)) which uses
[Rust bindings for tree-sitter](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) and tree-sitter-clojure to parse Clojure
source code. Notably, it can traverse directories and also operate on `.jar`
files.
Once an error is detected, it is easier to investigate if one has direct
access to the Clojure source file in question (as compared with rummaging
around `.jar` files). Thus, it was decided to create a single directory tree
containing extracted data from all retrieved jars. On a side note, the
single directory tree took less than 2 GB of disk space.
A less fancy, but easier to maintain (i.e. not written in Rust) tool --
[ts-grammar-checker](https://gitlab.com/sogaiu/ts-grammar-checker) -- was
developed as an alternative to `a-tsclj-checker`. Strictly speaking,
`ts-grammar-checker` may not be necessary as one can probably employ
tree-sitter's `parse` command in combination with `find`, `xargs` and the like
if on some kind of \*nix. An example of a comparable invocation is:
```
find ~/src/clojars-cljish -type f -regex '.*\.clj[cs]?$' -print0 | xargs -0 npx tree-sitter parse --quiet > my-results.txt
```
`a-tsclj-checker` is the fastest tool but it has not been updated to the most
recent version of tree-sitter-clojure. `ts-grammar-checker` is not quite as
fast, but it can be easily adapted to work with other tree-sitter grammars (e.g.
it's [used](https://gitlab.com/sogaiu/ts-grammar-checker/-/blob/master/janet-checker.janet) for [tree-sitter-janet-simple](https://github.com/sogaiu/tree-sitter-janet-simple) as well). However, it does not support accessing content
within `.jar` files.
Across somewhat less than 150,000 files (.clj, .cljc, .cljs), `a-tsclj-checker`
typically takes a little less than 30 seconds, while `ts-grammar-checker`
typically takes a bit more than 100 seconds (at least on the author's machine).
In subjective terms, it hasn't felt terribly different because knowing there
is at least a 30 second wait, [one typically doesn't sit waiting at a prompt
for execution completion](https://xkcd.com/303/).
For any files that parse with errors, it can be handy to apply
[clj-kondo](https://github.com/clj-kondo/clj-kondo). The specific details that
`clj-kondo` reported were often helpful when examining individual files, but
that diagnostic information also provided a way to partition the files into
groups. Subjectively it can feel more manageable to deal with 5 groups of files
compared with 100 separate files (though it's true that the grouping does
not always turn out to be that meaningful).
An individual "suspect" file is typically viewed manually in an editor (usually
one that has `clj-kondo` support enabled) and examined for "issues".
In practice, testing the grammar against appropriate Clojure source from Clojars
has been the most useful in finding issues with the grammar. The lack of a
specification for Clojure increased the difficulty of creating an appropriate
grammar, but having a large sample of code to test against helped to mitigate
this a bit. On more than one occasion some version of the grammar failed to
parse some legitimate Clojure source and subsequent investigation revealed
that the grammar had not accounted for an uncommon and/or unanticipated usage.
### But...
This method has a significant weakness as there could be cases where
tree-sitter would parse successfully but the result could be inappropriate.
For example, if the grammar definition was faulty, something which should
be parsed as a symbol might end up parsed as a number with no error reported.
To partially address this issue, generative / property-based testing was
attempted.
## Generative testing via Hypothesis
Initially, [some effort was made to use test.check](https://gist.github.com/sogaiu/c0d668d050b63e298ef63549e357f9d2). However, [an outstanding issue with test.check](https://github.com/clojure/test.check/blob/master/doc/growth-and-shrinking.md#unnecessary-bind) (aka TCHECK-112) seemed very likely to be relevant
for the types of tests being considered. Also, the approach used [libpython-clj](https://github.com/clj-python/libpython-clj) to call tree-sitter via [Python bindings for tree-sitter](https://github.com/tree-sitter/py-tree-sitter). Although invoking tree-sitter via Python worked, it was awkward to connect this with `test.check`. For the above reasons, the `test.check` + `libpython-clj` approach (neat as it was) was abandoned.
Interestingly, Python's Hypothesis doesn't suffer from test.check's ["long-standing Hard Problem"](https://clojure.atlassian.net/browse/TCHECK-112) so that was given a try. [prop-test-ts-clj](https://github.com/sogaiu/prop-test-ts-clj) and [hypothesis-grammar-clojure](https://github.com/sogaiu/hypothesis-grammar-clojure) are the resulting
bits.
At least [one issue](https://github.com/sogaiu/tree-sitter-clojure/issues/7) was discovered and it also turned out that
[parcera](https://github.com/carocad/parcera) was [affected](https://github.com/carocad/parcera/issues/86).
The code was also adapted a bit to test [Calva](https://github.com/BetterThanTomorrow/calva). Some issues were discovered and [reported upstream](https://github.com/BetterThanTomorrow/calva/issues/802).
### But...
A drawback of this approach is that details of the tree-sitter-clojure grammar
became embedded in the tests. One consequence is that if
tree-sitter-clojure's grammar changes, then the tests may need to be updated
to reflect changes in the grammar (if there is an intent to continue to
use them).
## Summary
tree-sitter-clojure has been tested in a variety of ways attempting to address
various real-world constraints (e.g. lack of a language specification,
limitations of tree-sitter's approach for a language with extensible syntax,
etc.). AFAICT, for what it sets out to do, it seems to work pretty well so
far.

@ -0,0 +1,17 @@
## Use Information
tree-sitter-clojure has been used in the following:
* One of the supported languages in the [nvim-treesitter](https://github.com/nvim-treesitter/nvim-treesitter#supported-languages) plugin for
[neovim](https://github.com/neovim/neovim) where [tree-sitter support is still in the early stages](https://neovim.io/news/2021/07).
* One of the supported languages in [difftastic](https://github.com/Wilfred/difftastic) -- "an experimental diff tool that compares files based on their syntax".
* One of the supported languages in [Cursorless](https://github.com/cursorless-dev/cursorless) -- "a spoken language for structural code editing, enabling developers to code by voice at speeds not possible with a keyboard".
* Exploring [alternative highlighting ideas](https://github.com/ubolonton/emacs-tree-sitter/issues/68) and [an early emacs user foray](https://ag91.github.io/blog/2021/06/22/how-(simple-is)-to-install-a-clojure-tree-sitter-grammar-and-use-it-from-emacs/), both via [emacs-tree-sitter](https://github.com/ubolonton/emacs-tree-sitter).
* Base of [tree-sitter-commonlisp](https://github.com/theHamsta/tree-sitter-commonlisp)
* Older versions of the grammar were used to implement [Atom support](https://github.com/sogaiu/language-clojure/tree/tree-sitter-clojure) as well as a couple of [proof-of-concept](https://github.com/sogaiu/vscode-clojure-defs)
[VSCode extensions](https://github.com/sogaiu/vscode-clojure-colorizer). However, these have not been updated to use the most recent grammar.

@ -0,0 +1,505 @@
// one aim is to try to parse what is correct (in the sense of
// officially supported), but also be looser in parsing additional
// things. this is more or less in line with advice from tree-sitter
// folks.
// java.lang.Character.isWhitespace AND comma
//
// Space Separator (Zs) but NOT including (U+00A0, U+2007, U+202F)
// U+0020, U+1680, U+2000, U+2001, U+2002, U+2003, U+2004, U+2005,
// U+2006, U+2008, U+2009, U+200A, U+205F, U+3000
// Line Separator (Zl)
// U+2028
// Paragraph Separator (Zp)
// U+2029
// Horizontal Tabulation
// \t
// Line Feed
// \n
// Vertical Tabulation
// U+000B
// Form Feed
// \f
// Carriage Return
// \r
// File Separator
// U+001C
// Group Separator
// U+001D
// Record Separator
// U+001E
// Unit Separator
// U+001F
// Character class matching exactly one whitespace character: the code points
// enumerated in the comment above, plus the comma.
const WHITESPACE_CHAR =
/[\f\n\r\t, \u000B\u001C\u001D\u001E\u001F\u2028\u2029\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2008\u2009\u200a\u205f\u3000]/;
// A run of one or more whitespace characters, lexed as a single token.
const WHITESPACE =
token(repeat1(WHITESPACE_CHAR));
// A comment starts with `;` or `#!` and extends over the rest of the line;
// the terminating newline, when present, is part of the token.
const COMMENT =
token(/(;|#!).*\n?/);
// Single-character classes used as building blocks for the number patterns.
const DIGIT =
/[0-9]/;
// ASCII digit or letter; used for the portion of a radix number after r/R.
const ALPHANUMERIC =
/[0-9a-zA-Z]/;
const HEX_DIGIT =
/[0-9a-fA-F]/;
const OCTAL_DIGIT =
/[0-7]/;
// "0", then x or X, then one or more hex digits, with an optional "N" suffix.
const HEX_NUMBER =
seq("0",
/[xX]/,
repeat1(HEX_DIGIT),
optional("N"));
// "0" followed by one or more octal digits, with an optional "N" suffix.
const OCTAL_NUMBER =
seq("0",
repeat1(OCTAL_DIGIT),
optional("N"));
// One or more digits, then r or R, then one or more alphanumerics.
// XXX: not constraining number before r/R
// XXX: not constraining portion after r/R
const RADIX_NUMBER =
seq(repeat1(DIGIT),
/[rR]/,
repeat1(ALPHANUMERIC));
// Digits "/" digits.
// XXX: not accounting for division by zero
const RATIO =
seq(repeat1(DIGIT),
"/",
repeat1(DIGIT));
// Digits, an optional fractional part ("." plus zero or more digits), an
// optional exponent (e or E, optional sign, digits) and an optional "M"
// suffix. Because both middle parts are optional, this pattern also matches
// a plain run of digits.
const DOUBLE =
seq(repeat1(DIGIT),
optional(seq(".",
repeat(DIGIT))),
optional(seq(/[eE]/,
optional(/[+-]/),
repeat1(DIGIT))),
optional("M"));
// Digits with an optional "M" or "N" suffix.
const INTEGER =
seq(repeat1(DIGIT),
optional(/[MN]/));
// The complete number token: an optional leading sign followed by any one of
// the forms above, wrapped in token(prec(10, ...)) so it is a single token
// with precedence 10.
const NUMBER =
token(prec(10, seq(optional(/[+-]/),
choice(HEX_NUMBER,
OCTAL_NUMBER,
RADIX_NUMBER,
RATIO,
DOUBLE,
INTEGER))));
// The literal `nil`.
const NIL =
token('nil');
// The literals `false` and `true`.
const BOOLEAN =
token(choice('false',
'true'));
// First character of a keyword's text (after the leading colon(s)): anything
// except the whitespace characters, the delimiters ( ) [ ] { }, and the
// characters " @ ~ ^ ; ` \ , : /. Unlike SYMBOL_HEAD, this class does not
// exclude #, ' or digits.
const KEYWORD_HEAD =
/[^\f\n\r\t ()\[\]{}"@~^;`\\,:/\u000B\u001C\u001D\u001E\u001F\u2028\u2029\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2008\u2009\u200a\u205f\u3000]/;
// Subsequent keyword characters: a head character, or additionally : or '.
const KEYWORD_BODY =
choice(/[:']/, KEYWORD_HEAD);
// Text following the namespace delimiter in a keyword: one or more characters
// drawn from the head class plus : ' and /.
const KEYWORD_NAMESPACED_BODY =
token(repeat1(choice(/[:'\/]/, KEYWORD_HEAD)));
// Keyword text without its leading marker: one head character followed by
// zero or more body characters.
const KEYWORD_NO_SIGIL =
token(seq(KEYWORD_HEAD,
repeat(KEYWORD_BODY)));
// Leading marker of an ordinary keyword.
const KEYWORD_MARK =
token(":");
// Leading marker of an auto-resolved keyword.
const AUTO_RESOLVE_MARK =
token("::");
// A double-quoted string as one token. Inside the quotes, any character other
// than " and \ is taken as-is; a backslash is always consumed together with
// the single character after it (matched by /./), so an escaped quote does
// not terminate the string.
const STRING =
token(seq('"',
repeat(/[^"\\]/),
repeat(seq("\\",
/./,
repeat(/[^"\\]/))),
'"'));
// Octal character body: "o" followed by one to three DIGITs. Note that DIGIT
// (0-9) is used rather than OCTAL_DIGIT; the stricter alternative is kept
// commented out below.
// XXX: better to match \o378 as a single item
const OCTAL_CHAR =
seq("o",
choice(seq(DIGIT, DIGIT, DIGIT),
seq(DIGIT, DIGIT),
seq(DIGIT)));
// choice(seq(/[0-3]/, OCTAL_DIGIT, OCTAL_DIGIT),
// seq(OCTAL_DIGIT, OCTAL_DIGIT),
// seq(OCTAL_DIGIT)));
// Names accepted after the backslash for named character literals.
const NAMED_CHAR =
choice("backspace",
"formfeed",
"newline",
"return",
"space",
"tab");
// Unicode character body: "u" followed by exactly four hex digits.
// XXX: outside of: (c >= '\uD800' && c <= '\uDFFF') - LispReader.java
// but not doing this
const UNICODE =
seq("u",
HEX_DIGIT,
HEX_DIGIT,
HEX_DIGIT,
HEX_DIGIT);
// Fallback character body: whatever /./ matches, or a newline.
// XXX: not quite sure what this is supposed to be...
// return Character.valueOf(token.charAt(0)); -- LispReader.java
// java char is 16 bits...what can tree-sitter manage?
//
// XXX: null is supposed to be usable but putting \x00 below
// does not seem to work
const ANY_CHAR =
/.|\n/;
// A character literal as one token: a backslash followed by one of the
// bodies defined above.
const CHARACTER =
token(seq("\\",
choice(OCTAL_CHAR,
NAMED_CHAR,
UNICODE,
ANY_CHAR)));
// First character of a symbol: anything except the whitespace characters,
// the delimiters ( ) [ ] { }, the characters / " @ ~ ^ ; ` \ , : # ' and the
// digits 0-9.
const SYMBOL_HEAD =
/[^\f\n\r\t \/()\[\]{}"@~^;`\\,:#'0-9\u000B\u001C\u001D\u001E\u001F\u2028\u2029\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2008\u2009\u200a\u205f\u3000]/;
// The "/" token; the grammar rules use it both as the namespace delimiter
// and, on its own, as a symbol/keyword name.
const NS_DELIMITER =
token("/");
// Subsequent symbol characters: a head character, or additionally : # ' or
// a digit.
const SYMBOL_BODY =
choice(SYMBOL_HEAD,
/[:#'0-9]/);
// Name part following the namespace delimiter: one or more characters drawn
// from the head class plus / : # ' and digits.
const SYMBOL_NAMESPACED_NAME =
token(repeat1(choice(SYMBOL_HEAD,
/[\/:#'0-9]/)));
// A symbol without a namespace delimiter: one head character followed by
// zero or more body characters.
// XXX: no attempt is made to enforce certain complex things, e.g.
//
// Symbols beginning or ending with ':' are reserved by Clojure.
// A symbol can contain one or more non-repeating ':'s
const SYMBOL =
token(seq(SYMBOL_HEAD,
repeat(SYMBOL_BODY)));
// Grammar definition for Clojure. Rule bodies are assembled from the token
// constants defined earlier in this file.
module.exports = grammar({
name: 'clojure',
// No extras are declared: whitespace and comments are matched explicitly
// through $._gap rather than being listed as extras.
extras: $ =>
[],
conflicts: $ =>
[],
// Hidden helper rules to be inlined at their use sites.
inline: $ =>
[$._kwd_leading_slash,
$._kwd_just_slash,
$._kwd_qualified,
$._kwd_unqualified,
$._kwd_marker,
$._sym_qualified,
$._sym_unqualified],
rules: {
// THIS MUST BE FIRST -- even though this doesn't look like it matters
// A source file is any sequence of forms and gaps (possibly empty).
source: $ =>
repeat(choice($._form,
$._gap)),
// Things that may appear between forms: whitespace, a comment, or a
// discard expression.
_gap: $ =>
choice($._ws,
$.comment,
$.dis_expr),
_ws: $ =>
WHITESPACE,
comment: $ =>
COMMENT,
// Discard expression: the `#_` marker, optional gaps, then the form that
// is being discarded.
dis_expr: $ =>
seq(field('marker', "#_"),
repeat($._gap),
field('value', $._form)),
// Every kind of form the grammar recognizes.
_form: $ =>
choice($.num_lit, // atom-ish
$.kwd_lit,
$.str_lit,
$.char_lit,
$.nil_lit,
$.bool_lit,
$.sym_lit,
// basic collection-ish
$.list_lit,
$.map_lit,
$.vec_lit,
// dispatch reader macros
$.set_lit,
$.anon_fn_lit,
$.regex_lit,
$.read_cond_lit,
$.splicing_read_cond_lit,
$.ns_map_lit,
$.var_quoting_lit,
$.sym_val_lit,
$.evaling_lit,
$.tagged_or_ctor_lit,
// some other reader macros
$.derefing_lit,
$.quoting_lit,
$.syn_quoting_lit,
$.unquote_splicing_lit,
$.unquoting_lit),
num_lit: $ =>
NUMBER,
// A keyword is one of four shapes, each starting with a marker (`:` or
// `::`); see the individual helper rules below.
kwd_lit: $ =>
choice($._kwd_leading_slash,
$._kwd_just_slash,
$._kwd_qualified,
$._kwd_unqualified),
// (namespace :/usr/bin/env) ; => ""
// (name :/usr/bin/env) ; => "usr/bin/env"
_kwd_leading_slash: $ =>
seq(field('marker', $._kwd_marker),
field('delimiter', NS_DELIMITER),
field('name', alias(KEYWORD_NAMESPACED_BODY, $.kwd_name))),
// (namespace :/) ;=> nil
// (name :/) ;=> "/"
_kwd_just_slash: $ =>
seq(field('marker', $._kwd_marker),
field('name', alias(NS_DELIMITER, $.kwd_name))),
// marker, namespace, "/", name. Given precedence 2, above the
// precedence 1 of _kwd_unqualified.
_kwd_qualified: $ =>
prec(2, seq(field('marker', $._kwd_marker),
field('namespace', alias(KEYWORD_NO_SIGIL, $.kwd_ns)),
field('delimiter', NS_DELIMITER),
field('name', alias(KEYWORD_NAMESPACED_BODY, $.kwd_name)))),
// marker followed directly by the name.
_kwd_unqualified: $ =>
prec(1, seq(field('marker', $._kwd_marker),
field('name', alias(KEYWORD_NO_SIGIL, $.kwd_name)))),
_kwd_marker: $ =>
choice(KEYWORD_MARK, AUTO_RESOLVE_MARK),
str_lit: $ =>
STRING,
char_lit: $ =>
CHARACTER,
nil_lit: $ =>
NIL,
bool_lit: $ =>
BOOLEAN,
// A symbol, optionally preceded by any number of metadata items.
sym_lit: $ =>
seq(repeat($._metadata_lit),
choice($._sym_qualified, $._sym_unqualified)),
// namespace, "/", name.
_sym_qualified: $ =>
prec(1, seq(field("namespace", alias(SYMBOL, $.sym_ns)),
field("delimiter", NS_DELIMITER),
field("name", alias(SYMBOL_NAMESPACED_NAME, $.sym_name)))),
// Either a bare "/" or a plain symbol, exposed as the 'name' field.
_sym_unqualified: $ =>
field('name', alias(choice(NS_DELIMITER, // division symbol
SYMBOL),
$.sym_name)),
// One metadata item (either syntax) followed by any gaps; the item is
// exposed under the field "meta" or "old_meta" respectively.
_metadata_lit: $ =>
seq(choice(field("meta", $.meta_lit),
field("old_meta", $.old_meta_lit)),
optional(repeat($._gap))),
// `^` metadata: marker, optional gaps, then a reader conditional, map,
// string, keyword or symbol as the value.
meta_lit: $ =>
seq(field('marker', "^"),
repeat($._gap),
field('value', choice($.read_cond_lit,
$.map_lit,
$.str_lit,
$.kwd_lit,
$.sym_lit))),
// `#^` metadata: same shape as meta_lit with a different marker.
old_meta_lit: $ =>
seq(field('marker', "#^"),
repeat($._gap),
field('value', choice($.read_cond_lit,
$.map_lit,
$.str_lit,
$.kwd_lit,
$.sym_lit))),
// Collection literals: each public rule is optional metadata followed by
// the corresponding _bare_* rule, which holds the delimiters and contents.
list_lit: $ =>
seq(repeat($._metadata_lit),
$._bare_list_lit),
_bare_list_lit: $ =>
seq(field('open', "("),
repeat(choice(field('value', $._form),
$._gap)),
field('close', ")")),
map_lit: $ =>
seq(repeat($._metadata_lit),
$._bare_map_lit),
_bare_map_lit: $ =>
seq(field('open', "{"),
repeat(choice(field('value', $._form),
$._gap)),
field('close', "}")),
vec_lit: $ =>
seq(repeat($._metadata_lit),
$._bare_vec_lit),
_bare_vec_lit: $ =>
seq(field('open', "["),
repeat(choice(field('value', $._form),
$._gap)),
field('close', "]")),
set_lit: $ =>
seq(repeat($._metadata_lit),
$._bare_set_lit),
_bare_set_lit: $ =>
seq(field('marker', "#"),
field('open', "{"),
repeat(choice(field('value', $._form),
$._gap)),
field('close', "}")),
// `#` immediately followed by a bare list.
anon_fn_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "#"),
$._bare_list_lit),
// `#` followed by a string token; unlike most rules here, no leading
// metadata is accepted.
regex_lit: $ =>
seq(field('marker', "#"),
STRING),
read_cond_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "#?"),
// whitespace possible, but neither comment nor discard
repeat($._ws),
$._bare_list_lit),
splicing_read_cond_lit: $ =>
// XXX: metadata here doesn't seem to make sense, but the repl
// will accept: [^:x #?@(:clj [[:a]] :cljr [[:b]])]
seq(repeat($._metadata_lit),
field('marker', "#?@"),
// whitespace possible, but neither comment nor discard
repeat($._ws),
$._bare_list_lit),
// Visible node wrapping a bare `::`, used as a namespaced-map prefix.
auto_res_mark: $ =>
AUTO_RESOLVE_MARK,
// `#`, then a prefix (bare `::` or a keyword), optional gaps, then a
// bare map.
ns_map_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "#"),
field('prefix', choice($.auto_res_mark,
$.kwd_lit)),
repeat($._gap),
$._bare_map_lit),
var_quoting_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "#'"),
repeat($._gap),
// XXX: symbol, reader conditional, and tagged literal can work
// any other things?
field('value', $._form)),
// `##` followed by a symbol; no leading metadata is accepted.
sym_val_lit: $ =>
seq(field('marker', "##"),
repeat($._gap),
field('value', $.sym_lit)),
evaling_lit: $ =>
seq(repeat($._metadata_lit), // ^:x #=(vector 1)
field('marker', "#="),
repeat($._gap),
field('value', choice($.list_lit,
$.read_cond_lit,
// #= ^:a java.lang.String
$.sym_lit))),
// #uuid "00000000-0000-0000-0000-000000000000"
// #user.Fun[1 2]
// #user.Fun{:a 1 :b 2}
tagged_or_ctor_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "#"),
// # uuid "00000000-0000-0000-0000-000000000000"
// # #_ 1 uuid "00000000-0000-0000-0000-000000000000"
// etc.
repeat($._gap),
// # ^:a uuid "00000000-0000-0000-0000-000000000000"
field('tag', $.sym_lit),
repeat($._gap),
field('value', $._form)),
// The remaining reader macros share one shape: optional metadata, a
// marker, optional gaps, then the form the marker applies to.
derefing_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "@"),
repeat($._gap),
field('value', $._form)),
quoting_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "'"),
repeat($._gap),
field('value', $._form)),
syn_quoting_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "`"),
repeat($._gap),
field('value', $._form)),
unquote_splicing_lit: $ =>
// XXX: metadata here doesn't seem to make sense, but the repl
// will accept: `(^:x ~@[:a :b :c])
seq(repeat($._metadata_lit),
field('marker', "~@"),
repeat($._gap),
field('value', $._form)),
unquoting_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', "~"),
repeat($._gap),
field('value', $._form)),
}
});
// Local Variables:
// mode: js-mode
// js-indent-align-list-continuation: t
// js-indent-level: 2
// End:

@ -0,0 +1,278 @@
// NOTES
//
// - possibilities (may be as separate grammars?)
// - no fields (but likely that means metadata lives "outside")
// - retain whitespace and comments (for round-tripping)
// - clojure clr's pipe-escaping:
// https://github.com/clojure/clojure-clr/wiki/Specifying-types
//
// - traversal issues
// - use of fields (e.g. value, prefix, tag, metadata)
// - allows skipping certain nodes such as:
// - metadata
// - comment
// - discard-related
// - allows targeted navigation without having to know the
// node type (e.g. field value vs node type map)
// - limitations
// - a bit slower?
// - cannot use fields for things without names, e.g.
// - seq(...) cannot be the 2nd arg to field()
// - $._foo won't work unless it "resolves" to $.bar (non underscore)
// - for a given node, examine child nodes in reverse, that is,
// starting at the end and working backwards
//
// - probably won't do
// - support def, if, and other "primitives"
// - support for {{}} template constructs
//
// - testing
// - clj, cljc, cljs
// - what about edn?
// - approaches
// - "port" hand-written tests
// - oakmac (done)
// - Tavistock (done)
// - tonsky
// - generative testing for token testing (done via hypothesis and py-tree-sitter)
// - look for parsing errors across large sample (e.g. clojars) (done)
// - how to "package" testing facilities
// - currently each approach has its own project directory
//
// - debugging
// - npx tree-sitter parse filepath + look for ERROR in console output
// - npx tree-sitter parse --debug-graph filepath + view log.html
// - npx tree-sitter parse --debug filepath + view console output
//
// - loosening ideas:
// - allow ##Other (not just ##Inf, -##Inf, ##NaN)
// - allow # in keywords
// - allow ::/
// - don't handle "no repeating colons" in symbols and in non-leading
// portions of keywords (currently unimplemented anyway)
//
// - can strings have unicode escapes in them?
//
// - tree-sitter
// - parse subcommand
// - parse from stdin
// - recursively traverse multiple directories (globbing exists)
// - parsing within zips/jars
// - more flexible file type specification
// - custom parsing / processing per "file"
// - web-ui subcommand
// - didn't work when grammar used externals
// - file browsing + loading better than copy-paste
// - indicate error via color
// - jump to error
// - somehow searching for error doesn't seem to work sometimes
// - ~/.tree-sitter
// - bin
// - contains shared libraries for each grammar
// - parse command seems to install stuff here
// - config.json
// - parser-directories used to customize "scan" for grammars
// - theme used for highlight subcommand
// symbolPat from LispReader.java (for keywords and symbols?)
// "[:]?([\\D&&[^/]].*/)?(/|[\\D&&[^/]][^/]*)"
//
// https://clojure.org/reference/reader#_symbols
// 1. Symbols begin with a non-numeric char -- XXX: see 2 for limits?
// 2. Can contain alphanumeric chars and *, +, !, -, _, ', ?, <, > and =
// 3. / can be used once in the middle of a symbol (sep the ns from the name)
// 4. / by itself names the division function
// 5. . special meaning can be used >= 1 times in the middle of a symbol
// to designate a fully-qualified class name, e.g. java.util.BitSet,
// or in namespace names.
// 6. Symbols beginning or ending with . are reserved by Clojure
// 7. Symbols beginning or ending with : are reserved by Clojure
// 8. A symbol can contain one or more non-repeating ':'s
//
// missing
// 9. $, &, % -- in body and end of symbol
//
// undocumented
// -1a can be made a symbol, but reader will reject? repl rejects
// => number parsing takes priority?
// 'a can be made a symbol, but reader will reject? repl -> quote
//
// implied?
// doesn't start with ,
// doesn't start with '
// doesn't start with #
// doesn't start with `
// doesn't start with @
// doesn't start with ^
// doesn't start with \
// doesn't start with ;
// doesn't start with ~
// doesn't start with "
// doesn't start with ( )
// doesn't start with { }
// doesn't start with [ ]
//
// extra:
//
// is my-ns// valid?
//
// "Consistency of symbols between different readers/edn"
//
// foo// should be valid.
//
// 2014-09-16 clojure-dev google group alex miller
//
// https://groups.google.com/d/msg/clojure-dev/b09WvRR90Zc/c3zzMFqDsRYJ
//
// "CLJ-1238 Allow EdnReader to read foo// (matches LispReader behavior)"
//
// changelog for clojure 1.6
//
// is # allowed as a constituent character in keywords?
//
// following points are reasoning based on edn docs
//
// "Bug in reader or repl? reading keyword :#abc"
//
// Symbols begin with a non-numeric character and can contain
// alphanumeric characters and . * + ! - _ ? $ % & =. If -, + or
// . are the first character, the second character must be
// non-numeric. Additionally, : # are allowed as constituent
// characters in symbols other than as the first character.
//
// 2013-05-02 clojure google group colin jones (fwd by dave sann)
//
// https://groups.google.com/d/msg/clojure/lK7juHxsPCc/TeYjxoW_3csJ
//
// Keywords are identifiers that typically designate
// themselves. They are semantically akin to enumeration
// values. Keywords follow the rules of symbols, except they can
// (and must) begin with :, e.g. :fred or :my/fred. If the target
// platform does not have a keyword type distinct from a symbol
// type, the same type can be used without conflict, since the
// mandatory leading : of keywords is disallowed for symbols.
//
// https://github.com/edn-format/edn#symbols
//
// https://clojure.org/reference/reader#_literals
// 0. Keywords are like symbols, except:
// 1. They can and must begin with a colon, e.g. :fred.
// ~~2. They cannot contain '.' in the name part, or name classes.~~
// 3. They can contain a namespace, :person/name, which may contain '.'s.
// 4. A keyword that begins with two colons is auto-resolved in the current
// namespace to a qualified keyword:
// - If the keyword is unqualified, the namespace will be the current
// namespace. In user, ::rect is read as :user/rect.
// - If the keyword is qualified, the namespace will be resolved using
// aliases in the current namespace. In a namespace where x is aliased
// to example, ::x/foo resolves to :example/foo.
//
// extra:
//
// :/ is a legal keyword(?):
//
// alexmiller: @gfredericks :/ is "open for the language to start
// interpreting" and not an invalid keyword so should be ok to generate.
// and cljs should fix it's weirdness. (#clojure-dev 2019-06-07)
//
// https://clojurians-log.clojureverse.org/clojure-dev/2019-06-07
//
// It is undefined/left for future expansion.
//
// Clojurescript's reading seems weird but given that this is undefined
// it's hard to say it's wrong. :)
//
// 2020-07-10 (or so) alexmiller
//
// https://ask.clojure.org/index.php/9427/clarify-the-position-of-as-a-keyword
// https://clojure.atlassian.net/browse/TCHECK-155
//
// . CAN be in the name part:
//
// "[Bug?] Keyword constraints not enforced"
//
// I think you've both misread "they cannot name classes" to be - "They
// cannot contain class names".
//
// The symbol String can name a class but the keyword :String can't,
// that's all I meant there.
//
// As far as '.', that restriction has been relaxed. I'll try to touch
// up the docs for the next release.
//
// 2008-11-25 clojure google group rich hickey
//
// https://groups.google.com/d/msg/clojure/CCuIp_bZ-ZM/THea7NF91Z4J
//
// Whether keywords can start with numbers:
//
// "puzzled by RuntimeException"
//
// we currently allow keywords starting with numbers and seem to have
// decided this is ok. I would like to get Rich to approve a change to
// the page and do so.
//
// 2014-04-25 clojure google group alex miller
//
// https://groups.google.com/forum/#!msg/clojure/XP1XAaDdKLY/kodfZTk8eeoJ
//
// From a discussion in #clojure, it emerged that while :foo/1 is
// currently not allowed, ::1 is.
//
// 2014-12-10 nicola mometto
//
// https://clojure.atlassian.net/browse/CLJ-1286
//
// "Clarify and align valid symbol and keyword rules for Clojure (and edn)"
//
// https://clojure.atlassian.net/browse/CLJ-1527
//
// consistency of symbols between different readers/edn
//
// https://groups.google.com/forum/#!topic/clojure-dev/b09WvRR90Zc
//
// :1 is accepted because it once accidentally worked and they
// don't like breaking existing code
//
// it was never meant to
//
// 2020-06-14 ish noisesmith on #clojure (slack)
//
// There are libraries out there that assume :1 works. They changed
// Clojure at one point in an alpha to disallow such keywords and it broke
// code so they decided to continue allowing them (even tho' they are
// not "legal").
//
// 2020-06-14 ish seancorfield on #clojure (slack)
//
// Whether # is allowed in a keyword:
//
// "Clarification on # as valid symbol character"
//
// this works now, but is not guaranteed to always be valid
//
// 2016-11-08 clojure google group alex miller
//
// https://groups.google.com/forum/#!topic/clojure/CwZHu1Eszbk
// https://clojure.org/reference/reader#_literals
// 1. Integers can be indefinitely long and will be read as Longs when
// in range and clojure.lang.BigInts otherwise.
// 2. Integers with an N suffix are always read as BigInts.
// 3. When possible, they can be specified in any base with radix from
// 2 to 36 (see Long.parseLong()); for example 2r101010, 8r52, 36r16,
// and 42 are all the same Long.
// 4. Floating point numbers are read as Doubles; with M suffix they are
// read as BigDecimals.
// 5. Ratios are supported, e.g. 22/7.
// intPat
// "([-+]?)(?:(0)|([1-9][0-9]*)|0[xX]([0-9A-Fa-f]+)|0([0-7]+)|([1-9][0-9]?)[rR]([0-9A-Za-z]+)|0[0-9]+)(N)?"
// 0[0-9]+ is for better errors -- thanks seancorfield and andyfingerhut
// ratioPat
// "([-+]?[0-9]+)/([0-9]+)"
// floatPat
// "([-+]?[0-9]+(\\.[0-9]*)?([eE][-+]?[0-9]+)?)(M)?"

@ -0,0 +1,19 @@
{
"name": "tree-sitter-clojure",
"version": "0.0.11",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"nan": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
"integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ=="
},
"tree-sitter-cli": {
"version": "0.19.3",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.19.3.tgz",
"integrity": "sha512-UlntGxLrlkQCKVrhm7guzfi+ovM4wDLVCCu3z5jmfDgFNoUoKa/23ddaQON5afD5jB9a02xv4N5MXJfCx+/mpw==",
"dev": true
}
}
}

@ -0,0 +1,29 @@
{
"name": "tree-sitter-clojure",
"version": "0.0.11",
"description": "Clojure grammar for tree-sitter",
"main": "bindings/node",
"scripts": {
"build": "npx tree-sitter generate && npx node-gyp build",
"test": "npx tree-sitter test"
},
"author": "",
"license": "",
"dependencies": {
"nan": "2.14.2"
},
"devDependencies": {
"tree-sitter-cli": "0.19.3"
},
"tree-sitter": [
{
"scope": "source.clojure",
"file-types": [
"bb",
"clj",
"cljc",
"cljs"
]
}
]
}

@ -0,0 +1,29 @@
;; Highlight queries for the Clojure grammar: each pattern below tags the
;; nodes (or anonymous tokens) it matches with the capture name after `@`.
;; Literals
;; Numeric literals.
(num_lit) @number
;; Character and string literals share the @string capture.
[
(char_lit)
(str_lit)
] @string
;; Boolean and nil literals are captured as built-in constants.
[
(bool_lit)
(nil_lit)
] @constant.builtin
;; Keyword literals.
(kwd_lit) @constant
;; Comments
(comment) @comment
;; Treat the quoting-related tokens as operators for the purpose of highlighting.
;; NOTE(review): in Clojure reader terms these are presumably quote ('),
;; syntax-quote (`), unquote (~), deref (@) and unquote-splicing (~@), so the
;; list is slightly wider than quasiquotation proper -- confirm against the grammar.
[
"'"
"`"
"~"
"@"
"~@"
] @operator

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,223 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
/* Reserved symbol ids: the built-in error symbol is the all-ones TSSymbol
 * value, and the built-in end symbol is 0. */
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
/* Size constant (1024) for serialization buffers.
 * NOTE(review): presumably the capacity of the buffer passed to
 * external_scanner.serialize -- confirm against the tree-sitter runtime. */
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
/* Identifier of a parser/lexer state (16-bit). */
typedef uint16_t TSStateId;
/* The following typedefs are skipped when TREE_SITTER_API_H_ is already
 * defined. NOTE(review): presumably the public API header declares the same
 * names, so this guard avoids duplicate typedefs -- confirm. */
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
/* One field-map record: pairs a field id with a child index.
 * TSLanguage.field_map_entries points at an array of these.
 * NOTE(review): `inherited` presumably marks a field propagated from a
 * nested child -- confirm against the tree-sitter runtime. */
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
/* An (index, length) pair; TSLanguage.field_map_slices points at an array
 * of these.
 * NOTE(review): presumably each slice selects a run of TSFieldMapEntry
 * records in TSLanguage.field_map_entries -- confirm. */
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
/* Three boolean flags describing a symbol; TSLanguage.symbol_metadata
 * points at an array of these. */
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
/* Lexer interface used by the lexer macros in this header: a lookahead
 * value, the symbol recorded for the accepted token, and a table of
 * callbacks. */
typedef struct TSLexer TSLexer;
struct TSLexer {
/* Current lookahead value; START_LEXER copies it into a local. */
int32_t lookahead;
/* Written by ACCEPT_TOKEN with the accepted symbol. */
TSSymbol result_symbol;
/* Called by START_LEXER's `next_state` path; the bool is the `skip` flag. */
void (*advance)(TSLexer *, bool);
/* Called by ACCEPT_TOKEN right after result_symbol is stored. */
void (*mark_end)(TSLexer *);
/* NOTE(review): the remaining callbacks are not used in this header; their
 * meaning is presumably what their names suggest -- confirm against the
 * tree-sitter runtime. */
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
/* Kinds of parse action. These enumerators are the values stored in the
 * `type` member by the SHIFT*, REDUCE, RECOVER and ACCEPT_INPUT macros. */
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
/* A single parse action. Both struct variants start with a uint8_t `type`,
 * and the union also exposes a bare `type` member at the same position.
 * NOTE(review): presumably `type` holds a TSParseActionType value and is
 * read first to decide which variant is active -- confirm. */
typedef union {
/* Shift variant: target state plus the `extra` and `repetition` flags set
 * by SHIFT_EXTRA and SHIFT_REPEAT respectively. */
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
/* Reduce variant: the symbol and child count set by REDUCE, plus optional
 * dynamic precedence and production id. */
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
/* Variant tag on its own; RECOVER and ACCEPT_INPUT set only this member. */
uint8_t type;
} TSParseAction;
/* Pair of lex-state numbers; TSLanguage.lex_modes points at an array of
 * these.
 * NOTE(review): presumably one entry per parse state, giving the state for
 * the built-in lexer and for the external scanner -- confirm. */
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
/* Element type of TSLanguage.parse_actions: either a parse action or a
 * small header holding a count and a `reusable` flag.
 * `action` is the first member, so the brace-only initializers produced by
 * the SHIFT, REDUCE, RECOVER and ACCEPT_INPUT macros initialize it.
 * NOTE(review): presumably a header entry precedes `count` action entries
 * in the array -- confirm against the generated parser. */
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
/* Complete description of a language: size counters, pointers to the
 * parse/lex tables and naming metadata, the lex function pointers, and the
 * optional external scanner callbacks.
 * NOTE(review): the field order is presumably part of the ABI shared with
 * the tree-sitter runtime, so it must not be rearranged -- confirm. */
struct TSLanguage {
/* Version number followed by element counts for the tables below. */
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
/* Parse tables. NOTE(review): presumably states below large_state_count use
 * parse_table and the rest use small_parse_table via small_parse_table_map
 * (see SMALL_STATE) -- confirm. */
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
/* Name tables and per-symbol / per-field metadata. */
const char **symbol_names;
const char **field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
/* Lexing: the lex-mode table and two lex functions with the same signature
 * (lexer plus state id, returning bool). */
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
/* External scanner hooks. `create` returns an opaque pointer that the other
 * callbacks receive as their first argument.
 * NOTE(review): presumably `serialize` returns the number of bytes written
 * and `deserialize` receives that length -- confirm. */
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
};
/*
* Lexer Macros
*/
/*
 * Opens the body of a lex function. Declares the locals `result`, `skip`
 * and `eof` (all initialised to false) and `lookahead`, then defines two
 * labels: reaching `next_state` calls lexer->advance(lexer, skip) and falls
 * through to `start`, which clears `skip` and reloads `lookahead` from
 * lexer->lookahead. The initial `goto start` means nothing is advanced
 * before the first lookahead is read.
 * Requires a `lexer` pointer in scope. `eof` is only initialised here;
 * NOTE(review): presumably the generated lex function updates it -- confirm.
 */
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
/* Stores state_value in the in-scope `state` variable and jumps to the
 * `next_state` label set up by START_LEXER, which advances the lexer
 * (with `skip` still false) and reloads the lookahead. */
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
/* Same as ADVANCE, but sets `skip` to true first, so the lexer->advance
 * call at `next_state` receives true as its second argument. START_LEXER's
 * `start` label then resets `skip` to false. */
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
/* Records a successful match: sets the in-scope `result` flag to true,
 * stores symbol_value in lexer->result_symbol and calls lexer->mark_end.
 * The three statements are wrapped in a brace block (the same shape ADVANCE
 * and SKIP use) so the macro behaves as one statement; without the braces,
 * `if (cond) ACCEPT_TOKEN(sym);` would guard only the first assignment.
 * Requires `result` and a `lexer` pointer in scope (see START_LEXER). */
#define ACCEPT_TOKEN(symbol_value)         \
  {                                        \
    result = true;                         \
    lexer->result_symbol = (symbol_value); \
    lexer->mark_end(lexer);                \
  }
/* Ends the current lexer state by returning `result`, which is false unless
 * ACCEPT_TOKEN has set it. */
#define END_STATE() return result;
/*
* Parse Table Macros
*/
/* Converts a state id into an offset relative to LARGE_STATE_COUNT.
 * LARGE_STATE_COUNT is not defined in this header; the including file must
 * define it before the macro is expanded.
 * Both the argument and the whole expansion are parenthesized so the result
 * is correct inside larger expressions (e.g. `2 * SMALL_STATE(n)`) and with
 * compound arguments (e.g. `SMALL_STATE(a ? b : c)`). */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
/* Identity macros: each expands to its argument unchanged.
 * NOTE(review): presumably they exist only to label values in the generated
 * parse tables -- confirm against the generated parser. */
#define STATE(id) id
#define ACTIONS(id) id
/* Brace initializer for a parse-action entry describing a shift to
 * state_value. Members not named here (`extra`, `repetition`) are left
 * zero-initialized. */
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
/* Like SHIFT, but additionally sets the shift action's `repetition` flag
 * to true. */
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
/* Brace initializer for a shift action with the `extra` flag set to true.
 * No target state is given, so `state` is left zero-initialized. */
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
/* Brace initializer for a reduce action producing symbol_val from
 * child_count_val children. Any extra arguments are pasted in as further
 * designated initializers of the reduce struct (its remaining members are
 * `dynamic_precedence` and `production_id`). */
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
/* Brace initializer for a recover action: only the action's `type` tag is
 * set, to TSParseActionTypeRecover. */
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
/* Brace initializer for an accept action: only the action's `type` tag is
 * set, to TSParseActionTypeAccept. */
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_