Updated the lisp parser

pull/856/head
Wilfred Hughes 2025-07-02 23:39:05 +07:00
parent eb7edaba84
commit 87521ed3fc
29 changed files with 16 additions and 230835 deletions

@ -2,7 +2,7 @@
### Parsing
Updated Clojure and Zig parsers.
Updated Clojure, Common Lisp and Zig parsers.
File detection is now stricter with Windows-1252 (Latin 1) encoded
text. Windows-1252 was added in 0.63 and some binary files

11
Cargo.lock generated

@ -275,6 +275,7 @@ dependencies = [
"tree-sitter-bash",
"tree-sitter-c",
"tree-sitter-c-sharp",
"tree-sitter-commonlisp",
"tree-sitter-cpp",
"tree-sitter-css",
"tree-sitter-elixir",
@ -1028,6 +1029,16 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-commonlisp"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36c662ae95eee8c5ada519d4fe87261c5fbcc3ffaac4b10c03d460154cd6a9b2"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-cpp"
version = "0.23.4"

@ -106,6 +106,7 @@ tree-sitter-verilog = "1.0.3"
tree-sitter-xml = "0.7.0"
tree-sitter-yaml = "0.7.0"
tree-sitter-zig = "1.1.2"
tree-sitter-commonlisp = "0.4.1"
[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.6"

@ -82,11 +82,6 @@ fn main() {
src_dir: "vendored_parsers/tree-sitter-cmake-src",
extra_files: vec!["scanner.c"],
},
TreeSitterParser {
name: "tree-sitter-commonlisp",
src_dir: "vendored_parsers/tree-sitter-commonlisp-src",
extra_files: vec![],
},
TreeSitterParser {
name: "tree-sitter-dart",
src_dir: "vendored_parsers/tree-sitter-dart-src",

@ -73,7 +73,6 @@ extern "C" {
fn tree_sitter_apex() -> ts::Language;
fn tree_sitter_clojure() -> ts::Language;
fn tree_sitter_cmake() -> ts::Language;
fn tree_sitter_commonlisp() -> ts::Language;
fn tree_sitter_dart() -> ts::Language;
fn tree_sitter_devicetree() -> ts::Language;
fn tree_sitter_elisp() -> ts::Language;
@ -226,7 +225,9 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
}
}
CommonLisp => {
let language = unsafe { tree_sitter_commonlisp() };
let language_fn = tree_sitter_commonlisp::LANGUAGE_COMMONLISP;
let language = tree_sitter::Language::new(language_fn);
TreeSitterConfig {
language: language.clone(),
atom_nodes: ["str_lit", "char_lit"].into_iter().collect(),

@ -1 +0,0 @@
tree-sitter-commonlisp/src

@ -1,34 +0,0 @@
name: CI
on:
push:
branches: [master]
pull_request:
branches: [master]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-node@v1
with:
node-version: '16'
- name: Install dependencies
run: npm ci
- name: Run tests
run: npm test
- name: Parse Petalisp
run: |
git submodule init
git submodule update
if (( $(node_modules/tree-sitter-cli/tree-sitter parse test/Petalisp/**/*.lisp -q | wc -l) > 2 )); then # There are 2 known failures (strings that are not format strings but use ~X syntax)
exit 1
else
echo "Successfully parsed Petalisp."
fi
if (( $(node_modules/tree-sitter-cli/tree-sitter parse test/sly/**/*.lisp -q | wc -l) > 4 )); then # There are 2 known failures (strings that are not format strings but use ~X syntax)
exit 1
else
echo "Successfully parsed Sly"
fi
- name: Run tests
run: npm test

@ -1,6 +0,0 @@
[submodule "test/Petalisp"]
path = test/Petalisp
url = https://github.com/marcoheisig/Petalisp.git
[submodule "test/sly"]
path = test/sly
url = https://github.com/joaotavora/sly.git

@ -1,4 +0,0 @@
test
node_modules
build
*.lisp

@ -1,59 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
dependencies = [
"memchr",
]
[[package]]
name = "cc"
version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
[[package]]
name = "memchr"
version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
[[package]]
name = "regex"
version = "1.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
[[package]]
name = "tree-sitter"
version = "0.19.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad726ec26496bf4c083fff0f43d4eb3a2ad1bba305323af5ff91383c0b6ecac0"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-commonlisp"
version = "0.0.1"
dependencies = [
"cc",
"tree-sitter",
]

@ -1,27 +0,0 @@
[package]
name = "tree-sitter-commonlisp"
description = "Common Lisp grammar for the tree-sitter parsing library"
version = "0.3.0"
keywords = ["incremental", "parsing", "lisp"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/theHamsta/tree-sitter-commonlisp"
edition = "2018"
authors = ["Stephan Seitz"]
license = "MIT"
build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "0.20.1"
[build-dependencies]
cc = "1.0"

@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2021 Stephan Seitz
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -1,21 +0,0 @@
# tree-sitter-commonlisp
WIP. Goal is to have a better syntax highlighting for Neovim
and some semantic refactoring https://github.com/nvim-treesitter/nvim-treesitter-refactor/.
All praise goes to https://github.com/sogaiu/tree-sitter-clojure which is extended by this grammar.
TODOs:
- support number literals that are different from Clojure (e.g. `.9`)
Macros with a special representation in the syntax tree (when written with lowercase letters):
- defun and friends (e.g. defmethod)
- loop macro
This grammar is used in https://github.com/Wilfred/difftastic to generate syntax-aware diffs for Common Lisp.
## Additional Highlighting Support
Install https://github.com/theHamsta/nvim-treesitter-commonlisp to get highlighting of all functions and macros in the `cl` namespace.

@ -1,18 +0,0 @@
{
"targets": [
{
"target_name": "tree_sitter_commonlisp_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
],
"sources": [
"src/parser.c",
"bindings/node/binding.cc"
],
"cflags_c": [
"-std=c99",
]
}
]
}

@ -1,28 +0,0 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
// Entry point of the generated parser. The grammar is named 'commonlisp', so
// the generated symbol is tree_sitter_commonlisp (the same symbol the Rust
// binding links against); the previous jinja2 name was a template leftover
// that no object file in this package defines.
extern "C" TSLanguage * tree_sitter_commonlisp();

namespace {

// Constructor callback for the exported "Language" class; intentionally empty
// because the object only carries the TSLanguage pointer in an internal field.
NAN_METHOD(New) {}

// Module initializer: replaces `module.exports` with a "Language" instance
// whose internal field 0 holds the TSLanguage pointer and whose `name`
// property identifies the grammar.
void Init(Local<Object> exports, Local<Object> module) {
  Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
  tpl->SetClassName(Nan::New("Language").ToLocalChecked());
  // One internal slot is reserved for the raw language pointer.
  tpl->InstanceTemplate()->SetInternalFieldCount(1);

  Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
  Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
  Nan::SetInternalFieldPointer(instance, 0, tree_sitter_commonlisp());

  Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("commonlisp").ToLocalChecked());
  Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
}

// The module name matches the `target_name` declared in binding.gyp.
NODE_MODULE(tree_sitter_commonlisp_binding, Init)

}  // namespace

@ -1,19 +0,0 @@
// Load the compiled native addon. binding.gyp names the build target
// `tree_sitter_commonlisp_binding`, so that is the file node-gyp produces;
// the previous `tree_sitter_jinja2_binding` path could never be found.
// The Release build is preferred, with the Debug build as a fallback.
try {
  module.exports = require("../../build/Release/tree_sitter_commonlisp_binding");
} catch (error1) {
  // Only a missing Release build is recoverable; anything else is re-thrown.
  if (error1.code !== 'MODULE_NOT_FOUND') {
    throw error1;
  }
  try {
    module.exports = require("../../build/Debug/tree_sitter_commonlisp_binding");
  } catch (error2) {
    if (error2.code !== 'MODULE_NOT_FOUND') {
      throw error2;
    }
    // Neither build exists: report the original (Release) lookup failure.
    throw error1;
  }
}

// Attach the static node type description when it is available; its absence
// is deliberately ignored.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

@ -1,40 +0,0 @@
/// Build script: compiles the generated C parser in `src/parser.c` into a
/// static library named `parser` that is linked into this crate.
///
/// Only `parser.c` is compiled; no external scanner (`scanner.c` /
/// `scanner.cc`) is added to the build.
fn main() {
    let grammar_dir = std::path::Path::new("src");
    let parser_c = grammar_dir.join("parser.c");

    let mut build = cc::Build::new();
    build
        .include(&grammar_dir)
        // Generated parsers trip these warnings; silence them where the
        // compiler understands the flags.
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs")
        .file(&parser_c);

    // Ask Cargo to re-run this script only when the generated parser changes.
    println!("cargo:rerun-if-changed={}", parser_c.to_str().unwrap());

    build.compile("parser");
}

@ -1,52 +0,0 @@
//! This crate provides Common Lisp language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = "";
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(tree_sitter_commonlisp::language()).expect("Error loading commonlisp grammar");
//! let tree = parser.parse(code, None).unwrap();
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter::Language;
/// Returns the tree-sitter [Language][] for the Common Lisp grammar.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language() -> Language {
    // The FFI declaration is scoped to this function because nothing else in
    // the crate calls the raw symbol directly.
    extern "C" {
        fn tree_sitter_commonlisp() -> Language;
    }

    // SAFETY: the function takes no arguments; the symbol is presumably
    // provided by the `parser.c` object that the build script compiles and
    // links (generated for the grammar named `commonlisp`) — confirm against
    // the generated source.
    unsafe { tree_sitter_commonlisp() }
}
/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
// Uncomment these to include any queries that this grammar contains
// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
    /// Smoke test: a fresh parser must accept the language exported by this crate.
    #[test]
    fn test_can_load_grammar() {
        let grammar = super::language();
        let mut parser = tree_sitter::Parser::new();
        let loaded = parser.set_language(grammar);
        loaded.expect("Error loading commonlisp language");
    }
}

@ -1,398 +0,0 @@
/*
* grammar.js
* Copyright (C) 2021 Stephan Seitz <stephan.seitz@fau.de>
* Adapted from tree-sitter-clojure
*
* Distributed under terms of the MIT license.
*/
const clojure = require("tree-sitter-clojure/grammar");
const WHITESPACE_CHAR =
/[\f\n\r\t \u000B\u001C\u001D\u001E\u001F\u2028\u2029\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2008\u2009\u200a\u205f\u3000]/;
const WHITESPACE =
token(repeat1(WHITESPACE_CHAR));
const PREC = {
NUM_LIT: 0,
NORMAL: 1,
PACKAGE_LIT: 2,
DOTTET_LIT: 3,
KWD_LIT: 4,
SPECIAL: 5,
META_LIT: 6,
}
const SYMBOL_HEAD =
/[^:\f\n\r\t ()\[\]{}"^;`\\,#'\u000B\u001C\u001D\u001E\u001F\u2028\u2029\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2008\u2009\u200a\u205f\u3000]/;
const SYMBOL_WITHOUT_SLASH =
/[^:\f\n\r\t ()\[\]{}"^;/`\\,#'\u000B\u001C\u001D\u001E\u001F\u2028\u2029\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2008\u2009\u200a\u205f\u3000]/;
const SYMBOL_BODY =
choice(SYMBOL_HEAD,
/[#']/);
const SYMBOL =
token(seq(SYMBOL_HEAD,
repeat(SYMBOL_BODY)));
const STRING =
token(seq('"',
repeat(/[^"\\]/),
repeat(seq("\\",
/./,
repeat(/[^"\\]/))),
'"'));
const DIGIT =
/[0-9]/;
const ALPHANUMERIC =
/[0-9a-zA-Z]/;
const HEX_DIGIT =
/[0-9a-fA-F]/;
const OCTAL_DIGIT =
/[0-7]/;
const BINARY_DIGIT =
/[0-1]/;
const HEX_NUMBER =
seq(choice('#x', '#X'), optional(/[+-]/),
repeat1(HEX_DIGIT));
const OCTAL_NUMBER =
seq(choice('#o', '#O'), optional(/[+-]/),
repeat1(OCTAL_DIGIT));
const BINARY_NUMBER =
seq(choice('#b', '#B'), optional(/[+-]/),
repeat1(BINARY_DIGIT));
const RADIX_NUMBER =
seq('#',
repeat1(DIGIT),
/[rR]/,
repeat1(ALPHANUMERIC));
// XXX: not accounting for division by zero
const RATIO =
seq(repeat1(DIGIT),
"/",
repeat1(DIGIT));
const DOUBLE =
seq(repeat1(DIGIT),
optional(seq(".",
repeat(DIGIT))),
optional(seq(/[eEsSfFdDlL]/,
optional(/[+-]/),
repeat1(DIGIT))),
);
const INTEGER =
seq(repeat1(DIGIT),
optional(/[MN]/));
const NUMBER =
token(seq(optional(/[+-]/),
choice(
HEX_NUMBER,
OCTAL_NUMBER,
RADIX_NUMBER,
BINARY_NUMBER,
RATIO,
DOUBLE,
INTEGER)));
/**
 * Builds a rule that matches `symbol`, optionally qualified with the
 * `cl:` package prefix.
 */
function clSymbol(symbol) {
  const clPrefix = seq('cl', ':');
  return seq(optional(clPrefix), symbol);
}
/**
 * Builds a rule that matches `symbol` either bare, prefixed with `:`,
 * or prefixed with `cl:`.
 */
function loopSymbol(symbol) {
  const packageName = optional('cl');
  const prefix = seq(packageName, ':');
  return seq(optional(prefix), symbol);
}
/**
 * Shorthand for an optional sequence: wraps `seq(...args)` in `optional`.
 */
function optSeq(...args) {
  const sequence = seq(...args);
  return optional(sequence);
}
module.exports = grammar(clojure, {
name: 'commonlisp',
extras: ($, original) => [...original, $.block_comment],
conflicts: ($,
original) => [...original,
[$.for_clause_word, $.package_lit],
[$.with_clause, $.package_lit],
[$.with_clause],
[$.for_clause],
[$.accumulation_clause],
[$.loop_macro, $.defun_keyword, $.package_lit]],
rules: {
block_comment: _ => token(seq('#|', repeat(choice(/[^|]/, /\|[^#]/)), '|#')),
fancy_literal: _ => token(seq('|', repeat(/[^|]/), '|')),
_ws: _ =>
WHITESPACE,
unquoting_lit: $ =>
seq(field('marker', ","),
repeat($._gap),
field('value', $._form)),
unquote_splicing_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', ",@"),
repeat($._gap),
field('value', $._form)),
syn_quoting_lit: $ =>
seq(field('marker', "`"),
repeat($._gap),
field('value', ($._form))),
defun: $ =>
prec(PREC.SPECIAL, seq(field('open', "("),
optional($._gap),
$.defun_header,
optional($._gap),
repeat(choice(field('value', $._form), $._gap)),
field('close', ")"))),
_format_token: $ => choice(alias(NUMBER, $.num_lit), seq("'", alias(/./, $.char_lit))),
// https://en.wikipedia.org/wiki/Format_(Common_Lisp)
format_prefix_parameters: _ => choice('v', 'V', '#'),
format_modifiers: $ => seq(repeat(choice($._format_token, ',')), choice('@', '@:', ':', ':@')),
//format_modifiers: _ => choice('@', '@:', ':', ':@'),
format_directive_type: $ => choice(
seq(optional(field('repetitions', $._format_token)), choice('~', '%', '&', '|')),
/[cC]/,
/\^/,
'\n',
'\r',
/[pP]/,
/[iI]/,
/[wW]/,
/[aA]/,
'_',
/[()]/,
/[{}]/,
/[\[\]]/,
/[<>]/,
';',
seq(field('numberOfArgs', $._format_token), '*'),
seq('/', choice(alias($._package_lit_without_slash, $.package_lit), $._sym_lit_without_slash), '/'),
'?',
"Newline",
seq(repeat(choice($._format_token, ',')), /[$rRbBdDgGxXeEoOsStTfF]/),
),
format_specifier: $ =>
prec.left(seq(
'~',
optional($.format_prefix_parameters),
optional($.format_modifiers),
prec(5, $.format_directive_type),
)),
str_lit: $ =>
seq(
'"',
repeat(choice(
token.immediate(prec(1, /[^\\~"]+/)),
token.immediate(seq(/\\./)),
$.format_specifier,
)),
optional('~'),
'"',
),
for_clause_word: _ => loopSymbol(choice(
'in',
'across',
'being',
'using',
/being (the|each) (hash-key[s]?|hash-value[s]?|present-symbol[s]?) (in|of)/,
'below',
'above',
'from',
'to',
'upto',
'upfrom',
'downto',
'downfrom',
'on',
'by',
'then',
'=')),
_for_part: $ => seq(repeat($._gap), $.for_clause_word, repeat($._gap), $._form),
accumulation_verb: _ => loopSymbol(/(maximize|minimize|(collect|append|nconc|count)(ing)?|sum(ming)?|maximizing|minimizing)/),
for_clause: $ => choice(seq(choice(loopSymbol('for'), loopSymbol('and'), loopSymbol('as')), repeat($._gap), field('variable', $._form), optional(field('type', seq(repeat($._gap), $._form))),
repeat1($._for_part)), loopSymbol('and')),
with_clause: $ => seq(loopSymbol('with'), repeat($._gap), choice($._form, seq($._form, repeat($._gap), field('type', $._form))), repeat($._gap), optSeq(loopSymbol("="), repeat($._gap)), optSeq($._form, repeat($._gap))),
do_clause: $ => prec.left(seq(loopSymbol('do'), repeat1(prec.left(seq(repeat($._gap), $._form, repeat($._gap)))))),
while_clause: $ => prec.left(seq(choice(loopSymbol('while'), loopSymbol('until')), repeat($._gap), $._form)),
repeat_clause: $ => prec.left(seq(loopSymbol('repeat'), repeat($._gap), $._form)),
condition_clause: $ => prec.left(choice(seq(choice(loopSymbol('when'), loopSymbol('if'), loopSymbol('unless'), loopSymbol('always'), loopSymbol('thereis'), loopSymbol('never')), repeat($._gap), $._form), loopSymbol("else"))),
accumulation_clause: $ => seq($.accumulation_verb, repeat($._gap), $._form, optional(seq(repeat($._gap), loopSymbol('into'), repeat($._gap), $._form))),
termination_clause: $ => prec.left(seq(choice(loopSymbol('finally'), loopSymbol('return'), loopSymbol('initially')), repeat($._gap), $._form)),
loop_clause: $ =>
seq(choice(
$.for_clause,
$.do_clause,
$.list_lit,
$.while_clause,
$.repeat_clause,
$.accumulation_clause,
$.condition_clause,
$.with_clause,
$.termination_clause,
$.while_clause,
)),
loop_macro: $ =>
prec(PREC.SPECIAL,
seq(field('open', "("),
optional($._gap),
clSymbol('loop'),
repeat(choice($.loop_clause, $._gap)),
field('close', ")"))),
defun_keyword: _ => prec(10, clSymbol(choice('defun', 'defmacro', 'defgeneric', 'defmethod'))),
defun_header: $ =>
prec(PREC.SPECIAL, choice(
seq(field('keyword', $.defun_keyword),
repeat($._gap),
choice($.unquoting_lit, $.unquote_splicing_lit)
),
seq(field('keyword', $.defun_keyword),
repeat($._gap),
field('function_name', $._form),
optional(field('specifier', seq(repeat($._gap), choice($.kwd_lit, $.sym_lit)))),
repeat($._gap),
field('lambda_list', choice($.list_lit, $.unquoting_lit))),
seq(field('keyword', alias('lambda', $.defun_keyword)),
repeat($._gap),
field('lambda_list', choice($.list_lit, $.unquoting_lit)))
)),
array_dimension: _ => prec(100, /\d+[aA]/),
char_lit: _ =>
seq('#', /\\([^\f\n\r\t ()]+|[()])/),
vec_lit: $ =>
prec(PREC.SPECIAL,
choice(
seq(field('open', choice('#0A', '#0a')), choice($.num_lit, $.complex_num_lit)),
seq(field('open', '#'), optional($.array_dimension), $.list_lit))),
path_lit: $ =>
prec(PREC.SPECIAL,
seq(field('open', choice('#P', '#p')), alias(STRING, $.str_lit))),
_bare_list_lit: $ =>
choice(prec(PREC.SPECIAL, $.defun),
prec(PREC.SPECIAL, $.loop_macro),
seq(field('open', "("),
repeat(choice(field('value', $._form), $._gap)),
field('close', ")"))),
package_lit: $ => prec(PREC.PACKAGE_LIT, choice(seq(
field('package', choice($.sym_lit, 'cl')), // Make optional, instead of keywords?
choice(':', '::'),
field('symbol', $.sym_lit)
), prec(1, 'cl'))),
_package_lit_without_slash: $ => seq(
field('package', choice($._sym_lit_without_slash, 'cl')), // Make optional, instead of keywords?
choice(':', '::'),
field('symbol', $._sym_lit_without_slash)
),
kwd_lit: $ => prec(PREC.KWD_LIT, seq(
choice(':', '::'),
$.kwd_symbol,
)),
sym_lit: _ =>
seq(SYMBOL),
_sym_lit_without_slash: $ =>
alias(repeat1(SYMBOL_WITHOUT_SLASH), $.sym_lit),
kwd_symbol: _ =>
seq(SYMBOL),
self_referential_reader_macro: _ => /#\d+[=#]/,
_form: $ =>
seq(optional('#'),
choice(
$.num_lit,
$.fancy_literal,
$.vec_lit,
$.kwd_lit,
// No idea why this is necessary...It is never used but triggers some background magic
alias(seq(field('open', '#'), optional(/\d+[aA]/), $.list_lit), $.vec_lit),
$.str_lit,
$.self_referential_reader_macro,
$.char_lit,
$.nil_lit,
$.path_lit,
$.sym_lit,
$.package_lit,
$.list_lit,
$.set_lit,
$.read_cond_lit,
$.splicing_read_cond_lit,
$.var_quoting_lit,
$.quoting_lit,
$.syn_quoting_lit,
$.unquote_splicing_lit,
$.unquoting_lit,
$.include_reader_macro,
$.complex_num_lit,
".",
)),
num_lit: _ =>
seq(NUMBER, optional(/[sSfFdDlL]/)),
include_reader_macro: $ =>
seq(repeat($._metadata_lit),
field('marker', choice("#+", "#-")),
repeat($._gap),
field('condition', $._form),
repeat($._gap),
field('target', $._form)),
complex_num_lit: $ =>
seq(repeat($._metadata_lit),
field('marker', choice("#C", "#c")),
repeat($._gap),
'(',
repeat($._gap),
field('real', $.num_lit), // only numbers allowed here
repeat($._gap),
field('imaginary', $.num_lit),
repeat($._gap),
')'
),
}
});

@ -1,88 +0,0 @@
{
"name": "tree-sitter-commonlisp",
"version": "0.3.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "tree-sitter-commonlisp",
"version": "0.3.0",
"license": "MIT",
"dependencies": {
"nan": "^2.15.0",
"tree-sitter-clojure": "git@github.com:theHamsta/tree-sitter-clojure-1.git#95c7959c461406381b42113dcf4591008c663d21"
},
"devDependencies": {
"tree-sitter-cli": "^0.20.4"
}
},
"node_modules/nan": {
"version": "2.15.0",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.15.0.tgz",
"integrity": "sha512-8ZtvEnA2c5aYCZYd1cvgdnU6cqwixRoYg70xPLWUws5ORTa/lnw+u4amixRS/Ac5U5mQVgp9pnlSUnbNWFaWZQ=="
},
"node_modules/tree-sitter-cli": {
"version": "0.20.4",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.4.tgz",
"integrity": "sha512-G42x0Ev7mxA8WLUfZY+two5LIhPf6R/m7qDZtKxOzE77zXi6didNI/vf17kHaKaRXJrWnyCxHFaVQFO2LL81yg==",
"dev": true,
"hasInstallScript": true,
"bin": {
"tree-sitter": "cli.js"
}
},
"node_modules/tree-sitter-clojure": {
"version": "0.0.8",
"resolved": "git+ssh://git@github.com/theHamsta/tree-sitter-clojure-1.git#95c7959c461406381b42113dcf4591008c663d21",
"integrity": "sha512-h8iD1D2l7Qfrktam6y2KAU5K2SRI4lGHL2NCbJG9bmqiY8yTylkK6iJCwkoGLYapjVgxWUDAgaR9tRW5k3q1+g==",
"dependencies": {
"nan": "2.14.2",
"web-tree-sitter": "0.19.1"
}
},
"node_modules/tree-sitter-clojure/node_modules/nan": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
"integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ=="
},
"node_modules/web-tree-sitter": {
"version": "0.19.1",
"resolved": "https://registry.npmjs.org/web-tree-sitter/-/web-tree-sitter-0.19.1.tgz",
"integrity": "sha512-Wlveh+zdegmNdK733B18pf+NmliKs2t5+Aid8IOFIBV2MqJmnYVo3AdukbdZJ+iIxzBnIreYFKfcFFCWVEf4AA=="
}
},
"dependencies": {
"nan": {
"version": "2.15.0",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.15.0.tgz",
"integrity": "sha512-8ZtvEnA2c5aYCZYd1cvgdnU6cqwixRoYg70xPLWUws5ORTa/lnw+u4amixRS/Ac5U5mQVgp9pnlSUnbNWFaWZQ=="
},
"tree-sitter-cli": {
"version": "0.20.4",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.4.tgz",
"integrity": "sha512-G42x0Ev7mxA8WLUfZY+two5LIhPf6R/m7qDZtKxOzE77zXi6didNI/vf17kHaKaRXJrWnyCxHFaVQFO2LL81yg==",
"dev": true
},
"tree-sitter-clojure": {
"version": "git+ssh://git@github.com/theHamsta/tree-sitter-clojure-1.git#95c7959c461406381b42113dcf4591008c663d21",
"integrity": "sha512-h8iD1D2l7Qfrktam6y2KAU5K2SRI4lGHL2NCbJG9bmqiY8yTylkK6iJCwkoGLYapjVgxWUDAgaR9tRW5k3q1+g==",
"from": "tree-sitter-clojure@git@github.com:theHamsta/tree-sitter-clojure-1.git#95c7959c461406381b42113dcf4591008c663d21",
"requires": {
"nan": "2.14.2",
"web-tree-sitter": "0.19.1"
},
"dependencies": {
"nan": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
"integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ=="
}
}
},
"web-tree-sitter": {
"version": "0.19.1",
"resolved": "https://registry.npmjs.org/web-tree-sitter/-/web-tree-sitter-0.19.1.tgz",
"integrity": "sha512-Wlveh+zdegmNdK733B18pf+NmliKs2t5+Aid8IOFIBV2MqJmnYVo3AdukbdZJ+iIxzBnIreYFKfcFFCWVEf4AA=="
}
}
}

@ -1,30 +0,0 @@
{
"name": "tree-sitter-commonlisp",
"version": "0.3.0",
"description": "Tree-sitter grammar for Common Lisp",
"tree-sitter": [
{
"scope": "source.lisp",
"file-types": [
"lisp"
]
}
],
"main": "bindings/node",
"scripts": {
"test": "tree-sitter generate && tree-sitter test"
},
"author": "Stephan Seitz",
"license": "MIT",
"dependencies": {
"nan": "^2.15.0",
"tree-sitter-clojure": "git@github.com:theHamsta/tree-sitter-clojure-1.git#95c7959c461406381b42113dcf4591008c663d21"
},
"repository": {
"type": "git",
"url": "git://github.com/theHamsta/tree-sitter-common-lisp.git"
},
"devDependencies": {
"tree-sitter-cli": "^0.20.4"
}
}

@ -1,122 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Function Definitions ;;;;;;;;;;;;;;;;;;;;;;;
(defun_header
function_name: (sym_lit) @name) @definition.function
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Function Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Basically, we consider every list literal with symbol as the
;;; first element to be a call to a function named by that element.
;;; But we must exclude some cases. Note, tree-sitter @ignore
;;; cases only work if they are declared before the cases
;;; we want to include.
;; Exclude lambda lists for function definitions
;; For example:
;;
;; (defun my-func (arg1 arg2) ...)
;;
;; do not treat (arg1 arg2) as a call of function arg1
;;
(defun_header
lambda_list: (list_lit . [(sym_lit) (package_lit)] @ignore))
;; Similar to the above, but for
;;
;; (defmethod m ((type1 param1) (type2 param2)) ...)
;;
;; where list literals having symbol as their first element
;; are nested inside the lambda list.
(defun_header
lambda_list: (list_lit (list_lit . [(sym_lit) (package_lit)] @ignore)))
;;
;; (let ((var ...) (var2 ...)) ...)
;;
;; - exclude var, var2
;; - the same for let*, flet, labels, macrolet, symbol-macrolet
(list_lit . [(sym_lit) (package_lit)] @name
. (list_lit (list_lit . [(sym_lit) (package_lit)] @ignore))
(#match? @name
"(?i)^(cl:)?(let|let\\*|flet|labels|macrolet|symbol-macrolet)$")
)
;; TODO:
;; - exclude also:
;; - (defclass name (parent parent2)
;; ((slot1 ...)
;; (slot2 ...))
;; exclude the parent, slot1, slot2
;; - (flet ((func-1 (param1 param2))) ...)
;; - we already exclude func-1, but param1 is still recognized
;; as a function call - exclude it too
;; - the same for labels
;; - the same macrolet
;; - what else?
;; (that's a non-goal to completely support all macros
;; and special operators, but every one we support
;; makes the solution a little bit better)
;; - (flet ((func-1 (param1 param2))) ...)
;; - instead of simply excluding it, as we do today,
;; tag func-1 as @local.definition.function (I suppose)
;; - the same for labels, macrolet
;; - @local.scope for let, let*, flet, labels, macrolet
;; - I guess the whole span of the scope text,
;; till the closing paren, should be tagged as @local.scope;
;; Hopefully, combined with @local.definition.function
;; within the scope, the usual @reference.call within
;; that scope will refer to the local definition,
;; and there will be no need to use @local.reference.call
;; (which is more difficult to implement).
;; - When implementing, remember the scope rule differences
;; of let vs let*, flet vs labels.
;; Include all other cases - list literal with symbol as the
;; first element
(list_lit . [(sym_lit) (package_lit)] @name) @reference.call
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; classes
(list_lit . [(sym_lit) (package_lit)] @ignore
. [(sym_lit) (package_lit)] @name
(#match? @ignore "(?i)^(cl:)?defclass$")
) @definition.class
(list_lit . [(sym_lit) (package_lit)] @ignore
. (quoting_lit [(sym_lit) (package_lit)] @name)
(#match? @ignore "(?i)^(cl:)?make-instance$")
) @reference.class
;;; TODO:
;; - @reference.class for base classes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; TODO:
;; - Symbols referenced in defpackage
;;
;; (defpackage ...
;; (:export (symbol-a :symbol-b #:symbol-c "SYMBOL-D")))
;;
;; The goal is to allow quick navigation from the API
;; overview in the form of defpackage, to the definition
;; where the user can read parameters, docstring, etc.
;; - The @name must not include the colon, or sharpsign colon, quotes,
;; just symbol-a, symbol-b, symbol-c, symbol-d
;; - Downcase the names specified as string literals?
;; ("SYMBOL-D" -> symbol-d)
;; - We don't know if the exported symbol is a function, variable,
;; class or something else. The official doc
;; (https://tree-sitter.github.io/tree-sitter/code-navigation-systems)
;; does not even suggest a tag for variable reference.
;; (Although in practice, the `tree-sitter tags` command
;; allows any @reference.* and @definition.* tags)
;; Probably it's better to just use @reference.call for all
;; the symbols in the :export clause.
;;
;; - The same for the export function call:
;;
;; (export '(symbol-a :symbol-b #:symbol-c "SYMBOL-D"))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,224 +0,0 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
/*
 * Complete description of a generated language: a version number, a set of
 * counts, pointers to the static parse/lex tables, the lex functions, and the
 * optional external scanner callbacks.
 *
 * The member order is the binary contract between a generated parser and the
 * runtime, so members must not be reordered, inserted or removed.
 */
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
/* Tables and callbacks for a hand-written external scanner. */
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
/*
 * Prologue of a generated lex function. Expects a `TSLexer *lexer` to be in
 * scope.
 *
 * Declares the locals `result`, `skip`, `eof` and `lookahead`, then jumps to
 * `start`. The `next_state` label, reached through ADVANCE/SKIP, first calls
 * `lexer->advance(lexer, skip)` and then falls through to `start`, which
 * resets `skip` to false and reloads `lookahead` from the lexer.
 *
 * `eof` is only declared here; none of the macros in this header read it.
 */
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
/*
 * Moves to lexer state `state_value`: assigns it to the `state` variable of
 * the enclosing function and jumps to the `next_state` label set up by
 * START_LEXER, which advances the lexer before the new state is examined.
 */
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
/*
 * Same as ADVANCE, but sets `skip` to true first, so the
 * `lexer->advance(lexer, skip)` call at `next_state` receives true for this
 * one step (START_LEXER resets `skip` to false right afterwards).
 */
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
/*
 * Records a successful match: sets `result` to true, stores `symbol_value`
 * in `lexer->result_symbol` and calls `lexer->mark_end(lexer)`.
 *
 * The expansion is three separate statements that are NOT wrapped in a block,
 * so this macro must not be used as the body of an unbraced `if`/`else`/loop.
 */
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
/* Leaves the lex function, returning the `result` flag declared by START_LEXER. */
#define END_STATE() return result;
/*
* Parse Table Macros
*/
/*
 * Converts a parse-state id into an index relative to the first "small"
 * state by subtracting LARGE_STATE_COUNT.
 * LARGE_STATE_COUNT is not defined in this part of the header; presumably the
 * generated parser defines it before using this macro.
 *
 * Both the argument and the whole expansion are parenthesized so the macro
 * behaves like a single value inside larger expressions, e.g.
 * `2 * SMALL_STATE(n)` or `SMALL_STATE(c ? a : b)`. It is still a constant
 * expression, so it remains usable as an array designator.
 */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
/*
 * Identity macros: STATE(id) and ACTIONS(id) both expand to their argument
 * unchanged.
 * NOTE(review): presumably they exist only to label values in the generated
 * tables; confirm against generated parser sources.
 */
#define STATE(id) id
#define ACTIONS(id) id
/*
 * Brace initializer for a shift action: sets `shift.type` to
 * TSParseActionTypeShift and `shift.state` to `state_value`. The members not
 * named (`extra`, `repetition`) are zero-initialized.
 * NOTE(review): the doubled braces presumably initialize a TSParseActionEntry
 * through its first member, `action`; confirm at the use sites.
 */
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
/*
 * Like SHIFT, but additionally sets `shift.repetition` to true.
 */
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
/*
 * Brace initializer for a shift action with `shift.extra` set to true. No
 * target state is given, so `shift.state` is zero-initialized.
 */
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
/*
 * Brace initializer for a reduce action: sets `reduce.type` to
 * TSParseActionTypeReduce, `reduce.symbol` to `symbol_val` and
 * `reduce.child_count` to `child_count_val`.
 *
 * Any further arguments are pasted verbatim into the initializer, so callers
 * can append designated initializers for the remaining members of the
 * `reduce` struct (`.dynamic_precedence`, `.production_id`).
 */
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
/*
 * Brace initializer for a recover action: only the bare `type` member is
 * set, to TSParseActionTypeRecover.
 */
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
/*
 * Brace initializer for an accept action: only the bare `type` member is
 * set, to TSParseActionTypeAccept.
 */
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

@ -1,9 +0,0 @@
(loop for dim-idx in xyz below x
for y from 0
for letter in (list 'x 'y 'z) in 2
collect x into 1)
(loop for x below 3 in 2
do (format t "~A~%" x))
(asd into)

File diff suppressed because it is too large Load Diff

@ -1,116 +0,0 @@
;;;; -*- buffer-auto-save-file-name: nil; Mode: LISP; Syntax: COMMON-LISP; indent-tabs-mode: nil; coding: utf-8; show-trailing-whitespace: t -*-
;;;; Disabled Emacs auto save, by setting buffer-auto-save-file-name: nil
;;;; because `tree-sitter tests` complains about
;;;; the auto save file with a tilde at the end of the name.
;;;; Negative assertions are not supported,
;;;; https://github.com/tree-sitter/tree-sitter/issues/2304
;;;; But I use an exclamation mark to mark the places that
;;;; must not be tagged:
;;;;
;;;; ! tag.name
;;;;
;;;; They are not checked automatically, but we can check
;;;; those places manually, by reviewing the output
;;;; of `tree-sitter tags tags/tags-test.lisp`
(defun test-fn (a b)
;; ^ definition.function
(let ((x (+ a b))
;; ^ reference.call
(y (- a b)))
;; ^ reference.call
(* x y)))
;; ^ reference.call
(let ((a 1) (b 2))
;; ! reference.call
;; ! reference.call
)
(cl:let ((a 1) (b 2))
;; ! reference.call
;; ! reference.call
)
(let* ((a 1) (b 2))
;; ! reference.call
;; ! reference.call
)
(cl:let* ((a 1) (b 2))
;; ! reference.call
;; ! reference.call
)
(LET ((a 1) (bb 2))
;; ! reference.call
;; ! reference.call
)
(lets ((a 1) (bb 2))
;; ! reference.call
;; ! reference.call
)
(zlet ((a 1) (bb 2))
;; ! reference.call
;; ! reference.call
)
(defun tst ()
;; ^ definition.function
(CL:LET ((a 1)
;; ! reference.call
(x 2)
;; ! reference.call
(y (add 3 7)))
;; ^ reference.call
;; ! reference.call
))
(flet ((my-add (a b) (+ a b))
(my-mul (a b) (* a b)))
(my-add 1 (my-mul 7 3)))
;; ^ reference.call
;; ^ reference.call
(defclass test-class (base-class)
;; ^ reference.call
;; ^ definition.class
((slot-a)
(slot-b)))
(dEFclass test-class (base-class)
;; ^ definition.class
((slot-a)
(slot-b)))
(maKE-instance 'test-class)
;; ^ reference.call
;; ^ reference.class
(make-instance 'test-class)
;; ^ reference.call
;; ^ reference.class
(defclasses test-class2 (base-class)
;; ! reference.class
((slot-a)
(slot-b)))
(make-instances 'test-class2)
;; ! reference.class
(ddddefclass test-class3 (base-class)
;; ! definition.class
((slot-a)
(slot-b)))
(mmmmake-instance 'test-class3)
;; ! reference.class