Merge commit 'b466ffb4d7ce0a833754429ad8d3a02e950ed74c'

pull/708/head
Wilfred Hughes 2024-04-28 22:55:25 +07:00
commit cce7b2ee1b
50 changed files with 57538 additions and 54256 deletions

@ -12,7 +12,7 @@ Since difftastic is now conservative with parse errors
(DFT_PARSE_ERROR_LIMIT is 0 by default), this seems like a better
tradeoff.
Updated CSS, Go and Lua parsers.
Updated CSS, Go, Lua and Python parsers.
### Diffing

@ -0,0 +1,38 @@
root = true
[*]
charset = utf-8
insert_final_newline = true
trim_trailing_whitespace = true
[*.{json,toml,yml,gyp}]
indent_style = space
indent_size = 2
[*.js]
indent_style = space
indent_size = 2
[*.rs]
indent_style = space
indent_size = 4
[*.{c,cc,h}]
indent_style = space
indent_size = 4
[*.{py,pyi}]
indent_style = space
indent_size = 4
[*.swift]
indent_style = space
indent_size = 4
[*.go]
indent_style = tab
indent_size = 8
[Makefile]
indent_style = tab
indent_size = 8

@ -1,7 +1,15 @@
/src/parser.c linguist-vendored
/src/*.json linguist-vendored
/examples/* linguist-vendored
* text=auto eol=lf
src/grammar.json -diff
src/node-types.json -diff
src/parser.c -diff
examples/crlf-line-endings.py eol=crlf
examples/python2-grammar-crlf.py eol=crlf
examples/python3-grammar-crlf.py eol=crlf
src/*.json linguist-generated
src/parser.c linguist-generated
src/tree_sitter/* linguist-generated
bindings/** linguist-generated
binding.gyp linguist-generated
setup.py linguist-generated
Makefile linguist-generated
Package.swift linguist-generated

@ -0,0 +1,59 @@
name: Bug Report
description: File a bug or issue
title: "bug: "
labels: [bug]
body:
- type: markdown
attributes:
value: |
**Before** reporting an issue, make sure to search [existing issues](https://github.com/tree-sitter/tree-sitter-python/issues). Usage questions such as ***"How do I...?"*** either belong in [Discussions](https://github.com/tree-sitter/tree-sitter/discussions) upstream or in our [Discord server](https://discord.gg/w7nTvsVJhm) and will be closed.
If your issue is related to a bug in your editor-experience because your editor *leverages* tree-sitter and this parser, then it is likely your issue does *NOT* belong here and belongs in the relevant editor's repository.
- type: checkboxes
attributes:
label: Did you check existing issues?
description: Make sure you've checked all of the below before submitting an issue
options:
- label: I have read all the [tree-sitter docs](https://tree-sitter.github.io/tree-sitter/using-parsers) if it relates to using the parser
required: false
- label: I have searched the existing issues of tree-sitter-python
required: true
- type: input
attributes:
label: "Tree-Sitter CLI Version, if relevant (output of `tree-sitter --version`)"
placeholder: "tree-sitter 0.20.8 (6bbb50bef8249e6460e7d69e42cc8146622fa4fd)"
validations:
required: false
- type: textarea
attributes:
label: Describe the bug
description: A clear and concise description of what the bug is. Please include any related errors you see such as parsing errors or tree-sitter cli errors.
validations:
required: true
- type: textarea
attributes:
label: Steps To Reproduce/Bad Parse Tree
description: Steps to reproduce the behavior. If you have a bad parse tree, please include it here. You can get this by running `tree-sitter parse <path-to-file>` and copying the output.
placeholder: |
1.
2.
3.
validations:
required: true
- type: textarea
attributes:
label: Expected Behavior/Parse Tree
description: A concise description of what you expected to happen, or in the case of a bad parse tree, the expected parse tree.
validations:
required: true
- type: textarea
attributes:
label: Repro
description: Minimal code to reproduce this issue. Ideally this should be reproducible with the C library or the tree-sitter cli, do not suggest an editor or external tool.
value: |
# Example code that causes the issue
def foo():
# Code that fails to parse, or causes an error
...
render: Python
validations:
required: false

@ -0,0 +1,36 @@
name: Feature Request
description: Suggest a new feature
title: "feature: "
labels: [enhancement]
body:
- type: checkboxes
attributes:
label: Did you check the tree-sitter docs?
description: Make sure you read all the docs before submitting a feature request
options:
- label: I have read all the [tree-sitter docs](https://tree-sitter.github.io/tree-sitter/using-parsers) if it relates to using the parser
required: false
- type: textarea
validations:
required: true
attributes:
label: Is your feature request related to a problem? Please describe.
description: A clear and concise description of what the problem is. Ex. I think the grammar models this rule incorrectly and can be improved, or the scanner can be improved by doing [...], or Python has officially added a new feature that should be added to the grammar.
- type: textarea
validations:
required: true
attributes:
label: Describe the solution you'd like
description: A clear and concise description of what you want to happen.
- type: textarea
validations:
required: true
attributes:
label: Describe alternatives you've considered
description: A clear and concise description of any alternative solutions or features you've considered.
- type: textarea
validations:
required: false
attributes:
label: Additional context
description: Add any other context or screenshots about the feature request here. If your feature request is related to a new Python feature, please include a link to the relevant **official** Python documentation.

@ -1,31 +1,72 @@
name: CI
on:
pull_request:
branches:
- "**"
push:
branches:
- "master"
branches: ["*"]
paths:
- grammar.js
- src/**
- test/**
- bindings/**
- binding.gyp
pull_request:
paths:
- grammar.js
- src/**
- test/**
- bindings/**
- binding.gyp
concurrency:
group: ${{github.workflow}}-${{github.ref}}
cancel-in-progress: true
jobs:
test:
runs-on: ${{ matrix.os }}
name: Test parser
runs-on: ${{matrix.os}}
strategy:
fail-fast: true
fail-fast: false
matrix:
os: [macos-latest, ubuntu-latest]
os: [ubuntu-latest, windows-latest, macos-14]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
- name: Set up the repo
uses: tree-sitter/parser-setup-action@v1.1
with:
node-version: ${{vars.NODE_VERSION}}
- name: Set up examples
run: |-
git clone https://github.com/numpy/numpy examples/numpy --single-branch --depth=1 --filter=blob:none
git clone https://github.com/django/django examples/django --single-branch --depth=1 --filter=blob:none
git clone https://github.com/pallets/flask examples/flask --single-branch --depth=1 --filter=blob:none
git clone https://github.com/python/cpython examples/cpython --single-branch --depth=1 --filter=blob:none
- name: Run tests
uses: tree-sitter/parser-test-action@v1.2
with:
node-version: 18
- run: npm install
- run: npm test
test_windows:
runs-on: windows-latest
lint: ${{runner.os == 'Linux'}}
test-library: ${{runner.os == 'Linux'}}
corpus-files: |
examples/**/*.py
!examples/cpython/Lib/test/test_compile.py
!examples/cpython/Tools/build/generate_re_casefix.py
invalid-files: |
examples/cpython/Lib/test/tokenizedata/badsyntax_3131.py
fuzz:
name: Fuzz parser
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
- name: Checkout repository
uses: actions/checkout@v4
with:
node-version: 18
- run: npm install
- run: npm run-script test-windows
fetch-depth: 2
- name: Check for scanner changes
id: scanner-changes
run: |-
if git diff --quiet HEAD^ -- src/scanner.c; then
printf 'changed=false\n' >> "$GITHUB_OUTPUT"
else
printf 'changed=true\n' >> "$GITHUB_OUTPUT"
fi
- name: Fuzz parser
uses: tree-sitter/fuzz-action@v4
if: steps.scanner-changes.outputs.changed == 'true'

@ -1,22 +0,0 @@
name: Fuzz Parser
on:
push:
paths:
- src/scanner.c
pull_request:
paths:
- src/scanner.c
workflow_dispatch:
jobs:
test:
name: Parser fuzzing
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: vigoux/tree-sitter-fuzz-action@v1
with:
language: python
external-scanner: src/scanner.c
time: 60

@ -1,19 +0,0 @@
name: Lint
on:
push:
branches:
- master
pull_request:
branches:
- "**"
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install modules
run: npm install
- name: Run ESLint
run: npm run lint

@ -0,0 +1,23 @@
name: Publish package
on:
push:
tags: ["*"]
concurrency:
group: ${{github.workflow}}-${{github.ref}}
cancel-in-progress: true
jobs:
npm:
uses: tree-sitter/workflows/.github/workflows/package-npm.yml@main
secrets:
NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
crates:
uses: tree-sitter/workflows/.github/workflows/package-crates.yml@main
secrets:
CARGO_REGISTRY_TOKEN: ${{secrets.CARGO_REGISTRY_TOKEN}}
pypi:
uses: tree-sitter/workflows/.github/workflows/package-pypi.yml@main
secrets:
PYPI_API_TOKEN: ${{secrets.PYPI_API_TOKEN}}

@ -1,103 +0,0 @@
name: Release
on:
workflow_run:
workflows: ["CI"]
branches:
- master
types:
- completed
jobs:
release:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Get previous commit SHA
id: get_previous_commit
run: |
LATEST_TAG=$(git describe --tags --abbrev=0)
if [[ -z "$LATEST_TAG" ]]; then
echo "No tag found. Failing..."
exit 1
fi
echo "latest_tag=${LATEST_TAG#v}" >> "$GITHUB_ENV" # Remove 'v' prefix from the tag
- name: Check if version changed and is greater than the previous
id: version_check
run: |
# Compare the current version with the version from the previous commit
PREVIOUS_NPM_VERSION=${{ env.latest_tag }}
CURRENT_NPM_VERSION=$(jq -r '.version' package.json)
CURRENT_CARGO_VERSION=$(awk -F '"' '/^version/ {print $2}' Cargo.toml)
if [[ "$CURRENT_NPM_VERSION" != "$CURRENT_CARGO_VERSION" ]]; then # Cargo.toml and package.json versions must match
echo "Mismatch: NPM version ($CURRENT_NPM_VERSION) and Cargo.toml version ($CURRENT_CARGO_VERSION)"
echo "version_changed=false" >> "$GITHUB_ENV"
else
if [[ "$PREVIOUS_NPM_VERSION" == "$CURRENT_NPM_VERSION" ]]; then
echo "version_changed=" >> "$GITHUB_ENV"
else
IFS='.' read -ra PREVIOUS_VERSION_PARTS <<< "$PREVIOUS_NPM_VERSION"
IFS='.' read -ra CURRENT_VERSION_PARTS <<< "$CURRENT_NPM_VERSION"
VERSION_CHANGED=false
for i in "${!PREVIOUS_VERSION_PARTS[@]}"; do
if [[ ${CURRENT_VERSION_PARTS[i]} -gt ${PREVIOUS_VERSION_PARTS[i]} ]]; then
VERSION_CHANGED=true
break
elif [[ ${CURRENT_VERSION_PARTS[i]} -lt ${PREVIOUS_VERSION_PARTS[i]} ]]; then
break
fi
done
echo "version_changed=$VERSION_CHANGED" >> "$GITHUB_ENV"
echo "current_version=${CURRENT_NPM_VERSION}" >> "$GITHUB_ENV"
fi
fi
- name: Display result
run: |
echo "Version bump detected: ${{ env.version_changed }}"
- name: Fail if version is lower
if: env.version_changed == 'false'
run: exit 1
- name: Setup Node
if: env.version_changed == 'true'
uses: actions/setup-node@v3
with:
node-version: 18
registry-url: "https://registry.npmjs.org"
- name: Publish to NPM
if: env.version_changed == 'true'
env:
NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
run: npm publish
- name: Setup Rust
if: env.version_changed == 'true'
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Publish to Crates.io
if: env.version_changed == 'true'
uses: katyo/publish-crates@v2
with:
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
- name: Tag versions
if: env.version_changed == 'true'
run: |
git checkout master
git config user.name github-actions[bot]
git config user.email github-actions[bot]@users.noreply.github.com
git tag -d "v${{ env.current_version }}" || true
git push origin --delete "v${{ env.current_version }}" || true
git tag -a "v${{ env.current_version }}" -m "Version ${{ env.current_version }}"
git push origin "v${{ env.current_version }}"

@ -1,7 +1,36 @@
Cargo.lock
package-lock.json
node_modules
build
*.log
/examples/*/
# Rust artifacts
/Cargo.lock
/target/
# Node artifacts
/build/
/node_modules/
# Swift artifacts
/.build/
# Python artifacts
/dist/
*.egg-info
*.whl
# Zig artifacts
/zig-cache/
/zig-out/
# C artifacts
*.a
*.so
*.so.*
*.dylib
*.dll
*.pc
# Example dirs
/examples/*/
# Grammar volatiles
dsl.d.ts
*.wasm
*.obj
*.o

@ -1,6 +1,17 @@
/test
/examples
/build
/script
/target
bindings/c
bindings/go
bindings/python
bindings/rust
bindings/swift
Cargo.toml
Makefile
examples
pyproject.toml
setup.py
test
.editorconfig
.github
.gitignore
.gitattributes
.gitmodules
.npmignore

@ -1,14 +1,14 @@
[package]
name = "tree-sitter-python"
description = "Python grammar for tree-sitter"
version = "0.20.4"
version = "0.21.0"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Douglas Creager <dcreager@dcreager.net>",
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Amaan Qureshi <amaanq12@gmail.com>",
]
license = "MIT"
readme = "bindings/rust/README.md"
keywords = ["incremental", "parsing", "python"]
readme = "README.md"
keywords = ["incremental", "parsing", "tree-sitter", "python"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter-python"
edition = "2021"
@ -21,7 +21,7 @@ include = ["bindings/rust/*", "grammar.js", "queries/*", "src/*"]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "~0.20.10"
tree-sitter = ">=0.21.0"
[build-dependencies]
cc = "~1.0"
cc = "1.0.89"

@ -0,0 +1,110 @@
VERSION := 0.0.1
LANGUAGE_NAME := tree-sitter-python
# repository
SRC_DIR := src
PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null)
ifeq ($(PARSER_URL),)
PARSER_URL := $(subst .git,,$(PARSER_REPO_URL))
ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),)
PARSER_URL := $(subst :,/,$(PARSER_URL))
PARSER_URL := $(subst git@,https://,$(PARSER_URL))
endif
endif
TS ?= tree-sitter
# ABI versioning
SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION)))
SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION)))
# install directory layout
PREFIX ?= /usr/local
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# object files
OBJS := $(patsubst %.c,%.o,$(wildcard $(SRC_DIR)/*.c))
# flags
ARFLAGS := rcs
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
# OS-specific bits
ifeq ($(OS),Windows_NT)
$(error "Windows is not supported")
else ifeq ($(shell uname),Darwin)
SOEXT = dylib
SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
LINKSHARED := $(LINKSHARED)-dynamiclib -Wl,
ifneq ($(ADDITIONAL_LIBS),)
LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS),
endif
LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks
else
SOEXT = so
SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
LINKSHARED := $(LINKSHARED)-shared -Wl,
ifneq ($(ADDITIONAL_LIBS),)
LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS)
endif
LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR)
endif
ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc
lib$(LANGUAGE_NAME).a: $(OBJS)
$(AR) $(ARFLAGS) $@ $^
lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS)
$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ifneq ($(STRIP),)
$(STRIP) $@
endif
$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
sed -e 's|@URL@|$(PARSER_URL)|' \
-e 's|@VERSION@|$(VERSION)|' \
-e 's|@LIBDIR@|$(LIBDIR)|' \
-e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
-e 's|@REQUIRES@|$(REQUIRES)|' \
-e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' \
-e 's|=$(PREFIX)|=$${prefix}|' \
-e 's|@PREFIX@|$(PREFIX)|' $< > $@
$(SRC_DIR)/parser.c: grammar.js
$(TS) generate --no-bindings
install: all
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
install -m644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h
install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a
install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER)
ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR)
ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT)
uninstall:
$(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \
'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \
'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
clean:
$(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT)
test:
$(TS) test
.PHONY: all install uninstall clean test

@ -3,6 +3,7 @@ import PackageDescription
let package = Package(
name: "TreeSitterPython",
platforms: [.macOS(.v10_13), .iOS(.v11)],
products: [
.library(name: "TreeSitterPython", targets: ["TreeSitterPython"]),
],
@ -11,14 +12,26 @@ let package = Package(
.target(name: "TreeSitterPython",
path: ".",
exclude: [
"binding.gyp",
"bindings",
"Cargo.toml",
"corpus",
"Makefile",
"binding.gyp",
"bindings/c",
"bindings/go",
"bindings/node",
"bindings/python",
"bindings/rust",
"examples",
"grammar.js",
"LICENSE",
"package.json",
"README.md",
"package-lock.json",
"pyproject.toml",
"setup.py",
"test",
"types",
".editorconfig",
".github",
".gitignore",
".gitattributes",
],
sources: [
"src/parser.c",
@ -29,5 +42,6 @@ let package = Package(
],
publicHeadersPath: "bindings/swift",
cSettings: [.headerSearchPath("src")])
]
],
cLanguageStandard: .c11
)

@ -1,6 +1,11 @@
# tree-sitter-python
[![build](https://github.com/tree-sitter/tree-sitter-python/actions/workflows/ci.yml/badge.svg)](https://github.com/tree-sitter/tree-sitter-python/actions/workflows/ci.yml)
[![CI][ci]](https://github.com/tree-sitter/tree-sitter-python/actions/workflows/ci.yml)
[![discord][discord]](https://discord.gg/w7nTvsVJhm)
[![matrix][matrix]](https://matrix.to/#/#tree-sitter-chat:matrix.org)
[![crates][crates]](https://crates.io/crates/tree-sitter-python)
[![npm][npm]](https://www.npmjs.com/package/tree-sitter-python)
[![pypi][pypi]](https://pypi.org/project/tree-sitter-python/)
Python grammar for [tree-sitter][].
@ -10,3 +15,10 @@ Python grammar for [tree-sitter][].
- [Python 2 Grammar](https://docs.python.org/2/reference/grammar.html)
- [Python 3 Grammar](https://docs.python.org/3/reference/grammar.html)
[ci]: https://img.shields.io/github/actions/workflow/status/tree-sitter/tree-sitter-python/ci.yml?logo=github&label=CI
[discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord
[matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix
[npm]: https://img.shields.io/npm/v/tree-sitter-python?logo=npm
[crates]: https://img.shields.io/crates/v/tree-sitter-python?logo=rust
[pypi]: https://img.shields.io/pypi/v/tree-sitter-python?logo=pypi&logoColor=ffd242

@ -2,18 +2,20 @@
"targets": [
{
"target_name": "tree_sitter_python_binding",
"dependencies": [
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
],
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
"src",
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
"src/scanner.c"
"src/scanner.c",
],
"cflags_c": [
"-std=c99",
]
"-std=c11",
],
}
]
}

@ -0,0 +1,16 @@
#ifndef TREE_SITTER_PYTHON_H_
#define TREE_SITTER_PYTHON_H_
typedef struct TSLanguage TSLanguage;
#ifdef __cplusplus
extern "C" {
#endif
extern const TSLanguage *tree_sitter_python(void);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PYTHON_H_

@ -0,0 +1,11 @@
prefix=@PREFIX@
libdir=@LIBDIR@
includedir=@INCLUDEDIR@
Name: tree-sitter-python
Description: python grammar for tree-sitter
URL: @URL@
Version: @VERSION@
Requires: @REQUIRES@
Libs: -L${libdir} @ADDITIONAL_LIBS@ -ltree-sitter-python
Cflags: -I${includedir}

@ -0,0 +1,13 @@
package tree_sitter_python
// #cgo CFLAGS: -std=c11 -fPIC
// #include "../../src/parser.c"
// #include "../../src/scanner.c"
import "C"
import "unsafe"
// Get the tree-sitter Language for this grammar.
func Language() unsafe.Pointer {
return unsafe.Pointer(C.tree_sitter_python())
}

@ -0,0 +1,15 @@
package tree_sitter_python_test
import (
"testing"
tree_sitter "github.com/smacker/go-tree-sitter"
"github.com/tree-sitter/tree-sitter-python"
)
func TestCanLoadGrammar(t *testing.T) {
language := tree_sitter.NewLanguage(tree_sitter_python.Language())
if language == nil {
t.Errorf("Error loading Python grammar")
}
}

@ -0,0 +1,5 @@
module github.com/tree-sitter/tree-sitter-python
go 1.22
require github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8

@ -1,28 +1,20 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
#include <napi.h>
using namespace v8;
typedef struct TSLanguage TSLanguage;
extern "C" TSLanguage * tree_sitter_python();
extern "C" TSLanguage *tree_sitter_python();
namespace {
// "tree-sitter", "language" hashed with BLAKE2
const napi_type_tag LANGUAGE_TYPE_TAG = {
0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};
NAN_METHOD(New) {}
void Init(Local<Object> exports, Local<Object> module) {
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
tpl->SetClassName(Nan::New("Language").ToLocalChecked());
tpl->InstanceTemplate()->SetInternalFieldCount(1);
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_python());
Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("python").ToLocalChecked());
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
Napi::Object Init(Napi::Env env, Napi::Object exports) {
exports["name"] = Napi::String::New(env, "python");
auto language = Napi::External<TSLanguage>::New(env, tree_sitter_python());
language.TypeTag(&LANGUAGE_TYPE_TAG);
exports["language"] = language;
return exports;
}
NODE_MODULE(tree_sitter_python_binding, Init)
} // namespace
NODE_API_MODULE(tree_sitter_python_binding, Init)

@ -0,0 +1,28 @@
type BaseNode = {
type: string;
named: boolean;
};
type ChildNode = {
multiple: boolean;
required: boolean;
types: BaseNode[];
};
type NodeInfo =
| (BaseNode & {
subtypes: BaseNode[];
})
| (BaseNode & {
fields: { [name: string]: ChildNode };
children: ChildNode[];
});
type Language = {
name: string;
language: unknown;
nodeTypeInfo: NodeInfo[];
};
declare const language: Language;
export = language;

@ -1,18 +1,6 @@
try {
module.exports = require("../../build/Release/tree_sitter_python_binding");
} catch (error1) {
if (error1.code !== 'MODULE_NOT_FOUND') {
throw error1;
}
try {
module.exports = require("../../build/Debug/tree_sitter_python_binding");
} catch (error2) {
if (error2.code !== 'MODULE_NOT_FOUND') {
throw error2;
}
throw error1
}
}
const root = require("path").join(__dirname, "..", "..");
module.exports = require("node-gyp-build")(root);
try {
module.exports.nodeTypeInfo = require("../../src/node-types.json");

@ -0,0 +1,5 @@
from ._tree_sitter_python import lib as _lib, ffi as _ffi
def language():
"""Get the tree-sitter language for this grammar."""
return int(_ffi.cast("uintptr_t", _lib.tree_sitter_python()))

@ -0,0 +1,3 @@
"Python grammar for tree-sitter"
from ._binding import language

@ -0,0 +1,27 @@
#include <Python.h>
typedef struct TSLanguage TSLanguage;
extern const TSLanguage *tree_sitter_python(void);
static PyObject* _binding_language(PyObject *self, PyObject *args) {
return PyLong_FromVoidPtr((void *)tree_sitter_python());
}
static PyMethodDef methods[] = {
{"language", _binding_language, METH_NOARGS,
"Get the tree-sitter language for this grammar."},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_binding",
.m_doc = NULL,
.m_size = -1,
.m_methods = methods
};
PyMODINIT_FUNC PyInit__binding(void) {
return PyModule_Create(&module);
}

@ -3,10 +3,6 @@ fn main() {
let mut c_config = cc::Build::new();
c_config.include(src_dir);
c_config
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-unused-but-set-variable")
.flag_if_supported("-Wno-trigraphs");
let parser_path = src_dir.join("parser.c");
c_config.file(&parser_path);
@ -14,6 +10,6 @@ fn main() {
c_config.file(&scanner_path);
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
c_config.compile("parser");
c_config.compile("tree-sitter-python");
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
}

@ -1,27 +1,17 @@
// -*- coding: utf-8 -*-
// ------------------------------------------------------------------------------------------------
// Copyright © 2020, tree-sitter-python authors.
// See the LICENSE file in this repo for license details.
// ------------------------------------------------------------------------------------------------
//! This crate provides a Python grammar for the [tree-sitter][] parsing library.
//! This crate provides Python language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this grammar to a
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! use tree_sitter::Parser;
//!
//! let code = r#"
//! def double(x):
//! return x * 2
//! "#;
//! let mut parser = Parser::new();
//! parser.set_language(tree_sitter_python::language()).expect("Error loading Python grammar");
//! let parsed = parser.parse(code, None);
//! # let parsed = parsed.unwrap();
//! # let root = parsed.root_node();
//! # assert!(!root.has_error());
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(&tree_sitter_python::language()).expect("Error loading Python grammar");
//! let tree = parser.parse(code, None).unwrap();
//! assert!(!tree.root_node().has_error());
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
@ -35,34 +25,28 @@ extern "C" {
fn tree_sitter_python() -> Language;
}
/// Returns the tree-sitter [Language][] for this grammar.
/// Get the tree-sitter [Language][] for this grammar.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language() -> Language {
unsafe { tree_sitter_python() }
}
/// The source of the Python tree-sitter grammar description.
pub const GRAMMAR: &str = include_str!("../../grammar.js");
/// The syntax highlighting query for this language.
pub const HIGHLIGHT_QUERY: &str = include_str!("../../queries/highlights.scm");
/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
/// The symbol tagging query for this language.
pub const TAGGING_QUERY: &str = include_str!("../../queries/tags.scm");
pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
#[test]
fn can_load_grammar() {
fn test_can_load_grammar() {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(super::language())
.set_language(&super::language())
.expect("Error loading Python grammar");
}
}

@ -7,7 +7,7 @@ typedef struct TSLanguage TSLanguage;
extern "C" {
#endif
extern TSLanguage *tree_sitter_python();
extern const TSLanguage *tree_sitter_python(void);
#ifdef __cplusplus
}

@ -88,6 +88,7 @@ module.exports = grammar({
']',
')',
'}',
'except',
],
inline: $ => [
@ -478,6 +479,7 @@ module.exports = grammar({
type_parameter: $ => seq(
'[',
commaSep1($.type),
optional(','),
']',
),
@ -1156,7 +1158,7 @@ module.exports = grammar({
seq(optional(digits), '.', digits, optional(exponent)),
seq(digits, exponent),
),
optional(choice(/[Ll]/, /[jJ]/)),
optional(/[jJ]/),
));
},

File diff suppressed because it is too large Load Diff

@ -1,30 +1,55 @@
{
"name": "tree-sitter-python",
"version": "0.20.4",
"version": "0.21.0",
"description": "Python grammar for tree-sitter",
"repository": "github:tree-sitter/tree-sitter-python",
"license": "MIT",
"author": "Max Brunsfeld",
"contributors": [
"Amaan Qureshi"
],
"main": "bindings/node",
"types": "bindings/node",
"files": [
"grammar.js",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"src/**"
],
"keywords": [
"parser",
"lexer"
"incremental",
"parsing",
"tree-sitter",
"python"
],
"author": "Max Brunsfeld",
"license": "MIT",
"dependencies": {
"nan": "^2.17.0"
"node-addon-api": "^7.1.0",
"node-gyp-build": "^4.8.0"
},
"devDependencies": {
"eslint": "^8.47.0",
"eslint": "^8.57.0",
"eslint-config-google": "^0.14.0",
"tree-sitter-cli": "^0.20.8"
"tree-sitter-cli": "^0.21.0",
"prebuildify": "^6.0.0"
},
"peerDependencies": {
"tree-sitter": "^0.21.0"
},
"peerDependenciesMeta": {
"tree_sitter": {
"optional": true
}
},
"scripts": {
"build": "tree-sitter generate && node-gyp build",
"build": "tree-sitter generate --no-bindings",
"lint": "eslint grammar.js",
"parse": "tree-sitter parse",
"test": "tree-sitter test && script/parse-examples",
"test-windows": "tree-sitter test"
"test": "tree-sitter test",
"install": "node-gyp-build",
"prebuildify": "prebuildify --napi --strip"
},
"repository": "https://github.com/tree-sitter/tree-sitter-python",
"tree-sitter": [
{
"scope": "source.python",
@ -32,12 +57,8 @@
"py"
],
"injection-regex": "py",
"highlights": [
"queries/highlights.scm"
],
"tags": [
"queries/tags.scm"
]
"highlights": "queries/highlights.scm",
"tags": "queries/tags.scm"
}
]
}

@ -0,0 +1,30 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "tree-sitter-python"
description = "Python grammar for tree-sitter"
version = "0.21.0"
keywords = ["parsing", "incremental", "python"]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Topic :: Software Development :: Compilers",
"Topic :: Text Processing :: Linguistic",
]
authors = [
{name = "Max Brunsfeld"},
{name = "Amaan Qureshi"}
]
requires-python = ">=3.8"
license.text = "MIT"
readme = "README.md"
[project.optional-dependencies]
core = ["tree-sitter~=0.21"]
[tool.cibuildwheel]
build = "cp38-*"
build-frontend = "build"

@ -1,19 +1,13 @@
; Identifier naming conventions
(identifier) @variable
((identifier) @constructor
(#match? @constructor "^[A-Z]"))
((identifier) @constant
(#match? @constant "^[A-Z][A-Z_]*$"))
; Builtin functions
((call
function: (identifier) @function.builtin)
(#match?
@function.builtin
"^(abs|all|any|ascii|bin|bool|breakpoint|bytearray|bytes|callable|chr|classmethod|compile|complex|delattr|dict|dir|divmod|enumerate|eval|exec|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|map|max|memoryview|min|next|object|oct|open|ord|pow|print|property|range|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|vars|zip|__import__)$"))
; Function calls
(decorator) @function
@ -23,12 +17,19 @@
(call
function: (identifier) @function)
; Builtin functions
((call
function: (identifier) @function.builtin)
(#match?
@function.builtin
"^(abs|all|any|ascii|bin|bool|breakpoint|bytearray|bytes|callable|chr|classmethod|compile|complex|delattr|dict|dir|divmod|enumerate|eval|exec|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|map|max|memoryview|min|next|object|oct|open|ord|pow|print|property|range|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|vars|zip|__import__)$"))
; Function definitions
(function_definition
name: (identifier) @function)
(identifier) @variable
(attribute attribute: (identifier) @property)
(type (identifier) @type)

@ -1,4 +0,0 @@
examples/cpython/Lib/test/badsyntax_3131.py
examples/cpython/Lib/test/badsyntax_future8.py
examples/cpython/Lib/test/test_compile.py
examples/cpython/Tools/build/generate_re_casefix.py

@ -1,47 +0,0 @@
#!/usr/bin/env bash
set -eu
cd "$(dirname "$0")/.."
function clone_repo {
owner=$1
name=$2
sha=$3
path=examples/$name
if [ ! -d "$path" ]; then
echo "Cloning $owner/$name"
git clone "https://github.com/$owner/$name" "$path"
fi
pushd "$path" >/dev/null
actual_sha=$(git rev-parse HEAD)
if [ "$actual_sha" != "$sha" ]; then
echo "Updating $owner/$name to $sha"
git fetch
git reset --hard "$sha"
fi
popd >/dev/null
}
clone_repo numpy numpy 058851c5cfc98f50f11237b1c13d77cfd1f40475
clone_repo django django 01974d7f7549b2dca2a729c3c1a1ea7d4585eb3a
clone_repo pallets flask de464c03e134127140e5622e230790806a133ff9
clone_repo python cpython bb456a08a3db851e6feaefc3328f39096919ec8d
known_failures="$(cat script/known_failures.txt)"
# shellcheck disable=2046
tree-sitter parse -q \
'examples/**/*.py' \
$(for file in $known_failures; do echo "!${file}"; done)
example_count=$(find examples -name '*.py' | wc -l)
failure_count=$(wc -w <<<"$known_failures")
success_count=$((example_count - failure_count))
success_percent=$(bc -l <<<"100*${success_count}/${example_count}")
printf \
"Successfully parsed %d of %d example files (%.1f%%)\n" \
"$success_count" "$example_count" "$success_percent"

@ -0,0 +1,57 @@
from os.path import isdir, join
from platform import system
from setuptools import Extension, find_packages, setup
from setuptools.command.build import build
from wheel.bdist_wheel import bdist_wheel
class Build(build):
def run(self):
if isdir("queries"):
dest = join(self.build_lib, "tree_sitter_python", "queries")
self.copy_tree("queries", dest)
super().run()
class BdistWheel(bdist_wheel):
def get_tag(self):
python, abi, platform = super().get_tag()
if python.startswith("cp"):
python, abi = "cp38", "abi3"
return python, abi, platform
setup(
packages=find_packages("bindings/python"),
package_dir={"": "bindings/python"},
package_data={
"tree_sitter_python": ["*.pyi", "py.typed"],
"tree_sitter_python.queries": ["*.scm"],
},
ext_package="tree_sitter_python",
ext_modules=[
Extension(
name="_binding",
sources=[
"bindings/python/tree_sitter_python/binding.c",
"src/parser.c",
"src/scanner.c",
],
extra_compile_args=(
["-std=c11"] if system() != 'Windows' else []
),
define_macros=[
("Py_LIMITED_API", "0x03080000"),
("PY_SSIZE_T_CLEAN", None)
],
include_dirs=["src"],
py_limited_api=True,
)
],
cmdclass={
"build": Build,
"bdist_wheel": BdistWheel
},
zip_safe=False,
)

@ -1978,6 +1978,18 @@
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": ","
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": "]"
@ -5999,17 +6011,8 @@
"type": "CHOICE",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[Ll]"
},
{
"type": "PATTERN",
"value": "[jJ]"
}
]
"type": "PATTERN",
"value": "[jJ]"
},
{
"type": "BLANK"
@ -6260,6 +6263,10 @@
{
"type": "STRING",
"value": "}"
},
{
"type": "STRING",
"value": "except"
}
],
"inline": [
@ -6277,26 +6284,5 @@
"primary_expression",
"pattern",
"parameter"
],
"PREC": {
"lambda": -2,
"typed_parameter": -1,
"conditional": -1,
"parenthesized_expression": 1,
"parenthesized_list_splat": 1,
"or": 10,
"and": 11,
"not": 12,
"compare": 13,
"bitwise_or": 14,
"bitwise_and": 15,
"xor": 16,
"shift": 17,
"plus": 18,
"times": 19,
"unary": 20,
"power": 21,
"call": 22
}
]
}

File diff suppressed because it is too large Load Diff

@ -1,42 +1,10 @@
#include "tree_sitter/array.h"
#include "tree_sitter/parser.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <tree_sitter/parser.h>
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define VEC_RESIZE(vec, _cap) \
void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \
assert(tmp != NULL); \
(vec).data = tmp; \
(vec).cap = (_cap);
#define VEC_GROW(vec, _cap) \
if ((vec).cap < (_cap)) { \
VEC_RESIZE((vec), (_cap)); \
}
#define VEC_PUSH(vec, el) \
if ((vec).cap == (vec).len) { \
VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \
} \
(vec).data[(vec).len++] = (el);
#define VEC_POP(vec) (vec).len--;
#define VEC_NEW \
{ .len = 0, .cap = 0, .data = NULL }
#define VEC_BACK(vec) ((vec).data[(vec).len - 1])
#define VEC_FREE(vec) \
{ \
if ((vec).data != NULL) \
free((vec).data); \
}
#define VEC_CLEAR(vec) (vec).len = 0;
enum TokenType {
NEWLINE,
@ -50,6 +18,7 @@ enum TokenType {
CLOSE_PAREN,
CLOSE_BRACKET,
CLOSE_BRACE,
EXCEPT,
};
typedef enum {
@ -68,21 +37,13 @@ typedef struct {
static inline Delimiter new_delimiter() { return (Delimiter){0}; }
static inline bool is_format(Delimiter *delimiter) {
return delimiter->flags & Format;
}
static inline bool is_format(Delimiter *delimiter) { return delimiter->flags & Format; }
static inline bool is_raw(Delimiter *delimiter) {
return delimiter->flags & Raw;
}
static inline bool is_raw(Delimiter *delimiter) { return delimiter->flags & Raw; }
static inline bool is_triple(Delimiter *delimiter) {
return delimiter->flags & Triple;
}
static inline bool is_triple(Delimiter *delimiter) { return delimiter->flags & Triple; }
static inline bool is_bytes(Delimiter *delimiter) {
return delimiter->flags & Bytes;
}
static inline bool is_bytes(Delimiter *delimiter) { return delimiter->flags & Bytes; }
static inline int32_t end_character(Delimiter *delimiter) {
if (delimiter->flags & SingleQuote) {
@ -97,19 +58,13 @@ static inline int32_t end_character(Delimiter *delimiter) {
return 0;
}
static inline void set_format(Delimiter *delimiter) {
delimiter->flags |= Format;
}
static inline void set_format(Delimiter *delimiter) { delimiter->flags |= Format; }
static inline void set_raw(Delimiter *delimiter) { delimiter->flags |= Raw; }
static inline void set_triple(Delimiter *delimiter) {
delimiter->flags |= Triple;
}
static inline void set_triple(Delimiter *delimiter) { delimiter->flags |= Triple; }
static inline void set_bytes(Delimiter *delimiter) {
delimiter->flags |= Bytes;
}
static inline void set_bytes(Delimiter *delimiter) { delimiter->flags |= Bytes; }
static inline void set_end_character(Delimiter *delimiter, int32_t character) {
switch (character) {
@ -128,34 +83,8 @@ static inline void set_end_character(Delimiter *delimiter, int32_t character) {
}
typedef struct {
uint32_t len;
uint32_t cap;
uint16_t *data;
} indent_vec;
static indent_vec indent_vec_new() {
indent_vec vec = VEC_NEW;
vec.data = calloc(1, sizeof(uint16_t));
vec.cap = 1;
return vec;
}
typedef struct {
uint32_t len;
uint32_t cap;
Delimiter *data;
} delimiter_vec;
static delimiter_vec delimiter_vec_new() {
delimiter_vec vec = VEC_NEW;
vec.data = calloc(1, sizeof(Delimiter));
vec.cap = 1;
return vec;
}
typedef struct {
indent_vec indents;
delimiter_vec delimiters;
Array(uint16_t) indents;
Array(Delimiter) delimiters;
bool inside_f_string;
} Scanner;
@ -163,28 +92,22 @@ static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
bool error_recovery_mode =
valid_symbols[STRING_CONTENT] && valid_symbols[INDENT];
bool within_brackets = valid_symbols[CLOSE_BRACE] ||
valid_symbols[CLOSE_PAREN] ||
valid_symbols[CLOSE_BRACKET];
bool advanced_once = false;
if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.len > 0 &&
(lexer->lookahead == '{' || lexer->lookahead == '}') &&
!error_recovery_mode) {
Delimiter delimiter = VEC_BACK(scanner->delimiters);
if (is_format(&delimiter)) {
bool error_recovery_mode = valid_symbols[STRING_CONTENT] && valid_symbols[INDENT];
bool within_brackets = valid_symbols[CLOSE_BRACE] || valid_symbols[CLOSE_PAREN] || valid_symbols[CLOSE_BRACKET];
bool advanced_once = false;
if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.size > 0 &&
(lexer->lookahead == '{' || lexer->lookahead == '}') && !error_recovery_mode) {
Delimiter *delimiter = array_back(&scanner->delimiters);
if (is_format(delimiter)) {
lexer->mark_end(lexer);
bool is_left_brace = lexer->lookahead == '{';
advance(lexer);
advanced_once = true;
if ((lexer->lookahead == '{' && is_left_brace) ||
(lexer->lookahead == '}' && !is_left_brace)) {
if ((lexer->lookahead == '{' && is_left_brace) || (lexer->lookahead == '}' && !is_left_brace)) {
advance(lexer);
lexer->mark_end(lexer);
lexer->result_symbol = ESCAPE_INTERPOLATION;
@ -194,43 +117,39 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
}
}
if (valid_symbols[STRING_CONTENT] && scanner->delimiters.len > 0 &&
!error_recovery_mode) {
Delimiter delimiter = VEC_BACK(scanner->delimiters);
int32_t end_char = end_character(&delimiter);
if (valid_symbols[STRING_CONTENT] && scanner->delimiters.size > 0 && !error_recovery_mode) {
Delimiter *delimiter = array_back(&scanner->delimiters);
int32_t end_char = end_character(delimiter);
bool has_content = advanced_once;
while (lexer->lookahead) {
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') &&
is_format(&delimiter)) {
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && is_format(delimiter)) {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return has_content;
}
if (lexer->lookahead == '\\') {
if (is_raw(&delimiter)) {
if (is_raw(delimiter)) {
// Step over the backslash.
advance(lexer);
// Step over any escaped quotes.
if (lexer->lookahead == end_character(&delimiter) ||
lexer->lookahead == '\\') {
if (lexer->lookahead == end_character(delimiter) || lexer->lookahead == '\\') {
advance(lexer);
}
// Step over newlines
if (lexer -> lookahead == '\r') {
advance(lexer);
if (lexer -> lookahead == '\n') {
if (lexer->lookahead == '\r') {
advance(lexer);
if (lexer->lookahead == '\n') {
advance(lexer);
}
} else if (lexer -> lookahead == '\n') {
} else if (lexer->lookahead == '\n') {
advance(lexer);
}
continue;
}
if (is_bytes(&delimiter)) {
if (is_bytes(delimiter)) {
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == 'N' || lexer->lookahead == 'u' ||
lexer->lookahead == 'U') {
if (lexer->lookahead == 'N' || lexer->lookahead == 'u' || lexer->lookahead == 'U') {
// In bytes string, \N{...}, \uXXXX and \UXXXXXXXX are
// not escape sequences
// https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
@ -245,7 +164,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
return has_content;
}
} else if (lexer->lookahead == end_char) {
if (is_triple(&delimiter)) {
if (is_triple(delimiter)) {
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == end_char) {
@ -256,7 +175,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
} else {
advance(lexer);
lexer->mark_end(lexer);
VEC_POP(scanner->delimiters);
array_pop(&scanner->delimiters);
lexer->result_symbol = STRING_END;
scanner->inside_f_string = false;
}
@ -274,15 +193,14 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
lexer->result_symbol = STRING_CONTENT;
} else {
advance(lexer);
VEC_POP(scanner->delimiters);
array_pop(&scanner->delimiters);
lexer->result_symbol = STRING_END;
scanner->inside_f_string = false;
}
lexer->mark_end(lexer);
return true;
} else if (lexer->lookahead == '\n' && has_content &&
!is_triple(&delimiter)) {
} else if (lexer->lookahead == '\n' && has_content && !is_triple(delimiter)) {
return false;
}
advance(lexer);
@ -309,7 +227,8 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
} else if (lexer->lookahead == '\t') {
indent_length += 8;
skip(lexer);
} else if (lexer->lookahead == '#') {
} else if (lexer->lookahead == '#' && (valid_symbols[INDENT] || valid_symbols[DEDENT] ||
valid_symbols[NEWLINE] || valid_symbols[EXCEPT])) {
// If we haven't found an EOL yet,
// then this is a comment after an expression:
// foo = bar # comment
@ -346,32 +265,28 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
}
if (found_end_of_line) {
if (scanner->indents.len > 0) {
uint16_t current_indent_length = VEC_BACK(scanner->indents);
if (scanner->indents.size > 0) {
uint16_t current_indent_length = *array_back(&scanner->indents);
if (valid_symbols[INDENT] &&
indent_length > current_indent_length) {
VEC_PUSH(scanner->indents, indent_length);
if (valid_symbols[INDENT] && indent_length > current_indent_length) {
array_push(&scanner->indents, indent_length);
lexer->result_symbol = INDENT;
return true;
}
bool next_tok_is_string_start = lexer->lookahead == '\"' ||
lexer->lookahead == '\'' ||
lexer->lookahead == '`';
bool next_tok_is_string_start =
lexer->lookahead == '\"' || lexer->lookahead == '\'' || lexer->lookahead == '`';
if ((valid_symbols[DEDENT] ||
(!valid_symbols[NEWLINE] &&
!(valid_symbols[STRING_START] && next_tok_is_string_start) &&
(!valid_symbols[NEWLINE] && !(valid_symbols[STRING_START] && next_tok_is_string_start) &&
!within_brackets)) &&
indent_length < current_indent_length &&
!scanner->inside_f_string &&
indent_length < current_indent_length && !scanner->inside_f_string &&
// Wait to create a dedent token until we've consumed any
// comments
// whose indentation matches the current block.
first_comment_indent_length < (int32_t)current_indent_length) {
VEC_POP(scanner->indents);
array_pop(&scanner->indents);
lexer->result_symbol = DEDENT;
return true;
}
@ -432,7 +347,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
}
if (end_character(&delimiter)) {
VEC_PUSH(scanner->delimiters, delimiter);
array_push(&scanner->delimiters, delimiter);
lexer->result_symbol = STRING_START;
scanner->inside_f_string = is_format(&delimiter);
return true;
@ -445,43 +360,38 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
return false;
}
unsigned tree_sitter_python_external_scanner_serialize(void *payload,
char *buffer) {
unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = (Scanner *)payload;
size_t size = 0;
buffer[size++] = (char)scanner->inside_f_string;
size_t delimiter_count = scanner->delimiters.len;
size_t delimiter_count = scanner->delimiters.size;
if (delimiter_count > UINT8_MAX) {
delimiter_count = UINT8_MAX;
}
buffer[size++] = (char)delimiter_count;
if (delimiter_count > 0) {
memcpy(&buffer[size], scanner->delimiters.data, delimiter_count);
memcpy(&buffer[size], scanner->delimiters.contents, delimiter_count);
}
size += delimiter_count;
int iter = 1;
for (; iter < scanner->indents.len &&
size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
++iter) {
buffer[size++] = (char)scanner->indents.data[iter];
uint32_t iter = 1;
for (; iter < scanner->indents.size && size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
buffer[size++] = (char)*array_get(&scanner->indents, iter);
}
return size;
}
void tree_sitter_python_external_scanner_deserialize(void *payload,
const char *buffer,
unsigned length) {
void tree_sitter_python_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = (Scanner *)payload;
VEC_CLEAR(scanner->delimiters);
VEC_CLEAR(scanner->indents);
VEC_PUSH(scanner->indents, 0);
array_delete(&scanner->delimiters);
array_delete(&scanner->indents);
array_push(&scanner->indents, 0);
if (length > 0) {
size_t size = 0;
@ -490,14 +400,14 @@ void tree_sitter_python_external_scanner_deserialize(void *payload,
size_t delimiter_count = (uint8_t)buffer[size++];
if (delimiter_count > 0) {
VEC_GROW(scanner->delimiters, delimiter_count);
scanner->delimiters.len = delimiter_count;
memcpy(scanner->delimiters.data, &buffer[size], delimiter_count);
array_reserve(&scanner->delimiters, delimiter_count);
scanner->delimiters.size = delimiter_count;
memcpy(scanner->delimiters.contents, &buffer[size], delimiter_count);
size += delimiter_count;
}
for (; size < length; size++) {
VEC_PUSH(scanner->indents, (unsigned char)buffer[size]);
array_push(&scanner->indents, (unsigned char)buffer[size]);
}
}
}
@ -509,15 +419,15 @@ void *tree_sitter_python_external_scanner_create() {
assert(sizeof(Delimiter) == sizeof(char));
#endif
Scanner *scanner = calloc(1, sizeof(Scanner));
scanner->indents = indent_vec_new();
scanner->delimiters = delimiter_vec_new();
array_init(&scanner->indents);
array_init(&scanner->delimiters);
tree_sitter_python_external_scanner_deserialize(scanner, NULL, 0);
return scanner;
}
void tree_sitter_python_external_scanner_destroy(void *payload) {
Scanner *scanner = (Scanner *)payload;
VEC_FREE(scanner->indents);
VEC_FREE(scanner->delimiters);
array_delete(&scanner->indents);
array_delete(&scanner->delimiters);
free(scanner);
}

@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// Allow clients to override allocation functions
#ifdef TREE_SITTER_REUSE_ALLOCATOR
extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#else
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

@ -0,0 +1,287 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
(_array__grow((Array *)(self), count, array_elem_size(self)), \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)), \
(self)->size += (count))
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
if (self->contents) {
ts_free(self->contents);
self->contents = NULL;
self->size = 0;
self->capacity = 0;
}
}
/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
uint32_t index) {
assert(index < self->size);
char *contents = (char *)self->contents;
memmove(contents + index * element_size, contents + (index + 1) * element_size,
(self->size - index - 1) * element_size);
self->size--;
}
/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
if (new_capacity > self->capacity) {
if (self->contents) {
self->contents = ts_realloc(self->contents, new_capacity * element_size);
} else {
self->contents = ts_malloc(new_capacity * element_size);
}
self->capacity = new_capacity;
}
}
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
_array__reserve(self, element_size, other->size);
self->size = other->size;
memcpy(self->contents, other->contents, self->size * element_size);
}
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
Array swap = *other;
*other = *self;
*self = swap;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
uint32_t new_size = self->size + count;
if (new_size > self->capacity) {
uint32_t new_capacity = self->capacity * 2;
if (new_capacity < 8) new_capacity = 8;
if (new_capacity < new_size) new_capacity = new_size;
_array__reserve(self, element_size, new_capacity);
}
}
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;
assert(old_end <= self->size);
_array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
if (self->size > old_end) {
memmove(
contents + new_end * element_size,
contents + old_end * element_size,
(self->size - old_end) * element_size
);
}
if (new_count > 0) {
if (elements) {
memcpy(
(contents + index * element_size),
elements,
new_count * element_size
);
} else {
memset(
(contents + index * element_size),
0,
new_count * element_size
);
}
}
self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

@ -129,9 +129,16 @@ struct TSLanguage {
* Lexer Macros
*/
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
@ -139,8 +146,7 @@ struct TSLanguage {
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead; \
eof = lexer->eof(lexer);
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \

@ -60,8 +60,6 @@ Floats
1_1.
1e+3_4j
.3e1_4
1_0.l
.1l
--------------------------------------------------------------------------------
@ -82,10 +80,6 @@ Floats
(float))
(expression_statement
(float))
(expression_statement
(float))
(expression_statement
(float))
(expression_statement
(float)))

@ -0,0 +1,379 @@
type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string };
type BlankRule = { type: 'BLANK' };
type ChoiceRule = { type: 'CHOICE'; members: Rule[] };
type FieldRule = { type: 'FIELD'; name: string; content: Rule };
type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule };
type PatternRule = { type: 'PATTERN'; value: string };
type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number };
type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number };
type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number };
type PrecRule = { type: 'PREC'; content: Rule; value: number };
type Repeat1Rule = { type: 'REPEAT1'; content: Rule };
type RepeatRule = { type: 'REPEAT'; content: Rule };
type SeqRule = { type: 'SEQ'; members: Rule[] };
type StringRule = { type: 'STRING'; value: string };
type SymbolRule<Name extends string> = { type: 'SYMBOL'; name: Name };
type TokenRule = { type: 'TOKEN'; content: Rule };
type Rule =
| AliasRule
| BlankRule
| ChoiceRule
| FieldRule
| ImmediateTokenRule
| PatternRule
| PrecDynamicRule
| PrecLeftRule
| PrecRightRule
| PrecRule
| Repeat1Rule
| RepeatRule
| SeqRule
| StringRule
| SymbolRule<string>
| TokenRule;
type RuleOrLiteral = Rule | RegExp | string;
type GrammarSymbols<RuleName extends string> = {
[name in RuleName]: SymbolRule<name>;
} &
Record<string, SymbolRule<string>>;
type RuleBuilder<RuleName extends string> = (
$: GrammarSymbols<RuleName>,
previous: Rule,
) => RuleOrLiteral;
type RuleBuilders<
RuleName extends string,
BaseGrammarRuleName extends string
> = {
[name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};
interface Grammar<
RuleName extends string,
BaseGrammarRuleName extends string = never,
Rules extends RuleBuilders<RuleName, BaseGrammarRuleName> = RuleBuilders<
RuleName,
BaseGrammarRuleName
>
> {
/**
* Name of the grammar language.
*/
name: string;
/** Mapping of grammar rule names to rule builder functions. */
rules: Rules;
/**
* An array of arrays of precedence names or rules. Each inner array represents
* a *descending* ordering. Names/rules listed earlier in one of these arrays
* have higher precedence than any names/rules listed later in the same array.
*
* Using rules is just a shorthand way for using a name then calling prec()
* with that name. It is just a convenience.
*/
precedences?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[][],
) => RuleOrLiteral[][],
/**
* An array of arrays of rule names. Each inner array represents a set of
* rules that's involved in an _LR(1) conflict_ that is _intended to exist_
* in the grammar. When these conflicts occur at runtime, Tree-sitter will
* use the GLR algorithm to explore all of the possible interpretations. If
* _multiple_ parses end up succeeding, Tree-sitter will pick the subtree
* whose corresponding rule has the highest total _dynamic precedence_.
*
* @param $ grammar rules
*/
conflicts?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[][],
) => RuleOrLiteral[][];
/**
* An array of token names which can be returned by an _external scanner_.
* External scanners allow you to write custom C code which runs during the
* lexing process in order to handle lexical rules (e.g. Python's indentation
* tokens) that cannot be described by regular expressions.
*
* @param $ grammar rules
* @param previous array of externals from the base schema, if any
*
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
*/
externals?: (
$: Record<string, SymbolRule<string>>,
previous: Rule[],
) => RuleOrLiteral[];
/**
* An array of tokens that may appear anywhere in the language. This
* is often used for whitespace and comments. The default value of
* extras is to accept whitespace. To control whitespace explicitly,
* specify extras: `$ => []` in your grammar.
*
* @param $ grammar rules
*/
extras?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
) => RuleOrLiteral[];
/**
* An array of rules that should be automatically removed from the
* grammar by replacing all of their usages with a copy of their definition.
* This is useful for rules that are used in multiple places but for which
* you don't want to create syntax tree nodes at runtime.
*
* @param $ grammar rules
*/
inline?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[],
) => RuleOrLiteral[];
/**
* A list of hidden rule names that should be considered supertypes in the
* generated node types file.
*
* @param $ grammar rules
*
* @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
*/
supertypes?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[],
) => RuleOrLiteral[];
/**
* The name of a token that will match keywords for the purpose of the
* keyword extraction optimization.
*
* @param $ grammar rules
*
* @see https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction
*/
word?: ($: GrammarSymbols<RuleName | BaseGrammarRuleName>) => RuleOrLiteral;
}
/**
 * The schema produced by a `grammar()` call: identical to `Grammar`
 * except that the `rules` field is the plain record of rule name to
 * rule (the rule-builder callbacks have been evaluated).
 */
type GrammarSchema<RuleName extends string> = {
  [K in keyof Grammar<RuleName>]: K extends 'rules'
    ? Record<RuleName, Rule>
    : Grammar<RuleName>[K];
};
/**
 * Causes the given rule to appear with an alternative name in the syntax tree.
 * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an
 * anonymous node, as if the rule had been written as the simple string.
 *
 * @param rule rule that will be aliased
 * @param name target name for the alias
 *
 * @returns an `AliasRule` presenting `rule` as the anonymous node `name`
 */
declare function alias(rule: RuleOrLiteral, name: string): AliasRule;
/**
 * Causes the given rule to appear as an alternative named node, for instance
 * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named
 * node called `bar`.
 *
 * @param rule rule that will be aliased
 * @param symbol target symbol for the alias
 *
 * @returns an `AliasRule` presenting `rule` as the named node `symbol`
 */
declare function alias(
  rule: RuleOrLiteral,
  symbol: SymbolRule<string>,
): AliasRule;
/**
 * Creates a blank rule, matching nothing.
 *
 * @returns a `BlankRule` that matches the empty string
 */
declare function blank(): BlankRule;
/**
 * Assigns a field name to the child node(s) matched by the given rule.
 * In the resulting syntax tree, you can then use that field name to
 * access specific children.
 *
 * @param name name of the field
 * @param rule rule the field should match
 *
 * @returns a `FieldRule` labeling `rule`'s nodes with `name`
 */
declare function field(name: string, rule: RuleOrLiteral): FieldRule;
/**
 * Creates a rule that matches one of a set of possible rules. The order
 * of the arguments does not matter. This is analogous to the `|` (pipe)
 * operator in EBNF notation.
 *
 * @param options possible rule choices
 *
 * @returns a `ChoiceRule` matching any one of `options`
 */
declare function choice(...options: RuleOrLiteral[]): ChoiceRule;
/**
 * Creates a rule that matches zero or one occurrence of a given rule.
 * It is analogous to the `[x]` (square bracket) syntax in EBNF notation.
 *
 * @param rule rule to be made optional
 *
 * @returns a `ChoiceRule` between `rule` and blank
 */
declare function optional(rule: RuleOrLiteral): ChoiceRule;
/**
 * Marks the given rule with a precedence which will be used to resolve LR(1)
 * conflicts at parser-generation time. When two rules overlap in a way that
 * represents either a true ambiguity or a _local_ ambiguity given one token
 * of lookahead, Tree-sitter will try to resolve the conflict by matching the
 * rule with the higher precedence.
 *
 * Precedence values can either be strings or numbers. When comparing rules
 * with numerical precedence, higher numbers indicate higher precedences. To
 * compare rules with string precedence, Tree-sitter uses the grammar's `precedences`
 * field.
 *
 * The default precedence of all rules is zero. This works similarly to the
 * precedence directives in Yacc grammars.
 *
 * @param value precedence weight
 * @param rule rule being weighted
 *
 * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables
 * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
 */
declare const prec: {
  // `string | number` uses the primitive types; the boxed `String` wrapper
  // type should never appear in type positions.
  (value: string | number, rule: RuleOrLiteral): PrecRule;
  /**
   * Marks the given rule as left-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a left-associative rule, Tree-sitter
   * will prefer matching a rule that ends _earlier_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param value (optional) precedence weight
   * @param rule rule to mark as left-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  left(rule: RuleOrLiteral): PrecLeftRule;
  left(value: string | number, rule: RuleOrLiteral): PrecLeftRule;
  /**
   * Marks the given rule as right-associative (and optionally applies a
   * numerical precedence). When an LR(1) conflict arises in which all of the
   * rules have the same numerical precedence, Tree-sitter will consult the
   * rules' associativity. If there is a right-associative rule, Tree-sitter
   * will prefer matching a rule that ends _later_. This works similarly to
   * associativity directives in Yacc grammars.
   *
   * @param value (optional) precedence weight
   * @param rule rule to mark as right-associative
   *
   * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html
   */
  right(rule: RuleOrLiteral): PrecRightRule;
  right(value: string | number, rule: RuleOrLiteral): PrecRightRule;
  /**
   * Marks the given rule with a numerical precedence which will be used to
   * resolve LR(1) conflicts at _runtime_ instead of parser-generation time.
   * This is only necessary when handling a conflict dynamically using the
   * `conflicts` field in the grammar, and when there is a genuine _ambiguity_:
   * multiple rules correctly match a given piece of code. In that event,
   * Tree-sitter compares the total dynamic precedence associated with each
   * rule, and selects the one with the highest total. This is similar to
   * dynamic precedence directives in Bison grammars.
   *
   * @param value precedence weight
   * @param rule rule being weighted
   *
   * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
   */
  dynamic(value: string | number, rule: RuleOrLiteral): PrecDynamicRule;
};
/**
 * Creates a rule that matches _zero-or-more_ occurrences of a given rule.
 * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This
 * rule is implemented in terms of `repeat1` but is included because it
 * is very commonly used.
 *
 * @param rule rule to repeat, zero or more times
 *
 * @returns a `RepeatRule` matching zero or more occurrences of `rule`
 */
declare function repeat(rule: RuleOrLiteral): RepeatRule;
/**
 * Creates a rule that matches one-or-more occurrences of a given rule.
 *
 * @param rule rule to repeat, one or more times
 *
 * @returns a `Repeat1Rule` matching one or more occurrences of `rule`
 */
declare function repeat1(rule: RuleOrLiteral): Repeat1Rule;
/**
 * Creates a rule that matches any number of other rules, one after another.
 * It is analogous to simply writing multiple symbols next to each other
 * in EBNF notation.
 *
 * @param rules ordered rules that comprise the sequence
 *
 * @returns a `SeqRule` matching each of `rules` in order
 */
declare function seq(...rules: RuleOrLiteral[]): SeqRule;
/**
 * Creates a symbol rule, representing another rule in the grammar by name.
 *
 * @param name name of the target rule
 *
 * @returns a `SymbolRule` referring to the rule called `name`
 */
declare function sym<Name extends string>(name: Name): SymbolRule<Name>;
/**
 * Marks the given rule as producing only a single token. Tree-sitter's
 * default is to treat each String or RegExp literal in the grammar as a
 * separate token. Each token is matched separately by the lexer and
 * returned as its own leaf node in the tree. The token function allows
 * you to express a complex rule using the DSL functions (rather
 * than as a single regular expression) but still have Tree-sitter treat
 * it as a single token.
 *
 * @param rule rule to represent as a single token
 *
 * @returns a `TokenRule` lexing `rule` as one leaf node
 */
declare const token: {
  (rule: RuleOrLiteral): TokenRule;
  /**
   * Marks the given rule as producing an immediate token. This allows
   * the parser to produce a different token based on whether or not
   * there are `extras` preceding the token's main content. When there
   * are _no_ leading `extras`, an immediate token is preferred over a
   * normal token which would otherwise match.
   *
   * @param rule rule to represent as an immediate token
   */
  immediate(rule: RuleOrLiteral): ImmediateTokenRule;
};
/**
 * Creates a new language grammar with the provided schema.
 *
 * @param options grammar options
 *
 * @returns the resulting `GrammarSchema` for the new language
 */
declare function grammar<RuleName extends string>(
  options: Grammar<RuleName>,
): GrammarSchema<RuleName>;
/**
 * Extends an existing language grammar with the provided options,
 * creating a new language.
 *
 * @param baseGrammar base grammar schema to extend from
 * @param options grammar options for the new extended language
 *
 * @returns a `GrammarSchema` covering both the base and the new rule names
 */
declare function grammar<
  BaseGrammarRuleName extends string,
  RuleName extends string
>(
  baseGrammar: GrammarSchema<BaseGrammarRuleName>,
  options: Grammar<RuleName, BaseGrammarRuleName>,
): GrammarSchema<RuleName | BaseGrammarRuleName>;