Merge commit 'c01fb4e38587e959b9058b8cd34b9e6a3068c827'

pull/559/head
Wilfred Hughes 2023-08-21 08:48:17 +07:00
commit 9403e410c2
35 changed files with 112402 additions and 75333 deletions

@ -2,7 +2,7 @@
### Parsing
Updated Bash and Rust parsers.
Updated Bash, Python and Rust parsers.
### Display

@ -283,7 +283,7 @@ fn main() {
TreeSitterParser {
name: "tree-sitter-python",
src_dir: "vendored_parsers/tree-sitter-python-src",
extra_files: vec!["scanner.cc"],
extra_files: vec!["scanner.c"],
},
TreeSitterParser {
name: "tree-sitter-qmljs",

@ -0,0 +1,20 @@
module.exports = {
'env': {
'commonjs': true,
'es2021': true,
},
'extends': 'google',
'overrides': [
],
'parserOptions': {
'ecmaVersion': 'latest',
'sourceType': 'module',
},
'rules': {
'indent': ['error', 2, {'SwitchCase': 1}],
'max-len': [
'error',
{'code': 120, 'ignoreComments': true, 'ignoreUrls': true, 'ignoreStrings': true},
],
},
};

@ -1,2 +1,7 @@
/src/** linguist-vendored
/src/parser.c linguist-vendored
/src/*.json linguist-vendored
/examples/* linguist-vendored
src/grammar.json -diff
src/node-types.json -diff
src/parser.c -diff

@ -1,4 +1,4 @@
name: Build/test
name: CI
on:
pull_request:
branches:
@ -14,18 +14,18 @@ jobs:
matrix:
os: [macos-latest, ubuntu-latest]
steps:
- uses: actions/checkout@v2
- uses: actions/setup-node@v2
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 16
node-version: 18
- run: npm install
- run: npm test
test_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-node@v2
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 16
node-version: 18
- run: npm install
- run: npm run-script test-windows

@ -0,0 +1,22 @@
name: Fuzz Parser
on:
push:
paths:
- src/scanner.c
pull_request:
paths:
- src/scanner.c
workflow_dispatch:
jobs:
test:
name: Parser fuzzing
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: vigoux/tree-sitter-fuzz-action@v1
with:
language: python
external-scanner: src/scanner.c
time: 60

@ -0,0 +1,19 @@
name: Lint
on:
push:
branches:
- master
pull_request:
branches:
- "**"
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install modules
run: npm install
- name: Run ESLint
run: npm run lint

@ -0,0 +1,103 @@
name: Release
on:
workflow_run:
workflows: ["CI"]
branches:
- master
types:
- completed
jobs:
release:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Get previous commit SHA
id: get_previous_commit
run: |
LATEST_TAG=$(git describe --tags --abbrev=0)
if [[ -z "$LATEST_TAG" ]]; then
echo "No tag found. Failing..."
exit 1
fi
echo "latest_tag=${LATEST_TAG#v}" >> "$GITHUB_ENV" # Remove 'v' prefix from the tag
- name: Check if version changed and is greater than the previous
id: version_check
run: |
# Compare the current version with the version from the previous commit
PREVIOUS_NPM_VERSION=${{ env.latest_tag }}
CURRENT_NPM_VERSION=$(jq -r '.version' package.json)
CURRENT_CARGO_VERSION=$(awk -F '"' '/^version/ {print $2}' Cargo.toml)
if [[ "$CURRENT_NPM_VERSION" != "$CURRENT_CARGO_VERSION" ]]; then # Cargo.toml and package.json versions must match
echo "Mismatch: NPM version ($CURRENT_NPM_VERSION) and Cargo.toml version ($CURRENT_CARGO_VERSION)"
echo "version_changed=false" >> "$GITHUB_ENV"
else
if [[ "$PREVIOUS_NPM_VERSION" == "$CURRENT_NPM_VERSION" ]]; then
echo "version_changed=" >> "$GITHUB_ENV"
else
IFS='.' read -ra PREVIOUS_VERSION_PARTS <<< "$PREVIOUS_NPM_VERSION"
IFS='.' read -ra CURRENT_VERSION_PARTS <<< "$CURRENT_NPM_VERSION"
VERSION_CHANGED=false
for i in "${!PREVIOUS_VERSION_PARTS[@]}"; do
if [[ ${CURRENT_VERSION_PARTS[i]} -gt ${PREVIOUS_VERSION_PARTS[i]} ]]; then
VERSION_CHANGED=true
break
elif [[ ${CURRENT_VERSION_PARTS[i]} -lt ${PREVIOUS_VERSION_PARTS[i]} ]]; then
break
fi
done
echo "version_changed=$VERSION_CHANGED" >> "$GITHUB_ENV"
echo "current_version=${CURRENT_NPM_VERSION}" >> "$GITHUB_ENV"
fi
fi
- name: Display result
run: |
echo "Version bump detected: ${{ env.version_changed }}"
- name: Fail if version is lower
if: env.version_changed == 'false'
run: exit 1
- name: Setup Node
if: env.version_changed == 'true'
uses: actions/setup-node@v3
with:
node-version: 18
registry-url: "https://registry.npmjs.org"
- name: Publish to NPM
if: env.version_changed == 'true'
env:
NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
run: npm publish
- name: Setup Rust
if: env.version_changed == 'true'
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Publish to Crates.io
if: env.version_changed == 'true'
uses: katyo/publish-crates@v2
with:
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
- name: Tag versions
if: env.version_changed == 'true'
run: |
git checkout master
git config user.name github-actions[bot]
git config user.email github-actions[bot]@users.noreply.github.com
git tag -d "v${{ env.current_version }}" || true
git push origin --delete "v${{ env.current_version }}" || true
git tag -a "v${{ env.current_version }}" -m "Version ${{ env.current_version }}"
git push origin "v${{ env.current_version }}"

@ -1,6 +1,6 @@
corpus
examples
build
script
target
/test
/examples
/build
/script
/target
bindings/rust

@ -1,31 +1,27 @@
[package]
name = "tree-sitter-python"
description = "Python grammar for the tree-sitter parsing library"
version = "0.20.2"
description = "Python grammar for tree-sitter"
version = "0.20.4"
authors = [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Douglas Creager <dcreager@dcreager.net>",
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Douglas Creager <dcreager@dcreager.net>",
]
license = "MIT"
readme = "bindings/rust/README.md"
keywords = ["incremental", "parsing", "python"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter-python"
edition = "2018"
edition = "2021"
autoexamples = false
build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
]
include = ["bindings/rust/*", "grammar.js", "queries/*", "src/*"]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = ">= 0.19, < 0.21"
tree-sitter = "~0.20.10"
[build-dependencies]
cc = "1.0"
cc = "~1.0"

@ -0,0 +1,33 @@
// swift-tools-version:5.3
import PackageDescription
let package = Package(
name: "TreeSitterPython",
products: [
.library(name: "TreeSitterPython", targets: ["TreeSitterPython"]),
],
dependencies: [],
targets: [
.target(name: "TreeSitterPython",
path: ".",
exclude: [
"binding.gyp",
"bindings",
"Cargo.toml",
"corpus",
"grammar.js",
"LICENSE",
"package.json",
"README.md",
],
sources: [
"src/parser.c",
"src/scanner.c",
],
resources: [
.copy("queries")
],
publicHeadersPath: "bindings/swift",
cSettings: [.headerSearchPath("src")])
]
)

@ -1,5 +1,4 @@
tree-sitter-python
==================
# tree-sitter-python
[![build](https://github.com/tree-sitter/tree-sitter-python/actions/workflows/ci.yml/badge.svg)](https://github.com/tree-sitter/tree-sitter-python/actions/workflows/ci.yml)
@ -7,7 +6,7 @@ Python grammar for [tree-sitter][].
[tree-sitter]: https://github.com/tree-sitter/tree-sitter
#### References
## References
* [Python 2 Grammar](https://docs.python.org/2/reference/grammar.html)
* [Python 3 Grammar](https://docs.python.org/3/reference/grammar.html)
- [Python 2 Grammar](https://docs.python.org/2/reference/grammar.html)
- [Python 3 Grammar](https://docs.python.org/3/reference/grammar.html)

@ -7,9 +7,9 @@
"src"
],
"sources": [
"src/parser.c",
"bindings/node/binding.cc",
"src/scanner.cc"
"src/parser.c",
"src/scanner.c"
],
"cflags_c": [
"-std=c99",

@ -2,20 +2,20 @@
This crate provides a Python grammar for the [tree-sitter][] parsing library.
To use this crate, add it to the `[dependencies]` section of your `Cargo.toml`
file. (Note that you will probably also need to depend on the
file. (Note that you will probably also need to depend on the
[`tree-sitter`][tree-sitter crate] crate to use the parsed result in any useful
way.)
``` toml
```toml
[dependencies]
tree-sitter = "0.17"
tree-sitter-python = "0.17"
tree-sitter = "0.20.10"
tree-sitter-python = "0.20.4"
```
Typically, you will use the [language][language func] function to add this
grammar to a tree-sitter [Parser][], and then use the parser to parse some code:
``` rust
```rust
let code = r#"
def double(x):
return x * 2
@ -28,7 +28,6 @@ let parsed = parser.parse(code, None);
If you have any questions, please reach out to us in the [tree-sitter
discussions] page.
[Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
[language func]: https://docs.rs/tree-sitter-python/*/tree_sitter_python/fn.language.html
[Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
[tree-sitter]: https://tree-sitter.github.io/
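For reference, a minimal sketch of the usage the bindings README describes above, assuming the tree-sitter 0.20 Rust API and both crates from the `[dependencies]` snippet shown earlier (the `main` wrapper and the printed S-expression are illustrative only, not part of the diff):

```rust
use tree_sitter::Parser;

fn main() {
    // Source text to parse.
    let code = r#"
def double(x):
    return x * 2
"#;

    // Create a parser and hand it the Python grammar from this crate.
    let mut parser = Parser::new();
    parser
        .set_language(tree_sitter_python::language())
        .expect("Error loading Python grammar");

    // Parse the code and print the syntax tree as an S-expression.
    let tree = parser.parse(code, None).expect("Failed to parse");
    println!("{}", tree.root_node().to_sexp());
}
```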

@ -1,28 +1,19 @@
use std::path::Path;
extern crate cc;
fn main() {
let src_dir = Path::new("src");
let src_dir = std::path::Path::new("src");
let mut c_config = cc::Build::new();
c_config.include(&src_dir);
c_config.include(src_dir);
c_config
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-unused-but-set-variable")
.flag_if_supported("-Wno-trigraphs");
let parser_path = src_dir.join("parser.c");
c_config.file(&parser_path);
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
c_config.compile("parser");
let mut cpp_config = cc::Build::new();
cpp_config.cpp(true);
cpp_config.include(&src_dir);
cpp_config
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-unused-but-set-variable");
let scanner_path = src_dir.join("scanner.cc");
cpp_config.file(&scanner_path);
let scanner_path = src_dir.join("scanner.c");
c_config.file(&scanner_path);
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
cpp_config.compile("scanner");
c_config.compile("parser");
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
}

@ -43,18 +43,18 @@ pub fn language() -> Language {
}
/// The source of the Python tree-sitter grammar description.
pub const GRAMMAR: &'static str = include_str!("../../grammar.js");
pub const GRAMMAR: &str = include_str!("../../grammar.js");
/// The syntax highlighting query for this language.
pub const HIGHLIGHT_QUERY: &'static str = include_str!("../../queries/highlights.scm");
pub const HIGHLIGHT_QUERY: &str = include_str!("../../queries/highlights.scm");
/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json");
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
/// The symbol tagging query for this language.
pub const TAGGING_QUERY: &'static str = include_str!("../../queries/tags.scm");
pub const TAGGING_QUERY: &str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {

@ -0,0 +1,16 @@
#ifndef TREE_SITTER_PYTHON_H_
#define TREE_SITTER_PYTHON_H_
typedef struct TSLanguage TSLanguage;
#ifdef __cplusplus
extern "C" {
#endif
extern TSLanguage *tree_sitter_python();
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PYTHON_H_

File diff suppressed because it is too large

@ -1,6 +1,6 @@
{
"name": "tree-sitter-python",
"version": "0.20.2",
"version": "0.20.4",
"description": "Python grammar for tree-sitter",
"main": "bindings/node",
"keywords": [
@ -10,15 +10,18 @@
"author": "Max Brunsfeld",
"license": "MIT",
"dependencies": {
"nan": "^2.15.0"
"nan": "^2.17.0"
},
"devDependencies": {
"tree-sitter-cli": "^0.20.1"
"eslint": "^8.47.0",
"eslint-config-google": "^0.14.0",
"tree-sitter-cli": "^0.20.8"
},
"scripts": {
"build": "tree-sitter generate && node-gyp build",
"test": "tree-sitter test && script/parse-examples",
"lint": "eslint grammar.js",
"parse": "tree-sitter parse",
"test": "tree-sitter test && script/parse-examples",
"test-windows": "tree-sitter test"
},
"repository": "https://github.com/tree-sitter/tree-sitter-python",
@ -27,6 +30,13 @@
"scope": "source.python",
"file-types": [
"py"
],
"injection-regex": "py",
"highlights": [
"queries/highlights.scm"
],
"tags": [
"queries/tags.scm"
]
}
]

@ -66,14 +66,17 @@
"//="
"/="
"&"
"&="
"%"
"%="
"^"
"^="
"+"
"->"
"+="
"<"
"<<"
"<<="
"<="
"<>"
"="
@ -82,8 +85,11 @@
">"
">="
">>"
">>="
"|"
"|="
"~"
"@="
"and"
"in"
"is"

@ -1,3 +1,5 @@
(module (expression_statement (assignment left: (identifier) @name) @definition.constant))
(class_definition
name: (identifier) @name) @definition.class

@ -0,0 +1,4 @@
examples/cpython/Lib/test/badsyntax_3131.py
examples/cpython/Lib/test/badsyntax_future8.py
examples/cpython/Lib/test/test_compile.py
examples/cpython/Tools/build/generate_re_casefix.py

@ -1,36 +1,47 @@
#!/bin/bash
#!/usr/bin/env bash
set -e
set -eu
cd "$(dirname "$0")/.."
function checkout() {
repo=$1; url=$2; sha=$3
if [ ! -d "$repo" ]; then
git clone "https://github.com/$url" "$repo"
fi
pushd "$repo"
git fetch && git reset --hard "$sha"
popd
function clone_repo {
owner=$1
name=$2
sha=$3
path=examples/$name
if [ ! -d "$path" ]; then
echo "Cloning $owner/$name"
git clone "https://github.com/$owner/$name" "$path"
fi
pushd "$path" >/dev/null
actual_sha=$(git rev-parse HEAD)
if [ "$actual_sha" != "$sha" ]; then
echo "Updating $owner/$name to $sha"
git fetch
git reset --hard "$sha"
fi
popd >/dev/null
}
checkout examples/numpy numpy/numpy 058851c5cfc98f50f11237b1c13d77cfd1f40475
checkout examples/django django/django 01974d7f7549b2dca2a729c3c1a1ea7d4585eb3a
checkout examples/flask pallets/flask de464c03e134127140e5622e230790806a133ff9
clone_repo numpy numpy 058851c5cfc98f50f11237b1c13d77cfd1f40475
clone_repo django django 01974d7f7549b2dca2a729c3c1a1ea7d4585eb3a
clone_repo pallets flask de464c03e134127140e5622e230790806a133ff9
clone_repo python cpython bb456a08a3db851e6feaefc3328f39096919ec8d
known_failures="$(cat script/known_failures.txt)"
# shellcheck disable=2046
tree-sitter parse -q \
'examples/**/*.py' \
$(for file in $known_failures; do echo "!${file}"; done)
'examples/**/*.py' \
$(for file in $known_failures; do echo "!${file}"; done)
example_count=$(find examples -name '*.py' | wc -l)
failure_count=$(wc -w <<< "$known_failures")
success_count=$(( $example_count - $failure_count ))
success_percent=$(bc -l <<< "100*${success_count}/${example_count}")
failure_count=$(wc -w <<<"$known_failures")
success_count=$((example_count - failure_count))
success_percent=$(bc -l <<<"100*${success_count}/${example_count}")
printf \
"Successfully parsed %d of %d example files (%.1f%%)\n" \
$success_count $example_count $success_percent
"Successfully parsed %d of %d example files (%.1f%%)\n" \
"$success_count" "$example_count" "$success_percent"

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -0,0 +1,523 @@
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <tree_sitter/parser.h>
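// Small growable-array helpers; used below for the indent stack and the
// string-delimiter stack.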
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define VEC_RESIZE(vec, _cap) \
void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \
assert(tmp != NULL); \
(vec).data = tmp; \
(vec).cap = (_cap);
#define VEC_GROW(vec, _cap) \
if ((vec).cap < (_cap)) { \
VEC_RESIZE((vec), (_cap)); \
}
#define VEC_PUSH(vec, el) \
if ((vec).cap == (vec).len) { \
VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \
} \
(vec).data[(vec).len++] = (el);
#define VEC_POP(vec) (vec).len--;
#define VEC_NEW \
{ .len = 0, .cap = 0, .data = NULL }
#define VEC_BACK(vec) ((vec).data[(vec).len - 1])
#define VEC_FREE(vec) \
{ \
if ((vec).data != NULL) \
free((vec).data); \
}
#define VEC_CLEAR(vec) (vec).len = 0;
enum TokenType {
NEWLINE,
INDENT,
DEDENT,
STRING_START,
STRING_CONTENT,
ESCAPE_INTERPOLATION,
STRING_END,
COMMENT,
CLOSE_PAREN,
CLOSE_BRACKET,
CLOSE_BRACE,
};
typedef enum {
SingleQuote = 1 << 0,
DoubleQuote = 1 << 1,
BackQuote = 1 << 2,
Raw = 1 << 3,
Format = 1 << 4,
Triple = 1 << 5,
Bytes = 1 << 6,
} Flags;
typedef struct {
char flags;
} Delimiter;
static inline Delimiter new_delimiter() { return (Delimiter){0}; }
static inline bool is_format(Delimiter *delimiter) {
return delimiter->flags & Format;
}
static inline bool is_raw(Delimiter *delimiter) {
return delimiter->flags & Raw;
}
static inline bool is_triple(Delimiter *delimiter) {
return delimiter->flags & Triple;
}
static inline bool is_bytes(Delimiter *delimiter) {
return delimiter->flags & Bytes;
}
static inline int32_t end_character(Delimiter *delimiter) {
if (delimiter->flags & SingleQuote) {
return '\'';
}
if (delimiter->flags & DoubleQuote) {
return '"';
}
if (delimiter->flags & BackQuote) {
return '`';
}
return 0;
}
static inline void set_format(Delimiter *delimiter) {
delimiter->flags |= Format;
}
static inline void set_raw(Delimiter *delimiter) { delimiter->flags |= Raw; }
static inline void set_triple(Delimiter *delimiter) {
delimiter->flags |= Triple;
}
static inline void set_bytes(Delimiter *delimiter) {
delimiter->flags |= Bytes;
}
static inline void set_end_character(Delimiter *delimiter, int32_t character) {
switch (character) {
case '\'':
delimiter->flags |= SingleQuote;
break;
case '"':
delimiter->flags |= DoubleQuote;
break;
case '`':
delimiter->flags |= BackQuote;
break;
default:
assert(false);
}
}
typedef struct {
uint32_t len;
uint32_t cap;
uint16_t *data;
} indent_vec;
static indent_vec indent_vec_new() {
indent_vec vec = VEC_NEW;
vec.data = calloc(1, sizeof(uint16_t));
vec.cap = 1;
return vec;
}
typedef struct {
uint32_t len;
uint32_t cap;
Delimiter *data;
} delimiter_vec;
static delimiter_vec delimiter_vec_new() {
delimiter_vec vec = VEC_NEW;
vec.data = calloc(1, sizeof(Delimiter));
vec.cap = 1;
return vec;
}
typedef struct {
indent_vec indents;
delimiter_vec delimiters;
bool inside_f_string;
} Scanner;
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
bool error_recovery_mode =
valid_symbols[STRING_CONTENT] && valid_symbols[INDENT];
bool within_brackets = valid_symbols[CLOSE_BRACE] ||
valid_symbols[CLOSE_PAREN] ||
valid_symbols[CLOSE_BRACKET];
bool advanced_once = false;
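// A doubled "{{" or "}}" inside an f-string is an escaped brace, not the
// start or end of an interpolation, and is emitted as its own token.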
if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.len > 0 &&
(lexer->lookahead == '{' || lexer->lookahead == '}') &&
!error_recovery_mode) {
Delimiter delimiter = VEC_BACK(scanner->delimiters);
if (is_format(&delimiter)) {
lexer->mark_end(lexer);
bool is_left_brace = lexer->lookahead == '{';
advance(lexer);
advanced_once = true;
if ((lexer->lookahead == '{' && is_left_brace) ||
(lexer->lookahead == '}' && !is_left_brace)) {
advance(lexer);
lexer->mark_end(lexer);
lexer->result_symbol = ESCAPE_INTERPOLATION;
return true;
}
return false;
}
}
if (valid_symbols[STRING_CONTENT] && scanner->delimiters.len > 0 &&
!error_recovery_mode) {
Delimiter delimiter = VEC_BACK(scanner->delimiters);
int32_t end_char = end_character(&delimiter);
bool has_content = advanced_once;
while (lexer->lookahead) {
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') &&
is_format(&delimiter)) {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return has_content;
}
if (lexer->lookahead == '\\') {
if (is_raw(&delimiter)) {
// Step over the backslash.
advance(lexer);
// Step over any escaped quotes.
if (lexer->lookahead == end_character(&delimiter) ||
lexer->lookahead == '\\') {
advance(lexer);
}
// Step over newlines
if (lexer->lookahead == '\r') {
advance(lexer);
if (lexer->lookahead == '\n') {
advance(lexer);
}
} else if (lexer->lookahead == '\n') {
advance(lexer);
}
continue;
}
if (is_bytes(&delimiter)) {
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == 'N' || lexer->lookahead == 'u' ||
lexer->lookahead == 'U') {
// In byte strings, \N{...}, \uXXXX and \UXXXXXXXX are
// not escape sequences
// https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
advance(lexer);
} else {
lexer->result_symbol = STRING_CONTENT;
return has_content;
}
} else {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return has_content;
}
} else if (lexer->lookahead == end_char) {
if (is_triple(&delimiter)) {
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == end_char) {
advance(lexer);
if (lexer->lookahead == end_char) {
if (has_content) {
lexer->result_symbol = STRING_CONTENT;
} else {
advance(lexer);
lexer->mark_end(lexer);
VEC_POP(scanner->delimiters);
lexer->result_symbol = STRING_END;
scanner->inside_f_string = false;
}
return true;
}
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return true;
}
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return true;
}
if (has_content) {
lexer->result_symbol = STRING_CONTENT;
} else {
advance(lexer);
VEC_POP(scanner->delimiters);
lexer->result_symbol = STRING_END;
scanner->inside_f_string = false;
}
lexer->mark_end(lexer);
return true;
} else if (lexer->lookahead == '\n' && has_content &&
!is_triple(&delimiter)) {
return false;
}
advance(lexer);
has_content = true;
}
}
lexer->mark_end(lexer);
bool found_end_of_line = false;
uint32_t indent_length = 0;
int32_t first_comment_indent_length = -1;
for (;;) {
if (lexer->lookahead == '\n') {
found_end_of_line = true;
indent_length = 0;
skip(lexer);
} else if (lexer->lookahead == ' ') {
indent_length++;
skip(lexer);
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
indent_length = 0;
skip(lexer);
} else if (lexer->lookahead == '\t') {
indent_length += 8;
skip(lexer);
} else if (lexer->lookahead == '#') {
// If we haven't found an EOL yet,
// then this is a comment after an expression:
// foo = bar # comment
// Just return, since we don't want to generate an indent/dedent
// token.
if (!found_end_of_line) {
return false;
}
if (first_comment_indent_length == -1) {
first_comment_indent_length = (int32_t)indent_length;
}
while (lexer->lookahead && lexer->lookahead != '\n') {
skip(lexer);
}
skip(lexer);
indent_length = 0;
} else if (lexer->lookahead == '\\') {
skip(lexer);
if (lexer->lookahead == '\r') {
skip(lexer);
}
if (lexer->lookahead == '\n' || lexer->eof(lexer)) {
skip(lexer);
} else {
return false;
}
} else if (lexer->eof(lexer)) {
indent_length = 0;
found_end_of_line = true;
break;
} else {
break;
}
}
if (found_end_of_line) {
if (scanner->indents.len > 0) {
uint16_t current_indent_length = VEC_BACK(scanner->indents);
if (valid_symbols[INDENT] &&
indent_length > current_indent_length) {
VEC_PUSH(scanner->indents, indent_length);
lexer->result_symbol = INDENT;
return true;
}
bool next_tok_is_string_start = lexer->lookahead == '\"' ||
lexer->lookahead == '\'' ||
lexer->lookahead == '`';
if ((valid_symbols[DEDENT] ||
(!valid_symbols[NEWLINE] &&
!(valid_symbols[STRING_START] && next_tok_is_string_start) &&
!within_brackets)) &&
indent_length < current_indent_length &&
!scanner->inside_f_string &&
// Wait to create a dedent token until we've consumed any
// comments
// whose indentation matches the current block.
first_comment_indent_length < (int32_t)current_indent_length) {
VEC_POP(scanner->indents);
lexer->result_symbol = DEDENT;
return true;
}
}
if (valid_symbols[NEWLINE] && !error_recovery_mode) {
lexer->result_symbol = NEWLINE;
return true;
}
}
if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) {
Delimiter delimiter = new_delimiter();
bool has_flags = false;
while (lexer->lookahead) {
if (lexer->lookahead == 'f' || lexer->lookahead == 'F') {
set_format(&delimiter);
} else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') {
set_raw(&delimiter);
} else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
set_bytes(&delimiter);
} else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') {
break;
}
has_flags = true;
advance(lexer);
}
if (lexer->lookahead == '`') {
set_end_character(&delimiter, '`');
advance(lexer);
lexer->mark_end(lexer);
} else if (lexer->lookahead == '\'') {
set_end_character(&delimiter, '\'');
advance(lexer);
lexer->mark_end(lexer);
if (lexer->lookahead == '\'') {
advance(lexer);
if (lexer->lookahead == '\'') {
advance(lexer);
lexer->mark_end(lexer);
set_triple(&delimiter);
}
}
} else if (lexer->lookahead == '"') {
set_end_character(&delimiter, '"');
advance(lexer);
lexer->mark_end(lexer);
if (lexer->lookahead == '"') {
advance(lexer);
if (lexer->lookahead == '"') {
advance(lexer);
lexer->mark_end(lexer);
set_triple(&delimiter);
}
}
}
if (end_character(&delimiter)) {
VEC_PUSH(scanner->delimiters, delimiter);
lexer->result_symbol = STRING_START;
scanner->inside_f_string = is_format(&delimiter);
return true;
}
if (has_flags) {
return false;
}
}
return false;
}
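// Serialization layout: one byte for inside_f_string, one byte for the
// delimiter count, the delimiter flag bytes themselves, then the indent
// lengths (skipping the implicit zero at the bottom of the stack).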
unsigned tree_sitter_python_external_scanner_serialize(void *payload,
char *buffer) {
Scanner *scanner = (Scanner *)payload;
size_t size = 0;
buffer[size++] = (char)scanner->inside_f_string;
size_t delimiter_count = scanner->delimiters.len;
if (delimiter_count > UINT8_MAX) {
delimiter_count = UINT8_MAX;
}
buffer[size++] = (char)delimiter_count;
if (delimiter_count > 0) {
memcpy(&buffer[size], scanner->delimiters.data, delimiter_count);
}
size += delimiter_count;
int iter = 1;
for (; iter < scanner->indents.len &&
size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
++iter) {
buffer[size++] = (char)scanner->indents.data[iter];
}
return size;
}
void tree_sitter_python_external_scanner_deserialize(void *payload,
const char *buffer,
unsigned length) {
Scanner *scanner = (Scanner *)payload;
VEC_CLEAR(scanner->delimiters);
VEC_CLEAR(scanner->indents);
VEC_PUSH(scanner->indents, 0);
if (length > 0) {
size_t size = 0;
scanner->inside_f_string = (bool)buffer[size++];
size_t delimiter_count = (uint8_t)buffer[size++];
if (delimiter_count > 0) {
VEC_GROW(scanner->delimiters, delimiter_count);
scanner->delimiters.len = delimiter_count;
memcpy(scanner->delimiters.data, &buffer[size], delimiter_count);
size += delimiter_count;
}
for (; size < length; size++) {
VEC_PUSH(scanner->indents, (unsigned char)buffer[size]);
}
}
}
void *tree_sitter_python_external_scanner_create() {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
_Static_assert(sizeof(Delimiter) == sizeof(char), "");
#else
assert(sizeof(Delimiter) == sizeof(char));
#endif
Scanner *scanner = calloc(1, sizeof(Scanner));
scanner->indents = indent_vec_new();
scanner->delimiters = delimiter_vec_new();
tree_sitter_python_external_scanner_deserialize(scanner, NULL, 0);
return scanner;
}
void tree_sitter_python_external_scanner_destroy(void *payload) {
Scanner *scanner = (Scanner *)payload;
VEC_FREE(scanner->indents);
VEC_FREE(scanner->delimiters);
free(scanner);
}

@ -1,408 +0,0 @@
#include <tree_sitter/parser.h>
#include <vector>
#include <cwctype>
#include <cstring>
#include <cassert>
#include <stdio.h>
namespace {
using std::vector;
using std::iswspace;
using std::memcpy;
enum TokenType {
NEWLINE,
INDENT,
DEDENT,
STRING_START,
STRING_CONTENT,
STRING_END,
COMMENT,
CLOSE_PAREN,
CLOSE_BRACKET,
CLOSE_BRACE,
};
struct Delimiter {
enum {
SingleQuote = 1 << 0,
DoubleQuote = 1 << 1,
BackQuote = 1 << 2,
Raw = 1 << 3,
Format = 1 << 4,
Triple = 1 << 5,
Bytes = 1 << 6,
};
Delimiter() : flags(0) {}
bool is_format() const {
return flags & Format;
}
bool is_raw() const {
return flags & Raw;
}
bool is_triple() const {
return flags & Triple;
}
bool is_bytes() const {
return flags & Bytes;
}
int32_t end_character() const {
if (flags & SingleQuote) return '\'';
if (flags & DoubleQuote) return '"';
if (flags & BackQuote) return '`';
return 0;
}
void set_format() {
flags |= Format;
}
void set_raw() {
flags |= Raw;
}
void set_triple() {
flags |= Triple;
}
void set_bytes() {
flags |= Bytes;
}
void set_end_character(int32_t character) {
switch (character) {
case '\'':
flags |= SingleQuote;
break;
case '"':
flags |= DoubleQuote;
break;
case '`':
flags |= BackQuote;
break;
default:
assert(false);
}
}
char flags;
};
struct Scanner {
Scanner() {
assert(sizeof(Delimiter) == sizeof(char));
deserialize(NULL, 0);
}
unsigned serialize(char *buffer) {
size_t i = 0;
size_t delimiter_count = delimiter_stack.size();
if (delimiter_count > UINT8_MAX) delimiter_count = UINT8_MAX;
buffer[i++] = delimiter_count;
if (delimiter_count > 0) {
memcpy(&buffer[i], delimiter_stack.data(), delimiter_count);
}
i += delimiter_count;
vector<uint16_t>::iterator
iter = indent_length_stack.begin() + 1,
end = indent_length_stack.end();
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
buffer[i++] = *iter;
}
return i;
}
void deserialize(const char *buffer, unsigned length) {
delimiter_stack.clear();
indent_length_stack.clear();
indent_length_stack.push_back(0);
if (length > 0) {
size_t i = 0;
size_t delimiter_count = (uint8_t)buffer[i++];
delimiter_stack.resize(delimiter_count);
if (delimiter_count > 0) {
memcpy(delimiter_stack.data(), &buffer[i], delimiter_count);
}
i += delimiter_count;
for (; i < length; i++) {
indent_length_stack.push_back(buffer[i]);
}
}
}
void advance(TSLexer *lexer) {
lexer->advance(lexer, false);
}
void skip(TSLexer *lexer) {
lexer->advance(lexer, true);
}
bool scan(TSLexer *lexer, const bool *valid_symbols) {
bool error_recovery_mode = valid_symbols[STRING_CONTENT] && valid_symbols[INDENT];
bool within_brackets = valid_symbols[CLOSE_BRACE] || valid_symbols[CLOSE_PAREN] || valid_symbols[CLOSE_BRACKET];
if (valid_symbols[STRING_CONTENT] && !delimiter_stack.empty() && !error_recovery_mode) {
Delimiter delimiter = delimiter_stack.back();
int32_t end_character = delimiter.end_character();
bool has_content = false;
while (lexer->lookahead) {
if ((lexer->lookahead == '{' || lexer->lookahead == '}') && delimiter.is_format()) {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return has_content;
} else if (lexer->lookahead == '\\') {
if (delimiter.is_raw()) {
lexer->advance(lexer, false);
} else if (delimiter.is_bytes()) {
lexer->mark_end(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == 'N' || lexer->lookahead == 'u' || lexer->lookahead == 'U') {
// In bytes string, \N{...}, \uXXXX and \UXXXXXXXX are not escape sequences
// https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
lexer->advance(lexer, false);
} else {
lexer->result_symbol = STRING_CONTENT;
return has_content;
}
} else {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return has_content;
}
} else if (lexer->lookahead == end_character) {
if (delimiter.is_triple()) {
lexer->mark_end(lexer);
lexer->advance(lexer, false);
if (lexer->lookahead == end_character) {
lexer->advance(lexer, false);
if (lexer->lookahead == end_character) {
if (has_content) {
lexer->result_symbol = STRING_CONTENT;
} else {
lexer->advance(lexer, false);
lexer->mark_end(lexer);
delimiter_stack.pop_back();
lexer->result_symbol = STRING_END;
}
return true;
} else {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return true;
}
} else {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
return true;
}
} else {
if (has_content) {
lexer->result_symbol = STRING_CONTENT;
} else {
lexer->advance(lexer, false);
delimiter_stack.pop_back();
lexer->result_symbol = STRING_END;
}
lexer->mark_end(lexer);
return true;
}
} else if (lexer->lookahead == '\n' && has_content && !delimiter.is_triple()) {
return false;
}
advance(lexer);
has_content = true;
}
}
lexer->mark_end(lexer);
bool found_end_of_line = false;
uint32_t indent_length = 0;
int32_t first_comment_indent_length = -1;
for (;;) {
if (lexer->lookahead == '\n') {
found_end_of_line = true;
indent_length = 0;
skip(lexer);
} else if (lexer->lookahead == ' ') {
indent_length++;
skip(lexer);
} else if (lexer->lookahead == '\r') {
indent_length = 0;
skip(lexer);
} else if (lexer->lookahead == '\t') {
indent_length += 8;
skip(lexer);
} else if (lexer->lookahead == '#') {
if (first_comment_indent_length == -1) {
first_comment_indent_length = (int32_t)indent_length;
}
while (lexer->lookahead && lexer->lookahead != '\n') {
skip(lexer);
}
skip(lexer);
indent_length = 0;
} else if (lexer->lookahead == '\\') {
skip(lexer);
if (lexer->lookahead == '\r') {
skip(lexer);
}
if (lexer->lookahead == '\n') {
skip(lexer);
} else {
return false;
}
} else if (lexer->lookahead == '\f') {
indent_length = 0;
skip(lexer);
} else if (lexer->lookahead == 0) {
indent_length = 0;
found_end_of_line = true;
break;
} else {
break;
}
}
if (found_end_of_line) {
if (!indent_length_stack.empty()) {
uint16_t current_indent_length = indent_length_stack.back();
if (
valid_symbols[INDENT] &&
indent_length > current_indent_length
) {
indent_length_stack.push_back(indent_length);
lexer->result_symbol = INDENT;
return true;
}
if (
(valid_symbols[DEDENT] || (!valid_symbols[NEWLINE] && !within_brackets)) &&
indent_length < current_indent_length &&
// Wait to create a dedent token until we've consumed any comments
// whose indentation matches the current block.
first_comment_indent_length < (int32_t)current_indent_length
) {
indent_length_stack.pop_back();
lexer->result_symbol = DEDENT;
return true;
}
}
if (valid_symbols[NEWLINE] && !error_recovery_mode) {
lexer->result_symbol = NEWLINE;
return true;
}
}
if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) {
Delimiter delimiter;
bool has_flags = false;
while (lexer->lookahead) {
if (lexer->lookahead == 'f' || lexer->lookahead == 'F') {
delimiter.set_format();
} else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') {
delimiter.set_raw();
} else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
delimiter.set_bytes();
} else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') {
break;
}
has_flags = true;
advance(lexer);
}
if (lexer->lookahead == '`') {
delimiter.set_end_character('`');
advance(lexer);
lexer->mark_end(lexer);
} else if (lexer->lookahead == '\'') {
delimiter.set_end_character('\'');
advance(lexer);
lexer->mark_end(lexer);
if (lexer->lookahead == '\'') {
advance(lexer);
if (lexer->lookahead == '\'') {
advance(lexer);
lexer->mark_end(lexer);
delimiter.set_triple();
}
}
} else if (lexer->lookahead == '"') {
delimiter.set_end_character('"');
advance(lexer);
lexer->mark_end(lexer);
if (lexer->lookahead == '"') {
advance(lexer);
if (lexer->lookahead == '"') {
advance(lexer);
lexer->mark_end(lexer);
delimiter.set_triple();
}
}
}
if (delimiter.end_character()) {
delimiter_stack.push_back(delimiter);
lexer->result_symbol = STRING_START;
return true;
} else if (has_flags) {
return false;
}
}
return false;
}
vector<uint16_t> indent_length_stack;
vector<Delimiter> delimiter_stack;
};
}
extern "C" {
void *tree_sitter_python_external_scanner_create() {
return new Scanner();
}
bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->scan(lexer, valid_symbols);
}
unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->serialize(buffer);
}
void tree_sitter_python_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = static_cast<Scanner *>(payload);
scanner->deserialize(buffer, length);
}
void tree_sitter_python_external_scanner_destroy(void *payload) {
Scanner *scanner = static_cast<Scanner *>(payload);
delete scanner;
}
}

@ -13,9 +13,8 @@ extern "C" {
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
@ -140,7 +139,8 @@ struct TSLanguage {
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
lookahead = lexer->lookahead; \
eof = lexer->eof(lexer);
#define ADVANCE(state_value) \
{ \
@ -166,7 +166,7 @@ struct TSLanguage {
* Parse Table Macros
*/
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
@ -176,7 +176,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
.state = (state_value) \
} \
}}
@ -184,7 +184,7 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.state = (state_value), \
.repetition = true \
} \
}}

@ -22,6 +22,9 @@ def a(b):
(identifier))
(block
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(identifier)))))
(identifier)))))

@ -95,6 +95,10 @@ Await expressions
await i(j, 5)
return await i(j, 5)
async def region_exists(region: str) -> bool:
return region in await all_regions()
assert await a(b) == c
--------------------------------------------------------------------------------
@ -112,7 +116,32 @@ return await i(j, 5)
(identifier)
(argument_list
(identifier)
(integer))))))
(integer)))))
(function_definition
(identifier)
(parameters
(typed_parameter
(identifier)
(type
(identifier))))
(type
(identifier))
(block
(return_statement
(comparison_operator
(identifier)
(await
(call
(identifier)
(argument_list)))))))
(assert_statement
(comparison_operator
(await
(call
(identifier)
(argument_list
(identifier))))
(identifier))))
================================================================================
Call expressions
@ -281,18 +310,27 @@ exec("""exec _code_ in _globs_, _locs_""")
(identifier)
(argument_list
(string
(escape_sequence)
(escape_sequence))
(string_start)
(string_content
(escape_sequence)
(escape_sequence))
(string_end))
(dictionary
(pair
(string)
(string
(string_start)
(string_content)
(string_end))
(none)))
(identifier))))
(expression_statement
(call
(identifier)
(argument_list
(string)))))
(string
(string_start)
(string_content)
(string_end))))))
================================================================================
Async / await used as identifiers
@ -645,9 +683,11 @@ tail_leaves: List[Leaf] = []
(assignment
(identifier)
(type
(subscript
(generic_type
(identifier)
(identifier)))
(type_parameter
(type
(identifier)))))
(list))))
================================================================================
@ -762,7 +802,10 @@ foo(x := 3, cat='vector')
(integer))
(keyword_argument
(identifier)
(string)))))
(string
(string_start)
(string_content)
(string_end))))))
(expression_statement
(parenthesized_expression
(named_expression
@ -841,7 +884,10 @@ lambda (a, b): (a, b)
(identifier)
(argument_list
(binary_operator
(string)
(string
(string_start)
(string_content)
(string_end))
(identifier))))))
(expression_statement
(lambda
@ -903,6 +949,32 @@ Tuples with yield
(identifier)
(identifier))))))
================================================================================
Default Tuple Arguments
================================================================================
def comp_args((a, b)=(3, 4)):
return a, b
--------------------------------------------------------------------------------
(module
(function_definition
(identifier)
(parameters
(default_parameter
(tuple_pattern
(identifier)
(identifier))
(tuple
(integer)
(integer))))
(block
(return_statement
(expression_list
(identifier)
(identifier))))))
================================================================================
Conditional if expressions
================================================================================
@ -969,21 +1041,21 @@ async with a as b:
left: (identifier)
right: (identifier)))))))))
===========================================
================================================================================
Arbitrary indentation between brackets
==========================================
================================================================================
def a():
b(
1,
2
)
c = [
3
]
---
--------------------------------------------------------------------------------
(module
(function_definition
@ -1000,4 +1072,37 @@ def a():
(assignment
(identifier)
(list
(integer)))))))
(integer)))))))
================================================================================
Splat Inside of Expression List
================================================================================
a,c = [1,2],3
w, x, y, z = 0, *a, c
--------------------------------------------------------------------------------
(module
(expression_statement
(assignment
(pattern_list
(identifier)
(identifier))
(expression_list
(list
(integer)
(integer))
(integer))))
(expression_statement
(assignment
(pattern_list
(identifier)
(identifier)
(identifier)
(identifier))
(expression_list
(integer)
(list_splat
(identifier))
(identifier)))))

@ -134,50 +134,93 @@ string"
b"\x12\u12\U12\x13\N{WINKING FACE}"
"\xab\123\'\"\a\b\f\r\n\t\v\\"
"\xgh\o123\p\q\c\d\e\u12\U1234"
f'\N{GREEK CAPITAL LETTER DELTA}'
--------------------------------------------------------------------------------
(module
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string
(escape_sequence)))
(string_start)
(string_content
(escape_sequence))
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string
(escape_sequence)))
(string_start)
(string_content
(escape_sequence))
(string_end)))
(expression_statement
(string
(escape_sequence)
(escape_sequence)))
(string_start)
(string_content
(escape_sequence)
(escape_sequence))
(string_end)))
(expression_statement
(string
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)))
(string_start)
(string_content
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence))
(string_end)))
(expression_statement
(string)))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string
(string_start)
(string_content
(escape_sequence))
(string_end))))
================================================================================
Raw strings
@ -190,20 +233,57 @@ Raw strings
r'ab\x00cd'
ur"\n"
# raw f-string
fr"\{0}"
r"\\"
r'"a\
de\
fg"'
--------------------------------------------------------------------------------
(module
(expression_statement
(string
(escape_sequence)))
(string_start)
(string_content
(escape_sequence))
(string_end)))
(expression_statement
(string
(escape_sequence)))
(string_start)
(string_content
(escape_sequence))
(string_end)))
(comment)
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string)))
(string
(string_start)
(string_content)
(string_end)))
(comment)
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(integer))
(string_end)))
(expression_statement
(string
(string_start)
(string_end)))
(expression_statement
(string
(string_start)
(string_content)
(string_end))))
================================================================================
Raw strings with escaped quotes
@ -222,8 +302,14 @@ re.compile(r"(\n|\A)#include\s*['\"]"
(identifier))
(argument_list
(concatenated_string
(string)
(string))))))
(string
(string_start)
(string_content)
(string_end))
(string
(string_start)
(string_content)
(string_end)))))))
================================================================================
Format strings
@ -238,6 +324,19 @@ f"a {b}}}"
f"a {{{b}"
f"a {{b}}"
f"a {{{b}}}"
f"{c,}"
f"{yield d}"
f"{*a,}"
def function():
return f"""
{"string1" if True else
"string2"}"""
def test(self):
self.assertEqual(f'''A complex trick: {
2 # two
}''', 'A complex trick: 2')
--------------------------------------------------------------------------------
@ -245,37 +344,142 @@ f"a {{{b}}}"
(comment)
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(call
(identifier)
(argument_list
(string
(string_start)
(string_content)
(interpolation
(identifier))))))))
(identifier))
(string_content)
(string_end)))))
(string_content)
(string_end)))
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier))))
(identifier))
(string_content)
(string_end)))
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier))))
(identifier))
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content
(escape_interpolation)
(escape_interpolation))
(string_end)))
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier))))
(identifier))
(string_content
(escape_interpolation))
(string_end)))
(expression_statement
(string
(string_start)
(string_content
(escape_interpolation))
(interpolation
(identifier))))
(identifier))
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content
(escape_interpolation)
(escape_interpolation))
(string_end)))
(expression_statement
(string
(string_start)
(string_content
(escape_interpolation))
(interpolation
(identifier)))))
(identifier))
(string_content
(escape_interpolation))
(string_end)))
(expression_statement
(string
(string_start)
(interpolation
(expression_list
(identifier)))
(string_end)))
(expression_statement
(string
(string_start)
(interpolation
(yield
(identifier)))
(string_end)))
(expression_statement
(string
(string_start)
(interpolation
(expression_list
(list_splat
(identifier))))
(string_end)))
(function_definition
(identifier)
(parameters)
(block
(return_statement
(string
(string_start)
(string_content)
(interpolation
(conditional_expression
(string
(string_start)
(string_content)
(string_end))
(true)
(string
(string_start)
(string_content)
(string_end))))
(string_end)))))
(function_definition
(identifier)
(parameters
(identifier))
(block
(expression_statement
(call
(attribute
(identifier)
(identifier))
(argument_list
(string
(string_start)
(string_content)
(interpolation
(integer)
(comment))
(string_end))
(string
(string_start)
(string_content)
(string_end))))))))
================================================================================
Format strings with format specifiers
@ -286,20 +490,26 @@ f"{b:{c.d}.{d.e}}"
f"{a:#06x}"
f"{a=}"
f"{a=:.2f}"
f"{value:{width + padding!r}.{precision}}"
--------------------------------------------------------------------------------
(module
(expression_statement
(string
(string_start)
(string_content)
(interpolation
(identifier)
(format_specifier))
(string_content)
(interpolation
(identifier)
(format_specifier))))
(format_specifier))
(string_end)))
(expression_statement
(string
(string_start)
(interpolation
(identifier)
(format_specifier
@ -310,15 +520,42 @@ f"{a=:.2f}"
(format_expression
(attribute
(identifier)
(identifier)))))))
(identifier)))))
(string_end)))
(expression_statement
(string
(interpolation (identifier) (format_specifier))))
(string_start)
(interpolation
(identifier)
(format_specifier))
(string_end)))
(expression_statement
(string (interpolation (identifier))))
(string
(string_start)
(interpolation
(identifier))
(string_end)))
(expression_statement
(string
(interpolation (identifier) (format_specifier)))))
(string_start)
(interpolation
(identifier)
(format_specifier))
(string_end)))
(expression_statement
(string
(string_start)
(interpolation
(identifier)
(format_specifier
(format_expression
(binary_operator
(identifier)
(identifier))
(type_conversion))
(format_expression
(identifier))))
(string_end))))
================================================================================
Unicode escape sequences
@ -331,9 +568,12 @@ Unicode escape sequences
(module
(expression_statement
(string
(escape_sequence)
(escape_sequence)
(escape_sequence))))
(string_start)
(string_content
(escape_sequence)
(escape_sequence)
(escape_sequence))
(string_end))))
================================================================================
Other primitives
@ -364,9 +604,18 @@ Concatenated strings
(module
(expression_statement
(concatenated_string
(string)
(string)
(string))))
(string
(string_start)
(string_content)
(string_end))
(string
(string_start)
(string_content)
(string_end))
(string
(string_start)
(string_content)
(string_end)))))
================================================================================
Multi-line strings
@ -412,25 +661,46 @@ and another escaped newline\n\
(module
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(expression_statement
(string
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence))))
(string_start)
(string_content
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence)
(escape_sequence))
(string_end))))
================================================================================
Lists
@ -468,24 +738,24 @@ Lists
(identifier))))
(expression_statement
(list
(list_splat
(attribute
(identifier)
(identifier)))))
(attribute
(list_splat
(identifier))
(identifier))))
(expression_statement
(list
(list_splat
(attribute
(subscript
(identifier)
(attribute
(subscript
(list_splat
(identifier))
(identifier)))))
(identifier))
(identifier))))
(expression_statement
(list
(list_splat
(call
(identifier)
(argument_list))))))
(call
(list_splat
(identifier))
(argument_list)))))
================================================================================
List comprehensions

File diff suppressed because it is too large

@ -124,6 +124,7 @@ print a
print b, c
print 0 or 1, 1 or 0,
print 0 or 1
print not True
--------------------------------------------------------------------------------
@ -143,7 +144,10 @@ print 0 or 1
(print_statement
(boolean_operator
(integer)
(integer))))
(integer)))
(print_statement
(not_operator
(true))))
================================================================================
Print statements with redirection
@ -161,8 +165,14 @@ print >> a, "b", "c"
(print_statement
(chevron
(identifier))
(string)
(string)))
(string
(string_start)
(string_content)
(string_end))
(string
(string_start)
(string_content)
(string_end))))
================================================================================
Assert statements
@ -486,6 +496,17 @@ else:
finally:
f
try:
a
except* b:
c
except* d as e:
f
else:
g
finally:
h
--------------------------------------------------------------------------------
(module
@ -531,6 +552,31 @@ finally:
body: (block
(expression_statement
(identifier))))
(finally_clause
(block
(expression_statement
(identifier)))))
(try_statement
body: (block
(expression_statement
(identifier)))
(except_group_clause
(identifier)
(block
(expression_statement
(identifier))))
(except_group_clause
(as_pattern
(identifier)
alias: (as_pattern_target
(identifier)))
(block
(expression_statement
(identifier))))
(else_clause
body: (block
(expression_statement
(identifier))))
(finally_clause
(block
(expression_statement
@ -547,6 +593,9 @@ with (open('d') as d,
open('e') as e):
f
with e as f, g as h,:
i
--------------------------------------------------------------------------------
(module
@ -563,21 +612,42 @@ with (open('d') as d,
(with_statement
(with_clause
(with_item
(tuple
(as_pattern
(call
(identifier)
(argument_list
(string)))
(as_pattern_target
(identifier)))
(as_pattern
(call
(identifier)
(argument_list
(string)))
(as_pattern_target
(identifier))))))
(as_pattern
(call
(identifier)
(argument_list
(string
(string_start)
(string_content)
(string_end))))
(as_pattern_target
(identifier))))
(with_item
(as_pattern
(call
(identifier)
(argument_list
(string
(string_start)
(string_content)
(string_end))))
(as_pattern_target
(identifier)))))
(block
(expression_statement
(identifier))))
(with_statement
(with_clause
(with_item
(as_pattern
(identifier)
(as_pattern_target
(identifier))))
(with_item
(as_pattern
(identifier)
(as_pattern_target
(identifier)))))
(block
(expression_statement
(identifier)))))
@ -665,9 +735,11 @@ async def d(a:str="default", b=c) -> None:
(typed_parameter
(identifier)
type: (type
(subscript
value: (identifier)
subscript: (identifier)))))
(generic_type
(identifier)
(type_parameter
(type
(identifier)))))))
return_type: (type
(identifier))
body: (block
@ -706,7 +778,10 @@ async def d(a:str="default", b=c) -> None:
name: (identifier)
type: (type
(identifier))
value: (string))
value: (string
(string_start)
(string_content)
(string_end)))
(default_parameter
name: (identifier)
value: (identifier)))
@ -739,6 +814,11 @@ def h(*a):
i((*a))
j(((*a)))
def foo():
pass \
\
\
--------------------------------------------------------------------------------
(module
@ -820,7 +900,15 @@ def h(*a):
(parenthesized_expression
(parenthesized_expression
(list_splat
(identifier))))))))))
(identifier)))))))))
(function_definition
name: (identifier)
parameters: (parameters)
body: (block
(pass_statement)))
(line_continuation)
(line_continuation)
(line_continuation))
================================================================================
Empty blocks
@ -964,6 +1052,10 @@ class C:
async def f():
g
@buttons[0].clicked.connect
def spam():
...
--------------------------------------------------------------------------------
(module
@ -1005,7 +1097,22 @@ class C:
(parameters)
(block
(expression_statement
(identifier)))))))))
(identifier))))))))
(decorated_definition
(decorator
(attribute
(attribute
(subscript
(identifier)
(integer))
(identifier))
(identifier)))
(function_definition
(identifier)
(parameters)
(block
(expression_statement
(ellipsis))))))
================================================================================
Raise statements
@ -1023,12 +1130,18 @@ raise RunTimeError('NO') from e
(call
(identifier)
(argument_list
(string))))
(string
(string_start)
(string_content)
(string_end)))))
(raise_statement
(call
(identifier)
(argument_list
(string)))
(string
(string_start)
(string_content)
(string_end))))
(identifier)))
================================================================================
@ -1115,6 +1228,11 @@ if c:
# three
# four
def a():
if b:
b # comment
b # comment
# five
--------------------------------------------------------------------------------
@ -1134,6 +1252,19 @@ if c:
(identifier))
(comment)
(comment)))
(function_definition
(identifier)
(parameters)
(block
(if_statement
(identifier)
(block
(expression_statement
(identifier))
(comment)))
(expression_statement
(identifier))
(comment)))
(comment))
================================================================================
@ -1146,7 +1277,10 @@ print "a"
(module
(print_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(comment))
================================================================================
@ -1172,19 +1306,40 @@ Exec statements
exec '1+1'
exec 'x+=1' in None
exec 'x+=1' in a, b
func = "print"
exec func in {}
--------------------------------------------------------------------------------
(module
(exec_statement
(string))
(string
(string_start)
(string_content)
(string_end)))
(exec_statement
(string)
(string
(string_start)
(string_content)
(string_end))
(none))
(exec_statement
(string)
(string
(string_start)
(string_content)
(string_end))
(identifier)
(identifier)))
(identifier))
(expression_statement
(assignment
(identifier)
(string
(string_start)
(string_content)
(string_end))))
(exec_statement
(identifier)
(dictionary)))
================================================================================
Extra newlines
@ -1249,11 +1404,18 @@ or len("aa")
(call
(identifier)
(argument_list
(string)))
(string
(string_start)
(string_content)
(string_end))))
(line_continuation)
(call
(identifier)
(argument_list
(string))))))
(string
(string_start)
(string_content)
(string_end)))))))
================================================================================
Statements with semicolons
@ -1276,3 +1438,191 @@ foo; bar;
(identifier))
(expression_statement
(identifier)))
================================================================================
Type Alias Statements
================================================================================
type Point = tuple[float, float]
type Point[T] = tuple[T, T]
type IntFunc[**P] = Callable[P, int] # ParamSpec
type LabeledTuple[*Ts] = tuple[str, *Ts] # TypeVarTuple
type HashableSequence[T: Hashable] = Sequence[T] # TypeVar with bound
type IntOrStrSequence[T: (int, str)] = Sequence[T] # TypeVar with constraints
--------------------------------------------------------------------------------
(module
(type_alias_statement
(type
(identifier))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(identifier))))))
(type_alias_statement
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier)))))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(identifier))))))
(type_alias_statement
(type
(generic_type
(identifier)
(type_parameter
(type
(splat_type
(identifier))))))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(identifier))))))
(comment)
(type_alias_statement
(type
(generic_type
(identifier)
(type_parameter
(type
(splat_type
(identifier))))))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(splat_type
(identifier)))))))
(comment)
(type_alias_statement
(type
(generic_type
(identifier)
(type_parameter
(type
(constrained_type
(type
(identifier))
(type
(identifier)))))))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))))))
(comment)
(type_alias_statement
(type
(generic_type
(identifier)
(type_parameter
(type
(constrained_type
(type
(identifier))
(type
(tuple
(identifier)
(identifier))))))))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))))))
(comment))
================================================================================
Generic Function Definitions
================================================================================
def more_generic[T, *Ts, **P]():
type TA[T2, *Ts2, **P2] = tuple[Callable[P, tuple[T, *Ts]], Callable[P2, tuple[T2, *Ts2]]]
return TA
--------------------------------------------------------------------------------
(module
(function_definition
(identifier)
(type_parameter
(type
(identifier))
(type
(splat_type
(identifier)))
(type
(splat_type
(identifier))))
(parameters)
(block
(type_alias_statement
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(splat_type
(identifier)))
(type
(splat_type
(identifier))))))
(type
(generic_type
(identifier)
(type_parameter
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(splat_type
(identifier)))))))))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(generic_type
(identifier)
(type_parameter
(type
(identifier))
(type
(splat_type
(identifier)))))))))))))
(return_statement
(identifier)))))

@ -0,0 +1,15 @@
class MyClass:
# ^ definition.class
def hello():
# ^ definition.function
print "hello from MyClass"
MyClass.hello()
# ^ reference.call
def main():
# ^ definition.function
print "Hello, world!"
main()
# <- reference.call