mirror of https://github.com/Wilfred/difftastic/
Merge commit 'c01fb4e38587e959b9058b8cd34b9e6a3068c827'
commit
9403e410c2
@ -0,0 +1,20 @@
|
||||
module.exports = {
|
||||
'env': {
|
||||
'commonjs': true,
|
||||
'es2021': true,
|
||||
},
|
||||
'extends': 'google',
|
||||
'overrides': [
|
||||
],
|
||||
'parserOptions': {
|
||||
'ecmaVersion': 'latest',
|
||||
'sourceType': 'module',
|
||||
},
|
||||
'rules': {
|
||||
'indent': ['error', 2, {'SwitchCase': 1}],
|
||||
'max-len': [
|
||||
'error',
|
||||
{'code': 120, 'ignoreComments': true, 'ignoreUrls': true, 'ignoreStrings': true},
|
||||
],
|
||||
},
|
||||
};
|
||||
@ -1,2 +1,7 @@
|
||||
/src/** linguist-vendored
|
||||
/src/parser.c linguist-vendored
|
||||
/src/*.json linguist-vendored
|
||||
/examples/* linguist-vendored
|
||||
|
||||
src/grammar.json -diff
|
||||
src/node-types.json -diff
|
||||
src/parser.c -diff
|
||||
|
||||
@ -0,0 +1,22 @@
|
||||
name: Fuzz Parser
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- src/scanner.c
|
||||
pull_request:
|
||||
paths:
|
||||
- src/scanner.c
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Parser fuzzing
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: vigoux/tree-sitter-fuzz-action@v1
|
||||
with:
|
||||
language: python
|
||||
external-scanner: src/scanner.c
|
||||
time: 60
|
||||
@ -0,0 +1,19 @@
|
||||
name: Lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install modules
|
||||
run: npm install
|
||||
- name: Run ESLint
|
||||
run: npm run lint
|
||||
@ -0,0 +1,103 @@
|
||||
name: Release
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["CI"]
|
||||
branches:
|
||||
- master
|
||||
types:
|
||||
- completed
|
||||
|
||||
jobs:
|
||||
release:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get previous commit SHA
|
||||
id: get_previous_commit
|
||||
run: |
|
||||
LATEST_TAG=$(git describe --tags --abbrev=0)
|
||||
if [[ -z "$LATEST_TAG" ]]; then
|
||||
echo "No tag found. Failing..."
|
||||
exit 1
|
||||
fi
|
||||
echo "latest_tag=${LATEST_TAG#v}" >> "$GITHUB_ENV" # Remove 'v' prefix from the tag
|
||||
|
||||
- name: Check if version changed and is greater than the previous
|
||||
id: version_check
|
||||
run: |
|
||||
# Compare the current version with the version from the previous commit
|
||||
PREVIOUS_NPM_VERSION=${{ env.latest_tag }}
|
||||
CURRENT_NPM_VERSION=$(jq -r '.version' package.json)
|
||||
CURRENT_CARGO_VERSION=$(awk -F '"' '/^version/ {print $2}' Cargo.toml)
|
||||
if [[ "$CURRENT_NPM_VERSION" != "$CURRENT_CARGO_VERSION" ]]; then # Cargo.toml and package.json versions must match
|
||||
echo "Mismatch: NPM version ($CURRENT_NPM_VERSION) and Cargo.toml version ($CURRENT_CARGO_VERSION)"
|
||||
echo "version_changed=false" >> "$GITHUB_ENV"
|
||||
else
|
||||
if [[ "$PREVIOUS_NPM_VERSION" == "$CURRENT_NPM_VERSION" ]]; then
|
||||
echo "version_changed=" >> "$GITHUB_ENV"
|
||||
else
|
||||
IFS='.' read -ra PREVIOUS_VERSION_PARTS <<< "$PREVIOUS_NPM_VERSION"
|
||||
IFS='.' read -ra CURRENT_VERSION_PARTS <<< "$CURRENT_NPM_VERSION"
|
||||
VERSION_CHANGED=false
|
||||
for i in "${!PREVIOUS_VERSION_PARTS[@]}"; do
|
||||
if [[ ${CURRENT_VERSION_PARTS[i]} -gt ${PREVIOUS_VERSION_PARTS[i]} ]]; then
|
||||
VERSION_CHANGED=true
|
||||
break
|
||||
elif [[ ${CURRENT_VERSION_PARTS[i]} -lt ${PREVIOUS_VERSION_PARTS[i]} ]]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
echo "version_changed=$VERSION_CHANGED" >> "$GITHUB_ENV"
|
||||
echo "current_version=${CURRENT_NPM_VERSION}" >> "$GITHUB_ENV"
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Display result
|
||||
run: |
|
||||
echo "Version bump detected: ${{ env.version_changed }}"
|
||||
|
||||
- name: Fail if version is lower
|
||||
if: env.version_changed == 'false'
|
||||
run: exit 1
|
||||
|
||||
- name: Setup Node
|
||||
if: env.version_changed == 'true'
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 18
|
||||
registry-url: "https://registry.npmjs.org"
|
||||
- name: Publish to NPM
|
||||
if: env.version_changed == 'true'
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
|
||||
run: npm publish
|
||||
|
||||
- name: Setup Rust
|
||||
if: env.version_changed == 'true'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Publish to Crates.io
|
||||
if: env.version_changed == 'true'
|
||||
uses: katyo/publish-crates@v2
|
||||
with:
|
||||
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
|
||||
- name: Tag versions
|
||||
if: env.version_changed == 'true'
|
||||
run: |
|
||||
git checkout master
|
||||
git config user.name github-actions[bot]
|
||||
git config user.email github-actions[bot]@users.noreply.github.com
|
||||
git tag -d "v${{ env.current_version }}" || true
|
||||
git push origin --delete "v${{ env.current_version }}" || true
|
||||
git tag -a "v${{ env.current_version }}" -m "Version ${{ env.current_version }}"
|
||||
git push origin "v${{ env.current_version }}"
|
||||
@ -1,6 +1,6 @@
|
||||
corpus
|
||||
examples
|
||||
build
|
||||
script
|
||||
target
|
||||
/test
|
||||
/examples
|
||||
/build
|
||||
/script
|
||||
/target
|
||||
bindings/rust
|
||||
|
||||
@ -1,31 +1,27 @@
|
||||
[package]
|
||||
name = "tree-sitter-python"
|
||||
description = "Python grammar for the tree-sitter parsing library"
|
||||
version = "0.20.2"
|
||||
description = "Python grammar for tree-sitter"
|
||||
version = "0.20.4"
|
||||
authors = [
|
||||
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
|
||||
"Douglas Creager <dcreager@dcreager.net>",
|
||||
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
|
||||
"Douglas Creager <dcreager@dcreager.net>",
|
||||
]
|
||||
license = "MIT"
|
||||
readme = "bindings/rust/README.md"
|
||||
keywords = ["incremental", "parsing", "python"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
repository = "https://github.com/tree-sitter/tree-sitter-python"
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
autoexamples = false
|
||||
|
||||
build = "bindings/rust/build.rs"
|
||||
include = [
|
||||
"bindings/rust/*",
|
||||
"grammar.js",
|
||||
"queries/*",
|
||||
"src/*",
|
||||
]
|
||||
include = ["bindings/rust/*", "grammar.js", "queries/*", "src/*"]
|
||||
|
||||
[lib]
|
||||
path = "bindings/rust/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
tree-sitter = ">= 0.19, < 0.21"
|
||||
tree-sitter = "~0.20.10"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0"
|
||||
cc = "~1.0"
|
||||
|
||||
@ -0,0 +1,33 @@
|
||||
// swift-tools-version:5.3
|
||||
import PackageDescription
|
||||
|
||||
let package = Package(
|
||||
name: "TreeSitterPython",
|
||||
products: [
|
||||
.library(name: "TreeSitterPython", targets: ["TreeSitterPython"]),
|
||||
],
|
||||
dependencies: [],
|
||||
targets: [
|
||||
.target(name: "TreeSitterPython",
|
||||
path: ".",
|
||||
exclude: [
|
||||
"binding.gyp",
|
||||
"bindings",
|
||||
"Cargo.toml",
|
||||
"corpus",
|
||||
"grammar.js",
|
||||
"LICENSE",
|
||||
"package.json",
|
||||
"README.md",
|
||||
],
|
||||
sources: [
|
||||
"src/parser.c",
|
||||
"src/scanner.c",
|
||||
],
|
||||
resources: [
|
||||
.copy("queries")
|
||||
],
|
||||
publicHeadersPath: "bindings/swift",
|
||||
cSettings: [.headerSearchPath("src")])
|
||||
]
|
||||
)
|
||||
@ -1,28 +1,19 @@
|
||||
use std::path::Path;
|
||||
extern crate cc;
|
||||
|
||||
fn main() {
|
||||
let src_dir = Path::new("src");
|
||||
let src_dir = std::path::Path::new("src");
|
||||
|
||||
let mut c_config = cc::Build::new();
|
||||
c_config.include(&src_dir);
|
||||
c_config.include(src_dir);
|
||||
c_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable")
|
||||
.flag_if_supported("-Wno-trigraphs");
|
||||
let parser_path = src_dir.join("parser.c");
|
||||
c_config.file(&parser_path);
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
c_config.compile("parser");
|
||||
|
||||
let mut cpp_config = cc::Build::new();
|
||||
cpp_config.cpp(true);
|
||||
cpp_config.include(&src_dir);
|
||||
cpp_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable");
|
||||
let scanner_path = src_dir.join("scanner.cc");
|
||||
cpp_config.file(&scanner_path);
|
||||
let scanner_path = src_dir.join("scanner.c");
|
||||
c_config.file(&scanner_path);
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
cpp_config.compile("scanner");
|
||||
|
||||
c_config.compile("parser");
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
}
|
||||
|
||||
@ -0,0 +1,16 @@
|
||||
#ifndef TREE_SITTER_PYTHON_H_
|
||||
#define TREE_SITTER_PYTHON_H_
|
||||
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern TSLanguage *tree_sitter_python();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_PYTHON_H_
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,4 @@
|
||||
examples/cpython/Lib/test/badsyntax_3131.py
|
||||
examples/cpython/Lib/test/badsyntax_future8.py
|
||||
examples/cpython/Lib/test/test_compile.py
|
||||
examples/cpython/Tools/build/generate_re_casefix.py
|
||||
@ -1,36 +1,47 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
set -eu
|
||||
|
||||
cd "$(dirname "$0")/.."
|
||||
|
||||
function checkout() {
|
||||
repo=$1; url=$2; sha=$3
|
||||
|
||||
if [ ! -d "$repo" ]; then
|
||||
git clone "https://github.com/$url" "$repo"
|
||||
fi
|
||||
|
||||
pushd "$repo"
|
||||
git fetch && git reset --hard "$sha"
|
||||
popd
|
||||
function clone_repo {
|
||||
owner=$1
|
||||
name=$2
|
||||
sha=$3
|
||||
|
||||
path=examples/$name
|
||||
if [ ! -d "$path" ]; then
|
||||
echo "Cloning $owner/$name"
|
||||
git clone "https://github.com/$owner/$name" "$path"
|
||||
fi
|
||||
|
||||
pushd "$path" >/dev/null
|
||||
actual_sha=$(git rev-parse HEAD)
|
||||
if [ "$actual_sha" != "$sha" ]; then
|
||||
echo "Updating $owner/$name to $sha"
|
||||
git fetch
|
||||
git reset --hard "$sha"
|
||||
fi
|
||||
popd >/dev/null
|
||||
}
|
||||
|
||||
checkout examples/numpy numpy/numpy 058851c5cfc98f50f11237b1c13d77cfd1f40475
|
||||
checkout examples/django django/django 01974d7f7549b2dca2a729c3c1a1ea7d4585eb3a
|
||||
checkout examples/flask pallets/flask de464c03e134127140e5622e230790806a133ff9
|
||||
clone_repo numpy numpy 058851c5cfc98f50f11237b1c13d77cfd1f40475
|
||||
clone_repo django django 01974d7f7549b2dca2a729c3c1a1ea7d4585eb3a
|
||||
clone_repo pallets flask de464c03e134127140e5622e230790806a133ff9
|
||||
clone_repo python cpython bb456a08a3db851e6feaefc3328f39096919ec8d
|
||||
|
||||
known_failures="$(cat script/known_failures.txt)"
|
||||
|
||||
# shellcheck disable=2046
|
||||
tree-sitter parse -q \
|
||||
'examples/**/*.py' \
|
||||
$(for file in $known_failures; do echo "!${file}"; done)
|
||||
'examples/**/*.py' \
|
||||
$(for file in $known_failures; do echo "!${file}"; done)
|
||||
|
||||
example_count=$(find examples -name '*.py' | wc -l)
|
||||
failure_count=$(wc -w <<< "$known_failures")
|
||||
success_count=$(( $example_count - $failure_count ))
|
||||
success_percent=$(bc -l <<< "100*${success_count}/${example_count}")
|
||||
failure_count=$(wc -w <<<"$known_failures")
|
||||
success_count=$((example_count - failure_count))
|
||||
success_percent=$(bc -l <<<"100*${success_count}/${example_count}")
|
||||
|
||||
printf \
|
||||
"Successfully parsed %d of %d example files (%.1f%%)\n" \
|
||||
$success_count $example_count $success_percent
|
||||
"Successfully parsed %d of %d example files (%.1f%%)\n" \
|
||||
"$success_count" "$example_count" "$success_percent"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,523 @@
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <tree_sitter/parser.h>
|
||||
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define VEC_RESIZE(vec, _cap) \
|
||||
void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \
|
||||
assert(tmp != NULL); \
|
||||
(vec).data = tmp; \
|
||||
(vec).cap = (_cap);
|
||||
|
||||
#define VEC_GROW(vec, _cap) \
|
||||
if ((vec).cap < (_cap)) { \
|
||||
VEC_RESIZE((vec), (_cap)); \
|
||||
}
|
||||
|
||||
#define VEC_PUSH(vec, el) \
|
||||
if ((vec).cap == (vec).len) { \
|
||||
VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \
|
||||
} \
|
||||
(vec).data[(vec).len++] = (el);
|
||||
|
||||
#define VEC_POP(vec) (vec).len--;
|
||||
|
||||
#define VEC_NEW \
|
||||
{ .len = 0, .cap = 0, .data = NULL }
|
||||
|
||||
#define VEC_BACK(vec) ((vec).data[(vec).len - 1])
|
||||
|
||||
#define VEC_FREE(vec) \
|
||||
{ \
|
||||
if ((vec).data != NULL) \
|
||||
free((vec).data); \
|
||||
}
|
||||
|
||||
#define VEC_CLEAR(vec) (vec).len = 0;
|
||||
|
||||
enum TokenType {
|
||||
NEWLINE,
|
||||
INDENT,
|
||||
DEDENT,
|
||||
STRING_START,
|
||||
STRING_CONTENT,
|
||||
ESCAPE_INTERPOLATION,
|
||||
STRING_END,
|
||||
COMMENT,
|
||||
CLOSE_PAREN,
|
||||
CLOSE_BRACKET,
|
||||
CLOSE_BRACE,
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
SingleQuote = 1 << 0,
|
||||
DoubleQuote = 1 << 1,
|
||||
BackQuote = 1 << 2,
|
||||
Raw = 1 << 3,
|
||||
Format = 1 << 4,
|
||||
Triple = 1 << 5,
|
||||
Bytes = 1 << 6,
|
||||
} Flags;
|
||||
|
||||
typedef struct {
|
||||
char flags;
|
||||
} Delimiter;
|
||||
|
||||
static inline Delimiter new_delimiter() { return (Delimiter){0}; }
|
||||
|
||||
static inline bool is_format(Delimiter *delimiter) {
|
||||
return delimiter->flags & Format;
|
||||
}
|
||||
|
||||
static inline bool is_raw(Delimiter *delimiter) {
|
||||
return delimiter->flags & Raw;
|
||||
}
|
||||
|
||||
static inline bool is_triple(Delimiter *delimiter) {
|
||||
return delimiter->flags & Triple;
|
||||
}
|
||||
|
||||
static inline bool is_bytes(Delimiter *delimiter) {
|
||||
return delimiter->flags & Bytes;
|
||||
}
|
||||
|
||||
static inline int32_t end_character(Delimiter *delimiter) {
|
||||
if (delimiter->flags & SingleQuote) {
|
||||
return '\'';
|
||||
}
|
||||
if (delimiter->flags & DoubleQuote) {
|
||||
return '"';
|
||||
}
|
||||
if (delimiter->flags & BackQuote) {
|
||||
return '`';
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void set_format(Delimiter *delimiter) {
|
||||
delimiter->flags |= Format;
|
||||
}
|
||||
|
||||
static inline void set_raw(Delimiter *delimiter) { delimiter->flags |= Raw; }
|
||||
|
||||
static inline void set_triple(Delimiter *delimiter) {
|
||||
delimiter->flags |= Triple;
|
||||
}
|
||||
|
||||
static inline void set_bytes(Delimiter *delimiter) {
|
||||
delimiter->flags |= Bytes;
|
||||
}
|
||||
|
||||
static inline void set_end_character(Delimiter *delimiter, int32_t character) {
|
||||
switch (character) {
|
||||
case '\'':
|
||||
delimiter->flags |= SingleQuote;
|
||||
break;
|
||||
case '"':
|
||||
delimiter->flags |= DoubleQuote;
|
||||
break;
|
||||
case '`':
|
||||
delimiter->flags |= BackQuote;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint32_t len;
|
||||
uint32_t cap;
|
||||
uint16_t *data;
|
||||
} indent_vec;
|
||||
|
||||
static indent_vec indent_vec_new() {
|
||||
indent_vec vec = VEC_NEW;
|
||||
vec.data = calloc(1, sizeof(uint16_t));
|
||||
vec.cap = 1;
|
||||
return vec;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint32_t len;
|
||||
uint32_t cap;
|
||||
Delimiter *data;
|
||||
} delimiter_vec;
|
||||
|
||||
static delimiter_vec delimiter_vec_new() {
|
||||
delimiter_vec vec = VEC_NEW;
|
||||
vec.data = calloc(1, sizeof(Delimiter));
|
||||
vec.cap = 1;
|
||||
return vec;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
indent_vec indents;
|
||||
delimiter_vec delimiters;
|
||||
bool inside_f_string;
|
||||
} Scanner;
|
||||
|
||||
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
|
||||
|
||||
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
|
||||
|
||||
bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
|
||||
const bool *valid_symbols) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
|
||||
bool error_recovery_mode =
|
||||
valid_symbols[STRING_CONTENT] && valid_symbols[INDENT];
|
||||
bool within_brackets = valid_symbols[CLOSE_BRACE] ||
|
||||
valid_symbols[CLOSE_PAREN] ||
|
||||
valid_symbols[CLOSE_BRACKET];
|
||||
|
||||
bool advanced_once = false;
|
||||
if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.len > 0 &&
|
||||
(lexer->lookahead == '{' || lexer->lookahead == '}') &&
|
||||
!error_recovery_mode) {
|
||||
Delimiter delimiter = VEC_BACK(scanner->delimiters);
|
||||
if (is_format(&delimiter)) {
|
||||
lexer->mark_end(lexer);
|
||||
bool is_left_brace = lexer->lookahead == '{';
|
||||
advance(lexer);
|
||||
advanced_once = true;
|
||||
if ((lexer->lookahead == '{' && is_left_brace) ||
|
||||
(lexer->lookahead == '}' && !is_left_brace)) {
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = ESCAPE_INTERPOLATION;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[STRING_CONTENT] && scanner->delimiters.len > 0 &&
|
||||
!error_recovery_mode) {
|
||||
Delimiter delimiter = VEC_BACK(scanner->delimiters);
|
||||
int32_t end_char = end_character(&delimiter);
|
||||
bool has_content = advanced_once;
|
||||
while (lexer->lookahead) {
|
||||
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') &&
|
||||
is_format(&delimiter)) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
if (lexer->lookahead == '\\') {
|
||||
if (is_raw(&delimiter)) {
|
||||
// Step over the backslash.
|
||||
advance(lexer);
|
||||
// Step over any escaped quotes.
|
||||
if (lexer->lookahead == end_character(&delimiter) ||
|
||||
lexer->lookahead == '\\') {
|
||||
advance(lexer);
|
||||
}
|
||||
// Step over newlines
|
||||
if (lexer -> lookahead == '\r') {
|
||||
advance(lexer);
|
||||
if (lexer -> lookahead == '\n') {
|
||||
advance(lexer);
|
||||
}
|
||||
} else if (lexer -> lookahead == '\n') {
|
||||
advance(lexer);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (is_bytes(&delimiter)) {
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == 'N' || lexer->lookahead == 'u' ||
|
||||
lexer->lookahead == 'U') {
|
||||
// In bytes string, \N{...}, \uXXXX and \UXXXXXXXX are
|
||||
// not escape sequences
|
||||
// https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
|
||||
advance(lexer);
|
||||
} else {
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
} else {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
} else if (lexer->lookahead == end_char) {
|
||||
if (is_triple(&delimiter)) {
|
||||
lexer->mark_end(lexer);
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == end_char) {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == end_char) {
|
||||
if (has_content) {
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
} else {
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
VEC_POP(scanner->delimiters);
|
||||
lexer->result_symbol = STRING_END;
|
||||
scanner->inside_f_string = false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return true;
|
||||
}
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return true;
|
||||
}
|
||||
if (has_content) {
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
} else {
|
||||
advance(lexer);
|
||||
VEC_POP(scanner->delimiters);
|
||||
lexer->result_symbol = STRING_END;
|
||||
scanner->inside_f_string = false;
|
||||
}
|
||||
lexer->mark_end(lexer);
|
||||
return true;
|
||||
|
||||
} else if (lexer->lookahead == '\n' && has_content &&
|
||||
!is_triple(&delimiter)) {
|
||||
return false;
|
||||
}
|
||||
advance(lexer);
|
||||
has_content = true;
|
||||
}
|
||||
}
|
||||
|
||||
lexer->mark_end(lexer);
|
||||
|
||||
bool found_end_of_line = false;
|
||||
uint32_t indent_length = 0;
|
||||
int32_t first_comment_indent_length = -1;
|
||||
for (;;) {
|
||||
if (lexer->lookahead == '\n') {
|
||||
found_end_of_line = true;
|
||||
indent_length = 0;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
|
||||
indent_length = 0;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == '\t') {
|
||||
indent_length += 8;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == '#') {
|
||||
// If we haven't found an EOL yet,
|
||||
// then this is a comment after an expression:
|
||||
// foo = bar # comment
|
||||
// Just return, since we don't want to generate an indent/dedent
|
||||
// token.
|
||||
if (!found_end_of_line) {
|
||||
return false;
|
||||
}
|
||||
if (first_comment_indent_length == -1) {
|
||||
first_comment_indent_length = (int32_t)indent_length;
|
||||
}
|
||||
while (lexer->lookahead && lexer->lookahead != '\n') {
|
||||
skip(lexer);
|
||||
}
|
||||
skip(lexer);
|
||||
indent_length = 0;
|
||||
} else if (lexer->lookahead == '\\') {
|
||||
skip(lexer);
|
||||
if (lexer->lookahead == '\r') {
|
||||
skip(lexer);
|
||||
}
|
||||
if (lexer->lookahead == '\n' || lexer->eof(lexer)) {
|
||||
skip(lexer);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else if (lexer->eof(lexer)) {
|
||||
indent_length = 0;
|
||||
found_end_of_line = true;
|
||||
break;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_end_of_line) {
|
||||
if (scanner->indents.len > 0) {
|
||||
uint16_t current_indent_length = VEC_BACK(scanner->indents);
|
||||
|
||||
if (valid_symbols[INDENT] &&
|
||||
indent_length > current_indent_length) {
|
||||
VEC_PUSH(scanner->indents, indent_length);
|
||||
lexer->result_symbol = INDENT;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool next_tok_is_string_start = lexer->lookahead == '\"' ||
|
||||
lexer->lookahead == '\'' ||
|
||||
lexer->lookahead == '`';
|
||||
|
||||
if ((valid_symbols[DEDENT] ||
|
||||
(!valid_symbols[NEWLINE] &&
|
||||
!(valid_symbols[STRING_START] && next_tok_is_string_start) &&
|
||||
!within_brackets)) &&
|
||||
indent_length < current_indent_length &&
|
||||
!scanner->inside_f_string &&
|
||||
|
||||
// Wait to create a dedent token until we've consumed any
|
||||
// comments
|
||||
// whose indentation matches the current block.
|
||||
first_comment_indent_length < (int32_t)current_indent_length) {
|
||||
VEC_POP(scanner->indents);
|
||||
lexer->result_symbol = DEDENT;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[NEWLINE] && !error_recovery_mode) {
|
||||
lexer->result_symbol = NEWLINE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) {
|
||||
Delimiter delimiter = new_delimiter();
|
||||
|
||||
bool has_flags = false;
|
||||
while (lexer->lookahead) {
|
||||
if (lexer->lookahead == 'f' || lexer->lookahead == 'F') {
|
||||
set_format(&delimiter);
|
||||
} else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') {
|
||||
set_raw(&delimiter);
|
||||
} else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
|
||||
set_bytes(&delimiter);
|
||||
} else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') {
|
||||
break;
|
||||
}
|
||||
has_flags = true;
|
||||
advance(lexer);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '`') {
|
||||
set_end_character(&delimiter, '`');
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
} else if (lexer->lookahead == '\'') {
|
||||
set_end_character(&delimiter, '\'');
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
if (lexer->lookahead == '\'') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '\'') {
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
set_triple(&delimiter);
|
||||
}
|
||||
}
|
||||
} else if (lexer->lookahead == '"') {
|
||||
set_end_character(&delimiter, '"');
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
if (lexer->lookahead == '"') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '"') {
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
set_triple(&delimiter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (end_character(&delimiter)) {
|
||||
VEC_PUSH(scanner->delimiters, delimiter);
|
||||
lexer->result_symbol = STRING_START;
|
||||
scanner->inside_f_string = is_format(&delimiter);
|
||||
return true;
|
||||
}
|
||||
if (has_flags) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned tree_sitter_python_external_scanner_serialize(void *payload,
|
||||
char *buffer) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
|
||||
size_t size = 0;
|
||||
|
||||
buffer[size++] = (char)scanner->inside_f_string;
|
||||
|
||||
size_t delimiter_count = scanner->delimiters.len;
|
||||
if (delimiter_count > UINT8_MAX) {
|
||||
delimiter_count = UINT8_MAX;
|
||||
}
|
||||
buffer[size++] = (char)delimiter_count;
|
||||
|
||||
if (delimiter_count > 0) {
|
||||
memcpy(&buffer[size], scanner->delimiters.data, delimiter_count);
|
||||
}
|
||||
size += delimiter_count;
|
||||
|
||||
int iter = 1;
|
||||
for (; iter < scanner->indents.len &&
|
||||
size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
|
||||
++iter) {
|
||||
buffer[size++] = (char)scanner->indents.data[iter];
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
void tree_sitter_python_external_scanner_deserialize(void *payload,
|
||||
const char *buffer,
|
||||
unsigned length) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
|
||||
VEC_CLEAR(scanner->delimiters);
|
||||
VEC_CLEAR(scanner->indents);
|
||||
VEC_PUSH(scanner->indents, 0);
|
||||
|
||||
if (length > 0) {
|
||||
size_t size = 0;
|
||||
|
||||
scanner->inside_f_string = (bool)buffer[size++];
|
||||
|
||||
size_t delimiter_count = (uint8_t)buffer[size++];
|
||||
if (delimiter_count > 0) {
|
||||
VEC_GROW(scanner->delimiters, delimiter_count);
|
||||
scanner->delimiters.len = delimiter_count;
|
||||
memcpy(scanner->delimiters.data, &buffer[size], delimiter_count);
|
||||
size += delimiter_count;
|
||||
}
|
||||
|
||||
for (; size < length; size++) {
|
||||
VEC_PUSH(scanner->indents, (unsigned char)buffer[size]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void *tree_sitter_python_external_scanner_create() {
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
|
||||
_Static_assert(sizeof(Delimiter) == sizeof(char), "");
|
||||
#else
|
||||
assert(sizeof(Delimiter) == sizeof(char));
|
||||
#endif
|
||||
Scanner *scanner = calloc(1, sizeof(Scanner));
|
||||
scanner->indents = indent_vec_new();
|
||||
scanner->delimiters = delimiter_vec_new();
|
||||
tree_sitter_python_external_scanner_deserialize(scanner, NULL, 0);
|
||||
return scanner;
|
||||
}
|
||||
|
||||
void tree_sitter_python_external_scanner_destroy(void *payload) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
VEC_FREE(scanner->indents);
|
||||
VEC_FREE(scanner->delimiters);
|
||||
free(scanner);
|
||||
}
|
||||
@ -1,408 +0,0 @@
|
||||
#include <tree_sitter/parser.h>
|
||||
#include <vector>
|
||||
#include <cwctype>
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
#include <stdio.h>
|
||||
namespace {
|
||||
|
||||
using std::vector;
|
||||
using std::iswspace;
|
||||
using std::memcpy;
|
||||
|
||||
enum TokenType {
|
||||
NEWLINE,
|
||||
INDENT,
|
||||
DEDENT,
|
||||
STRING_START,
|
||||
STRING_CONTENT,
|
||||
STRING_END,
|
||||
COMMENT,
|
||||
CLOSE_PAREN,
|
||||
CLOSE_BRACKET,
|
||||
CLOSE_BRACE,
|
||||
};
|
||||
|
||||
struct Delimiter {
|
||||
enum {
|
||||
SingleQuote = 1 << 0,
|
||||
DoubleQuote = 1 << 1,
|
||||
BackQuote = 1 << 2,
|
||||
Raw = 1 << 3,
|
||||
Format = 1 << 4,
|
||||
Triple = 1 << 5,
|
||||
Bytes = 1 << 6,
|
||||
};
|
||||
|
||||
Delimiter() : flags(0) {}
|
||||
|
||||
bool is_format() const {
|
||||
return flags & Format;
|
||||
}
|
||||
|
||||
bool is_raw() const {
|
||||
return flags & Raw;
|
||||
}
|
||||
|
||||
bool is_triple() const {
|
||||
return flags & Triple;
|
||||
}
|
||||
|
||||
bool is_bytes() const {
|
||||
return flags & Bytes;
|
||||
}
|
||||
|
||||
int32_t end_character() const {
|
||||
if (flags & SingleQuote) return '\'';
|
||||
if (flags & DoubleQuote) return '"';
|
||||
if (flags & BackQuote) return '`';
|
||||
return 0;
|
||||
}
|
||||
|
||||
void set_format() {
|
||||
flags |= Format;
|
||||
}
|
||||
|
||||
void set_raw() {
|
||||
flags |= Raw;
|
||||
}
|
||||
|
||||
void set_triple() {
|
||||
flags |= Triple;
|
||||
}
|
||||
|
||||
void set_bytes() {
|
||||
flags |= Bytes;
|
||||
}
|
||||
|
||||
void set_end_character(int32_t character) {
|
||||
switch (character) {
|
||||
case '\'':
|
||||
flags |= SingleQuote;
|
||||
break;
|
||||
case '"':
|
||||
flags |= DoubleQuote;
|
||||
break;
|
||||
case '`':
|
||||
flags |= BackQuote;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
char flags;
|
||||
};
|
||||
|
||||
struct Scanner {
|
||||
Scanner() {
|
||||
assert(sizeof(Delimiter) == sizeof(char));
|
||||
deserialize(NULL, 0);
|
||||
}
|
||||
|
||||
unsigned serialize(char *buffer) {
|
||||
size_t i = 0;
|
||||
|
||||
size_t delimiter_count = delimiter_stack.size();
|
||||
if (delimiter_count > UINT8_MAX) delimiter_count = UINT8_MAX;
|
||||
buffer[i++] = delimiter_count;
|
||||
|
||||
if (delimiter_count > 0) {
|
||||
memcpy(&buffer[i], delimiter_stack.data(), delimiter_count);
|
||||
}
|
||||
i += delimiter_count;
|
||||
|
||||
vector<uint16_t>::iterator
|
||||
iter = indent_length_stack.begin() + 1,
|
||||
end = indent_length_stack.end();
|
||||
|
||||
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
||||
buffer[i++] = *iter;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
void deserialize(const char *buffer, unsigned length) {
|
||||
delimiter_stack.clear();
|
||||
indent_length_stack.clear();
|
||||
indent_length_stack.push_back(0);
|
||||
|
||||
if (length > 0) {
|
||||
size_t i = 0;
|
||||
|
||||
size_t delimiter_count = (uint8_t)buffer[i++];
|
||||
delimiter_stack.resize(delimiter_count);
|
||||
if (delimiter_count > 0) {
|
||||
memcpy(delimiter_stack.data(), &buffer[i], delimiter_count);
|
||||
}
|
||||
i += delimiter_count;
|
||||
|
||||
for (; i < length; i++) {
|
||||
indent_length_stack.push_back(buffer[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void advance(TSLexer *lexer) {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
void skip(TSLexer *lexer) {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
// External scanner entry point. Tries, in priority order:
//   1. string content / string end for the delimiter on top of the stack,
//   2. NEWLINE / INDENT / DEDENT after consuming layout characters,
//   3. a string start (prefix flags + opening quote).
// Returns true iff a token was produced (lexer->result_symbol is set).
bool scan(TSLexer *lexer, const bool *valid_symbols) {
  // STRING_CONTENT and INDENT are never valid together in normal parsing,
  // so both being valid signals tree-sitter's error-recovery mode.
  bool error_recovery_mode = valid_symbols[STRING_CONTENT] && valid_symbols[INDENT];
  bool within_brackets = valid_symbols[CLOSE_BRACE] || valid_symbols[CLOSE_PAREN] || valid_symbols[CLOSE_BRACKET];

  // --- 1. Inside a string: emit STRING_CONTENT or STRING_END. ---
  if (valid_symbols[STRING_CONTENT] && !delimiter_stack.empty() && !error_recovery_mode) {
    Delimiter delimiter = delimiter_stack.back();
    int32_t end_character = delimiter.end_character();
    bool has_content = false;
    while (lexer->lookahead) {
      if ((lexer->lookahead == '{' || lexer->lookahead == '}') && delimiter.is_format()) {
        // Braces in a format (f-) string: stop content here so the grammar
        // can handle the interpolation.
        lexer->mark_end(lexer);
        lexer->result_symbol = STRING_CONTENT;
        return has_content;
      } else if (lexer->lookahead == '\\') {
        if (delimiter.is_raw()) {
          // Raw string: the backslash is ordinary content.
          lexer->advance(lexer, false);
        } else if (delimiter.is_bytes()) {
          lexer->mark_end(lexer);
          lexer->advance(lexer, false);
          if (lexer->lookahead == 'N' || lexer->lookahead == 'u' || lexer->lookahead == 'U') {
            // In bytes string, \N{...}, \uXXXX and \UXXXXXXXX are not escape sequences
            // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
            lexer->advance(lexer, false);
          } else {
            // Real escape: end the content token before the backslash.
            lexer->result_symbol = STRING_CONTENT;
            return has_content;
          }
        } else {
          // Ordinary string: yield content so the grammar can lex the escape.
          lexer->mark_end(lexer);
          lexer->result_symbol = STRING_CONTENT;
          return has_content;
        }
      } else if (lexer->lookahead == end_character) {
        if (delimiter.is_triple()) {
          // Need three closing quotes; mark_end first so partial matches
          // fall back to content.
          lexer->mark_end(lexer);
          lexer->advance(lexer, false);
          if (lexer->lookahead == end_character) {
            lexer->advance(lexer, false);
            if (lexer->lookahead == end_character) {
              if (has_content) {
                // Emit the pending content; the closer is lexed next call.
                lexer->result_symbol = STRING_CONTENT;
              } else {
                lexer->advance(lexer, false);
                lexer->mark_end(lexer);
                delimiter_stack.pop_back();
                lexer->result_symbol = STRING_END;
              }
              return true;
            } else {
              // Only two quotes: they are string content.
              lexer->mark_end(lexer);
              lexer->result_symbol = STRING_CONTENT;
              return true;
            }
          } else {
            // Single quote inside a triple-quoted string: content.
            lexer->mark_end(lexer);
            lexer->result_symbol = STRING_CONTENT;
            return true;
          }
        } else {
          if (has_content) {
            lexer->result_symbol = STRING_CONTENT;
          } else {
            lexer->advance(lexer, false);
            delimiter_stack.pop_back();
            lexer->result_symbol = STRING_END;
          }
          lexer->mark_end(lexer);
          return true;
        }
      } else if (lexer->lookahead == '\n' && has_content && !delimiter.is_triple()) {
        // Unterminated single-line string: fail so error recovery kicks in.
        return false;
      }
      advance(lexer);
      has_content = true;
    }
  }

  lexer->mark_end(lexer);

  // --- 2. Consume layout (newlines, spaces, comments, continuations) and
  // measure the indentation of the next code line. ---
  bool found_end_of_line = false;
  uint32_t indent_length = 0;
  int32_t first_comment_indent_length = -1;
  for (;;) {
    if (lexer->lookahead == '\n') {
      found_end_of_line = true;
      indent_length = 0;
      skip(lexer);
    } else if (lexer->lookahead == ' ') {
      indent_length++;
      skip(lexer);
    } else if (lexer->lookahead == '\r') {
      indent_length = 0;
      skip(lexer);
    } else if (lexer->lookahead == '\t') {
      // A tab counts as 8 columns of indentation.
      indent_length += 8;
      skip(lexer);
    } else if (lexer->lookahead == '#') {
      // Remember where the first comment started; used below to delay
      // DEDENT past comments aligned with the current block.
      if (first_comment_indent_length == -1) {
        first_comment_indent_length = (int32_t)indent_length;
      }
      while (lexer->lookahead && lexer->lookahead != '\n') {
        skip(lexer);
      }
      skip(lexer);
      indent_length = 0;
    } else if (lexer->lookahead == '\\') {
      // Line continuation: a backslash must be followed by (\r)\n.
      skip(lexer);
      if (lexer->lookahead == '\r') {
        skip(lexer);
      }
      if (lexer->lookahead == '\n') {
        skip(lexer);
      } else {
        return false;
      }
    } else if (lexer->lookahead == '\f') {
      // Form feed resets the indentation count.
      indent_length = 0;
      skip(lexer);
    } else if (lexer->lookahead == 0) {
      // End of file behaves like an end of line with zero indentation.
      indent_length = 0;
      found_end_of_line = true;
      break;
    } else {
      break;
    }
  }

  if (found_end_of_line) {
    if (!indent_length_stack.empty()) {
      uint16_t current_indent_length = indent_length_stack.back();

      if (
        valid_symbols[INDENT] &&
        indent_length > current_indent_length
      ) {
        indent_length_stack.push_back(indent_length);
        lexer->result_symbol = INDENT;
        return true;
      }

      if (
        (valid_symbols[DEDENT] || (!valid_symbols[NEWLINE] && !within_brackets)) &&
        indent_length < current_indent_length &&

        // Wait to create a dedent token until we've consumed any comments
        // whose indentation matches the current block.
        first_comment_indent_length < (int32_t)current_indent_length
      ) {
        indent_length_stack.pop_back();
        lexer->result_symbol = DEDENT;
        return true;
      }
    }

    if (valid_symbols[NEWLINE] && !error_recovery_mode) {
      lexer->result_symbol = NEWLINE;
      return true;
    }
  }

  // --- 3. Possible string start (only if no comment intervened). ---
  if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) {
    Delimiter delimiter;

    // Consume prefix flags (f/r/b/u in any case, in any combination).
    bool has_flags = false;
    while (lexer->lookahead) {
      if (lexer->lookahead == 'f' || lexer->lookahead == 'F') {
        delimiter.set_format();
      } else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') {
        delimiter.set_raw();
      } else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
        delimiter.set_bytes();
      } else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') {
        break;
      }
      has_flags = true;
      advance(lexer);
    }

    // Recognize the opening quote and whether it is tripled.
    if (lexer->lookahead == '`') {
      delimiter.set_end_character('`');
      advance(lexer);
      lexer->mark_end(lexer);
    } else if (lexer->lookahead == '\'') {
      delimiter.set_end_character('\'');
      advance(lexer);
      lexer->mark_end(lexer);
      if (lexer->lookahead == '\'') {
        advance(lexer);
        if (lexer->lookahead == '\'') {
          advance(lexer);
          lexer->mark_end(lexer);
          delimiter.set_triple();
        }
      }
    } else if (lexer->lookahead == '"') {
      delimiter.set_end_character('"');
      advance(lexer);
      lexer->mark_end(lexer);
      if (lexer->lookahead == '"') {
        advance(lexer);
        if (lexer->lookahead == '"') {
          advance(lexer);
          lexer->mark_end(lexer);
          delimiter.set_triple();
        }
      }
    }

    if (delimiter.end_character()) {
      delimiter_stack.push_back(delimiter);
      lexer->result_symbol = STRING_START;
      return true;
    } else if (has_flags) {
      // Flags with no quote (e.g. an identifier starting with "rb"):
      // not a string start.
      return false;
    }
  }

  return false;
}
|
||||
|
||||
// Open indentation levels, innermost last; index 0 is the implicit level 0.
vector<uint16_t> indent_length_stack;
// Delimiters of currently-open string literals, innermost last.
vector<Delimiter> delimiter_stack;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
void *tree_sitter_python_external_scanner_create() {
|
||||
return new Scanner();
|
||||
}
|
||||
|
||||
bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
|
||||
const bool *valid_symbols) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->scan(lexer, valid_symbols);
|
||||
}
|
||||
|
||||
unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buffer) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->serialize(buffer);
|
||||
}
|
||||
|
||||
void tree_sitter_python_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
scanner->deserialize(buffer, length);
|
||||
}
|
||||
|
||||
void tree_sitter_python_external_scanner_destroy(void *payload) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
delete scanner;
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,15 @@
|
||||
class MyClass:
|
||||
# ^ definition.class
|
||||
def hello():
|
||||
# ^ definition.function
|
||||
print "hello from MyClass"
|
||||
|
||||
MyClass.hello()
|
||||
# ^ reference.call
|
||||
|
||||
def main():
|
||||
# ^ definition.function
|
||||
print "Hello, world!"
|
||||
|
||||
main()
|
||||
# <- reference.call
|
||||
Loading…
Reference in New Issue