Add 'vendored_parsers/tree-sitter-smali/' from commit '5ae51e15c4d1ac93cba6127caf3d1f0a072c140c'

git-subtree-dir: vendored_parsers/tree-sitter-smali
git-subtree-mainline: 77c641748f
git-subtree-split: 5ae51e15c4
pull/634/head
Evan Richter 2024-02-01 14:35:46 +07:00
commit 6ae210999c
41 changed files with 50172 additions and 0 deletions

@ -0,0 +1,20 @@
# builds.sr.ht manifest: installs the npm dependencies, lints the grammar,
# then builds and tests the parser on a Fedora image.
image: fedora/latest
packages:
  - nodejs
  - gcc
  - gcc-c++
sources:
  - https://git.sr.ht/~yotam/tree-sitter-smali
tasks:
  - install: |
      cd tree-sitter-smali
      npm install
  # eslint is only installed locally by `npm install` (it is a devDependency),
  # so it is not on PATH; run the local binary through npx.
  - lint: |
      cd tree-sitter-smali
      npx eslint grammar.js
  - build: |
      cd tree-sitter-smali
      npm run build
  - test: |
      cd tree-sitter-smali
      npm run test

@ -0,0 +1,20 @@
module.exports = {
'env': {
'commonjs': true,
'es2021': true,
},
'extends': 'google',
'overrides': [
],
'parserOptions': {
'ecmaVersion': 'latest',
'sourceType': 'module',
},
'rules': {
'indent': ['error', 2, {'SwitchCase': 1}],
'max-len': [
'error',
{'code': 120, 'ignoreComments': true, 'ignoreUrls': true, 'ignoreStrings': true},
],
},
};

@ -0,0 +1,7 @@
# Mark the parser source, the JSON files under src/ and the example inputs as
# vendored for GitHub Linguist.
/src/parser.c linguist-vendored
/src/*.json linguist-vendored
/examples/* linguist-vendored
# Unset the diff attribute so Git does not render textual diffs for these files.
src/grammar.json -diff
src/node-types.json -diff
src/parser.c -diff

@ -0,0 +1,34 @@
# Continuous integration: runs the test suite on pushes to master and on
# every pull request.
name: CI

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - "**"

jobs:
  # `npm test` on the Unix-like runners.
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: true
      matrix:
        os: [macos-latest, ubuntu-latest]
    steps:
      - uses: actions/checkout@v3
      # setup-node@v3 matches the version used by the Publish and Release
      # workflows.
      - uses: actions/setup-node@v3
        with:
          node-version: 16
      - run: npm install
      - run: npm test

  # Windows runs the dedicated `test-windows` npm script instead of `npm test`.
  test_windows:
    runs-on: windows-2019
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: 16
      - run: npm install
      - run: npm run-script test-windows

@ -0,0 +1,22 @@
# Fuzzes the parser whenever the external scanner source changes, or when
# triggered manually.
name: Fuzz Parser

on:
  push:
    paths: [src/scanner.c]
  pull_request:
    paths: [src/scanner.c]
  workflow_dispatch:

jobs:
  test:
    name: Parser fuzzing
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: vigoux/tree-sitter-fuzz-action@v1
        with:
          language: smali
          external-scanner: src/scanner.c
          time: 60

@ -0,0 +1,81 @@
# Publishes a release to npm and crates.io after a release pull request
# (title starting with 'chore(master):') has been merged, then moves the
# `stable` tag.
name: Publish

on:
  pull_request:
    types: [closed]

permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: 16
      - run: npm install
      - run: npm test

  publish:
    # Skip closed-but-unmerged PRs and anything that is not a release PR.
    if: github.event.pull_request.merged && startsWith(github.event.pull_request.title, 'chore(master):')
    needs: build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.event.pull_request.merge_commit_sha }}
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract version
        id: extract_version
        # The PR title is user-controlled text. Hand it to the script through
        # the environment instead of interpolating it into the shell source,
        # so it cannot inject commands.
        env:
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          VERSION=$(echo "$PR_TITLE" | grep -oP '(?<=release ).*$')
          # `::set-output` is deprecated; write step outputs to $GITHUB_OUTPUT.
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Update versions
        # The version is derived from the PR title, so it is passed through
        # the environment for the same reason as above.
        env:
          VERSION: ${{ steps.extract_version.outputs.version }}
        run: |
          version="$VERSION"
          repo_name="${{ github.repository }}"
          repo_name="${repo_name##*/}"
          git config user.name github-actions[bot]
          git config user.email github-actions[bot]@users.noreply.github.com
          git fetch origin master
          git checkout master
          sed -i "s/\"version\": \"[^\"]*\"/\"version\": \"$version\"/g" package.json
          sed -i "s/version = \"[^\"]*\"/version = \"$version\"/g" Cargo.toml
          sed -i "s/$repo_name = \"[^\"]*\"/$repo_name = \"$version\"/g" bindings/rust/README.md
          git add package.json Cargo.toml bindings/rust/README.md
          git commit -m "chore(manifests): bump version to $version"
          git push https://${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git HEAD:master

      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: 16
          registry-url: https://registry.npmjs.org/

      - run: npm publish
        env:
          NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}

      - name: Setup Rust
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true

      - name: Publish to Crates.io
        uses: katyo/publish-crates@v2
        with:
          registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}

      - uses: actions/checkout@v3

      # Deletes any existing `stable` tag locally and on origin, then
      # recreates and pushes it.
      # NOTE(review): the `gh-token` remote added below points at the
      # release-please-action repository and is not used by the later
      # commands, which all push to `origin` - confirm whether it can go.
      - name: Tag stable versions
        run: |
          git config user.name github-actions[bot]
          git config user.email github-actions[bot]@users.noreply.github.com
          git remote add gh-token "https://${{ secrets.GITHUB_TOKEN }}@github.com/google-github-actions/release-please-action.git"
          git tag -d stable || true
          git push origin :stable || true
          git tag -a stable -m "Last Stable Release"
          git push origin stable

@ -0,0 +1,35 @@
# Runs the tests on master and then lets release-please maintain the release
# pull request for the package.
name: Release

on:
  push:
    branches: [master]
  workflow_dispatch:

permissions:
  contents: write
  pull-requests: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: 16
      - run: npm install
      - run: npm test

  release:
    name: release
    # Manual dispatches from other branches still run `build` but never release.
    if: github.ref == 'refs/heads/master'
    needs: [build]
    runs-on: ubuntu-latest
    steps:
      - uses: google-github-actions/release-please-action@v3
        id: release
        with:
          release-type: simple
          package-name: tree-sitter-smali

@ -0,0 +1,6 @@
# Dependency lockfiles are kept out of version control.
Cargo.lock
package-lock.json
# Top-level build output and installed npm dependencies.
/build
/node_modules
# Directories directly under examples/ (plain files there are still tracked).
/examples/*/
# NOTE(review): presumably Cargo's build output directory - confirm.
/target

@ -0,0 +1,5 @@
# Paths left out of the published npm package.
build
examples
script
target
test

@ -0,0 +1,33 @@
# Changelog
## 1.0.0 (2023-05-13)
### Features
* (overdue) add queries ([ae15a72](https://github.com/amaanq/tree-sitter-smali/commit/ae15a7228bcac256547ca81b727da6874fe51aee))
* add ci script ([f8c582f](https://github.com/amaanq/tree-sitter-smali/commit/f8c582fdf92fe3896733830199d39f4d39aaf0d7))
* add eslint ([fcb804e](https://github.com/amaanq/tree-sitter-smali/commit/fcb804e90025e6c56629708201c9116eb5345003))
* add GitHub ci for the mirror ([51a914e](https://github.com/amaanq/tree-sitter-smali/commit/51a914ea34c96dcf483aaa0cfb7800dd84ac11f7))
* add missing opcodes and modifiers ([9ca1bb9](https://github.com/amaanq/tree-sitter-smali/commit/9ca1bb941083424fafc99061d300f3cc54862394))
* add Swift bindings ([76d083f](https://github.com/amaanq/tree-sitter-smali/commit/76d083f4a97ec0900869547c6187f37f45825c96))
* add word, update top level definitions as it was too strict before ([448b98b](https://github.com/amaanq/tree-sitter-smali/commit/448b98b28d3ca3d3d99d0a2677eea12b084dcc92))
* complete the grammar ([256f76a](https://github.com/amaanq/tree-sitter-smali/commit/256f76a2b887f72d87b7c1940c04009a97b03e19))
* favor eslint as a linter over prettier ([c9456c6](https://github.com/amaanq/tree-sitter-smali/commit/c9456c6be4f78070c8d59b6bd8d962115135861e))
* file documentation, reference types & checks ([a985f15](https://github.com/amaanq/tree-sitter-smali/commit/a985f151b896e04f28ba72e2802d4ee66a25c81b))
* **queries:** update highlights, add folds, indents, and locals ([7ef7043](https://github.com/amaanq/tree-sitter-smali/commit/7ef7043327183596b3bdda8db5356c500d114cf8))
* remove apktool dependency and use repo with decompiled smali files, add smali2java repo ([5a742af](https://github.com/amaanq/tree-sitter-smali/commit/5a742af7388864a3ff2ce8421328a33e7246a2d5))
* segment class identifiers to pluck individual identifiers out ([9aea302](https://github.com/amaanq/tree-sitter-smali/commit/9aea3029aeb3374706a43441007d39fa334fe3ee))
* **tests:** update accordingly ([8fcb12a](https://github.com/amaanq/tree-sitter-smali/commit/8fcb12a9e1891afe84f712964cde09aed93f34c8))
* update bindings, Rust binding includes relevant queries ([49afd6f](https://github.com/amaanq/tree-sitter-smali/commit/49afd6f1431baa4648e06ba7d7a869b3c693b2e8))
* update highlights ([f579b45](https://github.com/amaanq/tree-sitter-smali/commit/f579b452dd4bdbab1519a05f35f6515dbac840c1))
* v0.0.2 ([e7da914](https://github.com/amaanq/tree-sitter-smali/commit/e7da91418fb1d88270688c7cbe72a25be4d66039))
* v0.0.3 ([b002dce](https://github.com/amaanq/tree-sitter-smali/commit/b002dceb9b91a6d6de45479ab4b2e9596ebbaaf3))
* v0.0.4 ([9bf8aa6](https://github.com/amaanq/tree-sitter-smali/commit/9bf8aa671a233ae2d2c6e9512c7144ce121b1fb6))
### Bug Fixes
* not *every* class has an access_modifier, those in particular being the $_.smali files ([c810747](https://github.com/amaanq/tree-sitter-smali/commit/c810747af3733f856bd51656486c7570fc010cfd))
* not *every* field has an access_modifier as well, similar to class fix ([7327461](https://github.com/amaanq/tree-sitter-smali/commit/7327461cd2cd36df4943854cf5e49bcb31e70dc9))
* update workflows ([5bc3f10](https://github.com/amaanq/tree-sitter-smali/commit/5bc3f104192310afe53966445eafb456d37174d4))

@ -0,0 +1,24 @@
# Crate manifest for the Rust binding of the Smali tree-sitter grammar.
[package]
name = "tree-sitter-smali"
description = "Smali grammar for tree-sitter"
version = "1.0.0"
authors = ["Yotam Nachum <me@yotam.net>", "Amaan Qureshi <amaanq12@gmail.com>"]
license = "MIT"
readme = "bindings/rust/README.md"
keywords = ["incremental", "parsing", "smali"]
categories = ["parsing", "text-editors"]
repository = "https://git.sr.ht/~yotam/tree-sitter-smali"
edition = "2021"
# Disable Cargo's automatic discovery of example targets.
autoexamples = false
# The build script and the library root live under bindings/rust rather than
# in Cargo's default locations.
build = "bindings/rust/build.rs"
# Files packaged into the published crate.
include = ["bindings/rust/*", "grammar.js", "queries/*", "src/*"]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "~0.20.10"
[build-dependencies]
# Used by the build script to compile the C sources.
cc = "1.0"

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2023 Yotam Nachum <me@yotam.net>, Amaan Qureshi <amaanq12@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,37 @@
// swift-tools-version:5.3
import PackageDescription

// Repository files and directories that are excluded from the Swift target.
let excludedPaths: [String] = [
    "binding.gyp",
    "bindings",
    "Cargo.toml",
    "test",
    "grammar.js",
    "LICENSE",
    "package.json",
    "README.md",
    "script",
    "src/grammar.json",
    "src/node-types.json",
]

// C sources compiled into the target.
let parserSources: [String] = [
    "src/parser.c",
    "src/scanner.c",
]

// The single library target: built from the repository root, with the query
// files copied in as resources and the public header taken from bindings/swift.
let smaliTarget: Target = .target(
    name: "TreeSitterSmali",
    path: ".",
    exclude: excludedPaths,
    sources: parserSources,
    resources: [
        .copy("queries")
    ],
    publicHeadersPath: "bindings/swift",
    cSettings: [.headerSearchPath("src")]
)

let package = Package(
    name: "TreeSitterSmali",
    platforms: [.macOS(.v10_13), .iOS(.v11)],
    products: [
        .library(name: "TreeSitterSmali", targets: ["TreeSitterSmali"]),
    ],
    dependencies: [],
    targets: [smaliTarget]
)

@ -0,0 +1,11 @@
# tree-sitter-smali
[![builds.sr.ht status](https://builds.sr.ht/~yotam/tree-sitter-smali.svg)](https://builds.sr.ht/~yotam/tree-sitter-smali?)
[![GitHub Build Status](https://github.com/amaanq/tree-sitter-smali/actions/workflows/ci.yml/badge.svg)](https://github.com/amaanq/tree-sitter-smali/actions/workflows/ci.yml)
[![Discord](https://img.shields.io/discord/1063097320771698699?logo=discord)](https://discord.gg/w7nTvsVJhm)
Smali grammar for [tree-sitter](https://github.com/tree-sitter/tree-sitter).
The Smali syntax is poorly documented, so this grammar may contain mistakes.
The authoritative definition of the syntax is the [JesusFreke/smali](https://github.com/JesusFreke/smali) implementation,
most notably [smali/src/main/jflex/smaliLexer.jflex](https://github.com/JesusFreke/smali/blob/master/smali/src/main/jflex/smaliLexer.jflex).

@ -0,0 +1,19 @@
# node-gyp build description for the Node.js binding.
{
"targets": [
{
"target_name": "tree_sitter_smali_binding",
"include_dirs": [
# Resolves the include directory of the `nan` package at configure time.
"<!(node -e \"require('nan')\")",
"src"
],
# The C++ binding glue plus the parser and scanner C sources.
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
"src/scanner.c",
],
"cflags_c": [
"-std=c99",
]
}
]
}

@ -0,0 +1,30 @@
#include "nan.h"
#include "tree_sitter/parser.h"
#include <node.h>
using namespace v8;
// Language entry point with C linkage; it is only declared here.
extern "C" TSLanguage *tree_sitter_smali();
namespace {
// Constructor callback for the "Language" template; intentionally empty.
NAN_METHOD(New) {}
// Module initialiser: builds a "Language" object that carries the TSLanguage
// pointer in internal field 0 and a `name` property of "smali", then installs
// that object as `module.exports`.
void Init(Local<Object> exports, Local<Object> module) {
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
tpl->SetClassName(Nan::New("Language").ToLocalChecked());
// Reserve one internal field to hold the language pointer.
tpl->InstanceTemplate()->SetInternalFieldCount(1);
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
Local<Object> instance =
constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_smali());
Nan::Set(instance, Nan::New("name").ToLocalChecked(),
Nan::New("smali").ToLocalChecked());
// Replace the module's exports with the instance rather than adding to `exports`.
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
}
NODE_MODULE(tree_sitter_smali_binding, Init)
} // namespace

@ -0,0 +1,19 @@
/**
 * Loads the compiled native binding, preferring the Release build and falling
 * back to the Debug build.
 *
 * Any error other than MODULE_NOT_FOUND is rethrown immediately. If neither
 * build can be found, the error from the Release attempt is the one reported.
 *
 * @return {Object} the native binding module
 */
function loadBinding() {
  let releaseError;
  try {
    return require('../../build/Release/tree_sitter_smali_binding');
  } catch (err) {
    if (err.code !== 'MODULE_NOT_FOUND') {
      throw err;
    }
    releaseError = err;
  }

  try {
    return require('../../build/Debug/tree_sitter_smali_binding');
  } catch (err) {
    if (err.code !== 'MODULE_NOT_FOUND') {
      throw err;
    }
    throw releaseError;
  }
}

module.exports = loadBinding();

// Attach the static node type information on a best-effort basis: a failure
// to load it is deliberately ignored.
try {
  const nodeTypes = require('../../src/node-types.json');
  module.exports.nodeTypeInfo = nodeTypes;
} catch (_) {}

@ -0,0 +1,52 @@
# tree-sitter-smali
This crate provides a Smali grammar for the [tree-sitter][] parsing library. To
use this crate, add it to the `[dependencies]` section of your `Cargo.toml`
file. (Note that you will probably also need to depend on the
[`tree-sitter`][tree-sitter crate] crate to use the parsed result in any useful
way.)
```toml
[dependencies]
tree-sitter = "~0.20.3"
tree-sitter-smali = "1.0.0"
```
Typically, you will use the [language][language func] function to add this
grammar to a tree-sitter [Parser][], and then use the parser to parse some code:
```rust
let code = r#"
.class public Lmain;
.super Ljava/lang/Object;
.source "main.java"
.implements Lsome/interface;
.implements Lsome/other/interface;
.field public static aStaticFieldWithoutAnInitializer:I
.field public static methodStaticField:Ljava/lang/reflect/Method; = Lbaksmali/test/class;->testMethod(ILjava/lang/String;)Ljava/lang/String;
.method public constructor <init>()V
.registers 1
invoke-direct {p0}, Ljava/lang/Object;-><init>()V
return-void
.end method
"#;
let mut parser = tree_sitter::Parser::new();
parser.set_language(tree_sitter_smali::language()).expect("Error loading Smali grammar");
let parsed = parser.parse(code, None);
```
If you have any questions, please reach out to us in the [tree-sitter
discussions] page.
[language func]: https://docs.rs/tree-sitter-smali/*/tree_sitter_smali/fn.language.html
[parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
[tree-sitter]: https://tree-sitter.github.io/
[tree-sitter crate]: https://crates.io/crates/tree-sitter
[tree-sitter discussions]: https://github.com/tree-sitter/tree-sitter/discussions

@ -0,0 +1,19 @@
/// Build script: compiles `src/parser.c` and `src/scanner.c` into a static
/// library named `parser` and asks Cargo to rerun when either file changes.
fn main() {
    let src_dir = std::path::Path::new("src");
    let parser_path = src_dir.join("parser.c");
    let scanner_path = src_dir.join("scanner.c");

    // Headers are resolved relative to `src`; the listed warnings are only
    // silenced when the compiler supports the corresponding flag.
    let mut build = cc::Build::new();
    build
        .include(src_dir)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs")
        .file(&parser_path)
        .file(&scanner_path);

    println!("cargo:rerun-if-changed={}", scanner_path.display());
    build.compile("parser");
    println!("cargo:rerun-if-changed={}", parser_path.display());
}

@ -0,0 +1,69 @@
// ------------------------------------------------------------------------------------------------
// Copyright © 2023, Amaan Qureshi <amaanq12@gmail.com>, Yotam Nachum <me@yotam.net>
// See the LICENSE file in this repo for license details.
// ------------------------------------------------------------------------------------------------
//! This crate provides Smali language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = "";
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(tree_sitter_smali::language()).expect("Error loading Smali grammar");
//! let tree = parser.parse(code, None).unwrap();
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter::Language;
// Entry point of the compiled grammar, linked in with C linkage.
// NOTE(review): presumably defined in src/parser.c, which the build script
// compiles - confirm.
extern "C" {
fn tree_sitter_smali() -> Language;
}
/// Get the tree-sitter [Language][] for this grammar.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
#[must_use]
pub fn language() -> Language {
// The foreign function takes no arguments; the call is `unsafe` only because
// it crosses the FFI boundary.
unsafe { tree_sitter_smali() }
}
/// The source of the Smali tree-sitter grammar description (`grammar.js`).
pub const GRAMMAR: &str = include_str!("../../grammar.js");
/// The folds query for this language.
pub const FOLDS_QUERY: &str = include_str!("../../queries/folds.scm");
/// The syntax highlighting query for this language.
pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
/// The indents query for this language.
pub const INDENTS_QUERY: &str = include_str!("../../queries/indents.scm");
/// The injection query for this language.
pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
/// The locals query for this language.
pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
#[cfg(test)]
mod tests {
    use super::language;

    /// Smoke test: a fresh parser must accept the Smali language.
    #[test]
    fn test_can_load_grammar() {
        let mut smali_parser = tree_sitter::Parser::new();
        let load_result = smali_parser.set_language(language());
        load_result.expect("Error loading Smali grammar");
    }
}

@ -0,0 +1,16 @@
#ifndef TREE_SITTER_SMALI_H_
#define TREE_SITTER_SMALI_H_

/* Opaque language type; only pointers to it are exposed by this header. */
typedef struct TSLanguage TSLanguage;

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Returns the tree-sitter language for the Smali grammar.
 *
 * Declared with an explicit `(void)` parameter list: in C an empty `()` is a
 * non-prototype declaration that leaves the arguments unchecked.
 */
extern TSLanguage *tree_sitter_smali(void);

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_SMALI_H_

@ -0,0 +1,772 @@
/**
* @file Smali grammar for tree-sitter
* @author Amaan Qureshi <amaanq12@gmail.com>
* @author Yotam Nachum <me@yotam.net>
* @license MIT
* @see {@link https://github.com/JesusFreke/smali|official implementation}
* @see {@link https://source.android.com/docs/core/runtime/dalvik-bytecode|official dex bytecode reference}
*/
/* eslint-disable arrow-parens */
/* eslint-disable camelcase */
/* eslint-disable-next-line spaced-comment */
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
// Modifier keywords accepted by the `access_modifier` rule (together with
// `restriction_flags` below).
const access_flags = [
'public',
'private',
'protected',
'static',
'final',
'synchronized',
'volatile',
'bridge',
'transient',
'varargs',
'native',
'interface',
'abstract',
'strictfp',
'synthetic',
'annotation',
'enum',
'declared-synchronized',
];
// Additional keywords that the `access_modifier` rule accepts alongside
// `access_flags`.
const restriction_flags = [
'whitelist',
'greylist',
'blacklist',
'greylist-max-o',
'greylist-max-p',
'greylist-max-q',
'greylist-max-r',
'core-platform-api',
'test-api',
];
// Single-letter type names matched by the `primitive_type` rule.
const primitives = ['V', 'Z', 'B', 'S', 'C', 'I', 'J', 'F', 'D'];
// Instruction mnemonics matched by the `opcode` rule; each entry becomes one
// alternative of a single `choice(...)`.
const opcodes = [
'nop',
'move',
'move/from16',
'move/16',
'move-wide',
'move-wide/from16',
'move-wide/16',
'move-object',
'move-object/from16',
'move-object/16',
'move-result',
'move-result-wide',
'move-result-object',
'move-exception',
'return-void',
'return',
'return-wide',
'return-object',
'const/4',
'const/16',
'const',
'const/high16',
'const-wide/16',
'const-wide/32',
'const-wide',
'const-wide/high16',
'const-string',
'const-string/jumbo',
'const-class',
'const-method-handle',
'const-method-type',
'monitor-enter',
'monitor-exit',
'check-cast',
'instance-of',
'array-length',
'new-instance',
'new-array',
'filled-new-array',
'filled-new-array/range',
'fill-array-data',
'throw',
'throw-verification-error',
'goto',
'goto/16',
'goto/32',
'packed-switch',
'sparse-switch',
'cmpl-float',
'cmpg-float',
'cmpl-double',
'cmpg-double',
'cmp-long',
'if-eq',
'if-ne',
'if-lt',
'if-ge',
'if-gt',
'if-le',
'if-eqz',
'if-nez',
'if-ltz',
'if-gez',
'if-gtz',
'if-lez',
'aget',
'aget-wide',
'aget-object',
'aget-boolean',
'aget-byte',
'aget-char',
'aget-short',
'aput',
'aput-wide',
'aput-object',
'aput-boolean',
'aput-byte',
'aput-char',
'aput-short',
'iget',
'iget-wide',
'iget-object',
'iget-boolean',
'iget-byte',
'iget-char',
'iget-short',
'iget-volatile',
'iget-wide-volatile',
'iget-object-volatile',
'iput',
'iput-wide',
'iput-object',
'iput-boolean',
'iput-byte',
'iput-char',
'iput-short',
'iput-volatile',
'iput-wide-volatile',
'iput-object-volatile',
'sget',
'sget-wide',
'sget-object',
'sget-boolean',
'sget-byte',
'sget-char',
'sget-short',
'sget-volatile',
'sget-wide-volatile',
'sget-object-volatile',
'sput',
'sput-wide',
'sput-object',
'sput-boolean',
'sput-byte',
'sput-char',
'sput-short',
'sput-volatile',
'sput-wide-volatile',
'sput-object-volatile',
'invoke-constructor',
'invoke-custom',
'invoke-direct',
'invoke-direct-empty',
'invoke-instance',
'invoke-interface',
'invoke-polymorphic',
'invoke-static',
'invoke-super',
'invoke-virtual',
'invoke-custom/range',
'invoke-direct/range',
'invoke-interface/range',
'invoke-object-init/range',
'invoke-polymorphic/range',
'invoke-static/range',
'invoke-super/range',
'invoke-virtual/range',
'neg-int',
'not-int',
'neg-long',
'not-long',
'neg-float',
'neg-double',
'int-to-long',
'int-to-float',
'int-to-double',
'long-to-int',
'long-to-float',
'long-to-double',
'float-to-int',
'float-to-long',
'float-to-double',
'double-to-int',
'double-to-long',
'double-to-float',
'int-to-byte',
'int-to-char',
'int-to-short',
'add-int',
'sub-int',
'mul-int',
'div-int',
'rem-int',
'and-int',
'or-int',
'xor-int',
'shl-int',
'shr-int',
'ushr-int',
'add-long',
'sub-long',
'mul-long',
'div-long',
'rem-long',
'and-long',
'or-long',
'xor-long',
'shl-long',
'shr-long',
'ushr-long',
'add-float',
'sub-float',
'mul-float',
'div-float',
'rem-float',
'add-double',
'sub-double',
'mul-double',
'div-double',
'rem-double',
'add-int/2addr',
'sub-int/2addr',
'mul-int/2addr',
'div-int/2addr',
'rem-int/2addr',
'and-int/2addr',
'or-int/2addr',
'xor-int/2addr',
'shl-int/2addr',
'shr-int/2addr',
'ushr-int/2addr',
'add-long/2addr',
'sub-long/2addr',
'mul-long/2addr',
'div-long/2addr',
'rem-long/2addr',
'and-long/2addr',
'or-long/2addr',
'xor-long/2addr',
'shl-long/2addr',
'shr-long/2addr',
'ushr-long/2addr',
'add-float/2addr',
'sub-float/2addr',
'mul-float/2addr',
'div-float/2addr',
'rem-float/2addr',
'add-double/2addr',
'sub-double/2addr',
'mul-double/2addr',
'div-double/2addr',
'rem-double/2addr',
'add-int/lit16',
'sub-int/lit16',
'mul-int/lit16',
'div-int/lit16',
'rem-int/lit16',
'and-int/lit16',
'or-int/lit16',
'xor-int/lit16',
'add-int/lit8',
'sub-int/lit8',
'mul-int/lit8',
'div-int/lit8',
'rem-int/lit8',
'and-int/lit8',
'or-int/lit8',
'xor-int/lit8',
'shl-int/lit8',
'shr-int/lit8',
'ushr-int/lit8',
'static-get',
'static-put',
'instance-get',
'instance-put',
'execute-inline',
'execute-inline/range',
'iget-quick',
'iget-wide-quick',
'iget-object-quick',
'iput-quick',
'iput-wide-quick',
'iput-object-quick',
'iput-boolean-quick',
'iput-byte-quick',
'iput-char-quick',
'iput-short-quick',
'invoke-virtual-quick',
'invoke-virtual-quick/range',
'invoke-super-quick',
'invoke-super-quick/range',
'rsub-int',
'rsub-int/lit8',
];
/**
 * Builds an optional comma-separated list: zero occurrences, or `rule`
 * followed by any number of `, rule` repetitions.
 *
 * @param {Rule} rule - the rule for a single list element
 * @param {boolean?} trailing_separator - when true, one trailing `,` is also
 *   accepted after the last element
 *
 * @return {ChoiceRule}
 */
function commaSep(rule, trailing_separator = false) {
  const parts = [rule, repeat(seq(',', rule))];
  if (trailing_separator) {
    parts.push(optional(','));
  }
  return optional(seq(...parts));
}
/**
 * Builds a rule matching `rule` one or more times, with `separator` between
 * consecutive occurrences.
 *
 * @param {Rule|RegExp} rule - the rule for a single element
 * @param {Rule|RegExp|string} separator - what must appear between elements
 *
 * @return {SeqRule}
 */
function sep1(rule, separator) {
  const separatedTail = repeat(seq(separator, rule));
  return seq(rule, separatedTail);
}
module.exports = grammar({
name: 'smali',
conflicts: $ => [
[$.field_definition], // smali/src/test/resources/LexerTest/RealSmaliFileTest.smali to understand why
],
externals: $ => [
$.L,
$._class_ident,
],
extras: $ => [
$.comment,
/\s/,
],
supertypes: $ => [
$.directive,
$.literal,
$.register,
$.statement,
$.type,
$.value,
],
word: $ => $.identifier,
rules: {
class_definition: $ => seq(
$.class_directive,
$.super_directive,
optional($.source_directive),
repeat($.implements_directive),
repeat(choice(
$.annotation_directive,
$.method_definition,
$.field_definition,
)),
),
// class related
class_directive: $ => seq(
'.class',
optional($.access_modifiers),
$.class_identifier,
),
super_directive: $ => seq('.super', $.class_identifier),
source_directive: $ => seq('.source', $.string),
implements_directive: $ => seq('.implements', $.class_identifier),
field_definition: $ => seq(
'.field',
optional($.access_modifiers),
$._field_body,
optional(seq('=', $.value)),
optional(seq(
repeat($.annotation_directive),
'.end field',
)),
),
// Method
method_definition: $ => seq(
'.method',
optional($._method_access_modifiers),
$.method_signature,
repeat($.statement),
'.end method',
),
// annotation related
annotation_directive: $ => seq(
'.annotation',
$.annotation_visibility,
$.class_identifier,
repeat($.annotation_property),
'.end annotation',
),
annotation_visibility: _ => choice('system', 'build', 'runtime'),
annotation_property: $ => seq($.annotation_key, '=', $.annotation_value),
annotation_key: _ => /\w+/,
annotation_value: $ => choice(
$.literal,
$.body,
$.list,
$.enum_reference,
$.subannotation_directive,
$.class_identifier,
),
subannotation_directive: $ => seq(
'.subannotation',
$.class_identifier,
repeat($.annotation_property),
'.end subannotation',
),
param_directive: $ => prec.right(seq(
'.param',
$.parameter,
optional(choice(
seq(repeat($.annotation_directive), '.end param'),
seq(optional(','), choice($.literal, alias($.identifier, $.param_identifier))),
)),
)),
parameter_directive: $ => prec.right(seq(
'.parameter',
optional($.literal),
optional(seq(
repeat($.annotation_directive),
'.end parameter',
)),
)),
// code lines
statement: $ => choice(
$.label,
$.jmp_label,
$.directive,
$.annotation_directive,
$.expression,
),
// expression
expression: $ => seq(
$.opcode,
commaSep($.value),
'\n',
),
opcode: _ => choice(...opcodes),
value: $ => choice(
$.type,
$.list,
$.label,
$.jmp_label,
$.range,
$.register,
$.body,
$.literal,
$.enum_reference,
$.subannotation_directive,
$.method_handle,
$.custom_invoke,
),
// code declarations
directive: $ => choice(
$.line_directive,
$.locals_directive,
$.local_directive,
$.registers_directive,
$.param_directive,
$.parameter_directive,
$.catch_directive,
$.catchall_directive,
$.packed_switch_directive,
$.sparse_switch_directive,
$.array_data_directive,
$.end_local_directive,
$.restart_local_directive,
$.prologue_directive,
$.epilogue_directive,
$.source_directive,
),
line_directive: $ => seq('.line', $.number),
locals_directive: $ => seq('.locals', $.number),
local_directive: $ => seq(
'.local',
$.register,
optional(seq(
',', choice($.literal, $.identifier),
':', $.type,
optional(seq(',', $.string)),
)),
),
end_local_directive: $ => seq('.end local', $.register),
restart_local_directive: $ => seq('.restart local', $.register),
registers_directive: $ => seq('.registers', $.number),
catch_directive: $ => seq(
'.catch',
$.class_identifier,
choice(
seq('{', $.label, '..', $.label, '}', $.label),
seq('{', $.jmp_label, '..', $.jmp_label, '}', $.jmp_label),
),
),
catchall_directive: $ => seq(
'.catchall',
choice(
seq('{', $.label, '..', $.label, '}', $.label),
seq('{', $.jmp_label, '..', $.jmp_label, '}', $.jmp_label),
),
),
packed_switch_directive: $ => seq(
'.packed-switch',
$.number,
repeat(choice($.label, $.jmp_label)),
'.end packed-switch',
),
sparse_switch_directive: $ => seq(
'.sparse-switch',
repeat(seq($.number, '->', $.label)),
'.end sparse-switch',
),
array_data_directive: $ => seq(
'.array-data',
field('element_width', $.number),
field('value', repeat($.number)),
'.end array-data',
),
prologue_directive: _ => '.prologue',
epilogue_directive: _ => '.epilogue',
identifier: _ => /<?[a-zA-Z_$][a-zA-Z0-9_\-$]*>?/,
// class_identifier: _ => token(/L[^;]+;/),
class_identifier: $ => seq(
alias($.L, 'L'),
// repeat1(seq(alias($._class_ident, $.identifier), '/')),
// alias($._class_ident, $.identifier),
sep1(alias($._class_ident, $.identifier), '/'),
';',
),
// exclude :[SVIJFBZC]
label: _ => prec(-1, token(/:[^SVIJFBZC\s]([^:\sI][\w\d]*)?|:[^:\sI][\w\d]*/)),
jmp_label: _ => prec(-1, token(/\w+:/)),
// various "bodies"
body: $ => choice(
$._field_body,
$._full_field_body,
$.method_signature,
alias($._method_signature_body, $.method_signature),
$.full_method_signature,
),
_field_body: $ => seq(
alias(choice($.identifier, $.number), $.field_identifier),
':',
alias($.type, $.field_type),
),
method_signature: $ => seq(
alias(
choice(
seq(optional('-'), $.identifier),
$.number,
),
$.method_identifier,
),
$._method_signature_body,
),
_method_signature_body: $ => seq(
'(',
alias(repeat($.type), $.parameters),
')',
$.type,
),
method_handle: $ => seq(
$.opcode,
'@',
choice($._full_field_body, $.full_method_signature),
),
_full_field_body: $ => seq(
choice($.class_identifier, $.array_type),
'->',
$._field_body,
),
full_method_signature: $ => seq(
choice($.class_identifier, $.array_type),
'->',
$.method_signature,
),
custom_invoke: $ => seq(
$.identifier,
'(', commaSep(choice($.body, $.method_handle, $.string)), ')',
'@',
$.class_identifier,
'->',
$.method_signature,
),
// types
type: $ => choice(
$.primitive_type,
$.class_identifier,
$.array_type,
),
array_type: $ => seq('[', $.type),
// primitives > identifiers
// I don't know why this works, but for primitives in a statement's value,
// the first choice is needed, and for primitives in a signature/return type,
// the second choice is needed.
// TODO: maybe figure out why?
primitive_type: _ => choice(
token(choice(...primitives)),
token(prec(1, choice(...primitives))),
),
access_modifiers: $ => repeat1($.access_modifier),
_method_access_modifiers: $ => repeat1(choice($.access_modifier, 'constructor')),
access_modifier: _ => choice(...access_flags.concat(restriction_flags)),
enum_reference: $ => seq(
'.enum',
choice($._field_body, $._full_field_body),
),
// special builtins
register: $ => choice($.variable, $.parameter),
variable: _ => token.immediate(/v\d+/),
parameter: _ => token.immediate(/p\d+/),
// lists
list: $ => seq(
'{',
commaSep($.value),
'}',
),
range: $ => seq(
'{',
choice(
seq(field('start', $.register), '..', field('end', $.register)),
seq(field('start', $.number), '..', field('end', $.number)),
seq(field('start', $.jmp_label), '..', field('end', $.jmp_label)),
),
'}',
),
// literals
literal: $ => choice(
$.number,
$.float,
$.NaN,
$.Infinity,
$.string,
$.boolean,
$.character,
$.null,
),
number: $ => {
const hex_literal = seq(
optional(choice('-', '+')),
/0[xX]/,
/[\da-fA-F](_?[\da-fA-F])*/,
);
const decimal_digits = /\d(_?\d)*/;
const signed_integer = seq(optional(choice('-', '+')), decimal_digits);
const decimal_integer_literal = choice(
'0',
seq(optional('0'), /[1-9]/, optional(seq(optional('_'), decimal_digits))),
);
const decimal_literal = choice(
seq(optional('-'), decimal_integer_literal),
decimal_digits,
signed_integer,
);
return token(seq(
choice(hex_literal, decimal_literal),
alias(optional(/[LlSsTt]/), $.number_type),
));
},
float: $ => token(seq(
/-?(\d+(\.\d+)?|\.\d+)([Ee][+-]?\d+)?/,
alias(optional('f'), $.float_type),
)),
// FIXME: adding an optional 'f' doesn't work, I don't know why,
// so this approach was used instead
NaN: _ => token(prec(1, choice('NaN', 'NaNf'))),
Infinity: _ => token(prec(1, choice('Infinity', '-Infinity'))),
// string: _ => /"[^"\\]*(?:\\.[^"\\]*)*"/,
string: $ => seq(
'"',
repeat(choice(
$.string_fragment,
$._escape_sequence,
)),
'"',
),
// Workaround to https://github.com/tree-sitter/tree-sitter/issues/1156
// We give names to the token_ constructs containing a regexp
// so as to obtain a node in the CST.
// A run of one or more characters that are neither `"` nor `\`.
string_fragment: _ => token.immediate(prec(1, /[^"\\]+/)),
// Escape inside a string or character literal, in two tiers:
//  - precedence 2: a backslash followed by any character outside the set
//    a b f n r t v x u ' " \ ? — matched as an anonymous token;
//  - precedence 1: the named escape_sequence rule.
_escape_sequence: $ => choice(
prec(2, token.immediate(seq('\\', /[^abfnrtvxu'\"\\\?]/))),
prec(1, $.escape_sequence),
),
// Named escape token: a backslash followed by one of
//  - any single character other than `x`, `u` or an octal digit,
//  - one to three octal digits,
//  - `x` and exactly two hex digits,
//  - `u` and exactly four hex digits,
//  - `u{` one or more hex digits `}`.
escape_sequence: _ => token.immediate(seq(
'\\',
choice(
/[^xu0-7]/,
/[0-7]{1,3}/,
/x[0-9a-fA-F]{2}/,
/u[0-9a-fA-F]{4}/,
/u{[0-9a-fA-F]+}/,
),
)),
// The keywords `true` and `false`.
boolean: _ => choice('true', 'false'),
// Single-quoted character literal holding an escape sequence or one character that is
// neither `\` nor `'`. The content is optional, so `''` is accepted too.
character: $ => seq(
'\'',
optional(choice(
$._escape_sequence,
/[^\\']/,
)),
'\'',
),
// The keyword `null`.
null: _ => 'null',
// Line comment: `#` and everything after it up to the end of the line.
comment: _ => token(seq('#', /.*/)),
},
});

@ -0,0 +1,47 @@
{
"name": "tree-sitter-smali",
"version": "1.0.0",
"description": "Smali grammar for tree-sitter",
"main": "bindings/node",
"keywords": [
"parser",
"lexer",
"smali"
],
"author": "Yotam Nachum <me@yotam.net>",
"contributors": [
"Amaan Qureshi <amaanq12@gmail.com>"
],
"license": "MIT",
"bugs": {
"url": "https://github.com/amaanq/tree-sitter-smali/issues"
},
"homepage": "https://git.sr.ht/~yotam/tree-sitter-smali#readme",
"dependencies": {
"nan": "^2.15.0"
},
"devDependencies": {
"eslint": "^8.32.0",
"eslint-config-google": "^0.14.0",
"tree-sitter-cli": "^0.20.8"
},
"repository": "https://git.sr.ht/~yotam/tree-sitter-smali",
"scripts": {
"build": "tree-sitter generate && node-gyp build",
"parse": "tree-sitter parse",
"test": "tree-sitter test && script/parse-examples",
"test-windows": "tree-sitter test"
},
"tree-sitter": [
{
"scope": "source.smali",
"injection-regex": "smali",
"file-types": [
"smali"
],
"highlights": [
"queries/highlights.scm"
]
}
]
}

@ -0,0 +1,12 @@
; Node types captured as @fold: block-style directives, field and method
; definitions, and brace-delimited lists.
[
(annotation_directive)
(array_data_directive)
(field_definition)
(method_definition)
(packed_switch_directive)
(param_directive)
(parameter_directive)
(sparse_switch_directive)
(subannotation_directive)
(list)
] @fold

@ -0,0 +1,218 @@
; Types
; Every identifier segment of a class identifier is captured as @type.
(class_identifier
(identifier) @type)
(primitive_type) @type.builtin
; Class identifiers whose first segment is android, dalvik, java or kotlinx:
; the first segment and the segments after it are captured as @type.builtin.
((class_identifier
. (identifier) @_first @type.builtin
(identifier) @type.builtin)
(#any-of? @_first "android" "dalvik" "java" "kotlinx"))
; Same treatment for class identifiers that start with com/android or com/google.
((class_identifier
. (identifier) @_first @type.builtin
. (identifier) @_second @type.builtin
(identifier) @type.builtin)
(#eq? @_first "com")
(#any-of? @_second "android" "google"))
; Methods
; The name in a method definition is captured as @method.
(method_definition
(method_signature (method_identifier) @method))
; Method names referenced from an instruction whose opcode starts with "invoke".
(expression
(opcode) @_invoke
(body
(full_method_signature
(method_signature (method_identifier) @method.call)))
(#lua-match? @_invoke "^invoke"))
; Method names referenced from method handles and custom invokes.
(method_handle
(full_method_signature
(method_signature (method_identifier) @method.call)))
(custom_invoke
. (identifier) @method.call
(method_signature (method_identifier) @method.call))
; Method names appearing inside annotation values, in plain or full signature form.
(annotation_value
(body
(method_signature (method_identifier) @method.call)))
(annotation_value
(body
(full_method_signature
(method_signature (method_identifier) @method.call))))
; Method names appearing inside field definition bodies, in plain or full signature form.
(field_definition
(body
(method_signature (method_identifier) @method.call)))
(field_definition
(body
(full_method_signature
(method_signature (method_identifier) @method.call))))
; The special method names <init> and <clinit>, and the `constructor` keyword.
((method_identifier) @constructor
(#any-of? @constructor "<init>" "<clinit>"))
"constructor" @constructor
; Fields
[
(field_identifier)
(annotation_key)
] @field
; Field identifiers made up only of uppercase letters and underscores are captured as @constant.
; NOTE(review): the pattern has no digit class, so a name such as MAX_2 is not matched,
; and `*` lets the empty string match — confirm this is intended.
((field_identifier) @constant
(#lua-match? @constant "^[%u_]*$"))
; Variables
; vN registers, and the identifier child of a local directive.
(variable) @variable.builtin
(local_directive
(identifier) @variable)
; Parameters
; pN registers, and param_identifier nodes.
(parameter) @parameter.builtin
(param_identifier) @parameter
; Labels
[
(label)
(jmp_label)
] @label
; Operators
; Every opcode is captured as @keyword.operator; the patterns below add a more specific
; capture for opcodes that start with a given prefix (return, if, cmp, throw).
(opcode) @keyword.operator
((opcode) @keyword.return
(#lua-match? @keyword.return "^return"))
((opcode) @conditional
(#lua-match? @conditional "^if"))
((opcode) @conditional
(#lua-match? @conditional "^cmp"))
((opcode) @exception
(#lua-match? @exception "^throw"))
; The `nop` opcode is captured as @comment.
((opcode) @comment
(#eq? @comment "nop")) ; haha, anyone get it? ;)
[
"="
".."
] @operator
; Keywords
; Directive keywords, plus the prologue/epilogue directive nodes, captured as @keyword.
[
".class"
".super"
".implements"
".field"
".end field"
".annotation"
".end annotation"
".subannotation"
".end subannotation"
".param"
".end param"
".parameter"
".end parameter"
".line"
".locals"
".local"
".end local"
".restart local"
".registers"
".packed-switch"
".end packed-switch"
".sparse-switch"
".end sparse-switch"
".array-data"
".end array-data"
".enum"
(prologue_directive)
(epilogue_directive)
] @keyword
; `.source` is captured as @include.
[
".source"
] @include
; Method delimiters are captured as @keyword.function.
[
".method"
".end method"
] @keyword.function
; Catch directives are captured as @exception.
[
".catch"
".catchall"
] @exception
; Literals
(string) @string
; The content between the quotes of a `.source` string is captured as @text.uri.
(source_directive (string "\"" _ @text.uri "\""))
(escape_sequence) @string.escape
(character) @character
; The anonymous "L" token is captured as @character.special.
"L" @character.special
(number) @number
; float, NaN and Infinity all share the @float capture.
[
(float)
(NaN)
(Infinity)
] @float
(boolean) @boolean
(null) @constant.builtin
; Misc
(annotation_visibility) @storageclass
(access_modifier) @type.qualifier
; The "[" of an array type is captured as @punctuation.special.
(array_type
"[" @punctuation.special)
["{" "}"] @punctuation.bracket
["(" ")"] @punctuation.bracket
[
"->"
","
":"
";"
"@"
"/"
] @punctuation.delimiter
; The number in a `.line` directive gets two captures: @text.underline and @text.literal.
(line_directive (number) @text.underline @text.literal)
; Comments
(comment) @comment @spell
; Comments that are direct children of the class definition are additionally captured
; as @comment.documentation.
(class_definition
(comment) @comment.documentation)
; Errors
(ERROR) @error

@ -0,0 +1,32 @@
; Nodes captured as @indent.begin: block-style directives, definitions and lists.
[
(annotation_directive)
(array_data_directive)
(field_definition)
(method_definition)
(packed_switch_directive)
(param_directive)
(parameter_directive)
(sparse_switch_directive)
(subannotation_directive)
(list)
] @indent.begin
; Tokens captured as @indent.end: the matching `.end ...` keywords and the closing brace.
[
".end annotation"
".end array-data"
".end field"
".end method"
".end packed-switch"
".end param"
".end parameter"
".end sparse-switch"
".end subannotation"
"}"
] @indent.end
; Both braces are also captured as @indent.branch.
[ "{" "}" ] @indent.branch
; Error nodes and comments are captured as @indent.auto.
[
(ERROR)
(comment)
] @indent.auto

@ -0,0 +1,42 @@
; Nodes captured as @scope.
[
(class_directive)
(expression)
(annotation_directive)
(array_data_directive)
(method_definition)
(packed_switch_directive)
(sparse_switch_directive)
(subannotation_directive)
] @scope
; Nodes captured as @reference.
[
(identifier)
(class_identifier)
(label)
(jmp_label)
] @reference
; Definitions
; Enum constants: the field named by a `.enum` reference ...
(enum_reference
(field_identifier) @definition.enum)
; ... and fields whose access_modifiers text equals "enum".
; NOTE(review): #eq? compares against the text of the whole access_modifiers node, so a
; field declared with several modifiers (e.g. `public static final enum`) would not
; match — confirm whether that is intended.
((field_definition
(access_modifiers) @_mod
(field_identifier) @definition.enum)
(#eq? @_mod "enum"))
; Fields, with the field type captured as @definition.associated.
(field_definition
(field_identifier) @definition.field
(field_type) @definition.associated)
(annotation_key) @definition.field
; Methods and parameters.
(method_definition
(method_signature (method_identifier) @definition.method))
(param_identifier) @definition.parameter
; Types: the class named by an annotation directive or by the class directive.
(annotation_directive
(class_identifier) @definition.type)
(class_directive
(class_identifier) @definition.type)

@ -0,0 +1,15 @@
examples/smali/smali/src/test/resources/LexerTest/ByteLiteralTest.smali
examples/smali/smali/src/test/resources/LexerTest/CharLiteralTest.smali
examples/smali/smali/src/test/resources/LexerTest/CommentTest.smali
examples/smali/smali/src/test/resources/LexerTest/DirectiveTest.smali
examples/smali/smali/src/test/resources/LexerTest/FloatLiteralTest.smali
examples/smali/smali/src/test/resources/LexerTest/InstructionTest.smali
examples/smali/smali/src/test/resources/LexerTest/IntegerLiteralTest.smali
examples/smali/smali/src/test/resources/LexerTest/LongLiteralTest.smali
examples/smali/smali/src/test/resources/LexerTest/MiscTest.smali
examples/smali/smali/src/test/resources/LexerTest/ShortLiteralTest.smali
examples/smali/smali/src/test/resources/LexerTest/StringLiteralTest.smali
examples/smali/smali/src/test/resources/LexerTest/SymbolTest.smali
examples/smali/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali
examples/smali/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.smali
examples/smali2java/test_data/fc.smali

@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Clones pinned example repositories into examples/, runs `tree-sitter parse` over the
# .smali files found there, and prints how many of them parsed successfully.
# Abort on the first failing command (-e) and on any use of an unset variable (-u).
set -eu
# Work from the parent of the directory that contains this script, so the relative
# paths used below (examples/, script/known_failures.txt) resolve from there.
cd "$(dirname "$0")/.."
# Ensure examples/<name> contains a checkout of github.com/<owner>/<name> at <sha>.
#
# Arguments:
#   $1  owner  GitHub account that owns the repository
#   $2  name   repository name; also the directory name under examples/
#   $3  sha    commit the working tree must end up on
#
# The repository is cloned only when examples/<name> does not exist yet. If HEAD is
# not already at <sha>, the function fetches and hard-resets to it, discarding any
# local changes in that checkout.
function clone_repo {
  if [ "$#" -ne 3 ]; then
    echo "usage: clone_repo <owner> <name> <sha>" >&2
    return 1
  fi

  # Keep every working variable local so nothing leaks into the calling script.
  local owner=$1
  local name=$2
  local sha=$3
  local path=examples/$name
  local actual_sha

  if [ ! -d "$path" ]; then
    echo "Cloning $owner/$name"
    git clone "https://github.com/$owner/$name" "$path"
  fi

  pushd "$path" >/dev/null
  # Assigned separately from the `local` declaration: `local var=$(cmd)` would report
  # the status of `local` and hide a failing `git rev-parse` from `set -e`.
  actual_sha=$(git rev-parse HEAD)
  if [ "$actual_sha" != "$sha" ]; then
    echo "Updating $owner/$name to $sha"
    git fetch
    git reset --hard "$sha"
  fi
  popd >/dev/null
}
# Example corpora, each pinned to a fixed commit.
clone_repo JesusFreke smali 2771eae0a11f07bd892732232e6ee4e32437230d
clone_repo AlexeySoshin smali2java 95795b9ccd540ae72987ba68896a783f312b8c29
clone_repo amaanq misc-smali-code 220aa27251f3ed680d563de3f174b1106c9b9f0e
# Whitespace-separated list of example paths that are expected not to parse.
known_failures="$(cat script/known_failures.txt)"
# Parse every example quietly; each known failure is passed as a `!path` argument.
# The command substitution is intentionally unquoted so that every `!path` becomes its
# own argument, hence the shellcheck suppression.
# shellcheck disable=2046
tree-sitter parse -q \
"examples/**/*.smali" \
$(for failure in $known_failures; do echo "!${failure}"; done)
# Summary: total .smali files found, minus the number of entries in the known-failure
# list (counted as words, whether or not each listed file exists).
example_count=$(find examples -name "*.smali" | wc -l)
failure_count=$(wc -w <<<"$known_failures")
success_count=$((example_count - failure_count))
# bc is used for the floating-point percentage.
success_percent=$(bc -l <<<"100*${success_count}/${example_count}")
printf \
"Successfully parsed %d of %d example files (%.1f%%)\n" \
"$success_count" "$example_count" "$success_percent"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,53 @@
#include <tree_sitter/parser.h>
#include <wctype.h>
// Tokens produced by this external scanner. The enumerators index the `valid_symbols`
// array passed to the scan function and are stored in `lexer->result_symbol`.
// NOTE(review): the order presumably has to match the grammar's `externals` list, which
// is not visible here — confirm before reordering.
enum TokenType {
L, // the single character 'L'
CLASS_IDENTIFIER, // a run of alphanumerics, '_', '-' and '$'
};
/*
 * Creates the scanner payload. This scanner keeps no state, so there is nothing to
 * allocate and NULL is returned.
 *
 * Declared with an explicit (void) parameter list so the definition is a real
 * prototype matching the `void *(*create)(void)` slot in TSLanguage.external_scanner.
 */
void *tree_sitter_smali_external_scanner_create(void) { return NULL; }
/*
 * Releases the scanner payload. Nothing is done here: the payload is never
 * read or written.
 */
void tree_sitter_smali_external_scanner_destroy(void *payload) {
  (void)payload;
}
/*
 * Resets the scanner payload. This is a no-op: the payload is never touched.
 * The external_scanner table in TSLanguage has no `reset` slot, so this function
 * does not correspond to any callback declared there.
 */
void tree_sitter_smali_external_scanner_reset(void *payload) {
  (void)payload;
}
/*
 * Serializes the scanner state into `buffer` and returns the number of bytes written.
 * The scanner is stateless, so the buffer is left untouched and the result is always 0.
 */
unsigned tree_sitter_smali_external_scanner_serialize(void *payload,
                                                      char *buffer) {
  const unsigned bytes_written = 0;
  (void)payload;
  (void)buffer;
  return bytes_written;
}
/*
 * Restores scanner state from `length` bytes of `buffer`. The scanner is stateless,
 * so every argument is ignored.
 */
void tree_sitter_smali_external_scanner_deserialize(void *payload,
                                                    const char *buffer,
                                                    unsigned length) {
  (void)payload;
  (void)buffer;
  (void)length;
}
/* Moves the lexer one character forward, passing `false` as the skip flag. */
static void advance(TSLexer *lexer) {
  const bool skip_character = false;
  lexer->advance(lexer, skip_character);
}
/* Moves the lexer one character forward, passing `true` as the skip flag. */
static void skip(TSLexer *lexer) {
  const bool skip_character = true;
  lexer->advance(lexer, skip_character);
}
/*
 * External scanner entry point.
 *
 * payload        unused; this scanner keeps no state.
 * lexer          lexer to read characters from and to report the token through.
 * valid_symbols  indexed by enum TokenType; true for every token the parser can
 *                accept at the current position.
 *
 * Returns true when a token was recognized (lexer->result_symbol says which one),
 * false otherwise.
 *
 *   L                 - after skipping leading whitespace, the single character 'L'.
 *   CLASS_IDENTIFIER  - a non-empty run of alphanumerics, '_', '-' and '$'.
 *
 * A CLASS_IDENTIFIER is only reported when at least one character was consumed.
 * The previous version returned true even for an empty run, i.e. it emitted a
 * zero-width token whenever CLASS_IDENTIFIER was valid, which the tree-sitter
 * external-scanner documentation warns against.
 */
bool tree_sitter_smali_external_scanner_scan(void *payload, TSLexer *lexer,
                                             const bool *valid_symbols) {
  (void)payload;

  if (valid_symbols[L]) {
    // Leading whitespace is skipped so it does not become part of the token.
    while (iswspace(lexer->lookahead)) {
      skip(lexer);
    }
    if (lexer->lookahead == 'L') {
      lexer->result_symbol = L;
      advance(lexer);
      return true;
    }
    // Not an 'L': fall through and try CLASS_IDENTIFIER at the current position.
  }

  if (valid_symbols[CLASS_IDENTIFIER]) {
    bool consumed_any = false;
    // Any alphanumeric plus '_', '-' and '$'; the run stops at '/', ';' or any
    // other character.
    while (iswalnum(lexer->lookahead) || lexer->lookahead == '_' ||
           lexer->lookahead == '-' || lexer->lookahead == '$') {
      advance(lexer);
      consumed_any = true;
    }
    if (consumed_any) {
      lexer->result_symbol = CLASS_IDENTIFIER;
      return true;
    }
  }

  return false;
}

@ -0,0 +1,224 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Built-in symbol ids; judging by the names, the error symbol and the end-of-input symbol.
// The error symbol is the all-ones TSSymbol value ((TSSymbol)-1).
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// NOTE(review): presumably the capacity of the buffer handed to the external scanner's
// serialize callback — confirm against the tree-sitter runtime.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// Parse state identifier.
typedef uint16_t TSStateId;
// These typedefs are skipped when TREE_SITTER_API_H_ is already defined.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field assignment: a field id, the index of a child, and an `inherited` flag.
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// An (index, length) pair.
// NOTE(review): presumably a slice into TSLanguage.field_map_entries — confirm.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Per-symbol flags: visible, named, supertype.
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
// Lexer interface handed to the generated lex function and to external scanners.
typedef struct TSLexer TSLexer;
struct TSLexer {
// Current lookahead character.
int32_t lookahead;
// Symbol of the recognized token; written by ACCEPT_TOKEN and by external scanners.
TSSymbol result_symbol;
// Move to the next character; the bool argument is the `skip` flag (see START_LEXER / SKIP).
void (*advance)(TSLexer *, bool);
// Called by ACCEPT_TOKEN once a token has been accepted.
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
// Discriminator stored in the `type` member of TSParseAction.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. Every variant starts with a uint8_t `type` member, so the
// bare `type` member can be read regardless of which variant was stored.
typedef union {
// Shift: the target state plus the `extra` and `repetition` flags.
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
// Reduce: the number of children, the resulting symbol, a dynamic precedence and a production id.
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
// A pair of lex state ids: one for the generated lexer, one for the external scanner.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Element of the parse_actions table: either an action or an `entry` header holding a
// count and a `reusable` flag.
// NOTE(review): presumably the header precedes `count` actions — confirm.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
// Tables, counts and callbacks that make up one language definition.
// The field order is part of the layout and must not be changed.
struct TSLanguage {
uint32_t version;
// Counts.
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
// Parse tables and the action entries they refer to.
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
// Symbol and field names and metadata.
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
// Lexing.
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// External scanner callbacks.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
// Prologue of a lex function. Declares `result`, `skip`, `eof` and `lookahead`, then
// jumps to `start`. The `next_state` label advances the lexer (passing the current
// `skip` flag) and falls through to `start`, which clears `skip` and reloads `lookahead`.
// The macros below expect `lexer` and `state` to be in scope; neither is declared here.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// Consume the current character and continue in `state_value`.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Like ADVANCE, but sets the `skip` flag that is passed to lexer->advance.
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// Record `symbol_value` as the result and mark the token end at the current position.
// Expands to three separate statements with no enclosing braces.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// Leave the lex function, returning whether a token was accepted.
#define END_STATE() return result;
/*
 * Parse Table Macros
 */

// Index of small state `id`, i.e. its offset past the large states. LARGE_STATE_COUNT
// must be defined by the including translation unit. The argument and the whole
// expansion are parenthesized so the macro behaves as a single operand inside any
// surrounding expression.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

// Identity wrappers that label state ids and action ids in the generated tables.
#define STATE(id) id

#define ACTIONS(id) id
// Each macro below expands to a brace-enclosed initializer that sets one variant of a
// parse action. Members that are not named are left to default initialization.

// Shift to `state_value`.
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
// Shift to `state_value` with the `repetition` flag set.
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
// Shift with the `extra` flag set; no target state is given.
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
// Reduce `child_count_val` children to `symbol_val`. Additional designated
// initializers for the reduce struct may be passed as trailing arguments.
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
// Action of type Recover.
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
// Action of type Accept.
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

@ -0,0 +1,175 @@
========================================================================
Test an empty class
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string)))
========================================================================
Test a class with a source file
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source "SourceFile.java"
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string
(string_fragment))))
========================================================================
Test a class that implements an interface
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.implements LD/EF;
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(implements_directive
(class_identifier
(identifier)
(identifier))))
========================================================================
Test a class that implements multiple interfaces
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.implements LD/EF;
.implements LG/HI;
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(implements_directive
(class_identifier
(identifier)
(identifier)))
(implements_directive
(class_identifier
(identifier)
(identifier))))
========================================================================
Test a class with a non valid Java character
========================================================================
.class public LA/-BC;
.super Ljava/lang/Object;
.source ""
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string)))
========================================================================
Test a class without an access modifier
========================================================================
.class LA/-BC;
.super Ljava/lang/Object;
.source ""
---
(class_definition
(class_directive
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string)))

@ -0,0 +1,33 @@
========================================================================
Test a field without an access modifier
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.field a:Ljava/lang/String;
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(field_definition
(field_identifier)
(field_type
(class_identifier
(identifier)
(identifier)
(identifier)))))

@ -0,0 +1,144 @@
========================================================================
Test an empty interface
========================================================================
.class public interface abstract LA/BC;
.super Ljava/lang/Object;
.source ""
---
(class_definition
(class_directive
(access_modifiers
(access_modifier)
(access_modifier)
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string)))
========================================================================
Test an empty interface that extends another interface
========================================================================
.class public interface abstract LA/BC;
.super Ljava/lang/Object;
.source ""
.implements LD/EF;
---
(class_definition
(class_directive
(access_modifiers
(access_modifier)
(access_modifier)
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(implements_directive
(class_identifier
(identifier)
(identifier))))
========================================================================
Test an interface with one method
========================================================================
.class public interface abstract LA/BC;
.super Ljava/lang/Object;
.source ""
.method public abstract action()V
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier)
(access_modifier)
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(access_modifier)
(method_signature
(method_identifier)
(primitive_type))))
========================================================================
Test an interface with one method with parameters and return value
========================================================================
.class public interface abstract LA/BC;
.super Ljava/lang/Object;
.source ""
.method public abstract action(LD/EF;I)LD/EF;
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier)
(access_modifier)
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(access_modifier)
(method_signature
(method_identifier)
(parameters
(class_identifier
(identifier)
(identifier))
(primitive_type))
(class_identifier
(identifier)
(identifier)))))

@ -0,0 +1,224 @@
========================================================================
Test an empty method
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method public empty()V
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(method_signature
(method_identifier)
(primitive_type))))
========================================================================
Test a method with no modifiers
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method empty()V
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(method_signature
(method_identifier)
(primitive_type))))
========================================================================
Test a method with one primitive parameter
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method public empty(I)V
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(method_signature
(method_identifier)
(parameters
(primitive_type))
(primitive_type))))
========================================================================
Test a method with multiple primitive parameters
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method public empty(IZJ)V
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(method_signature
(method_identifier)
(parameters
(primitive_type)
(primitive_type)
(primitive_type))
(primitive_type))))
========================================================================
Test a method with an object parameter
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method public static foo(Ljava/lang/String;)V
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(access_modifier)
(method_signature
(method_identifier)
(parameters
(class_identifier
(identifier)
(identifier)
(identifier)))
(primitive_type))))
========================================================================
Test a method with an array parameter
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method public static foo([Ljava/lang/String;)V
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(access_modifier)
(method_signature
(method_identifier)
(parameters
(array_type
(class_identifier
(identifier)
(identifier)
(identifier))))
(primitive_type))))

@ -0,0 +1,121 @@
========================================================================
Test a simple param
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method public simple()V
.param p1
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(method_signature
(method_identifier)
(primitive_type))
(param_directive
(parameter))))
========================================================================
Test a simple param block
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method public simple()V
.param p1
.end param
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(method_signature
(method_identifier)
(primitive_type))
(param_directive
(parameter))))
========================================================================
Test a param block with an empty annotation
========================================================================
.class public LA/BC;
.super Ljava/lang/Object;
.source ""
.method public simple()V
.param p1
.annotation build Landroidx/annotation/RecentlyNonNull;
.end annotation
.end param
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(method_signature
(method_identifier)
(primitive_type))
(param_directive
(parameter)
(annotation_directive
(annotation_visibility)
(class_identifier
(identifier)
(identifier)
(identifier))))))

@ -0,0 +1,90 @@
========================================================================
Test an empty statement
========================================================================
.class public interface abstract LA/BC;
.super Ljava/lang/Object;
.source ""
.method public empty()V
return-void
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier)
(access_modifier)
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(method_signature
(method_identifier)
(primitive_type))
(expression
(opcode))))
========================================================================
Test statements with variable and number literal arguments
========================================================================
.class public interface abstract LA/BC;
.super Ljava/lang/Object;
.source ""
.method public static main([Ljava/lang/String;)I
const v0, 0x0
return v0
.end method
---
(class_definition
(class_directive
(access_modifiers
(access_modifier)
(access_modifier)
(access_modifier))
(class_identifier
(identifier)
(identifier)))
(super_directive
(class_identifier
(identifier)
(identifier)
(identifier)))
(source_directive
(string))
(method_definition
(access_modifier)
(access_modifier)
(method_signature
(method_identifier)
(parameters
(array_type
(class_identifier
(identifier)
(identifier)
(identifier))))
(primitive_type))
(expression
(opcode)
(variable)
(number))
(expression
(opcode)
(variable))))