Add support for Dart

pull/185/head
Ömer Sinan Ağacan 2022-03-18 21:36:22 +07:00 committed by Wilfred Hughes
parent b8c7ce2398
commit a2488f984c
10 changed files with 111795 additions and 0 deletions

@ -1,5 +1,9 @@
## 0.24 (unreleased)
### Parsing
Added support for Dart.
## 0.23.1 (released 19th March 2022)
Fixed crash where the 'shrink unchanged' logic would not set the

@ -61,6 +61,7 @@ Difftastic supports the following languages:
* Clojure
* Common Lisp
* CSS
* Dart
* Elixir
* Emacs Lisp
* Go

@ -89,6 +89,11 @@ fn main() {
src_dir: "vendor/tree-sitter-css-src",
extra_files: vec!["scanner.c"],
},
TreeSitterParser {
name: "tree-sitter-dart",
src_dir: "vendor/tree-sitter-dart-src",
extra_files: vec!["scanner.c"],
},
TreeSitterParser {
name: "tree-sitter-elisp",
src_dir: "vendor/tree-sitter-elisp-src",

@ -11,6 +11,7 @@ Difftastic uses the following tree-sitter parsers:
| Clojure | [sogaiu/tree-sitter-clojure](https://github.com/sogaiu/tree-sitter-clojure) |
| Common Lisp | [theHamsta/tree-sitter-commonlisp](https://github.com/theHamsta/tree-sitter-commonlisp) |
| CSS | [tree-sitter/tree-sitter-css](https://github.com/tree-sitter/tree-sitter-css) |
| Dart | [UserNobody14/tree-sitter-dart](https://github.com/UserNobody14/tree-sitter-dart) |
| Elixir | [elixir-lang/tree-sitter-elixir](https://github.com/elixir-lang/tree-sitter-elixir) |
| Emacs Lisp | [wilfred/tree-sitter-elisp](https://github.com/Wilfred/tree-sitter-elisp) |
| Go | [tree-sitter/tree-sitter-go](https://github.com/tree-sitter/tree-sitter-go) |

@ -25,6 +25,7 @@ pub enum Language {
CPlusPlus,
CSharp,
Css,
Dart,
Elixir,
EmacsLisp,
Go,
@ -87,6 +88,7 @@ fn from_emacs_mode_header(src: &str) -> Option<Language> {
"clojure" => Some(Clojure),
"csharp" => Some(CSharp),
"css" => Some(Css),
"dart" => Some(Dart),
"c++" => Some(CPlusPlus),
"elixir" => Some(Elixir),
"emacs-lisp" => Some(EmacsLisp),

@ -50,6 +50,7 @@ extern "C" {
fn tree_sitter_cpp() -> ts::Language;
fn tree_sitter_commonlisp() -> ts::Language;
fn tree_sitter_css() -> ts::Language;
fn tree_sitter_dart() -> ts::Language;
fn tree_sitter_elisp() -> ts::Language;
fn tree_sitter_elixir() -> ts::Language;
fn tree_sitter_go() -> ts::Language;
@ -190,6 +191,20 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig {
.unwrap(),
}
}
Dart => {
    // SAFETY: `tree_sitter_dart` is declared in this file's `extern "C"`
    // block and takes no arguments. It is expected to be provided by the
    // vendored tree-sitter-dart parser that the build script compiles.
    let language = unsafe { tree_sitter_dart() };
    TreeSitterConfig {
        // Name reported for this configuration; it must identify Dart,
        // not the CSS arm this block was modelled on.
        name: "Dart",
        language,
        // No node kinds are forced to be treated as atoms for Dart.
        atom_nodes: HashSet::new(),
        // Token pairs treated as opening/closing delimiters.
        delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")],
        // The highlight query is embedded at compile time; `unwrap` panics
        // if the bundled query does not compile against this grammar.
        highlight_query: ts::Query::new(
            language,
            include_str!("../vendor/highlights/dart.scm"),
        )
        .unwrap(),
    }
}
EmacsLisp => {
let language = unsafe { tree_sitter_elisp() };
TreeSitterConfig {

@ -0,0 +1 @@
../tree-sitter-dart/queries/highlights.scm

File diff suppressed because it is too large Load Diff

@ -0,0 +1,172 @@
#include <tree_sitter/parser.h>
#include <wctype.h>
// Token kinds handled by this external scanner. Each enumerator is used both
// as an index into the `valid_symbols` array passed to the scan function and
// as the value stored in `lexer->result_symbol`.
// NOTE(review): the order presumably has to match the grammar's list of
// external tokens — confirm against the grammar before reordering.
enum TokenType {
AUTOMATIC_SEMICOLON,
TEMPLATE_CHARS_SINGLE,
TEMPLATE_CHARS_DOUBLE,
TEMPLATE_CHARS_SINGLE_SINGLE,
TEMPLATE_CHARS_DOUBLE_SINGLE,
TEMPLATE_CHARS_RAW_SLASH
};
// External-scanner lifecycle hooks. This scanner keeps no state between
// calls, so every hook is a no-op: there is no payload to allocate, free,
// reset, or (de)serialize.

// Returns the scanner payload; NULL because the scanner is stateless.
// Declared with an explicit `(void)` parameter list so the definition is a
// real prototype matching the `void *(*create)(void)` hook type.
void *tree_sitter_dart_external_scanner_create(void) { return NULL; }

// Releases the payload; nothing to release.
void tree_sitter_dart_external_scanner_destroy(void *p) { (void)p; }

// Resets the payload; nothing to reset.
void tree_sitter_dart_external_scanner_reset(void *p) { (void)p; }

// Writes the scanner state into `buffer` and returns the number of bytes
// written; always 0 because there is no state.
unsigned tree_sitter_dart_external_scanner_serialize(void *p, char *buffer) {
  (void)p;
  (void)buffer;
  return 0;
}

// Restores scanner state from the `n` bytes at `b`; nothing to restore.
void tree_sitter_dart_external_scanner_deserialize(void *p, const char *b, unsigned n) {
  (void)p;
  (void)b;
  (void)n;
}
// Moves the lexer forward by one character, passing `false` as the second
// (skip) argument of the lexer's `advance` callback.
static void advance(TSLexer *lexer) {
  lexer->advance(lexer, false);
}
// Consumes any run of whitespace, `//` line comments and `/* ... */` block
// comments starting at the current position.
//
// Returns true once the lookahead is something other than whitespace or '/'.
// Returns false when a '/' was consumed that does not start a comment.
// An unterminated block comment simply runs to end of input (lookahead 0),
// after which the function returns true.
static bool scan_whitespace_and_comments(TSLexer *lexer) {
  while (true) {
    while (iswspace(lexer->lookahead)) {
      advance(lexer);
    }

    // Anything other than '/' ends the skippable region.
    if (lexer->lookahead != '/') {
      return true;
    }
    advance(lexer);

    switch (lexer->lookahead) {
      case '/':
        // Line comment: swallow the second '/', then everything up to (but
        // not including) the newline or end of input.
        do {
          advance(lexer);
        } while (lexer->lookahead != 0 && lexer->lookahead != '\n');
        break;

      case '*': {
        // Block comment: swallow '*', then scan until the closing "*/".
        bool closed = false;
        advance(lexer);
        while (!closed && lexer->lookahead != 0) {
          bool saw_star = lexer->lookahead == '*';
          advance(lexer);
          if (saw_star && lexer->lookahead == '/') {
            advance(lexer);
            closed = true;
          }
        }
        break;
      }

      default:
        // A lone '/' is an operator, not a comment.
        return false;
    }
  }
}
// Entry point of the external scanner.
//
// payload:       scanner state; unused here.
// lexer:         lexer handle; this function reads `lookahead`, sets
//                `result_symbol`, and calls `advance` / `mark_end` /
//                `is_at_included_range_start`.
// valid_symbols: array indexed by `enum TokenType`; a true entry means that
//                token kind is being asked for at the current position.
//
// Returns true when a token was recognised (its kind is in
// `lexer->result_symbol`), false otherwise.
//
// Two modes:
//  * If any of the four TEMPLATE_CHARS_{SINGLE,DOUBLE}[_SINGLE] kinds is
//    valid, scan a run of string-content characters.
//  * Otherwise, decide whether an AUTOMATIC_SEMICOLON token should be produced.
// NOTE(review): the second mode does not check
// `valid_symbols[AUTOMATIC_SEMICOLON]`, so it also runs when only
// TEMPLATE_CHARS_RAW_SLASH is valid — confirm that this is intended.
bool tree_sitter_dart_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
if (valid_symbols[TEMPLATE_CHARS_DOUBLE] ||
valid_symbols[TEMPLATE_CHARS_SINGLE] ||
valid_symbols[TEMPLATE_CHARS_DOUBLE_SINGLE] ||
valid_symbols[TEMPLATE_CHARS_SINGLE_SINGLE]) {
// Both string content and an automatic semicolon being valid at once is
// treated as "no token".
if (valid_symbols[AUTOMATIC_SEMICOLON]) return false;
// Pick the result kind; when several are valid the priority is
// DOUBLE, SINGLE, SINGLE_SINGLE, then DOUBLE_SINGLE.
if(valid_symbols[TEMPLATE_CHARS_DOUBLE]) {
lexer->result_symbol = TEMPLATE_CHARS_DOUBLE;
} else if (valid_symbols[TEMPLATE_CHARS_SINGLE]) {
lexer->result_symbol = TEMPLATE_CHARS_SINGLE;
} else if (valid_symbols[TEMPLATE_CHARS_SINGLE_SINGLE]) {
lexer->result_symbol = TEMPLATE_CHARS_SINGLE_SINGLE;
} else {
lexer->result_symbol = TEMPLATE_CHARS_DOUBLE_SINGLE;
}
// `has_content` is false on the first iteration and true afterwards, i.e.
// it records whether at least one character has been consumed.
for (bool has_content = false;; has_content = true) {
// Mark the token end before inspecting the next character, so a
// terminating character is never included in the token.
lexer->mark_end(lexer);
switch (lexer->lookahead) {
// Either quote character ends the run.
case '\'':
case '"':
return has_content;
case '\n':
// A newline rejects the token when either *_SINGLE kind is valid;
// otherwise it is consumed as content.
if (valid_symbols[TEMPLATE_CHARS_DOUBLE_SINGLE] || valid_symbols[TEMPLATE_CHARS_SINGLE_SINGLE]) return false;
advance(lexer);
break;
// Lookahead 0 (end of input) inside a string: no token.
case '\0':
return false;
case '$':
// '$' always ends the run. The disabled code below would only have
// stopped when the '$' was followed by '{'.
// advance(lexer);
// if (lexer->lookahead == '{') return has_content;
// break;
return has_content;
case '\\':
// A backslash is consumed as content only when RAW_SLASH is valid, in
// which case the token kind is switched to TEMPLATE_CHARS_RAW_SLASH;
// otherwise it ends the run.
if (valid_symbols[TEMPLATE_CHARS_RAW_SLASH]) {
lexer->result_symbol = TEMPLATE_CHARS_RAW_SLASH;
advance(lexer);
} else {
return has_content;
}
break;
default:
advance(lexer);
}
}
} else {
// Automatic-semicolon mode. The token end is marked immediately, so
// nothing consumed below becomes part of the token.
lexer->result_symbol = AUTOMATIC_SEMICOLON;
lexer->mark_end(lexer);
// Look at the rest of the current line: end of input, '}' or the start of
// an included range yields a semicolon; any non-whitespace character
// before a newline means no semicolon.
for (;;) {
if (lexer->lookahead == 0) return true;
if (lexer->lookahead == '}') return true;
if (lexer->is_at_included_range_start(lexer)) return true;
if (!iswspace(lexer->lookahead)) return false;
if (lexer->lookahead == '\n') break;
advance(lexer);
}
// Step over the newline, then over following whitespace and comments. A
// stray '/' that is not a comment means no semicolon.
advance(lexer);
if (!scan_whitespace_and_comments(lexer)) return false;
// Decide from the first significant character of the next line.
switch (lexer->lookahead) {
// These characters make the next line a continuation: no semicolon.
case ',':
case '.':
case ':':
case ';':
case '*':
case '%':
case '>':
case '<':
case '=':
case '[':
case '(':
case '?':
case '^':
case '|':
case '&':
case '/':
return false;
// Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
case '+':
advance(lexer);
return lexer->lookahead == '+';
case '-':
advance(lexer);
return lexer->lookahead == '-';
// Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
case '!':
advance(lexer);
return lexer->lookahead != '=';
// Don't insert a semicolon before `in` or `instanceof`, but do insert one
// before an identifier.
// NOTE(review): `instanceof` is not a Dart keyword; this check looks
// inherited from a JavaScript scanner — confirm whether it should stay.
case 'i':
advance(lexer);
if (lexer->lookahead != 'n') return true;
advance(lexer);
// "in" followed by a non-letter is the keyword itself.
if (!iswalpha(lexer->lookahead)) return false;
// Otherwise require the remaining letters of "instanceof"; any mismatch
// means an ordinary identifier.
for (unsigned i = 0; i < 8; i++) {
if (lexer->lookahead != "stanceof"[i]) return true;
advance(lexer);
}
if (!iswalpha(lexer->lookahead)) return false;
break;
}
return true;
}
}

@ -0,0 +1,224 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Reserved symbol values: `ts_builtin_sym_error` is -1 converted to TSSymbol,
// `ts_builtin_sym_end` is 0.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// NOTE(review): presumably the size of the buffer handed to an external
// scanner's serialize hook — confirm against the runtime.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// Identifier of a parse state.
typedef uint16_t TSStateId;
// These names are declared here only when TREE_SITTER_API_H_ is not defined,
// presumably because that header already declares them.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field-map record: a field id, a child index and an `inherited` flag.
// Instances are referenced through `TSLanguage.field_map_entries`.
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// An (index, length) pair referenced through `TSLanguage.field_map_slices`.
// NOTE(review): presumably a range into `field_map_entries` — confirm.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Per-symbol flags, referenced through `TSLanguage.symbol_metadata`.
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
// Lexer interface handed to generated lex functions and external scanners.
// Every callback takes the lexer itself as its first argument.
typedef struct TSLexer TSLexer;
struct TSLexer {
// Current character under inspection; the lexer macros below copy it into a
// local `lookahead` after every advance.
int32_t lookahead;
// Symbol of the recognised token; written by ACCEPT_TOKEN and by scanners.
TSSymbol result_symbol;
// Move to the next character. The bool is the `skip` flag that the SKIP
// macro sets to true and ADVANCE leaves false.
void (*advance)(TSLexer *, bool);
// Record the current position as the end of the token being recognised.
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
// Kinds of parse action. The SHIFT*/REDUCE/RECOVER/ACCEPT_INPUT macros below
// store these values in the `type` member of a TSParseAction.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. Both struct variants begin with a `uint8_t type`,
// mirroring the bare `type` member at the end of the union.
typedef union {
// Shift variant: target state plus `extra` / `repetition` flags.
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
// Reduce variant: number of children, resulting symbol, dynamic precedence
// and production id.
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
// Used directly by RECOVER() and ACCEPT_INPUT(), which set only the type.
uint8_t type;
} TSParseAction;
// Pair of lex-state numbers (internal and external), referenced through
// `TSLanguage.lex_modes`.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Element type of `TSLanguage.parse_actions`: either an action or an `entry`
// header carrying a count and a `reusable` flag.
// NOTE(review): presumably each header is followed by `count` actions —
// confirm against the generated tables.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
// Description of a generated parser: a version number, element counts,
// pointers to the generated tables, the lex functions, and the hooks of an
// optional external scanner. The layout is shared with code outside this
// header, so members must not be reordered or resized.
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
// Lex functions take the lexer and a lex state and return whether a token
// was recognised (see END_STATE below).
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// External scanner hooks. There is no `reset` hook in this struct.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
// The bool array tells the scanner which external tokens are acceptable.
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
// These macros expect a `TSLexer *lexer` and an assignable `state` variable
// to be in scope where they are expanded.

// Declares the lexer function's locals and its two labels. Control jumps
// straight to `start` on entry; jumping to `next_state` first advances the
// lexer with the current `skip` flag and then falls through to `start`,
// which clears `skip` and reloads `lookahead`.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// Consume the current character and continue in `state_value`.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Same as ADVANCE, but the character is advanced over with `skip` set to true.
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// Record `symbol_value` as the recognised token ending at the current
// position. Expands to three separate statements (not wrapped in a block), so
// it must not be used as the sole body of an unbraced `if`/`else`.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// Leave the lex function, reporting whether any token was accepted.
#define END_STATE() return result;
/*
* Parse Table Macros
*/
// SMALL_STATE(id) rebases a state id by subtracting LARGE_STATE_COUNT.
// LARGE_STATE_COUNT is not defined in this header and must be defined where
// the macro is expanded. The argument and the whole expansion are
// parenthesized so the macro keeps its meaning inside larger expressions
// (e.g. `2 * SMALL_STATE(x)` or `SMALL_STATE(a + b)`).
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
// STATE(id) and ACTIONS(id) expand to their argument unchanged.
#define STATE(id) id
#define ACTIONS(id) id
// Each macro below expands to a doubly-braced initializer for one
// TSParseActionEntry: the outer braces initialize the entry union (its first
// member is `action`), the inner braces initialize the TSParseAction.

// Shift to `state_value`.
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
// Shift to `state_value` with the `repetition` flag set.
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
// Shift with the `extra` flag set and no explicit target state.
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
// Reduce `child_count_val` children to `symbol_val`. Additional designated
// initializers for the reduce variant can be passed through the variadic
// arguments.
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
// Action whose type is TSParseActionTypeRecover.
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
// Action whose type is TSParseActionTypeAccept.
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_