Add basic syntax highlighting for keywords and operators

Helps with #32
pull/48/head
Wilfred Hughes 2021-09-30 22:03:32 +07:00
parent c92007e5b3
commit 3df7bb57e1
25 changed files with 334 additions and 178 deletions

@ -12,6 +12,7 @@ edition = "2018"
include = [
"/build.rs",
"/src/",
"/vendor/highlights/*.scm",
"/vendor/**/*.c",
"/vendor/**/*.h",
"/vendor/**/*.cc",

@ -49,6 +49,14 @@ You can now add the parser to build by including the directory in
`build.rs`. You will also need to update `tree_sitter_parser.rs` to
configure the file extensions, atoms and delimiters.
To add syntax highlighting for the language, you'll also need to add a
symlink in `vendor/highlights` to the parser's `highlights.scm`, if it has one.
```
$ cd vendor/highlights
$ ln -s ../tree-sitter-java/queries/highlights.scm java.scm
```
## Updating a parser
To update a parser, pull commits from the upstream git repository. For

@ -158,7 +158,7 @@ fn main() {
Some(ts_lang) => {
let bytes = read_or_die(&path);
let src = String::from_utf8_lossy(&bytes).to_string();
let tree = tsp::parse_to_tree(&src, &ts_lang);
let (tree, _) = tsp::parse_to_tree(&src, &ts_lang);
tsp::print_tree(&tree);
}
None => {

@ -131,7 +131,7 @@ mod tests {
use crate::{
graph::Edge::*,
positions::SingleLineSpan,
syntax::{init_info, ChangeKind},
syntax::{init_info, ChangeKind, AtomKind},
};
use itertools::Itertools;
@ -157,9 +157,9 @@ mod tests {
fn identical_atoms() {
let arena = Arena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(0), "foo");
let lhs = Syntax::new_atom(&arena, pos_helper(0), "foo", AtomKind::Normal);
// Same content as LHS.
let rhs = Syntax::new_atom(&arena, pos_helper(0), "foo");
let rhs = Syntax::new_atom(&arena, pos_helper(0), "foo", AtomKind::Normal);
init_info(&[lhs], &[rhs]);
let start = Vertex {
@ -187,7 +187,12 @@ mod tests {
&arena,
"[",
pos_helper(0),
vec![Syntax::new_atom(&arena, pos_helper(1), "foo")],
vec![Syntax::new_atom(
&arena,
pos_helper(1),
"foo",
AtomKind::Normal,
)],
"]",
pos_helper(2),
)];
@ -240,8 +245,8 @@ mod tests {
"[",
pos_helper(0),
vec![
Syntax::new_atom(&arena, pos_helper(1), "foo"),
Syntax::new_atom(&arena, pos_helper(2), "foo"),
Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(2), "foo", AtomKind::Normal),
],
"]",
pos_helper(3),
@ -279,7 +284,7 @@ mod tests {
pos_helper(0),
vec![
Syntax::new_list(&arena, "(", pos_helper(1), vec![], ")", pos_helper(2)),
Syntax::new_atom(&arena, pos_helper(3), "foo"),
Syntax::new_atom(&arena, pos_helper(3), "foo", AtomKind::Normal),
],
"]",
pos_helper(4),
@ -291,7 +296,7 @@ mod tests {
pos_helper(0),
vec![
Syntax::new_list(&arena, "(", pos_helper(1), vec![], ")", pos_helper(2)),
Syntax::new_atom(&arena, pos_helper(3), "foo"),
Syntax::new_atom(&arena, pos_helper(3), "foo", AtomKind::Normal),
],
"}",
pos_helper(4),
@ -327,12 +332,17 @@ mod tests {
let arena = Arena::new();
let lhs = vec![
Syntax::new_atom(&arena, col_helper(1, 0), "foo"),
Syntax::new_atom(&arena, col_helper(2, 0), "bar"),
Syntax::new_atom(&arena, col_helper(2, 1), "foo"),
Syntax::new_atom(&arena, col_helper(1, 0), "foo", AtomKind::Normal),
Syntax::new_atom(&arena, col_helper(2, 0), "bar", AtomKind::Normal),
Syntax::new_atom(&arena, col_helper(2, 1), "foo", AtomKind::Normal),
];
let rhs = vec![Syntax::new_atom(&arena, col_helper(1, 0), "foo")];
let rhs = vec![Syntax::new_atom(
&arena,
col_helper(1, 0),
"foo",
AtomKind::Normal,
)];
init_info(&lhs, &rhs);
let start = Vertex {
@ -364,7 +374,12 @@ mod tests {
&arena,
"[",
col_helper(1, 0),
vec![Syntax::new_atom(&arena, col_helper(1, 2), "1")],
vec![Syntax::new_atom(
&arena,
col_helper(1, 2),
"1",
AtomKind::Normal,
)],
"]",
pos_helper(2),
)];
@ -399,11 +414,16 @@ mod tests {
&arena,
"[",
col_helper(1, 0),
vec![Syntax::new_atom(&arena, col_helper(1, 2), "1")],
vec![Syntax::new_atom(
&arena,
col_helper(1, 2),
"1",
AtomKind::Normal,
)],
"]",
col_helper(2, 1),
),
Syntax::new_atom(&arena, col_helper(2, 2), ";"),
Syntax::new_atom(&arena, col_helper(2, 2), ";", AtomKind::Normal),
];
let rhs = vec![];
@ -437,27 +457,27 @@ mod tests {
"[",
pos_helper(0),
vec![
Syntax::new_atom(&arena, pos_helper(1), "1"),
Syntax::new_atom(&arena, pos_helper(2), "2"),
Syntax::new_atom(&arena, pos_helper(3), "3"),
Syntax::new_atom(&arena, pos_helper(4), "4"),
Syntax::new_atom(&arena, pos_helper(5), "5"),
Syntax::new_atom(&arena, pos_helper(6), "6"),
Syntax::new_atom(&arena, pos_helper(7), "7"),
Syntax::new_atom(&arena, pos_helper(8), "8"),
Syntax::new_atom(&arena, pos_helper(9), "9"),
Syntax::new_atom(&arena, pos_helper(10), "10"),
Syntax::new_atom(&arena, pos_helper(11), "11"),
Syntax::new_atom(&arena, pos_helper(12), "12"),
Syntax::new_atom(&arena, pos_helper(13), "13"),
Syntax::new_atom(&arena, pos_helper(14), "14"),
Syntax::new_atom(&arena, pos_helper(15), "15"),
Syntax::new_atom(&arena, pos_helper(16), "16"),
Syntax::new_atom(&arena, pos_helper(17), "17"),
Syntax::new_atom(&arena, pos_helper(18), "18"),
Syntax::new_atom(&arena, pos_helper(19), "19"),
Syntax::new_atom(&arena, pos_helper(20), "20"),
Syntax::new_atom(&arena, pos_helper(21), "21"),
Syntax::new_atom(&arena, pos_helper(1), "1", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(2), "2", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(3), "3", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(4), "4", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(5), "5", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(6), "6", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(7), "7", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(8), "8", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(9), "9", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(10), "10", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(11), "11", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(12), "12", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(13), "13", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(14), "14", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(15), "15", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(16), "16", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(17), "17", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(18), "18", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(19), "19", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(20), "20", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(21), "21", AtomKind::Normal),
],
"]",
pos_helper(100),
@ -468,28 +488,28 @@ mod tests {
"[",
pos_helper(0),
vec![
Syntax::new_atom(&arena, pos_helper(1), "d1"),
Syntax::new_atom(&arena, pos_helper(2), "d2"),
Syntax::new_atom(&arena, pos_helper(3), "d3"),
Syntax::new_atom(&arena, pos_helper(4), "d4"),
Syntax::new_atom(&arena, pos_helper(5), "d5"),
Syntax::new_atom(&arena, pos_helper(6), "d6"),
Syntax::new_atom(&arena, pos_helper(7), "d7"),
Syntax::new_atom(&arena, pos_helper(8), "d8"),
Syntax::new_atom(&arena, pos_helper(9), "d9"),
Syntax::new_atom(&arena, pos_helper(1), "d1", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(2), "d2", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(3), "d3", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(4), "d4", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(5), "d5", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(6), "d6", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(7), "d7", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(8), "d8", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(9), "d9", AtomKind::Normal),
// This is the only common atom:
Syntax::new_atom(&arena, pos_helper(10), "10"),
Syntax::new_atom(&arena, pos_helper(11), "d11"),
Syntax::new_atom(&arena, pos_helper(12), "d12"),
Syntax::new_atom(&arena, pos_helper(13), "d13"),
Syntax::new_atom(&arena, pos_helper(14), "d14"),
Syntax::new_atom(&arena, pos_helper(15), "d15"),
Syntax::new_atom(&arena, pos_helper(16), "d16"),
Syntax::new_atom(&arena, pos_helper(17), "d17"),
Syntax::new_atom(&arena, pos_helper(18), "d18"),
Syntax::new_atom(&arena, pos_helper(19), "d19"),
Syntax::new_atom(&arena, pos_helper(20), "d20"),
Syntax::new_atom(&arena, pos_helper(21), "d21"),
Syntax::new_atom(&arena, pos_helper(10), "10", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(11), "d11", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(12), "d12", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(13), "d13", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(14), "d14", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(15), "d15", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(16), "d16", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(17), "d17", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(18), "d18", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(19), "d19", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(20), "d20", AtomKind::Normal),
Syntax::new_atom(&arena, pos_helper(21), "d21", AtomKind::Normal),
],
"]",
pos_helper(100),
@ -522,16 +542,18 @@ mod tests {
fn replace_similar_comment() {
let arena = Arena::new();
let lhs = vec![Syntax::new_comment(
let lhs = vec![Syntax::new_atom(
&arena,
pos_helper(1),
"the quick brown fox",
AtomKind::Comment,
)];
let rhs = vec![Syntax::new_comment(
let rhs = vec![Syntax::new_atom(
&arena,
pos_helper(1),
"the quick brown cat",
AtomKind::Comment,
)];
init_info(&lhs, &rhs);
@ -556,13 +578,19 @@ mod tests {
fn replace_very_different_comment() {
let arena = Arena::new();
let lhs = vec![Syntax::new_comment(
let lhs = vec![Syntax::new_atom(
&arena,
pos_helper(1),
"the quick brown fox",
AtomKind::Comment,
)];
let rhs = vec![Syntax::new_comment(&arena, pos_helper(1), "foo bar")];
let rhs = vec![Syntax::new_atom(
&arena,
pos_helper(1),
"foo bar",
AtomKind::Comment,
)];
init_info(&lhs, &rhs);
let start = Vertex {
@ -587,14 +615,25 @@ mod tests {
let arena = Arena::new();
let lhs = vec![
Syntax::new_comment(&arena, pos_helper(1), "the quick brown fox"),
Syntax::new_comment(&arena, pos_helper(2), "the quick brown thing"),
Syntax::new_atom(
&arena,
pos_helper(1),
"the quick brown fox",
AtomKind::Comment,
),
Syntax::new_atom(
&arena,
pos_helper(2),
"the quick brown thing",
AtomKind::Comment,
),
];
let rhs = vec![Syntax::new_comment(
let rhs = vec![Syntax::new_atom(
&arena,
pos_helper(1),
"the quick brown fox.",
AtomKind::Comment,
)];
init_info(&lhs, &rhs);
@ -621,8 +660,8 @@ mod tests {
#[test]
fn mark_syntax_equal_atoms() {
let arena = Arena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo");
let rhs = Syntax::new_atom(&arena, pos_helper(1), "foo");
let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
let rhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
init_info(&[lhs], &[rhs]);
mark_syntax(Some(lhs), Some(rhs));
@ -633,8 +672,8 @@ mod tests {
#[test]
fn mark_syntax_different_atoms() {
let arena = Arena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo");
let rhs = Syntax::new_atom(&arena, pos_helper(1), "bar");
let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
let rhs = Syntax::new_atom(&arena, pos_helper(1), "bar", AtomKind::Normal);
init_info(&[lhs], &[rhs]);
mark_syntax(Some(lhs), Some(rhs));

@ -6,7 +6,7 @@ use std::{
};
use strsim::normalized_levenshtein;
use crate::syntax::{ChangeKind, Syntax};
use crate::syntax::{ChangeKind, AtomKind, Syntax};
use Edge::*;
/// A vertex in a directed acyclic graph that represents a diff.
@ -215,12 +215,12 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
if let (
Syntax::Atom {
content: lhs_content,
is_comment: true,
kind: AtomKind::Comment,
..
},
Syntax::Atom {
content: rhs_content,
is_comment: true,
kind: AtomKind::Comment,
..
},
) = (lhs_syntax, rhs_syntax)

@ -2,7 +2,10 @@
use typed_arena::Arena;
use crate::{positions::SingleLineSpan, syntax::Syntax};
use crate::{
positions::SingleLineSpan,
syntax::{AtomKind, Syntax},
};
/// Split `s` by lines, and treat each line as an atom.
///
@ -17,7 +20,7 @@ pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str) -> Vec<&'a Syntax<'a>> {
// TODO: this is very hot on large files, such as parser.c,
// because we spend ~65% of execution time computing
// levenshtein distance.
res.push(Syntax::new_comment(
res.push(Syntax::new_atom(
arena,
vec![SingleLineSpan {
line: i.into(),
@ -25,6 +28,7 @@ pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str) -> Vec<&'a Syntax<'a>> {
end_col: line.len(),
}],
line,
AtomKind::Comment, // TODO: don't dim plain lines like other comments
));
}
@ -115,13 +119,13 @@ mod tests {
Atom {
position: lhs_position,
content: lhs_content,
is_comment: lhs_is_comment,
kind: lhs_highlight,
..
},
Atom {
position: rhs_position,
content: rhs_content,
is_comment: rhs_is_comment,
kind: rhs_highlight,
..
},
) => {
@ -138,8 +142,8 @@ mod tests {
dbg!(lhs_content, rhs_content);
return false;
}
if lhs_is_comment != rhs_is_comment {
dbg!(lhs_is_comment, rhs_is_comment);
if lhs_highlight != rhs_highlight {
dbg!(lhs_highlight, rhs_highlight);
return false;
}
}
@ -157,7 +161,7 @@ mod tests {
assert_syntaxes(
&parse(&arena, "foo\nbar"),
&[
Syntax::new_comment(
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
@ -165,8 +169,9 @@ mod tests {
end_col: 3,
}],
"foo",
AtomKind::Comment,
),
Syntax::new_comment(
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 1.into(),
@ -174,6 +179,7 @@ mod tests {
end_col: 3,
}],
"bar",
AtomKind::Comment,
),
],
);

@ -476,7 +476,7 @@ impl MaxLine for String {
#[cfg(test)]
mod tests {
use super::*;
use crate::syntax::{HighlightKind, MatchKind};
use crate::syntax::{AtomKind, MatchKind, TokenKind};
use pretty_assertions::assert_eq;
#[test]
@ -511,7 +511,7 @@ mod tests {
fn test_visible_groups_ignores_unchanged() {
let lhs_positions = vec![MatchedPos {
kind: MatchKind::Unchanged {
highlight: HighlightKind::Normal,
highlight: TokenKind::Atom(AtomKind::Normal),
opposite_pos: (vec![], vec![]),
},
pos: vec![SingleLineSpan {
@ -523,7 +523,7 @@ mod tests {
}];
let rhs_positions = vec![MatchedPos {
kind: MatchKind::Unchanged {
highlight: HighlightKind::Normal,
highlight: TokenKind::Atom(AtomKind::Normal),
opposite_pos: (vec![], vec![]),
},
pos: vec![SingleLineSpan {

@ -3,7 +3,7 @@
use crate::{
lines::{codepoint_len, substring_by_codepoint, LineNumber},
positions::SingleLineSpan,
syntax::{HighlightKind, MatchKind, MatchedPos},
syntax::{AtomKind, MatchKind, MatchedPos, TokenKind},
};
use colored::*;
use std::{cmp::min, collections::HashMap};
@ -102,31 +102,31 @@ fn apply(s: &str, styles: &[(SingleLineSpan, Style)]) -> String {
pub fn apply_colors(s: &str, is_lhs: bool, positions: &[MatchedPos]) -> String {
let mut styles = vec![];
for pos in positions {
let style = match pos.kind {
MatchKind::Unchanged { highlight, .. } => Style {
foreground: Color::White,
background: None,
bold: false,
dimmed: highlight == HighlightKind::Comment,
},
MatchKind::Novel | MatchKind::ChangedCommentPart => Style {
foreground: if is_lhs {
Color::BrightRed
} else {
Color::BrightGreen
},
background: None,
bold: true,
dimmed: false,
},
MatchKind::UnchangedCommentPart { .. } => Style {
foreground: if is_lhs { Color::Red } else { Color::Green },
background: None,
bold: false,
dimmed: false,
},
};
for line_pos in &pos.pos {
let style = match pos.kind {
MatchKind::Unchanged { highlight, .. } => Style {
foreground: Color::White,
background: None,
bold: highlight == TokenKind::Atom(AtomKind::Keyword),
dimmed: highlight == TokenKind::Atom(AtomKind::Comment),
},
MatchKind::Novel | MatchKind::ChangedCommentPart => Style {
foreground: if is_lhs {
Color::BrightRed
} else {
Color::BrightGreen
},
background: None,
bold: true,
dimmed: false,
},
MatchKind::UnchangedCommentPart { .. } => Style {
foreground: if is_lhs { Color::Red } else { Color::Green },
background: None,
bold: false,
dimmed: false,
},
};
styles.push((*line_pos, style));
}
}

@ -81,7 +81,7 @@ pub enum Syntax<'a> {
info: SyntaxInfo<'a>,
position: Vec<SingleLineSpan>,
content: String,
is_comment: bool,
kind: AtomKind,
},
}
@ -137,6 +137,7 @@ impl<'a> fmt::Debug for Syntax<'a> {
content,
position,
info,
kind: highlight,
..
} => {
let mut ds = f.debug_struct(&format!(
@ -148,6 +149,7 @@ impl<'a> fmt::Debug for Syntax<'a> {
ds.field("position", &dbg_pos(position));
if env::var("DFT_VERBOSE").is_ok() {
ds.field("highlight", highlight);
ds.field("change", &info.change.get());
let next_s = match info.next.get() {
Some(List { .. }) => "Some(List)",
@ -197,30 +199,13 @@ impl<'a> Syntax<'a> {
arena: &'a Arena<Syntax<'a>>,
position: Vec<SingleLineSpan>,
content: &str,
kind: AtomKind,
) -> &'a Syntax<'a> {
Self::new_atom_(arena, position, content, false)
}
pub fn new_comment(
arena: &'a Arena<Syntax<'a>>,
position: Vec<SingleLineSpan>,
content: &str,
) -> &'a Syntax<'a> {
Self::new_atom_(arena, position, content, true)
}
#[allow(clippy::mut_from_ref)] // Clippy doesn't understand arenas.
fn new_atom_(
arena: &'a Arena<Syntax<'a>>,
position: Vec<SingleLineSpan>,
content: &str,
is_comment: bool,
) -> &'a mut Syntax<'a> {
arena.alloc(Atom {
info: SyntaxInfo::new(),
position,
content: content.into(),
is_comment,
kind,
})
}
@ -313,7 +298,13 @@ pub fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>])
set_content_id(rhs_roots, &mut existing);
}
type ContentKey = (Option<String>, Option<String>, Vec<u32>, bool, bool);
/// Key of the `HashMap<ContentKey, u32>` that `set_content_id` fills in.
///
/// For an atom the fields are, in order: the content (for multi-line
/// comments, with leading whitespace trimmed from every line), `None`,
/// an empty vec, `false`, and `Some(kind)`.
///
/// For a list the fields visible here are: the close delimiter
/// (second), the content IDs of the children (third), `true` (fourth)
/// and `None` (fifth). The first field is presumably the open
/// delimiter -- TODO confirm, it is built outside this view.
type ContentKey = (
Option<String>,
Option<String>,
Vec<u32>,
bool,
Option<AtomKind>,
);
fn set_content_id<'a>(nodes: &[&'a Syntax<'a>], existing: &mut HashMap<ContentKey, u32>) {
for node in nodes {
@ -335,25 +326,26 @@ fn set_content_id<'a>(nodes: &[&'a Syntax<'a>], existing: &mut HashMap<ContentKe
Some(close_content.clone()),
children_content_ids,
true,
false,
None,
)
}
Atom {
content,
is_comment,
kind: highlight,
..
} => {
let clean_content = if *is_comment && content.lines().count() > 1 {
content
.lines()
.map(|l| l.trim_start())
.collect::<Vec<_>>()
.join("\n")
.to_string()
} else {
content.clone()
};
(Some(clean_content), None, vec![], false, *is_comment)
let clean_content =
if *highlight == AtomKind::Comment && content.lines().count() > 1 {
content
.lines()
.map(|l| l.trim_start())
.collect::<Vec<_>>()
.join("\n")
.to_string()
} else {
content.clone()
};
(Some(clean_content), None, vec![], false, Some(*highlight))
}
};
@ -476,16 +468,25 @@ impl<'a> Hash for Syntax<'a> {
}
}
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum HighlightKind {
/// How an atom should be treated when highlighting and comparing.
#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
pub enum AtomKind {
/// An atom that is neither a comment nor a keyword.
Normal,
/// A comment. The tree-sitter parser uses this for `is_extra()`
/// nodes, and the line-based parser uses it for every line.
/// Unchanged comments are rendered dimmed.
Comment,
/// A node that the tree-sitter parser found in its set of keyword
/// node IDs. Unchanged keywords are rendered bold.
Keyword,
}
/// Unlike atoms, tokens can be delimiters like `{`.
///
/// This is the highlight information carried by `MatchKind::Unchanged`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum TokenKind {
/// The open or close delimiter of a list.
Delimiter,
/// An atom, together with the `AtomKind` stored on it.
Atom(AtomKind),
}
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum MatchKind {
Unchanged {
highlight: HighlightKind,
highlight: TokenKind,
// as this match could be for a list.
opposite_pos: (Vec<SingleLineSpan>, Vec<SingleLineSpan>),
},
Novel,
@ -583,7 +584,7 @@ fn split_comment_words(
impl MatchedPos {
fn new(
ck: ChangeKind,
is_comment: bool,
highlight: TokenKind,
pos: Vec<SingleLineSpan>,
prev_opposite_pos: Vec<SingleLineSpan>,
) -> Vec<Self> {
@ -619,11 +620,7 @@ impl MatchedPos {
};
MatchKind::Unchanged {
highlight: if is_comment {
HighlightKind::Comment
} else {
HighlightKind::Normal
},
highlight,
opposite_pos,
}
}
@ -698,7 +695,7 @@ fn change_positions_<'a>(
positions.extend(MatchedPos::new(
change,
false,
TokenKind::Delimiter,
open_position.clone(),
prev_opposite_pos.clone(),
));
@ -724,7 +721,7 @@ fn change_positions_<'a>(
}
positions.extend(MatchedPos::new(
change,
false,
TokenKind::Delimiter,
close_position.clone(),
prev_opposite_pos.clone(),
));
@ -732,7 +729,7 @@ fn change_positions_<'a>(
Atom {
info,
position,
is_comment,
kind,
..
} => {
let change = info
@ -755,7 +752,7 @@ fn change_positions_<'a>(
}
positions.extend(MatchedPos::new(
change,
*is_comment,
TokenKind::Atom(*kind),
position.clone(),
prev_opposite_pos.clone(),
));
@ -1076,6 +1073,7 @@ mod tests {
end_col: 3,
}],
"foo",
AtomKind::Normal,
);
atom.set_change(ChangeKind::Novel);
let nodes: Vec<&Syntax> = vec![atom];
@ -1101,8 +1099,8 @@ mod tests {
let arena = Arena::new();
let comment = Syntax::new_comment(&arena, pos.clone(), "foo");
let atom = Syntax::new_atom(&arena, pos, "foo");
let comment = Syntax::new_atom(&arena, pos.clone(), "foo", AtomKind::Comment);
let atom = Syntax::new_atom(&arena, pos, "foo", AtomKind::Normal);
init_info(&[comment], &[atom]);
assert_ne!(comment, atom);
@ -1118,8 +1116,8 @@ mod tests {
let arena = Arena::new();
let x = Syntax::new_comment(&arena, pos.clone(), "foo\nbar");
let y = Syntax::new_comment(&arena, pos, "foo\n bar");
let x = Syntax::new_atom(&arena, pos.clone(), "foo\nbar", AtomKind::Comment);
let y = Syntax::new_atom(&arena, pos, "foo\n bar", AtomKind::Comment);
init_info(&[x], &[y]);
assert_eq!(x, y);
@ -1139,7 +1137,7 @@ mod tests {
end_col: 3,
}],
content: "foo".into(),
is_comment: false,
kind: AtomKind::Normal,
};
let rhs = Atom {
info: SyntaxInfo {
@ -1152,7 +1150,7 @@ mod tests {
end_col: 3,
}],
content: "foo".into(),
is_comment: false,
kind: AtomKind::Normal,
};
init_info(&[&lhs], &[&rhs]);

@ -2,10 +2,13 @@
use std::{borrow::Borrow, collections::HashSet, ffi::OsStr};
use tree_sitter::{Language, Parser, TreeCursor};
use tree_sitter::{Language, Parser, Query, QueryCursor, TreeCursor};
use typed_arena::Arena;
use crate::{lines::NewlinePositions, syntax::Syntax};
use crate::{
lines::NewlinePositions,
syntax::{AtomKind, Syntax},
};
pub struct TreeSitterConfig {
pub name: &'static str,
@ -22,6 +25,7 @@ pub struct TreeSitterConfig {
// https://github.com/tree-sitter/tree-sitter/issues/1156
atom_nodes: HashSet<&'static str>,
delimiter_tokens: Vec<(&'static str, &'static str)>,
highlight_queries: &'static str,
}
extern "C" {
@ -56,6 +60,7 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
.into_iter()
.collect(),
delimiter_tokens: (vec![("(", ")"), ("{", "}"), ("[", "]")]),
highlight_queries: include_str!("../vendor/highlights/c.scm"),
}),
// Treat .h as C++ rather than C. This is an arbitrary choice,
// but C++ is more widely used than C according to
@ -72,6 +77,10 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
.into_iter()
.collect(),
delimiter_tokens: (vec![("(", ")"), ("{", "}"), ("[", "]")]),
highlight_queries: concat!(
include_str!("../vendor/highlights/c.scm"),
include_str!("../vendor/highlights/cpp.scm")
),
}),
"bb" | "boot" | "clj" | "cljc" | "clje" | "cljs" | "cljx" | "edn" | "joke" | "joker" => {
Some(TreeSitterConfig {
@ -81,6 +90,7 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
delimiter_tokens: (vec![("{", "}"), ("(", ")"), ("[", "]")])
.into_iter()
.collect(),
highlight_queries: "",
})
}
"cs" => Some(TreeSitterConfig {
@ -94,12 +104,14 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
.into_iter()
.collect(),
delimiter_tokens: (vec![("{", "}"), ("(", ")")]),
highlight_queries: include_str!("../vendor/highlights/c-sharp.scm"),
}),
"css" => Some(TreeSitterConfig {
name: "CSS",
language: unsafe { tree_sitter_css() },
atom_nodes: (vec!["integer_value"]).into_iter().collect(),
delimiter_tokens: (vec![("{", "}"), ("(", ")")]),
highlight_queries: include_str!("../vendor/highlights/css.scm"),
}),
"el" => Some(TreeSitterConfig {
name: "Emacs Lisp",
@ -108,6 +120,7 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
delimiter_tokens: (vec![("{", "}"), ("(", ")"), ("[", "]")])
.into_iter()
.collect(),
highlight_queries: include_str!("../vendor/highlights/elisp.scm"),
}),
"ex" | "exs" => Some(TreeSitterConfig {
name: "Elixir",
@ -116,6 +129,7 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
delimiter_tokens: (vec![("(", ")"), ("{", "}"), ("do", "end")])
.into_iter()
.collect(),
highlight_queries: include_str!("../vendor/highlights/elixir.scm"),
}),
"go" => Some(TreeSitterConfig {
name: "Go",
@ -126,18 +140,21 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
delimiter_tokens: (vec![("{", "}"), ("[", "]"), ("(", ")")])
.into_iter()
.collect(),
highlight_queries: include_str!("../vendor/highlights/go.scm"),
}),
"hs" => Some(TreeSitterConfig {
name: "Haskell",
language: unsafe { tree_sitter_haskell() },
atom_nodes: (vec![]).into_iter().collect(),
delimiter_tokens: (vec![("[", "]"), ("(", ")")]),
highlight_queries: include_str!("../vendor/highlights/haskell.scm"),
}),
"java" => Some(TreeSitterConfig {
name: "Java",
language: unsafe { tree_sitter_java() },
atom_nodes: (vec![]).into_iter().collect(),
delimiter_tokens: (vec![("(", ")"), ("{", "}")]),
highlight_queries: include_str!("../vendor/highlights/java.scm"),
}),
"cjs" | "js" | "jsx" | "mjs" => Some(TreeSitterConfig {
name: "JavaScript",
@ -152,12 +169,14 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
// > at the same level in JSX.
("<", ">"),
]),
highlight_queries: include_str!("../vendor/highlights/javascript.scm"),
}),
"json" => Some(TreeSitterConfig {
name: "JSON",
language: unsafe { tree_sitter_json() },
atom_nodes: (vec!["string"]).into_iter().collect(),
delimiter_tokens: (vec![("{", "}"), ("[", "]")]),
highlight_queries: include_str!("../vendor/highlights/json.scm"),
}),
"ml" => Some(TreeSitterConfig {
name: "OCaml",
@ -165,18 +184,21 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
atom_nodes: (vec!["character", "string"]).into_iter().collect(),
// TODO: begin/end and object/end.
delimiter_tokens: (vec![("(", ")"), ("[", "]"), ("{", "}")]),
highlight_queries: include_str!("../vendor/highlights/ocaml.scm"),
}),
"mli" => Some(TreeSitterConfig {
name: "OCaml Interface",
language: unsafe { tree_sitter_ocaml_interface() },
atom_nodes: (vec!["character", "string"]).into_iter().collect(),
delimiter_tokens: (vec![("(", ")"), ("[", "]"), ("{", "}")]),
highlight_queries: include_str!("../vendor/highlights/ocaml.scm"),
}),
"py" => Some(TreeSitterConfig {
name: "Python",
language: unsafe { tree_sitter_python() },
atom_nodes: (vec!["string"]).into_iter().collect(),
delimiter_tokens: (vec![("(", ")"), ("[", "]"), ("{", "}")]),
highlight_queries: include_str!("../vendor/highlights/python.scm"),
}),
"rs" => Some(TreeSitterConfig {
name: "Rust",
@ -185,31 +207,62 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
.into_iter()
.collect(),
delimiter_tokens: (vec![("{", "}"), ("(", ")"), ("[", "]"), ("|", "|")]),
highlight_queries: include_str!("../vendor/highlights/rust.scm"),
}),
"ts" => Some(TreeSitterConfig {
name: "TypeScript",
language: unsafe { tree_sitter_typescript() },
atom_nodes: (vec!["string", "template_string"]).into_iter().collect(),
delimiter_tokens: (vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")]),
highlight_queries: include_str!("../vendor/highlights/typescript.scm"),
}),
"tsx" => Some(TreeSitterConfig {
name: "TypeScript TSX",
language: unsafe { tree_sitter_tsx() },
atom_nodes: (vec!["string", "template_string"]).into_iter().collect(),
delimiter_tokens: (vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")]),
highlight_queries: include_str!("../vendor/highlights/typescript.scm"),
}),
_ => None,
}
}
/// Parse `src` with tree-sitter.
///
/// Returns the parsed tree together with the set of node IDs
/// (`Node::id()`) that the query in `config.highlight_queries` captured
/// as `keyword`, `operator` or `constant`.
///
/// # Panics
///
/// Panics if `config.language` cannot be set on the parser, if
/// `Parser::parse` returns `None`, or if `Query::new` rejects
/// `config.highlight_queries` for `config.language`.
pub fn parse_to_tree(src: &str, config: &TreeSitterConfig) -> tree_sitter::Tree {
pub fn parse_to_tree(src: &str, config: &TreeSitterConfig) -> (tree_sitter::Tree, HashSet<usize>) {
let mut parser = Parser::new();
parser
.set_language(config.language)
.expect("Incompatible tree-sitter version");
parser.parse(src, None).unwrap()
let tree = parser.parse(src, None).unwrap();
let query = Query::new(config.language, config.highlight_queries).unwrap();
let mut node_keyword_ids = HashSet::new();
// Capture indexes that are treated like keywords. A capture name that
// the query does not define is simply left out.
let mut keyword_ish_ids = vec![];
if let Some(idx) = query.capture_index_for_name("keyword") {
keyword_ish_ids.push(idx);
}
if let Some(idx) = query.capture_index_for_name("operator") {
keyword_ish_ids.push(idx);
}
if let Some(idx) = query.capture_index_for_name("constant") {
keyword_ish_ids.push(idx);
}
// Run the query from the root node and remember every node whose
// capture index is one of the keyword-ish ones collected above.
let mut qc = QueryCursor::new();
let q_matches = qc.matches(&query, tree.root_node(), src.as_bytes());
for m in q_matches {
for c in m.captures {
if keyword_ish_ids.contains(&c.index) {
node_keyword_ids.insert(c.node.id());
}
}
}
(tree, node_keyword_ids)
}
pub fn print_tree(tree: &tree_sitter::Tree) {
@ -239,7 +292,7 @@ pub fn parse<'a>(
src: &str,
config: &TreeSitterConfig,
) -> Vec<&'a Syntax<'a>> {
let tree = parse_to_tree(src, config);
let (tree, keyword_ids) = parse_to_tree(src, config);
let nl_pos = NewlinePositions::from(src);
let mut cursor = tree.walk();
@ -248,7 +301,7 @@ pub fn parse<'a>(
// each top level syntax item.
cursor.goto_first_child();
all_syntaxes_from_cursor(arena, src, &nl_pos, &mut cursor, config)
all_syntaxes_from_cursor(arena, src, &nl_pos, &mut cursor, config, &keyword_ids)
}
fn child_tokens<'a>(src: &'a str, cursor: &mut TreeCursor) -> Vec<Option<&'a str>> {
@ -309,11 +362,19 @@ fn all_syntaxes_from_cursor<'a>(
nl_pos: &NewlinePositions,
cursor: &mut TreeCursor,
config: &TreeSitterConfig,
keyword_ids: &HashSet<usize>,
) -> Vec<&'a Syntax<'a>> {
let mut result: Vec<&Syntax> = vec![];
loop {
result.push(syntax_from_cursor(arena, src, nl_pos, cursor, config));
result.push(syntax_from_cursor(
arena,
src,
nl_pos,
cursor,
config,
keyword_ids,
));
if !cursor.goto_next_sibling() {
break;
@ -331,17 +392,18 @@ fn syntax_from_cursor<'a>(
nl_pos: &NewlinePositions,
cursor: &mut TreeCursor,
config: &TreeSitterConfig,
keyword_ids: &HashSet<usize>,
) -> &'a Syntax<'a> {
let node = cursor.node();
if config.atom_nodes.contains(node.kind()) {
// Treat nodes like string literals as atoms, regardless
// of whether they have children.
atom_from_cursor(arena, src, nl_pos, cursor)
atom_from_cursor(arena, src, nl_pos, cursor, keyword_ids)
} else if node.child_count() > 0 {
list_from_cursor(arena, src, nl_pos, cursor, config)
list_from_cursor(arena, src, nl_pos, cursor, config, keyword_ids)
} else {
atom_from_cursor(arena, src, nl_pos, cursor)
atom_from_cursor(arena, src, nl_pos, cursor, keyword_ids)
}
}
@ -353,6 +415,7 @@ fn list_from_cursor<'a>(
nl_pos: &NewlinePositions,
cursor: &mut TreeCursor,
config: &TreeSitterConfig,
keyword_ids: &HashSet<usize>,
) -> &'a Syntax<'a> {
let root_node = cursor.node();
@ -395,17 +458,38 @@ fn list_from_cursor<'a>(
loop {
let node = cursor.node();
if node_i < i {
before_delim.push(syntax_from_cursor(arena, src, nl_pos, cursor, config));
before_delim.push(syntax_from_cursor(
arena,
src,
nl_pos,
cursor,
config,
keyword_ids,
));
} else if node_i == i {
inner_open_content = &src[node.start_byte()..node.end_byte()];
inner_open_position = nl_pos.from_offsets(node.start_byte(), node.end_byte());
} else if node_i < j {
between_delim.push(syntax_from_cursor(arena, src, nl_pos, cursor, config));
between_delim.push(syntax_from_cursor(
arena,
src,
nl_pos,
cursor,
config,
keyword_ids,
));
} else if node_i == j {
inner_close_content = &src[node.start_byte()..node.end_byte()];
inner_close_position = nl_pos.from_offsets(node.start_byte(), node.end_byte());
} else if node_i > j {
after_delim.push(syntax_from_cursor(arena, src, nl_pos, cursor, config));
after_delim.push(syntax_from_cursor(
arena,
src,
nl_pos,
cursor,
config,
keyword_ids,
));
}
if !cursor.goto_next_sibling() {
@ -455,16 +539,21 @@ fn atom_from_cursor<'a>(
src: &str,
nl_pos: &NewlinePositions,
cursor: &mut TreeCursor,
keyword_ids: &HashSet<usize>,
) -> &'a Syntax<'a> {
let node = cursor.node();
let position = nl_pos.from_offsets(node.start_byte(), node.end_byte());
let content = &src[node.start_byte()..node.end_byte()];
if node.is_extra() {
Syntax::new_comment(arena, position, content)
let highlight = if node.is_extra() {
AtomKind::Comment
} else if keyword_ids.contains(&node.id()) {
AtomKind::Keyword
} else {
Syntax::new_atom(arena, position, content)
}
AtomKind::Normal
};
Syntax::new_atom(arena, position, content, highlight)
}
#[cfg(test)]

@ -0,0 +1 @@
../tree-sitter-c-sharp/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-c/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-cpp/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-css/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-elisp/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-elixir/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-go/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-haskell/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-java/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-javascript/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-json/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-ocaml/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-python/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-rust/queries/highlights.scm

@ -0,0 +1 @@
../tree-sitter-typescript/queries/highlights.scm