Also consider highlights.scm when marking nodes as comments

This removes the need to special-case Perl, and is necessary for
CMake (which has nodes bracket_comment and line_comment that aren't
marked as 'extra').
pull/341/head
Wilfred Hughes 2022-08-20 18:01:51 +07:00
parent fb38e7a24f
commit 58c8f47298
3 changed files with 19 additions and 3 deletions

@ -2,6 +2,9 @@
### Parsing
Improved comment detection using tree-sitter syntax highlighting
queries.
Fixed an issue with language detection when file names were very
short.

@ -29,7 +29,7 @@ sample_files/dart_before.dart sample_files/dart_after.dart
a13ab6e933b145d3f948b640caf38968 -
sample_files/elisp_before.el sample_files/elisp_after.el
f4233ebbe6c46a7c07bc88eca20e4856 -
b98257eb3492eaf323ffb0f3961aaa41 -
sample_files/elisp_contiguous_before.el sample_files/elisp_contiguous_after.el
e3946aef566a707c718edd7a86340566 -

@ -806,6 +806,7 @@ fn tree_highlights(
let mut keyword_ish_capture_ids: Vec<u32> = vec![];
let mut string_capture_ids = vec![];
let mut type_capture_ids = vec![];
let mut comment_capture_ids = vec![];
// Query names are often written with namespacing, so
// highlights.scm might contain @constant or the more specific
@ -849,17 +850,24 @@ fn tree_highlights(
if name == "label" {
type_capture_ids.push(idx as u32);
}
if name == "comment" {
comment_capture_ids.push(idx as u32);
}
}
let mut qc = ts::QueryCursor::new();
let q_matches = qc.matches(&config.highlight_query, tree.root_node(), src.as_bytes());
let mut comment_ids = HashSet::new();
let mut keyword_ids = HashSet::new();
let mut string_ids = HashSet::new();
let mut type_ids = HashSet::new();
for m in q_matches {
for c in m.captures {
if keyword_ish_capture_ids.contains(&c.index) {
if comment_capture_ids.contains(&c.index) {
comment_ids.insert(c.node.id());
} else if keyword_ish_capture_ids.contains(&c.index) {
keyword_ids.insert(c.node.id());
} else if string_capture_ids.contains(&c.index) {
string_ids.insert(c.node.id());
@ -870,6 +878,7 @@ fn tree_highlights(
}
HighlightedNodeIds {
comment_ids,
keyword_ids,
string_ids,
type_ids,
@ -987,6 +996,7 @@ fn find_delim_positions(
/// IDs of syntax tree nodes that were captured by the highlight query,
/// grouped by highlight category.
///
/// Every set stores the values returned by `node.id()` for captured
/// nodes, so membership is tested with `set.contains(&node.id())`.
pub struct HighlightedNodeIds {
/// Nodes captured by one of the keyword-like captures.
keyword_ids: HashSet<usize>,
/// Nodes captured as `comment` in the highlight query. Consulted in
/// addition to `is_extra()` and the node kind when deciding whether an
/// atom is a comment.
comment_ids: HashSet<usize>,
/// Nodes captured by one of the string captures.
string_ids: HashSet<usize>,
/// NOTE(review): presumably nodes captured by type-like captures
/// (`label` is recorded as one) — confirm against `tree_highlights`.
type_ids: HashSet<usize>,
}
@ -1195,7 +1205,10 @@ fn atom_from_cursor<'a>(
}
// Most languages use "comment", but Perl uses "comments".
let highlight = if node.is_extra() || node.kind() == "comment" || node.kind() == "comments" {
let highlight = if node.is_extra()
|| node.kind() == "comment"
|| highlights.comment_ids.contains(&node.id())
{
AtomKind::Comment
} else if highlights.keyword_ids.contains(&node.id()) {
AtomKind::Keyword