Treat tree-sitter nodes highlighted as comments as atoms

Fixes #711
pull/708/merge
Wilfred Hughes 2024-05-11 16:09:25 +07:00
parent e51834fa77
commit 28d8673371
5 changed files with 19 additions and 10 deletions

@ -1,5 +1,12 @@
## 0.59 (unreleased)
### Parsing
Difftastic now uses tree-sitter comment highlighting as a hint that
nodes should be treated as atoms. This ensures comments are treated
more consistently across languages. This fixes cases in Elm where
comment differences were ignored, and may improve other languages too.
## 0.58 (released 11th May 2024)
### Parsing

@ -56,7 +56,7 @@ sample_files/elisp_contiguous_1.el sample_files/elisp_contiguous_2.el
beaf7d6c5136d3db7a36ff49a131b251 -
sample_files/elm_1.elm sample_files/elm_2.elm
54d73b12de1571cfb9b27d3b2a7f4f62 -
33b71893107538cff574276f2837adbb -
sample_files/elvish_1.elv sample_files/elvish_2.elv
f80b47646e7dd2bd3a49393d00657465 -

@ -1,6 +1,8 @@
module Main exposing (blue, green, list, x, y, z)
{- bar
-}
list : List Int
list =
[ 1, 2, 3 ]

@ -6,6 +6,8 @@ module Main exposing
)
{- foo
-}
list : List Int
list =
[ 1

@ -507,7 +507,6 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
atom_nodes: vec![
"doctype",
"quoted_attribute_value",
"comment",
"raw_text",
"tag_name",
"text",
@ -956,10 +955,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_scala() };
TreeSitterConfig {
language,
// TODO: probably all comments should be treated as atoms
atom_nodes: vec!["string", "template_string", "comment", "block_comment"]
.into_iter()
.collect(),
atom_nodes: vec!["string", "template_string"].into_iter().collect(),
delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]")],
highlight_query: ts::Query::new(
language,
@ -973,9 +969,8 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_scheme() };
TreeSitterConfig {
language,
atom_nodes: vec!["block_comment", "comment", "string"]
.into_iter()
.collect(),
// Comment nodes are detected via the highlight query, so they are not listed here.
atom_nodes: vec!["string"].into_iter().collect(),
delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]")],
highlight_query: ts::Query::new(
language,
@ -1615,9 +1610,12 @@ fn syntax_from_cursor<'a>(
*error_count += 1;
}
if config.atom_nodes.contains(node.kind()) {
if config.atom_nodes.contains(node.kind()) || highlights.comment_ids.contains(&node.id()) {
// Treat nodes like string literals as atoms, regardless
// of whether they have children.
//
// Also, if this node is highlighted as a comment, treat it as
// an atom unconditionally.
atom_from_cursor(arena, src, nl_pos, cursor, highlights, ignore_comments)
} else if node.child_count() > 0 {
Some(list_from_cursor(