Ignore indentation changes on multiline comments

pull/25/head
Wilfred Hughes 2021-07-25 12:27:24 +07:00
parent a711d1e424
commit 04214ef1ef
7 changed files with 102 additions and 18 deletions

@ -11,6 +11,9 @@ Fixed an issue where comment replacements were not detected.
Changed words in comments are now only highlighted when comments are
relatively similar (according to their Levenshtein distance).
Multiline comments are now considered unchanged if only their
indentation changes.
Improved alignment for lines at the beginning of a changed group of
lines.

@ -1,8 +0,0 @@
;; Unchanged comment.
(f1)
;; Changing a comment word.
(f2)
;; Completely novel text.
(f3)

@ -0,0 +1,12 @@
// Unchanged comment.
f1();
// Changing a comment word.
f2();
// A single comment about something.
f3();
/* A multiline comment
* whose indentation changes.
*/

@ -1,8 +0,0 @@
;; Unchanged comment.
(f1)
;; Changing a single word.
(f2)
;; Changing virtually everything written in the comment.
(f3)

@ -0,0 +1,14 @@
// Unchanged comment.
f1();
// Changing a single word.
f2();
// A completely different sentence.
f3();
if true {
/* A multiline comment
* whose indentation changes.
*/
}

@ -265,6 +265,8 @@ impl ParseState {
#[cfg(test)]
mod tests {
use std::cell::Cell;
use super::*;
use crate::syntax::Syntax::*;
@ -686,6 +688,40 @@ mod tests {
);
}
// Parses a two-line block comment whose lines both carry leading
// whitespace, and checks the resulting comment atom: the expected content
// has no leading whitespace on its second line ("/* foo\nbar */"), and both
// expected spans cover columns 2..8 on lines 0 and 1 respectively.
#[test]
fn test_parse_indented_multiline_comment() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, " /* foo\n bar */", &lang()),
// Deliberately construct an Atom directly, because
// Syntax::new_comment has the multiline logic.
&[&Atom {
// Placeholder bookkeeping values (zeros / None).
// NOTE(review): presumably assert_syntaxes does not compare these
// fields -- confirm against its definition.
info: crate::syntax::SyntaxInfo {
unique_id: Cell::new(0),
pos_content_hash: 0,
next: Cell::new(None),
change: Cell::new(None),
num_ancestors: Cell::new(0),
},
// Expected text: the continuation line starts directly at "bar".
content: "/* foo\nbar */".into(),
is_comment: true,
// One span per source line of the comment.
position: vec![
SingleLineSpan {
line: 0.into(),
start_col: 2,
end_col: 8,
},
SingleLineSpan {
line: 1.into(),
start_col: 2,
end_col: 8,
},
],
}],
);
}
#[test]
fn test_parse_list() {
let arena = Arena::new();

@ -3,7 +3,7 @@
use itertools::{EitherOrBoth, Itertools};
use regex::Regex;
use std::cell::Cell;
use std::cmp::min;
use std::cmp::{max, min};
use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::fmt;
@ -130,6 +130,21 @@ impl<'a> fmt::Debug for Syntax<'a> {
}
}
fn trim_left(max_trim: usize, content: &str, pos: SingleLineSpan) -> (String, SingleLineSpan) {
let chars: Vec<_> = content.chars().collect();
match chars.iter().position(|c| *c != ' ' && *c != '\t') {
Some(first_non_whitespace) => {
let skip_num = max(max_trim, first_non_whitespace);
let mut new_pos = pos;
new_pos.start_col += skip_num;
(chars.iter().skip(skip_num).collect(), new_pos)
}
None => (content.to_string(), pos),
}
}
impl<'a> Syntax<'a> {
#[allow(clippy::mut_from_ref)] // Clippy doesn't understand arenas.
pub fn new_list(
@ -192,7 +207,27 @@ impl<'a> Syntax<'a> {
position: Vec<SingleLineSpan>,
content: &str,
) -> &'a mut Syntax<'a> {
Self::new_atom_(arena, position, content, true)
// Ignore leading whitespace in multiline comments, so changes
// in comment indentation are ignored.
let first_line_indent = match position.first() {
Some(line_pos) => line_pos.start_col,
None => 0,
};
let mut new_lines: Vec<String> = vec![];
let mut new_position = vec![];
for (i, (line, span)) in content.lines().zip(position).enumerate() {
if i == 0 {
new_lines.push(line.to_string());
new_position.push(span);
} else {
let (new_line, new_span) = trim_left(first_line_indent, line, span);
new_lines.push(new_line);
new_position.push(new_span);
}
}
Self::new_atom_(arena, new_position, &new_lines.join("\n"), true)
}
#[allow(clippy::mut_from_ref)] // Clippy doesn't understand arenas.