Only split numbers inside comments

Inside text files, it seems to be better to be conservative and
consider abc123def as one word rather than three.

This is noticeable when looking at changes to the compare.expected
file, which contains hashes. 123c456 and 345c789 don't really have a
`c` in common, so subword highlighting is ugly.
text_sliders
Wilfred Hughes 2023-07-07 08:40:06 +07:00
parent c07e640b24
commit 87d27c5598
4 changed files with 65 additions and 10 deletions

@ -17,7 +17,7 @@ sample_files/change_outer_before.el sample_files/change_outer_after.el
4d202515307556b443806ea25aac0b84 -
sample_files/chinese_before.po sample_files/chinese_after.po
b2cace14b01c272217eec27d16adddbe -
46cc71b46688dd2abc51f9fd82864c90 -
sample_files/clojure_before.clj sample_files/clojure_after.clj
b8e17b8eb649ba0b8d29b57a23e4ac81 -
@ -62,7 +62,7 @@ sample_files/hack_before.php sample_files/hack_after.php
83d4a92c596b5d465ff024aa1b30be92 -
sample_files/hare_before.ha sample_files/hare_after.ha
ea834f886bd44133115c567a200f1996 -
2b3a9433cd692d9ffab872477312e3b8 -
sample_files/haskell_before.hs sample_files/haskell_after.hs
5a2c0c5d4a04f79e2f8f32299e6cd364 -
@ -83,7 +83,7 @@ sample_files/html_simple_before.html sample_files/html_simple_after.html
ce3bfa12bc21d0eb5528766e18387e86 -
sample_files/huge_cpp_before.cpp sample_files/huge_cpp_after.cpp
b0dd65cc5431f3cca9e93ca6f4a64676 -
c879bed2d8551579975617262245337c -
sample_files/identical_before.scala sample_files/identical_after.scala
9c7319f61833e46a0a8cb6c01cc997c9 -
@ -209,10 +209,10 @@ sample_files/tab_before.c sample_files/tab_after.c
b652d15f3a05b82a7d871cfeca2f453f -
sample_files/tailwind_before.css sample_files/tailwind_after.css
3e07691cbb537948db60bcc80813eaf9 -
cee5ee7415b1bd50bdc2dacd11e7303a -
sample_files/text_before.txt sample_files/text_after.txt
5fbdac2d1156ed8bb6b098e87f30d319 -
db9c0a184326ab8b3b1561035ad3545d -
sample_files/todomvc_before.gleam sample_files/todomvc_after.gleam
b142169ae6ac08ef64d0cf67a2e66f5b -

@ -3,3 +3,4 @@ novel
world
foo bar
31df1778815171897c907daf454c4419cfaa46f9

@ -2,3 +2,4 @@ hello
world
foo
c07e640b246c7885cbc3d5c627acbcb2d2ab9c95

@ -608,8 +608,47 @@ pub struct MatchedPos {
/// Split `s` into a vec of things that look like words and individual
/// non-word characters.
///
/// "foo..bar23" -> vec!["foo", ".", ".", "bar", "23"]
/// "foo..bar23" -> vec!["foo", ".", ".", "bar23"]
///
/// See also `split_words_and_numbers`. Both these functions are hot,
/// so they are separate implementations rather than passing a bool to
/// customise number handling.
pub fn split_words(s: &str) -> Vec<&str> {
    // A word is a maximal run of alphanumeric characters, `-` or `_`.
    // Digits count as word characters here, so "foo123bar" stays whole.
    let is_word_char = |ch: char| ch.is_alphanumeric() || ch == '-' || ch == '_';

    let mut pieces = Vec::new();
    // Byte offset at which the word currently being scanned began, if any.
    let mut open_word: Option<usize> = None;

    for (pos, ch) in s.char_indices() {
        if is_word_char(ch) {
            // Start a new word unless we are already inside one.
            if open_word.is_none() {
                open_word = Some(pos);
            }
            continue;
        }

        // A non-word character ends any word in progress...
        if let Some(begin) = open_word.take() {
            pieces.push(&s[begin..pos]);
        }
        // ...and is always emitted as its own single-character slice.
        // `len_utf8` keeps the slice on a char boundary for multi-byte chars.
        pieces.push(&s[pos..pos + ch.len_utf8()]);
    }

    // Flush a word that runs up to the end of the input.
    if let Some(begin) = open_word {
        pieces.push(&s[begin..]);
    }

    pieces
}
/// Split `s` into a vec of things that look like words and individual
/// non-word characters.
///
/// "foo..bar23" -> vec!["foo", ".", ".", "bar", "23"]
pub fn split_words_and_numbers(s: &str) -> Vec<&str> {
let mut res = vec![];
let mut word_start: Option<(usize, char)> = None;
for (idx, c) in s.char_indices() {
@ -656,8 +695,8 @@ fn split_comment_words(
) -> Vec<MatchedPos> {
// TODO: merge adjacent single-line comments unless there are
// blank lines between them.
let content_parts = split_words(content);
let other_parts = split_words(opposite_content);
let content_parts = split_words_and_numbers(content);
let other_parts = split_words_and_numbers(opposite_content);
let content_newlines = NewlinePositions::from(content);
let opposite_content_newlines = NewlinePositions::from(opposite_content);
@ -1118,7 +1157,7 @@ mod tests {
}
#[test]
fn test_split_words_punctuations() {
fn test_split_words_punctuation() {
let s = "example..";
let res = split_words(s);
assert_eq!(res, vec!["example", ".", "."])
@ -1128,7 +1167,7 @@ mod tests {
fn test_split_words_numbers() {
let s = "foo123bar";
let res = split_words(s);
assert_eq!(res, vec!["foo", "123", "bar"])
assert_eq!(res, vec!["foo123bar"])
}
#[test]
@ -1158,4 +1197,18 @@ mod tests {
let res = split_words(s);
assert_eq!(res, vec!["a", " ", "xöy", " ", "b"])
}
#[test]
fn test_split_words_and_numbers() {
    // A digit run embedded in a word is split out as its own token.
    let parts = split_words_and_numbers("a123b");
    assert_eq!(parts, vec!["a", "123", "b"]);
}
#[test]
fn test_split_words_and_numbers_spaces() {
    // Whitespace is a non-word character and comes back as its own token.
    let parts = split_words_and_numbers("foo bar");
    assert_eq!(parts, vec!["foo", " ", "bar"]);
}
}