Don't consider `-` as a word character

Treating hyphens as word characters produced some unfortunate subword diffs
when mixing words, numbers and hyphens.

Fixes #918
pull/919/head
Wilfred Hughes 2025-11-14 16:36:26 +07:00
parent c2c562f579
commit 3943c1401a
4 changed files with 18 additions and 26 deletions

@ -41,7 +41,7 @@ sample_files/comma_and_comment_1.js sample_files/comma_and_comment_2.js
0a5ccbcd368607e62eaff0c4ae25049f - 0a5ccbcd368607e62eaff0c4ae25049f -
sample_files/comments_1.rs sample_files/comments_2.rs sample_files/comments_1.rs sample_files/comments_2.rs
cc3a5f9134765192e034c65fb9d02026 - a75827163654d6aed8f837bb586e733c -
sample_files/context_1.rs sample_files/context_2.rs sample_files/context_1.rs sample_files/context_2.rs
1a1633bcf672a582867c815381ae1609 - 1a1633bcf672a582867c815381ae1609 -
@ -65,7 +65,7 @@ sample_files/elisp_contiguous_1.el sample_files/elisp_contiguous_2.el
4a5a33873a4f84ee055d95e1448fba35 - 4a5a33873a4f84ee055d95e1448fba35 -
sample_files/elixir_1.ex sample_files/elixir_2.ex sample_files/elixir_1.ex sample_files/elixir_2.ex
6bcd4f912e6adfd9cf2f83c602d72415 - 85494310196ac5065b3b4ce1d4b350fd -
sample_files/elm_1.elm sample_files/elm_2.elm sample_files/elm_1.elm sample_files/elm_2.elm
ccc1f4bb568cd72781dbcd623b612c43 - ccc1f4bb568cd72781dbcd623b612c43 -
@ -86,7 +86,7 @@ sample_files/hare_1.ha sample_files/hare_2.ha
ef6fd59edc55241311a97d21dd81e4c0 - ef6fd59edc55241311a97d21dd81e4c0 -
sample_files/haskell_1.hs sample_files/haskell_2.hs sample_files/haskell_1.hs sample_files/haskell_2.hs
6791dd931d74391b3d9fb9e351a6de54 - 68fd7f9865c2b1defe05ffd509e08b93 -
sample_files/hcl_1.hcl sample_files/hcl_2.hcl sample_files/hcl_1.hcl sample_files/hcl_2.hcl
7c2aaa3a8b401bc007817f5dd608946d - 7c2aaa3a8b401bc007817f5dd608946d -
@ -98,13 +98,13 @@ sample_files/helpful_1.el sample_files/helpful_2.el
295640aa4cbc23640658a80ad2393ce4 - 295640aa4cbc23640658a80ad2393ce4 -
sample_files/html_1.html sample_files/html_2.html sample_files/html_1.html sample_files/html_2.html
64285a8ed6ddecab1e24bcf0ce649b62 - 3cc8b445a56b74f05e1d7bb84874edab -
sample_files/html_simple_1.html sample_files/html_simple_2.html sample_files/html_simple_1.html sample_files/html_simple_2.html
bb129dce38cd26eac81ca52d2016bade - bb129dce38cd26eac81ca52d2016bade -
sample_files/huge_cpp_1.cpp sample_files/huge_cpp_2.cpp sample_files/huge_cpp_1.cpp sample_files/huge_cpp_2.cpp
7f65e42e16ee318bbfc342b8bcc03d2e - 09e8a30ad7be5686e4d03a3e6b2588aa -
sample_files/identical_1.scala sample_files/identical_2.scala sample_files/identical_1.scala sample_files/identical_2.scala
15c5a789e644348cb7e0de051ff4b63e - 15c5a789e644348cb7e0de051ff4b63e -
@ -146,7 +146,7 @@ sample_files/lua_1.lua sample_files/lua_2.lua
81ad9478e64494320e96284cb7632ced - 81ad9478e64494320e96284cb7632ced -
sample_files/makefile_1.mk sample_files/makefile_2.mk sample_files/makefile_1.mk sample_files/makefile_2.mk
d0572210b5121ce68ac0ce45e43b922b - 4759883325ade33566f2c8afa09e2d82 -
sample_files/many_newlines_1.txt sample_files/many_newlines_2.txt sample_files/many_newlines_1.txt sample_files/many_newlines_2.txt
52ca05855e520876479e6f608c5e7831 - 52ca05855e520876479e6f608c5e7831 -
@ -164,7 +164,7 @@ sample_files/multiline_string_1.ml sample_files/multiline_string_2.ml
ed80815053ba156505d156277d0f4195 - ed80815053ba156505d156277d0f4195 -
sample_files/multiline_string_eof_1.yml sample_files/multiline_string_eof_2.yml sample_files/multiline_string_eof_1.yml sample_files/multiline_string_eof_2.yml
ba8a8e7ed2f4b519feaa391fd05c95fe - cd9cfd627c28b8ecd7c990adc683281a -
sample_files/nest_1.rs sample_files/nest_2.rs sample_files/nest_1.rs sample_files/nest_2.rs
d3a799fe2cd9d81aa251c96af5cd9711 - d3a799fe2cd9d81aa251c96af5cd9711 -
@ -260,7 +260,7 @@ sample_files/string_subwords_1.el sample_files/string_subwords_2.el
b66e960672189960c2d35ef68b47a195 - b66e960672189960c2d35ef68b47a195 -
sample_files/strings_1.el sample_files/strings_2.el sample_files/strings_1.el sample_files/strings_2.el
fe61803e3391fb14f5a3f05750bb94ff - 26ea57243abb16043088b17bfee482a4 -
sample_files/swift_1.swift sample_files/swift_2.swift sample_files/swift_1.swift sample_files/swift_2.swift
73830b14bd8aacac8d4590a3bed61c40 - 73830b14bd8aacac8d4590a3bed61c40 -

@ -0,0 +1,3 @@
{
"name": "foo-d123-pretty-long"
}

@ -0,0 +1,3 @@
{
"name": "d123-pretty-long"
}

@ -12,7 +12,7 @@ pub(crate) fn split_words(s: &str) -> Vec<&str> {
for (idx, c) in s.char_indices() { for (idx, c) in s.char_indices() {
match word_start { match word_start {
Some(start) => { Some(start) => {
if c.is_alphanumeric() || c == '-' || c == '_' { if c.is_alphanumeric() || c == '_' {
// Just carry on in this word. // Just carry on in this word.
} else { } else {
// Push the previous word, then this non-word character. // Push the previous word, then this non-word character.
@ -22,7 +22,7 @@ pub(crate) fn split_words(s: &str) -> Vec<&str> {
} }
} }
None => { None => {
if c.is_alphanumeric() || c == '-' || c == '_' { if c.is_alphanumeric() || c == '_' {
word_start = Some(idx); word_start = Some(idx);
} else { } else {
words.push(&s[idx..idx + c.len_utf8()]); words.push(&s[idx..idx + c.len_utf8()]);
@ -47,7 +47,7 @@ pub(crate) fn split_words_and_numbers(s: &str) -> Vec<&str> {
for (idx, c) in s.char_indices() { for (idx, c) in s.char_indices() {
match word_start { match word_start {
Some((start, start_c)) => { Some((start, start_c)) => {
if c.is_alphanumeric() || c == '-' || c == '_' { if c.is_alphanumeric() || c == '_' {
// Word character, add to the current word if it's // Word character, add to the current word if it's
// not a number. // not a number.
if c.is_ascii_digit() == start_c.is_ascii_digit() { if c.is_ascii_digit() == start_c.is_ascii_digit() {
@ -65,7 +65,7 @@ pub(crate) fn split_words_and_numbers(s: &str) -> Vec<&str> {
} }
} }
None => { None => {
if c.is_alphanumeric() || c == '-' || c == '_' { if c.is_alphanumeric() || c == '_' {
word_start = Some((idx, c)); word_start = Some((idx, c));
} else { } else {
words.push(&s[idx..idx + c.len_utf8()]); words.push(&s[idx..idx + c.len_utf8()]);
@ -93,13 +93,6 @@ mod tests {
assert_eq!(res, vec!["example", ".", "com"]) assert_eq!(res, vec!["example", ".", "com"])
} }
#[test]
fn test_split_words_hyphens() {
let s = "foo -bar-baz-";
let res = split_words(s);
assert_eq!(res, vec!["foo", " ", "-bar-baz-"])
}
#[test] #[test]
fn test_split_words_punctuation() { fn test_split_words_punctuation() {
let s = "example.."; let s = "example..";
@ -149,13 +142,6 @@ mod tests {
assert_eq!(res, vec!["a", "123", "b"]) assert_eq!(res, vec!["a", "123", "b"])
} }
#[test]
fn test_split_words_and_numbers_hyphens() {
let s = "a-b -c-";
let res = split_words_and_numbers(s);
assert_eq!(res, vec!["a-b", " ", "-c-"])
}
#[test] #[test]
fn test_split_words_and_numbers_spaces() { fn test_split_words_and_numbers_spaces() {
let s = "foo bar"; let s = "foo bar";