Make word diffs more granular

Fixes #39
pull/48/head
Wilfred Hughes 2021-10-03 16:43:08 +07:00
parent e9b5fe13ab
commit febfbcea3e
2 changed files with 22 additions and 8 deletions

@ -4,6 +4,9 @@
Improved handling of paired delimiters, particularly in C, C++ and C#.
Improved word splitting when diffing similar comments (it's now
more granular).
Fixed a rare issue where single-item lists were flattened.
### Integration

@ -3,6 +3,8 @@
#![allow(clippy::mutable_key_type)] // Hash for Syntax doesn't use mutable fields.
use itertools::{EitherOrBoth, Itertools};
use lazy_static::lazy_static;
use regex::Regex;
use std::{
cell::Cell,
cmp::min,
@ -517,6 +519,14 @@ pub struct MatchedPos {
pub prev_opposite_pos: Vec<SingleLineSpan>,
}
/// Split `s` into maximal runs of alphanumeric characters and maximal
/// runs of everything else (whitespace, punctuation, symbols).
///
/// The runs are returned in order and concatenating them reproduces
/// `s` exactly, so no character is dropped. An empty input yields an
/// empty vector.
///
/// Characters are classified with `char::is_alphanumeric`, which is
/// Unicode-aware: a word such as "naïve" stays in one piece instead of
/// being broken at every non-ASCII letter. For ASCII-only input the
/// result is the same as splitting on `[a-zA-Z0-9]+|[^a-zA-Z0-9]+`.
fn split_words(s: &str) -> Vec<String> {
    let mut words = Vec::new();
    // Byte offset where the current run begins.
    let mut run_start = 0;
    // Whether the current run is alphanumeric; `None` before the first char.
    let mut run_is_word: Option<bool> = None;

    for (idx, c) in s.char_indices() {
        let is_word = c.is_alphanumeric();
        // The character class flipped, so the previous run ends just before `c`.
        if run_is_word == Some(!is_word) {
            words.push(s[run_start..idx].to_owned());
            run_start = idx;
        }
        run_is_word = Some(is_word);
    }

    // Flush the trailing run (absent only when `s` is empty).
    if run_start < s.len() {
        words.push(s[run_start..].to_owned());
    }

    words
}
fn split_comment_words(
content: &str,
pos: &[SingleLineSpan],
@ -524,16 +534,10 @@ fn split_comment_words(
opposite_pos: &[SingleLineSpan],
prev_opposite_pos: &[SingleLineSpan],
) -> Vec<MatchedPos> {
// TODO: also split on whitespace, so "// (foo)" splits before "(".
// TODO: merge adjacent single-line comments unless there are
// blank lines between them.
let content_parts: Vec<_> = content
.split_inclusive(&[' ', '\n', '\t'] as &[char])
.collect();
let other_parts: Vec<_> = opposite_content
.split_inclusive(&[' ', '\n', '\t'] as &[char])
.collect();
let content_parts = split_words(content);
let other_parts = split_words(opposite_content);
let content_newlines = NewlinePositions::from(content);
let opposite_content_newlines = NewlinePositions::from(opposite_content);
@ -1193,4 +1197,11 @@ mod tests {
},]
);
}
/// A punctuation character between two alphanumeric runs is returned
/// as its own piece.
#[test]
fn test_split_words() {
    let words = split_words("example.com");
    let expected = ["example", ".", "com"];
    assert_eq!(words, expected);
}
}