WIP don't use StringIgnoringNewline due to #755

2024-09-29 21:46:01 +07:00 · 2024-09-29 21:46:01 +07:00 · f1c69d3b92
parent 393845ddcb
commit f1c69d3b92
1 changed file with 24 additions and 82 deletions
--- a/src/line_parser.rs
+++ b/src/line_parser.rs
@@ -1,10 +1,8 @@
 //! A fallback "parser" for plain text.

-use lazy_static::lazy_static;
 use line_numbers::{LinePositions, SingleLineSpan};
-use regex::Regex;
-use std::hash::Hash;

+use crate::lines::split_on_newlines;
 use crate::words::split_words;
 use crate::{
    diff::myers_diff,
@@ -13,25 +11,6 @@ use crate::{

 const MAX_WORDS_IN_LINE: usize = 1000;

-fn split_lines_keep_newline(s: &str) -> Vec<&str> {
-    lazy_static! {
-        static ref NEWLINE_RE: Regex = Regex::new("\n").unwrap();
-    }
-
-    let mut offset = 0;
-    let mut lines = vec![];
-    for newline_match in NEWLINE_RE.find_iter(s) {
-        lines.push(s[offset..newline_match.end()].into());
-        offset = newline_match.end();
-    }
-
-    if offset < s.len() {
-        lines.push(s[offset..].into());
-    }
-
-    lines
-}
-
 #[derive(Debug)]
 enum TextChangeKind {
    Novel,
@@ -74,66 +53,24 @@ fn merge_novel<'a>(
    res
 }

-#[derive(Debug, Clone)]
-struct StringIgnoringNewline<'a>(&'a str);
-
-impl PartialEq for StringIgnoringNewline<'_> {
-    fn eq(&self, other: &Self) -> bool {
-        let mut s = self.0;
-        if s.ends_with('\n') {
-            s = &s[..s.len() - 1];
-        }
-
-        let mut other_s = other.0;
-        if other_s.ends_with('\n') {
-            other_s = &other_s[..other_s.len() - 1];
-        }
-
-        s == other_s
-    }
-}
-
-impl Eq for StringIgnoringNewline<'_> {}
-
-impl Hash for StringIgnoringNewline<'_> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        let mut s = self.0;
-        if s.ends_with('\n') {
-            s = &s[..s.len() - 1];
-        }
-
-        s.hash(state);
-    }
-}
-
 fn changed_parts<'a>(
    src: &'a str,
    opposite_src: &'a str,
 ) -> Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> {
-    let src_lines = split_lines_keep_newline(src)
-        .into_iter()
-        .map(StringIgnoringNewline)
-        .collect::<Vec<_>>();
-    let opposite_src_lines = split_lines_keep_newline(opposite_src)
-        .into_iter()
-        .map(StringIgnoringNewline)
-        .collect::<Vec<_>>();
+    let src_lines = split_on_newlines(src).collect::<Vec<_>>();
+    let opposite_src_lines = split_on_newlines(opposite_src).collect::<Vec<_>>();

    let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![];
    for diff_res in myers_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) {
        match diff_res {
            myers_diff::DiffResult::Left(line) => {
-                res.push((TextChangeKind::Novel, vec![line.0], vec![]));
+                res.push((TextChangeKind::Novel, vec![line], vec![]));
            }
            myers_diff::DiffResult::Both(line, opposite_line) => {
-                res.push((
-                    TextChangeKind::Unchanged,
-                    vec![line.0],
-                    vec![opposite_line.0],
-                ));
+                res.push((TextChangeKind::Unchanged, vec![line], vec![opposite_line]));
            }
            myers_diff::DiffResult::Right(opposite_line) => {
-                res.push((TextChangeKind::Novel, vec![], vec![opposite_line.0]));
+                res.push((TextChangeKind::Novel, vec![], vec![opposite_line]));
            }
        }
    }
@@ -168,6 +105,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
            TextChangeKind::Unchanged => {
                seen_unchanged = true;
                for (lhs_line, rhs_line) in lhs_lines.iter().zip(rhs_lines) {
+                    // FIXME: the offset passed to from_region causes a crash here.
                    let lhs_pos =
                        lhs_lp.from_region(lhs_offset, lhs_offset + line_len_in_bytes(lhs_line));
                    let rhs_pos =
@@ -182,13 +120,13 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
                        pos: lhs_pos[0],
                    });

-                    lhs_offset += lhs_line.len();
-                    rhs_offset += rhs_line.len();
+                    lhs_offset += lhs_line.len() + "\n".len();
+                    rhs_offset += rhs_line.len() + "\n".len();
                }
            }
            TextChangeKind::Novel => {
-                let lhs_part = lhs_lines.join("");
-                let rhs_part = rhs_lines.join("");
+                let lhs_part = lhs_lines.join("\n");
+                let rhs_part = rhs_lines.join("\n");

                let lhs_words = split_words(&lhs_part);
                let rhs_words = split_words(&rhs_part);
@@ -209,8 +147,12 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
                        }
                    }

-                    lhs_offset += lhs_part.len();
-                    rhs_offset += rhs_part.len();
+                    // if !lhs_lines.is_empty() {
+                    //     lhs_offset += "\n".len();
+                    // }
+                    // if !rhs_lines.is_empty() {
+                    //     rhs_offset += "\n".len();
+                    // }
                    continue;
                }

@@ -254,6 +196,13 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
                        }
                    }
                }
+
+                if !lhs_lines.is_empty() {
+                    lhs_offset += "\n".len();
+                }
+                if !rhs_lines.is_empty() {
+                    rhs_offset += "\n".len();
+                }
            }
        }
    }
@@ -295,13 +244,6 @@ mod tests {

    use super::*;

-    #[test]
-    fn test_split_newlines() {
-        let s = "foo\nbar\nbaz";
-        let res = split_lines_keep_newline(s);
-        assert_eq!(res, vec!["foo\n", "bar\n", "baz"])
-    }
-
    #[test]
    fn test_positions_no_changes() {
        let positions = change_positions("foo", "foo");