diff --git a/Cargo.lock b/Cargo.lock index a4d0d7cec..f381f2979 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -252,6 +252,7 @@ dependencies = [ "lazy_static", "libc", "libmimalloc-sys", + "line-numbers", "log", "mimalloc", "owo-colors", @@ -447,6 +448,12 @@ dependencies = [ "cc", ] +[[package]] +name = "line-numbers" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793a75315eb63b8699158825bdea85d63eeb850e7543cb834abef3c7b5b53780" + [[package]] name = "lock_api" version = "0.4.9" diff --git a/Cargo.toml b/Cargo.toml index c55ad39e8..3123a9e28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ hashbrown = "0.12.3" humansize = "2.1.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +line-numbers = "0.2.2" [dev-dependencies] # assert_cmd 2.0.6 requires rust 1.60 diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 6e311da60..37c9c4fb6 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -249,15 +249,16 @@ pub fn mark_syntax<'a>( #[cfg(test)] mod tests { use super::*; + use crate::{ diff::changes::ChangeKind, diff::graph::Edge::*, options::DEFAULT_GRAPH_LIMIT, - positions::SingleLineSpan, syntax::{init_all_info, AtomKind}, }; use itertools::Itertools; + use line_numbers::SingleLineSpan; use typed_arena::Arena; fn pos_helper(line: u32) -> Vec { diff --git a/src/diff/sliders.rs b/src/diff/sliders.rs index f581cc840..1c50e046e 100644 --- a/src/diff/sliders.rs +++ b/src/diff/sliders.rs @@ -29,11 +29,12 @@ //! can change which item is marked as novel (e.g. either `B` in the //! example above) whilst still showing a valid, minimal diff. +use line_numbers::SingleLineSpan; + use crate::{ diff::changes::{insert_deep_novel, insert_deep_unchanged, ChangeKind::*, ChangeMap}, parse::guess_language, parse::syntax::Syntax, - positions::SingleLineSpan, }; use Syntax::*; diff --git a/src/display/context.rs b/src/display/context.rs index 2aab55a0c..dbf31caaf 100644 --- a/src/display/context.rs +++ b/src/display/context.rs @@ -3,11 +3,11 @@ use std::cmp::Ordering; use std::collections::HashSet; +use line_numbers::LineNumber; use rustc_hash::FxHashSet; use crate::{ hash::DftHashMap, - lines::LineNumber, parse::syntax::{zip_repeat_shorter, MatchKind, MatchedPos}, }; @@ -729,9 +729,10 @@ fn compact_gaps( mod tests { use std::iter::FromIterator; - use crate::{positions::SingleLineSpan, syntax::TokenKind}; + use crate::syntax::TokenKind; use super::*; + use line_numbers::SingleLineSpan; use pretty_assertions::assert_eq; #[test] diff --git a/src/display/hunks.rs b/src/display/hunks.rs index 2d0ff9e79..b14ef9d53 100644 --- a/src/display/hunks.rs +++ b/src/display/hunks.rs @@ -6,13 +6,13 @@ const MAX_DISTANCE: u32 = 4; use std::collections::HashSet; +use line_numbers::LineNumber; use crate::{ constants::Side, display::context::{add_context, opposite_positions}, display::side_by_side::lines_with_novel, hash::DftHashMap, - lines::LineNumber, parse::syntax::{zip_pad_shorter, MatchKind, MatchedPos}, }; @@ -679,11 +679,11 @@ pub fn matched_lines_indexes_for_hunk( #[cfg(test)] mod tests { use std::iter::FromIterator; + use line_numbers::SingleLineSpan; use super::*; use crate::{ hash::DftHashMap, - positions::SingleLineSpan, syntax::{MatchKind, TokenKind}, }; use pretty_assertions::assert_eq; diff --git a/src/display/json.rs b/src/display/json.rs index 3e68873e3..48033554b 100644 --- a/src/display/json.rs +++ b/src/display/json.rs @@ -4,10 +4,11 @@ use crate::{ hunks::{matched_lines_indexes_for_hunk, matched_pos_to_hunks, merge_adjacent}, side_by_side::lines_with_novel, }, - lines::{LineNumber, MaxLine}, + lines::MaxLine, parse::syntax::{self, MatchedPos}, summary::{DiffResult, FileContent, FileFormat}, }; +use line_numbers::LineNumber; use serde::{ser::SerializeStruct, Serialize, Serializer}; use std::collections::HashMap; diff --git a/src/display/side_by_side.rs b/src/display/side_by_side.rs index 30372a621..6f789a3ac 100644 --- a/src/display/side_by_side.rs +++ b/src/display/side_by_side.rs @@ -1,5 +1,7 @@ //! Side-by-side (two column) display of diffs. +use line_numbers::LineNumber; +use line_numbers::SingleLineSpan; use owo_colors::{OwoColorize, Style}; use std::{ @@ -16,10 +18,9 @@ use crate::{ BackgroundColor, }, hash::DftHashMap, - lines::{codepoint_len, format_line_num, LineNumber}, + lines::{codepoint_len, format_line_num}, options::{DisplayMode, DisplayOptions}, parse::syntax::{zip_pad_shorter, MatchedPos}, - positions::SingleLineSpan, summary::FileFormat, }; @@ -28,7 +29,7 @@ const SPACER: &str = " "; fn format_line_num_padded(line_num: LineNumber, column_width: usize) -> String { format!( "{:width$} ", - line_num.one_indexed(), + line_num.as_usize() + 1, width = column_width - 1 ) } @@ -54,7 +55,7 @@ fn format_missing_line_num( style = style.dimmed(); } - let num_digits = format!("{}", prev_num.one_indexed()).len(); + let num_digits = prev_num.display().len(); format!( "{:>width$} ", (if after_end { " " } else { "." }).repeat(num_digits), diff --git a/src/display/style.rs b/src/display/style.rs index 47c9ab843..d0e1a0afd 100644 --- a/src/display/style.rs +++ b/src/display/style.rs @@ -3,12 +3,14 @@ use crate::{ constants::Side, hash::DftHashMap, - lines::{byte_len, LineNumber}, + lines::byte_len, options::DisplayOptions, parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind}, - positions::SingleLineSpan, summary::FileFormat, }; + +use line_numbers::LineNumber; +use line_numbers::SingleLineSpan; use owo_colors::{OwoColorize, Style}; use std::cmp::{max, min}; use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; diff --git a/src/line_parser.rs b/src/line_parser.rs index f54787b33..31f803fd9 100644 --- a/src/line_parser.rs +++ b/src/line_parser.rs @@ -1,11 +1,11 @@ //! A fallback "parser" for plain text. use lazy_static::lazy_static; +use line_numbers::LinePositions as NewlinePositions; use regex::Regex; use crate::{ diff::myers_diff, - lines::NewlinePositions, parse::syntax::{split_words, AtomKind, MatchKind, MatchedPos, TokenKind}, }; @@ -194,9 +194,8 @@ pub fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec { #[cfg(test)] mod tests { - use crate::positions::SingleLineSpan; - use super::*; + use line_numbers::SingleLineSpan; use pretty_assertions::assert_eq; #[test] diff --git a/src/lines.rs b/src/lines.rs index 1342b310d..858d42623 100644 --- a/src/lines.rs +++ b/src/lines.rs @@ -1,50 +1,10 @@ //! Manipulate lines of text and groups of lines. -// The `from_offset*` methods on NewlinePositions are sensible names, -// and the docs clippy cites: -// https://rust-lang.github.io/api-guidelines/naming.html#ad-hoc-conversions-follow-as_-to_-into_-conventions-c-conv -// don't actually have an opinion on `from_foo` names. -#![allow(clippy::wrong_self_convention)] - -use crate::positions::SingleLineSpan; +use line_numbers::LineNumber; use std::ops::Sub; -use std::{cmp::Ordering, fmt}; - -/// A distinct number type for line numbers, to prevent confusion with -/// other numerical data. -/// -/// Zero-indexed internally. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct LineNumber(pub u32); - -impl LineNumber { - pub fn one_indexed(self) -> u32 { - self.0 + 1 - } - - pub fn as_usize(self) -> usize { - self.0 as usize - } -} - -impl fmt::Debug for LineNumber { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_fmt(format_args!( - "LineNumber: {} (zero-indexed: {})", - self.one_indexed(), - self.0 - )) - } -} - -impl From for LineNumber { - fn from(number: u32) -> Self { - Self(number) - } -} pub fn format_line_num(line_num: LineNumber) -> String { - format!("{} ", line_num.one_indexed()) + format!("{} ", line_num.display()) } /// A position in a single line of a string. @@ -55,104 +15,6 @@ struct LinePosition { column: usize, } -/// A struct for efficiently converting absolute string positions to -/// line-relative positions. -#[derive(Debug)] -pub struct NewlinePositions { - /// A vector of the start and end positions of all the lines in - /// `s`. Positions include the newline character itself. - positions: Vec<(usize, usize)>, -} - -impl From<&str> for NewlinePositions { - fn from(s: &str) -> Self { - let mut line_start = 0; - let mut positions = vec![]; - for line in s.split('\n') { - let line_end = line_start + line.len() + "\n".len(); - // TODO: this assumes lines terminate with \n, not \r\n. - positions.push((line_start, line_end - 1)); - line_start = line_end; - } - - NewlinePositions { positions } - } -} - -impl NewlinePositions { - fn from_offset(&self, offset: usize) -> usize { - let idx = self.positions.binary_search_by(|(line_start, line_end)| { - if *line_end < offset { - return Ordering::Less; - } - if *line_start > offset { - return Ordering::Greater; - } - - Ordering::Equal - }); - - idx.expect("line should be present") - } - - /// Convert to single-line spans. If the original span crosses a - /// newline, the vec will contain multiple items. - pub fn from_offsets(&self, region_start: usize, region_end: usize) -> Vec { - assert!(region_start <= region_end); - - let first_idx = self.from_offset(region_start); - let last_idx = self.from_offset(region_end); - - let mut res = vec![]; - for idx in first_idx..=last_idx { - let (line_start, line_end) = self.positions[idx]; - res.push(SingleLineSpan { - line: (idx as u32).into(), - start_col: if line_start > region_start { - 0 - } else { - region_start - line_start - } as u32, - end_col: if region_end < line_end { - region_end - line_start - } else { - line_end - line_start - } as u32, - }); - } - - res - } - - pub fn from_offsets_relative_to( - &self, - start: SingleLineSpan, - region_start: usize, - region_end: usize, - ) -> Vec { - assert!(region_start <= region_end); - - let mut res = vec![]; - for pos in self.from_offsets(region_start, region_end) { - if pos.line.0 == 0 { - res.push(SingleLineSpan { - line: start.line, - start_col: start.start_col + pos.start_col, - end_col: start.start_col + pos.end_col, - }); - } else { - res.push(SingleLineSpan { - line: (start.line.0 + pos.line.0).into(), - start_col: pos.start_col, - end_col: pos.end_col, - }); - } - } - - res - } -} - /// Return the length of `s` in codepoints. This is important when /// finding character boundaries for slicing without errors. pub fn codepoint_len(s: &str) -> usize { @@ -190,58 +52,10 @@ pub fn is_all_whitespace(s: &str) -> bool { #[cfg(test)] mod tests { use super::*; + use line_numbers::LinePositions as NewlinePositions; + use line_numbers::SingleLineSpan; use pretty_assertions::assert_eq; - #[test] - fn from_offsets_first_line() { - let newline_positions: NewlinePositions = "foo".into(); - let line_spans = newline_positions.from_offsets(1, 3); - assert_eq!( - line_spans, - vec![SingleLineSpan { - line: 0.into(), - start_col: 1, - end_col: 3 - }] - ); - } - - #[test] - fn from_offsets_first_char() { - let newline_positions: NewlinePositions = "foo".into(); - let line_spans = newline_positions.from_offsets(0, 0); - assert_eq!( - line_spans, - vec![SingleLineSpan { - line: 0.into(), - start_col: 0, - end_col: 0 - }] - ); - } - - #[test] - fn from_offsets_split_over_multiple_lines() { - let newline_positions: NewlinePositions = "foo\nbar\nbaz\naaaaaaaaaaa".into(); - let line_spans = newline_positions.from_offsets(5, 10); - - assert_eq!( - line_spans, - vec![ - SingleLineSpan { - line: 1.into(), - start_col: 1, - end_col: 3 - }, - SingleLineSpan { - line: 2.into(), - start_col: 0, - end_col: 2 - } - ] - ); - } - #[test] fn str_max_line() { let line: String = "foo\nbar".into(); diff --git a/src/main.rs b/src/main.rs index b8e9e0233..4bd575b65 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,7 +34,6 @@ mod line_parser; mod lines; mod options; mod parse; -mod positions; mod summary; mod version; diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index 79d9bd171..473c8c556 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -2,6 +2,8 @@ #![allow(clippy::mutable_key_type)] // Hash for Syntax doesn't use mutable fields. +use line_numbers::LinePositions as NewlinePositions; +use line_numbers::SingleLineSpan; use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32}; use typed_arena::Arena; @@ -10,8 +12,7 @@ use crate::{ diff::changes::{ChangeKind::*, ChangeMap}, diff::myers_diff, hash::DftHashMap, - lines::{is_all_whitespace, NewlinePositions}, - positions::SingleLineSpan, + lines::is_all_whitespace, }; use Syntax::*; @@ -312,7 +313,7 @@ impl<'a> Syntax<'a> { } => { let line = open_position .first() - .map(|p| format!("{}", p.line.one_indexed())) + .map(|p| p.line.display()) .unwrap_or_else(|| "?".to_owned()); format!("line:{} {} ... {}", line, open_content, close_content) @@ -322,7 +323,7 @@ impl<'a> Syntax<'a> { } => { let line = position .first() - .map_or_else(|| "?".to_owned(), |p| p.line.one_indexed().to_string()); + .map_or_else(|| "?".to_owned(), |p| p.line.display()); format!("line:{} {}", line, content) } diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs index c9febc429..6e5820f8c 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -7,9 +7,9 @@ use crate::options::DiffOptions; use crate::parse::guess_language as guess; use tree_sitter as ts; use typed_arena::Arena; +use line_numbers::LinePositions as NewlinePositions; use crate::{ - lines::NewlinePositions, parse::syntax::{AtomKind, Syntax}, }; diff --git a/src/positions.rs b/src/positions.rs deleted file mode 100644 index e58c4252c..000000000 --- a/src/positions.rs +++ /dev/null @@ -1,12 +0,0 @@ -//! Represents positions within a string. - -use crate::lines::LineNumber; - -/// A range within a single line of a string. -#[derive(Debug, PartialEq, Clone, Copy, Eq, PartialOrd, Ord, Hash)] -pub struct SingleLineSpan { - /// All zero-indexed. - pub line: LineNumber, - pub start_col: u32, - pub end_col: u32, -}