Use my line_numbers crate for newline position calculations

pull/504/merge
Wilfred Hughes 2023-08-26 16:25:32 +07:00
parent 79b4e44113
commit 41c9165c79
15 changed files with 40 additions and 224 deletions

7
Cargo.lock generated

@ -252,6 +252,7 @@ dependencies = [
"lazy_static",
"libc",
"libmimalloc-sys",
"line-numbers",
"log",
"mimalloc",
"owo-colors",
@ -447,6 +448,12 @@ dependencies = [
"cc",
]
[[package]]
name = "line-numbers"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793a75315eb63b8699158825bdea85d63eeb850e7543cb834abef3c7b5b53780"
[[package]]
name = "lock_api"
version = "0.4.9"

@ -66,6 +66,7 @@ hashbrown = "0.12.3"
humansize = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
line-numbers = "0.2.2"
[dev-dependencies]
# assert_cmd 2.0.6 requires rust 1.60

@ -249,15 +249,16 @@ pub fn mark_syntax<'a>(
#[cfg(test)]
mod tests {
use super::*;
use crate::{
diff::changes::ChangeKind,
diff::graph::Edge::*,
options::DEFAULT_GRAPH_LIMIT,
positions::SingleLineSpan,
syntax::{init_all_info, AtomKind},
};
use itertools::Itertools;
use line_numbers::SingleLineSpan;
use typed_arena::Arena;
fn pos_helper(line: u32) -> Vec<SingleLineSpan> {

@ -29,11 +29,12 @@
//! can change which item is marked as novel (e.g. either `B` in the
//! example above) whilst still showing a valid, minimal diff.
use line_numbers::SingleLineSpan;
use crate::{
diff::changes::{insert_deep_novel, insert_deep_unchanged, ChangeKind::*, ChangeMap},
parse::guess_language,
parse::syntax::Syntax,
positions::SingleLineSpan,
};
use Syntax::*;

@ -3,11 +3,11 @@
use std::cmp::Ordering;
use std::collections::HashSet;
use line_numbers::LineNumber;
use rustc_hash::FxHashSet;
use crate::{
hash::DftHashMap,
lines::LineNumber,
parse::syntax::{zip_repeat_shorter, MatchKind, MatchedPos},
};
@ -729,9 +729,10 @@ fn compact_gaps(
mod tests {
use std::iter::FromIterator;
use crate::{positions::SingleLineSpan, syntax::TokenKind};
use crate::syntax::TokenKind;
use super::*;
use line_numbers::SingleLineSpan;
use pretty_assertions::assert_eq;
#[test]

@ -6,13 +6,13 @@
const MAX_DISTANCE: u32 = 4;
use std::collections::HashSet;
use line_numbers::LineNumber;
use crate::{
constants::Side,
display::context::{add_context, opposite_positions},
display::side_by_side::lines_with_novel,
hash::DftHashMap,
lines::LineNumber,
parse::syntax::{zip_pad_shorter, MatchKind, MatchedPos},
};
@ -679,11 +679,11 @@ pub fn matched_lines_indexes_for_hunk(
#[cfg(test)]
mod tests {
use std::iter::FromIterator;
use line_numbers::SingleLineSpan;
use super::*;
use crate::{
hash::DftHashMap,
positions::SingleLineSpan,
syntax::{MatchKind, TokenKind},
};
use pretty_assertions::assert_eq;

@ -4,10 +4,11 @@ use crate::{
hunks::{matched_lines_indexes_for_hunk, matched_pos_to_hunks, merge_adjacent},
side_by_side::lines_with_novel,
},
lines::{LineNumber, MaxLine},
lines::MaxLine,
parse::syntax::{self, MatchedPos},
summary::{DiffResult, FileContent, FileFormat},
};
use line_numbers::LineNumber;
use serde::{ser::SerializeStruct, Serialize, Serializer};
use std::collections::HashMap;

@ -1,5 +1,7 @@
//! Side-by-side (two column) display of diffs.
use line_numbers::LineNumber;
use line_numbers::SingleLineSpan;
use owo_colors::{OwoColorize, Style};
use std::{
@ -16,10 +18,9 @@ use crate::{
BackgroundColor,
},
hash::DftHashMap,
lines::{codepoint_len, format_line_num, LineNumber},
lines::{codepoint_len, format_line_num},
options::{DisplayMode, DisplayOptions},
parse::syntax::{zip_pad_shorter, MatchedPos},
positions::SingleLineSpan,
summary::FileFormat,
};
@ -28,7 +29,7 @@ const SPACER: &str = " ";
fn format_line_num_padded(line_num: LineNumber, column_width: usize) -> String {
format!(
"{:width$} ",
line_num.one_indexed(),
line_num.as_usize() + 1,
width = column_width - 1
)
}
@ -54,7 +55,7 @@ fn format_missing_line_num(
style = style.dimmed();
}
let num_digits = format!("{}", prev_num.one_indexed()).len();
let num_digits = prev_num.display().len();
format!(
"{:>width$} ",
(if after_end { " " } else { "." }).repeat(num_digits),

@ -3,12 +3,14 @@
use crate::{
constants::Side,
hash::DftHashMap,
lines::{byte_len, LineNumber},
lines::byte_len,
options::DisplayOptions,
parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind},
positions::SingleLineSpan,
summary::FileFormat,
};
use line_numbers::LineNumber;
use line_numbers::SingleLineSpan;
use owo_colors::{OwoColorize, Style};
use std::cmp::{max, min};
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

@ -1,11 +1,11 @@
//! A fallback "parser" for plain text.
use lazy_static::lazy_static;
use line_numbers::LinePositions as NewlinePositions;
use regex::Regex;
use crate::{
diff::myers_diff,
lines::NewlinePositions,
parse::syntax::{split_words, AtomKind, MatchKind, MatchedPos, TokenKind},
};
@ -194,9 +194,8 @@ pub fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> {
#[cfg(test)]
mod tests {
use crate::positions::SingleLineSpan;
use super::*;
use line_numbers::SingleLineSpan;
use pretty_assertions::assert_eq;
#[test]

@ -1,50 +1,10 @@
//! Manipulate lines of text and groups of lines.
// The `from_offset*` methods on NewlinePositions are sensible names,
// and the docs clippy cites:
// https://rust-lang.github.io/api-guidelines/naming.html#ad-hoc-conversions-follow-as_-to_-into_-conventions-c-conv
// don't actually have an opinion on `from_foo` names.
#![allow(clippy::wrong_self_convention)]
use crate::positions::SingleLineSpan;
use line_numbers::LineNumber;
use std::ops::Sub;
use std::{cmp::Ordering, fmt};
/// A newtype around a line index, so that line numbers cannot be
/// mixed up with other integers.
///
/// The wrapped value is zero-indexed.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LineNumber(pub u32);

impl LineNumber {
    /// The line number counting from one rather than zero.
    pub fn one_indexed(self) -> u32 {
        let LineNumber(zero_indexed) = self;
        zero_indexed + 1
    }

    /// The zero-indexed line number as a `usize`.
    pub fn as_usize(self) -> usize {
        let LineNumber(zero_indexed) = self;
        zero_indexed as usize
    }
}

impl fmt::Debug for LineNumber {
    /// Print both the one-indexed and the zero-indexed value, so the
    /// debug output is unambiguous about which convention is shown.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "LineNumber: {} (zero-indexed: {})",
            self.one_indexed(),
            self.0
        )
    }
}

impl From<u32> for LineNumber {
    /// Wrap a zero-indexed `u32` as a `LineNumber`.
    fn from(number: u32) -> Self {
        LineNumber(number)
    }
}
pub fn format_line_num(line_num: LineNumber) -> String {
format!("{} ", line_num.one_indexed())
format!("{} ", line_num.display())
}
/// A position in a single line of a string.
@ -55,104 +15,6 @@ struct LinePosition {
column: usize,
}
/// Converts absolute byte offsets in a string into line-relative
/// positions.
///
/// The line boundaries are recorded once, when the value is built
/// from the source text; each lookup is then a binary search.
#[derive(Debug)]
pub struct NewlinePositions {
    /// One `(start, end)` pair per line of the source string, in
    /// order. `end` is the offset of the newline character itself.
    positions: Vec<(usize, usize)>,
}

impl From<&str> for NewlinePositions {
    /// Record the boundaries of every `\n`-separated line in `s`.
    fn from(s: &str) -> Self {
        let mut next_start = 0;
        let positions: Vec<(usize, usize)> = s
            .split('\n')
            .map(|line| {
                let start = next_start;
                // TODO: this assumes lines terminate with \n, not \r\n.
                let past_newline = start + line.len() + "\n".len();
                next_start = past_newline;
                (start, past_newline - 1)
            })
            .collect();

        NewlinePositions { positions }
    }
}

impl NewlinePositions {
    /// Index of the line whose recorded range contains `offset`.
    ///
    /// Panics if no recorded line contains `offset`.
    fn from_offset(&self, offset: usize) -> usize {
        self.positions
            .binary_search_by(|&(line_start, line_end)| {
                if line_end < offset {
                    Ordering::Less
                } else if line_start > offset {
                    Ordering::Greater
                } else {
                    Ordering::Equal
                }
            })
            .expect("line should be present")
    }

    /// Split the region `region_start..region_end` into one span per
    /// line it touches. A region that crosses a newline therefore
    /// produces several items.
    pub fn from_offsets(&self, region_start: usize, region_end: usize) -> Vec<SingleLineSpan> {
        assert!(region_start <= region_end);

        let first_idx = self.from_offset(region_start);
        let last_idx = self.from_offset(region_end);

        (first_idx..=last_idx)
            .map(|idx| {
                let (line_start, line_end) = self.positions[idx];

                // Lines after the first begin at column zero.
                let start_col = region_start.saturating_sub(line_start) as u32;
                // Lines before the last run up to their own end.
                let end_col = (region_end.min(line_end) - line_start) as u32;

                SingleLineSpan {
                    line: (idx as u32).into(),
                    start_col,
                    end_col,
                }
            })
            .collect()
    }

    /// Like `from_offsets`, but the resulting spans are shifted so
    /// that offset zero of this text sits at the start of `start`.
    ///
    /// Spans on the first line are moved right by `start.start_col`
    /// and placed on `start.line`; spans on later lines keep their
    /// columns and only have their line number shifted.
    pub fn from_offsets_relative_to(
        &self,
        start: SingleLineSpan,
        region_start: usize,
        region_end: usize,
    ) -> Vec<SingleLineSpan> {
        assert!(region_start <= region_end);

        self.from_offsets(region_start, region_end)
            .into_iter()
            .map(|pos| {
                if pos.line.0 == 0 {
                    SingleLineSpan {
                        line: start.line,
                        start_col: start.start_col + pos.start_col,
                        end_col: start.start_col + pos.end_col,
                    }
                } else {
                    SingleLineSpan {
                        line: (start.line.0 + pos.line.0).into(),
                        start_col: pos.start_col,
                        end_col: pos.end_col,
                    }
                }
            })
            .collect()
    }
}
/// Return the length of `s` in codepoints. This is important when
/// finding character boundaries for slicing without errors.
pub fn codepoint_len(s: &str) -> usize {
@ -190,58 +52,10 @@ pub fn is_all_whitespace(s: &str) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use line_numbers::LinePositions as NewlinePositions;
use line_numbers::SingleLineSpan;
use pretty_assertions::assert_eq;
/// A region inside the only line of the text yields a single span on
/// line zero with the same start and end offsets.
#[test]
fn from_offsets_first_line() {
    let expected_spans = vec![SingleLineSpan {
        line: 0.into(),
        start_col: 1,
        end_col: 3,
    }];

    let actual_spans = NewlinePositions::from("foo").from_offsets(1, 3);
    assert_eq!(actual_spans, expected_spans);
}
/// An empty region at offset zero yields a single zero-width span at
/// the start of line zero.
#[test]
fn from_offsets_first_char() {
    let expected_spans = vec![SingleLineSpan {
        line: 0.into(),
        start_col: 0,
        end_col: 0,
    }];

    let actual_spans = NewlinePositions::from("foo").from_offsets(0, 0);
    assert_eq!(actual_spans, expected_spans);
}
/// A region that crosses a newline is split into one span per line:
/// the first runs to the end of its line, the second starts at
/// column zero.
#[test]
fn from_offsets_split_over_multiple_lines() {
    let second_line_tail = SingleLineSpan {
        line: 1.into(),
        start_col: 1,
        end_col: 3,
    };
    let third_line_head = SingleLineSpan {
        line: 2.into(),
        start_col: 0,
        end_col: 2,
    };

    let actual_spans = NewlinePositions::from("foo\nbar\nbaz\naaaaaaaaaaa").from_offsets(5, 10);
    assert_eq!(actual_spans, vec![second_line_tail, third_line_head]);
}
#[test]
fn str_max_line() {
let line: String = "foo\nbar".into();

@ -34,7 +34,6 @@ mod line_parser;
mod lines;
mod options;
mod parse;
mod positions;
mod summary;
mod version;

@ -2,6 +2,8 @@
#![allow(clippy::mutable_key_type)] // Hash for Syntax doesn't use mutable fields.
use line_numbers::LinePositions as NewlinePositions;
use line_numbers::SingleLineSpan;
use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32};
use typed_arena::Arena;
@ -10,8 +12,7 @@ use crate::{
diff::changes::{ChangeKind::*, ChangeMap},
diff::myers_diff,
hash::DftHashMap,
lines::{is_all_whitespace, NewlinePositions},
positions::SingleLineSpan,
lines::is_all_whitespace,
};
use Syntax::*;
@ -312,7 +313,7 @@ impl<'a> Syntax<'a> {
} => {
let line = open_position
.first()
.map(|p| format!("{}", p.line.one_indexed()))
.map(|p| p.line.display())
.unwrap_or_else(|| "?".to_owned());
format!("line:{} {} ... {}", line, open_content, close_content)
@ -322,7 +323,7 @@ impl<'a> Syntax<'a> {
} => {
let line = position
.first()
.map_or_else(|| "?".to_owned(), |p| p.line.one_indexed().to_string());
.map_or_else(|| "?".to_owned(), |p| p.line.display());
format!("line:{} {}", line, content)
}

@ -7,9 +7,9 @@ use crate::options::DiffOptions;
use crate::parse::guess_language as guess;
use tree_sitter as ts;
use typed_arena::Arena;
use line_numbers::LinePositions as NewlinePositions;
use crate::{
lines::NewlinePositions,
parse::syntax::{AtomKind, Syntax},
};

@ -1,12 +0,0 @@
//! Represents positions within a string.
use crate::lines::LineNumber;
/// A range within a single line of a string.
///
/// The line number and both columns are zero-indexed.
// The derived `PartialOrd`/`Ord` compare fields in declaration order
// (`line`, then `start_col`, then `end_col`), so the field order below
// is significant.
#[derive(Debug, PartialEq, Clone, Copy, Eq, PartialOrd, Ord, Hash)]
pub struct SingleLineSpan {
/// The line this span is on (zero-indexed).
pub line: LineNumber,
/// Column where the span starts (zero-indexed).
pub start_col: u32,
/// Column where the span ends (zero-indexed).
pub end_col: u32,
}