Use 32-bit integers for line numbers

This saves 16% time, 18% memory and 1% of instruction counts on the
sample files in #293. Those files are large and are treated as textual
diffs, so reducing the storage per line is significant. Files that use
structural diffing, such as sample_files/slow_before.rs, are unaffected.

Fixes #293
Closes #297
pull/301/head
Wilfred Hughes 2022-06-13 21:42:52 +07:00
parent 5d5eaea84e
commit ed168635c9
8 changed files with 51 additions and 30 deletions

@ -223,7 +223,7 @@ mod tests {
use itertools::Itertools; use itertools::Itertools;
use typed_arena::Arena; use typed_arena::Arena;
fn pos_helper(line: usize) -> Vec<SingleLineSpan> { fn pos_helper(line: u32) -> Vec<SingleLineSpan> {
vec![SingleLineSpan { vec![SingleLineSpan {
line: line.into(), line: line.into(),
start_col: 0, start_col: 0,
@ -231,7 +231,7 @@ mod tests {
}] }]
} }
fn col_helper(line: usize, col: u32) -> Vec<SingleLineSpan> { fn col_helper(line: u32, col: u32) -> Vec<SingleLineSpan> {
vec![SingleLineSpan { vec![SingleLineSpan {
line: line.into(), line: line.into(),
start_col: col, start_col: col,

@ -538,7 +538,7 @@ fn slide_to_next_node<'a>(
/// Return the distance between two syntax nodes, as a tuple of number /// Return the distance between two syntax nodes, as a tuple of number
/// of lines and number of columns. /// of lines and number of columns.
fn distance_between(prev: &Syntax, next: &Syntax) -> (usize, u32) { fn distance_between(prev: &Syntax, next: &Syntax) -> (u32, u32) {
let prev_pos = prev.last_line_span(); let prev_pos = prev.last_line_span();
let next_pos = next.first_line_span(); let next_pos = next.first_line_span();

@ -98,16 +98,16 @@ fn add_ends(
let mut lhs_line: LineNumber = (lhs_max.0 + 1).into(); let mut lhs_line: LineNumber = (lhs_max.0 + 1).into();
let mut rhs_line: LineNumber = (rhs_max.0 + 1).into(); let mut rhs_line: LineNumber = (rhs_max.0 + 1).into();
while lhs_line.0 < lhs_lines.len() && rhs_line.0 < rhs_lines.len() { while (lhs_line.0 as usize) < lhs_lines.len() && (rhs_line.0 as usize) < rhs_lines.len() {
res.push((Some(lhs_line), Some(rhs_line))); res.push((Some(lhs_line), Some(rhs_line)));
lhs_line = (lhs_line.0 + 1).into(); lhs_line = (lhs_line.0 + 1).into();
rhs_line = (rhs_line.0 + 1).into(); rhs_line = (rhs_line.0 + 1).into();
} }
while lhs_line.0 < lhs_lines.len() { while (lhs_line.0 as usize) < lhs_lines.len() {
res.push((Some(lhs_line), None)); res.push((Some(lhs_line), None));
lhs_line = (lhs_line.0 + 1).into(); lhs_line = (lhs_line.0 + 1).into();
} }
while rhs_line.0 < rhs_lines.len() { while (rhs_line.0 as usize) < rhs_lines.len() {
res.push((None, Some(rhs_line))); res.push((None, Some(rhs_line)));
rhs_line = (rhs_line.0 + 1).into(); rhs_line = (rhs_line.0 + 1).into();
} }
@ -229,7 +229,7 @@ fn match_blanks_between(
let mut res = vec![]; let mut res = vec![];
while current_lhs > prev.0 && current_rhs > prev.1 { while current_lhs > prev.0 && current_rhs > prev.1 {
if lhs_lines[current_lhs.0] == "" && rhs_lines[current_rhs.0] == "" { if lhs_lines[current_lhs.as_usize()] == "" && rhs_lines[current_rhs.as_usize()] == "" {
res.push((Some(current_lhs), Some(current_rhs))); res.push((Some(current_lhs), Some(current_rhs)));
current_lhs = (current_lhs.0 - 1).into(); current_lhs = (current_lhs.0 - 1).into();
@ -258,7 +258,7 @@ fn match_blanks_before(
let mut res = vec![]; let mut res = vec![];
loop { loop {
if lhs_lines[current_lhs.0] == "" && rhs_lines[current_rhs.0] == "" { if lhs_lines[current_lhs.as_usize()] == "" && rhs_lines[current_rhs.as_usize()] == "" {
res.push((Some(current_lhs), Some(current_rhs))); res.push((Some(current_lhs), Some(current_rhs)));
if current_lhs.0 == 0 || current_rhs.0 == 0 { if current_lhs.0 == 0 || current_rhs.0 == 0 {

@ -3,7 +3,7 @@
/// The maximum number of lines that may occur between changed lines in a hunk. /// The maximum number of lines that may occur between changed lines in a hunk.
/// ///
/// If we exceed this, the lines are stored in separate hunks. /// If we exceed this, the lines are stored in separate hunks.
const MAX_DISTANCE: usize = 4; const MAX_DISTANCE: u32 = 4;
use std::collections::HashSet; use std::collections::HashSet;

@ -76,7 +76,11 @@ pub fn print(
for (lhs_line, _) in before_lines { for (lhs_line, _) in before_lines {
if let Some(lhs_line) = lhs_line { if let Some(lhs_line) = lhs_line {
println!("{} {}", format_line_num(lhs_line), lhs_lines[lhs_line.0]); println!(
"{} {}",
format_line_num(lhs_line),
lhs_lines[lhs_line.as_usize()]
);
} }
} }
@ -85,7 +89,7 @@ pub fn print(
println!( println!(
"{} {}", "{} {}",
format_line_num(*lhs_line).red().bold(), format_line_num(*lhs_line).red().bold(),
lhs_lines[lhs_line.0] lhs_lines[lhs_line.as_usize()]
); );
} }
} }
@ -94,14 +98,18 @@ pub fn print(
println!( println!(
" {}{}", " {}{}",
format_line_num(*rhs_line).green().bold(), format_line_num(*rhs_line).green().bold(),
rhs_lines[rhs_line.0] rhs_lines[rhs_line.as_usize()]
); );
} }
} }
for (_, rhs_line) in &after_lines { for (_, rhs_line) in &after_lines {
if let Some(rhs_line) = rhs_line { if let Some(rhs_line) = rhs_line {
println!(" {}{}", format_line_num(*rhs_line), rhs_lines[rhs_line.0]); println!(
" {}{}",
format_line_num(*rhs_line),
rhs_lines[rhs_line.as_usize()]
);
} }
} }
println!(); println!();

@ -85,7 +85,7 @@ fn display_single_column(
is_lhs: bool, is_lhs: bool,
display_options: &DisplayOptions, display_options: &DisplayOptions,
) -> String { ) -> String {
let column_width = format_line_num(src.lines().count().into()).len(); let column_width = format_line_num((src.lines().count() as u32).into()).len();
let mut result = String::with_capacity(src.len()); let mut result = String::with_capacity(src.len());
result.push_str(&style::header( result.push_str(&style::header(
@ -105,7 +105,7 @@ fn display_single_column(
for (i, line) in src.lines().enumerate() { for (i, line) in src.lines().enumerate() {
result.push_str( result.push_str(
&format_line_num_padded(i.into(), column_width) &format_line_num_padded((i as u32).into(), column_width)
.style(style) .style(style)
.to_string(), .to_string(),
); );
@ -197,11 +197,17 @@ impl SourceDimensions {
for (lhs_line_num, rhs_line_num) in line_nums { for (lhs_line_num, rhs_line_num) in line_nums {
if let Some(lhs_line_num) = lhs_line_num { if let Some(lhs_line_num) = lhs_line_num {
lhs_max_line = max(lhs_max_line, *lhs_line_num); lhs_max_line = max(lhs_max_line, *lhs_line_num);
lhs_max_content = max(lhs_max_content, codepoint_len(lhs_lines[lhs_line_num.0])); lhs_max_content = max(
lhs_max_content,
codepoint_len(lhs_lines[lhs_line_num.as_usize()]),
);
} }
if let Some(rhs_line_num) = rhs_line_num { if let Some(rhs_line_num) = rhs_line_num {
rhs_max_line = max(rhs_max_line, *rhs_line_num); rhs_max_line = max(rhs_max_line, *rhs_line_num);
rhs_max_content = max(rhs_max_content, codepoint_len(rhs_lines[rhs_line_num.0])); rhs_max_content = max(
rhs_max_content,
codepoint_len(rhs_lines[rhs_line_num.as_usize()]),
);
} }
} }
@ -283,7 +289,7 @@ fn highlight_as_novel(
return true; return true;
} }
let line_content = lines.get(line_num.0).map(|s| str::trim(s)); let line_content = lines.get(line_num.as_usize()).map(|s| str::trim(s));
// If this is a blank line without a corresponding line on the // If this is a blank line without a corresponding line on the
// other side, highlight it too. This helps highlight novel // other side, highlight it too. This helps highlight novel
// blank lines. // blank lines.
@ -437,7 +443,7 @@ pub fn print(
if no_lhs_changes && !show_both { if no_lhs_changes && !show_both {
match rhs_line_num { match rhs_line_num {
Some(rhs_line_num) => { Some(rhs_line_num) => {
let rhs_line = &rhs_colored_lines[rhs_line_num.0]; let rhs_line = &rhs_colored_lines[rhs_line_num.as_usize()];
if same_lines { if same_lines {
println!("{}{}", display_rhs_line_num, rhs_line); println!("{}{}", display_rhs_line_num, rhs_line);
} else { } else {
@ -457,7 +463,7 @@ pub fn print(
} else if no_rhs_changes && !show_both { } else if no_rhs_changes && !show_both {
match lhs_line_num { match lhs_line_num {
Some(lhs_line_num) => { Some(lhs_line_num) => {
let lhs_line = &lhs_colored_lines[lhs_line_num.0]; let lhs_line = &lhs_colored_lines[lhs_line_num.as_usize()];
if same_lines { if same_lines {
println!("{}{}", display_lhs_line_num, lhs_line); println!("{}{}", display_lhs_line_num, lhs_line);
} else { } else {
@ -474,7 +480,7 @@ pub fn print(
} else { } else {
let lhs_line = match lhs_line_num { let lhs_line = match lhs_line_num {
Some(lhs_line_num) => split_and_apply( Some(lhs_line_num) => split_and_apply(
lhs_lines[lhs_line_num.0], lhs_lines[lhs_line_num.as_usize()],
source_dims.lhs_content_width, source_dims.lhs_content_width,
display_options.use_color, display_options.use_color,
lhs_highlights.get(&lhs_line_num).unwrap_or(&vec![]), lhs_highlights.get(&lhs_line_num).unwrap_or(&vec![]),
@ -484,7 +490,7 @@ pub fn print(
}; };
let rhs_line = match rhs_line_num { let rhs_line = match rhs_line_num {
Some(rhs_line_num) => split_and_apply( Some(rhs_line_num) => split_and_apply(
rhs_lines[rhs_line_num.0], rhs_lines[rhs_line_num.as_usize()],
source_dims.rhs_content_width, source_dims.rhs_content_width,
display_options.use_color, display_options.use_color,
rhs_highlights.get(&rhs_line_num).unwrap_or(&vec![]), rhs_highlights.get(&rhs_line_num).unwrap_or(&vec![]),

@ -221,7 +221,9 @@ fn apply(s: &str, styles: &[(SingleLineSpan, Style)]) -> String {
let mut res = String::with_capacity(s.len()); let mut res = String::with_capacity(s.len());
for (i, line) in s.lines().enumerate() { for (i, line) in s.lines().enumerate() {
let ranges = ranges_by_line.remove(&i.into()).unwrap_or_default(); let ranges = ranges_by_line
.remove(&(i as u32).into())
.unwrap_or_default();
res.push_str(&apply_line(line, &ranges)); res.push_str(&apply_line(line, &ranges));
res.push('\n'); res.push('\n');
} }

@ -9,12 +9,16 @@ use std::{cmp::Ordering, fmt};
/// ///
/// Zero-indexed internally. /// Zero-indexed internally.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LineNumber(pub usize); pub struct LineNumber(pub u32);
impl LineNumber { impl LineNumber {
pub fn one_indexed(self) -> usize { pub fn one_indexed(self) -> u32 {
self.0 + 1 self.0 + 1
} }
pub fn as_usize(self) -> usize {
self.0 as usize
}
} }
impl fmt::Debug for LineNumber { impl fmt::Debug for LineNumber {
@ -27,8 +31,8 @@ impl fmt::Debug for LineNumber {
} }
} }
impl From<usize> for LineNumber { impl From<u32> for LineNumber {
fn from(number: usize) -> Self { fn from(number: u32) -> Self {
Self(number) Self(number)
} }
} }
@ -97,7 +101,7 @@ impl NewlinePositions {
for idx in first_idx..=last_idx { for idx in first_idx..=last_idx {
let (line_start, line_end) = self.positions[idx]; let (line_start, line_end) = self.positions[idx];
res.push(SingleLineSpan { res.push(SingleLineSpan {
line: idx.into(), line: (idx as u32).into(),
start_col: if line_start > region_start { start_col: if line_start > region_start {
0 0
} else { } else {
@ -163,10 +167,11 @@ pub trait MaxLine {
impl<S: AsRef<str>> MaxLine for S { impl<S: AsRef<str>> MaxLine for S {
fn max_line(&self) -> LineNumber { fn max_line(&self) -> LineNumber {
self.as_ref() (self
.as_ref()
.trim_end() // Remove extra trailing whitespaces. .trim_end() // Remove extra trailing whitespaces.
.split('\n') // Split by `\n` to calculate lines. .split('\n') // Split by `\n` to calculate lines.
.count() .count() as u32)
.sub(1) // Sub 1 to make zero-indexed LineNumber .sub(1) // Sub 1 to make zero-indexed LineNumber
.into() .into()
} }