Ensure we use codepoints when slicing strings to avoid crashes

Rust panics if a string is sliced at a byte offset that isn't a char
boundary. Count Unicode codepoints and slice at the corresponding byte.

Added a test file that was crashing before due to the use of curly
quotes.
ida_star
Wilfred Hughes 2021-08-29 15:07:57 +07:00
parent 209932bfa4
commit 1dfedc6534
6 changed files with 2152 additions and 15 deletions

@ -11,6 +11,10 @@ matching of delimiters.
JSON (legacy parser): fixed parsing string literals (broken in 0.7).
### Display
Fixed crashes on files with non-ASCII characters on long lines.
### Command Line Interface
Removed the unused `--lang` argument.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -414,15 +414,29 @@ impl NewlinePositions {
}
}
/// Count the Unicode codepoints (`char`s) in `s`.
///
/// This is the length to use when reasoning about positions in
/// codepoints. It is generally smaller than `s.len()`, which counts
/// UTF-8 bytes, so the result is not itself a byte index into `s`.
pub fn codepoint_len(s: &str) -> usize {
    // Every `char` yielded by the iterator contributes exactly one.
    s.chars().fold(0, |total, _| total + 1)
}
/// The substring of `s` from codepoint index `start` (inclusive) to
/// codepoint index `end` (exclusive).
///
/// This is safer than slicing by bytes, which panics if a byte offset
/// isn't on a codepoint boundary.
///
/// If `end` is past the end of `s`, the substring runs to the end of
/// `s`. If `start` is at or past the end of `s`, or `end <= start`,
/// the result is the empty string rather than a panic.
pub fn substring_by_codepoint(s: &str, start: usize, end: usize) -> &str {
    // Byte offsets of each codepoint, walked once for both endpoints.
    let mut byte_offsets = s.char_indices().map(|(byte_idx, _)| byte_idx);

    let byte_start = match byte_offsets.nth(start) {
        Some(byte_idx) => byte_idx,
        // `start` is at or beyond the end of the string.
        None => return "",
    };
    if end <= start {
        return "";
    }

    // `nth(start)` consumed codepoints `0..=start`, so codepoint `end`
    // is `end - start - 1` further items along the same iterator.
    let byte_end = byte_offsets.nth(end - start - 1).unwrap_or(s.len());
    &s[byte_start..byte_end]
}
/// Ensure that every line in `s` has this length. Pad short lines and
/// truncate long lines.
pub fn enforce_exact_length(s: &str, line_length: usize) -> String {
let mut result = String::with_capacity(s.len());
for line in s.lines() {
// TODO: use length in chars not bytes.
if line.len() > line_length {
// Truncate.
result.push_str(&line[0..line_length]);
if codepoint_len(line) > line_length {
result.push_str(substring_by_codepoint(line, 0, line_length));
result.push('\n');
} else {
// Pad with spaces.
@ -439,8 +453,7 @@ pub fn enforce_max_length(s: &str, line_length: usize) -> String {
for line in s.lines() {
// TODO: use length in chars not bytes.
if line.len() > line_length {
// Truncate.
result.push_str(&line[0..line_length]);
result.push_str(substring_by_codepoint(line, 0, line_length));
result.push('\n');
} else {
result.push_str(&format!("{}\n", line));
@ -603,4 +616,9 @@ mod tests {
// Intervals are inclusive of `start` but exclusive of `end`.
assert_eq!(group.lhs_lines(), vec![1.into(), 2.into()])
}
/// `codepoint_len` counts codepoints rather than UTF-8 bytes.
#[test]
fn codepoint_len_non_ascii() {
    // "ƒ" occupies two bytes in UTF-8 but is a single codepoint.
    let sample = "ƒoo";
    assert_eq!(codepoint_len(sample), 3);
}
}

@ -2,7 +2,9 @@ use atty::Stream;
use std::cmp::{max, min};
use std::collections::HashMap;
use crate::lines::{enforce_exact_length, enforce_max_length, format_line_num, LineGroup, LineNumber};
use crate::lines::{
codepoint_len, enforce_exact_length, enforce_max_length, format_line_num, LineGroup, LineNumber,
};
use crate::style::apply_colors;
use crate::syntax::{aligned_lines, MatchedPos};
@ -34,7 +36,7 @@ fn longest_visible_line_lhs(s: &str, groups: &[LineGroup]) -> usize {
for group in groups {
if let Some(lhs_lines) = &group.lhs_lines {
for line_num in lhs_lines.start.0..lhs_lines.end.0 {
let current_len = lines[line_num].len();
let current_len = codepoint_len(&lines[line_num]);
longest = max(longest, current_len);
}
}
@ -50,7 +52,7 @@ fn longest_visible_line_rhs(s: &str, groups: &[LineGroup]) -> usize {
for group in groups {
if let Some(rhs_lines) = &group.rhs_lines {
for line_num in rhs_lines.start.0..rhs_lines.end.0 {
let current_len = lines[line_num].len();
let current_len = codepoint_len(&lines[line_num]);
longest = max(longest, current_len);
}
}

@ -1,6 +1,6 @@
//! Apply colours and styling to strings.
use crate::lines::LineNumber;
use crate::lines::{codepoint_len, substring_by_codepoint, LineNumber};
use crate::positions::SingleLineSpan;
use crate::syntax::{MatchKind, MatchedPos};
use colored::*;
@ -27,6 +27,8 @@ impl Style {
}
}
/// Return a copy of `line` with styles applied to all the spans specified.
/// Dim any parts of the line that have no spans.
fn apply_line(line: &str, styles: &[(SingleLineSpan, Style)]) -> String {
if styles.is_empty() {
return line.dimmed().to_string();
@ -35,20 +37,28 @@ fn apply_line(line: &str, styles: &[(SingleLineSpan, Style)]) -> String {
let mut res = String::with_capacity(line.len());
let mut i = 0;
for (span, style) in styles {
if span.start_col >= line.len() {
// The remaining spans are beyond the end of this line. This
// occurs when we truncate the line to fit on the display.
if span.start_col >= codepoint_len(line) {
break;
}
// Dim text before the next span.
if i < span.start_col {
res.push_str(&line[i..span.start_col].dimmed());
res.push_str(&substring_by_codepoint(line, i, span.start_col).dimmed());
}
res.push_str(&style.apply(&line[span.start_col..min(line.len(), span.end_col)]));
// Apply style to the substring in this span.
let span_s =
substring_by_codepoint(line, span.start_col, min(codepoint_len(line), span.end_col));
res.push_str(&style.apply(span_s));
i = span.end_col;
}
if i < line.len() {
res.push_str(&line[i..line.len()].dimmed());
// Dim text after the last span.
if i < codepoint_len(line) {
let span_s = substring_by_codepoint(line, i, codepoint_len(line));
res.push_str(&span_s.dimmed());
}
res
}