Split parse into regex_parser and line_parser

ida_star
Wilfred Hughes 2021-08-21 22:12:29 +07:00
parent 7e30b8cc04
commit 6a83368a4f
4 changed files with 69 additions and 59 deletions

@ -31,7 +31,7 @@ line-oriented diff.
Difftastic treats source code as a sequence of atoms or (possibly
nested) lists.
Language syntax is defined in `parse.rs`: you provide regular
Language syntax is defined in `src/regex_parser.rs`: you provide regular
expressions for atoms (including comments), open delimiters, and close
delimiters.

@ -0,0 +1,59 @@
use typed_arena::Arena;
use crate::{positions::SingleLineSpan, syntax::Syntax};
/// Fallback parser: turn every line of `s` into its own atom.
///
/// Used for files whose language we don't know how to parse. The atom
/// for each line spans from column zero to `str::len` of that line, and
/// its line number is the line's index in `s.lines()`.
pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str) -> Vec<&'a Syntax<'a>> {
    s.lines()
        .enumerate()
        .map(|(line_idx, text)| {
            let span = SingleLineSpan {
                line: line_idx.into(),
                start_col: 0,
                end_col: text.len(),
            };
            // Bind with an explicit type so the collected element type
            // matches the return type exactly.
            let atom: &'a Syntax<'a> = Syntax::new_atom(arena, vec![span], text);
            atom
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    // TODO: move assert_syntaxes to a more relevant file.
    use crate::regex_parser::tests::assert_syntaxes;

    /// Build the atom expected for `content` on line `line`, spanning
    /// columns `0..end_col`.
    fn expected_atom<'a>(
        arena: &'a Arena<Syntax<'a>>,
        line: usize,
        end_col: usize,
        content: &str,
    ) -> &'a Syntax<'a> {
        Syntax::new_atom(
            arena,
            vec![SingleLineSpan {
                line: line.into(),
                start_col: 0,
                end_col,
            }],
            content,
        )
    }

    /// Two lines of input should produce two atoms, one per line.
    #[test]
    fn test_parse_lines() {
        let arena = Arena::new();

        let actual = parse(&arena, "foo\nbar");
        let expected = [
            expected_atom(&arena, 0, 3, "foo"),
            expected_atom(&arena, 1, 3, "bar"),
        ];

        assert_syntaxes(&actual, &expected);
    }
}

@ -1,9 +1,10 @@
mod dijkstra;
mod inline;
mod intervals;
mod line_parser;
mod lines;
mod parse;
mod positions;
mod regex_parser;
mod side_by_side;
mod sitter;
mod style;
@ -16,7 +17,6 @@ use typed_arena::Arena;
use crate::dijkstra::mark_syntax;
use crate::lines::{join_overlapping, visible_groups, MaxLine};
use crate::parse::{from_extension, parse, parse_lines};
use crate::syntax::{change_positions, init_info, matching_lines};
fn read_or_die(path: &str) -> Vec<u8> {
@ -113,16 +113,16 @@ fn main() {
sitter::parse(&arena, &rhs_src, extension),
)
} else {
match from_extension(extension) {
match regex_parser::from_extension(extension) {
Some(lang) => (
lang.name.clone(),
parse(&arena, &lhs_src, &lang),
parse(&arena, &rhs_src, &lang),
regex_parser::parse(&arena, &lhs_src, &lang),
regex_parser::parse(&arena, &rhs_src, &lang),
),
None => (
"text".into(),
parse_lines(&arena, &lhs_src),
parse_lines(&arena, &rhs_src),
line_parser::parse(&arena, &lhs_src),
line_parser::parse(&arena, &rhs_src),
),
}
};

@ -210,26 +210,6 @@ pub fn from_extension(extension: &OsStr) -> Option<Language> {
}
}
/// Treat each line of `s` as a single atom.
///
/// This is the fallback used for files that we don't know how to
/// parse. Each atom's span runs from column zero to `str::len` of its
/// line, on the line's index within `s.lines()`.
pub fn parse_lines<'a>(arena: &'a Arena<Syntax<'a>>, s: &str) -> Vec<&'a Syntax<'a>> {
    let mut atoms: Vec<&'a Syntax<'a>> = Vec::new();

    for (line_number, text) in s.lines().enumerate() {
        // The whole line is one atom, so its span covers the full line.
        let whole_line = SingleLineSpan {
            line: line_number.into(),
            start_col: 0,
            end_col: text.len(),
        };
        atoms.push(Syntax::new_atom(arena, vec![whole_line], text));
    }

    atoms
}
/// Parse `s` according to `lang`.
pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str, lang: &Language) -> Vec<&'a Syntax<'a>> {
let nl_pos = NewlinePositions::from(s);
@ -368,7 +348,7 @@ impl ParseState {
}
#[cfg(test)]
mod tests {
pub mod tests {
use super::*;
use crate::syntax::Syntax::*;
@ -376,7 +356,7 @@ mod tests {
from_extension(&OsStr::new("js")).unwrap()
}
fn assert_syntaxes<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) {
pub fn assert_syntaxes<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) {
if !syntaxes_match(actual, expected) {
dbg!(expected, actual);
assert!(false);
@ -494,35 +474,6 @@ mod tests {
true
}
/// Two lines of input should produce two atoms, one per line.
#[test]
fn test_parse_lines() {
    let arena = Arena::new();
    let actual = parse_lines(&arena, "foo\nbar");

    let foo_span = SingleLineSpan {
        line: 0.into(),
        start_col: 0,
        end_col: 3,
    };
    let bar_span = SingleLineSpan {
        line: 1.into(),
        start_col: 0,
        end_col: 3,
    };
    let expected: [&Syntax<'_>; 2] = [
        Syntax::new_atom(&arena, vec![foo_span], "foo"),
        Syntax::new_atom(&arena, vec![bar_span], "bar"),
    ];

    assert_syntaxes(&actual, &expected);
}
#[test]
fn test_parse_integer() {
let arena = Arena::new();