Split parse into regex_parser and line_parser

ida_star
Wilfred Hughes 2021-08-21 22:12:29 +07:00
parent 7e30b8cc04
commit 6a83368a4f
4 changed files with 69 additions and 59 deletions

@ -31,7 +31,7 @@ line-oriented diff.
Difftastic treats source code as a sequence of atoms or (possibly Difftastic treats source code as a sequence of atoms or (possibly
nested) lists. nested) lists.
Language syntax is defined in `parse.rs`: you provide regular Language syntax is defined in `src/regex_parser.rs`: you provide regular
expressions for atoms (including comments), open delimiters, and close expressions for atoms (including comments), open delimiters, and close
delimiters. delimiters.

@ -0,0 +1,59 @@
use typed_arena::Arena;
use crate::{positions::SingleLineSpan, syntax::Syntax};
/// Fallback parser: turn every line of `s` into its own atom.
///
/// This is used for files that we don't know how to parse. Each atom
/// carries exactly one span, located on the line's index within `s`
/// and running from column 0 to the line's length in bytes.
pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str) -> Vec<&'a Syntax<'a>> {
    s.lines()
        .enumerate()
        .map(|(line_idx, text)| {
            // `str::lines` has already dropped the line terminator, so
            // the span covers only the visible content of the line.
            let span = SingleLineSpan {
                line: line_idx.into(),
                start_col: 0,
                end_col: text.len(),
            };
            Syntax::new_atom(arena, vec![span], text)
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    // TODO: move assert_syntaxes to a more relevant file.
    use crate::regex_parser::tests::assert_syntaxes;

    /// Two newline-separated lines should produce two atoms, one per
    /// line, each spanning the full width of its line.
    #[test]
    fn test_parse_lines() {
        let arena = Arena::new();

        // Parse first so that arena allocation order matches the
        // order in which the values are compared.
        let actual = parse(&arena, "foo\nbar");

        let foo_atom = Syntax::new_atom(
            &arena,
            vec![SingleLineSpan {
                line: 0.into(),
                start_col: 0,
                end_col: 3,
            }],
            "foo",
        );
        let bar_atom = Syntax::new_atom(
            &arena,
            vec![SingleLineSpan {
                line: 1.into(),
                start_col: 0,
                end_col: 3,
            }],
            "bar",
        );
        let expected = [foo_atom, bar_atom];

        assert_syntaxes(&actual, &expected);
    }
}

@ -1,9 +1,10 @@
mod dijkstra; mod dijkstra;
mod inline; mod inline;
mod intervals; mod intervals;
mod line_parser;
mod lines; mod lines;
mod parse;
mod positions; mod positions;
mod regex_parser;
mod side_by_side; mod side_by_side;
mod sitter; mod sitter;
mod style; mod style;
@ -16,7 +17,6 @@ use typed_arena::Arena;
use crate::dijkstra::mark_syntax; use crate::dijkstra::mark_syntax;
use crate::lines::{join_overlapping, visible_groups, MaxLine}; use crate::lines::{join_overlapping, visible_groups, MaxLine};
use crate::parse::{from_extension, parse, parse_lines};
use crate::syntax::{change_positions, init_info, matching_lines}; use crate::syntax::{change_positions, init_info, matching_lines};
fn read_or_die(path: &str) -> Vec<u8> { fn read_or_die(path: &str) -> Vec<u8> {
@ -113,16 +113,16 @@ fn main() {
sitter::parse(&arena, &rhs_src, extension), sitter::parse(&arena, &rhs_src, extension),
) )
} else { } else {
match from_extension(extension) { match regex_parser::from_extension(extension) {
Some(lang) => ( Some(lang) => (
lang.name.clone(), lang.name.clone(),
parse(&arena, &lhs_src, &lang), regex_parser::parse(&arena, &lhs_src, &lang),
parse(&arena, &rhs_src, &lang), regex_parser::parse(&arena, &rhs_src, &lang),
), ),
None => ( None => (
"text".into(), "text".into(),
parse_lines(&arena, &lhs_src), line_parser::parse(&arena, &lhs_src),
parse_lines(&arena, &rhs_src), line_parser::parse(&arena, &rhs_src),
), ),
} }
}; };

@ -210,26 +210,6 @@ pub fn from_extension(extension: &OsStr) -> Option<Language> {
} }
} }
/// Treat each line of `s` as a single atom.
///
/// This is a fallback for files that we don't know how to parse. The
/// atom for a line has one span, on that line's index, covering
/// columns 0 through the line's byte length.
pub fn parse_lines<'a>(arena: &'a Arena<Syntax<'a>>, s: &str) -> Vec<&'a Syntax<'a>> {
    let atoms: Vec<&'a Syntax<'a>> = s
        .lines()
        .enumerate()
        .map(|(line_idx, text)| {
            let whole_line = SingleLineSpan {
                line: line_idx.into(),
                start_col: 0,
                end_col: text.len(),
            };
            Syntax::new_atom(arena, vec![whole_line], text)
        })
        .collect();
    atoms
}
/// Parse `s` according to `lang`. /// Parse `s` according to `lang`.
pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str, lang: &Language) -> Vec<&'a Syntax<'a>> { pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str, lang: &Language) -> Vec<&'a Syntax<'a>> {
let nl_pos = NewlinePositions::from(s); let nl_pos = NewlinePositions::from(s);
@ -368,7 +348,7 @@ impl ParseState {
} }
#[cfg(test)] #[cfg(test)]
mod tests { pub mod tests {
use super::*; use super::*;
use crate::syntax::Syntax::*; use crate::syntax::Syntax::*;
@ -376,7 +356,7 @@ mod tests {
from_extension(&OsStr::new("js")).unwrap() from_extension(&OsStr::new("js")).unwrap()
} }
fn assert_syntaxes<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) { pub fn assert_syntaxes<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) {
if !syntaxes_match(actual, expected) { if !syntaxes_match(actual, expected) {
dbg!(expected, actual); dbg!(expected, actual);
assert!(false); assert!(false);
@ -494,35 +474,6 @@ mod tests {
true true
} }
/// `parse_lines` should yield one atom per input line, each spanning
/// the whole line.
#[test]
fn test_parse_lines() {
    let arena = Arena::new();

    // Run the parser before building the expected atoms, keeping the
    // arena allocation order the same as a single nested expression.
    let actual = parse_lines(&arena, "foo\nbar");

    let first_line = Syntax::new_atom(
        &arena,
        vec![SingleLineSpan {
            line: 0.into(),
            start_col: 0,
            end_col: 3,
        }],
        "foo",
    );
    let second_line = Syntax::new_atom(
        &arena,
        vec![SingleLineSpan {
            line: 1.into(),
            start_col: 0,
            end_col: 3,
        }],
        "bar",
    );

    assert_syntaxes(&actual, &[first_line, second_line]);
}
#[test] #[test]
fn test_parse_integer() { fn test_parse_integer() {
let arena = Arena::new(); let arena = Arena::new();