From 6a83368a4f7a812039bbfee82c0bab182d66aba1 Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Sat, 21 Aug 2021 22:12:29 -0700 Subject: [PATCH] Split parse into regex_parser and line_parser --- README.md | 2 +- src/line_parser.rs | 59 +++++++++++++++++++++++++++++++ src/main.rs | 14 ++++---- src/{parse.rs => regex_parser.rs} | 53 ++------------------------- 4 files changed, 69 insertions(+), 59 deletions(-) create mode 100644 src/line_parser.rs rename src/{parse.rs => regex_parser.rs} (95%) diff --git a/README.md b/README.md index 105c323fa..8a5cafc26 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ line-oriented diff. Difftastic treats source code as a sequence of atoms or (possibly nested) lists. -Language syntax is defined in `parse.rs`: you provide regular +Language syntax is defined in `src/regex_parser.rs`: you provide regular expressions for atoms (including comments), open delimiters, and close delimiters. diff --git a/src/line_parser.rs b/src/line_parser.rs new file mode 100644 index 000000000..5ce2283f8 --- /dev/null +++ b/src/line_parser.rs @@ -0,0 +1,59 @@ +use typed_arena::Arena; + +use crate::{positions::SingleLineSpan, syntax::Syntax}; + +/// Split `s` by lines, and treat each line as an atom. +/// +/// This is a fallback for files that we don't know how to parse. +pub fn parse<'a>(arena: &'a Arena>, s: &str) -> Vec<&'a Syntax<'a>> { + let mut res = vec![]; + for (i, line) in s.lines().enumerate() { + res.push(Syntax::new_atom( + arena, + vec![SingleLineSpan { + line: i.into(), + start_col: 0, + end_col: line.len(), + }], + line, + )); + } + + res +} + +#[cfg(test)] +mod tests { + use super::*; + // TODO: move assert_syntaxes to a more relevant file. + use crate::regex_parser::tests::assert_syntaxes; + + #[test] + fn test_parse_lines() { + let arena = Arena::new(); + + assert_syntaxes( + &parse(&arena, "foo\nbar"), + &[ + Syntax::new_atom( + &arena, + vec![SingleLineSpan { + line: 0.into(), + start_col: 0, + end_col: 3, + }], + "foo", + ), + Syntax::new_atom( + &arena, + vec![SingleLineSpan { + line: 1.into(), + start_col: 0, + end_col: 3, + }], + "bar", + ), + ], + ); + } +} diff --git a/src/main.rs b/src/main.rs index 6b1cfe2b6..a0b520cb4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,10 @@ mod dijkstra; mod inline; mod intervals; +mod line_parser; mod lines; -mod parse; mod positions; +mod regex_parser; mod side_by_side; mod sitter; mod style; @@ -16,7 +17,6 @@ use typed_arena::Arena; use crate::dijkstra::mark_syntax; use crate::lines::{join_overlapping, visible_groups, MaxLine}; -use crate::parse::{from_extension, parse, parse_lines}; use crate::syntax::{change_positions, init_info, matching_lines}; fn read_or_die(path: &str) -> Vec { @@ -113,16 +113,16 @@ fn main() { sitter::parse(&arena, &rhs_src, extension), ) } else { - match from_extension(extension) { + match regex_parser::from_extension(extension) { Some(lang) => ( lang.name.clone(), - parse(&arena, &lhs_src, &lang), - parse(&arena, &rhs_src, &lang), + regex_parser::parse(&arena, &lhs_src, &lang), + regex_parser::parse(&arena, &rhs_src, &lang), ), None => ( "text".into(), - parse_lines(&arena, &lhs_src), - parse_lines(&arena, &rhs_src), + line_parser::parse(&arena, &lhs_src), + line_parser::parse(&arena, &rhs_src), ), } }; diff --git a/src/parse.rs b/src/regex_parser.rs similarity index 95% rename from src/parse.rs rename to src/regex_parser.rs index 2a400745e..a24d57c50 100644 --- a/src/parse.rs +++ b/src/regex_parser.rs @@ -210,26 +210,6 @@ pub fn from_extension(extension: &OsStr) -> Option { } } -/// Split `s` by lines, and treat each line as an atom. -/// -/// This is a fallback for files that we don't know how to parse. -pub fn parse_lines<'a>(arena: &'a Arena>, s: &str) -> Vec<&'a Syntax<'a>> { - let mut res: Vec<&'a Syntax<'a>> = vec![]; - for (i, line) in s.lines().enumerate() { - res.push(Syntax::new_atom( - arena, - vec![SingleLineSpan { - line: i.into(), - start_col: 0, - end_col: line.len(), - }], - line, - )); - } - - res -} - /// Parse `s` according to `lang`. pub fn parse<'a>(arena: &'a Arena>, s: &str, lang: &Language) -> Vec<&'a Syntax<'a>> { let nl_pos = NewlinePositions::from(s); @@ -368,7 +348,7 @@ impl ParseState { } #[cfg(test)] -mod tests { +pub mod tests { use super::*; use crate::syntax::Syntax::*; @@ -376,7 +356,7 @@ mod tests { from_extension(&OsStr::new("js")).unwrap() } - fn assert_syntaxes<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) { + pub fn assert_syntaxes<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) { if !syntaxes_match(actual, expected) { dbg!(expected, actual); assert!(false); @@ -494,35 +474,6 @@ mod tests { true } - #[test] - fn test_parse_lines() { - let arena = Arena::new(); - - assert_syntaxes( - &parse_lines(&arena, "foo\nbar"), - &[ - Syntax::new_atom( - &arena, - vec![SingleLineSpan { - line: 0.into(), - start_col: 0, - end_col: 3, - }], - "foo", - ), - Syntax::new_atom( - &arena, - vec![SingleLineSpan { - line: 1.into(), - start_col: 0, - end_col: 3, - }], - "bar", - ), - ], - ); - } - #[test] fn test_parse_integer() { let arena = Arena::new();