Remove regex parser

edge_only_predecessors
Wilfred Hughes 2021-09-19 12:17:25 +07:00
parent 0f27c97865
commit d7b0c917c1
7 changed files with 129 additions and 967 deletions

@ -7,6 +7,8 @@ Added a C parser.
Added a C++ parser. Difftastic prefers the C++ parser for `.h`
files. Please file a bug if you see issues.
Removed legacy regex-based parsing backend.
## 0.9
### Parsing

@ -1,7 +1,6 @@
use std::ffi::OsStr;
use criterion::{criterion_group, criterion_main, Criterion};
use difftastic::regex_parser as rp;
use difftastic::tree_sitter_parser::{from_extension, parse};
use typed_arena::Arena;
@ -17,18 +16,6 @@ pub fn parsing_benchmark(c: &mut Criterion) {
);
})
});
let css_rx_config = rp::from_extension(OsStr::new("css")).unwrap();
c.bench_function("Tiny CSS file (regex)", |b| {
b.iter(|| {
let arena = Arena::new();
rp::parse(
&arena,
".foo { color: red; border: 1px solid green; }",
&css_rx_config,
);
})
});
}
criterion_group!(benches, parsing_benchmark);

@ -57,17 +57,6 @@ configure the file extensions, atoms and delimiters.
$ git subtree pull --prefix=vendor/tree-sitter-java git@github.com:tree-sitter/tree-sitter-java.git master
```
## Parsing with Regular Expressions
Difftastic also has a legacy parser that uses regular expressions,
inspired by [Comby](https://github.com/comby-tools/comby).
```
$ DFT_RX=1 difftastic sample_files/before.js sample_files/after.js
```
This parser is less precise and will probably be removed in future.
## Simplified Syntax
Difftastic converts the tree-sitter parse tree to a simplified syntax

@ -211,35 +211,22 @@ fn main() {
let extension = Path::new(&display_path).extension();
let extension = extension.unwrap_or_else(|| OsStr::new(""));
// Try tree-sitter parser first unless DFT_RX (difftastic regex)
// environment variable is set.
let ts_lang = if env::var("DFT_RX").is_ok() {
None
} else {
tsp::from_extension(extension)
};
let ts_lang = tsp::from_extension(extension);
let (lang_name, lhs, rhs) = match ts_lang {
Some(ts_lang) => (
ts_lang.name.into(),
ts_lang.name,
tsp::parse(&arena, &lhs_src, &ts_lang),
tsp::parse(&arena, &rhs_src, &ts_lang),
),
None => match regex_parser::from_extension(extension) {
Some(lang) => (
format!("{} (regex parser)", lang.name),
regex_parser::parse(&arena, &lhs_src, &lang),
regex_parser::parse(&arena, &rhs_src, &lang),
),
None => (
"text".into(),
line_parser::parse(&arena, &lhs_src),
line_parser::parse(&arena, &rhs_src),
),
},
None => (
"text",
line_parser::parse(&arena, &lhs_src),
line_parser::parse(&arena, &rhs_src),
),
};
println!("{}", style::header(&display_path, &lang_name));
println!("{}", style::header(&display_path, lang_name));
init_info(&lhs, &rhs);
mark_syntax(lhs.get(0).copied(), rhs.get(0).copied());

@ -18,7 +18,6 @@ mod intervals;
pub mod line_parser;
pub mod lines;
mod positions;
pub mod regex_parser;
pub mod side_by_side;
pub mod style;
pub mod syntax;

@ -34,8 +34,125 @@ pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str) -> Vec<&'a Syntax<'a>> {
#[cfg(test)]
mod tests {
use super::*;
// TODO: move assert_syntaxes to a more relevant file.
use crate::regex_parser::tests::assert_syntaxes;
use Syntax::*;
fn assert_syntaxes<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) {
if !syntaxes_match(actual, expected) {
dbg!(expected, actual);
assert!(false);
}
}
fn syntaxes_match<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) -> bool {
if actual.len() != expected.len() {
return false;
} else {
for (lhs_child, rhs_child) in actual.iter().zip(expected.iter()) {
if !syntax_matches(lhs_child, rhs_child) {
return false;
}
}
}
true
}
/// Compare all the fields in a Syntax value, not just
/// those used in its Eq implementation.
fn syntax_matches<'a>(actual: &'a Syntax<'a>, expected: &'a Syntax<'a>) -> bool {
match (actual, expected) {
(
List {
info: lhs_info,
open_position: lhs_open_position,
open_content: lhs_start_content,
children: lhs_children,
close_content: lhs_end_content,
close_position: lhs_close_position,
num_descendants: lhs_num_descendants,
..
},
List {
info: rhs_info,
open_position: rhs_open_position,
open_content: rhs_start_content,
children: rhs_children,
close_content: rhs_end_content,
close_position: rhs_close_position,
num_descendants: rhs_num_descendants,
..
},
) => {
if lhs_info.change.get() != rhs_info.change.get() {
dbg!(lhs_info.change.get(), rhs_info.change.get());
return false;
}
if lhs_open_position != rhs_open_position {
dbg!(lhs_open_position, rhs_open_position);
return false;
}
if lhs_start_content != rhs_start_content {
dbg!(lhs_start_content, rhs_start_content);
return false;
}
if lhs_end_content != rhs_end_content {
dbg!(lhs_end_content, rhs_end_content);
return false;
}
if lhs_close_position != rhs_close_position {
dbg!(lhs_close_position, rhs_close_position);
return false;
}
if lhs_num_descendants != rhs_num_descendants {
dbg!(lhs_num_descendants, rhs_num_descendants);
return false;
}
if !syntaxes_match(lhs_children, rhs_children) {
return false;
}
}
(
Atom {
info: lhs_info,
position: lhs_position,
content: lhs_content,
is_comment: lhs_is_comment,
..
},
Atom {
info: rhs_info,
position: rhs_position,
content: rhs_content,
is_comment: rhs_is_comment,
..
},
) => {
if lhs_info.change.get() != rhs_info.change.get() {
dbg!(lhs_info.change.get(), rhs_info.change.get());
return false;
}
if lhs_position != rhs_position {
dbg!(lhs_position, rhs_position);
return false;
}
if lhs_content != rhs_content {
dbg!(lhs_content, rhs_content);
return false;
}
if lhs_is_comment != rhs_is_comment {
dbg!(lhs_is_comment, rhs_is_comment);
return false;
}
}
_ => {
return false;
}
}
true
}
#[test]
fn test_parse_lines() {

@ -1,919 +0,0 @@
//! Lexes source code and parses delimiters according to a simple
//! regex-based parser.
use std::{borrow::Borrow, ffi::OsStr};
use crate::{lines::NewlinePositions, positions::SingleLineSpan, syntax::Syntax};
use regex::Regex;
use typed_arena::Arena;
pub struct Language {
pub name: String,
atom_patterns: Vec<Regex>,
comment_patterns: Vec<Regex>,
open_delimiter_pattern: Regex,
close_delimiter_pattern: Regex,
}
pub fn from_extension(extension: &OsStr) -> Option<Language> {
match extension.to_string_lossy().borrow() {
"clj" => Some(Language {
name: "Clojure".into(),
atom_patterns: vec![
// Numbers
Regex::new(r"[0-9]+").unwrap(),
// Symbols (e.g. variable names)
Regex::new(r":?[a-zA-Z0-9_>+=-]+").unwrap(),
// Operators
Regex::new(r"[`'~@&]").unwrap(),
// Double-quoted strings
Regex::new(r#""((\\.)|[^"])*""#).unwrap(),
],
comment_patterns: vec![Regex::new(r";.*").unwrap()],
open_delimiter_pattern: Regex::new(r"(\[|\{|\()").unwrap(),
close_delimiter_pattern: Regex::new(r"(\]|\}|\))").unwrap(),
}),
"css" => Some(Language {
name: "CSS".into(),
atom_patterns: vec![
// Numbers
Regex::new(r"[0-9]+").unwrap(),
// Symbols
Regex::new(r"[.a-zA-Z0-9_]+").unwrap(),
// Punctuation
Regex::new(r"[:;]").unwrap(),
// Double-quoted strings
Regex::new(r#""((\\.)|[^"])*""#).unwrap(),
// Single quoted strings
Regex::new(r"'((\\')|[^'])*'").unwrap(),
],
comment_patterns: vec![
// Multi-line comments
Regex::new(r"/\*(?s:.)*?\*/").unwrap(),
],
open_delimiter_pattern: Regex::new(r"(\[|\{|\()").unwrap(),
close_delimiter_pattern: Regex::new(r"(\]|\}|\))").unwrap(),
}),
"el" => Some(Language {
name: "Emacs Lisp".into(),
atom_patterns: vec![
// Numbers
Regex::new(r"[0-9]+").unwrap(),
// Symbols (e.g. variable names)
Regex::new(r"[a-zA-Z0-9_?:/*+=<>-]+").unwrap(),
// Operators
Regex::new(r"[`',#.&@]").unwrap(),
// Double-quoted strings
Regex::new(r#""((\\.)|[^"])*""#).unwrap(),
],
comment_patterns: vec![Regex::new(r";.*").unwrap()],
open_delimiter_pattern: Regex::new(r"(\[|\{|\()").unwrap(),
close_delimiter_pattern: Regex::new(r"(\]|\}|\))").unwrap(),
}),
"go" => Some(Language {
name: "Go".into(),
atom_patterns: vec![
// Numbers
Regex::new(r"[0-9]+").unwrap(),
// Symbols (e.g. variable names)
Regex::new(r"[.a-zA-Z0-9_]+").unwrap(),
// Two character operators
Regex::new(r"(!=|:=|&&|\|\|)").unwrap(),
// Single character operators
Regex::new(r"[.;:,=&!*+-]").unwrap(),
// Double-quoted strings
Regex::new(r#""((\\.)|[^"])*""#).unwrap(),
],
comment_patterns: vec![
// Single line comments
Regex::new("//.*(\n|$)").unwrap(),
// Multi-line comments
Regex::new(r"/\*(?s:.)*?\*/").unwrap(),
],
open_delimiter_pattern: Regex::new(r"(\[|\{|\()").unwrap(),
close_delimiter_pattern: Regex::new(r"(\]|\}|\))").unwrap(),
}),
"js" => Some(Language {
name: "JavaScript".into(),
atom_patterns: vec![
// Numbers
Regex::new(r"[0-9]+").unwrap(),
// Symbols (e.g. variable names)
Regex::new(r"[.a-zA-Z0-9_]+").unwrap(),
// Two character operators
Regex::new(r"(&&|\|\||\+\+|--|\*\*)").unwrap(),
// Single character operators
Regex::new(r"[=<>/*+?:;,-]").unwrap(),
// Double-quoted strings
Regex::new(r#""((\\.)|[^"])*""#).unwrap(),
// Single quoted strings
Regex::new(r"'((\\')|[^'])*'").unwrap(),
// Backtick strings
Regex::new(r"`((\\`)|[^`])*1`").unwrap(),
],
comment_patterns: vec![
// Single line comments
Regex::new("//.*(\n|$)").unwrap(),
// Multi-line comments
Regex::new(r"/\*(?s:.)*?\*/").unwrap(),
],
open_delimiter_pattern: Regex::new(r"(\[|\{|\()").unwrap(),
close_delimiter_pattern: Regex::new(r"(\]|\}|\))").unwrap(),
}),
"json" => Some(Language {
name: "JSON".into(),
atom_patterns: vec![
// Numbers
Regex::new(r"[0-9]+").unwrap(),
// Booleans
Regex::new(r"(true)|(false)|(null)").unwrap(),
// Punctuation
Regex::new(r"[:,]").unwrap(),
// Double-quoted strings
Regex::new(r#""((\\.)|[^"])*""#).unwrap(),
],
comment_patterns: vec![],
open_delimiter_pattern: Regex::new(r"(\[|\{)").unwrap(),
close_delimiter_pattern: Regex::new(r"(\]|\})").unwrap(),
}),
"ml" | "mli" => Some(Language {
name: "OCaml".into(),
atom_patterns: vec![
// Numbers
Regex::new(r"[0-9]+").unwrap(),
// Symbols (e.g. variable names)
Regex::new(r"[.a-zA-Z0-9_]+").unwrap(),
// Two character operators
Regex::new(r"(->|<-|:=|&&|\|\|)").unwrap(),
// Single character operators
Regex::new(r"[?~=<>/*+,&|:;'#!-]").unwrap(),
// Double-quoted strings
Regex::new(r#""((\\.)|[^"])*""#).unwrap(),
],
comment_patterns: vec![
// Multi-line comments
Regex::new(r"\(\*(?s:.)*?\*\)").unwrap(),
],
open_delimiter_pattern: Regex::new(r"(\[|\{|\()").unwrap(),
close_delimiter_pattern: Regex::new(r"(\]|\}|\))").unwrap(),
}),
"rs" => Some(Language {
name: "Rust".into(),
atom_patterns: vec![
// Numbers
Regex::new(r"[0-9]+").unwrap(),
// Single quoted character 'a' or '\n', trying to avoid matching
// lifetimes.
Regex::new(r"'..?'").unwrap(),
// Lifetimes
Regex::new(r"'[a-z_]+").unwrap(),
// Bindings in macros.
Regex::new(r"\$[a-z_]+").unwrap(),
// Symbols (e.g. variable names)
Regex::new(r"[a-zA-Z0-9_]+!?").unwrap(),
// Two character operators
Regex::new(r"(::|&&|\|\||\.\.|=>|<=|>=|==|!=|->)").unwrap(),
// Single character operators
// | is a delimiter for lambdas, but also used in pattern matching.
Regex::new(r"[.&=<>/*+:;,|#!?$-]").unwrap(),
// Double-quoted strings
Regex::new(r#""((\\.)|[^"])*""#).unwrap(),
],
comment_patterns: vec![
// Single line comments
Regex::new("//.*(\n|$)").unwrap(),
// Multi-line comments
Regex::new(r"/\*(?s:.)*?\*/").unwrap(),
],
open_delimiter_pattern: Regex::new(r"(\[|\{|\()").unwrap(),
close_delimiter_pattern: Regex::new(r"(\]|\}|\))").unwrap(),
}),
_ => None,
}
}
/// Parse `s` according to `lang`.
pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, s: &str, lang: &Language) -> Vec<&'a Syntax<'a>> {
let nl_pos = NewlinePositions::from(s);
parse_from(arena, s, &nl_pos, lang, &mut ParseState::new())
}
enum LexKind {
Comment,
Atom,
OpenDelimiter,
CloseDelimiter,
}
fn parse_from<'a>(
arena: &'a Arena<Syntax<'a>>,
s: &str,
nl_pos: &NewlinePositions,
lang: &Language,
state: &mut ParseState,
) -> Vec<&'a Syntax<'a>> {
let mut result: Vec<&'a Syntax<'a>> = vec![];
while state.str_i < s.len() {
let mut current_match: Option<(LexKind, regex::Match)> = None;
for pattern in &lang.comment_patterns {
if let Some(m) = pattern.find(&s[state.str_i..]) {
match current_match {
Some((_, prev_m)) if prev_m.start() <= m.start() => {}
_ => {
current_match = Some((LexKind::Comment, m));
}
}
}
}
for pattern in &lang.atom_patterns {
if let Some(m) = pattern.find(&s[state.str_i..]) {
match current_match {
Some((_, prev_m)) if prev_m.start() <= m.start() => {}
_ => {
current_match = Some((LexKind::Atom, m));
}
}
}
}
if let Some(m) = lang.open_delimiter_pattern.find(&s[state.str_i..]) {
match current_match {
Some((_, prev_m)) if prev_m.start() <= m.start() => {}
_ => {
current_match = Some((LexKind::OpenDelimiter, m));
}
}
};
if let Some(m) = lang.close_delimiter_pattern.find(&s[state.str_i..]) {
match current_match {
Some((_, prev_m)) if prev_m.start() <= m.start() => {}
_ => {
current_match = Some((LexKind::CloseDelimiter, m));
}
}
};
match current_match {
Some((match_kind, m)) => match match_kind {
LexKind::Comment => {
let atom = Syntax::new_comment(
arena,
nl_pos.from_offsets(state.str_i + m.start(), state.str_i + m.end()),
m.as_str(),
);
result.push(atom);
state.str_i += m.end();
}
LexKind::Atom => {
let atom = Syntax::new_atom(
arena,
nl_pos.from_offsets(state.str_i + m.start(), state.str_i + m.end()),
m.as_str(),
);
result.push(atom);
state.str_i += m.end();
}
LexKind::OpenDelimiter => {
let start = state.str_i;
state.str_i += m.end();
let children = parse_from(arena, s, nl_pos, lang, state);
let (close_brace, close_pos) = state.close_brace.take().unwrap_or((
"UNCLOSED".into(),
nl_pos.from_offsets(state.str_i, state.str_i + 1),
));
let open_pos = nl_pos.from_offsets(start + m.start(), start + m.end());
let items = Syntax::new_list(
arena,
m.as_str(),
open_pos,
children,
&close_brace,
close_pos,
);
result.push(items);
}
LexKind::CloseDelimiter => {
state.close_brace = Some((
m.as_str().into(),
nl_pos.from_offsets(state.str_i + m.start(), state.str_i + m.end()),
));
state.str_i += m.end();
return result;
}
},
None => break,
};
}
result
}
#[derive(Debug, Clone)]
struct ParseState {
str_i: usize,
close_brace: Option<(String, Vec<SingleLineSpan>)>,
}
impl ParseState {
fn new() -> Self {
Self {
str_i: 0,
close_brace: None,
}
}
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::syntax::Syntax::*;
fn lang() -> Language {
from_extension(&OsStr::new("js")).unwrap()
}
pub fn assert_syntaxes<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) {
if !syntaxes_match(actual, expected) {
dbg!(expected, actual);
assert!(false);
}
}
fn syntaxes_match<'a>(actual: &[&'a Syntax<'a>], expected: &[&'a Syntax<'a>]) -> bool {
if actual.len() != expected.len() {
return false;
} else {
for (lhs_child, rhs_child) in actual.iter().zip(expected.iter()) {
if !syntax_matches(lhs_child, rhs_child) {
return false;
}
}
}
true
}
/// Compare all the fields in a Syntax value, not just
/// those used in its Eq implementation.
fn syntax_matches<'a>(actual: &'a Syntax<'a>, expected: &'a Syntax<'a>) -> bool {
match (actual, expected) {
(
List {
info: lhs_info,
open_position: lhs_open_position,
open_content: lhs_start_content,
children: lhs_children,
close_content: lhs_end_content,
close_position: lhs_close_position,
num_descendants: lhs_num_descendants,
..
},
List {
info: rhs_info,
open_position: rhs_open_position,
open_content: rhs_start_content,
children: rhs_children,
close_content: rhs_end_content,
close_position: rhs_close_position,
num_descendants: rhs_num_descendants,
..
},
) => {
if lhs_info.change.get() != rhs_info.change.get() {
dbg!(lhs_info.change.get(), rhs_info.change.get());
return false;
}
if lhs_open_position != rhs_open_position {
dbg!(lhs_open_position, rhs_open_position);
return false;
}
if lhs_start_content != rhs_start_content {
dbg!(lhs_start_content, rhs_start_content);
return false;
}
if lhs_end_content != rhs_end_content {
dbg!(lhs_end_content, rhs_end_content);
return false;
}
if lhs_close_position != rhs_close_position {
dbg!(lhs_close_position, rhs_close_position);
return false;
}
if lhs_num_descendants != rhs_num_descendants {
dbg!(lhs_num_descendants, rhs_num_descendants);
return false;
}
if !syntaxes_match(lhs_children, rhs_children) {
return false;
}
}
(
Atom {
info: lhs_info,
position: lhs_position,
content: lhs_content,
is_comment: lhs_is_comment,
..
},
Atom {
info: rhs_info,
position: rhs_position,
content: rhs_content,
is_comment: rhs_is_comment,
..
},
) => {
if lhs_info.change.get() != rhs_info.change.get() {
dbg!(lhs_info.change.get(), rhs_info.change.get());
return false;
}
if lhs_position != rhs_position {
dbg!(lhs_position, rhs_position);
return false;
}
if lhs_content != rhs_content {
dbg!(lhs_content, rhs_content);
return false;
}
if lhs_is_comment != rhs_is_comment {
dbg!(lhs_is_comment, rhs_is_comment);
return false;
}
}
_ => {
return false;
}
}
true
}
#[test]
fn test_parse_integer() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "123", &lang()),
&[Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 3,
}],
"123",
)],
);
}
#[test]
fn test_parse_empty_string() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "\"\"", &lang()),
&[Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 2,
}],
"\"\"",
)],
);
}
#[test]
fn test_parse_string_escaped_doublequote() {
let arena = Arena::new();
assert_syntaxes(
// "\""
&parse(&arena, "\"\\\"\"", &lang()),
&[Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 4,
}],
"\"\\\"\"",
)],
);
}
#[test]
fn test_parse_string_escaped_backlash() {
let arena = Arena::new();
assert_syntaxes(
// "\\"
&parse(&arena, "\"\\\\\"", &lang()),
&[Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 4,
}],
"\"\\\\\"",
)],
);
}
#[test]
fn test_parse_multiline_string() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "\"\n\"", &lang()),
&[Syntax::new_atom(
&arena,
vec![
SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 1,
},
SingleLineSpan {
line: 1.into(),
start_col: 0,
end_col: 1,
},
],
"\"\n\"",
)],
);
}
#[test]
fn test_parse_string_escaped_backlash_and_second_string() {
let arena = Arena::new();
assert_syntaxes(
// "\\" "a"
&parse(&arena, "\"\\\\\" \"a\"", &lang()),
&[
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 4,
}],
"\"\\\\\"",
),
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 5,
end_col: 8,
}],
"\"a\"",
),
],
);
}
#[test]
fn test_parse_multiple() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "123 456", &lang()),
&[
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 3,
}],
"123",
),
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 4,
end_col: 7,
}],
"456",
),
],
);
}
#[test]
fn test_parse_symbol() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, ".foo", &lang()),
&[Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 4,
}],
".foo",
)],
);
}
#[test]
fn test_parse_integer_with_whitespace() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, " 123 ", &lang()),
&[Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 1,
end_col: 4,
}],
"123",
)],
);
}
#[test]
fn test_parse_string() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "\"abc\"", &lang()),
&[Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 5,
}],
"\"abc\"",
)],
);
}
#[test]
fn test_parse_comment() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "// foo\nx", &lang()),
&[
Syntax::new_comment(
&arena,
vec![
SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 6,
},
SingleLineSpan {
line: 1.into(),
start_col: 0,
end_col: 0,
},
],
"// foo\n",
),
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 1.into(),
start_col: 0,
end_col: 1,
}],
"x",
),
],
);
}
#[test]
fn test_parse_multiline_comment() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "/* foo\nbar */", &lang()),
&[Syntax::new_comment(
&arena,
vec![
SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 6,
},
SingleLineSpan {
line: 1.into(),
start_col: 0,
end_col: 6,
},
],
"/* foo\nbar */",
)],
);
}
#[test]
fn test_parse_list() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "[ 123 ]", &lang()),
&[Syntax::new_list(
&arena,
"[",
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 1,
}],
vec![Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 2,
end_col: 5,
}],
"123",
)],
"]",
vec![SingleLineSpan {
line: 0.into(),
start_col: 6,
end_col: 7,
}],
)],
);
}
#[test]
fn test_parse_empty_list() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "[]", &lang()),
&[Syntax::new_list(
&arena,
"[",
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 1,
}],
vec![],
"]",
vec![SingleLineSpan {
line: 0.into(),
start_col: 1,
end_col: 2,
}],
)],
);
}
#[test]
fn test_parse_parens() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "()", &lang()),
&[Syntax::new_list(
&arena,
"(",
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 1,
}],
vec![],
")",
vec![SingleLineSpan {
line: 0.into(),
start_col: 1,
end_col: 2,
}],
)],
);
}
#[test]
fn test_parse_list_with_commas() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "[123, 456]", &lang()),
&[Syntax::new_list(
&arena,
"[",
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 1,
}],
vec![
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 1,
end_col: 4,
}],
"123",
),
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 4,
end_col: 5,
}],
",",
),
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 6,
end_col: 9,
}],
"456",
),
],
"]",
vec![SingleLineSpan {
line: 0.into(),
start_col: 9,
end_col: 10,
}],
)],
);
}
#[test]
fn test_parse_object() {
let arena = Arena::new();
assert_syntaxes(
&parse(&arena, "{x: 1}", &lang()),
&[Syntax::new_list(
&arena,
"{",
vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 1,
}],
vec![
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 1,
end_col: 2,
}],
"x",
),
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 2,
end_col: 3,
}],
":",
),
Syntax::new_atom(
&arena,
vec![SingleLineSpan {
line: 0.into(),
start_col: 4,
end_col: 5,
}],
"1",
),
],
"}",
vec![SingleLineSpan {
line: 0.into(),
start_col: 5,
end_col: 6,
}],
)],
);
}
}