Represent byte limit and parse error limit as Result return types

pull/492/head
Wilfred Hughes 2023-02-24 08:38:15 +07:00
parent f7f3e18fbf
commit e99b2ce27c
2 changed files with 176 additions and 130 deletions

@ -368,47 +368,27 @@ fn diff_file_content(
let mut language_used = None;
let (lang_name, lhs_positions, rhs_positions) = match lang_config {
_ if lhs_bytes.len() > diff_options.byte_limit
|| rhs_bytes.len() > diff_options.byte_limit =>
{
let num_bytes = std::cmp::max(lhs_bytes.len(), rhs_bytes.len());
None => {
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
(
Some(format!(
"Text ({} exceeded DFT_BYTE_LIMIT)",
&format_num_bytes(num_bytes),
)),
lhs_positions,
rhs_positions,
)
(None, lhs_positions, rhs_positions)
}
Some(ts_lang) => {
let lhs_tree = tsp::to_tree(&lhs_src, &ts_lang);
let rhs_tree = tsp::to_tree(&rhs_src, &ts_lang);
let arena = Arena::new();
let (lhs, lhs_err_count) = tsp::to_syntax(
&lhs_tree,
match tsp::to_tree_with_limit(diff_options, &ts_lang, &lhs_src, &rhs_src) {
Ok((lhs_tree, rhs_tree)) => {
match tsp::to_syntax_with_limit(
&lhs_src,
&arena,
&ts_lang,
diff_options.ignore_comments,
);
let (rhs, rhs_err_count) = tsp::to_syntax(
&rhs_tree,
&rhs_src,
&lhs_tree,
&rhs_tree,
&arena,
&ts_lang,
diff_options.ignore_comments,
);
init_all_info(&lhs, &rhs);
diff_options,
) {
Ok((lhs, rhs)) => {
if diff_options.check_only {
let lang_name = language.map(|l| language_name(l).into());
// TODO: respect syntax limit.
let has_syntactic_changes = lhs != rhs;
language_used = language;
@ -455,35 +435,21 @@ fn diff_file_content(
}
if exceeded_graph_limit {
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
let lhs_positions =
line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions =
line_parser::change_positions(&rhs_src, &lhs_src);
(
Some("Text (exceeded DFT_GRAPH_LIMIT)".into()),
lhs_positions,
rhs_positions,
)
} else if lhs_err_count + rhs_err_count > diff_options.parse_error_limit {
// TODO: doing a syntactic diff here is wasteful.
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
(
Some(format!(
"Text ({} error{}, exceeded DFT_PARSE_ERROR_LIMIT)",
(lhs_err_count + rhs_err_count),
if (lhs_err_count + rhs_err_count) == 1 {
""
} else {
"s"
}
)),
lhs_positions,
rhs_positions,
)
} else {
language_used = language;
// TODO: Make this .expect() unnecessary.
let language =
language.expect("If we had a ts_lang, we must have guessed the language");
let language = language.expect(
"If we had a ts_lang, we must have guessed the language",
);
fix_all_sliders(language, &lhs, &mut change_map);
fix_all_sliders(language, &rhs, &mut change_map);
@ -491,10 +457,12 @@ fn diff_file_content(
let mut rhs_positions = syntax::change_positions(&rhs, &change_map);
if diff_options.ignore_comments {
let lhs_comments = tsp::comment_positions(&lhs_tree, &lhs_src, &ts_lang);
let lhs_comments =
tsp::comment_positions(&lhs_tree, &lhs_src, &ts_lang);
lhs_positions.extend(lhs_comments);
let rhs_comments = tsp::comment_positions(&rhs_tree, &rhs_src, &ts_lang);
let rhs_comments =
tsp::comment_positions(&rhs_tree, &rhs_src, &ts_lang);
rhs_positions.extend(rhs_comments);
}
@ -505,10 +473,34 @@ fn diff_file_content(
)
}
}
None => {
Err(tsp::ExceededParseErrorLimit(error_count)) => {
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
(None, lhs_positions, rhs_positions)
(
Some(format!(
"Text ({} error{}, exceeded DFT_PARSE_ERROR_LIMIT)",
error_count,
if error_count == 1 { "" } else { "s" }
)),
lhs_positions,
rhs_positions,
)
}
}
}
Err(tsp::ExceededByteLimit(num_bytes)) => {
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
(
Some(format!(
"Text ({} exceeded DFT_BYTE_LIMIT)",
&format_num_bytes(num_bytes),
)),
lhs_positions,
rhs_positions,
)
}
}
}
};

@ -3,6 +3,7 @@
use std::collections::HashMap;
use std::collections::HashSet;
use crate::options::DiffOptions;
use crate::parse::guess_language as guess;
use tree_sitter as ts;
use typed_arena::Arena;
@ -978,6 +979,23 @@ pub fn to_tree(src: &str, config: &TreeSitterConfig) -> tree_sitter::Tree {
parser.parse(src, None).unwrap()
}
/// Error returned by [`to_tree_with_limit`] when at least one of the two
/// inputs is larger than `DiffOptions::byte_limit`.
///
/// The wrapped value is the length in bytes of the larger input, so callers
/// can report how big the offending file was.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ExceededByteLimit(pub usize);
/// Parse `lhs_src` and `rhs_src` with tree-sitter, refusing to parse when
/// either side is larger than `diff_options.byte_limit`.
///
/// # Errors
///
/// Returns [`ExceededByteLimit`] carrying the byte length of the larger input
/// when that length is strictly greater than the configured limit. Neither
/// side is parsed in that case.
pub fn to_tree_with_limit(
    diff_options: &DiffOptions,
    config: &TreeSitterConfig,
    lhs_src: &str,
    rhs_src: &str,
) -> Result<(tree_sitter::Tree, tree_sitter::Tree), ExceededByteLimit> {
    // Either side exceeding the limit is equivalent to the larger side
    // exceeding it, so a single comparison against the maximum suffices.
    let largest_input = lhs_src.len().max(rhs_src.len());

    if largest_input > diff_options.byte_limit {
        Err(ExceededByteLimit(largest_input))
    } else {
        let lhs_tree = to_tree(lhs_src, config);
        let rhs_tree = to_tree(rhs_src, config);
        Ok((lhs_tree, rhs_tree))
    }
}
/// Find any nodes that can be parsed as other languages (e.g. JavaScript embedded in HTML),
/// and return a map of their node IDs mapped to parsed trees. Every time we see such a node,
/// we will ignore it and recurse into the root node of the given tree instead.
@ -1163,6 +1181,42 @@ pub fn comment_positions(
.collect()
}
/// Error returned by [`to_syntax_with_limit`] when the two inputs together
/// produce more parse errors than `DiffOptions::parse_error_limit` allows.
///
/// The wrapped value is the combined number of parse errors across the
/// left-hand and right-hand sides.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ExceededParseErrorLimit(pub usize);
/// Convert both parsed trees into `Syntax` nodes allocated in `arena`,
/// giving up when the inputs contain too many parse errors.
///
/// Each side is converted with [`to_syntax`], honouring
/// `diff_options.ignore_comments`. On success the returned nodes have been
/// passed through `syntax::init_all_info`.
///
/// # Errors
///
/// Returns [`ExceededParseErrorLimit`] carrying the combined error count of
/// both sides when that count is strictly greater than
/// `diff_options.parse_error_limit`.
pub fn to_syntax_with_limit<'a>(
    lhs_src: &str,
    rhs_src: &str,
    lhs_tree: &tree_sitter::Tree,
    rhs_tree: &tree_sitter::Tree,
    arena: &'a Arena<Syntax<'a>>,
    config: &TreeSitterConfig,
    diff_options: &DiffOptions,
) -> Result<(Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>), ExceededParseErrorLimit> {
    let (lhs_nodes, lhs_error_count) = to_syntax(
        lhs_tree,
        lhs_src,
        arena,
        config,
        diff_options.ignore_comments,
    );
    let (rhs_nodes, rhs_error_count) = to_syntax(
        rhs_tree,
        rhs_src,
        arena,
        config,
        diff_options.ignore_comments,
    );

    // Check the limit before initialising node info: when the limit is
    // exceeded the nodes are never handed back to the caller, so any work
    // done on them would be thrown away.
    // NOTE(review): assumes `init_all_info` only affects the nodes passed to
    // it (no other observable side effects) — confirm against `syntax`.
    let error_count = lhs_error_count + rhs_error_count;
    if error_count > diff_options.parse_error_limit {
        return Err(ExceededParseErrorLimit(error_count));
    }

    syntax::init_all_info(&lhs_nodes, &rhs_nodes);
    Ok((lhs_nodes, rhs_nodes))
}
pub fn to_syntax<'a>(
tree: &tree_sitter::Tree,
src: &str,