pull/926/merge
Peter Travers 2025-12-08 13:53:59 +07:00 committed by GitHub
commit f449937fdb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 211 additions and 7 deletions

@ -92,6 +92,23 @@ const OCAML_ATOM_NODES: [&str; 6] = [
"attribute_id",
];
const TEMPLATE_STRING_CSS_INJECTION_QUERY: &str = r#"
(_
; Capture the 'callee' or 'left' side.
; We accept any named node here to act as the anchor.
(_) @callee
; Capture the template string contents.
(template_string) @contents
; Predicate: The 'callee' text must start with 'styled' or 'css'.
; This matches "styled.div", "styled(C)", and even "styled.div<{}>"
; (which might be parsed as a binary expression 'styled.div < {}' or
; a call expression depending on the grammar version).
(#match? @callee "^(styled|css)")
)
"#;
pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
use guess::Language::*;
match language {
@ -592,7 +609,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
],
highlight_query: ts::Query::new(&language, tree_sitter_javascript::HIGHLIGHT_QUERY)
.unwrap(),
sub_languages: vec![],
sub_languages: vec![TreeSitterSubLanguage {
query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(),
parse_as: Css,
}],
}
}
Json => {
@ -1057,7 +1077,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
atom_nodes: ["string", "template_string"].into_iter().collect(),
delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")],
highlight_query: ts::Query::new(&language, &highlight_query).unwrap(),
sub_languages: vec![],
sub_languages: vec![TreeSitterSubLanguage {
query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(),
parse_as: Css,
}],
}
}
TypeScript => {
@ -1074,7 +1097,10 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
.collect(),
delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")],
highlight_query: ts::Query::new(&language, &highlight_query).unwrap(),
sub_languages: vec![],
sub_languages: vec![TreeSitterSubLanguage {
query: ts::Query::new(&language, TEMPLATE_STRING_CSS_INJECTION_QUERY).unwrap(),
parse_as: Css,
}],
}
}
Xml => {
@ -1194,6 +1220,7 @@ pub(crate) fn parse_subtrees(
src: &str,
config: &TreeSitterConfig,
tree: &tree_sitter::Tree,
nl_pos: &LinePositions,
) -> DftHashMap<usize, (tree_sitter::Tree, TreeSitterConfig, HighlightedNodeIds)> {
let mut subtrees = DftHashMap::default();
@ -1202,8 +1229,50 @@ pub(crate) fn parse_subtrees(
let mut query_matches =
query_cursor.matches(&language.query, tree.root_node(), src.as_bytes());
let content_idx = language
.query
.capture_index_for_name("contents")
.unwrap_or(0);
while let Some(m) = query_matches.next() {
let node = m.nodes_for_capture_index(0).next().unwrap();
let node = match m.nodes_for_capture_index(content_idx).next() {
None => continue,
Some(node) => node,
};
let mut range = node.range();
match (language.parse_as, node.grammar_name()) {
(guess::Language::Css, "template_string") => {
// If this is a template string (starts/ends with backtick), shrink the range.
// We check the text to be safe, or just assume based on the node kind.
let node_text = &src[node.start_byte()..node.end_byte()];
if node_text.starts_with('`')
&& node_text.ends_with('`')
&& node_text.len() >= 2
{
range.start_byte += 1;
range.end_byte -= 1;
// We also need to update start_point and end_point for Tree-sitter to be happy.
// Use line positions to calculate the exact row/col for the new byte offsets.
// this handles cases where the backtick is followed by a newline or
// other complex formatting.
let start_span = nl_pos.from_region(range.start_byte, range.start_byte)[0];
let end_span = nl_pos.from_region(range.end_byte, range.end_byte)[0];
range.start_point = ts::Point {
row: start_span.line.0 as usize,
column: start_span.start_col as usize,
};
range.end_point = ts::Point {
row: end_span.line.0 as usize,
column: end_span.start_col as usize,
};
}
}
_ => {}
};
if node.byte_range().is_empty() {
continue;
}
@ -1214,7 +1283,7 @@ pub(crate) fn parse_subtrees(
.set_language(&subconfig.language)
.expect("Incompatible tree-sitter version");
parser
.set_included_ranges(&[node.range()])
.set_included_ranges(&[range])
.expect("Incompatible tree-sitter version");
let tree = parser.parse(src, None).unwrap();
@ -1429,11 +1498,13 @@ pub(crate) fn to_syntax<'a>(
let highlights = tree_highlights(tree, src, config);
// Use line numbers to handle sub-language ranges correctly.
let nl_pos = LinePositions::from(src);
// Parse sub-languages, if any, which will be used both for
// highlighting and for more precise Syntax nodes where applicable.
let subtrees = parse_subtrees(src, config, tree);
let subtrees = parse_subtrees(src, config, tree, &nl_pos);
let nl_pos = LinePositions::from(src);
let mut cursor = tree.walk();
let mut error_count: usize = 0;
@ -1837,6 +1908,8 @@ fn atom_from_cursor<'a>(
#[cfg(test)]
mod tests {
use std::collections::VecDeque;
use strum::IntoEnumIterator as _;
use super::*;
@ -1884,6 +1957,137 @@ mod tests {
};
}
fn assert_contains_atoms<'a>(nodes: &[&'a Syntax<'a>], expected_sequence: Vec<Vec<&str>>) {
let mut to_search = VecDeque::from(nodes.to_vec());
let mut to_check = VecDeque::from(expected_sequence);
while let Some(node) = to_search.pop_front() {
if let Some(expected) = to_check.front() {
match node {
Syntax::List { children, .. } => {
// Extract just the normal atoms from this list to see if they match the line
let atom_texts: Vec<&str> = children
.iter()
.filter_map(|child| match child {
Syntax::Atom { content, .. } => Some(content.as_str()),
_ => None,
})
.collect();
// If this list matches the current expected line, advance expectation
if !atom_texts.is_empty() && atom_texts == *expected {
to_check.pop_front();
}
// Continue searching children in order (DFS)
for child in children.iter().rev() {
to_search.push_front(child);
}
}
_ => {}
}
} else {
// All expectations met
return;
}
}
if !to_check.is_empty() {
panic!(
"Could not find all atom sequences. \nMissing: {:?}\nDebug Tree:\n{}",
to_check,
nodes
.to_vec()
.iter()
.map(|node| node.dbg_content())
.collect::<Vec<String>>()
.join("\n")
);
}
}
#[test]
fn test_typescript_css_injection_table() {
let arena = Arena::new();
let configs = vec![
from_language(guess::Language::TypeScript),
from_language(guess::Language::TypeScriptTsx),
from_language(guess::Language::JavaScript),
from_language(guess::Language::JavascriptJsx),
];
let cases = vec![
// Case 1: Standard styled.button
(
r#"
const Button = styled.button`
background: #BF4F74;
border-radius: 3px;
border: none;
color: white;
`
"#,
vec![
vec!["background", ":", "#BF4F74", ";"],
vec!["border-radius", ":", "3px", ";"],
vec!["border", ":", "none", ";"],
vec!["color", ":", "white", ";"],
],
),
// Case 2: Component wrapping styled(C)
(
r#"
const Button = styled(OtherButton)`
color: white;
`
"#,
vec![vec!["color", ":", "white", ";"]],
),
// Case 3: Helper css`...`
(
r#"
const Button = css`
color: white;
`
"#,
vec![vec!["color", ":", "white", ";"]],
),
// Case 4: Nested Interpolation
(
r#"
const Button = styled.button`
color: white;
${props => props.$withSeparator && css`padding-top: 22px;`}
`
"#,
vec![vec!["color", ":", "white", ";"]],
),
// Case 5: Generics
(
r#"
export const Button = styled.button<{}>` color: white; `;
"#,
vec![vec!["color", ":", "white", ";"]],
),
// Case 6: Multiline Edge Case
(
r#"
const Button = styled.button`
color: green;
`;
"#,
vec![vec!["color", ":", "green", ";"]],
),
];
for config in configs {
for (src, expected_atoms) in cases.iter() {
let nodes = parse(&arena, src, &config, false);
assert_contains_atoms(&nodes, expected_atoms.clone());
}
}
}
/// Ensure that we don't crash when loading any of the
/// configs. This can happen on bad highlighting/foo.scm files.
#[test]