Add C++ support

edge_only_predecessors
Wilfred Hughes 2021-09-17 22:37:41 +07:00
parent 3068ad64a9
commit 6c08507b0d
6 changed files with 30 additions and 2 deletions

@ -4,6 +4,9 @@
Added a C parser.
Added a C++ parser. Difftastic prefers the C++ parser for `.h`
files. Please file a bug if you see issues.
## 0.9
### Parsing

@ -17,6 +17,7 @@ See [the manual](http://difftastic.wilfred.me.uk/) to get started.
Difftastic supports the following languages:
* C
* C++
* Clojure
* CSS
* Elixir

@ -37,6 +37,11 @@ fn main() {
// TODO: build these in parallel.
build("tree-sitter-c", "vendor/tree-sitter-c-src", &[]);
build(
"tree-sitter-cpp",
"vendor/tree-sitter-cpp-src",
&["scanner.cc"],
);
build("tree-sitter-clojure", "vendor/tree-sitter-clojure-src", &[]);
build(
"tree-sitter-css",

@ -1,9 +1,10 @@
# Introduction
Difftastic is a diff tool that understands syntax. It currently
supports 15 languages:
supports 16 languages:
* C
* C++
* Clojure
* CSS
* Elixir

@ -27,6 +27,7 @@ pub struct TreeSitterConfig {
extern "C" {
fn tree_sitter_c() -> Language;
fn tree_sitter_clojure() -> Language;
fn tree_sitter_cpp() -> Language;
fn tree_sitter_css() -> Language;
fn tree_sitter_elisp() -> Language;
fn tree_sitter_elixir() -> Language;
@ -46,7 +47,7 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
// TODO: find a nice way to extract name and extension information
// from the package.json in these parsers.
match extension.to_string_lossy().borrow() {
"c" | "h" => Some(TreeSitterConfig {
"c" => Some(TreeSitterConfig {
name: "C",
language: unsafe { tree_sitter_c() },
atom_nodes: (vec!["string_literal", "char_literal"])
@ -55,6 +56,22 @@ pub fn from_extension(extension: &OsStr) -> Option<TreeSitterConfig> {
// TODO: Handle array_declarator where [ is the second token.
open_delimiter_tokens: (vec!["(", "{"]).into_iter().collect(),
}),
// Treat .h as C++ rather than C. This is an arbitrary choice,
// but C++ is more widely used than C according to
// https://madnight.github.io/githut/
//
// TODO: allow users to override the association between
// extensions and parsers.
"cc" | "cpp" | "h" | "hpp" => Some(TreeSitterConfig {
name: "C++",
language: unsafe { tree_sitter_cpp() },
// The C++ grammar extends the C grammar, so the node
// names are generally the same.
atom_nodes: (vec!["string_literal", "char_literal"])
.into_iter()
.collect(),
open_delimiter_tokens: (vec!["(", "{"]).into_iter().collect(),
}),
"bb" | "boot" | "clj" | "cljc" | "clje" | "cljs" | "cljx" | "edn" | "joke" | "joker" => {
Some(TreeSitterConfig {
name: "Clojure",

@ -0,0 +1 @@
tree-sitter-cpp/src