From b85f996ff01e5f9ec9c1523606a7b75b0d7b677e Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 22 Apr 2022 10:14:19 +0800 Subject: [PATCH] feat: Add perl support Signed-off-by: Xuanwo --- build.rs | 10 +++++- sample_files/compare.expected | 3 ++ sample_files/perl_after.pl | 38 ++++++++++++++++++++++ sample_files/perl_before.pl | 25 ++++++++++++++ src/guess_language.rs | 4 +++ src/sliders.rs | 4 +-- src/tree_sitter_parser.rs | 11 +++++++ vendor/tree-sitter-perl-src | 1 + vendor/tree-sitter-perl/examples/import.pl | 2 +- 9 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 sample_files/perl_after.pl create mode 100644 sample_files/perl_before.pl create mode 120000 vendor/tree-sitter-perl-src diff --git a/build.rs b/build.rs index ba97f1aec..9f9325800 100644 --- a/build.rs +++ b/build.rs @@ -36,7 +36,10 @@ impl TreeSitterParser { .cpp(true) .flag_if_supported("-Wno-implicit-fallthrough") .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-ignored-qualifiers"); + .flag_if_supported("-Wno-ignored-qualifiers") + // Workaround for: https://github.com/ganezdragon/tree-sitter-perl/issues/16 + // should be removed after fixed. + .flag_if_supported("-Wno-return-type"); if cfg!(windows) { cpp_build.flag("/std:c++14"); @@ -174,6 +177,11 @@ fn main() { src_dir: "vendor/tree-sitter-php-src", extra_files: vec!["scanner.cc"], }, + TreeSitterParser { + name: "tree-sitter-perl", + src_dir: "vendor/tree-sitter-perl-src", + extra_files: vec!["scanner.cc"], + }, TreeSitterParser { name: "tree-sitter-python", src_dir: "vendor/tree-sitter-python-src", diff --git a/sample_files/compare.expected b/sample_files/compare.expected index e3e3d1f17..ba2953058 100644 --- a/sample_files/compare.expected +++ b/sample_files/compare.expected @@ -106,6 +106,9 @@ sample_files/ocaml_before.ml sample_files/ocaml_after.ml sample_files/outer_delimiter_before.el sample_files/outer_delimiter_after.el 73130b8572a4f17fa6cf828f74e226ce - +sample_files/perl_before.pl sample_files/perl_after.pl +9f4da9c44da143c236b4e0899d199f0d - + sample_files/preprocesor_before.h sample_files/preprocesor_after.h 3e4331cb935cbe735a79ebc43786cd3a - diff --git a/sample_files/perl_after.pl b/sample_files/perl_after.pl new file mode 100644 index 000000000..227a5d6cf --- /dev/null +++ b/sample_files/perl_after.pl @@ -0,0 +1,38 @@ +use strict 'refs'; +use warnings; +use if $] < 5.008, "utf8"; +use if WANT_WARNINGS, warnings => qw(all); + +use constant PI => 4 * atan2(1, 1); +use constant DEBUG , 0; # comma (,) is also used instead of '=>' + +print "Pi equals ", PI, "...\n"; + +use constant { + SEC => 0, + MIN => 1, + HOUR => 2, + MDAY => 3, + MON => 4, + YEAR => 5, + WDAY => 6, + YDAY => 7, + ISDST => 10, +}; + +my $setting = { + open => 1, + close => 2, + run => 3, + awesome => 'yes', +}; +my %final; +foreach my $key (woof()) { + $final{IRONMAN}{$key} = $setting->{$key}; + + print Dumper \%final; +} + +use constant WEEKDAYS => qw( + Sunday Monday Tuesday Wednesday Thursday Friday Saturday +); diff --git a/sample_files/perl_before.pl b/sample_files/perl_before.pl new file mode 100644 index 000000000..701f63875 --- /dev/null +++ b/sample_files/perl_before.pl @@ -0,0 +1,25 @@ +use strict 'refs'; +use warnings; +use if $] < 5.008, "utf8"; +use if WANT_WARNINGS, warnings => qw(all); + +use constant PI => 4 * atan2(1, 1); +use constant DEBUG , 0; # comma (,) is also used instead of '=>' + +print "Pi equals ", PI, "...\n"; + +use constant { + SEC => 0, + MIN => 1, + HOUR => 2, + MDAY => 3, + MON => 4, + YEAR => 5, + WDAY => 6, + YDAY => 7, + ISDST => 8, +}; + +use constant WEEKDAYS => qw( + Sunday Monday Tuesday Wednesday Thursday Friday Saturday +); diff --git a/src/guess_language.rs b/src/guess_language.rs index 4d0503a8f..aec481d82 100644 --- a/src/guess_language.rs +++ b/src/guess_language.rs @@ -43,6 +43,7 @@ pub enum Language { OCaml, OCamlInterface, Php, + Perl, Python, Ruby, Rust, @@ -112,6 +113,7 @@ fn from_emacs_mode_header(src: &str) -> Option { "java" => Some(Java), "js" | "js2" => Some(JavaScript), "lisp" => Some(CommonLisp), + "perl" => Some(Perl), "python" => Some(Python), "rjsx" => Some(Jsx), "ruby" => Some(Ruby), @@ -153,6 +155,7 @@ fn from_shebang(src: &str) -> Option { "chakra" | "d8" | "gjs" | "js" | "node" | "nodejs" | "qjs" | "rhino" | "v8" | "v8-shell" => return Some(JavaScript), "ocaml" | "ocamlrun" | "ocamlscript" => return Some(OCaml), + "perl" => return Some(Perl), "python" | "python2" | "python3" => return Some(Python), "ruby" | "macruby" | "rake" | "jruby" | "rbx" => return Some(Ruby), "deno" | "ts-node" => return Some(TypeScript), @@ -220,6 +223,7 @@ pub fn from_extension(extension: &OsStr) -> Option { "ml" => Some(OCaml), "mli" => Some(OCamlInterface), "php" => Some(Php), + "pm" | "pl" => Some(Perl), "py" | "py3" | "pyi" | "bzl" => Some(Python), "rb" | "builder" | "spec" | "rake" => Some(Ruby), "rs" => Some(Rust), diff --git a/src/sliders.rs b/src/sliders.rs index 8b600a61d..8df0bd94c 100644 --- a/src/sliders.rs +++ b/src/sliders.rs @@ -61,8 +61,8 @@ fn prefer_outer_delimiter(language: guess_language::Language) -> bool { // languages have syntax like `foo(bar)` or `foo[bar]` where // the inner delimiter is more relevant. Bash | C | CPlusPlus | CSharp | Css | Dart | Elixir | Elm | Gleam | Go | Haskell | Java - | JavaScript | Jsx | Kotlin | Lua | Nix | OCaml | OCamlInterface | Php | Python | Ruby - | Rust | Scala | Tsx | TypeScript | Yaml | Zig => false, + | JavaScript | Jsx | Kotlin | Lua | Nix | OCaml | OCamlInterface | Perl | Php | Python + | Ruby | Rust | Scala | Tsx | TypeScript | Yaml | Zig => false, } } diff --git a/src/tree_sitter_parser.rs b/src/tree_sitter_parser.rs index 897b3850d..5a2ab07d5 100644 --- a/src/tree_sitter_parser.rs +++ b/src/tree_sitter_parser.rs @@ -67,6 +67,7 @@ extern "C" { fn tree_sitter_ocaml() -> ts::Language; fn tree_sitter_ocaml_interface() -> ts::Language; fn tree_sitter_php() -> ts::Language; + fn tree_sitter_perl() -> ts::Language; fn tree_sitter_python() -> ts::Language; fn tree_sitter_ruby() -> ts::Language; fn tree_sitter_rust() -> ts::Language; @@ -473,6 +474,16 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .unwrap(), } } + Perl => { + let language = unsafe { tree_sitter_perl() }; + TreeSitterConfig { + name: "Perl", + language, + atom_nodes: vec!["string"].into_iter().collect(), + delimiter_tokens: vec![("(", ")"), ("{", "}"), ("[", "]")], + highlight_query: ts::Query::new(language, "").unwrap(), + } + } Python => { let language = unsafe { tree_sitter_python() }; TreeSitterConfig { diff --git a/vendor/tree-sitter-perl-src b/vendor/tree-sitter-perl-src new file mode 120000 index 000000000..9c9840a68 --- /dev/null +++ b/vendor/tree-sitter-perl-src @@ -0,0 +1 @@ +tree-sitter-perl/src \ No newline at end of file diff --git a/vendor/tree-sitter-perl/examples/import.pl b/vendor/tree-sitter-perl/examples/import.pl index 14c1a7651..701f63875 100644 --- a/vendor/tree-sitter-perl/examples/import.pl +++ b/vendor/tree-sitter-perl/examples/import.pl @@ -22,4 +22,4 @@ use constant { use constant WEEKDAYS => qw( Sunday Monday Tuesday Wednesday Thursday Friday Saturday -); \ No newline at end of file +);