From 44712123d36edd35633646b2698705ada7691fb4 Mon Sep 17 00:00:00 2001 From: Yeegsing Date: Sat, 10 Sep 2022 15:08:50 +0800 Subject: [PATCH] create the manual-zh-CN folder, and organize the translation file structure --- translation/{ => zh-CN}/README-zh-CN.md | 0 translation/zh-CN/manual-zh-CN/.gitignore | 1 + translation/zh-CN/manual-zh-CN/book.toml | 20 + .../replace_version_placeholder.sh | 5 + translation/zh-CN/manual-zh-CN/src/SUMMARY.md | 17 + .../zh-CN/manual-zh-CN/src/adding_a_parser.md | 136 +++++++ .../manual-zh-CN/src/alternative_projects.md | 5 + .../zh-CN/manual-zh-CN/src/contributing.md | 120 ++++++ translation/zh-CN/manual-zh-CN/src/diffing.md | 100 +++++ translation/zh-CN/manual-zh-CN/src/git.md | 70 ++++ .../zh-CN/manual-zh-CN/src/glossary.md | 33 ++ .../zh-CN/manual-zh-CN/src/installation.md | 64 +++ .../zh-CN/manual-zh-CN/src/introduction.md | 64 +++ .../manual-zh-CN/src/languages_supported.md | 57 +++ .../zh-CN/manual-zh-CN/src/mercurial.md | 38 ++ .../manual-zh-CN/src/parser_vendoring.md | 23 ++ translation/zh-CN/manual-zh-CN/src/parsing.md | 97 +++++ translation/zh-CN/manual-zh-CN/src/robots.txt | 2 + .../zh-CN/manual-zh-CN/src/tree_diffing.md | 120 ++++++ .../zh-CN/manual-zh-CN/src/tricky_cases.md | 382 ++++++++++++++++++ translation/zh-CN/manual-zh-CN/src/usage.md | 45 +++ 21 files changed, 1399 insertions(+) rename translation/{ => zh-CN}/README-zh-CN.md (100%) create mode 100644 translation/zh-CN/manual-zh-CN/.gitignore create mode 100644 translation/zh-CN/manual-zh-CN/book.toml create mode 100644 translation/zh-CN/manual-zh-CN/replace_version_placeholder.sh create mode 100644 translation/zh-CN/manual-zh-CN/src/SUMMARY.md create mode 100644 translation/zh-CN/manual-zh-CN/src/adding_a_parser.md create mode 100644 translation/zh-CN/manual-zh-CN/src/alternative_projects.md create mode 100644 translation/zh-CN/manual-zh-CN/src/contributing.md create mode 100644 translation/zh-CN/manual-zh-CN/src/diffing.md create mode 100644 
translation/zh-CN/manual-zh-CN/src/git.md create mode 100644 translation/zh-CN/manual-zh-CN/src/glossary.md create mode 100644 translation/zh-CN/manual-zh-CN/src/installation.md create mode 100644 translation/zh-CN/manual-zh-CN/src/introduction.md create mode 100644 translation/zh-CN/manual-zh-CN/src/languages_supported.md create mode 100644 translation/zh-CN/manual-zh-CN/src/mercurial.md create mode 100644 translation/zh-CN/manual-zh-CN/src/parser_vendoring.md create mode 100644 translation/zh-CN/manual-zh-CN/src/parsing.md create mode 100644 translation/zh-CN/manual-zh-CN/src/robots.txt create mode 100644 translation/zh-CN/manual-zh-CN/src/tree_diffing.md create mode 100644 translation/zh-CN/manual-zh-CN/src/tricky_cases.md create mode 100644 translation/zh-CN/manual-zh-CN/src/usage.md diff --git a/translation/README-zh-CN.md b/translation/zh-CN/README-zh-CN.md similarity index 100% rename from translation/README-zh-CN.md rename to translation/zh-CN/README-zh-CN.md diff --git a/translation/zh-CN/manual-zh-CN/.gitignore b/translation/zh-CN/manual-zh-CN/.gitignore new file mode 100644 index 000000000..7585238ef --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/.gitignore @@ -0,0 +1 @@ +book diff --git a/translation/zh-CN/manual-zh-CN/book.toml b/translation/zh-CN/manual-zh-CN/book.toml new file mode 100644 index 000000000..2c79be66b --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/book.toml @@ -0,0 +1,20 @@ +[book] +authors = ["Wilfred Hughes"] +language = "en" +multilingual = false +src = "src" +title = "Difftastic Manual" +description = "The official manual for difftastic, the syntactic differ" + +[output.html] +git-repository-url = "https://github.com/wilfred/difftastic" + +[output.html.redirect] +"/getting_started.html" = "./installation.html" +"/upstream_parsers.html" = "/languages_supported.html" + +[output.html.playground] +copyable = false + +[preprocessor.replace-version-placeholder] +command = "./replace_version_placeholder.sh" diff --git 
a/translation/zh-CN/manual-zh-CN/replace_version_placeholder.sh b/translation/zh-CN/manual-zh-CN/replace_version_placeholder.sh new file mode 100644 index 000000000..022bc5e22 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/replace_version_placeholder.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +DFT_VERSION=$(cargo read-manifest | jq -r .version) + +jq .[1] | jq '.sections[0].Chapter.content |= sub("DFT_VERSION_HERE"; "'$DFT_VERSION'")' diff --git a/translation/zh-CN/manual-zh-CN/src/SUMMARY.md b/translation/zh-CN/manual-zh-CN/src/SUMMARY.md new file mode 100644 index 000000000..a302cafee --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/SUMMARY.md @@ -0,0 +1,17 @@ +# Summary + +- [Introduction](./introduction.md) +- [Installation](./installation.md) +- [Usage](./usage.md) + - [Git](./git.md) + - [Mercurial](./mercurial.md) +- [Languages Supported](./languages_supported.md) +- [Internals: Parsing](./parsing.md) +- [Internals: Diffing](./diffing.md) + - [Tricky Cases](./tricky_cases.md) +- [Contributing](./contributing.md) + - [Parser Vendoring](./parser_vendoring.md) + - [Adding A Parser](./adding_a_parser.md) +- [Glossary](./glossary.md) +- [Alternative Projects](./alternative_projects.md) + - [Tree Diffing](./tree_diffing.md) diff --git a/translation/zh-CN/manual-zh-CN/src/adding_a_parser.md b/translation/zh-CN/manual-zh-CN/src/adding_a_parser.md new file mode 100644 index 000000000..1b6903b9d --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/adding_a_parser.md @@ -0,0 +1,136 @@ +# Adding A Parser + +## Finding a parser + +New parsers for difftastic must be reasonably complete and maintained. + +There are many tree-sitter parsers available, and the tree-sitter +website includes [a list of some well-known +parsers](https://tree-sitter.github.io/tree-sitter/#available-parsers). + +## Add the source code + +Once you've found a parser, add it as a git subtree to +`vendor/`. 
We'll use +[tree-sitter-json](https://github.com/tree-sitter/tree-sitter-json) as +an example. + +``` +$ git subtree add --prefix=vendor/tree-sitter-json git@github.com:tree-sitter/tree-sitter-json.git master +``` + +## Configure the build + +Cargo does not allow packages to include subdirectories that contain a +`Cargo.toml`. Add a symlink to the `src/` parser subdirectory. + +``` +$ cd vendor +$ ln -s tree-sitter-json/src tree-sitter-json-src +``` + +You can now add the parser to build by including the directory in +`build.rs`. + +``` +TreeSitterParser { + name: "tree-sitter-json", + src_dir: "vendor/tree-sitter-json-src", + extra_files: vec![], +}, +``` + +If your parser includes custom C or C++ files for lexing (e.g. a +`scanner.cc`), add them to `extra_files`. + +## Configure parsing + +Add an entry to `tree_sitter_parser.rs` for your language. + +``` +Json => { + let language = unsafe { tree_sitter_json() }; + TreeSitterConfig { + name: "JSON", + language, + atom_nodes: vec!["string"].into_iter().collect(), + delimiter_tokens: vec![("{", "}"), ("[", "]")], + highlight_query: ts::Query::new( + language, + include_str!("../vendor/highlights/json.scm"), + ) + .unwrap(), + } +} +``` + +`name` is the human-readable name shown in the UI. + +`atom_nodes` is a list of tree-sitter node names that should be +treated as atoms even though the nodes have children. This is common +for things like string literals or interpolated strings, where the +node might have children for the opening and closing quote. + +If you don't set `atom_nodes`, you may notice added/removed content +shown in white. This is usually a sign that a child node should have its +parent treated as an atom. + +`delimiter_tokens` are delimiters that difftastic stores on +the enclosing list node. This allows difftastic to distinguish +delimiter tokens from other punctuation in the language.
+ +If you don't set `delimiter_tokens`, difftastic will consider the +tokens in isolation, and may think that a `(` was added but the `)` +was unchanged. + +You can use `difft --dump-ts foo.json` to see the results of the +tree-sitter parser, and `difft --dump-syntax foo.json` to confirm that +you've set atoms and delimiters correctly. + +## Configure sliders + +Add an entry to `sliders.rs` for your language. + +## Configure language detection + +Update `from_extension` in `guess_language.rs` to detect your new +language. + +``` +"json" => Some(Json), +``` + +There may also be file names or shebangs associated with your +language. [GitHub's linguist +definitions](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml) +is a useful source of common file extensions. + +## Syntax highlighting (Optional) + +To add syntax highlighting for your language, you'll also need a symlink +to the `queries/highlights.scm` file, if available. + +``` +$ cd vendor/highlights +$ ln -s ../tree-sitter-json/queries/highlights.scm json.scm +``` + +## Add a regression test + +Finally, add a regression test for your language. This ensures that +the output for your test file doesn't change unexpectedly. + +Regression test files live in `sample_files/` and have the form +`foo_before.abc` and `foo_after.abc`. + +``` +$ nano simple_before.json +$ nano simple_after.json +``` + +Run the regression test script and update the `.expected` file. + +``` +$ ./sample_files/compare_all.sh +$ cp sample_files/compare.result sample_files/compare.expected +``` diff --git a/translation/zh-CN/manual-zh-CN/src/alternative_projects.md b/translation/zh-CN/manual-zh-CN/src/alternative_projects.md new file mode 100644 index 000000000..61cd933f0 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/alternative_projects.md @@ -0,0 +1,5 @@ +# Alternative Projects + +Many different tools exist for diffing files.
This section of the +manual discusses the design of other tools that have influenced +difftastic. diff --git a/translation/zh-CN/manual-zh-CN/src/contributing.md b/translation/zh-CN/manual-zh-CN/src/contributing.md new file mode 100644 index 000000000..da60eac27 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/contributing.md @@ -0,0 +1,120 @@ +# Contributing + +## Building + +Install Rust with [rustup](https://rustup.rs/), then clone the code. + +``` +$ git clone git@github.com:Wilfred/difftastic.git +$ cd difftastic +``` + +Difftastic uses [Cargo](https://doc.rust-lang.org/cargo/) for +building. + +``` +$ cargo build +``` + +Debug builds are significantly slower than release builds. For files +with more than fifty lines, it's usually worth using an optimised +build. + +``` +$ cargo build --release +``` + +## Manual + +This website is generated with +[mdbook](https://github.com/rust-lang/mdBook/). mdbook can be +installed with Cargo. + +``` +$ cargo install mdbook +``` + +You can then use the `mdbook` binary to build and serve the site +locally. + +``` +$ cd manual +$ mdbook serve +``` + +## API Documentation + +You can browse the internal API documentation generated by rustdoc +[here](https://difftastic.wilfred.me.uk/rustdoc/difft/). + +Difftastic's internal docs are not available on docs.rs, as it [does +not support binary crates today](https://difftastic.wilfred.me.uk/rustdoc/difft/). + +## Testing + +``` +$ cargo test +``` + +There are also several files in `sample_files/` that you can use. + +The best way to test difftastic is to look at history from a real +project. Set `GIT_EXTERNAL_DIFF` to point to your current build. + +For example, you can run difftastic on its own source code. + +``` +$ GIT_EXTERNAL_DIFF=./target/release/difft git log -p --ext-diff -- src +``` + +## Logging + +Difftastic uses the `pretty_env_logger` library to log some additional +debug information. 
+ +``` +$ RUST_LOG=debug cargo run sample_files/old.jsx sample_files/new.jsx +``` + +See the [`env_logger` +documentation](https://docs.rs/env_logger/0.9.0/env_logger/) for full details. + +## Profiling + +If you have a file that's particularly slow, you can use +[cargo-flamegraph](https://github.com/flamegraph-rs/flamegraph) to see +which functions are slow. + +``` +$ CARGO_PROFILE_RELEASE_DEBUG=true cargo flamegraph --bin difft sample_files/slow_before.rs sample_files/slow_after.rs +``` + +It's also worth looking at memory usage, as graph traversal bugs can +lead to huge memory consumption. + +``` +$ /usr/bin/time -v ./target/release/difft sample_files/slow_before.rs sample_files/slow_after.rs +``` + +If timing measurements are noisy, Linux's `perf` tool will report +instructions executed, which is more stable. + +``` +$ perf stat ./target/release/difft sample_files/slow_before.rs sample_files/slow_after.rs +$ perf stat ./target/release/difft sample_files/typing_old.ml sample_files/typing_new.ml +``` + +Many more profiling techniques are discussed in [The Rust +Performance Book](https://nnethercote.github.io/perf-book/). + +## Releasing + +Use Cargo to create a new release, and tag it in git. Difftastic has a +helper script for this: + +``` +$ ./scripts/release.sh +``` + +You can now increment the version in Cargo.toml and add a new entry to +CHANGELOG.md. diff --git a/translation/zh-CN/manual-zh-CN/src/diffing.md b/translation/zh-CN/manual-zh-CN/src/diffing.md new file mode 100644 index 000000000..dcd47c639 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/diffing.md @@ -0,0 +1,100 @@ +# Diffing + +Difftastic treats diff calculations as a route finding problem on a +directed acyclic graph. + +## Graph Representation + +A vertex in the graph represents a position in two syntax trees. + +The start vertex has both positions pointing to the first syntax node +in both trees. The end vertex has both positions just +after the last syntax node in both trees.
+ +Consider comparing `A` with `X A`. + +``` +START ++---------------------+ +| Left: A Right: X A | +| ^ ^ | ++---------------------+ + +END ++---------------------+ +| Left: A Right: X A | +| ^ ^| ++---------------------+ +``` + +From the start vertex, we have two options: + +* we can mark the first syntax node on the left as novel, and advance + to the next syntax node on the left (vertex 1 above), or +* we can mark the first syntax node on the right as novel, and advance + to the next syntax node on the right (vertex 2 above). + +``` + START + +---------------------+ + | Left: A Right: X A | + | ^ ^ | + +---------------------+ + / \ + Novel atom L / \ Novel atom R +1 v 2 v ++---------------------+ +---------------------+ +| Left: A Right: X A | | Left: A Right: X A | +| ^ ^ | | ^ ^ | ++---------------------+ +---------------------+ +``` + + +Choosing "novel atom R" to vertex 2 will turn out to be the best +choice. From vertex 2, we can see three routes to the end vertex. + +``` + 2 + +---------------------+ + | Left: A Right: X A | + | ^ ^ | + +---------------------+ + / | \ + Novel atom L / | \ Novel atom R + v | v ++---------------------+ | +---------------------+ +| Left: A Right: X A | | | Left: A Right: X A | +| ^ ^ | | | ^ ^| ++---------------------+ | +---------------------+ + | | | + | Novel atom R | Nodes match | Novel atom L + | | | + | END v | + | +---------------------+ | + +-------->| Left: A Right: X A |<---------+ + | ^ ^| + +---------------------+ +``` + +## Comparing Routes + +We assign a cost to each edge. Marking a syntax node as novel is worse +than finding a matching syntax node, so the "novel atom" edge has a +higher cost than the "syntax nodes match" edge. + +The best route is the lowest cost route from the start vertex to the +end vertex. + +## Finding The Best Route + +Difftastic uses Dijkstra's algorithm to find the best (i.e. lowest cost) +route. 
+ +One big advantage of this algorithm is that we don't need to construct +the graph in advance. Constructing the whole graph would require +exponential memory relative to the number of syntax nodes. Instead, +vertex neighbours are constructed as the graph is explored. + +There are lots of resources explaining Dijkstra's algorithm online, +but I particularly recommend the [graph search section of Red Blob +Games](https://www.redblobgames.com/pathfinding/a-star/introduction.html#dijkstra). diff --git a/translation/zh-CN/manual-zh-CN/src/git.md b/translation/zh-CN/manual-zh-CN/src/git.md new file mode 100644 index 000000000..47a8000be --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/git.md @@ -0,0 +1,70 @@ +# Git + +Git [supports external diff +tools](https://git-scm.com/docs/diff-config#Documentation/diff-config.txt-diffexternal). You +can use `GIT_EXTERNAL_DIFF` for a one-off git command. + +``` +$ GIT_EXTERNAL_DIFF=difft git diff +$ GIT_EXTERNAL_DIFF=difft git log -p --ext-diff +$ GIT_EXTERNAL_DIFF=difft git show e96a7241760319 --ext-diff +``` + +If you want to use difftastic by default, use `git config`. + +``` +# Set git configuration for the current repository. +$ git config diff.external difft + +# Set git configuration for all repositories. +$ git config --global diff.external difft +``` + +After running `git config`, `git diff` will use `difft` +automatically. Other git commands require `--ext-diff` to use +`diff.external`. + +``` +$ git diff +$ git log -p --ext-diff +$ git show e96a7241760319 --ext-diff +``` + +## git-difftool + +[git difftool](https://git-scm.com/docs/git-difftool) is a git command +for viewing the current changes with a different diff tool. It's +useful if you want to use difftastic occasionally. + +Add the +following to your `.gitconfig` to use difftastic as your difftool. 
+ +```ini +[diff] + tool = difftastic + +[difftool] + prompt = false + +[difftool "difftastic"] + cmd = difft "$LOCAL" "$REMOTE" +``` + +You can then run `git difftool` to see current changes with difftastic. + +``` +$ git difftool +``` + +We also recommend the following settings to get the best difftool +experience. + +```ini +# Use a pager for large output, just like other git commands. +[pager] + difftool = true + +# `git dft` is less to type than `git difftool`. +[alias] + dft = difftool +``` diff --git a/translation/zh-CN/manual-zh-CN/src/glossary.md b/translation/zh-CN/manual-zh-CN/src/glossary.md new file mode 100644 index 000000000..022c99335 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/glossary.md @@ -0,0 +1,33 @@ +# Glossary + +**Atom**: An atom is an item in difftastic's syntax tree structure +that has no children. It represents things like literals, variable +names, and comments. See also 'list'. + +**Delimiter**: A paired piece of syntax. A list has an open delimiter +and a close delimiter, such as `[` and `]`. Delimiters may not be +punctuation (e.g. `begin` and `end`) and may be empty strings (e.g. infix +syntax converted to difftastic's syntax tree). + +**LHS**: Left-hand side. Difftastic compares two items, and LHS refers +to the first item. See also 'RHS'. + +**List**: A list is an item in difftastic's syntax tree structure that +has an open delimiter, children, and a close delimiter. It represents +things like expressions and function definitions. See also 'atom'. + +**Novel**: An addition or a removal. Syntax is novel if it occurs +in only one of the two items being compared. + +**RHS**: Right-hand side. Difftastic compares two items, and RHS +refers to the second item. See also 'LHS'. + +**Root**: A syntax tree without a parent node. Roots represent +top-level definitions in the file being diffed. + +**Syntax node**: An item in difftastic's syntax tree structure. Either +an atom or a list. 
+ +**Token**: A small piece of syntax tracked by difftastic (e.g. `$x`, +`function` or `]`), for highlighting and aligned display. This is +either an atom or a non-empty delimiter. diff --git a/translation/zh-CN/manual-zh-CN/src/installation.md b/translation/zh-CN/manual-zh-CN/src/installation.md new file mode 100644 index 000000000..38e598933 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/installation.md @@ -0,0 +1,64 @@ +# Installation + +## Installing a binary + +Difftastic [provides GitHub +releases](https://github.com/Wilfred/difftastic/releases) with +prebuilt binaries. + +Packages are also available on the following platforms. + +[![Packaging status](https://repology.org/badge/vertical-allrepos/difftastic.svg)](https://repology.org/project/difftastic/versions) + + +## Installing via homebrew (on macOS or Linux) + +Difftastic can be installed with [Homebrew](https://formulae.brew.sh/formula/difftastic) on macOS or Linux. + + +``` +$ brew install difftastic +``` + +## Installing from source + +### Build Requirements + +Difftastic is written in Rust, so you will need Rust installed. I +recommend [rustup](https://rustup.rs/) to install Rust. Difftastic +requires Rust version 1.57 or later. + +You will also need a C++ compiler that supports C++14. If you're using +GCC, you need at least version 8. + +### Build + +You can download and build [difftastic on +crates.io](https://crates.io/crates/difftastic) with Cargo (which is +part of Rust). + +``` +$ cargo install difftastic +``` + +Difftastic uses the `cc` crate for building C/C++ dependencies. This +allows you to use environment variables `CC` and `CXX` to control the +compiler used (see [the cc +docs](https://github.com/alexcrichton/cc-rs#external-configuration-via-environment-variables)). + +See [contributing](./contributing.md) for instructions on debug +builds. + +## (Optional) Install MIME Database + +If a MIME database is available, difftastic will use it to detect +binary files more accurately. 
This is the same database used by the +`file` command, so you probably already have it. + +The MIME database path is [specified in the XDG +specification](https://specifications.freedesktop.org/shared-mime-info-spec/0.11/ar01s03.html). The +database should be at one of the following paths: + +* `/usr/share/mime/magic` +* `/usr/local/share/mime/magic` +* `$HOME/.local/share/mime/magic` diff --git a/translation/zh-CN/manual-zh-CN/src/introduction.md b/translation/zh-CN/manual-zh-CN/src/introduction.md new file mode 100644 index 000000000..f2c2cacf6 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/introduction.md @@ -0,0 +1,64 @@ +# Introduction + +Difftastic is a structural diff tool that understands syntax. It +supports [over 20 programming languages](./languages_supported.html) +and when it works, it's *fantastic*. + +Difftastic is open source software (MIT license) and [available on +GitHub](https://github.com/wilfred/difftastic). + +This copy of the manual describes version DFT_VERSION_HERE. The +[changelog](https://github.com/Wilfred/difftastic/blob/master/CHANGELOG.md) +records which features and bug fixes are in each version. + +## Syntactic Diffing + +Difftastic [detects the language](./usage.html#language-detection), parses the code, and then +compares the syntax trees. Let's look at an example. + +``` +// old.rs +let ts_lang = guess(path, guess_src).map(tsp::from_language); +``` +``` +// new.rs +let ts_lang = language_override + .or_else(|| guess(path, guess_src)) + .map(tsp::from_language); +``` + +
$ difft old.rs new.rs
+
+1 1 let ts_lang = language_override
+. 2     .or_else(|| guess(path, guess_src))
+. 3     .map(tsp::from_language);
+
+
+ +Notice how difftastic recognises that `.map` is unchanged, even though +it's now on a new line with whitespace. + +A line-oriented diff does a much worse job here. + +
$ diff -u old.rs new.rs
+
+@@ -1 +1,3 @@
+-let ts_lang = guess(path, guess_src).map(tsp::from_language);
++let ts_lang = language_override
++    .or_else(|| guess(path, guess_src))
++    .map(tsp::from_language);
+
+
+ +Some textual diff tools also highlight word changes (e.g. GitHub or +git's `--word-diff`). They still don't understand the code +though. Difftastic will always find matched delimiters: you can see +the closing `)` from `or_else` has been highlighted. + +## Fallback Textual Diffing + +If input files are not in a format that difftastic understands, it +uses a conventional line-oriented text diff with word highlighting. + +Difftastic will also use textual diffing when given extremely large +inputs. diff --git a/translation/zh-CN/manual-zh-CN/src/languages_supported.md b/translation/zh-CN/manual-zh-CN/src/languages_supported.md new file mode 100644 index 000000000..563a66d8c --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/languages_supported.md @@ -0,0 +1,57 @@ +# Languages Supported + +This page lists all the languages supported by difftastic. You can +also view the languages supported in your current installed version +with `difft --list-languages`. + +## Programming Languages + +| Language | Parser Used | +|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Bash | [tree-sitter/tree-sitter-bash](https://github.com/tree-sitter/tree-sitter-bash) | +| C | [tree-sitter/tree-sitter-c](https://github.com/tree-sitter/tree-sitter-c) | +| C++ | [tree-sitter/tree-sitter-cpp](https://github.com/tree-sitter/tree-sitter-cpp) | +| C# | [tree-sitter/tree-sitter-c-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp) | +| Clojure | [sogaiu/tree-sitter-clojure](https://github.com/sogaiu/tree-sitter-clojure) ([branched](https://github.com/sogaiu/tree-sitter-clojure/tree/issue-21)) | +| CMake | [uyha/tree-sitter-cmake](https://github.com/uyha/tree-sitter-cmake) | +| Common Lisp | [theHamsta/tree-sitter-commonlisp](https://github.com/theHamsta/tree-sitter-commonlisp) | +| Dart | 
[UserNobody14/tree-sitter-dart](https://github.com/UserNobody14/tree-sitter-dart) | +| Elixir | [elixir-lang/tree-sitter-elixir](https://github.com/elixir-lang/tree-sitter-elixir) | +| Elm | [elm-tooling/tree-sitter-elm](https://github.com/elm-tooling/tree-sitter-elm) | +| Elvish | [ckafi/tree-sitter-elvish](https://github.com/ckafi/tree-sitter-elvish) | +| Emacs Lisp | [wilfred/tree-sitter-elisp](https://github.com/Wilfred/tree-sitter-elisp) | +| Gleam | [gleam-lang/tree-sitter-gleam](https://github.com/gleam-lang/tree-sitter-gleam) | +| Go | [tree-sitter/tree-sitter-go](https://github.com/tree-sitter/tree-sitter-go) | +| Hack | [slackhq/tree-sitter-hack](https://github.com/slackhq/tree-sitter-hack) | +| Haskell | [tree-sitter/tree-sitter-haskell](https://github.com/tree-sitter/tree-sitter-haskell) | +| Janet | [sogaiu/tree-sitter-janet-simple](https://github.com/sogaiu/tree-sitter-janet-simple) | +| Java | [tree-sitter/tree-sitter-java](https://github.com/tree-sitter/tree-sitter-java) | +| JavaScript, JSX | [tree-sitter/tree-sitter-javascript](https://github.com/tree-sitter/tree-sitter-javascript) | +| Julia | [tree-sitter/tree-sitter-julia](https://github.com/tree-sitter/tree-sitter-julia) | +| Kotlin | [fwcd/tree-sitter-kotlin](https://github.com/fwcd/tree-sitter-kotlin) | +| Lua | [nvim-treesitter/tree-sitter-lua](https://github.com/nvim-treesitter/tree-sitter-lua) | +| Make | [alemuller/tree-sitter-make](https://github.com/alemuller/tree-sitter-make) | +| Nix | [cstrahan/tree-sitter-nix](https://github.com/cstrahan/tree-sitter-nix) | +| OCaml | [tree-sitter/tree-sitter-ocaml](https://github.com/tree-sitter/tree-sitter-ocaml) | +| Perl | [ganezdragon/tree-sitter-perl](https://github.com/ganezdragon/tree-sitter-perl) | +| PHP | [tree-sitter/tree-sitter-php](https://github.com/tree-sitter/tree-sitter-php) | +| Python | [tree-sitter/tree-sitter-python](https://github.com/tree-sitter/tree-sitter-python) | +| Ruby | 
[tree-sitter/tree-sitter-ruby](https://github.com/tree-sitter/tree-sitter-ruby) | +| Rust | [tree-sitter/tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) ([forked](https://github.com/Wilfred/tree-sitter-rust/tree/non_special_token)) | +| Scala | [tree-sitter/tree-sitter-scala](https://github.com/tree-sitter/tree-sitter-scala) | +| SQL | [m-novikov/tree-sitter-sql](https://github.com/m-novikov/tree-sitter-sql) | +| Swift | [alex-pinkus/tree-sitter-swift](https://github.com/alex-pinkus/tree-sitter-swift) | +| TypeScript, TSX | [tree-sitter/tree-sitter-typescript](https://github.com/tree-sitter/tree-sitter-typescript) | +| Zig | [maxxnino/tree-sitter-zig](https://github.com/maxxnino/tree-sitter-zig) | + +## Structured Text Formats + +| Language | Parser Used | +|----------|-----------------------------------------------------------------------------------| +| CSS | [tree-sitter/tree-sitter-css](https://github.com/tree-sitter/tree-sitter-css) | +| HCL | [MichaHoffmann/tree-sitter-hcl](https://github.com/MichaHoffmann/tree-sitter-hcl) | +| HTML | [tree-sitter/tree-sitter-html](https://github.com/tree-sitter/tree-sitter-html) | +| JSON | [tree-sitter/tree-sitter-json](https://github.com/tree-sitter/tree-sitter-json) | +| TOML | [ikatyang/tree-sitter-toml](https://github.com/ikatyang/tree-sitter-toml) | +| YAML | [ikatyang/tree-sitter-yaml](https://github.com/ikatyang/tree-sitter-yaml) | + diff --git a/translation/zh-CN/manual-zh-CN/src/mercurial.md b/translation/zh-CN/manual-zh-CN/src/mercurial.md new file mode 100644 index 000000000..d02da099f --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/mercurial.md @@ -0,0 +1,38 @@ +# Mercurial + +Mercurial [supports external diff +tools](https://www.mercurial-scm.org/wiki/ExtdiffExtension) with the +Extdiff extension. Enable it by adding an entry to `extensions` in +your `.hgrc`. + +``` +[extensions] +extdiff = +``` + +You can then run `hg extdiff -p difft` (assumes the `difft` binary is +on your `$PATH`). 
+ +You can also define an alias to run difftastic with hg. Add the +following to your `.hgrc` to run difftastic with `hg dft`. + +``` +[extdiff] +cmd.dft = difft +opts.dft = --missing-as-empty +``` + +## hg log -p + +Mercurial does not have a way of changing the default diff tool, at +least to the author's knowledge. + +If you just want to view the diff of the most recent commit, you can +use the following. + +``` +GIT_PAGER_IN_USE=1 hg dft -r .^ -r . | less +``` + +This is equivalent to `hg log -l 1 -p`, although it does not show the +commit message. diff --git a/translation/zh-CN/manual-zh-CN/src/parser_vendoring.md b/translation/zh-CN/manual-zh-CN/src/parser_vendoring.md new file mode 100644 index 000000000..a7c528448 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/parser_vendoring.md @@ -0,0 +1,23 @@ +# Vendoring + +## Git Subtrees + +Tree-sitter parsers are sometimes packaged on npm, sometimes packaged +on crates.io, and have different release frequencies. Difftastic uses +git subtrees (not git submodules) to track parsers. + +## Updating a parser + +To update a parser, pull commits from the upstream git repository. For +example, the following command will update the Java parser: + +``` +$ git subtree pull --prefix=vendor/tree-sitter-java git@github.com:tree-sitter/tree-sitter-java.git master +``` + +To see when each parser was last updated, use the following shell +command: + +``` +$ for d in $(git log | grep git-subtree-dir | tr -d ' ' | cut -d ":" -f2 | sort); do echo "$d"; git log --pretty=" %cs" -n 1 $d; done +``` diff --git a/translation/zh-CN/manual-zh-CN/src/parsing.md b/translation/zh-CN/manual-zh-CN/src/parsing.md new file mode 100644 index 000000000..4cc2be459 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/parsing.md @@ -0,0 +1,97 @@ +# Parsing + +Difftastic uses +[tree-sitter](https://tree-sitter.github.io/tree-sitter/) to build a +parse tree. The parse tree is then converted to a simpler tree which +can be diffed. 
+ +## Parsing with Tree-sitter + +Difftastic relies on tree-sitter to understand syntax. You can view +the parse tree that tree-sitter produces using the `--dump-ts` +flag. + +``` +$ difft --dump-ts sample_files/javascript_simple_before.js | head +program (0, 0) - (7, 0) + comment (0, 0) - (0, 8) "// hello" + expression_statement (1, 0) - (1, 6) + call_expression (1, 0) - (1, 5) + identifier (1, 0) - (1, 3) "foo" + arguments (1, 3) - (1, 5) + ( (1, 3) - (1, 4) "(" + ) (1, 4) - (1, 5) ")" + ; (1, 5) - (1, 6) ";" + expression_statement (2, 0) - (2, 6) +``` + +## Simplified Syntax + +Difftastic converts the tree-sitter parse tree to a simplified syntax +tree. The syntax tree is a uniform representation where everything is +either an atom (e.g. integer literals, comments, variable names) or a +list (consisting of the open delimiter, children and the close +delimiter). + +The flag `--dump-syntax` will display the syntax tree generated for a +file. + +``` +$ difft --dump-syntax sample_files/before.js +[ + Atom id:1 { + content: "// hello", + position: "0:0-8", + }, + List id:2 { + open_content: "", + open_position: "1:0-0", + children: [ + ... +``` + +### Conversion Process + +The simple representation of the difftastic parse tree makes diffing +much easier. Converting the detailed tree-sitter parse tree is a +recursive tree walk, treating tree-sitter leaf nodes as atoms. There +are two exceptions. + +(1) Tree-sitter parse trees sometimes include unwanted structure. Some +grammars consider string literals to be a single token, whereas others +treat strings as a complex structure where the delimiters are +separate. + +`tree_sitter_parser.rs` uses `atom_nodes` to mark specific tree-sitter +node names as flat atoms even if the node has children. + +(2) Tree-sitter parse trees include open and closing delimiters as +tokens. A list `[1]` will have a parse tree that includes `[` and `]` +as nodes. 
+ +``` +$ echo '[1]' > example.js +$ difft --dump-ts example.js +program (0, 0) - (1, 0) + expression_statement (0, 0) - (0, 3) + array (0, 0) - (0, 3) + [ (0, 0) - (0, 1) "[" + number (0, 1) - (0, 2) "1" + ] (0, 2) - (0, 3) "]" +``` + +`tree_sitter_parser.rs` uses `open_delimiter_tokens` to ensure that +`[` and `]` are used as delimiter content in the enclosing list, +rather than converting them to atoms. + +Difftastic can match up atoms that occur in different parts of the +simplified syntax tree. If e.g. a `[` is treated as an atom, +difftastic might match it with another `[` elsewhere. The resulting +diff would be unbalanced, highlighting different numbers of open and +close delimiters. + +### Lossy Syntax Trees + +The simplified syntax tree only stores node content and node +position. It does not store whitespace between nodes, and position is +largely ignored during diffing. diff --git a/translation/zh-CN/manual-zh-CN/src/robots.txt b/translation/zh-CN/manual-zh-CN/src/robots.txt new file mode 100644 index 000000000..c2a49f4fb --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Allow: / diff --git a/translation/zh-CN/manual-zh-CN/src/tree_diffing.md b/translation/zh-CN/manual-zh-CN/src/tree_diffing.md new file mode 100644 index 000000000..a3234768f --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/tree_diffing.md @@ -0,0 +1,120 @@ +# Tree Diffing + +This page summarises some of the other tree diffing tools available. + +If you're in a hurry, start by looking at Autochrome. It's extremely +capable, and has an excellent description of the design. + +If you're interested in a summary of the academic literature, [this +blog +post](http://useless-factor.blogspot.com/2008/01/matching-diffing-and-merging-xml.html) +(and its [accompanying +paper](http://useless-factor.blogspot.com/2008/01/matching-diffing-and-merging-xml.html) +-- mirrored under a CC BY-NC license) are great resources.
+ +## json-diff (2012) + +Languages: JSON +Algorithm: Pairwise comparison +Output: CLI colours + +[json-diff](https://github.com/andreyvit/json-diff) performs a +structural diff of JSON files. It considers subtrees to be different +if they don't match exactly, so e.g. `"foo"` and `["foo"]` are +entirely different. + +json-diff is also noteworthy for its extremely readable display of +results. + +## GumTree (2014) + +Languages: [~10 programming +languages](https://github.com/GumTreeDiff/gumtree/wiki/Languages) +Parser: Several, including [srcML](https://www.srcml.org/) +Algorithm: Top-down, then bottom-up +Output: HTML, Swing GUI, or text + +[GumTree](https://github.com/GumTreeDiff/gumtree) can parse several +programming languages and then performs a tree-based diff, outputting +an HTML display. + +The GumTree algorithm is described in the associated paper +'Fine-grained and accurate source code differencing' by Falleri et al +([DOI](http://doi.acm.org/10.1145/2642937.2642982), +[PDF](https://hal.archives-ouvertes.fr/hal-01054552/document)). It +performs a greedy top-down search for identical subtrees, then +performs a bottom-up search to match up the rest. + +## Tree Diff (2017) + +Languages: S-expression data format +Algorithm: A* search +Output: Merged s-expression file + +Tristan Hume wrote a tree diffing algorithm during his 2017 internship +at Jane Street. The source code is not available, but [he has a blog +post](https://thume.ca/2017/06/17/tree-diffing/) discussing the design +in depth. + +This project finds minimal diffs between s-expression files used as +configuration by Jane Street. It uses A* search to find the minimal +diff between them, and builds a new s-expression with a section marked +with `:date-switch` for the differing parts. + +(Jane Street also has patdiff, but that seems to be a line-oriented +diff with some whitespace/integer display polish. It doesn't +understand that e.g. whitespace in `"foo "` is meaningful).
+ +## Autochrome (2017) + +Languages: Clojure +Parser: Custom, preserves comments +Algorithm: Dijkstra (previously A* search) +Output: HTML + +[Autochrome](https://fazzone.github.io/autochrome.html) parses Clojure +with a custom parser that preserves comments. Autochrome uses +Dijkstra's algorithm to compare syntax trees. + +Autochrome's webpage includes worked examples of the algorithm and a +discussion of design tradeoffs. It's a really great resource for +understanding tree diffing techniques in general. + +## graphtage (2020) + +Languages: JSON, XML, HTML, YAML, plist, and CSS +Parser: json5, pyYAML, ignores comments +Algorithm: Levenshtein distance +Output: CLI colours + +[graphtage](https://blog.trailofbits.com/2020/08/28/graphtage/) +compares structured data by parsing into a generic file format, then +displaying a diff. It even allows things like diffing JSON against +YAML. + +As with json-diff, it does not consider `["foo"]` and `"foo"` to have +any similarities. + +## Diffsitter (2020) + +Parser: [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) +Algorithm: Longest-common-subsequence +Output: CLI colours + +[Diffsitter](https://github.com/afnanenayet/diffsitter) is another +tree-sitter based diff tool. It uses [LCS diffing on the leaves of the +syntax +tree](https://github.com/afnanenayet/diffsitter/blob/b0fd72612c6fcfdb8c061d3afa3bea2b0b754f33/src/ast.rs#L310-L313). + +## sdiff (2021) + +Languages: Scheme +Parser: Scheme's built-in `read`, ignores comments +Algorithm: MH-Diff from the Chawathe paper +Output: CLI colours + +[Semantically meaningful S-expression diff: Tree-diff for lisp source +code](https://archive.fosdem.org/2021/schedule/event/sexpressiondiff/) +was presented at FOSDEM 2021. 
+ + diff --git a/translation/zh-CN/manual-zh-CN/src/tricky_cases.md b/translation/zh-CN/manual-zh-CN/src/tricky_cases.md new file mode 100644 index 000000000..db629a8ed --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/tricky_cases.md @@ -0,0 +1,382 @@ +# Tricky Cases + +Tree diffing is challenging in some situations. This page demonstrates +difficult cases observed during development. + +Not all of these cases work well in difftastic yet. + +## Adding Delimiters + +``` +;; Before +x + +;; After +(x) +``` + +Desired result: (x) + +This is tricky because `x` has changed its depth in the tree, but `x` +itself is unchanged. + +Not all tree diff algorithms handle this case. It is also challenging +to display this case clearly: we want to highlight the changed +delimiters, but not their content. This is challenging in larger +expressions. + +## Changing Delimiters + +``` +;; Before +(x) + +;; After +[x] +``` + +As with the wrapping case, we want to highlight the delimiters rather +than the `x`. + +## Expanding Delimiters + +``` +;; Before +(x) y + +;; After +(x y) +``` + +Desired output: (x y) + +In this case, we want to highlight `y`. Highlighting the delimiters +could make `x` look changed. + +## Contracting Delimiters + +``` +;; Before +(x y) + +;; After +(x) y +``` + +This should be highlighted similar to the expanding delimiter case. + +## Disconnected Delimiters + +``` +;; Before +(foo (bar)) + +;; After +(foo (novel) (bar)) +``` + +Desired result: (foo (novel) (bar)) + +It is easy to end up with +(foo (novel) (bar)), +where a later pair of delimiters are chosen. + +## Rewrapping Large Nodes + +``` +;; Before +[[foo]] +(x y) + +;; After +([[foo]] x y) +``` + +We want to highlight `[[foo]]` being moved inside the +parentheses. However, a naive syntax differ prefers to consider a removal +of `()` in the before and an addition of `()` in the after to be a more +minimal diff. + +(Reported as [issue 44](https://github.com/Wilfred/difftastic/issues/44).)
+ +## Reordering Within A List + +``` +;; Before +(x y) + +;; After +(y x) +``` + +Desired result: (y x) + +We want to highlight the list contents and not the delimiters. + +## Middle Insertions + +``` +// Before +foo(bar(123)) + +// After +foo(extra(bar(123))) +``` + +Desired result: foo(extra(bar(123))) + +We want to consider both `foo` and `bar` to be unchanged. This case is +challenging for diffing algorithms that do a bottom-up then top-down +matching of trees. + +## Sliders (Flat) + +Sliders are a common problem in text based diffs, where lines are +matched in a confusing way. + +They typically look like this. The diff has to arbitrarily choose a +line containing a delimiter, and it chooses the wrong one. + +``` ++ } ++ ++ function foo () { + } +``` + +git-diff has some heuristics to reduce the risk of this (e.g. the +"patience diff"), but it can still occur. + +There's a similar problem in tree diffs. + +``` +;; Before +A B +C D + +;; After +A B +A B +C D +``` + +Ideally we'd prefer marking contiguous nodes as novel, so we highlight +`A B` rather than `B\nA`. From the perspective of a +longest-common-subsequence algorithm, these two choices are +equivalent. + +## Sliders (Nested) + +``` +// Before +old1(old2) + +// After +old1(new1(old2)) +``` + +Should this be old1(new1(old2)) or +old1(new1(old2))? + +The correct answer depends on the language. Most languages want to +prefer the inner delimiter, whereas Lisps and JSON prefer the outer +delimiter. + +## Minimising Depth Changes + +``` +// Before +if true { + foo(123); +} +foo(456); + +// After +foo(789); +``` + +Do we consider `foo(123)` or `foo(456)` to match with `foo(789)`? +Difftastic prefers `foo(456)` by preferring nodes at the same nesting depth. + +## Replacements With Minor Similarities + +``` +// Before +function foo(x) { return x + 1; } + +// After +function bar(y) { baz(y); } +``` + +In this example, we've deleted a function and written a completely +different one.
A tree-based diff could match up the `function` and the +outer delimiters, resulting in a confusing display showing lots of +small changes. + +As with sliders, the replacement problem can also occur in textual +line-based diffs. Line-diffs struggle if there are a small number of +common lines. The more precise, granular behaviour of tree diffs makes +this problem much more common though. + +## Matching Substrings In Comments + +``` +// Before +/* The quick brown fox. */ +foobar(); + +// After +/* The slow brown fox. */ +foobaz(); +``` + +`foobar` and `foobaz` are completely different, and their common +prefix `fooba` should not be matched up. However, matching common +prefixes or suffixes for comments is desirable. + +## Multiline Comments + +``` +// Before +/* Hello + * World. */ + +// After +if (x) { + /* Hello + * World. */ +} +``` + +The inner content of these two comments is technically different. We +want to treat them as identical however. + +## Reflowing Doc Comments + +Block comments have prefixes that aren't meaningful. + +``` +// Before +/* The quick brown fox jumps + * over the lazy dog. */ + +// After +/* The quick brown fox immediately + * jumps over the lazy dog. */ +``` + +The inner content has changed from `jumps * over` to `immediately * +jumps over`. However, the `*` is decorative and we don't care that +it's moved. + +## Small Changes To Large Strings + +``` +// Before +"""A very long string +with lots of words about +lots of stuff.""" + +// After +"""A very long string +with lots of NOVEL words about +lots of stuff.""" +``` + +It would be correct to highlight the entire string literal as being +removed and replaced with a new string literal. However, this makes it +hard to see what's actually changed. + +It's clear that variable names should be treated atomically, and +comments are safe to show subword changes. It's not clear how to +handle a small change in a 20 line string literal. 
+ +It's tempting to split strings on spaces and diff that, but users +still want to know when whitespace changes inside strings. `" "` and +`"  "` are not the same. + +## Autoformatter Punctuation + +``` +// Before +foo("looooong", "also looooong"); + +// After +foo( + "looooong", + "novel", + "also looooong", +); +``` + +Autoformatters (e.g. [prettier](https://prettier.io/)) will sometimes +add or remove punctuation when formatting. Commas and parentheses are +the most common. + +Syntactic diffing can ignore whitespace changes, but it has to assume +punctuation is meaningful. This can lead to punctuation changes being +highlighted, which may be quite far from the relevant content change. + +## Novel Blank Lines + +Blank lines are challenging for syntactic diffs. We are comparing +syntactic tokens, so we don't see blank lines. + +``` +// Before +A +B + +// After +A + +B +``` + +Generally we want syntactic diffing to ignore blank lines. In this +first example, this should show no changes. + +This is occasionally problematic, as it can hide accidental code +reformatting. + +``` +// Before +A +B + +// After +A +X + +Y +B +``` + +In this second example, we've inserted X and Y and a blank line. We +want to highlight the blank line as an addition. + +``` +// Before +A + + +B + +// After +A +X +B +``` + +In this third example, the syntactic diffing only sees an +addition. From the user's perspective, there has also been a removal +of two blank lines. + +## Invalid Syntax + +There's no guarantee that the input we're given is valid syntax. Even +if the code is valid, it might use syntax that isn't supported by the +parser. + +Tree-sitter provides explicit error nodes, and difftastic treats them +as atoms so it can run the same tree diff algorithm regardless.
diff --git a/translation/zh-CN/manual-zh-CN/src/usage.md b/translation/zh-CN/manual-zh-CN/src/usage.md new file mode 100644 index 000000000..582479069 --- /dev/null +++ b/translation/zh-CN/manual-zh-CN/src/usage.md @@ -0,0 +1,45 @@ +# Usage + +## Diffing Files + +``` +$ difft sample_files/before.js sample_files/after.js +``` + +## Diffing Directories + +``` +$ difft sample_files/dir_before/ sample_files/dir_after/ +``` + +Difftastic will recursively walk the two directories, diffing files +with the same name. + +The `--skip-unchanged` option is useful when diffing directories that +contain many unchanged files. + +## Language Detection + +Difftastic guesses the language used based on the file extension, file +name, and the contents of the first lines. + +You can override the language detection by passing the `--language` +option. Difftastic will treat input files as if they had that +extension, and ignore other language detection heuristics. + + +``` +$ difft --language cpp before.c after.c +``` + +## Options + +Difftastic includes a range of configuration CLI options, see `difft +--help` for the full list. + +Difftastic can also be configured with environment variables. These +are also visible in `--help`. + +For example, `DFT_BACKGROUND=light` is equivalent to +`--background=light`. This is useful when using VCS tools like git, +where you are not invoking the `difft` binary directly.