Merge branch 'Wilfred:master' into translation

pull/370/head
Eric Tao 2022-09-10 18:05:15 +07:00 committed by GitHub
commit 5b57b2d279
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 190 additions and 45 deletions

@ -27,10 +27,10 @@ jobs:
strategy:
matrix:
include:
- target: aarch64-unknown-linux-gnu
os: ubuntu-latest
- target: aarch64-apple-darwin
os: macos-latest
# - target: aarch64-unknown-linux-gnu
# os: ubuntu-latest
# - target: aarch64-apple-darwin
# os: macos-latest
- target: x86_64-unknown-linux-gnu
os: ubuntu-latest
- target: x86_64-apple-darwin

@ -19,15 +19,15 @@ jobs:
- uses: dtolnay/rust-toolchain@stable
- run: cargo +stable test
test_linux_arm64:
name: Test Linux arm64 (build only)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.57.0
- run: rustup target add aarch64-unknown-linux-gnu
- run: sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- run: cargo build --target=aarch64-unknown-linux-gnu
# test_linux_arm64:
# name: Test Linux arm64 (build only)
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - uses: dtolnay/rust-toolchain@1.57.0
# - run: rustup target add aarch64-unknown-linux-gnu
# - run: sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
# - run: cargo build --target=aarch64-unknown-linux-gnu
test_mac:
name: Test macOS

@ -1,5 +1,10 @@
## 0.36 (unreleased)
### Diffing
Improved diff cost model to prefer finding unchanged variable names
over unchanged punctuation.
## 0.35 (released 2nd September 2022)
### Diffing

@ -0,0 +1,26 @@
// From lodash under a MIT license.
// https://github.com/lodash/lodash/blob/master/LICENSE
/**
* This function is like `arrayIncludes` except that it accepts a comparator.
*
* @private
* @param {Array} [array] The array to inspect.
* @param {*} target The value to search for.
* @param {Function} comparator The comparator invoked per element.
* @returns {boolean} Returns `true` if `target` is found, else `false`.
*/
function arrayIncludesWith(array, target, comparator) {
if (array == null) {
return false
}
for (const value of array) {
if (comparator(target, value)) {
return true
}
}
return false
}
export default arrayIncludesWith

@ -0,0 +1,25 @@
// From lodash under a MIT license.
// https://github.com/lodash/lodash/blob/master/LICENSE
/**
* This function is like `arrayIncludes` except that it accepts a comparator.
*
* @private
* @param {Array} [array] The array to inspect.
* @param {*} target The value to search for.
* @param {Function} comparator The comparator invoked per element.
* @returns {boolean} Returns `true` if `target` is found, else `false`.
*/
function arrayIncludesWith(array, value, comparator) {
let index = -1
const length = array == null ? 0 : array.length
while (++index < length) {
if (comparator(value, array[index])) {
return true
}
}
return false
}
export default arrayIncludesWith

@ -13,6 +13,9 @@ sample_files/chinese_before.po sample_files/chinese_after.po
sample_files/clojure_before.clj sample_files/clojure_after.clj
b916e224f289888252cd7597bab339e6 -
sample_files/comma_before.js sample_files/comma_after.js
6b06dbb3eb6cc44ce51d2e40695d823b -
sample_files/comments_before.rs sample_files/comments_after.rs
0b2756c60659993310f899b131cca84f -

@ -20,6 +20,7 @@ cargo build --release
# Set language so we expand globs in a consistent order regardless of
# locale (e.g. on GitHub actions).
LANG=en_US.UTF-8
unset LC_ALL LC_COLLATE
echo "==> Check outputs"
for before_f in sample_files/*before.*; do

@ -326,7 +326,10 @@ mod tests {
EnterUnchangedDelimiter {
depth_difference: 0
},
NovelAtomLHS { contiguous: false },
NovelAtomLHS {
contiguous: false,
probably_punctuation: false,
},
ExitDelimiterBoth,
]
);
@ -369,8 +372,14 @@ mod tests {
EnterUnchangedDelimiter {
depth_difference: 0
},
NovelAtomRHS { contiguous: false },
NovelAtomRHS { contiguous: false },
NovelAtomRHS {
contiguous: false,
probably_punctuation: false
},
NovelAtomRHS {
contiguous: false,
probably_punctuation: false
},
ExitDelimiterBoth,
]
);
@ -456,8 +465,14 @@ mod tests {
UnchangedNode {
depth_difference: 0
},
NovelAtomLHS { contiguous: false },
NovelAtomLHS { contiguous: true },
NovelAtomLHS {
contiguous: false,
probably_punctuation: false
},
NovelAtomLHS {
contiguous: true,
probably_punctuation: false
},
]
);
}
@ -492,7 +507,10 @@ mod tests {
actions,
vec![
EnterNovelDelimiterLHS { contiguous: false },
NovelAtomLHS { contiguous: true },
NovelAtomLHS {
contiguous: true,
probably_punctuation: false
},
ExitDelimiterLHS,
]
);
@ -531,9 +549,15 @@ mod tests {
actions,
vec![
EnterNovelDelimiterLHS { contiguous: false },
NovelAtomLHS { contiguous: true },
NovelAtomLHS {
contiguous: true,
probably_punctuation: false
},
ExitDelimiterLHS,
NovelAtomLHS { contiguous: true },
NovelAtomLHS {
contiguous: true,
probably_punctuation: true
},
]
);
}
@ -640,7 +664,10 @@ mod tests {
ReplacedComment {
levenshtein_pct: 95
},
NovelAtomLHS { contiguous: false }
NovelAtomLHS {
contiguous: false,
probably_punctuation: false
}
]
);
}

@ -278,20 +278,38 @@ impl<'a, 'b> Vertex<'a, 'b> {
/// See [`neighbours`] for all the edges available for a given `Vertex`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Edge {
UnchangedNode { depth_difference: u32 },
EnterUnchangedDelimiter { depth_difference: u32 },
ReplacedComment { levenshtein_pct: u8 },
NovelAtomLHS { contiguous: bool },
NovelAtomRHS { contiguous: bool },
UnchangedNode {
depth_difference: u32,
},
EnterUnchangedDelimiter {
depth_difference: u32,
},
ReplacedComment {
levenshtein_pct: u8,
},
NovelAtomLHS {
contiguous: bool,
probably_punctuation: bool,
},
NovelAtomRHS {
contiguous: bool,
probably_punctuation: bool,
},
// TODO: An EnterNovelDelimiterBoth edge might help performance
// rather doing LHS and RHS separately.
EnterNovelDelimiterLHS { contiguous: bool },
EnterNovelDelimiterRHS { contiguous: bool },
EnterNovelDelimiterLHS {
contiguous: bool,
},
EnterNovelDelimiterRHS {
contiguous: bool,
},
ExitDelimiterLHS,
ExitDelimiterRHS,
ExitDelimiterBoth,
}
const NOT_CONTIGUOUS_PENALTY: u64 = 50;
impl Edge {
pub fn cost(self) -> u64 {
match self {
@ -317,21 +335,38 @@ impl Edge {
ReplacedComment { levenshtein_pct } => 150 + u64::from(100 - levenshtein_pct),
// Otherwise, we've added/removed a node.
NovelAtomLHS { contiguous }
| NovelAtomRHS { contiguous }
| EnterNovelDelimiterLHS { contiguous }
| EnterNovelDelimiterRHS { contiguous } => {
if contiguous {
300
} else {
NovelAtomLHS {
contiguous,
probably_punctuation,
}
| NovelAtomRHS {
contiguous,
probably_punctuation,
} => {
let mut cost = 300;
if !contiguous {
cost += NOT_CONTIGUOUS_PENALTY;
}
// If it's only punctuation, decrease the cost
// slightly. It's better to have novel punctuation
// than novel variable names.
if probably_punctuation {
cost -= 10;
}
cost
}
EnterNovelDelimiterLHS { contiguous } | EnterNovelDelimiterRHS { contiguous } => {
let mut cost = 300;
if !contiguous {
// This needs to be more than 40 greater than the
// contiguous case. Otherwise, we end up choosing
// a badly positioned unchanged delimiter just
// because it has a better depth difference.
//
// TODO: write a test for this case.
350
cost += NOT_CONTIGUOUS_PENALTY;
}
cost
}
}
}
@ -374,6 +409,14 @@ fn allocate_if_new<'syn, 'b>(
}
}
/// Does this atom look like punctuation?
///
/// This check is deliberately conservative, becuase it's hard to
/// accurately recognise punctuation in a language-agnostic way.
fn looks_like_punctuation(content: &str) -> bool {
content == "," || content == ";"
}
/// Compute the neighbours of `v` if we haven't previously done so,
/// write them to the .neighbours cell inside `v`, and return them.
pub fn get_set_neighbours<'syn, 'b>(
@ -579,12 +622,13 @@ pub fn get_set_neighbours<'syn, 'b>(
if let Some(lhs_syntax) = &v.lhs_syntax {
match lhs_syntax {
// Step over this novel atom.
Syntax::Atom { .. } => {
Syntax::Atom { content, .. } => {
res.push((
NovelAtomLHS {
// TODO: should this apply if prev is a parent
// node rather than a sibling?
contiguous: lhs_syntax.prev_is_contiguous(),
probably_punctuation: looks_like_punctuation(content),
},
allocate_if_new(
Vertex {
@ -634,10 +678,11 @@ pub fn get_set_neighbours<'syn, 'b>(
if let Some(rhs_syntax) = &v.rhs_syntax {
match rhs_syntax {
// Step over this novel atom.
Syntax::Atom { .. } => {
Syntax::Atom { content, .. } => {
res.push((
NovelAtomRHS {
contiguous: rhs_syntax.prev_is_contiguous(),
probably_punctuation: looks_like_punctuation(content),
},
allocate_if_new(
Vertex {

@ -52,7 +52,7 @@ use parse::guess_language::{guess, language_name};
static GLOBAL: MiMalloc = MiMalloc;
use diff::sliders::fix_all_sliders;
use options::{DisplayMode, DisplayOptions, Mode};
use options::{DisplayMode, DisplayOptions, Mode, DEFAULT_TAB_WIDTH};
use owo_colors::OwoColorize;
use rayon::prelude::*;
use std::{env, path::Path};
@ -93,6 +93,8 @@ fn main() {
let path = Path::new(&path);
let bytes = read_or_die(path);
let src = String::from_utf8_lossy(&bytes).to_string();
// TODO: Load display options rather than hard-coding.
let src = replace_tabs(&src, DEFAULT_TAB_WIDTH);
let language = language_override.or_else(|| guess(path, &src));
match language {
@ -113,6 +115,8 @@ fn main() {
let path = Path::new(&path);
let bytes = read_or_die(path);
let src = String::from_utf8_lossy(&bytes).to_string();
// TODO: Load display options rather than hard-coding.
let src = replace_tabs(&src, DEFAULT_TAB_WIDTH);
let language = language_override.or_else(|| guess(path, &src));
match language {
@ -196,6 +200,16 @@ fn main() {
};
}
/// Return a copy of `str` with all the tab characters replaced by
/// `tab_width` strings.
///
/// TODO: This break parsers that require tabs, such as Makefile
/// parsing. We shouldn't do this transform until after parsing.
fn replace_tabs(src: &str, tab_width: usize) -> String {
let tab_as_spaces = " ".repeat(tab_width);
src.replace('\t', &tab_as_spaces)
}
/// Print a diff between two files.
fn diff_file(
lhs_display_path: &str,
@ -247,9 +261,8 @@ fn diff_file_content(
};
// TODO: don't replace tab characters inside string literals.
let tab_as_spaces = " ".repeat(tab_width);
lhs_src = lhs_src.replace('\t', &tab_as_spaces);
rhs_src = rhs_src.replace('\t', &tab_as_spaces);
lhs_src = replace_tabs(&lhs_src, tab_width);
rhs_src = replace_tabs(&rhs_src, tab_width);
// Ignore the trailing newline, if present.
// TODO: highlight if this has changes (#144).

@ -116,7 +116,7 @@ fn app() -> clap::Command<'static> {
)
.arg(
Arg::new("syntax-highlight").long("syntax-highlight")
.value_name("ON/OFF")
.value_name("on/off")
.env("DFT_SYNTAX_HIGHLIGHT")
.possible_values(["on", "off"])
.default_value("on")

@ -508,7 +508,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_make() };
TreeSitterConfig {
language,
atom_nodes: vec![].into_iter().collect(),
atom_nodes: vec!["shell_text"].into_iter().collect(),
delimiter_tokens: vec![("(", ")")].into_iter().collect(),
highlight_query: ts::Query::new(
language,