Demo files

2023-12-16 13:00:22 +07:00 · 2023-12-16 13:00:22 +07:00 · 5cf0915b4e
parent c0f1615e79
commit 5cf0915b4e
4 changed files with 1298 additions and 0 deletions
--- a/demo_files/1/src/conflicts.rs
+++ b/demo_files/1/src/conflicts.rs
@ -0,0 +1,137 @@
+//! Apply conflict markers to obtain the original file contents.
+//!
+//! https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-mergeconflictStyle
+
+use ConflictState::*;
+
+#[derive(Debug, Clone, Copy)]
+enum ConflictState {
+    NoConflict,
+    Left,
+    Base,
+    Right,
+}
+
+pub const START_LHS_MARKER: &str = "<<<<<<<";
+const START_BASE_MARKER: &str = "|||||||";
+const START_RHS_MARKER: &str = "=======";
+const END_RHS_MARKER: &str = ">>>>>>>";
+
+pub struct ConflictFiles {
+    pub lhs_name: String,
+    pub lhs_content: String,
+    pub rhs_name: String,
+    pub rhs_content: String,
+    pub num_conflicts: usize,
+}
+
+/// Convert a string with conflict markers into the two conflicting
+/// file contents.
+pub fn apply_conflict_markers(s: &str) -> Result<ConflictFiles, String> {
+    let mut lhs_name = String::new();
+    let mut rhs_name = String::new();
+
+    let mut lhs_content = String::with_capacity(s.len());
+    let mut rhs_content = String::with_capacity(s.len());
+    let mut num_conflicts = 0;
+
+    let mut state = NoConflict;
+    let mut conflict_start_line = None;
+    for (i, line) in s.split_inclusive('\n').enumerate() {
+        if let Some(hunk_lhs_name) = line.strip_prefix(START_LHS_MARKER) {
+            state = Left;
+            num_conflicts += 1;
+            conflict_start_line = Some(i);
+
+            let hunk_lhs_name = hunk_lhs_name.trim();
+            if hunk_lhs_name.len() > lhs_name.len() {
+                lhs_name = hunk_lhs_name.to_owned();
+            }
+
+            continue;
+        }
+        if line.starts_with(START_BASE_MARKER) {
+            state = Base;
+            continue;
+        }
+        if line.starts_with(START_RHS_MARKER) {
+            state = Right;
+            continue;
+        }
+        if let Some(hunk_rhs_name) = line.strip_prefix(END_RHS_MARKER) {
+            state = NoConflict;
+
+            let hunk_rhs_name = hunk_rhs_name.trim();
+            if hunk_rhs_name.len() > rhs_name.len() {
+                rhs_name = hunk_rhs_name.to_owned();
+            }
+            continue;
+        }
+
+        match state {
+            NoConflict => {
+                lhs_content.push_str(line);
+                rhs_content.push_str(line);
+            }
+            Left => {
+                lhs_content.push_str(line);
+            }
+            Right => {
+                rhs_content.push_str(line);
+            }
+            Base => {}
+        }
+    }
+
+    if matches!(state, NoConflict) {
+        Ok(ConflictFiles {
+            lhs_name,
+            lhs_content,
+            rhs_name,
+            rhs_content,
+            num_conflicts,
+        })
+    } else {
+        let message = match conflict_start_line {
+            Some(line_i) => format!(
+                "Could not parse conflict markers, line {} has no matching {}.",
+                line_i, END_RHS_MARKER
+            ),
+            None => "Could not parse conflict markers.".to_owned(),
+        };
+        Err(message)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_with_base() {
+        // Deliberately avoid a multiline string literal to avoid
+        // confusing text editors when we open this file.
+        let s = "before\n<<<<<<< Temporary merge branch 1\nnew in left\n||||||| merged common ancestors\noriginal\n=======\nnew in right\n>>>>>>> Temporary merge branch 2\nafter";
+
+        let conflict_files = apply_conflict_markers(s).unwrap();
+        assert_eq!(conflict_files.lhs_content, "before\nnew in left\nafter");
+        assert_eq!(conflict_files.rhs_content, "before\nnew in right\nafter");
+
+        assert_eq!(conflict_files.lhs_name, "Temporary merge branch 1");
+        assert_eq!(conflict_files.rhs_name, "Temporary merge branch 2");
+    }
+
+    #[test]
+    fn test_without_base() {
+        // Deliberately avoid a multiline string literal to avoid
+        // confusing text editors when we open this file.
+        let s = "before\n<<<<<<< Temporary merge branch 1\nnew in left\n=======\nnew in right\n>>>>>>> Temporary merge branch 2\nafter";
+
+        let conflict_files = apply_conflict_markers(s).unwrap();
+        assert_eq!(conflict_files.lhs_content, "before\nnew in left\nafter");
+        assert_eq!(conflict_files.rhs_content, "before\nnew in right\nafter");
+
+        assert_eq!(conflict_files.lhs_name, "Temporary merge branch 1");
+        assert_eq!(conflict_files.rhs_name, "Temporary merge branch 2");
+    }
+}
--- a/demo_files/1/src/main.rs
+++ b/demo_files/1/src/main.rs
@ -0,0 +1,511 @@
+//! Difftastic is a syntactic diff tool.
+//!
+//! For usage instructions and advice on contributing, see [the
+//! manual](http://difftastic.wilfred.me.uk/).
+//!
+
+// This tends to trigger on larger tuples of simple types, and naming
+// them would probably be worse for readability.
+#![allow(clippy::type_complexity)]
+// == "" is often clearer when dealing with strings.
+#![allow(clippy::comparison_to_empty)]
+// It's common to have pairs foo_lhs and foo_rhs, leading to double
+// the number of arguments and triggering this lint.
+#![allow(clippy::too_many_arguments)]
+
+mod context;
+mod dijkstra;
+mod files;
+mod graph;
+mod guess_language;
+mod hunks;
+mod inline;
+mod line_parser;
+mod lines;
+mod myers_diff;
+mod options;
+mod positions;
+mod side_by_side;
+mod sliders;
+mod style;
+mod summary;
+mod syntax;
+mod tree_sitter_parser;
+mod unchanged;
+
+#[macro_use]
+extern crate log;
+
+use crate::hunks::{matched_pos_to_hunks, merge_adjacent};
+use context::opposite_positions;
+use files::read_files_or_die;
+use guess_language::guess;
+use log::info;
+use mimalloc::MiMalloc;
+
+/// The global allocator used by difftastic.
+///
+/// Diffing allocates a large amount of memory, and `MiMalloc` performs
+/// better.
+#[global_allocator]
+static GLOBAL: MiMalloc = MiMalloc;
+
+use options::{should_use_color, DisplayMode, Mode};
+use sliders::fix_all_sliders;
+use std::{env, path::Path};
+use style::BackgroundColor;
+use summary::{DiffResult, FileContent};
+use syntax::init_next_prev;
+use typed_arena::Arena;
+use walkdir::WalkDir;
+
+use crate::{
+    dijkstra::mark_syntax,
+    files::{is_probably_binary, read_or_die},
+    lines::MaxLine,
+    syntax::init_all_info,
+    tree_sitter_parser as tsp,
+};
+
+extern crate pretty_env_logger;
+
+/// Terminate the process if we get SIGPIPE.
+#[cfg(unix)]
+fn reset_sigpipe() {
+    unsafe {
+        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
+    }
+}
+
+#[cfg(not(unix))]
+fn reset_sigpipe() {
+    // Do nothing.
+}
+
+/// The entrypoint.
+fn main() {
+    pretty_env_logger::init();
+    reset_sigpipe();
+
+    match options::parse_args() {
+        Mode::DumpTreeSitter { path } => {
+            let path = Path::new(&path);
+            let bytes = read_or_die(path);
+            let src = String::from_utf8_lossy(&bytes).to_string();
+            match guess(path, &src) {
+                Some(lang) => {
+                    let ts_lang = tsp::from_language(lang);
+                    let tree = tsp::parse_to_tree(&src, &ts_lang);
+                    tsp::print_tree(&src, &tree);
+                }
+                None => {
+                    eprintln!("No tree-sitter parser for file: {:?}", path);
+                }
+            }
+        }
+        Mode::DumpSyntax { path } => {
+            let path = Path::new(&path);
+            let bytes = read_or_die(path);
+            let src = String::from_utf8_lossy(&bytes).to_string();
+
+            match guess(path, &src) {
+                Some(lang) => {
+                    let ts_lang = tsp::from_language(lang);
+                    let arena = Arena::new();
+                    let ast = tsp::parse(&arena, &src, &ts_lang);
+                    init_all_info(&ast, &[]);
+                    println!("{:#?}", ast);
+                }
+                None => {
+                    eprintln!("No tree-sitter parser for file: {:?}", path);
+                }
+            }
+        }
+        Mode::Diff {
+            node_limit,
+            byte_limit,
+            print_unchanged,
+            missing_as_empty,
+            display_mode,
+            background_color,
+            color_output,
+            display_width,
+            display_path,
+            lhs_path,
+            rhs_path,
+            ..
+        } => {
+            let use_color = should_use_color(color_output);
+
+            let lhs_path = Path::new(&lhs_path);
+            let rhs_path = Path::new(&rhs_path);
+
+            if lhs_path == rhs_path {
+                eprintln!(
+                    "warning: You've specified the same {} twice.\n",
+                    if lhs_path.is_dir() {
+                        "directory"
+                    } else {
+                        "file"
+                    }
+                );
+            }
+
+            if lhs_path.is_dir() && rhs_path.is_dir() {
+                for diff_result in
+                    diff_directories(lhs_path, rhs_path, missing_as_empty, node_limit, byte_limit)
+                {
+                    print_diff_result(
+                        display_width,
+                        use_color,
+                        display_mode,
+                        background_color,
+                        print_unchanged,
+                        &diff_result,
+                    );
+                }
+            } else {
+                let diff_result = diff_file(
+                    &display_path,
+                    lhs_path,
+                    rhs_path,
+                    missing_as_empty,
+                    node_limit,
+                    byte_limit,
+                );
+                print_diff_result(
+                    display_width,
+                    use_color,
+                    display_mode,
+                    background_color,
+                    print_unchanged,
+                    &diff_result,
+                );
+            }
+        }
+    };
+}
+
+/// Print a diff between two files.
+fn diff_file(
+    display_path: &str,
+    lhs_path: &Path,
+    rhs_path: &Path,
+    missing_as_empty: bool,
+    node_limit: u32,
+    byte_limit: usize,
+) -> DiffResult {
+    let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty);
+    diff_file_content(display_path, &lhs_bytes, &rhs_bytes, node_limit, byte_limit)
+}
+
+fn diff_file_content(
+    display_path: &str,
+    lhs_bytes: &[u8],
+    rhs_bytes: &[u8],
+    node_limit: u32,
+    byte_limit: usize,
+) -> DiffResult {
+    if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) {
+        return DiffResult {
+            path: display_path.into(),
+            language: None,
+            lhs_src: FileContent::Binary(lhs_bytes.to_vec()),
+            rhs_src: FileContent::Binary(rhs_bytes.to_vec()),
+            lhs_positions: vec![],
+            rhs_positions: vec![],
+        };
+    }
+
+    // TODO: don't replace tab characters inside string literals.
+    let mut lhs_src = String::from_utf8_lossy(lhs_bytes)
+        .to_string()
+        .replace('\t', "    ");
+    let mut rhs_src = String::from_utf8_lossy(rhs_bytes)
+        .to_string()
+        .replace('\t', "    ");
+
+    // Ignore the trailing newline, if present.
+    // TODO: highlight if this has changes (#144).
+    // TODO: factor out a string cleaning function.
+    if lhs_src.ends_with('\n') {
+        lhs_src.pop();
+    }
+    if rhs_src.ends_with('\n') {
+        rhs_src.pop();
+    }
+
+    // TODO: take a Path directly instead.
+    let path = Path::new(&display_path);
+
+    // Take the larger of the two files when guessing the
+    // language. This is useful when we've added or removed a whole
+    // file.
+    let guess_src = if lhs_src.len() > rhs_src.len() {
+        &lhs_src
+    } else {
+        &rhs_src
+    };
+    let ts_lang = guess(path, guess_src).map(tsp::from_language);
+
+    if lhs_bytes == rhs_bytes {
+        // If the two files are completely identical, return early
+        // rather than doing any more work.
+        return DiffResult {
+            path: display_path.into(),
+            language: ts_lang.map(|l| l.name.into()),
+            lhs_src: FileContent::Text("".into()),
+            rhs_src: FileContent::Text("".into()),
+            lhs_positions: vec![],
+            rhs_positions: vec![],
+        };
+    }
+
+    let (lang_name, lhs_positions, rhs_positions) = match ts_lang {
+        _ if lhs_bytes.len() > byte_limit || rhs_bytes.len() > byte_limit => {
+            let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
+            let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
+            (
+                Some("Text (exceeded DFT_BYTE_LIMIT)".into()),
+                lhs_positions,
+                rhs_positions,
+            )
+        }
+        Some(ts_lang) => {
+            let arena = Arena::new();
+            let lhs = tsp::parse(&arena, &lhs_src, &ts_lang);
+            let rhs = tsp::parse(&arena, &rhs_src, &ts_lang);
+
+            init_all_info(&lhs, &rhs);
+
+            let possibly_changed = if env::var("DFT_DBG_KEEP_UNCHANGED").is_ok() {
+                vec![(lhs.clone(), rhs.clone())]
+            } else {
+                unchanged::mark_unchanged(&lhs, &rhs)
+            };
+
+            let possibly_changed_max = max_num_nodes(&possibly_changed);
+            if possibly_changed_max > node_limit {
+                info!(
+                    "Found {} nodes, exceeding the limit {}",
+                    possibly_changed_max, node_limit
+                );
+
+                let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
+                let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
+                (
+                    Some("Text (exceeded DFT_NODE_LIMIT)".into()),
+                    lhs_positions,
+                    rhs_positions,
+                )
+            } else {
+                for (lhs_section_nodes, rhs_section_nodes) in possibly_changed {
+                    init_next_prev(&lhs_section_nodes);
+                    init_next_prev(&rhs_section_nodes);
+
+                    mark_syntax(
+                        lhs_section_nodes.get(0).copied(),
+                        rhs_section_nodes.get(0).copied(),
+                    );
+
+                    fix_all_sliders(&lhs_section_nodes);
+                    fix_all_sliders(&rhs_section_nodes);
+                }
+
+                let lhs_positions = syntax::change_positions(&lhs);
+                let rhs_positions = syntax::change_positions(&rhs);
+                (Some(ts_lang.name.into()), lhs_positions, rhs_positions)
+            }
+        }
+        None => {
+            let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
+            let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
+            (None, lhs_positions, rhs_positions)
+        }
+    };
+
+    DiffResult {
+        path: display_path.into(),
+        language: lang_name,
+        lhs_src: FileContent::Text(lhs_src),
+        rhs_src: FileContent::Text(rhs_src),
+        lhs_positions,
+        rhs_positions,
+    }
+}
+
+/// Given two directories that contain the files, compare them
+/// pairwise. Returns an iterator, so we can print results
+/// incrementally.
+///
+/// When more than one file is modified, the hg extdiff extension passes directory
+/// paths with the all the modified files.
+fn diff_directories<'a>(
+    lhs_dir: &'a Path,
+    rhs_dir: &'a Path,
+    missing_as_empty: bool,
+    node_limit: u32,
+    byte_limit: usize,
+) -> impl Iterator<Item = DiffResult> + 'a {
+    WalkDir::new(lhs_dir)
+        .into_iter()
+        .filter_map(Result::ok)
+        .map(|entry| entry.into_path())
+        .filter(|lhs_path| !lhs_path.is_dir())
+        .map(move |lhs_path| {
+            info!("LHS path is {:?} inside {:?}", lhs_path, lhs_dir);
+
+            let rel_path = lhs_path.strip_prefix(lhs_dir).unwrap();
+            let rhs_path = Path::new(rhs_dir).join(rel_path);
+
+            diff_file(
+                &rel_path.to_string_lossy(),
+                &lhs_path,
+                &rhs_path,
+                missing_as_empty,
+                node_limit,
+                byte_limit,
+            )
+        })
+}
+
+// TODO: factor out a DiffOptions struct.
+fn print_diff_result(
+    display_width: usize,
+    use_color: bool,
+    display_mode: DisplayMode,
+    background: BackgroundColor,
+    print_unchanged: bool,
+    summary: &DiffResult,
+) {
+    match (&summary.lhs_src, &summary.rhs_src) {
+        (FileContent::Text(lhs_src), FileContent::Text(rhs_src)) => {
+            let opposite_to_lhs = opposite_positions(&summary.lhs_positions);
+            let opposite_to_rhs = opposite_positions(&summary.rhs_positions);
+
+            let hunks = matched_pos_to_hunks(&summary.lhs_positions, &summary.rhs_positions);
+            let hunks = merge_adjacent(
+                &hunks,
+                &opposite_to_lhs,
+                &opposite_to_rhs,
+                lhs_src.max_line(),
+                rhs_src.max_line(),
+            );
+
+            let lang_name = summary.language.clone().unwrap_or_else(|| "Text".into());
+            if hunks.is_empty() {
+                if print_unchanged {
+                    println!(
+                        "{}",
+                        style::header(&summary.path, 1, 1, &lang_name, use_color, background)
+                    );
+                    if lang_name == "Text" || summary.lhs_src == summary.rhs_src {
+                        // TODO: there are other Text names now, so
+                        // they will hit the second case incorrectly.
+                        println!("No changes.\n");
+                    } else {
+                        println!("No syntactic changes.\n");
+                    }
+                }
+                return;
+            }
+
+            match display_mode {
+                DisplayMode::Inline => {
+                    inline::print(
+                        lhs_src,
+                        rhs_src,
+                        &summary.lhs_positions,
+                        &summary.rhs_positions,
+                        &hunks,
+                        &summary.path,
+                        &lang_name,
+                        use_color,
+                        background,
+                    );
+                }
+                DisplayMode::SideBySide | DisplayMode::SideBySideShowBoth => {
+                    side_by_side::print(
+                        &hunks,
+                        display_width,
+                        use_color,
+                        display_mode,
+                        background,
+                        &summary.path,
+                        &lang_name,
+                        lhs_src,
+                        rhs_src,
+                        &summary.lhs_positions,
+                        &summary.rhs_positions,
+                    );
+                }
+            }
+        }
+        (FileContent::Binary(lhs_bytes), FileContent::Binary(rhs_bytes)) => {
+            let changed = lhs_bytes != rhs_bytes;
+            if print_unchanged || changed {
+                println!(
+                    "{}",
+                    style::header(&summary.path, 1, 1, "binary", use_color, background)
+                );
+                if changed {
+                    println!("Binary contents changed.");
+                } else {
+                    println!("No changes.");
+                }
+            }
+        }
+        (_, FileContent::Binary(_)) | (FileContent::Binary(_), _) => {
+            // We're diffing a binary file against a text file.
+            println!(
+                "{}",
+                style::header(&summary.path, 1, 1, "binary", use_color, background)
+            );
+            println!("Binary contents changed.");
+        }
+    }
+}
+
+/// What is the total number of nodes in `roots`?
+fn num_nodes(roots: &[&syntax::Syntax]) -> u32 {
+    roots
+        .iter()
+        .map(|n| {
+            1 + match n {
+                syntax::Syntax::List {
+                    num_descendants, ..
+                } => *num_descendants,
+                syntax::Syntax::Atom { .. } => 0,
+            }
+        })
+        .sum()
+}
+
+fn max_num_nodes(roots_vec: &[(Vec<&syntax::Syntax>, Vec<&syntax::Syntax>)]) -> u32 {
+    roots_vec
+        .iter()
+        .map(|(lhs, rhs)| num_nodes(lhs) + num_nodes(rhs))
+        .max()
+        .unwrap_or(0)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::options::{DEFAULT_BYTE_LIMIT, DEFAULT_NODE_LIMIT};
+
+    #[test]
+    fn test_diff_identical_content() {
+        let s = "foo";
+        let res = diff_file_content(
+            "foo.el",
+            s.as_bytes(),
+            s.as_bytes(),
+            DEFAULT_NODE_LIMIT,
+            DEFAULT_BYTE_LIMIT,
+        );
+
+        assert_eq!(res.lhs_positions, vec![]);
+        assert_eq!(res.rhs_positions, vec![]);
+    }
+}
--- a/demo_files/2/src/conflicts.rs
+++ b/demo_files/2/src/conflicts.rs
@ -0,0 +1,137 @@
+//! Apply conflict markers to obtain the original file contents.
+//!
+//! https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-mergeconflictStyle
+
+use ConflictState::*;
+
+#[derive(Debug, Clone, Copy)]
+enum ConflictState {
+    NoConflict,
+    Left,
+    Base,
+    Right,
+}
+
+pub const START_LHS_MARKER: &str = "<<<<<<<";
+const START_BASE_MARKER: &str = "|||||||";
+const START_RHS_MARKER: &str = "=======";
+const END_RHS_MARKER: &str = ">>>>>>>";
+
+pub struct ConflictFiles {
+    pub lhs_name: Option<String>,
+    pub lhs_content: String,
+    pub rhs_name: Option<String>,
+    pub rhs_content: String,
+    pub num_conflicts: usize,
+}
+
+/// Convert a string with conflict markers into the two conflicting
+/// file contents.
+pub fn apply_conflict_markers(s: &str) -> Result<ConflictFiles, String> {
+    let mut lhs_name = String::new();
+    let mut rhs_name = String::new();
+
+    let mut lhs_content = String::with_capacity(s.len());
+    let mut rhs_content = String::with_capacity(s.len());
+    let mut num_conflicts = 0;
+
+    let mut state = NoConflict;
+    let mut conflict_start_line = None;
+    for (i, line) in s.split_inclusive('\n').enumerate() {
+        if let Some(hunk_lhs_name) = line.strip_prefix(START_LHS_MARKER) {
+            state = Left;
+            num_conflicts += 1;
+            conflict_start_line = Some(i);
+
+            let hunk_lhs_name = hunk_lhs_name.trim();
+            if hunk_lhs_name.len() > lhs_name.len() {
+                lhs_name = hunk_lhs_name.to_owned();
+            }
+
+            continue;
+        }
+        if line.starts_with(START_BASE_MARKER) {
+            state = Base;
+            continue;
+        }
+        if line.starts_with(START_RHS_MARKER) {
+            state = Right;
+            continue;
+        }
+        if let Some(hunk_rhs_name) = line.strip_prefix(END_RHS_MARKER) {
+            state = NoConflict;
+
+            let hunk_rhs_name = hunk_rhs_name.trim();
+            if hunk_rhs_name.len() > rhs_name.len() {
+                rhs_name = hunk_rhs_name.to_owned();
+            }
+            continue;
+        }
+
+        match state {
+            NoConflict => {
+                lhs_content.push_str(line);
+                rhs_content.push_str(line);
+            }
+            Left => {
+                lhs_content.push_str(line);
+            }
+            Right => {
+                rhs_content.push_str(line);
+            }
+            Base => {}
+        }
+    }
+
+    if matches!(state, NoConflict) {
+        Ok(ConflictFiles {
+            lhs_name,
+            lhs_content,
+            rhs_name,
+            rhs_content,
+            num_conflicts,
+        })
+    } else {
+        let message = match conflict_start_line {
+            Some(line_i) => format!(
+                "Could not parse conflict markers, line {} has no matching {}.",
+                line_i, END_RHS_MARKER
+            ),
+            None => "Could not parse conflict markers.".to_owned(),
+        };
+        Err(message)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_with_base() {
+        // Deliberately avoid a multiline string literal to avoid
+        // confusing text editors when we open this file.
+        let s = "before\n<<<<<<< Temporary merge branch 1\nnew in left\n||||||| merged common ancestors\noriginal\n=======\nnew in right\n>>>>>>> Temporary merge branch 2\nafter";
+
+        let conflict_files = apply_conflict_markers(s).unwrap();
+        assert_eq!(conflict_files.lhs_content, "before\nnew in left\nafter");
+        assert_eq!(conflict_files.rhs_content, "before\nnew in right\nafter");
+
+        assert_eq!(conflict_files.lhs_name, "Temporary merge branch 1");
+        assert_eq!(conflict_files.rhs_name, "Temporary merge branch 2");
+    }
+
+    #[test]
+    fn test_without_base() {
+        // Deliberately avoid a multiline string literal to avoid
+        // confusing text editors when we open this file.
+        let s = "before\n<<<<<<< Temporary merge branch 1\nnew in left\n=======\nnew in right\n>>>>>>> Temporary merge branch 2\nafter";
+
+        let conflict_files = apply_conflict_markers(s).unwrap();
+        assert_eq!(conflict_files.lhs_content, "before\nnew in left\nafter");
+        assert_eq!(conflict_files.rhs_content, "before\nnew in right\nafter");
+
+        assert_eq!(conflict_files.lhs_name, "Temporary merge branch 1");
+        assert_eq!(conflict_files.rhs_name, "Temporary merge branch 2");
+    }
+}
--- a/demo_files/2/src/main.rs
+++ b/demo_files/2/src/main.rs
@ -0,0 +1,513 @@
+//! Difftastic is a syntactic diff tool.
+//!
+//! For usage instructions and advice on contributing, see [the
+//! manual](http://difftastic.wilfred.me.uk/).
+//!
+
+// This tends to trigger on larger tuples of simple types, and naming
+// them would probably be worse for readability.
+#![allow(clippy::type_complexity)]
+// == "" is often clearer when dealing with strings.
+#![allow(clippy::comparison_to_empty)]
+// It's common to have pairs foo_lhs and foo_rhs, leading to double
+// the number of arguments and triggering this lint.
+#![allow(clippy::too_many_arguments)]
+
+mod context;
+mod dijkstra;
+mod files;
+mod graph;
+mod guess_language;
+mod hunks;
+mod inline;
+mod line_parser;
+mod lines;
+mod myers_diff;
+mod options;
+mod positions;
+mod side_by_side;
+mod sliders;
+mod style;
+mod summary;
+mod syntax;
+mod tree_sitter_parser;
+mod unchanged;
+
+#[macro_use]
+extern crate log;
+
+use crate::hunks::{matched_pos_to_hunks, merge_adjacent};
+use context::opposite_positions;
+use files::read_files_or_die;
+use guess_language::guess;
+use log::info;
+use mimalloc::MiMalloc;
+
+/// The global allocator used by difftastic.
+///
+/// Diffing allocates a large amount of memory, and `MiMalloc` performs
+/// better.
+#[global_allocator]
+static GLOBAL: MiMalloc = MiMalloc;
+
+use options::{should_use_color, DisplayMode, Mode};
+use sliders::fix_all_sliders;
+use std::{env, path::Path};
+use style::BackgroundColor;
+use summary::{DiffResult, FileContent};
+use syntax::init_next_prev;
+use typed_arena::Arena;
+use walkdir::WalkDir;
+
+use crate::{
+    dijkstra::mark_syntax,
+    files::{is_probably_binary, read_or_die},
+    lines::MaxLine,
+    syntax::init_all_info,
+    tree_sitter_parser as tsp,
+};
+
+extern crate pretty_env_logger;
+
+/// Terminate the process if we get SIGPIPE.
+#[cfg(unix)]
+fn reset_sigpipe() {
+    unsafe {
+        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
+    }
+}
+
+#[cfg(not(unix))]
+fn reset_sigpipe() {
+    // Do nothing.
+}
+
+/// The entrypoint.
+fn main() {
+    pretty_env_logger::init();
+    reset_sigpipe();
+
+    match options::parse_args() {
+        Mode::DumpTreeSitter { path } => {
+            let path = Path::new(&path);
+            let bytes = read_or_die(path);
+            let src = String::from_utf8_lossy(&bytes).to_string();
+            match guess(path, &src) {
+                Some(lang) => {
+                    let ts_lang = tsp::from_language(lang);
+                    let tree = tsp::parse_to_tree(&src, &ts_lang);
+                    tsp::print_tree(&src, &tree);
+                }
+                None => {
+                    eprintln!("No tree-sitter parser for file: {:?}", path);
+                }
+            }
+        }
+        Mode::DumpSyntax { path } => {
+            let path = Path::new(&path);
+            let bytes = read_or_die(path);
+            let src = String::from_utf8_lossy(&bytes).to_string();
+
+            match guess(path, &src) {
+                Some(lang) => {
+                    let ts_lang = tsp::from_language(lang);
+                    let arena = Arena::new();
+                    let ast = tsp::parse(&arena, &src, &ts_lang);
+                    init_all_info(&ast, &[]);
+                    println!("{:#?}", ast);
+                }
+                None => {
+                    eprintln!("No tree-sitter parser for file: {:?}", path);
+                }
+            }
+        }
+        Mode::Diff {
+            node_limit,
+            byte_limit,
+            print_unchanged,
+            missing_as_empty,
+            display_mode,
+            background_color,
+            color_output,
+            display_width,
+            display_path,
+            lhs_path,
+            rhs_path,
+            ..
+        } => {
+            let use_color = should_use_color(color_output);
+
+            let lhs_path = Path::new(&lhs_path);
+            let rhs_path = Path::new(&rhs_path);
+
+            if lhs_path == rhs_path {
+                eprintln!(
+                    "warning: You've specified the same {} twice.\n",
+                    if lhs_path.is_dir() {
+                        "directory"
+                    } else {
+                        "file"
+                    }
+                );
+            }
+
+            if lhs_path.is_dir() && rhs_path.is_dir() {
+                for diff_result in
+                    diff_directories(lhs_path, rhs_path, missing_as_empty, node_limit, byte_limit)
+                {
+                    print_diff_result(
+                        display_width,
+                        use_color,
+                        display_mode,
+                        background_color,
+                        print_unchanged,
+                        &diff_result,
+                    );
+                }
+            } else {
+                let diff_result = diff_file(
+                    &display_path,
+                    lhs_path,
+                    rhs_path,
+                    missing_as_empty,
+                    node_limit,
+                    byte_limit,
+                );
+                print_diff_result(
+                    display_width,
+                    use_color,
+                    display_mode,
+                    background_color,
+                    print_unchanged,
+                    &diff_result,
+                );
+            }
+        }
+    };
+}
+
+/// Print a diff between two files.
+fn diff_file(
+    display_path: &str,
+    lhs_path: &Path,
+    rhs_path: &Path,
+    missing_as_empty: bool,
+    node_limit: u32,
+    byte_limit: usize,
+) -> DiffResult {
+    let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty);
+    diff_file_content(display_path, &lhs_bytes, &rhs_bytes, node_limit, byte_limit)
+}
+
+fn diff_file_content(
+    display_path: &str,
+    lhs_bytes: &[u8],
+    rhs_bytes: &[u8],
+    node_limit: u32,
+    byte_limit: usize,
+) -> DiffResult {
+    if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) {
+        return DiffResult {
+            path: display_path.into(),
+            language: None,
+            lhs_src: FileContent::Binary(lhs_bytes.to_vec()),
+            rhs_src: FileContent::Binary(rhs_bytes.to_vec()),
+            lhs_positions: vec![],
+            rhs_positions: vec![],
+        };
+    }
+
+    // TODO: don't replace tab characters inside string literals.
+    let mut lhs_src = String::from_utf8_lossy(lhs_bytes)
+        .to_string()
+        .replace('\t', "    ");
+    let mut rhs_src = String::from_utf8_lossy(rhs_bytes)
+        .to_string()
+        .replace('\t', "    ");
+
+    // Ignore the trailing newline, if present.
+    // TODO: highlight if this has changes (#144).
+    // TODO: factor out a string cleaning function.
+    if lhs_src.ends_with('\n') {
+        lhs_src.pop();
+    }
+    if rhs_src.ends_with('\n') {
+        rhs_src.pop();
+    }
+
+    // TODO: take a Path directly instead.
+    let path = Path::new(&display_path);
+
+    // Take the larger of the two files when guessing the
+    // language. This is useful when we've added or removed a whole
+    // file.
+    let guess_src = if lhs_src.len() > rhs_src.len() {
+        &lhs_src
+    } else {
+        &rhs_src
+    };
+    let ts_lang = language_override
+        .or_else(|| guess(path, guess_src))
+        .map(tsp::from_language);
+
+    if lhs_bytes == rhs_bytes {
+        // If the two files are completely identical, return early
+        // rather than doing any more work.
+        return DiffResult {
+            path: display_path.into(),
+            language: ts_lang.map(|l| l.name.into()),
+            lhs_src: FileContent::Text("".into()),
+            rhs_src: FileContent::Text("".into()),
+            lhs_positions: vec![],
+            rhs_positions: vec![],
+        };
+    }
+
+    let (lang_name, lhs_positions, rhs_positions) = match ts_lang {
+        _ if lhs_bytes.len() > byte_limit || rhs_bytes.len() > byte_limit => {
+            let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
+            let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
+            (
+                Some("Text (exceeded DFT_BYTE_LIMIT)".into()),
+                lhs_positions,
+                rhs_positions,
+            )
+        }
+        Some(ts_lang) => {
+            let arena = Arena::new();
+            let lhs = tsp::parse(&arena, &lhs_src, &ts_lang);
+            let rhs = tsp::parse(&arena, &rhs_src, &ts_lang);
+
+            init_all_info(&lhs, &rhs);
+
+            let possibly_changed = if env::var("DFT_DBG_KEEP_UNCHANGED").is_ok() {
+                vec![(lhs.clone(), rhs.clone())]
+            } else {
+                unchanged::mark_unchanged(&lhs, &rhs)
+            };
+
+            let possibly_changed_max = max_num_nodes(&possibly_changed);
+            if possibly_changed_max > node_limit {
+                info!(
+                    "Found {} nodes, exceeding the limit {}",
+                    possibly_changed_max, node_limit
+                );
+
+                let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
+                let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
+                (
+                    Some("Text (exceeded DFT_NODE_LIMIT)".into()),
+                    lhs_positions,
+                    rhs_positions,
+                )
+            } else {
+                for (lhs_section_nodes, rhs_section_nodes) in possibly_changed {
+                    init_next_prev(&lhs_section_nodes);
+                    init_next_prev(&rhs_section_nodes);
+
+                    mark_syntax(
+                        lhs_section_nodes.get(0).copied(),
+                        rhs_section_nodes.get(0).copied(),
+                    );
+
+                    fix_all_sliders(&lhs_section_nodes);
+                    fix_all_sliders(&rhs_section_nodes);
+                }
+
+                let lhs_positions = syntax::change_positions(&lhs);
+                let rhs_positions = syntax::change_positions(&rhs);
+                (Some(ts_lang.name.into()), lhs_positions, rhs_positions)
+            }
+        }
+        None => {
+            let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
+            let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
+            (None, lhs_positions, rhs_positions)
+        }
+    };
+
+    DiffResult {
+        path: display_path.into(),
+        language: lang_name,
+        lhs_src: FileContent::Text(lhs_src),
+        rhs_src: FileContent::Text(rhs_src),
+        lhs_positions,
+        rhs_positions,
+    }
+}
+
+/// Given two directories that contain the files, compare them
+/// pairwise. Returns an iterator, so we can print results
+/// incrementally.
+///
+/// When more than one file is modified, the hg extdiff extension passes directory
+/// paths with the all the modified files.
+fn diff_directories<'a>(
+    lhs_dir: &'a Path,
+    rhs_dir: &'a Path,
+    missing_as_empty: bool,
+    node_limit: u32,
+    byte_limit: usize,
+) -> impl Iterator<Item = DiffResult> + 'a {
+    WalkDir::new(lhs_dir)
+        .into_iter()
+        .filter_map(Result::ok)
+        .map(|entry| entry.into_path())
+        .filter(|lhs_path| !lhs_path.is_dir())
+        .map(move |lhs_path| {
+            info!("LHS path is {:?} inside {:?}", lhs_path, lhs_dir);
+
+            let rel_path = lhs_path.strip_prefix(lhs_dir).unwrap();
+            let rhs_path = Path::new(rhs_dir).join(rel_path);
+
+            diff_file(
+                &rel_path.to_string_lossy(),
+                &lhs_path,
+                &rhs_path,
+                missing_as_empty,
+                node_limit,
+                byte_limit,
+            )
+        })
+}
+
+// TODO: factor out a DiffOptions struct.
+fn print_diff_result(
+    display_width: usize,
+    use_color: bool,
+    display_mode: DisplayMode,
+    background: BackgroundColor,
+    print_unchanged: bool,
+    summary: &DiffResult,
+) {
+    match (&summary.lhs_src, &summary.rhs_src) {
+        (FileContent::Text(lhs_src), FileContent::Text(rhs_src)) => {
+            let opposite_to_lhs = opposite_positions(&summary.lhs_positions);
+            let opposite_to_rhs = opposite_positions(&summary.rhs_positions);
+
+            let hunks = matched_pos_to_hunks(&summary.lhs_positions, &summary.rhs_positions);
+            let hunks = merge_adjacent(
+                &hunks,
+                &opposite_to_lhs,
+                &opposite_to_rhs,
+                lhs_src.max_line(),
+                rhs_src.max_line(),
+            );
+
+            let lang_name = summary.language.clone().unwrap_or_else(|| "Text".into());
+            if hunks.is_empty() {
+                if print_unchanged {
+                    println!(
+                        "{}",
+                        style::header(&summary.path, 1, 1, &lang_name, use_color, background)
+                    );
+                    if lang_name == "Text" || summary.lhs_src == summary.rhs_src {
+                        // TODO: there are other Text names now, so
+                        // they will hit the second case incorrectly.
+                        println!("No changes.\n");
+                    } else {
+                        println!("No syntactic changes.\n");
+                    }
+                }
+                return;
+            }
+
+            match display_mode {
+                DisplayMode::Inline => {
+                    inline::print(
+                        lhs_src,
+                        rhs_src,
+                        &summary.lhs_positions,
+                        &summary.rhs_positions,
+                        &hunks,
+                        &summary.path,
+                        &lang_name,
+                        use_color,
+                        background,
+                    );
+                }
+                DisplayMode::SideBySide | DisplayMode::SideBySideShowBoth => {
+                    side_by_side::print(
+                        &hunks,
+                        display_width,
+                        use_color,
+                        display_mode,
+                        background,
+                        &summary.path,
+                        &lang_name,
+                        lhs_src,
+                        rhs_src,
+                        &summary.lhs_positions,
+                        &summary.rhs_positions,
+                    );
+                }
+            }
+        }
+        (FileContent::Binary(lhs_bytes), FileContent::Binary(rhs_bytes)) => {
+            let changed = lhs_bytes != rhs_bytes;
+            if print_unchanged || changed {
+                println!(
+                    "{}",
+                    style::header(&summary.path, 1, 1, "binary", use_color, background)
+                );
+                if changed {
+                    println!("Binary contents changed.");
+                } else {
+                    println!("No changes.");
+                }
+            }
+        }
+        (_, FileContent::Binary(_)) | (FileContent::Binary(_), _) => {
+            // We're diffing a binary file against a text file.
+            println!(
+                "{}",
+                style::header(&summary.path, 1, 1, "binary", use_color, background)
+            );
+            println!("Binary contents changed.");
+        }
+    }
+}
+
+/// What is the total number of nodes in `roots`?
+fn num_nodes(roots: &[&syntax::Syntax]) -> u32 {
+    roots
+        .iter()
+        .map(|n| {
+            1 + match n {
+                syntax::Syntax::List {
+                    num_descendants, ..
+                } => *num_descendants,
+                syntax::Syntax::Atom { .. } => 0,
+            }
+        })
+        .sum()
+}
+
+fn max_num_nodes(roots_vec: &[(Vec<&syntax::Syntax>, Vec<&syntax::Syntax>)]) -> u32 {
+    roots_vec
+        .iter()
+        .map(|(lhs, rhs)| num_nodes(lhs) + num_nodes(rhs))
+        .max()
+        .unwrap_or(0)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::options::{DEFAULT_BYTE_LIMIT, DEFAULT_NODE_LIMIT};
+
+    #[test]
+    fn test_diff_identical_content() {
+        let s = "foo";
+        let res = diff_file_content(
+            "foo.el",
+            s.as_bytes(),
+            s.as_bytes(),
+            DEFAULT_NODE_LIMIT,
+            DEFAULT_BYTE_LIMIT,
+        );
+
+        assert_eq!(res.lhs_positions, vec![]);
+        assert_eq!(res.rhs_positions, vec![]);
+    }
+}