From da27abf2361c876213c8293a83f8944240de46f2 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Thu, 16 Apr 2026 11:30:09 -0400 Subject: [PATCH 01/12] feat(patch_set): git diff parsing/application * Parse `diff --git` extended headers * split multi-file git diffs at `diff --git` boundaries --- src/patch_set/error.rs | 8 + src/patch_set/mod.rs | 68 +++++- src/patch_set/parse.rs | 519 ++++++++++++++++++++++++++++++++++++++++- src/patch_set/tests.rs | 311 +++++++++++++++++++++++- tests/compat/common.rs | 3 + tests/replay.rs | 4 + 6 files changed, 908 insertions(+), 5 deletions(-) diff --git a/src/patch_set/error.rs b/src/patch_set/error.rs index ba60e86..3a8fc61 100644 --- a/src/patch_set/error.rs +++ b/src/patch_set/error.rs @@ -70,6 +70,12 @@ pub(crate) enum PatchSetParseErrorKind { /// Create patch missing modified path. CreateMissingModifiedPath, + + /// Invalid file mode string. + InvalidFileMode(String), + + /// Invalid `diff --git` path. + InvalidDiffGitPath, } impl fmt::Display for PatchSetParseErrorKind { @@ -81,6 +87,8 @@ impl fmt::Display for PatchSetParseErrorKind { Self::BothDevNull => write!(f, "patch has both original and modified as /dev/null"), Self::DeleteMissingOriginalPath => write!(f, "delete patch has no original path"), Self::CreateMissingModifiedPath => write!(f, "create patch has no modified path"), + Self::InvalidFileMode(mode) => write!(f, "invalid file mode: {mode}"), + Self::InvalidDiffGitPath => write!(f, "invalid diff --git path"), } } } diff --git a/src/patch_set/mod.rs b/src/patch_set/mod.rs index 67ac1f6..ed3f68e 100644 --- a/src/patch_set/mod.rs +++ b/src/patch_set/mod.rs @@ -15,11 +15,23 @@ use crate::utils::Text; use crate::Patch; pub use error::PatchSetParseError; +use error::PatchSetParseErrorKind; pub use parse::PatchSet; /// Options for parsing patch content. /// -/// Use [`ParseOptions::unidiff()`] to create options for the desired format. 
+/// Use [`ParseOptions::unidiff()`] or [`ParseOptions::gitdiff()`] +/// to create options for the desired format. +/// +/// ## Binary Files +/// +/// When parsing git diffs, binary file changes are detected by: +/// +/// * `Binary files a/path and b/path differ` (`git diff` without `--binary` flag) +/// * `GIT binary patch` (from `git diff --binary`) +/// +/// Note that this is not a documented Git behavior, +/// so the implementation here is subject to change if Git changes. /// /// ## Example /// @@ -48,6 +60,8 @@ pub struct ParseOptions { pub(crate) enum Format { /// Standard unified diff format. UniDiff, + /// Git extended diff format. + GitDiff, } impl ParseOptions { @@ -68,6 +82,22 @@ impl ParseOptions { format: Format::UniDiff, } } + + /// Parse as [git extended diff format][git-diff-format]. + /// + /// Supports all features of [`unidiff()`](Self::unidiff) plus: + /// + /// * `diff --git` headers + /// * Extended headers (`new file mode`, `deleted file mode`, etc.) + /// * Rename/copy detection (`rename from`/`rename to`, `copy from`/`copy to`) + /// * Binary file detection (emits a marker by default) + /// + /// [git-diff-format]: https://git-scm.com/docs/diff-format + pub fn gitdiff() -> Self { + Self { + format: Format::GitDiff, + } + } } /// File mode extracted from git extended headers. @@ -83,11 +113,27 @@ pub enum FileMode { Gitlink, } +impl std::str::FromStr for FileMode { + type Err = PatchSetParseError; + + fn from_str(mode: &str) -> Result { + match mode { + "100644" => Ok(Self::Regular), + "100755" => Ok(Self::Executable), + "120000" => Ok(Self::Symlink), + "160000" => Ok(Self::Gitlink), + _ => Err(PatchSetParseErrorKind::InvalidFileMode(mode.to_owned()).into()), + } + } +} + /// The kind of patch content in a [`FilePatch`]. #[derive(Clone, PartialEq, Eq)] pub enum PatchKind<'a, T: ToOwned + ?Sized> { /// Text patch with hunks. Text(Patch<'a, T>), + /// Binary patch (literal or delta encoded, or marker-only). 
+ Binary, } impl std::fmt::Debug for PatchKind<'_, T> @@ -98,6 +144,7 @@ where fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { PatchKind::Text(patch) => f.debug_tuple("Text").field(patch).finish(), + PatchKind::Binary => f.write_str("Binary"), } } } @@ -107,8 +154,14 @@ impl<'a, T: ToOwned + ?Sized> PatchKind<'a, T> { pub fn as_text(&self) -> Option<&Patch<'a, T>> { match self { PatchKind::Text(patch) => Some(patch), + PatchKind::Binary => None, } } + + /// Returns `true` if this is a binary diff. + pub fn is_binary(&self) -> bool { + matches!(self, PatchKind::Binary) + } } /// A single file's patch with operation metadata. @@ -154,6 +207,19 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> { } } + fn new_binary( + operation: FileOperation<'a, T>, + old_mode: Option, + new_mode: Option, + ) -> Self { + Self { + operation, + kind: PatchKind::Binary, + old_mode, + new_mode, + } + } + /// Returns the file operation for this patch. pub fn operation(&self) -> &FileOperation<'a, T> { &self.operation diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs index e0ef4b0..08339da 100644 --- a/src/patch_set/parse.rs +++ b/src/patch_set/parse.rs @@ -1,11 +1,12 @@ //! Parse multiple file patches from a unified diff. 
+ use super::{ - error::PatchSetParseErrorKind, FileOperation, FilePatch, Format, ParseOptions, + error::PatchSetParseErrorKind, FileMode, FileOperation, FilePatch, Format, ParseOptions, PatchSetParseError, }; use crate::patch::parse::parse_one; -use crate::utils::Text; +use crate::utils::{escaped_filename, Text}; +use crate::Patch; use std::borrow::Cow; @@ -112,6 +113,7 @@ fn next_patch<'a, T: Text + ?Sized>( let result = match ps.opts.format { Format::UniDiff => next_unidiff_patch(ps), + Format::GitDiff => next_gitdiff_patch(ps), }; if result.is_none() { @@ -212,6 +214,509 @@ fn strip_email_preamble(input: &T) -> &T { } } +fn next_gitdiff_patch<'a, T: Text + ?Sized>( + ps: &mut PatchSet<'a, T>, +) -> Option, PatchSetParseError>> { + let patch_start = find_gitdiff_start(remaining(ps))?; + ps.offset += patch_start; + ps.found_any = true; + + let abs_patch_start = ps.offset; + + // Parses extended headers incrementally and stops at the first unrecognized line + let (header, header_consumed) = GitHeader::parse(remaining(ps)); + ps.offset += header_consumed; + + // Handle binary markers ("Binary files ... differ") and binary patches ("GIT binary patch") + if header.is_binary_marker || header.is_binary_patch { + let operation = match extract_file_op_binary(&header, abs_patch_start) { + Ok(op) => op, + Err(e) => return Some(Err(e)), + }; + let (old_mode, new_mode) = match parse_file_modes(&header) { + Ok(modes) => modes, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + return Some(Ok(FilePatch::new_binary(operation, old_mode, new_mode))); + } + + // `git diff` output format is stricter. + // There is no preamble between Git headers and unidiff patch portion, + // so we safely don't perform the preamble skipping. + // + // If we did, it would fail the pure rename/mode-change operation + // since those ops have no unidiff patch portion + // and are directly followed by the next `diff --git` header. 
+ let opts = crate::patch::parse::ParseOpts::default().no_skip_preamble(); + let (result, consumed) = parse_one(remaining(ps), opts); + ps.offset += consumed; + let patch = match result { + Ok(patch) => patch, + Err(e) => return Some(Err(e.into())), + }; + + // FIXME: error spans point at `diff --git` line, not the specific offending line + let operation = match extract_file_op_gitdiff(&header, &patch) { + Ok(op) => op, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + + // FIXME: error spans point at `diff --git` line, not the specific offending line + let (old_mode, new_mode) = match parse_file_modes(&header) { + Ok(modes) => modes, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + + Some(Ok(FilePatch::new(operation, patch, old_mode, new_mode))) +} + +/// Finds the byte offset of the first `diff --git` line in `input`. +fn find_gitdiff_start(input: &T) -> Option { + let mut offset = 0; + for line in input.lines() { + if line.starts_with("diff --git ") { + return Some(offset); + } + offset += line.len(); + } + None +} + +/// Git extended header metadata. +/// +/// Extracted from lines between `diff --git` and `---` (or end of patch). +/// See [git-diff format documentation](https://git-scm.com/docs/diff-format). +#[derive(Debug)] +struct GitHeader<'a, T: ?Sized> { + /// Raw content after "diff --git " prefix. + /// + /// Only parsed in fallback when `---`/`+++` is absent (mode-only, binary, empty file). + diff_git_line: Option<&'a T>, + /// Source path from `rename from `. + rename_from: Option<&'a T>, + /// Destination path from `rename to `. + rename_to: Option<&'a T>, + /// Source path from `copy from `. + copy_from: Option<&'a T>, + /// Destination path from `copy to `. + copy_to: Option<&'a T>, + /// File mode from `old mode `. + old_mode: Option<&'a T>, + /// File mode from `new mode `. + new_mode: Option<&'a T>, + /// File mode from `new file mode `. 
+ new_file_mode: Option<&'a T>, + /// File mode from `deleted file mode `. + deleted_file_mode: Option<&'a T>, + /// Whether this is a binary diff with no actual patch content. + /// + /// Observed `git diff` output (without `--binary`): + /// + /// ```text + /// diff --git a/image.png b/image.png + /// new file mode 100644 + /// index 0000000..7c4530c + /// Binary files /dev/null and b/image.png differ + /// ``` + is_binary_marker: bool, + /// Whether this is a binary diff with actual patch content. + /// + /// Observed `git diff --binary` output: + /// + /// ```text + /// diff --git a/image.png b/image.png + /// new file mode 100644 + /// index 0000000..7c4530c + /// GIT binary patch + /// literal 67 + /// zcmV-J0KET+... + /// + /// literal 0 + /// KcmV+b0RR6000031 + /// ``` + is_binary_patch: bool, +} + +impl Default for GitHeader<'_, T> { + fn default() -> Self { + Self { + diff_git_line: None, + rename_from: None, + rename_to: None, + copy_from: None, + copy_to: None, + old_mode: None, + new_mode: None, + new_file_mode: None, + deleted_file_mode: None, + is_binary_marker: false, + is_binary_patch: false, + } + } +} + +impl<'a, T: Text + ?Sized> GitHeader<'a, T> { + /// Parses git extended headers incrementally from the current position. + /// + /// Consumes the `diff --git` line and all recognized extended header lines, + /// stopping at the first unrecognized line (typically `---`/`+++`/`@@` + /// or the next `diff --git`). + /// + /// Returns the parsed header and the number of bytes consumed. + fn parse(input: &'a T) -> (Self, usize) { + let mut header = GitHeader::default(); + let mut consumed = 0; + + for line in input.lines() { + let trimmed = strip_line_ending(line); + + if let Some(rest) = trimmed.strip_prefix("diff --git ") { + // Only accept the first `diff --git` line. + // A second one means we've reached the next patch. 
+ if header.diff_git_line.is_some() { + break; + } + header.diff_git_line = Some(rest); + } else if let Some(path) = trimmed.strip_prefix("rename from ") { + header.rename_from = Some(path); + } else if let Some(path) = trimmed.strip_prefix("rename to ") { + header.rename_to = Some(path); + } else if let Some(path) = trimmed.strip_prefix("copy from ") { + header.copy_from = Some(path); + } else if let Some(path) = trimmed.strip_prefix("copy to ") { + header.copy_to = Some(path); + } else if let Some(mode) = trimmed.strip_prefix("old mode ") { + header.old_mode = Some(mode); + } else if let Some(mode) = trimmed.strip_prefix("new mode ") { + header.new_mode = Some(mode); + } else if let Some(mode) = trimmed.strip_prefix("new file mode ") { + header.new_file_mode = Some(mode); + } else if let Some(mode) = trimmed.strip_prefix("deleted file mode ") { + header.deleted_file_mode = Some(mode); + } else if trimmed.starts_with("index ") + || trimmed.starts_with("similarity index ") + || trimmed.starts_with("dissimilarity index ") + { + // Recognized but nothing to extract. + } else if trimmed.starts_with("Binary files ") { + header.is_binary_marker = true; + } else if trimmed.starts_with("GIT binary patch") { + header.is_binary_patch = true; + } else { + // Unrecognized line: End of extended headers + // (typically `---`/`+++`/`@@` or trailing content). + break; + } + + consumed += line.len(); + } + + (header, consumed) + } +} + +/// Determines the file operation from git headers and patch paths. +fn extract_file_op_gitdiff<'a, T: Text + ?Sized>( + header: &GitHeader<'a, T>, + patch: &Patch<'a, T>, +) -> Result, PatchSetParseError> { + // Git headers are authoritative for rename/copy. + // Paths may be quoted (e.g., `rename from "foo\tbar.txt"`). 
+ if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) { + return Ok(FileOperation::Rename { + from: escaped_filename(from)?, + to: escaped_filename(to)?, + }); + } + if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) { + return Ok(FileOperation::Copy { + from: escaped_filename(from)?, + to: escaped_filename(to)?, + }); + } + + // Try ---/+++ paths first + if patch.original().is_some() || patch.modified().is_some() { + return extract_file_op_unidiff(patch.original_path(), patch.modified_path()); + } + + // Fall back to `diff --git ` for mode-only and empty file changes + let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else { + return Err(PatchSetParseErrorKind::InvalidDiffGitPath.into()); + }; + + if header.new_file_mode.is_some() { + Ok(FileOperation::Create(modified)) + } else if header.deleted_file_mode.is_some() { + Ok(FileOperation::Delete(original)) + } else { + Ok(FileOperation::Modify { original, modified }) + } +} + +/// Parses file modes from git extended headers. +fn parse_file_modes( + header: &GitHeader<'_, T>, +) -> Result<(Option, Option), PatchSetParseError> { + let parse_mode = |mode: &T| -> Result { + mode.as_str() + .ok_or_else(|| { + let s = String::from_utf8_lossy(mode.as_bytes()).into_owned(); + PatchSetParseErrorKind::InvalidFileMode(s) + })? + .parse::() + }; + let old_mode = header + .old_mode + .or(header.deleted_file_mode) + .map(parse_mode) + .transpose()?; + let new_mode = header + .new_mode + .or(header.new_file_mode) + .map(parse_mode) + .transpose()?; + Ok((old_mode, new_mode)) +} + +/// Extracts both old and new paths from `diff --git` line content. +/// +/// ## Assumption #1: old and new paths are the same +/// +/// This extraction has one strong assumption: +/// Beside their prefixes, old and new paths are the same. +/// +/// From [git-diff format documentation]: +/// +/// > The `a/` and `b/` filenames are the same unless rename/copy is involved. 
+/// > Especially, even for a creation or a deletion, `/dev/null` is not used +/// > in place of the `a/` or `b/` filenames. +/// > +/// > When a rename/copy is involved, file1 and file2 show the name of the +/// > source file of the rename/copy and the name of the file that the +/// > rename/copy produces, respectively. +/// +/// Since rename/copy operations use `rename from/to` and `copy from/to` headers +/// we have handled earlier in [`extract_file_op_gitdiff`], +/// (which have no `a/`/`b/` prefix per git spec), +/// +/// this extraction is only used +/// * when unified diff headers (`---`/`+++`) are absent +/// * Only for mode-only and empty file cases +/// +/// [git-diff format documentation]: https://git-scm.com/docs/diff-format +/// +/// ## Assumption #2: the longest common path suffix is the shared path +/// +/// When custom prefixes contain spaces, +/// multiple splits may produce valid path suffixes. +/// +/// Example: `src/foo.rs src/foo.rs src/foo.rs src/foo.rs` +/// +/// Three splits all produce valid path suffixes (contain `/`): +/// +/// * Position 10 +/// * old path: `src/foo.rs` +/// * new path: `src/foo.rs src/foo.rs src/foo.rs` +/// * common suffix: `foo.rs` +/// * Position 21 +/// * old path: `src/foo.rs src/foo.rs` +/// * new path: `src/foo.rs src/foo.rs` +/// * common suffix: `foo.rs src/foo.rs` +/// * Position 32 +/// * old path: `src/foo.rs src/foo.rs src/foo.rs` +/// * new path: `src/foo.rs` +/// * common suffix: `foo.rs` +/// +/// We observed that `git apply` would pick position 21, +/// which has the longest path suffix, +/// hence this heuristic. 
+/// +/// ## Supported formats +/// +/// * `a/ b/` (default prefix) +/// * ` ` (`git diff --no-prefix`) +/// * ` ` (custom prefix) +/// * `"" ""` (quoted, with escapes) +/// * Mixed quoted/unquoted +fn parse_diff_git_path<'a, T: Text + ?Sized>(line: &'a T) -> Option<(Cow<'a, T>, Cow<'a, T>)> { + if line.starts_with("\"") || line.ends_with("\"") { + parse_quoted_diff_git_path(line) + } else { + parse_unquoted_diff_git_path(line) + } +} + +/// See [`parse_diff_git_path`]. +fn parse_unquoted_diff_git_path<'a, T: Text + ?Sized>( + line: &'a T, +) -> Option<(Cow<'a, T>, Cow<'a, T>)> { + let bytes = line.as_bytes(); + let mut best_match = None; + let mut longest_path_len = 0; + + for (i, _) in bytes.iter().enumerate().filter(|(_, &b)| b == b' ') { + let (left, right_with_space) = line.split_at(i); + // skip the space + let (_, right) = right_with_space.split_at(1); + if left.is_empty() || right.is_empty() { + continue; + } + // Select split with longest common path suffix. + // On ties (`>` not `>=`), the first (leftmost) split wins. + // + // Observed: `git apply` rejects ambiguous splits: + // + // > git diff header lacks filename information + // > when removing N leading pathname component(s)" + // + // Also in : + // + // > The a/ and b/ filenames are the same unless rename/copy is involved. + // + // This kinda tells git-apply's path resolution is strip-level-aware, + // unlike ours. + // + // See `fail_ambiguous_suffix_tie` compat test. + if let Some(path) = longest_common_path_suffix(left, right) { + if path.len() > longest_path_len { + longest_path_len = path.len(); + best_match = Some((left, right)); + } + } + } + + best_match.map(|(l, r)| (Cow::Borrowed(l), Cow::Borrowed(r))) +} + +/// See [`parse_diff_git_path`]. +fn parse_quoted_diff_git_path<'a, T: Text + ?Sized>( + line: &'a T, +) -> Option<(Cow<'a, T>, Cow<'a, T>)> { + let (left_raw, right_raw) = if line.starts_with("\"") { + // First token is quoted. 
+ let bytes = line.as_bytes(); + let mut i = 1; // skip starting `"` + + // Find the closing `"`. + // The only escape where literal `"` appears right after `\` is `\"`, + // an octal double quote `\042` has 3 digits. + // So, `i += 2` correctly skips past `"` and octal digits. + let end = loop { + match bytes.get(i)? { + b'"' => break i + 1, + b'\\' => i += 2, + _ => i += 1, + } + }; + let (first, rest) = line.split_at(end); + let rest = rest.strip_prefix(" ")?; + (first, rest) + } else if let Some(pos) = line.find(" \"") { + // First token is unquoted. The second must be quoted. + let (left, rest) = line.split_at(pos); + let (_, right) = rest.split_at(1); // skip the space + (left, right) + } else { + // Malformed: ends with `"` but no valid quoted path found + return None; + }; + + let left = escaped_filename(left_raw).ok()?; + let right = escaped_filename(right_raw).ok()?; + + // Verify both sides share the same path. + longest_common_path_suffix(left.as_ref(), right.as_ref())?; + Some((left, right)) +} + +/// Extracts the longest common path suffix shared by `a` and `b`. +/// +/// Returns `None` if no valid common path exists. +/// +/// * If both strings are identical, returns the whole string +/// (e.g., `file.rs` vs `file.rs` → `file.rs`). +/// * Otherwise, returns the portion after the first `/` in the common suffix +/// (e.g., `foo/bar.rs` vs `fooo/bar.rs` → `bar.rs`). 
+fn longest_common_path_suffix<'a, T: Text + ?Sized>(a: &'a T, b: &T) -> Option<&'a T> { + if a.is_empty() || b.is_empty() { + return None; + } + + let suffix_len = a + .as_bytes() + .iter() + .rev() + .zip(b.as_bytes().iter().rev()) + .take_while(|(x, y)| x == y) + .count(); + + if suffix_len == 0 { + return None; + } + + // Identical strings + if suffix_len == a.len() && a.len() == b.len() { + return Some(a); + } + + // Find first '/' in suffix and return path after it + let suffix_start = a.len() - suffix_len; + let (_, suffix) = a.split_at(suffix_start); + suffix + .split_at_exclusive("/") + .map(|(_, path)| path) + .filter(|p| !p.is_empty()) +} + +/// Extracts the file operation for a binary patch from git headers. +/// +/// Binary patches have no `---`/`+++` headers, so paths come from the +/// `diff --git` line or rename/copy headers. +fn extract_file_op_binary<'a, T: Text + ?Sized>( + header: &GitHeader<'a, T>, + abs_patch_start: usize, +) -> Result, PatchSetParseError> { + // Git headers are authoritative for rename/copy. + // Paths may be quoted (e.g., `rename from "foo\tbar.txt"`). 
+ if let (Some(from), Some(to)) = (header.rename_from, header.rename_to) { + return Ok(FileOperation::Rename { + from: escaped_filename(from)?, + to: escaped_filename(to)?, + }); + } + if let (Some(from), Some(to)) = (header.copy_from, header.copy_to) { + return Ok(FileOperation::Copy { + from: escaped_filename(from)?, + to: escaped_filename(to)?, + }); + } + + let Some((original, modified)) = header.diff_git_line.and_then(parse_diff_git_path) else { + return Err(PatchSetParseError::new( + PatchSetParseErrorKind::InvalidDiffGitPath, + abs_patch_start..abs_patch_start, + )); + }; + + if header.new_file_mode.is_some() { + Ok(FileOperation::Create(modified)) + } else if header.deleted_file_mode.is_some() { + Ok(FileOperation::Delete(original)) + } else { + Ok(FileOperation::Modify { original, modified }) + } +} + /// Extracts the file operation from a patch based on its header paths. fn extract_file_op_unidiff<'a, T: Text + ?Sized>( original: Option<&Cow<'a, T>>, @@ -257,3 +762,13 @@ fn extract_file_op_unidiff<'a, T: Text + ?Sized>( } } } + +/// Strips the trailing `\n` from a line yielded by [`Text::lines`]. +/// +/// [`Text::lines`] includes line endings; strip for matching. +fn strip_line_ending(line: &T) -> &T { + // TODO: GNU patch strips trailing CRs from CRLF patches automatically. + // We should consider adding compat tests for GNU patch. + // And `git apply` seems to reject. Worth adding tests as well. + line.strip_suffix("\n").unwrap_or(line) +} diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs index 52f1c01..ee7f75b 100644 --- a/src/patch_set/tests.rs +++ b/src/patch_set/tests.rs @@ -1,6 +1,6 @@ //! Tests for patchset parsing. 
-use super::{error::PatchSetParseErrorKind, FileOperation, ParseOptions, PatchKind, PatchSet}; +use super::{error::PatchSetParseErrorKind, FileOperation, ParseOptions, PatchSet}; mod file_operation { use super::*; @@ -464,6 +464,313 @@ In a hole in the ground there lived a hobbit } } +mod patchset_gitdiff { + use super::*; + fn parse_gitdiff(input: &str) -> Vec> { + PatchSet::parse(input, ParseOptions::gitdiff()) + .collect::, _>>() + .unwrap() + } + + /// `parse_one` must stop at `diff --git` boundaries so that + /// back-to-back patches are split correctly. + /// Without this, the second patch's `diff --git` line would be + /// swallowed as trailing junk by the first patch's hunk parser. + #[test] + fn multi_file_stops_at_diff_git_boundary() { + let input = "\ +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old foo ++new foo +diff --git a/bar b/bar +--- a/bar ++++ b/bar +@@ -1 +1 @@ +-old bar ++new bar +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + } + + #[test] + fn pure_rename() { + let input = "\ +diff --git a/old.rs b/new.rs +similarity index 100% +rename from old.rs +rename to new.rs +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "old.rs".into(), + to: "new.rs".into(), + } + ); + } + + /// Empty file creation has no ---/+++ headers, so the path comes + /// from the `diff --git` line and retains the `b/` prefix. + /// Callers use `strip_prefix(1)` to remove it. 
+ #[test] + fn new_empty_file() { + let input = "\ +diff --git a/empty b/empty +new file mode 100644 +index 0000000..e69de29 +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Create("b/empty".into()) + ); + let p = patches[0].patch().as_text().unwrap(); + assert!(p.hunks().is_empty()); + } + + #[test] + fn rename_then_modify() { + // Rename with no hunks followed by a modify with hunks. + // Tests that offset advances correctly across both. + let input = "\ +diff --git a/old.rs b/new.rs +similarity index 100% +rename from old.rs +rename to new.rs +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old ++new +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + assert!(matches!( + patches[0].operation(), + FileOperation::Rename { .. } + )); + assert!(matches!( + patches[1].operation(), + FileOperation::Modify { .. } + )); + } + + /// Quoted path containing an escaped quote (`\"`). + /// Git produces this for filenames with literal double quotes. + /// + /// Observed with git 2.53.0: + /// $ printf 'x' > 'with"quote' && git add -A + /// $ git diff --cached | head -1 + /// diff --git "a/with\"quote" "b/with\"quote" + #[test] + fn path_quoted_with_escaped_quote() { + let input = "\ +diff --git \"a/with\\\"quote\" \"b/with\\\"quote\" +--- \"a/with\\\"quote\" ++++ \"b/with\\\"quote\" +@@ -1 +1 @@ +-old ++new +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Modify { + original: "a/with\"quote".to_owned().into(), + modified: "b/with\"quote".to_owned().into(), + } + ); + } + + /// Copy operation extracted from git extended headers. 
+ #[test] + fn copy_operation() { + let input = "\ +diff --git a/original.rs b/copied.rs +similarity index 100% +copy from original.rs +copy to copied.rs +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Copy { + from: "original.rs".into(), + to: "copied.rs".into(), + } + ); + } + + /// Rename with both paths quoted (escapes in both). + #[test] + fn rename_both_quoted() { + let input = "\ +diff --git \"a/foo\\tbar.rs\" \"b/baz\\tqux.rs\" +similarity index 100% +rename from \"foo\\tbar.rs\" +rename to \"baz\\tqux.rs\" +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "foo\tbar.rs".into(), + to: "baz\tqux.rs".into(), + } + ); + } + + /// Rename from quoted (has escape) to unquoted (plain). + #[test] + fn rename_quoted_to_unquoted() { + let input = "\ +diff --git \"a/foo\\tbar.rs\" b/normal.rs +similarity index 100% +rename from \"foo\\tbar.rs\" +rename to normal.rs +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "foo\tbar.rs".into(), + to: "normal.rs".into(), + } + ); + } + + /// Rename from unquoted to quoted (has escape). + #[test] + fn rename_unquoted_to_quoted() { + let input = "\ +diff --git a/normal.rs \"b/foo\\tbar.rs\" +similarity index 100% +rename from normal.rs +rename to \"foo\\tbar.rs\" +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Rename { + from: "normal.rs".into(), + to: "foo\tbar.rs".into(), + } + ); + } + + /// Deleted file: `deleted file mode` header + /dev/null in +++. 
+ #[test] + fn deleted_file_with_mode() { + let input = "\ +diff --git a/gone.rs b/gone.rs +deleted file mode 100644 +index abc1234..0000000 +--- a/gone.rs ++++ /dev/null +@@ -1 +0,0 @@ +-content +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_delete()); + assert_eq!( + patches[0].old_mode(), + Some(&super::super::FileMode::Regular) + ); + } + + /// Mode-only change: no hunks, no ---/+++ headers. + /// File operation falls back to `diff --git` line paths. + #[test] + fn mode_only_change() { + let input = "\ +diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + assert_eq!( + patches[0].old_mode(), + Some(&super::super::FileMode::Regular), + ); + assert_eq!( + patches[0].new_mode(), + Some(&super::super::FileMode::Executable), + ); + let p = patches[0].patch().as_text().unwrap(); + assert!(p.hunks().is_empty()); + } + + /// New file with content: `new file mode` header + /dev/null in ---. + #[test] + fn new_file_with_content() { + let input = "\ +diff --git a/new.rs b/new.rs +new file mode 100644 +index 0000000..abc1234 +--- /dev/null ++++ b/new.rs +@@ -0,0 +1 @@ ++hello +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_create()); + assert_eq!( + patches[0].new_mode(), + Some(&super::super::FileMode::Regular), + ); + } + + /// `diff --git` line with no-prefix paths (`git diff --no-prefix`). + /// Fallback path parsing works when ---/+++ are absent. 
+ #[test] + fn no_prefix_empty_file() { + let input = "\ +diff --git file.rs file.rs +new file mode 100644 +index 0000000..e69de29 +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_create()); + } + + #[test] + fn binary_emits_marker() { + let input = "\ +diff --git a/img.png b/img.png +Binary files a/img.png and b/img.png differ +diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old ++new +"; + let patches = parse_gitdiff(input); + assert_eq!(patches.len(), 2); + assert!(patches[0].patch().is_binary()); + assert!(patches[0].operation().is_modify()); + assert!(!patches[1].patch().is_binary()); + } +} + mod patchset_unidiff_bytes { use super::*; use crate::patch::Line; @@ -502,7 +809,7 @@ mod patchset_unidiff_bytes { .unwrap(); assert_eq!(patches.len(), 1); - let PatchKind::Text(patch) = patches[0].patch(); + let patch = patches[0].patch().as_text().unwrap(); let lines = patch.hunks()[0].lines(); assert_eq!(lines[0], Line::Delete(b"old\x89PNG\n".as_slice())); assert_eq!(lines[1], Line::Insert(b"new\x89PNG\n".as_slice())); diff --git a/tests/compat/common.rs b/tests/compat/common.rs index 60cd2e2..2ee41d7 100644 --- a/tests/compat/common.rs +++ b/tests/compat/common.rs @@ -288,6 +288,9 @@ pub fn apply_diffy( } fs::write(&result_path, &result).unwrap(); } + PatchKind::Binary => { + // No patch data to apply — nothing to do. + } } } diff --git a/tests/replay.rs b/tests/replay.rs index 38cddd7..8a62696 100644 --- a/tests/replay.rs +++ b/tests/replay.rs @@ -523,6 +523,10 @@ fn process_commit( ); } } + PatchKind::Binary => { + skipped += 1; + continue; + } } applied += 1; From da4cf136f95f3ffc01d6c40c3d711393498945af Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Thu, 16 Apr 2026 11:42:02 -0400 Subject: [PATCH 02/12] test(compat): add git apply compatibility tests Compat test for also `git apply`. 
--- tests/compat/common.rs | 112 ++++++++- .../fail_ambiguous_suffix_tie/in/foo.patch | 3 + .../git/fail_ambiguous_suffix_tie/out/x c/x | 0 .../compat/git/fail_both_devnull/in/foo.patch | 6 + .../git/fail_prefix_no_slash/in/foo.patch | 13 ++ .../git/fail_prefix_no_slash/in/old.txt | 10 + .../format_patch_diff_in_message/in/file.txt | 1 + .../format_patch_diff_in_message/in/foo.patch | 20 ++ .../format_patch_diff_in_message/out/file.txt | 1 + tests/compat/git/format_patch_mbox/in/a.txt | 1 + tests/compat/git/format_patch_mbox/in/b.txt | 1 + tests/compat/git/format_patch_mbox/in/c.txt | 1 + .../compat/git/format_patch_mbox/in/foo.patch | 56 +++++ tests/compat/git/format_patch_mbox/out/a.txt | 1 + tests/compat/git/format_patch_mbox/out/b.txt | 1 + tests/compat/git/format_patch_mbox/out/c.txt | 1 + .../in/file.txt | 1 + .../in/foo.patch | 21 ++ .../out/file.txt | 3 + .../git/format_patch_preamble/in/file.txt | 1 + .../git/format_patch_preamble/in/foo.patch | 20 ++ .../git/format_patch_preamble/out/file.txt | 1 + .../git/format_patch_signature/in/file.txt | 1 + .../git/format_patch_signature/in/foo.patch | 19 ++ .../git/format_patch_signature/out/file.txt | 1 + .../compat/git/junk_between_files/in/bar.txt | 1 + .../git/junk_between_files/in/foo.patch | 17 ++ .../compat/git/junk_between_files/in/foo.txt | 1 + .../compat/git/junk_between_files/out/bar.txt | 1 + .../compat/git/junk_between_files/out/foo.txt | 1 + .../compat/git/junk_between_hunks/in/file.txt | 9 + .../git/junk_between_hunks/in/foo.patch | 15 ++ .../git/junk_between_hunks/out/file.txt | 9 + tests/compat/git/mod.rs | 216 ++++++++++++++++++ .../git/nested_diff_signature/in/example.rs | 1 + .../git/nested_diff_signature/in/foo.patch | 25 ++ .../nested_diff_signature/in/mir-test.diff | 12 + .../git/nested_diff_signature/out/example.rs | 2 + .../nested_diff_signature/out/mir-test.diff | 0 .../git/non_utf8_hunk_content/in/file.bin | 1 + .../git/non_utf8_hunk_content/in/foo.patch | 6 + 
.../git/non_utf8_hunk_content/out/file.bin | 1 + .../git/path_ambiguous_suffix/in/foo.patch | 3 + .../out/foo.rs src/foo.rs | 0 .../path_containing_space_b/in/foo b/baz.txt | 1 + .../git/path_containing_space_b/in/foo.patch | 7 + .../path_containing_space_b/out/foo b/baz.txt | 1 + .../git/path_custom_prefix/in/foo.patch | 13 ++ .../compat/git/path_custom_prefix/in/old.txt | 10 + .../compat/git/path_custom_prefix/out/old.txt | 10 + .../git/path_mixed_quoted/in/foo\tbar.txt" | 10 + .../compat/git/path_mixed_quoted/in/foo.patch | 16 ++ .../git/path_mixed_quoted/out/normal.txt | 10 + tests/compat/git/path_no_prefix/in/file.txt | 1 + tests/compat/git/path_no_prefix/in/foo.patch | 7 + tests/compat/git/path_no_prefix/out/file.txt | 1 + .../git/path_quoted_escapes/in/foo\tbar.txt" | 1 + .../git/path_quoted_escapes/in/foo.patch | 7 + .../git/path_quoted_escapes/out/foo\tbar.txt" | 1 + .../git/path_quoted_inner_quote/in/foo.patch | 7 + .../in/with\"quote.txt" | 1 + .../out/with\"quote.txt" | 1 + .../git/path_quoted_named_escape/in/foo.patch | 6 + .../git/path_quoted_named_escape/out/bel\a" | 1 + .../git/path_quoted_octal_escape/in/foo.patch | 6 + .../git/path_quoted_octal_escape/out/tl\033" | 1 + .../git/path_with_spaces/in/foo bar.txt | 1 + .../compat/git/path_with_spaces/in/foo.patch | 7 + .../git/path_with_spaces/out/foo bar.txt | 1 + .../gnu_patch/format_patch_mbox/in/a.txt | 1 + .../gnu_patch/format_patch_mbox/in/b.txt | 1 + .../gnu_patch/format_patch_mbox/in/c.txt | 1 + .../gnu_patch/format_patch_mbox/in/foo.patch | 56 +++++ .../gnu_patch/format_patch_mbox/out/a.txt | 1 + .../gnu_patch/format_patch_mbox/out/b.txt | 1 + .../gnu_patch/format_patch_mbox/out/c.txt | 1 + tests/compat/gnu_patch/mod.rs | 7 + tests/compat/main.rs | 4 +- 78 files changed, 805 insertions(+), 13 deletions(-) create mode 100644 tests/compat/git/fail_ambiguous_suffix_tie/in/foo.patch create mode 100644 tests/compat/git/fail_ambiguous_suffix_tie/out/x c/x create mode 100644 
tests/compat/git/fail_both_devnull/in/foo.patch create mode 100644 tests/compat/git/fail_prefix_no_slash/in/foo.patch create mode 100644 tests/compat/git/fail_prefix_no_slash/in/old.txt create mode 100644 tests/compat/git/format_patch_diff_in_message/in/file.txt create mode 100644 tests/compat/git/format_patch_diff_in_message/in/foo.patch create mode 100644 tests/compat/git/format_patch_diff_in_message/out/file.txt create mode 100644 tests/compat/git/format_patch_mbox/in/a.txt create mode 100644 tests/compat/git/format_patch_mbox/in/b.txt create mode 100644 tests/compat/git/format_patch_mbox/in/c.txt create mode 100644 tests/compat/git/format_patch_mbox/in/foo.patch create mode 100644 tests/compat/git/format_patch_mbox/out/a.txt create mode 100644 tests/compat/git/format_patch_mbox/out/b.txt create mode 100644 tests/compat/git/format_patch_mbox/out/c.txt create mode 100644 tests/compat/git/format_patch_multiple_separators/in/file.txt create mode 100644 tests/compat/git/format_patch_multiple_separators/in/foo.patch create mode 100644 tests/compat/git/format_patch_multiple_separators/out/file.txt create mode 100644 tests/compat/git/format_patch_preamble/in/file.txt create mode 100644 tests/compat/git/format_patch_preamble/in/foo.patch create mode 100644 tests/compat/git/format_patch_preamble/out/file.txt create mode 100644 tests/compat/git/format_patch_signature/in/file.txt create mode 100644 tests/compat/git/format_patch_signature/in/foo.patch create mode 100644 tests/compat/git/format_patch_signature/out/file.txt create mode 100644 tests/compat/git/junk_between_files/in/bar.txt create mode 100644 tests/compat/git/junk_between_files/in/foo.patch create mode 100644 tests/compat/git/junk_between_files/in/foo.txt create mode 100644 tests/compat/git/junk_between_files/out/bar.txt create mode 100644 tests/compat/git/junk_between_files/out/foo.txt create mode 100644 tests/compat/git/junk_between_hunks/in/file.txt create mode 100644 
tests/compat/git/junk_between_hunks/in/foo.patch create mode 100644 tests/compat/git/junk_between_hunks/out/file.txt create mode 100644 tests/compat/git/mod.rs create mode 100644 tests/compat/git/nested_diff_signature/in/example.rs create mode 100644 tests/compat/git/nested_diff_signature/in/foo.patch create mode 100644 tests/compat/git/nested_diff_signature/in/mir-test.diff create mode 100644 tests/compat/git/nested_diff_signature/out/example.rs create mode 100644 tests/compat/git/nested_diff_signature/out/mir-test.diff create mode 100644 tests/compat/git/non_utf8_hunk_content/in/file.bin create mode 100644 tests/compat/git/non_utf8_hunk_content/in/foo.patch create mode 100644 tests/compat/git/non_utf8_hunk_content/out/file.bin create mode 100644 tests/compat/git/path_ambiguous_suffix/in/foo.patch create mode 100644 tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs create mode 100644 tests/compat/git/path_containing_space_b/in/foo b/baz.txt create mode 100644 tests/compat/git/path_containing_space_b/in/foo.patch create mode 100644 tests/compat/git/path_containing_space_b/out/foo b/baz.txt create mode 100644 tests/compat/git/path_custom_prefix/in/foo.patch create mode 100644 tests/compat/git/path_custom_prefix/in/old.txt create mode 100644 tests/compat/git/path_custom_prefix/out/old.txt create mode 100644 "tests/compat/git/path_mixed_quoted/in/foo\tbar.txt" create mode 100644 tests/compat/git/path_mixed_quoted/in/foo.patch create mode 100644 tests/compat/git/path_mixed_quoted/out/normal.txt create mode 100644 tests/compat/git/path_no_prefix/in/file.txt create mode 100644 tests/compat/git/path_no_prefix/in/foo.patch create mode 100644 tests/compat/git/path_no_prefix/out/file.txt create mode 100644 "tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" create mode 100644 tests/compat/git/path_quoted_escapes/in/foo.patch create mode 100644 "tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" create mode 100644 
tests/compat/git/path_quoted_inner_quote/in/foo.patch create mode 100644 "tests/compat/git/path_quoted_inner_quote/in/with\"quote.txt" create mode 100644 "tests/compat/git/path_quoted_inner_quote/out/with\"quote.txt" create mode 100644 tests/compat/git/path_quoted_named_escape/in/foo.patch create mode 100644 "tests/compat/git/path_quoted_named_escape/out/bel\a" create mode 100644 tests/compat/git/path_quoted_octal_escape/in/foo.patch create mode 100644 "tests/compat/git/path_quoted_octal_escape/out/tl\033" create mode 100644 tests/compat/git/path_with_spaces/in/foo bar.txt create mode 100644 tests/compat/git/path_with_spaces/in/foo.patch create mode 100644 tests/compat/git/path_with_spaces/out/foo bar.txt create mode 100644 tests/compat/gnu_patch/format_patch_mbox/in/a.txt create mode 100644 tests/compat/gnu_patch/format_patch_mbox/in/b.txt create mode 100644 tests/compat/gnu_patch/format_patch_mbox/in/c.txt create mode 100644 tests/compat/gnu_patch/format_patch_mbox/in/foo.patch create mode 100644 tests/compat/gnu_patch/format_patch_mbox/out/a.txt create mode 100644 tests/compat/gnu_patch/format_patch_mbox/out/b.txt create mode 100644 tests/compat/gnu_patch/format_patch_mbox/out/c.txt diff --git a/tests/compat/common.rs b/tests/compat/common.rs index 2ee41d7..0993e2a 100644 --- a/tests/compat/common.rs +++ b/tests/compat/common.rs @@ -2,16 +2,27 @@ use std::{ fs, + io::Write, path::{Path, PathBuf}, - process::Command, + process::{Command, Stdio}, sync::Once, }; use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError}; +/// Which external tool to compare against. +#[derive(Clone, Copy)] +pub enum CompatMode { + /// `git apply` with `ParseOptions::gitdiff()` + Git, + /// GNU `patch` with `ParseOptions::unidiff()` + GnuPatch, +} + /// A test case with fluent builder API. 
pub struct Case<'a> { case_name: &'a str, + mode: CompatMode, /// Strip level for path prefixes (default: 0) strip_level: u32, /// Whether diffy is expected to succeed (default: true) @@ -21,20 +32,37 @@ pub struct Case<'a> { } impl<'a> Case<'a> { + /// Create a test case for `git apply` comparison. + pub fn git(name: &'a str) -> Self { + Self { + case_name: name, + mode: CompatMode::Git, + strip_level: 0, + expect_success: true, + expect_compat: true, + } + } + /// Create a test case for GNU patch comparison. pub fn gnu_patch(name: &'a str) -> Self { Self { case_name: name, + mode: CompatMode::GnuPatch, strip_level: 0, expect_success: true, expect_compat: true, } } - /// Get the case directory path. + /// Get the case directory path based on mode. fn case_dir(&self) -> PathBuf { + let subdir = match self.mode { + CompatMode::Git => "git", + CompatMode::GnuPatch => "gnu_patch", + }; PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("tests/compat/gnu_patch") + .join("tests/compat") + .join(subdir) .join(self.case_name) } @@ -62,12 +90,19 @@ impl<'a> Case<'a> { .unwrap_or_else(|e| panic!("failed to read {}: {e}", patch_path.display())); let case_name = self.case_name; + let prefix = match self.mode { + CompatMode::Git => "git", + CompatMode::GnuPatch => "gnu", + }; let temp_base = temp_base(); - let diffy_output = temp_base.join(format!("gnu-{case_name}-diffy")); + let diffy_output = temp_base.join(format!("{prefix}-{case_name}-diffy")); create_output_dir(&diffy_output); - let opts = ParseOptions::unidiff(); + let opts = match self.mode { + CompatMode::Git => ParseOptions::gitdiff(), + CompatMode::GnuPatch => ParseOptions::unidiff(), + }; // Apply with diffy let diffy_result = apply_diffy(&in_dir, &patch, &diffy_output, opts, self.strip_level); @@ -81,12 +116,19 @@ impl<'a> Case<'a> { // In CI mode, also verify external tool behavior if is_ci() { - let external_output = temp_base.join(format!("gnu-{case_name}-external")); + let external_output = 
temp_base.join(format!("{prefix}-{case_name}-external")); create_output_dir(&external_output); - print_patch_version(); - let external_result = - gnu_patch_apply(&in_dir, &patch_path, &external_output, self.strip_level); + let external_result = match self.mode { + CompatMode::Git => { + print_git_version(); + git_apply(&external_output, &patch, self.strip_level, &in_dir) + } + CompatMode::GnuPatch => { + print_patch_version(); + gnu_patch_apply(&in_dir, &patch_path, &external_output, self.strip_level) + } + }; // For success cases where both succeed and are expected to be compatible, // verify outputs match @@ -149,6 +191,52 @@ fn gnu_patch_apply( } } +fn git_apply( + output_dir: &Path, + patch: &[u8], + strip_level: u32, + in_dir: &Path, +) -> Result<(), String> { + copy_input_files(in_dir, output_dir, &["patch"]); + + let mut cmd = Command::new("git"); + cmd.env("GIT_CONFIG_NOSYSTEM", "1"); + cmd.env("GIT_CONFIG_GLOBAL", "/dev/null"); + cmd.current_dir(output_dir); + cmd.args(["apply", &format!("-p{strip_level}"), "-"]); + cmd.stdin(Stdio::piped()); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + let mut child = cmd.spawn().expect("failed to spawn git apply"); + child.stdin.as_mut().unwrap().write_all(patch).unwrap(); + + let output = child.wait_with_output().unwrap(); + if output.status.success() { + Ok(()) + } else { + Err(String::from_utf8_lossy(&output.stderr).to_string()) + } +} + +fn print_git_version() { + static ONCE: Once = Once::new(); + ONCE.call_once(|| { + let output = Command::new("git").arg("--version").output(); + match output { + Ok(o) if o.status.success() => { + let version = String::from_utf8_lossy(&o.stdout); + eprintln!( + "git version: {}", + version.lines().next().unwrap_or("unknown") + ); + } + Ok(o) => eprintln!("git --version failed: {}", o.status), + Err(e) => eprintln!("git command not found: {e}"), + } + }); +} + fn print_patch_version() { static ONCE: Once = Once::new(); ONCE.call_once(|| { @@ -172,6 +260,7 @@ fn 
print_patch_version() { pub enum TestError { Parse(PatchSetParseError), Apply(diffy::ApplyError), + Io(std::io::Error), } impl std::fmt::Display for TestError { @@ -179,6 +268,7 @@ impl std::fmt::Display for TestError { match self { TestError::Parse(e) => write!(f, "parse error: {e}"), TestError::Apply(e) => write!(f, "apply error: {e}"), + TestError::Io(e) => write!(f, "io error: {e}"), } } } @@ -273,9 +363,7 @@ pub fn apply_diffy( PatchKind::Text(patch) => { let original = if let Some(name) = original_name { let original_path = in_dir.join(bytes_to_path(name)); - fs::read(&original_path).unwrap_or_else(|e| { - panic!("failed to read {}: {e}", original_path.display()) - }) + fs::read(&original_path).map_err(TestError::Io)? } else { Vec::new() }; diff --git a/tests/compat/git/fail_ambiguous_suffix_tie/in/foo.patch b/tests/compat/git/fail_ambiguous_suffix_tie/in/foo.patch new file mode 100644 index 0000000..fe6658e --- /dev/null +++ b/tests/compat/git/fail_ambiguous_suffix_tie/in/foo.patch @@ -0,0 +1,3 @@ +diff --git a/x b/x c/x +new file mode 100644 +index 0000000..e69de29 diff --git a/tests/compat/git/fail_ambiguous_suffix_tie/out/x c/x b/tests/compat/git/fail_ambiguous_suffix_tie/out/x c/x new file mode 100644 index 0000000..e69de29 diff --git a/tests/compat/git/fail_both_devnull/in/foo.patch b/tests/compat/git/fail_both_devnull/in/foo.patch new file mode 100644 index 0000000..26b2827 --- /dev/null +++ b/tests/compat/git/fail_both_devnull/in/foo.patch @@ -0,0 +1,6 @@ +diff --git a/foo b/foo +--- /dev/null ++++ /dev/null +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/fail_prefix_no_slash/in/foo.patch b/tests/compat/git/fail_prefix_no_slash/in/foo.patch new file mode 100644 index 0000000..c6fb4d2 --- /dev/null +++ b/tests/compat/git/fail_prefix_no_slash/in/foo.patch @@ -0,0 +1,13 @@ +diff --git fooold.txt barold.txt +index 4083766..40c94fa 100644 +--- fooold.txt ++++ barold.txt +@@ -2,7 +2,7 @@ line1 + line2 + line3 + line4 +-line5 ++changed + line6 + line7 
+ line8 diff --git a/tests/compat/git/fail_prefix_no_slash/in/old.txt b/tests/compat/git/fail_prefix_no_slash/in/old.txt new file mode 100644 index 0000000..4083766 --- /dev/null +++ b/tests/compat/git/fail_prefix_no_slash/in/old.txt @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 +line10 diff --git a/tests/compat/git/format_patch_diff_in_message/in/file.txt b/tests/compat/git/format_patch_diff_in_message/in/file.txt new file mode 100644 index 0000000..3367afd --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_diff_in_message/in/foo.patch b/tests/compat/git/format_patch_diff_in_message/in/foo.patch new file mode 100644 index 0000000..ce33371 --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/in/foo.patch @@ -0,0 +1,20 @@ +From 8a14b135fe7ba10bab09a77c4a687faaf1d92a26 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Fix diff --git parsing bug + +The line `diff --git a/x b/x` in messages was incorrectly parsed. 
+--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_diff_in_message/out/file.txt b/tests/compat/git/format_patch_diff_in_message/out/file.txt new file mode 100644 index 0000000..3e75765 --- /dev/null +++ b/tests/compat/git/format_patch_diff_in_message/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/format_patch_mbox/in/a.txt b/tests/compat/git/format_patch_mbox/in/a.txt new file mode 100644 index 0000000..ffc7496 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/in/a.txt @@ -0,0 +1 @@ +old a diff --git a/tests/compat/git/format_patch_mbox/in/b.txt b/tests/compat/git/format_patch_mbox/in/b.txt new file mode 100644 index 0000000..568e619 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/in/b.txt @@ -0,0 +1 @@ +old b diff --git a/tests/compat/git/format_patch_mbox/in/c.txt b/tests/compat/git/format_patch_mbox/in/c.txt new file mode 100644 index 0000000..89cb37d --- /dev/null +++ b/tests/compat/git/format_patch_mbox/in/c.txt @@ -0,0 +1 @@ +old c diff --git a/tests/compat/git/format_patch_mbox/in/foo.patch b/tests/compat/git/format_patch_mbox/in/foo.patch new file mode 100644 index 0000000..720648e --- /dev/null +++ b/tests/compat/git/format_patch_mbox/in/foo.patch @@ -0,0 +1,56 @@ +From aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa Mon Sep 17 00:00:00 2001 +From: Alice +Date: Mon, 1 Jan 2024 00:00:00 +0000 +Subject: [PATCH 1/3] first patch + +First commit message. +--- + a.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/a.txt b/a.txt +--- a/a.txt ++++ b/a.txt +@@ -1 +1 @@ +-old a ++new a +-- +2.40.0 + +From bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb Mon Sep 17 00:00:00 2001 +From: Bob +Date: Mon, 1 Jan 2024 00:00:01 +0000 +Subject: [PATCH 2/3] second patch + +Second commit message. 
+--- + b.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/b.txt b/b.txt +--- a/b.txt ++++ b/b.txt +@@ -1 +1 @@ +-old b ++new b +-- +2.40.0 + +From cccccccccccccccccccccccccccccccccccccccc Mon Sep 17 00:00:00 2001 +From: Carol +Date: Mon, 1 Jan 2024 00:00:02 +0000 +Subject: [PATCH 3/3] third patch + +Third commit message. +--- + c.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/c.txt b/c.txt +--- a/c.txt ++++ b/c.txt +@@ -1 +1 @@ +-old c ++new c +-- +2.40.0 diff --git a/tests/compat/git/format_patch_mbox/out/a.txt b/tests/compat/git/format_patch_mbox/out/a.txt new file mode 100644 index 0000000..56db76c --- /dev/null +++ b/tests/compat/git/format_patch_mbox/out/a.txt @@ -0,0 +1 @@ +new a diff --git a/tests/compat/git/format_patch_mbox/out/b.txt b/tests/compat/git/format_patch_mbox/out/b.txt new file mode 100644 index 0000000..81e3be4 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/out/b.txt @@ -0,0 +1 @@ +new b diff --git a/tests/compat/git/format_patch_mbox/out/c.txt b/tests/compat/git/format_patch_mbox/out/c.txt new file mode 100644 index 0000000..8ed6781 --- /dev/null +++ b/tests/compat/git/format_patch_mbox/out/c.txt @@ -0,0 +1 @@ +new c diff --git a/tests/compat/git/format_patch_multiple_separators/in/file.txt b/tests/compat/git/format_patch_multiple_separators/in/file.txt new file mode 100644 index 0000000..3367afd --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_multiple_separators/in/foo.patch b/tests/compat/git/format_patch_multiple_separators/in/foo.patch new file mode 100644 index 0000000..a0da5b9 --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/in/foo.patch @@ -0,0 +1,21 @@ +From 6bfbbfa49a16bb8173145a933fe5ad918ad48a31 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Add content with --- markers + +--- + file.txt | 4 +++- + 1 file 
changed, 3 insertions(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..c4d4ea8 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1,3 @@ +-old ++--- ++new ++--- +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_multiple_separators/out/file.txt b/tests/compat/git/format_patch_multiple_separators/out/file.txt new file mode 100644 index 0000000..c4d4ea8 --- /dev/null +++ b/tests/compat/git/format_patch_multiple_separators/out/file.txt @@ -0,0 +1,3 @@ +--- +new +--- diff --git a/tests/compat/git/format_patch_preamble/in/file.txt b/tests/compat/git/format_patch_preamble/in/file.txt new file mode 100644 index 0000000..3367afd --- /dev/null +++ b/tests/compat/git/format_patch_preamble/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_preamble/in/foo.patch b/tests/compat/git/format_patch_preamble/in/foo.patch new file mode 100644 index 0000000..9d90425 --- /dev/null +++ b/tests/compat/git/format_patch_preamble/in/foo.patch @@ -0,0 +1,20 @@ +From ddbc9053359329dd016ed89f0d6e460b3b8ff5e3 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] Add new content + +This is the commit body. 
+--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_preamble/out/file.txt b/tests/compat/git/format_patch_preamble/out/file.txt new file mode 100644 index 0000000..3e75765 --- /dev/null +++ b/tests/compat/git/format_patch_preamble/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/format_patch_signature/in/file.txt b/tests/compat/git/format_patch_signature/in/file.txt new file mode 100644 index 0000000..3367afd --- /dev/null +++ b/tests/compat/git/format_patch_signature/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/format_patch_signature/in/foo.patch b/tests/compat/git/format_patch_signature/in/foo.patch new file mode 100644 index 0000000..7614287 --- /dev/null +++ b/tests/compat/git/format_patch_signature/in/foo.patch @@ -0,0 +1,19 @@ +From b3bb3125eff3d2648f15af2a6e0cdcdf6ad8fce1 Mon Sep 17 00:00:00 2001 +From: Test +Date: Wed, 1 Jan 2020 00:00:00 +0000 +Subject: [PATCH] modify + +--- + file.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/file.txt b/file.txt +index 3367afd..3e75765 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new +-- +2.52.0 + diff --git a/tests/compat/git/format_patch_signature/out/file.txt b/tests/compat/git/format_patch_signature/out/file.txt new file mode 100644 index 0000000..3e75765 --- /dev/null +++ b/tests/compat/git/format_patch_signature/out/file.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/junk_between_files/in/bar.txt b/tests/compat/git/junk_between_files/in/bar.txt new file mode 100644 index 0000000..601d8ee --- /dev/null +++ b/tests/compat/git/junk_between_files/in/bar.txt @@ -0,0 +1 @@ +bar line1 diff --git a/tests/compat/git/junk_between_files/in/foo.patch b/tests/compat/git/junk_between_files/in/foo.patch new file mode 100644 index 0000000..dbc5939 --- 
/dev/null +++ b/tests/compat/git/junk_between_files/in/foo.patch @@ -0,0 +1,17 @@ +diff --git a/foo.txt b/foo.txt +index 1234567..89abcdef 100644 +--- a/foo.txt ++++ b/foo.txt +@@ -1 +1 @@ +-foo line1 ++FOO LINE1 +JUNK BETWEEN FILES!!!! +This preamble text should be ignored +by both git apply and diffy +diff --git a/bar.txt b/bar.txt +index 1234567..89abcdef 100644 +--- a/bar.txt ++++ b/bar.txt +@@ -1 +1 @@ +-bar line1 ++BAR LINE1 diff --git a/tests/compat/git/junk_between_files/in/foo.txt b/tests/compat/git/junk_between_files/in/foo.txt new file mode 100644 index 0000000..b11358e --- /dev/null +++ b/tests/compat/git/junk_between_files/in/foo.txt @@ -0,0 +1 @@ +foo line1 diff --git a/tests/compat/git/junk_between_files/out/bar.txt b/tests/compat/git/junk_between_files/out/bar.txt new file mode 100644 index 0000000..76c036d --- /dev/null +++ b/tests/compat/git/junk_between_files/out/bar.txt @@ -0,0 +1 @@ +BAR LINE1 diff --git a/tests/compat/git/junk_between_files/out/foo.txt b/tests/compat/git/junk_between_files/out/foo.txt new file mode 100644 index 0000000..787bc66 --- /dev/null +++ b/tests/compat/git/junk_between_files/out/foo.txt @@ -0,0 +1 @@ +FOO LINE1 diff --git a/tests/compat/git/junk_between_hunks/in/file.txt b/tests/compat/git/junk_between_hunks/in/file.txt new file mode 100644 index 0000000..822aed3 --- /dev/null +++ b/tests/compat/git/junk_between_hunks/in/file.txt @@ -0,0 +1,9 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 diff --git a/tests/compat/git/junk_between_hunks/in/foo.patch b/tests/compat/git/junk_between_hunks/in/foo.patch new file mode 100644 index 0000000..242e959 --- /dev/null +++ b/tests/compat/git/junk_between_hunks/in/foo.patch @@ -0,0 +1,15 @@ +diff --git a/file.txt b/file.txt +index 1234567..89abcdef 100644 +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ +-line1 ++LINE1 + line2 + line3 +JUNK BETWEEN HUNKS +@@ -7,3 +7,3 @@ + line7 +-line8 ++LINE8 + line9 diff --git a/tests/compat/git/junk_between_hunks/out/file.txt 
b/tests/compat/git/junk_between_hunks/out/file.txt new file mode 100644 index 0000000..2e5e454 --- /dev/null +++ b/tests/compat/git/junk_between_hunks/out/file.txt @@ -0,0 +1,9 @@ +LINE1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 diff --git a/tests/compat/git/mod.rs b/tests/compat/git/mod.rs new file mode 100644 index 0000000..041edaf --- /dev/null +++ b/tests/compat/git/mod.rs @@ -0,0 +1,216 @@ +//! Git compatibility tests. See [`crate`] for test structure and usage. +//! +//! Focus areas: +//! +//! - `diff --git` path parsing edge cases (quotes, spaces, ambiguous prefixes) +//! - `git format-patch` email format (preamble/signature stripping) +//! - Agreement between diffy and `git apply` + +use crate::common::Case; + +#[test] +fn path_no_prefix() { + Case::git("path_no_prefix").run(); +} + +#[test] +fn path_quoted_escapes() { + Case::git("path_quoted_escapes").strip(1).run(); +} + +// Git uses C-style named escapes (\a, \b, \f, \v) for certain control +// characters in quoted filenames. Both `git apply` and GNU patch decode +// these correctly. +// +// Observed with git 2.53.0: +// $ printf 'x' > "$(printf 'bel\a')" && git add -A +// $ git diff --cached | grep '+++' +// +++ "b/bel\a" +// +// diffy now decodes these correctly. +#[test] +fn path_quoted_named_escape() { + Case::git("path_quoted_named_escape").strip(1).run(); +} + +// Git uses 3-digit octal escapes (\000-\377) for bytes that don't have +// a named escape. Both `git apply` and GNU patch decode these correctly. +// +// Observed with git 2.53.0: +// $ printf 'x' > "$(printf 'tl\033')" && git add -A +// $ git diff --cached | grep '+++' +// +++ "b/tl\033" +// +// Found via full-history replay test against llvm/llvm-project +// (commits 17af06ba..229c95ab, 6c031780..0683a1e5). +#[test] +fn path_quoted_octal_escape() { + Case::git("path_quoted_octal_escape").strip(1).run(); +} + +// Git uses `\"` inside quoted filenames for literal double-quote characters. 
+// +// Observed with git 2.53.0: +// $ printf 'x' > 'with"quote.txt' && git add -A +// $ git diff --cached | grep '+++' +// +++ "b/with\"quote.txt" +#[test] +fn path_quoted_inner_quote() { + Case::git("path_quoted_inner_quote").strip(1).run(); +} + +#[test] +fn path_with_spaces() { + Case::git("path_with_spaces").strip(1).run(); +} + +#[test] +fn path_containing_space_b() { + Case::git("path_containing_space_b").strip(1).run(); +} + +#[test] +fn format_patch_preamble() { + // Ambiguous: where does preamble end? First `\n---\n` - verify matches git + Case::git("format_patch_preamble").strip(1).run(); +} + +#[test] +fn format_patch_diff_in_message() { + // `diff --git` in commit message must NOT trigger early parsing + Case::git("format_patch_diff_in_message").strip(1).run(); +} + +#[test] +fn format_patch_multiple_separators() { + // Git uses first `\n---\n` as separator (observed git mailinfo behavior) + Case::git("format_patch_multiple_separators").strip(1).run(); +} + +// Mbox stream: three concatenated `git format-patch` emails in one file. +// Each email has full headers, commit message, `---` separator, and signature. +// `git apply` splits on `diff --git` boundaries, ignoring inter-email content. +#[test] +fn format_patch_mbox() { + Case::git("format_patch_mbox").strip(1).run(); +} + +#[test] +fn format_patch_signature() { + // Ambiguous: `\n-- \n` could appear in patch content - verify matches git + Case::git("format_patch_signature").strip(1).run(); +} + +#[test] +fn nested_diff_signature() { + // Patch that deletes a diff file containing `-- ` patterns within its content, + // followed by a real email signature at the end. + // + // Tests that we correctly distinguish between: + // - `-- ` appearing as patch content (from inner diff's empty context lines) + // - `-- ` appearing as the actual email signature separator + // + // Both git apply and GNU patch handle this correctly. 
+ Case::git("nested_diff_signature").strip(1).run(); +} + +#[test] +fn path_ambiguous_suffix() { + // Multiple valid splits in `diff --git` line; algorithm picks longest common suffix. + // Tests the pathological case from parse.rs comments where custom prefix + // creates `src/foo.rs src/foo.rs src/foo.rs src/foo.rs` - verify matches git. + Case::git("path_ambiguous_suffix").strip(1).run(); +} + +// Ambiguous `diff --git` line where two splits produce the same suffix length. +// `diff --git a/x b/x c/x` (from `--dst-prefix='b/x c/'` on file `x`): +// split at 3: a/x vs b/x c/x → suffix `x` (len 1) +// split at 7: a/x b/x vs c/x → suffix `x` (len 1) +// +// - git apply: rejects ("git diff header lacks filename information when +// removing 1 leading pathname component") +// - diffy: succeeds, picks first (leftmost) split +#[test] +fn fail_ambiguous_suffix_tie() { + Case::git("fail_ambiguous_suffix_tie") + .strip(1) + .expect_success(true) + .expect_compat(false) + .run(); +} + +// Both --- and +++ point to /dev/null. +// git apply rejects: "dev/null: No such file or directory" +#[test] +fn fail_both_devnull() { + Case::git("fail_both_devnull") + .strip(1) + .expect_success(false) + .run(); +} + +// Mixed quoted/unquoted paths in `diff --git` line and rename headers. +// +// Rename from a file with tab in its name (quoted) to a normal name (unquoted): +// `diff --git "a/foo\tbar.txt" b/normal.txt` +// `rename from "foo\tbar.txt"` +// `rename to normal.txt` +#[test] +fn path_mixed_quoted() { + Case::git("path_mixed_quoted").strip(1).run(); +} + +// Custom prefix with slash (e.g. `--src-prefix=src/ --dst-prefix=dst/`). +// +// Produces `diff --git src/old.txt dst/old.txt` and matching ---/+++ headers. +// Both git apply and diffy handle this correctly with strip(1). +#[test] +fn path_custom_prefix() { + Case::git("path_custom_prefix").strip(1).run(); +} + +// Custom prefix without slash (e.g. `--src-prefix=foo --dst-prefix=bar`).
+// +// Produces paths like `fooold.txt` / `barold.txt` with no `/` separator, +// making strip impossible. Both git apply and diffy fail: +// - git apply: "git diff header lacks filename information when removing 1 +// leading pathname component" +// - diffy: paths don't match any input file +#[test] +fn fail_prefix_no_slash() { + Case::git("fail_prefix_no_slash") + .strip(1) + .expect_success(false) + .run(); +} + +// Patch with non-UTF-8 bytes (0x80, 0xff) in hunk content. +// Both git apply and diffy handle raw bytes correctly. +#[test] +fn non_utf8_hunk_content() { + Case::git("non_utf8_hunk_content").strip(1).run(); +} + +// Single-file patch with junk between hunks. +// +// - git apply: errors ("patch fragment without header") +// - diffy: succeeds, ignores trailing junk (matches GNU patch behavior) +#[test] +fn junk_between_hunks() { + Case::git("junk_between_hunks") + .strip(1) + .expect_compat(false) + .run(); +} + +// Multi-file patch with junk/preamble text between different files. +// +// git apply behavior: Ignores content between `diff --git` boundaries. +// In GitDiff mode, splitting occurs at `diff --git`, so junk between +// files becomes trailing content of the previous chunk (harmless). +// +// This is different from junk between HUNKS of the same file (which `git apply` rejects).
+#[test] +fn junk_between_files() { + Case::git("junk_between_files").strip(1).run(); +} diff --git a/tests/compat/git/nested_diff_signature/in/example.rs b/tests/compat/git/nested_diff_signature/in/example.rs new file mode 100644 index 0000000..8f3b7ef --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/example.rs @@ -0,0 +1 @@ +fn foo() {} diff --git a/tests/compat/git/nested_diff_signature/in/foo.patch b/tests/compat/git/nested_diff_signature/in/foo.patch new file mode 100644 index 0000000..5d876c6 --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/foo.patch @@ -0,0 +1,25 @@ +diff --git a/mir-test.diff b/mir-test.diff +deleted file mode 100644 +index 98012d7..0000000 +--- a/mir-test.diff ++++ /dev/null +@@ -1,12 +0,0 @@ +-- // MIR before +-+ // MIR after +- +- fn opt() { +- bb0: { +-- nop; +-- } +-- +-- bb1: { +-- nop; +- } +- } +diff --git a/example.rs b/example.rs +index 8f3b7ef..2a40712 100644 +--- a/example.rs ++++ b/example.rs +@@ -1 +1,2 @@ + fn foo() {} ++fn bar() {} diff --git a/tests/compat/git/nested_diff_signature/in/mir-test.diff b/tests/compat/git/nested_diff_signature/in/mir-test.diff new file mode 100644 index 0000000..98012d7 --- /dev/null +++ b/tests/compat/git/nested_diff_signature/in/mir-test.diff @@ -0,0 +1,12 @@ +- // MIR before ++ // MIR after + + fn opt() { + bb0: { +- nop; +- } +- +- bb1: { +- nop; + } + } diff --git a/tests/compat/git/nested_diff_signature/out/example.rs b/tests/compat/git/nested_diff_signature/out/example.rs new file mode 100644 index 0000000..2a40712 --- /dev/null +++ b/tests/compat/git/nested_diff_signature/out/example.rs @@ -0,0 +1,2 @@ +fn foo() {} +fn bar() {} diff --git a/tests/compat/git/nested_diff_signature/out/mir-test.diff b/tests/compat/git/nested_diff_signature/out/mir-test.diff new file mode 100644 index 0000000..e69de29 diff --git a/tests/compat/git/non_utf8_hunk_content/in/file.bin b/tests/compat/git/non_utf8_hunk_content/in/file.bin new file mode 100644 index 0000000..5ddb01f --- 
/dev/null +++ b/tests/compat/git/non_utf8_hunk_content/in/file.bin @@ -0,0 +1 @@ +hello €ÿ world diff --git a/tests/compat/git/non_utf8_hunk_content/in/foo.patch b/tests/compat/git/non_utf8_hunk_content/in/foo.patch new file mode 100644 index 0000000..0c3f72f --- /dev/null +++ b/tests/compat/git/non_utf8_hunk_content/in/foo.patch @@ -0,0 +1,6 @@ +diff --git a/file.bin b/file.bin +--- a/file.bin ++++ b/file.bin +@@ -1 +1 @@ +-hello €ÿ world ++hello €ÿ universe diff --git a/tests/compat/git/non_utf8_hunk_content/out/file.bin b/tests/compat/git/non_utf8_hunk_content/out/file.bin new file mode 100644 index 0000000..715e61f --- /dev/null +++ b/tests/compat/git/non_utf8_hunk_content/out/file.bin @@ -0,0 +1 @@ +hello €ÿ universe diff --git a/tests/compat/git/path_ambiguous_suffix/in/foo.patch b/tests/compat/git/path_ambiguous_suffix/in/foo.patch new file mode 100644 index 0000000..a6815eb --- /dev/null +++ b/tests/compat/git/path_ambiguous_suffix/in/foo.patch @@ -0,0 +1,3 @@ +diff --git src/foo.rs src/foo.rs src/foo.rs src/foo.rs +new file mode 100644 +index 0000000..e69de29 diff --git a/tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs b/tests/compat/git/path_ambiguous_suffix/out/foo.rs src/foo.rs new file mode 100644 index 0000000..e69de29 diff --git a/tests/compat/git/path_containing_space_b/in/foo b/baz.txt b/tests/compat/git/path_containing_space_b/in/foo b/baz.txt new file mode 100644 index 0000000..3367afd --- /dev/null +++ b/tests/compat/git/path_containing_space_b/in/foo b/baz.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_containing_space_b/in/foo.patch b/tests/compat/git/path_containing_space_b/in/foo.patch new file mode 100644 index 0000000..15c6fca --- /dev/null +++ b/tests/compat/git/path_containing_space_b/in/foo.patch @@ -0,0 +1,7 @@ +diff --git a/foo b/baz.txt b/foo b/baz.txt +index 3367afd..3e75765 100644 +--- a/foo b/baz.txt ++++ b/foo b/baz.txt +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/path_containing_space_b/out/foo 
b/baz.txt b/tests/compat/git/path_containing_space_b/out/foo b/baz.txt new file mode 100644 index 0000000..3e75765 --- /dev/null +++ b/tests/compat/git/path_containing_space_b/out/foo b/baz.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/path_custom_prefix/in/foo.patch b/tests/compat/git/path_custom_prefix/in/foo.patch new file mode 100644 index 0000000..013810f --- /dev/null +++ b/tests/compat/git/path_custom_prefix/in/foo.patch @@ -0,0 +1,13 @@ +diff --git src/old.txt dst/old.txt +index 4083766..40c94fa 100644 +--- src/old.txt ++++ dst/old.txt +@@ -2,7 +2,7 @@ line1 + line2 + line3 + line4 +-line5 ++changed + line6 + line7 + line8 diff --git a/tests/compat/git/path_custom_prefix/in/old.txt b/tests/compat/git/path_custom_prefix/in/old.txt new file mode 100644 index 0000000..4083766 --- /dev/null +++ b/tests/compat/git/path_custom_prefix/in/old.txt @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 +line10 diff --git a/tests/compat/git/path_custom_prefix/out/old.txt b/tests/compat/git/path_custom_prefix/out/old.txt new file mode 100644 index 0000000..40c94fa --- /dev/null +++ b/tests/compat/git/path_custom_prefix/out/old.txt @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +changed +line6 +line7 +line8 +line9 +line10 diff --git "a/tests/compat/git/path_mixed_quoted/in/foo\tbar.txt" "b/tests/compat/git/path_mixed_quoted/in/foo\tbar.txt" new file mode 100644 index 0000000..4083766 --- /dev/null +++ "b/tests/compat/git/path_mixed_quoted/in/foo\tbar.txt" @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 +line10 diff --git a/tests/compat/git/path_mixed_quoted/in/foo.patch b/tests/compat/git/path_mixed_quoted/in/foo.patch new file mode 100644 index 0000000..23016da --- /dev/null +++ b/tests/compat/git/path_mixed_quoted/in/foo.patch @@ -0,0 +1,16 @@ +diff --git "a/foo\tbar.txt" b/normal.txt +similarity index 87% +rename from "foo\tbar.txt" +rename to normal.txt +index 4083766..40c94fa 100644 +--- "a/foo\tbar.txt" 
++++ b/normal.txt +@@ -2,7 +2,7 @@ line1 + line2 + line3 + line4 +-line5 ++changed + line6 + line7 + line8 diff --git a/tests/compat/git/path_mixed_quoted/out/normal.txt b/tests/compat/git/path_mixed_quoted/out/normal.txt new file mode 100644 index 0000000..40c94fa --- /dev/null +++ b/tests/compat/git/path_mixed_quoted/out/normal.txt @@ -0,0 +1,10 @@ +line1 +line2 +line3 +line4 +changed +line6 +line7 +line8 +line9 +line10 diff --git a/tests/compat/git/path_no_prefix/in/file.txt b/tests/compat/git/path_no_prefix/in/file.txt new file mode 100644 index 0000000..3367afd --- /dev/null +++ b/tests/compat/git/path_no_prefix/in/file.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_no_prefix/in/foo.patch b/tests/compat/git/path_no_prefix/in/foo.patch new file mode 100644 index 0000000..5e2a9f8 --- /dev/null +++ b/tests/compat/git/path_no_prefix/in/foo.patch @@ -0,0 +1,7 @@ +diff --git file.txt file.txt +index 3367afd..3e75765 100644 +--- file.txt ++++ file.txt +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/path_no_prefix/out/file.txt b/tests/compat/git/path_no_prefix/out/file.txt new file mode 100644 index 0000000..3e75765 --- /dev/null +++ b/tests/compat/git/path_no_prefix/out/file.txt @@ -0,0 +1 @@ +new diff --git "a/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" "b/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" new file mode 100644 index 0000000..3367afd --- /dev/null +++ "b/tests/compat/git/path_quoted_escapes/in/foo\tbar.txt" @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_quoted_escapes/in/foo.patch b/tests/compat/git/path_quoted_escapes/in/foo.patch new file mode 100644 index 0000000..2669223 --- /dev/null +++ b/tests/compat/git/path_quoted_escapes/in/foo.patch @@ -0,0 +1,7 @@ +diff --git "a/foo\tbar.txt" "b/foo\tbar.txt" +index 3367afd..3e75765 100644 +--- "a/foo\tbar.txt" ++++ "b/foo\tbar.txt" +@@ -1 +1 @@ +-old ++new diff --git "a/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" 
"b/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" new file mode 100644 index 0000000..3e75765 --- /dev/null +++ "b/tests/compat/git/path_quoted_escapes/out/foo\tbar.txt" @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/path_quoted_inner_quote/in/foo.patch b/tests/compat/git/path_quoted_inner_quote/in/foo.patch new file mode 100644 index 0000000..473251d --- /dev/null +++ b/tests/compat/git/path_quoted_inner_quote/in/foo.patch @@ -0,0 +1,7 @@ +diff --git "a/with\"quote.txt" "b/with\"quote.txt" +index 3367afd..3e75765 100644 +--- "a/with\"quote.txt" ++++ "b/with\"quote.txt" +@@ -1 +1 @@ +-old ++new diff --git "a/tests/compat/git/path_quoted_inner_quote/in/with\"quote.txt" "b/tests/compat/git/path_quoted_inner_quote/in/with\"quote.txt" new file mode 100644 index 0000000..3367afd --- /dev/null +++ "b/tests/compat/git/path_quoted_inner_quote/in/with\"quote.txt" @@ -0,0 +1 @@ +old diff --git "a/tests/compat/git/path_quoted_inner_quote/out/with\"quote.txt" "b/tests/compat/git/path_quoted_inner_quote/out/with\"quote.txt" new file mode 100644 index 0000000..3e75765 --- /dev/null +++ "b/tests/compat/git/path_quoted_inner_quote/out/with\"quote.txt" @@ -0,0 +1 @@ +new diff --git a/tests/compat/git/path_quoted_named_escape/in/foo.patch b/tests/compat/git/path_quoted_named_escape/in/foo.patch new file mode 100644 index 0000000..cfd3f56 --- /dev/null +++ b/tests/compat/git/path_quoted_named_escape/in/foo.patch @@ -0,0 +1,6 @@ +diff --git "a/bel\a" "b/bel\a" +new file mode 100644 +--- /dev/null ++++ "b/bel\a" +@@ -0,0 +1 @@ ++hello diff --git "a/tests/compat/git/path_quoted_named_escape/out/bel\a" "b/tests/compat/git/path_quoted_named_escape/out/bel\a" new file mode 100644 index 0000000..ce01362 --- /dev/null +++ "b/tests/compat/git/path_quoted_named_escape/out/bel\a" @@ -0,0 +1 @@ +hello diff --git a/tests/compat/git/path_quoted_octal_escape/in/foo.patch b/tests/compat/git/path_quoted_octal_escape/in/foo.patch new file mode 100644 index 0000000..5dda9ec --- /dev/null +++ 
b/tests/compat/git/path_quoted_octal_escape/in/foo.patch @@ -0,0 +1,6 @@ +diff --git "a/tl\033" "b/tl\033" +new file mode 100644 +--- /dev/null ++++ "b/tl\033" +@@ -0,0 +1 @@ ++hello diff --git "a/tests/compat/git/path_quoted_octal_escape/out/tl\033" "b/tests/compat/git/path_quoted_octal_escape/out/tl\033" new file mode 100644 index 0000000..ce01362 --- /dev/null +++ "b/tests/compat/git/path_quoted_octal_escape/out/tl\033" @@ -0,0 +1 @@ +hello diff --git a/tests/compat/git/path_with_spaces/in/foo bar.txt b/tests/compat/git/path_with_spaces/in/foo bar.txt new file mode 100644 index 0000000..3367afd --- /dev/null +++ b/tests/compat/git/path_with_spaces/in/foo bar.txt @@ -0,0 +1 @@ +old diff --git a/tests/compat/git/path_with_spaces/in/foo.patch b/tests/compat/git/path_with_spaces/in/foo.patch new file mode 100644 index 0000000..b3d1d46 --- /dev/null +++ b/tests/compat/git/path_with_spaces/in/foo.patch @@ -0,0 +1,7 @@ +diff --git a/foo bar.txt b/foo bar.txt +index 3367afd..3e75765 100644 +--- a/foo bar.txt ++++ b/foo bar.txt +@@ -1 +1 @@ +-old ++new diff --git a/tests/compat/git/path_with_spaces/out/foo bar.txt b/tests/compat/git/path_with_spaces/out/foo bar.txt new file mode 100644 index 0000000..3e75765 --- /dev/null +++ b/tests/compat/git/path_with_spaces/out/foo bar.txt @@ -0,0 +1 @@ +new diff --git a/tests/compat/gnu_patch/format_patch_mbox/in/a.txt b/tests/compat/gnu_patch/format_patch_mbox/in/a.txt new file mode 100644 index 0000000..ffc7496 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/in/a.txt @@ -0,0 +1 @@ +old a diff --git a/tests/compat/gnu_patch/format_patch_mbox/in/b.txt b/tests/compat/gnu_patch/format_patch_mbox/in/b.txt new file mode 100644 index 0000000..568e619 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/in/b.txt @@ -0,0 +1 @@ +old b diff --git a/tests/compat/gnu_patch/format_patch_mbox/in/c.txt b/tests/compat/gnu_patch/format_patch_mbox/in/c.txt new file mode 100644 index 0000000..89cb37d --- /dev/null +++ 
b/tests/compat/gnu_patch/format_patch_mbox/in/c.txt @@ -0,0 +1 @@ +old c diff --git a/tests/compat/gnu_patch/format_patch_mbox/in/foo.patch b/tests/compat/gnu_patch/format_patch_mbox/in/foo.patch new file mode 100644 index 0000000..720648e --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/in/foo.patch @@ -0,0 +1,56 @@ +From aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa Mon Sep 17 00:00:00 2001 +From: Alice +Date: Mon, 1 Jan 2024 00:00:00 +0000 +Subject: [PATCH 1/3] first patch + +First commit message. +--- + a.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/a.txt b/a.txt +--- a/a.txt ++++ b/a.txt +@@ -1 +1 @@ +-old a ++new a +-- +2.40.0 + +From bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb Mon Sep 17 00:00:00 2001 +From: Bob +Date: Mon, 1 Jan 2024 00:00:01 +0000 +Subject: [PATCH 2/3] second patch + +Second commit message. +--- + b.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/b.txt b/b.txt +--- a/b.txt ++++ b/b.txt +@@ -1 +1 @@ +-old b ++new b +-- +2.40.0 + +From cccccccccccccccccccccccccccccccccccccccc Mon Sep 17 00:00:00 2001 +From: Carol +Date: Mon, 1 Jan 2024 00:00:02 +0000 +Subject: [PATCH 3/3] third patch + +Third commit message. 
+--- + c.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/c.txt b/c.txt +--- a/c.txt ++++ b/c.txt +@@ -1 +1 @@ +-old c ++new c +-- +2.40.0 diff --git a/tests/compat/gnu_patch/format_patch_mbox/out/a.txt b/tests/compat/gnu_patch/format_patch_mbox/out/a.txt new file mode 100644 index 0000000..56db76c --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/out/a.txt @@ -0,0 +1 @@ +new a diff --git a/tests/compat/gnu_patch/format_patch_mbox/out/b.txt b/tests/compat/gnu_patch/format_patch_mbox/out/b.txt new file mode 100644 index 0000000..81e3be4 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/out/b.txt @@ -0,0 +1 @@ +new b diff --git a/tests/compat/gnu_patch/format_patch_mbox/out/c.txt b/tests/compat/gnu_patch/format_patch_mbox/out/c.txt new file mode 100644 index 0000000..8ed6781 --- /dev/null +++ b/tests/compat/gnu_patch/format_patch_mbox/out/c.txt @@ -0,0 +1 @@ +new c diff --git a/tests/compat/gnu_patch/mod.rs b/tests/compat/gnu_patch/mod.rs index 643d16e..3931a41 100644 --- a/tests/compat/gnu_patch/mod.rs +++ b/tests/compat/gnu_patch/mod.rs @@ -158,6 +158,13 @@ fn non_utf8_hunk_content() { Case::gnu_patch("non_utf8_hunk_content").run(); } +// Mbox stream: three concatenated `git format-patch` emails in one file. +// GNU patch finds ---/+++ boundaries regardless of email headers. +#[test] +fn format_patch_mbox() { + Case::gnu_patch("format_patch_mbox").strip(1).run(); +} + // Failure cases #[test] diff --git a/tests/compat/main.rs b/tests/compat/main.rs index 8faf9ea..e35ed07 100644 --- a/tests/compat/main.rs +++ b/tests/compat/main.rs @@ -39,9 +39,11 @@ //! //! 1. Create `case_name/in/` with input file(s) and `foo.patch` //! 2. Run `SNAPSHOTS=overwrite cargo test --test compat` to generate `out/` -//! 3. Add `#[test] fn case_name() { Case::gnu_patch(...).run(); }` in the module +//! 3. Add `#[test] fn case_name() { Case::{gnu_patch,git}(...).run(); }` in the module //! //! 
For failure tests, use `.expect_success(false)` and skip step 2. +//! For intentional compat divergence, use `.expect_compat(false)`. mod common; +mod git; mod gnu_patch; From d4ff209c80db5911d261b8c103aa8e98f0c3dc8c Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Thu, 16 Apr 2026 14:31:26 -0400 Subject: [PATCH 03/12] test(replay): add gitdiff mode support Unlike unidiff, gitdiff produces patches for empty file creations/deletions (`0\t0` in numstat) because they carry `diff --git` + extended headers even without hunks. Binary files (`-\t-\t`) are skipped in gitdiff mode for now. --- tests/replay.rs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/tests/replay.rs b/tests/replay.rs index 8a62696..c98eef1 100644 --- a/tests/replay.rs +++ b/tests/replay.rs @@ -18,8 +18,7 @@ //! * A range (e.g., `abc123..def456`) for a specific commit range //! //! Defaults to 200. Use `0` to verify entire history. -//! * `DIFFY_TEST_PARSE_MODE`: Parse mode to use. -//! Currently only `unidiff` is supported. +//! * `DIFFY_TEST_PARSE_MODE`: Parse mode to use (`unidiff` or `gitdiff`). //! Defaults to `unidiff`. //! //! ## Requirements @@ -163,12 +162,14 @@ impl CatFile { #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum TestMode { UniDiff, + GitDiff, } impl From for ParseOptions { fn from(value: TestMode) -> Self { match value { TestMode::UniDiff => ParseOptions::unidiff(), + TestMode::GitDiff => ParseOptions::gitdiff(), } } } @@ -230,7 +231,8 @@ fn test_mode() -> TestMode { }; match val.trim().to_lowercase().as_str() { "unidiff" => TestMode::UniDiff, - _ => panic!("invalid DIFFY_TEST_PARSE_MODE='{val}': expected 'unidiff'"), + "gitdiff" => TestMode::GitDiff, + _ => panic!("invalid DIFFY_TEST_PARSE_MODE='{val}': expected 'unidiff' or 'gitdiff'"), } } @@ -329,8 +331,13 @@ fn process_commit( // UniDiff format cannot express pure renames (no ---/+++ headers). // Use `--no-renames` to represent them as delete + create instead. 
+ // GitDiff mode handles renames via extended headers natively. let diff_output = match mode { TestMode::UniDiff => git_bytes(repo, &["diff", "--no-renames", parent, child]), + // TODO: pass `--binary` once binary patch support lands, + // so binary files get actual delta/literal data instead of + // "Binary files differ" markers. + TestMode::GitDiff => git_bytes(repo, &["diff", parent, child]), }; if diff_output.is_empty() { @@ -379,6 +386,20 @@ fn process_commit( } text_files + type_changes } + TestMode::GitDiff => { + // Can't use `--numstat` for GitDiff: it shows `-\t-\t` for both + // actual binary diffs AND pure binary renames (100% similarity). + // Use `--raw` for total count instead. + let raw = git(repo, &["diff", "--raw", parent, child]); + let (mut total, mut type_changes) = (0, 0); + for line in raw.lines().filter(|l| !l.is_empty()) { + total += 1; + if is_type_change(line) { + type_changes += 1; + } + } + total + type_changes + } }; if expected_file_count == 0 { @@ -564,6 +585,7 @@ fn replay() { .unwrap_or_else(|| ".".to_string()); let mode_name = match mode { TestMode::UniDiff => "unidiff", + TestMode::GitDiff => "gitdiff", }; // Shared state for progress reporting From dfbf385627b53b6213aaffa83c04e812a11d7747 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Sun, 12 Apr 2026 12:59:07 -0400 Subject: [PATCH 04/12] chore(ci): run gitdiff also in replay workflow --- .github/workflows/replay.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/replay.yml b/.github/workflows/replay.yml index a9f8f99..08384d6 100644 --- a/.github/workflows/replay.yml +++ b/.github/workflows/replay.yml @@ -44,7 +44,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - parse_mode: [unidiff] + parse_mode: [unidiff, gitdiff] name: ${{ inputs.name && matrix.parse_mode || format('{0} ({1}, {2})', inputs.repo_url, matrix.parse_mode, inputs.commits) }} steps: - uses: actions/checkout@v6 From 065d80148f8518a4d2e9abea82b30347e91edc98 Mon Sep 17 
00:00:00 2001 From: Weihang Lo Date: Tue, 14 Apr 2026 01:03:26 -0400 Subject: [PATCH 05/12] chore(ci): we can now replay more commits! https://github.com/bmwill/diffy/pull/62 optimized it --- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1c8292..2f514b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,19 +58,19 @@ jobs: commits: '0' - name: golang/go repo_url: https://github.com/golang/go - commits: '200' + commits: '2000' - name: git/git repo_url: https://github.com/git/git - commits: '100' + commits: '1000' - name: rust-lang/rust repo_url: https://github.com/rust-lang/rust - commits: '30' + commits: '50' - name: torvalds/linux repo_url: https://github.com/torvalds/linux - commits: '30' + commits: '40' - name: llvm/llvm-project repo_url: https://github.com/llvm/llvm-project - commits: '30' + commits: '50' name: replay (${{ matrix.name }}) uses: ./.github/workflows/replay.yml with: From 58f95d709f7fe07ba74a55bd12f9457bbfd9f43b Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 17 Apr 2026 01:38:06 -0400 Subject: [PATCH 06/12] feat(binary): binary patch types and parser * Added types representing both literal and delta Git binary patches * Added a parser for the `GIT binary patch` format. This doesn't include the patch application (which will be added in later commits) The implementation is based on * Specification from https://diffx.org/spec/binary-diffs.html * Behavior observation of Git CLI --- src/binary/mod.rs | 352 ++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 2 files changed, 353 insertions(+) create mode 100644 src/binary/mod.rs diff --git a/src/binary/mod.rs b/src/binary/mod.rs new file mode 100644 index 0000000..696f0d6 --- /dev/null +++ b/src/binary/mod.rs @@ -0,0 +1,352 @@ +//! Git binary diffs support. +//! +//! This module provides parsing and decoding for Git's binary diff format, +//! 
as generated by `git diff --binary` or `git format-patch --binary`. +//! +//! Based on [DiffX Binary Diffs specification](https://diffx.org/spec/binary-diffs.html). + +use std::{fmt, ops::Range}; + +/// The type of a binary patch block. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryBlockKind { + /// [Literal](https://diffx.org/spec/binary-diffs.html#git-literal-binary-diffs): + /// contains the full file content, zlib-compressed and Base85-encoded. + Literal, + /// [Delta](https://diffx.org/spec/binary-diffs.html#git-delta-binary-diffs): + /// contains delta instructions to transform one file into another. + Delta, +} + +/// A single block in a binary patch. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BinaryBlock<'a> { + /// The type of this block (literal content or delta instructions). + pub kind: BinaryBlockKind, + /// The encoded data. + pub data: BinaryData<'a>, +} + +/// A parsed binary patch. +/// +/// A binary patch contains encoded binary data that can be decoded +/// to recover the original and modified file contents. +/// +/// Git may use different encodings for each direction: +/// +/// - `literal`: full file content +/// - `delta`: instructions to transform one file into another +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BinaryPatch<'a> { + /// A full binary patch with forward and reverse data. + /// + /// The forward block transforms original -> modified. + /// The reverse block transforms modified -> original. + /// + /// Each block can independently be either `literal` or `delta`. + Full { + /// Forward transformation (original -> modified). + forward: BinaryBlock<'a>, + /// Reverse transformation (modified -> original). + reverse: BinaryBlock<'a>, + }, + /// A Git binary diff marker. + /// + /// This represents the `Binary files a/path and b/path differ` case, + /// where git detected a binary change but didn't include the actual data. + Marker, +} + +/// Represents a single binary payload in a Git binary diff. 
+/// +/// For example, the following patch block +/// +/// * is parsed as `BinaryData { size: 10, data: "UcmV+l0QLU>0RjUA1qKHQ2>\`DEE&u=k" }` +/// * The line starts with a length indicator (`U` = 21 decoded bytes) +/// +/// +/// ```text +/// literal 10 +/// UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k +/// ``` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BinaryData<'a> { + /// Uncompressed size in bytes. + pub size: u64, + /// Raw Base85 lines with length indicators. + pub data: &'a str, +} + +/// Error type for binary patch operations. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BinaryPatchParseError { + pub(crate) kind: BinaryPatchParseErrorKind, + span: Option>, +} + +impl BinaryPatchParseError { + /// Creates a new error with the given kind and span. + pub(crate) fn new(kind: BinaryPatchParseErrorKind, span: Range) -> Self { + Self { + kind, + span: Some(span), + } + } + + /// Returns the byte range in the input where the error occurred. + pub fn span(&self) -> Option> { + self.span.clone() + } +} + +impl fmt::Display for BinaryPatchParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(span) = &self.span { + write!( + f, + "error parsing binary patch at byte {}: {}", + span.start, self.kind + ) + } else { + write!(f, "error parsing binary patch: {}", self.kind) + } + } +} + +impl std::error::Error for BinaryPatchParseError {} + +impl From for BinaryPatchParseError { + fn from(kind: BinaryPatchParseErrorKind) -> Self { + Self { kind, span: None } + } +} + +/// The kind of error that occurred when parsing a binary patch. +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub(crate) enum BinaryPatchParseErrorKind { + /// Missing or invalid "GIT binary patch" header. + InvalidHeader, + + /// First binary block (forward) not found. + MissingForwardBlock, + + /// Second binary block (reverse) not found. + MissingReverseBlock, + + /// No binary data available (marker-only patch). 
+ NoBinaryData, + + /// Invalid line length indicator in Base85 data. + InvalidLineLengthIndicator, +} + +impl fmt::Display for BinaryPatchParseErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidHeader => write!(f, "invalid binary patch header"), + Self::MissingForwardBlock => write!(f, "first binary block not found"), + Self::MissingReverseBlock => write!(f, "second binary block not found"), + Self::NoBinaryData => write!(f, "no binary data available"), + Self::InvalidLineLengthIndicator => write!(f, "invalid line length indicator"), + } + } +} + +/// Simple streaming parser for binary patches using lines() iterator. +struct BinaryParser<'a> { + input: &'a str, + offset: usize, +} + +impl<'a> BinaryParser<'a> { + fn new(input: &'a str) -> Self { + Self { input, offset: 0 } + } + + fn peek_line(&self) -> Option<&'a str> { + self.input[self.offset..].lines().next() + } + + /// Creates an error with the current offset as span. + fn error(&self, kind: BinaryPatchParseErrorKind) -> BinaryPatchParseError { + BinaryPatchParseError::new(kind, self.offset..self.offset) + } + + fn next_line(&mut self) -> Option<&'a str> { + let rest = &self.input[self.offset..]; + let line = rest.lines().next()?; + self.offset += line.len(); + + // Skip line ending (CRLF or LF) + let remaining = &self.input[self.offset..]; + if remaining.starts_with("\r\n") { + self.offset += 2; + } else if remaining.starts_with('\n') { + self.offset += 1; + } + + Some(line) + } + + fn slice_from(&self, start_offset: usize) -> &'a str { + let end = self.offset; + // Strip trailing line ending before blank line + let slice = &self.input[start_offset..end]; + slice + .strip_suffix("\r\n") + .or_else(|| slice.strip_suffix('\n')) + .unwrap_or(slice) + } +} + +/// Parses binary patch content after git extended headers. +/// +/// Expects input starting with "GIT binary patch" line. +/// Returns the parsed patch and the number of bytes consumed. 
+/// +/// Format: +/// +/// ```text +/// GIT binary patch +/// +/// +/// +/// +/// +/// ``` +pub(crate) fn parse_binary_patch( + input: &str, +) -> Result<(BinaryPatch<'_>, usize), BinaryPatchParseError> { + let mut parser = BinaryParser::new(input); + + // Expect "GIT binary patch" marker + if parser.next_line() != Some("GIT binary patch") { + return Err(parser.error(BinaryPatchParseErrorKind::InvalidHeader)); + } + + // Parse first block (forward: original -> modified) + let Some(forward) = parse_binary_block(&mut parser) else { + return Err(parser.error(BinaryPatchParseErrorKind::MissingForwardBlock)); + }; + + // Parse second block (reverse: modified -> original) + let Some(reverse) = parse_binary_block(&mut parser) else { + return Err(parser.error(BinaryPatchParseErrorKind::MissingReverseBlock)); + }; + + Ok((BinaryPatch::Full { forward, reverse }, parser.offset)) +} + +/// Parses a single binary block. +/// +/// Returns a `BinaryBlock` with kind (literal/delta) and data. +fn parse_binary_block<'a>(parser: &mut BinaryParser<'a>) -> Option> { + // Parse "literal 10" or "delta 18" + let format_line = parser.next_line()?; + let (patch_type, size_str) = format_line.split_once(' ')?; + let size: u64 = size_str.parse().ok()?; + + let kind = match patch_type { + "literal" => BinaryBlockKind::Literal, + "delta" => BinaryBlockKind::Delta, + _ => return None, + }; + + // Record start of Base85 data + let data_start = parser.offset; + + // Consume Base85 lines until blank line + while let Some(line) = parser.peek_line() { + if line.is_empty() { + parser.next_line(); // Consume blank line + break; + } + parser.next_line(); + } + + let data = parser.slice_from(data_start); + + Some(BinaryBlock { + kind, + data: BinaryData { size, data }, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_literal_format_simple() { + let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\nliteral 0\nKcmV+b0RR6000031\n\n"; + let (patch, consumed) 
= parse_binary_patch(input).unwrap(); + + assert_eq!(consumed, input.len()); + match &patch { + BinaryPatch::Full { forward, reverse } => { + assert_eq!(forward.kind, BinaryBlockKind::Literal); + assert_eq!(forward.data.size, 10); + assert_eq!(reverse.kind, BinaryBlockKind::Literal); + assert_eq!(reverse.data.size, 0); + } + _ => panic!("expected Full variant"), + } + } + + #[test] + fn parse_delta_format() { + let input = "GIT binary patch\ndelta 18\nccmV+t0PX*P2!IH%^Z^9`00000v-trB0x!=5aR2}S\n\ndelta 18\nccmV+t0PX*P2!IH%^Z^BFm9#}av-trB0zxAOrvLx|\n\n"; + let (patch, _) = parse_binary_patch(input).unwrap(); + + match &patch { + BinaryPatch::Full { forward, reverse } => { + assert_eq!(forward.kind, BinaryBlockKind::Delta); + assert_eq!(forward.data.size, 18); + assert_eq!(reverse.kind, BinaryBlockKind::Delta); + assert_eq!(reverse.data.size, 18); + } + _ => panic!("expected Full variant"), + } + } + + #[test] + fn parse_invalid_header() { + // Without "GIT binary patch" marker, parse_binary_patch returns error + let input = "literal 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\n"; + let err = parse_binary_patch(input).unwrap_err(); + assert_eq!(err.kind, BinaryPatchParseErrorKind::InvalidHeader); + } + + #[test] + fn parse_with_crlf_line_endings() { + let input = "GIT binary patch\r\nliteral 10\r\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\r\n\r\nliteral 0\r\nKcmV+b0RR6000031\r\n\r\n"; + let (patch, consumed) = parse_binary_patch(input).unwrap(); + + assert_eq!(consumed, input.len()); + match &patch { + BinaryPatch::Full { forward, reverse } => { + assert_eq!(forward.kind, BinaryBlockKind::Literal); + assert_eq!(forward.data.size, 10); + assert_eq!(reverse.kind, BinaryBlockKind::Literal); + assert_eq!(reverse.data.size, 0); + } + _ => panic!("expected Full variant"), + } + } + + #[test] + fn parse_mixed_format() { + // Git can use different encoding for each direction + let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\ndelta 
18\nccmV+t0PX*P2!IH%^Z^9`00000v-trB0x!=5aR2}S\n\n"; + let (patch, _) = parse_binary_patch(input).unwrap(); + + match &patch { + BinaryPatch::Full { forward, reverse } => { + assert_eq!(forward.kind, BinaryBlockKind::Literal); + assert_eq!(reverse.kind, BinaryBlockKind::Delta); + } + _ => panic!("expected Full variant"), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 33ebbb6..5e11a40 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -223,6 +223,7 @@ //! [`create_patch_bytes`]: fn.create_patch_bytes.html mod apply; +pub mod binary; mod diff; mod merge; mod patch; From b759b67db29ca006a2d64036f0b11f7177a8bd7a Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 17 Apr 2026 10:33:33 -0400 Subject: [PATCH 07/12] refactor(utils): add `Text::as_str_prefix` method Returns the longest valid UTF-8 prefix of the input. For `str` this is the entire input; for `[u8]` it truncates at the first invalid byte. This will be used at call sites where generic `T: Text` input needs to be narrowed to `&str` for parsers that only handle ASCII data (e.g. binary patch base85 content). --- src/utils.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/utils.rs b/src/utils.rs index c491456..e337e6c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -144,6 +144,11 @@ pub trait Text: Eq + Hash + ToOwned { fn find(&self, needle: &str) -> Option; fn split_at(&self, mid: usize) -> (&Self, &Self); fn as_str(&self) -> Option<&str>; + /// Returns the longest valid UTF-8 prefix. + /// + /// For `str` this is the entire input. + /// For `[u8]` this returns up to the first invalid UTF-8 byte. 
+ fn as_str_prefix(&self) -> &str; fn as_bytes(&self) -> &[u8]; #[allow(unused)] fn lines(&self) -> LineIter<'_, Self>; @@ -205,6 +210,10 @@ impl Text for str { Some(self) } + fn as_str_prefix(&self) -> &str { + self + } + fn as_bytes(&self) -> &[u8] { self.as_bytes() } @@ -259,6 +268,13 @@ impl Text for [u8] { std::str::from_utf8(self).ok() } + fn as_str_prefix(&self) -> &str { + match std::str::from_utf8(self) { + Ok(s) => s, + Err(e) => std::str::from_utf8(&self[..e.valid_up_to()]).unwrap(), + } + } + fn as_bytes(&self) -> &[u8] { self } From c611ce70fda6893f4242044f6c987d357dae3738 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 17 Apr 2026 10:41:00 -0400 Subject: [PATCH 08/12] feat(patch_set): wire binary diff parsing --- src/patch_set/error.rs | 11 +++++++++ src/patch_set/mod.rs | 22 ++++++++++++----- src/patch_set/parse.rs | 55 ++++++++++++++++++++++++++++++++++++------ src/patch_set/tests.rs | 7 +++--- tests/compat/common.rs | 13 +++++++--- tests/replay.rs | 4 ++- 6 files changed, 92 insertions(+), 20 deletions(-) diff --git a/src/patch_set/error.rs b/src/patch_set/error.rs index 3a8fc61..2c9116b 100644 --- a/src/patch_set/error.rs +++ b/src/patch_set/error.rs @@ -3,6 +3,7 @@ use std::fmt; use std::ops::Range; +use crate::binary::BinaryPatchParseError; use crate::patch::ParsePatchError; /// An error returned when parsing patches fails. @@ -76,6 +77,9 @@ pub(crate) enum PatchSetParseErrorKind { /// Invalid `diff --git` path. InvalidDiffGitPath, + + /// Binary patch parsing failed. 
+ BinaryParse(BinaryPatchParseError), } impl fmt::Display for PatchSetParseErrorKind { @@ -89,6 +93,7 @@ impl fmt::Display for PatchSetParseErrorKind { Self::CreateMissingModifiedPath => write!(f, "create patch has no modified path"), Self::InvalidFileMode(mode) => write!(f, "invalid file mode: {mode}"), Self::InvalidDiffGitPath => write!(f, "invalid diff --git path"), + Self::BinaryParse(e) => write!(f, "{e}"), } } } @@ -98,3 +103,9 @@ impl From for PatchSetParseError { PatchSetParseErrorKind::Patch(e).into() } } + +impl From for PatchSetParseError { + fn from(e: BinaryPatchParseError) -> Self { + PatchSetParseErrorKind::BinaryParse(e).into() + } +} diff --git a/src/patch_set/mod.rs b/src/patch_set/mod.rs index ed3f68e..16ec280 100644 --- a/src/patch_set/mod.rs +++ b/src/patch_set/mod.rs @@ -11,6 +11,7 @@ mod tests; use std::borrow::Cow; use std::fmt; +use crate::binary::BinaryPatch; use crate::utils::Text; use crate::Patch; @@ -90,7 +91,7 @@ impl ParseOptions { /// * `diff --git` headers /// * Extended headers (`new file mode`, `deleted file mode`, etc.) /// * Rename/copy detection (`rename from`/`rename to`, `copy from`/`copy to`) - /// * Binary file detection (emitted a marker by defualt) + /// * Binary file detection /// /// [git-diff-format]: https://git-scm.com/docs/diff-format pub fn gitdiff() -> Self { @@ -133,7 +134,7 @@ pub enum PatchKind<'a, T: ToOwned + ?Sized> { /// Text patch with hunks. Text(Patch<'a, T>), /// Binary patch (literal or delta encoded, or marker-only). 
- Binary, + Binary(BinaryPatch<'a>), } impl std::fmt::Debug for PatchKind<'_, T> @@ -144,7 +145,7 @@ where fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { PatchKind::Text(patch) => f.debug_tuple("Text").field(patch).finish(), - PatchKind::Binary => f.write_str("Binary"), + PatchKind::Binary(patch) => f.debug_tuple("Binary").field(patch).finish(), } } } @@ -154,13 +155,21 @@ impl<'a, T: ToOwned + ?Sized> PatchKind<'a, T> { pub fn as_text(&self) -> Option<&Patch<'a, T>> { match self { PatchKind::Text(patch) => Some(patch), - PatchKind::Binary => None, + PatchKind::Binary(_) => None, + } + } + + /// Returns the binary patch, or `None` if this is a text patch. + pub fn as_binary(&self) -> Option<&BinaryPatch<'a>> { + match self { + PatchKind::Binary(patch) => Some(patch), + PatchKind::Text(_) => None, } } /// Returns `true` if this is a binary diff. pub fn is_binary(&self) -> bool { - matches!(self, PatchKind::Binary) + matches!(self, PatchKind::Binary(_)) } } @@ -209,12 +218,13 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> { fn new_binary( operation: FileOperation<'a, T>, + patch: BinaryPatch<'a>, old_mode: Option, new_mode: Option, ) -> Self { Self { operation, - kind: PatchKind::Binary, + kind: PatchKind::Binary(patch), old_mode, new_mode, } diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs index 08339da..e552e64 100644 --- a/src/patch_set/parse.rs +++ b/src/patch_set/parse.rs @@ -4,6 +4,7 @@ use super::{ error::PatchSetParseErrorKind, FileMode, FileOperation, FilePatch, Format, ParseOptions, PatchSetParseError, }; +use crate::binary::{parse_binary_patch, BinaryPatch}; use crate::patch::parse::parse_one; use crate::utils::{escaped_filename, Text}; use crate::Patch; @@ -227,8 +228,9 @@ fn next_gitdiff_patch<'a, T: Text + ?Sized>( let (header, header_consumed) = GitHeader::parse(remaining(ps)); ps.offset += header_consumed; - // Handle binary markers ("Binary files ... 
differ") and binary patches ("GIT binary patch") - if header.is_binary_marker || header.is_binary_patch { + // Handle "Binary files ... differ" (no patch data) + if header.is_binary_marker { + // FIXME: error spans point at `diff --git` line, not the specific offending line let operation = match extract_file_op_binary(&header, abs_patch_start) { Ok(op) => op, Err(e) => return Some(Err(e)), @@ -240,7 +242,45 @@ fn next_gitdiff_patch<'a, T: Text + ?Sized>( return Some(Err(e)); } }; - return Some(Ok(FilePatch::new_binary(operation, old_mode, new_mode))); + return Some(Ok(FilePatch::new_binary( + operation, + BinaryPatch::Marker, + old_mode, + new_mode, + ))); + } + + // Handle "GIT binary patch" (has patch data) + if let Some(binary_patch_start) = header.binary_patch_offset { + // GitHeader::parse consumed the marker line but not the payload. + // Use the recorded offset to pass input from the marker onward. + let (_, binary_input) = ps.input.split_at(abs_patch_start + binary_patch_start); + // Binary patch data is always ASCII (base85-encoded). + let binary_input = binary_input.as_str_prefix(); + let (binary_patch, consumed) = match parse_binary_patch(binary_input) { + Ok(result) => result, + Err(e) => return Some(Err(e.into())), + }; + ps.offset = abs_patch_start + binary_patch_start + consumed; + + // FIXME: error spans point at `diff --git` line, not the specific offending line + let operation = match extract_file_op_binary(&header, abs_patch_start) { + Ok(op) => op, + Err(e) => return Some(Err(e)), + }; + let (old_mode, new_mode) = match parse_file_modes(&header) { + Ok(modes) => modes, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + return Some(Ok(FilePatch::new_binary( + operation, + binary_patch, + old_mode, + new_mode, + ))); } // `git diff` output format is stricter. 
@@ -328,7 +368,8 @@ struct GitHeader<'a, T: ?Sized> { /// Binary files /dev/null and b/image.png differ /// ``` is_binary_marker: bool, - /// Whether this is a binary diff with actual patch content. + /// Byte offset of `"GIT binary patch"` line relative to header input, + /// or `None` if no binary patch content was found. /// /// Observed `git diff --binary` output: /// @@ -343,7 +384,7 @@ struct GitHeader<'a, T: ?Sized> { /// literal 0 /// KcmV+b0RR6000031 /// ``` - is_binary_patch: bool, + binary_patch_offset: Option, } impl Default for GitHeader<'_, T> { @@ -359,7 +400,7 @@ impl Default for GitHeader<'_, T> { new_file_mode: None, deleted_file_mode: None, is_binary_marker: false, - is_binary_patch: false, + binary_patch_offset: None, } } } @@ -410,7 +451,7 @@ impl<'a, T: Text + ?Sized> GitHeader<'a, T> { } else if trimmed.starts_with("Binary files ") { header.is_binary_marker = true; } else if trimmed.starts_with("GIT binary patch") { - header.is_binary_patch = true; + header.binary_patch_offset = Some(consumed); } else { // Unrecognized line: End of extended headers // (typically `---`/`+++`/`@@` or trailing content). diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs index ee7f75b..d1ff880 100644 --- a/src/patch_set/tests.rs +++ b/src/patch_set/tests.rs @@ -752,7 +752,8 @@ index 0000000..e69de29 } #[test] - fn binary_emits_marker() { + fn binary_marker_kept_by_default() { + // Default is Keep: binary marker is returned as BinaryPatch::Marker. 
+ let input = "\ diff --git a/img.png b/img.png Binary files a/img.png and b/img.png differ @@ -765,9 +766,9 @@ diff --git a/foo b/foo "; let patches = parse_gitdiff(input); assert_eq!(patches.len(), 2); - assert!(patches[0].patch().is_binary()); + assert!(patches[0].patch().as_binary().is_some()); assert!(patches[0].operation().is_modify()); - assert!(!patches[1].patch().is_binary()); + assert!(patches[1].patch().as_text().is_some()); } } diff --git a/tests/compat/common.rs b/tests/compat/common.rs index 0993e2a..ad26fd6 100644 --- a/tests/compat/common.rs +++ b/tests/compat/common.rs @@ -8,7 +8,10 @@ use std::{ sync::Once, }; -use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError}; +use diffy::{ + binary::BinaryPatch, + patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError}, +}; /// Which external tool to compare against. #[derive(Clone, Copy)] @@ -376,8 +379,12 @@ pub fn apply_diffy( } fs::write(&result_path, &result).unwrap(); } - PatchKind::Binary => { - // No patch data to apply — nothing to do. + PatchKind::Binary(BinaryPatch::Marker) => { + // Don't do anything if it is just a binary patch marker. + } + PatchKind::Binary(_) => { + // Binary patch application requires the `binary` feature. + // Will be wired up when that feature is added. } } } diff --git a/tests/replay.rs b/tests/replay.rs index c98eef1..e6fbab3 100644 --- a/tests/replay.rs +++ b/tests/replay.rs @@ -544,7 +544,9 @@ fn process_commit( ); } } - PatchKind::Binary => { + PatchKind::Binary(_) => { + // Binary patch application not yet wired up in replay tests. + // Will be done once the `binary` Cargo feature is added. skipped += 1; continue; } From e658c8e1a30c74eb95b2100b09752b544396a510 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 17 Apr 2026 10:43:30 -0400 Subject: [PATCH 09/12] refactor: clippy manual_div_ceil The API was stabilized in 1.73. The lint was added in 1.93.
This is required for an MSRV bump to 1.75 --- src/diff/myers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/myers.rs b/src/diff/myers.rs index 73eb410..502d077 100644 --- a/src/diff/myers.rs +++ b/src/diff/myers.rs @@ -69,7 +69,7 @@ impl ::std::fmt::Display for Snake { fn max_d(len1: usize, len2: usize) -> usize { // XXX look into reducing the need to have the additional '+ 1' - (len1 + len2 + 1) / 2 + 1 + (len1 + len2).div_ceil(2) + 1 } // The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes some of which may From ff96dafb713ffea346e60b579a621bfe466ea1fb Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 17 Apr 2026 10:43:59 -0400 Subject: [PATCH 10/12] chore: dependency flate2 behind binary feature This is a preparation for binary diff application support. * Git binary patch is compressed by zlib hence flate2 * zlib-rs (which is the most performant zlib backend) requires MSRV 1.75.0+ hence the bump. --- .github/workflows/ci.yml | 2 +- Cargo.lock | 39 +++++++++++++++++++++++++++++++++++++++ Cargo.toml | 4 +++- deny.toml | 1 + 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f514b3..d0ca9b8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - rust: [stable, beta, nightly, 1.70.0] + rust: [stable, beta, nightly, 1.75.0] steps: - uses: actions/checkout@v6 diff --git a/Cargo.lock b/Cargo.lock index ad290e3..c726b1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing.
version = 3 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "anstream" version = "0.6.21" @@ -109,6 +115,7 @@ name = "diffy" version = "0.4.2" dependencies = [ "anstyle", + "flate2", "rayon", "snapbox", ] @@ -152,6 +159,16 @@ dependencies = [ "libredox", ] +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "miniz_oxide", + "zlib-rs", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -200,6 +217,16 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -281,6 +308,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "similar" version = "2.7.0" @@ -446,3 +479,9 @@ name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "zlib-rs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" diff --git a/Cargo.toml b/Cargo.toml index 25c6e51..68ce3ff 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -9,14 +9,16 @@ repository = "https://github.com/bmwill/diffy" readme = "README.md" keywords = ["diff", "patch", "merge"] categories = ["text-processing"] -rust-version = "1.70.0" +rust-version = "1.75.0" edition = "2021" [features] +binary = ["dep:flate2"] color = ["dep:anstyle"] [dependencies] anstyle = { version = "1.0.13", optional = true } +flate2 = { version = "1.1.9", optional = true, default-features = false, features = ["zlib-rs"] } [dev-dependencies] rayon = "1.10.0" diff --git a/deny.toml b/deny.toml index d495b7c..740f35a 100644 --- a/deny.toml +++ b/deny.toml @@ -91,6 +91,7 @@ ignore = [ allow = [ "MIT", "Apache-2.0", + "Zlib", #"Apache-2.0 WITH LLVM-exception", ] # The confidence threshold for detecting a license from license text. From ecb0afb0fa8e1a08a2410980155036732ce2e538 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 17 Apr 2026 10:50:14 -0400 Subject: [PATCH 11/12] feat(binary): base85/delta decode and patch application * Add base85 encoder/decoder and Git delta format decoder. * Wire them into `BinaryPatch::apply()` and `apply_reverse()` for decoding zlib-compressed, base85-encoded binary payload. These are feature-gated behind the `binary` feature. --- src/binary/base85.rs | 229 +++++++++++++++++++++++++++ src/binary/delta.rs | 362 +++++++++++++++++++++++++++++++++++++++++++ src/binary/mod.rs | 210 +++++++++++++++++++++++++ 3 files changed, 801 insertions(+) create mode 100644 src/binary/base85.rs create mode 100644 src/binary/delta.rs diff --git a/src/binary/base85.rs b/src/binary/base85.rs new file mode 100644 index 0000000..182efe8 --- /dev/null +++ b/src/binary/base85.rs @@ -0,0 +1,229 @@ +//! Base85 encoding and decoding using the character set defined in [RFC 1924]. +//! +//! ## References +//! +//! * [RFC 1924] +//! * [Wikipedia: Ascii85 § RFC 1924 version](https://en.wikipedia.org/wiki/Ascii85#RFC_1924_version) +//! +//!
[RFC 1924]: https://datatracker.ietf.org/doc/html/rfc1924 + +use std::fmt; + +/// Base85 character set (RFC 1924). +const ALPHABET: &[u8; 85] = b"0123456789\ + ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + !#$%&()*+-;<=>?@^_`{|}~"; + +/// Pre-computed lookup table for Base85 decoding. +/// +/// Maps ASCII byte value → digit value or `0xFF` for invalid characters. +/// This provides O(1) lookup. +const TABLE: [u8; 256] = { + let mut table = [0xFFu8; 256]; + let mut i = 0usize; + while i < 85 { + table[ALPHABET[i] as usize] = i as u8; + i += 1; + } + table +}; + +/// Error type for Base85 operations. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Base85Error { + /// Invalid character that is not in RFC 1924 alphabet. + InvalidCharacter(char), + /// Invalid input length for the operation. + InvalidLength, +} + +impl fmt::Display for Base85Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Base85Error::InvalidCharacter(c) => write!(f, "invalid base85 character: {:?}", c), + Base85Error::InvalidLength => write!(f, "invalid input length"), + } + } +} + +impl std::error::Error for Base85Error {} + +/// Decodes a Base85 string to the provided output. +/// +/// ## Limitations +/// +/// The input length must be a multiple of 5. +/// +/// This function does not handle padding for partial chunks. +/// When decoding data where the original byte count isn't a multiple of 4, +/// callers must handle truncation at a higher level. +/// For example, via a length indicator in Git binary patch. 
+pub fn decode_into(input: &str, output: &mut impl Extend) -> Result<(), Base85Error> { + let bytes = input.as_bytes(); + + if bytes.len() % 5 != 0 { + return Err(Base85Error::InvalidLength); + } + + // TODO: Use `as_chunks::<5>()` when MSRV >= 1.88 + for chunk in bytes.chunks_exact(5) { + let mut value: u32 = 0; + for &byte in chunk { + let digit = TABLE[byte as usize]; + if digit == 0xFF { + return Err(Base85Error::InvalidCharacter(byte as char)); + } + value = value * 85 + digit as u32; + } + + output.extend(value.to_be_bytes()); + } + + Ok(()) +} + +/// Encodes bytes in Base85 to the provided output. +/// +/// ## Limitations +/// +/// The input length must be a multiple of 4. +/// +/// This function does not handle padding for partial chunks. +/// Callers encoding data where the byte count isn't a multiple of 4 +/// must handle padding at a higher level. +/// For example, via a length indicator in Git binary patch format. +#[allow(dead_code)] // will be used for patch formatting +pub fn encode_into(input: &[u8], output: &mut impl Extend) -> Result<(), Base85Error> { + if input.len() % 4 != 0 { + return Err(Base85Error::InvalidLength); + } + + // TODO: Use `as_chunks::<4>()` when MSRV >= 1.88 + for chunk in input.chunks_exact(4) { + let mut value = u32::from_be_bytes(chunk.try_into().unwrap()); + + // Extract 5 base85 digits (least to most significant order) + let mut digits = [0u8; 5]; + for digit in digits.iter_mut().rev() { + *digit = ALPHABET[(value % 85) as usize]; + value /= 85; + } + output.extend(digits.iter().map(|&b| b as char)); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn decode(input: &str) -> Result, Base85Error> { + let mut result = Vec::with_capacity((input.len() / 5) * 4); + decode_into(input, &mut result)?; + Ok(result) + } + + fn encode(input: &[u8]) -> Result { + let mut result = String::with_capacity((input.len() / 4) * 5); + encode_into(input, &mut result)?; + Ok(result) + } + + const TEST_VECTORS: &[(&[u8], &str)] 
= &[ + (b"", ""), + (&[0x00, 0x00, 0x00, 0x00], "00000"), + (&[0xff, 0xff, 0xff, 0xff], "|NsC0"), + // Rust ecosystem phrases + (b"Rust", "Qgw55"), + (b"Fearless concurrency", "MrC1gY-MwEAY*TCV|8+JWo~16"), + (b"memory safe!", "ZDnn5a(N(gVP<6^"), + (b"blazing fast", "Vr*f0X>MmAW?^%5"), + ( + b"zero-cost abstraction!??", + "dS!BNEn{zUbRc13b98cHV{~b6ZXrKE", + ), + ]; + + #[test] + fn table_covers_all_alphabet_chars() { + for (i, &c) in ALPHABET.iter().enumerate() { + assert_eq!( + TABLE[c as usize], i as u8, + "mismatch for char '{}' at index {}", + c as char, i + ); + } + } + + #[test] + fn table_rejects_invalid_chars() { + let invalid_chars = b" \t\n\r\"'\\[],:"; + for &c in invalid_chars { + assert_eq!( + TABLE[c as usize], 0xFF, + "char '{}' should be invalid", + c as char + ); + } + } + + #[test] + fn decode_test_vectors() { + for (bytes, encoded) in TEST_VECTORS { + let result = decode(encoded).unwrap(); + assert_eq!(&result, *bytes, "decode({:?}) failed", encoded); + } + } + + #[test] + fn encode_test_vectors() { + for (bytes, encoded) in TEST_VECTORS { + let result = encode(bytes).unwrap(); + assert_eq!(result, *encoded, "encode({:?}) failed", bytes); + } + } + + #[test] + fn decode_invalid_length() { + assert!(matches!(decode("0000"), Err(Base85Error::InvalidLength))); + assert!(matches!(decode("000"), Err(Base85Error::InvalidLength))); + assert!(matches!(decode("00"), Err(Base85Error::InvalidLength))); + assert!(matches!(decode("0"), Err(Base85Error::InvalidLength))); + } + + #[test] + fn decode_invalid_character() { + assert!(matches!( + decode("0000 "), + Err(Base85Error::InvalidCharacter(' ')) + )); + assert!(matches!( + decode("0000\""), + Err(Base85Error::InvalidCharacter('"')) + )); + } + + #[test] + fn encode_invalid_length() { + assert!(matches!(encode(&[0]), Err(Base85Error::InvalidLength))); + assert!(matches!(encode(&[0, 0]), Err(Base85Error::InvalidLength))); + assert!(matches!( + encode(&[0, 0, 0]), + Err(Base85Error::InvalidLength) + )); + 
assert!(matches!( + encode(&[0, 0, 0, 0, 0]), + Err(Base85Error::InvalidLength) + )); + } + + #[test] + fn round_trip() { + for (bytes, _) in TEST_VECTORS { + let encoded = encode(bytes).unwrap(); + let decoded = decode(&encoded).unwrap(); + assert_eq!(&decoded, *bytes, "round-trip failed for {:?}", bytes); + } + } +} diff --git a/src/binary/delta.rs b/src/binary/delta.rs new file mode 100644 index 0000000..fa1de26 --- /dev/null +++ b/src/binary/delta.rs @@ -0,0 +1,362 @@ +//! Git delta binary diff support. +//! +//! A delta payload contains: +//! +//! 1. Header: variable-length encoded sizes (original_size, modified_size) +//! 2. Instructions: sequence of `ADD` and `COPY` operations +//! +//! Based on Diffx's [Git Delta Binary Diffs](https://diffx.org/spec/binary-diffs.html#git-delta-binary-diffs) + +use std::fmt; + +/// Applies delta instructions to an original file, producing the modified file. +pub fn apply(original: &[u8], delta: &[u8]) -> Result, DeltaError> { + let mut cursor = DeltaCursor::new(delta); + + let header_orig_size = cursor.read_size()?; + let header_mod_size = cursor.read_size()?; + + // Validate original size + if original.len() as u64 != header_orig_size { + return Err(DeltaError::OriginalSizeMismatch { + expected: header_orig_size, + actual: original.len() as u64, + }); + } + + let mut result = Vec::with_capacity(header_mod_size as usize); + + // Process instructions until we've consumed all delta data + while !cursor.is_empty() { + let control = cursor.read_byte()?; + + if control & 0x80 != 0 { + // COPY instruction + let (src_offset, copy_len) = cursor.read_copy_params(control)?; + let src_end = src_offset + .checked_add(copy_len) + .ok_or(DeltaError::InvalidCopyRange)?; + + if src_end > original.len() { + return Err(DeltaError::CopyOutOfBounds { + offset: src_offset, + length: copy_len, + original_size: original.len(), + }); + } + + result.extend_from_slice(&original[src_offset..src_end]); + } else { + // ADD instruction + let add_len = 
control as usize; + let data = cursor.read_bytes(add_len)?; + result.extend_from_slice(data); + } + } + + // Validate result size + if result.len() as u64 != header_mod_size { + return Err(DeltaError::ModifiedSizeMismatch { + expected: header_mod_size, + actual: result.len() as u64, + }); + } + + Ok(result) +} + +/// Cursor for reading delta instructions. +struct DeltaCursor<'a> { + data: &'a [u8], + offset: usize, +} + +impl<'a> DeltaCursor<'a> { + fn new(data: &'a [u8]) -> Self { + Self { data, offset: 0 } + } + + fn is_empty(&self) -> bool { + self.offset >= self.data.len() + } + + fn read_byte(&mut self) -> Result { + if self.offset >= self.data.len() { + return Err(DeltaError::UnexpectedEof); + } + let byte = self.data[self.offset]; + self.offset += 1; + Ok(byte) + } + + fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], DeltaError> { + let end = self + .offset + .checked_add(len) + .ok_or(DeltaError::UnexpectedEof)?; + if end > self.data.len() { + return Err(DeltaError::UnexpectedEof); + } + let bytes = &self.data[self.offset..end]; + self.offset = end; + Ok(bytes) + } + + /// Reads a variable-length encoded size from the header. + /// + /// Format: each byte uses 7 bits for value, MSB indicates continuation. + /// Bytes are in little-endian order (LSB first). + fn read_size(&mut self) -> Result { + let mut file_len: u64 = 0; + let mut shift: u32 = 0; + + loop { + let byte = self.read_byte()?; + + // Add 7 bits of value at current shift position + let value = (byte & 0x7F) as u64; + file_len |= value.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?; + + // MSB clear means this is the last byte + if byte & 0x80 == 0 { + break; + } + + shift += 7; + } + + Ok(file_len) + } + + /// Reads `COPY` instruction parameters from the control byte. + /// Returns `(src_offset, copy_len)`. 
+ /// + /// Control byte format is `1sssoooo` (most significant bit first): + /// + /// * Bits 0-3 (`oooo`): which src_offset bytes follow + /// * Bits 4-6 (`sss`): which copy_len bytes follow + fn read_copy_params(&mut self, control: u8) -> Result<(usize, usize), DeltaError> { + let mut src_offset: u32 = 0; + for (mask, shift) in [(0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24)] { + if control & mask != 0 { + let byte = self.read_byte()? as u32; + src_offset |= byte.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?; + } + } + + let mut copy_len: u32 = 0; + for (mask, shift) in [(0x10, 0), (0x20, 8), (0x40, 16)] { + if control & mask != 0 { + let byte = self.read_byte()? as u32; + copy_len |= byte.checked_shl(shift).ok_or(DeltaError::SizeOverflow)?; + } + } + + if copy_len == 0 { + // Size of 0 means 65536 + copy_len = 0x10000; + } + + Ok((src_offset as usize, copy_len as usize)) + } +} + +/// Error type for delta operations. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DeltaError { + /// Unexpected end of delta data. + UnexpectedEof, + /// Size value overflowed during decoding. + SizeOverflow, + /// Original file size doesn't match header. + OriginalSizeMismatch { expected: u64, actual: u64 }, + /// Modified file size doesn't match header. + ModifiedSizeMismatch { expected: u64, actual: u64 }, + /// COPY instruction references out-of-bounds data. + CopyOutOfBounds { + offset: usize, + length: usize, + original_size: usize, + }, + /// COPY range calculation overflowed.
+ InvalidCopyRange, +} + +impl fmt::Display for DeltaError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + DeltaError::UnexpectedEof => write!(f, "unexpected end of delta data"), + DeltaError::SizeOverflow => write!(f, "size value overflow"), + DeltaError::OriginalSizeMismatch { expected, actual } => { + write!( + f, + "original size mismatch: expected {expected}, got {actual}" + ) + } + DeltaError::ModifiedSizeMismatch { expected, actual } => { + write!( + f, + "modified size mismatch: expected {expected}, got {actual}" + ) + } + DeltaError::CopyOutOfBounds { + offset, + length, + original_size, + } => { + write!( + f, + "copy out of bounds: offset={offset}, length={length}, original_size={original_size}" + ) + } + DeltaError::InvalidCopyRange => write!(f, "copy range calculation overflow"), + } + } +} + +impl std::error::Error for DeltaError {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn read_size_single_byte() { + // 0x0A = 10, MSB clear = end + let data = [0x0A]; + let mut cursor = DeltaCursor::new(&data); + assert_eq!(cursor.read_size().unwrap(), 10); + } + + #[test] + fn read_size_multi_byte() { + // 0x80 | 0x01 = 1, continue; 0x02 = 2 << 7 = 256; total = 257 + let data = [0x81, 0x02]; + let mut cursor = DeltaCursor::new(&data); + assert_eq!(cursor.read_size().unwrap(), 1 + (2 << 7)); + } + + #[test] + fn apply_add_only() { + // Header: orig_size=0, mod_size=5 + // ADD 5 bytes: "hello" + let delta = [ + 0x00, // orig_size = 0 + 0x05, // mod_size = 5 + 0x05, // ADD 5 bytes + b'h', b'e', b'l', b'l', b'o', + ]; + let result = apply(&[], &delta).unwrap(); + assert_eq!(result, b"hello"); + } + + #[test] + fn apply_copy_only() { + // Header: orig_size=5, mod_size=5 + // COPY offset=0, len=5 + let delta = [ + 0x05, // orig_size = 5 + 0x05, // mod_size = 5 + 0x90, // COPY: control=0x90 (0x80 | 0x10), offset=0, size byte present + 0x05, // size = 5 + ]; + let original = b"hello"; + let result = apply(original, 
&delta).unwrap(); + assert_eq!(result, b"hello"); + } + + #[test] + fn apply_copy_with_offset() { + // Header: orig_size=10, mod_size=5 + // COPY offset=5, len=5 + let delta = [ + 0x0A, // orig_size = 10 + 0x05, // mod_size = 5 + 0x91, // COPY: 0x80 | 0x10 | 0x01 (offset1 + size1 present) + 0x05, // offset = 5 + 0x05, // size = 5 + ]; + let original = b"helloworld"; + let result = apply(original, &delta).unwrap(); + assert_eq!(result, b"world"); + } + + #[test] + fn apply_mixed_instructions() { + // Create "HELLO world" from "hello world" + // Header: orig_size=11, mod_size=11 + // ADD 5: "HELLO" + // COPY offset=5, len=6: " world" + let delta = [ + 0x0B, // orig_size = 11 + 0x0B, // mod_size = 11 + 0x05, // ADD 5 bytes + b'H', b'E', b'L', b'L', b'O', // "HELLO" + 0x91, // COPY: offset1 + size1 present + 0x05, // offset = 5 + 0x06, // size = 6 + ]; + let original = b"hello world"; + let result = apply(original, &delta).unwrap(); + assert_eq!(result, b"HELLO world"); + } + + #[test] + fn apply_copy_size_zero_means_65536() { + // When size bytes result in 0, it means 65536 + // Header: orig_size=65536, mod_size=65536 + // COPY offset=0, len=65536 (encoded as 0) + let original = vec![0xAB; 65536]; + let delta = [ + 0x80, 0x80, 0x04, // orig_size = 65536 (varint) + 0x80, 0x80, 0x04, // mod_size = 65536 (varint) + 0x80, // COPY: no offset bytes, no size bytes = offset 0, size 65536 + ]; + let result = apply(&original, &delta).unwrap(); + assert_eq!(result.len(), 65536); + assert_eq!(result, original); + } + + #[test] + fn error_original_size_mismatch() { + let delta = [ + 0x0A, // orig_size = 10 + 0x05, // mod_size = 5 + ]; + let original = b"short"; // only 5 bytes + let err = apply(original, &delta).unwrap_err(); + assert!(matches!( + err, + DeltaError::OriginalSizeMismatch { + expected: 10, + actual: 5 + } + )); + } + + #[test] + fn error_copy_out_of_bounds() { + let delta = [ + 0x05, // orig_size = 5 + 0x05, // mod_size = 5 + 0x91, // COPY + 0x0A, // offset = 10 
(out of bounds!) + 0x05, // size = 5 + ]; + let original = b"hello"; + let err = apply(original, &delta).unwrap_err(); + assert!(matches!(err, DeltaError::CopyOutOfBounds { .. })); + } + + #[test] + fn error_unexpected_eof_in_add() { + let delta = [ + 0x00, // orig_size = 0 + 0x05, // mod_size = 5 + 0x05, // ADD 5 bytes + b'h', b'i', // only 2 bytes provided + ]; + let err = apply(&[], &delta).unwrap_err(); + assert_eq!(err, DeltaError::UnexpectedEof); + } +} diff --git a/src/binary/mod.rs b/src/binary/mod.rs index 696f0d6..c1c7ab4 100644 --- a/src/binary/mod.rs +++ b/src/binary/mod.rs @@ -5,6 +5,11 @@ //! //! Based on [DiffX Binary Diffs specification](https://diffx.org/spec/binary-diffs.html). +#[cfg(feature = "binary")] +mod base85; +#[cfg(feature = "binary")] +mod delta; + use std::{fmt, ops::Range}; /// The type of a binary patch block. @@ -54,9 +59,69 @@ pub enum BinaryPatch<'a> { /// /// This represents the `Binary files a/path and b/path differ` case, /// where git detected a binary change but didn't include the actual data. + /// + /// Calling [`apply()`](Self::apply) on this variant returns an error. Marker, } +impl<'a> BinaryPatch<'a> { + /// Applies a binary patch forward: original -> modified. + /// + /// - If the forward block is `Literal`: returns the decoded content directly. + /// - If the forward block is `Delta`: applies delta instructions to `original`. + /// + /// Unlike `git apply`, this doesn't validate the original content hash. + #[cfg(feature = "binary")] + pub fn apply(&self, original: &[u8]) -> Result, BinaryPatchParseError> { + match self { + BinaryPatch::Full { forward, .. } => Self::apply_block(forward, original), + BinaryPatch::Marker => Err(BinaryPatchParseErrorKind::NoBinaryData.into()), + } + } + + /// Applies a binary patch in reverse: modified -> original. + /// + /// - If the reverse block is `Literal`: returns the decoded content directly. + /// - If the reverse block is `Delta`: applies delta instructions to `modified`. 
+ /// + /// Unlike `git apply`, this doesn't validate the modified content hash. + #[cfg(feature = "binary")] + pub fn apply_reverse(&self, modified: &[u8]) -> Result, BinaryPatchParseError> { + match self { + BinaryPatch::Full { reverse, .. } => Self::apply_block(reverse, modified), + BinaryPatch::Marker => Err(BinaryPatchParseErrorKind::NoBinaryData.into()), + } + } + + /// Applies a single block (either literal or delta). + #[cfg(feature = "binary")] + fn apply_block(block: &BinaryBlock<'_>, base: &[u8]) -> Result, BinaryPatchParseError> { + match block.kind { + BinaryBlockKind::Literal => Self::decode_data(&block.data), + BinaryBlockKind::Delta => { + let delta_instructions = Self::decode_data(&block.data)?; + delta::apply(base, &delta_instructions).map_err(BinaryPatchParseError::from) + } + } + } + + /// See [Decoding Logic](https://diffx.org/spec/binary-diffs.html#decoding-logic) + #[cfg(feature = "binary")] + fn decode_data(binary_data: &BinaryData<'_>) -> Result, BinaryPatchParseError> { + use std::io::Read; + + let compressed = decode_base85_lines(binary_data.data)?; + + let mut decoder = flate2::read::ZlibDecoder::new(&compressed[..]); + let mut decompressed = Vec::new(); + decoder + .read_to_end(&mut decompressed) + .map_err(|e| BinaryPatchParseErrorKind::DecompressionFailed(e.to_string()))?; + + Ok(decompressed) + } +} + /// Represents a single binary payload in a Git binary diff. 
/// /// For example, the following patch block @@ -115,6 +180,20 @@ impl fmt::Display for BinaryPatchParseError { impl std::error::Error for BinaryPatchParseError {} +#[cfg(feature = "binary")] +impl From for BinaryPatchParseError { + fn from(e: base85::Base85Error) -> Self { + BinaryPatchParseErrorKind::Base85(e).into() + } +} + +#[cfg(feature = "binary")] +impl From for BinaryPatchParseError { + fn from(e: delta::DeltaError) -> Self { + BinaryPatchParseErrorKind::Delta(e).into() + } +} + impl From for BinaryPatchParseError { fn from(kind: BinaryPatchParseErrorKind) -> Self { Self { kind, span: None } @@ -126,6 +205,8 @@ impl From for BinaryPatchParseError { #[non_exhaustive] pub(crate) enum BinaryPatchParseErrorKind { /// Missing or invalid "GIT binary patch" header. + // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81 + #[cfg_attr(not(feature = "binary"), allow(dead_code))] InvalidHeader, /// First binary block (forward) not found. @@ -135,10 +216,26 @@ pub(crate) enum BinaryPatchParseErrorKind { MissingReverseBlock, /// No binary data available (marker-only patch). + // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81 + #[cfg_attr(not(feature = "binary"), allow(dead_code))] NoBinaryData, /// Invalid line length indicator in Base85 data. + // TODO: Switch to #[expect(dead_code)] when MSRV >= 1.81 + #[cfg_attr(not(feature = "binary"), allow(dead_code))] InvalidLineLengthIndicator, + + /// Base85 decoding failed. + #[cfg(feature = "binary")] + Base85(base85::Base85Error), + + /// Delta application failed. + #[cfg(feature = "binary")] + Delta(delta::DeltaError), + + /// Zlib decompression failed. 
+ #[cfg(feature = "binary")] + DecompressionFailed(String), } impl fmt::Display for BinaryPatchParseErrorKind { @@ -149,6 +246,12 @@ impl fmt::Display for BinaryPatchParseErrorKind { Self::MissingReverseBlock => write!(f, "second binary block not found"), Self::NoBinaryData => write!(f, "no binary data available"), Self::InvalidLineLengthIndicator => write!(f, "invalid line length indicator"), + #[cfg(feature = "binary")] + Self::Base85(e) => write!(f, "{e}"), + #[cfg(feature = "binary")] + Self::Delta(e) => write!(f, "{e}"), + #[cfg(feature = "binary")] + Self::DecompressionFailed(msg) => write!(f, "decompression failed: {msg}"), } } } @@ -273,6 +376,62 @@ fn parse_binary_block<'a>(parser: &mut BinaryParser<'a>) -> Option` +/// +/// From [5.1.1. Binary Payloads](https://diffx.org/spec/binary-diffs.html#binary-payloads): +/// +/// > Each line represents up to 52 bytes of pre-encoded data. +/// > There may be an unlimited number of lines. +/// > They contain the following fields: +/// > +/// > * `len_c` is a line length character. +/// > This encodes the length of the (pre-encoded) data written on this line. +/// > * `data` is Base85-encoded data for this line. +#[cfg(feature = "binary")] +fn decode_base85_lines(data: &str) -> Result, BinaryPatchParseError> { + // A rough estimate: In Base85, 5 chars -> 4 bytes + let mut result = Vec::with_capacity(data.len() * 4 / 5); + + for line in data.lines() { + if line.is_empty() { + continue; + } + + let line_bytes = line.as_bytes(); + + let length = decode_line_length(line_bytes[0]) + .ok_or(BinaryPatchParseErrorKind::InvalidLineLengthIndicator)?; + let encoded = &line[1..]; + let start = result.len(); + base85::decode_into(encoded, &mut result)?; + result.truncate(start + length); + } + + Ok(result) +} + +/// Decodes a line length character to its numeric value. 
+/// +/// From [Line Length Characters](https://diffx.org/spec/binary-diffs.html#line-length-characters): +/// +/// > Each encoded line in a binary diff payload is prefixed by a line length character. +/// > This encodes the length of the compressed (but not encoded) data for the line. +/// > +/// > Line length characters always represent a value between 1 and 52: +/// > +/// > * A value of A-Z represents a number between 1..26. +/// > * A value of a-z represents a number between 27..52. +#[cfg(feature = "binary")] +fn decode_line_length(c: u8) -> Option { + match c { + b'A'..=b'Z' => Some((c - b'A' + 1) as usize), + b'a'..=b'z' => Some((c - b'a' + 27) as usize), + _ => None, + } +} + #[cfg(test)] mod tests { use super::*; @@ -350,3 +509,54 @@ mod tests { } } } + +#[cfg(test)] +#[cfg(feature = "binary")] +mod apply_tests { + use super::*; + + #[test] + fn decode_line_length_uppercase() { + assert_eq!(decode_line_length(b'A'), Some(1)); + assert_eq!(decode_line_length(b'B'), Some(2)); + assert_eq!(decode_line_length(b'Z'), Some(26)); + } + + #[test] + fn decode_line_length_lowercase() { + assert_eq!(decode_line_length(b'a'), Some(27)); + assert_eq!(decode_line_length(b'b'), Some(28)); + assert_eq!(decode_line_length(b'z'), Some(52)); + } + + #[test] + fn decode_line_length_invalid() { + assert_eq!(decode_line_length(b'0'), None); + assert_eq!(decode_line_length(b'!'), None); + assert_eq!(decode_line_length(b' '), None); + } + + #[test] + fn apply_literal_patch() { + let input = "GIT binary patch\nliteral 10\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\n\nliteral 0\nKcmV+b0RR6000031\n\n"; + let (patch, _) = parse_binary_patch(input).unwrap(); + + let modified = patch.apply(&[]).unwrap(); + assert_eq!(modified.len(), 10); + assert_eq!(modified, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + + let original = patch.apply_reverse(&[]).unwrap(); + assert_eq!(original.len(), 0); + } + + #[test] + fn apply_with_crlf_line_endings() { + let input = "GIT binary patch\r\nliteral 
10\r\nUcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k\r\n\r\nliteral 0\r\nKcmV+b0RR6000031\r\n\r\n"; + let (patch, _) = parse_binary_patch(input).unwrap(); + + let modified = patch.apply(&[]).unwrap(); + assert_eq!(modified, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + let original = patch.apply_reverse(&[]).unwrap(); + assert_eq!(original.len(), 0); + } +} From a2190d2b89242efd9c4a46fc4f151ee5df7d28c8 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 17 Apr 2026 11:01:07 -0400 Subject: [PATCH 12/12] test: add binary patch to compat and replay tests Now both tests require `binary` Cargo feature. --- .github/workflows/ci.yml | 1 + .github/workflows/replay.yml | 2 +- Cargo.toml | 8 ++ tests/compat/common.rs | 45 +++++---- .../git/binary_and_text_mixed/in/foo.patch | 16 ++++ .../git/binary_and_text_mixed/in/image.png | 2 + .../git/binary_and_text_mixed/in/text.txt | 1 + .../git/binary_and_text_mixed/out/image.png | Bin 0 -> 10 bytes .../git/binary_and_text_mixed/out/text.txt | 1 + tests/compat/git/binary_delta/in/foo.patch | 9 ++ tests/compat/git/binary_delta/in/large.bin | Bin 0 -> 5120 bytes tests/compat/git/binary_delta/out/large.bin | Bin 0 -> 5120 bytes .../binary_delta_wrong_original/in/foo.patch | 9 ++ .../binary_delta_wrong_original/in/large.bin | 1 + tests/compat/git/binary_literal/in/foo.patch | 10 ++ tests/compat/git/binary_literal/out/small.bin | Bin 0 -> 10 bytes .../binary_literal_wrong_original/in/file.bin | 1 + .../in/foo.patch | 8 ++ .../out/file.bin | Bin 0 -> 10 bytes .../binary_mixed_delta_literal/in/favicon.png | Bin 0 -> 2919 bytes .../binary_mixed_delta_literal/in/foo.patch | 86 ++++++++++++++++++ .../out/favicon.png | Bin 0 -> 1125 bytes tests/compat/git/mod.rs | 69 ++++++++++++++ tests/compat/main.rs | 10 +- tests/replay.rs | 63 ++++++++++--- 25 files changed, 306 insertions(+), 36 deletions(-) create mode 100644 tests/compat/git/binary_and_text_mixed/in/foo.patch create mode 100644 tests/compat/git/binary_and_text_mixed/in/image.png create mode 100644 
tests/compat/git/binary_and_text_mixed/in/text.txt create mode 100644 tests/compat/git/binary_and_text_mixed/out/image.png create mode 100644 tests/compat/git/binary_and_text_mixed/out/text.txt create mode 100644 tests/compat/git/binary_delta/in/foo.patch create mode 100644 tests/compat/git/binary_delta/in/large.bin create mode 100644 tests/compat/git/binary_delta/out/large.bin create mode 100644 tests/compat/git/binary_delta_wrong_original/in/foo.patch create mode 100644 tests/compat/git/binary_delta_wrong_original/in/large.bin create mode 100644 tests/compat/git/binary_literal/in/foo.patch create mode 100644 tests/compat/git/binary_literal/out/small.bin create mode 100644 tests/compat/git/binary_literal_wrong_original/in/file.bin create mode 100644 tests/compat/git/binary_literal_wrong_original/in/foo.patch create mode 100644 tests/compat/git/binary_literal_wrong_original/out/file.bin create mode 100644 tests/compat/git/binary_mixed_delta_literal/in/favicon.png create mode 100644 tests/compat/git/binary_mixed_delta_literal/in/foo.patch create mode 100644 tests/compat/git/binary_mixed_delta_literal/out/favicon.png diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0ca9b8..481f878 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,7 @@ jobs: - run: rustup toolchain install ${{ matrix.rust }} --profile minimal - run: cargo +${{ matrix.rust }} check --all-targets --all-features - run: cargo +${{ matrix.rust }} test + - run: cargo +${{ matrix.rust }} test -F binary lint: runs-on: ubuntu-latest diff --git a/.github/workflows/replay.yml b/.github/workflows/replay.yml index 08384d6..48368fc 100644 --- a/.github/workflows/replay.yml +++ b/.github/workflows/replay.yml @@ -67,7 +67,7 @@ jobs: exit 1 fi - run: rustup toolchain install stable --profile minimal - - run: cargo test --release --test replay -- --ignored --nocapture + - run: cargo test --release --test replay -F binary -- --ignored --nocapture env: 
DIFFY_TEST_REPO: ${{ inputs.repo_url == '' && '.' || 'target/test-repo' }} DIFFY_TEST_COMMITS: ${{ inputs.commits }} diff --git a/Cargo.toml b/Cargo.toml index 68ce3ff..125a3c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,3 +27,11 @@ snapbox = { version = "0.6.24", features = ["dir"] } [[example]] name = "patch_formatter" required-features = ["color"] + +[[test]] +name = "compat" +required-features = ["binary"] + +[[test]] +name = "replay" +required-features = ["binary"] diff --git a/tests/compat/common.rs b/tests/compat/common.rs index ad26fd6..40d2625 100644 --- a/tests/compat/common.rs +++ b/tests/compat/common.rs @@ -9,7 +9,7 @@ use std::{ }; use diffy::{ - binary::BinaryPatch, + binary::{BinaryPatch, BinaryPatchParseError}, patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet, PatchSetParseError}, }; @@ -263,7 +263,7 @@ fn print_patch_version() { pub enum TestError { Parse(PatchSetParseError), Apply(diffy::ApplyError), - Io(std::io::Error), + Binary(BinaryPatchParseError), } impl std::fmt::Display for TestError { @@ -271,7 +271,7 @@ impl std::fmt::Display for TestError { match self { TestError::Parse(e) => write!(f, "parse error: {e}"), TestError::Apply(e) => write!(f, "apply error: {e}"), - TestError::Io(e) => write!(f, "io error: {e}"), + TestError::Binary(e) => write!(f, "binary patch error: {e}"), } } } @@ -362,29 +362,40 @@ pub fn apply_diffy( } }; + let read_original = || { + if let Some(name) = original_name { + let original_path = in_dir.join(bytes_to_path(name)); + fs::read(&original_path).unwrap_or_default() + } else { + Vec::new() + } + }; + + let write_modified = |result: &[u8]| { + let result_path = output_dir.join(bytes_to_path(target_name)); + if let Some(parent) = result_path.parent() { + fs::create_dir_all(parent).unwrap(); + } + fs::write(&result_path, result).unwrap(); + }; + match file_patch.patch() { PatchKind::Text(patch) => { - let original = if let Some(name) = original_name { - let original_path = 
in_dir.join(bytes_to_path(name)); - fs::read(&original_path).map_err(TestError::Io)? - } else { - Vec::new() - }; + let original = read_original(); let result = diffy::apply_bytes(&original, patch).map_err(TestError::Apply)?; - let result_path = output_dir.join(bytes_to_path(target_name)); - if let Some(parent) = result_path.parent() { - fs::create_dir_all(parent).unwrap(); - } - fs::write(&result_path, &result).unwrap(); + write_modified(&result); } PatchKind::Binary(BinaryPatch::Marker) => { // Dont do anything if it is just a binary patch marker. } - PatchKind::Binary(_) => { - // Binary patch application requires the `binary` feature. - // Will be wired up when that feature is added. + PatchKind::Binary(patch) => { + let original = read_original(); + + let result = patch.apply(&original).map_err(TestError::Binary)?; + + write_modified(&result); } } } diff --git a/tests/compat/git/binary_and_text_mixed/in/foo.patch b/tests/compat/git/binary_and_text_mixed/in/foo.patch new file mode 100644 index 0000000..9ae73cb --- /dev/null +++ b/tests/compat/git/binary_and_text_mixed/in/foo.patch @@ -0,0 +1,16 @@ +diff --git a/image.png b/image.png +index 0dd1608e45a9c4d35bfc1e6f266a796364aa8754..bccac03558b00545e7ea8ced4a3a1ee232cc185a 100644 +GIT binary patch +literal 10 +UcmV+l0QLWgP)&C2V_V*#WK--S zBVwa$Qc>CVe~2gp%FIc0GITmkOWRdRQEMmVnHk@)n0}jy)YRZ2E>S4v)yQrWOqy%C zdgGx1O3NTlf2Us8A}W>x>EtwIF$OTi6@1sv;(4 zSerdbxr+=xZ*CHL1$$Y~T$p4Ql#onXq!jP!-SBRm?3{^{)&p}f(rnh>tgKb z0#OkH>rALj017{~{J53G%CxL`Dy*DE6{&tSlxro?3Fc|vT#w|asfzO_Ul0Z*R^kdE#G>fnm!7AlT4LsQiQA1JQJ$~VH7UW}xT0Ar zR{}HtO#4SxnK5{FQyL3u3R?X$C5`2p`SRH;FOE28UQeuoI4mJxbsBUgASe$R4tsvVn630le?D zMN5q`%O)Rccq~)bw>wPs!fx~VfZcbPP?U9f!pLQO-(_^If@v&kSR@~n4asb zmXL$%jpwQhAcB3){9)QPqT59E_@{H|Pig%{_3x0M6)!E_HFJ|=>M~Gs zQCwuqC7jz*s7kk{zjyQJVA)-#xX>`FoNN*ekORq?UJHH;BJBw3Mf;-h6UTuqy@Eqi zF?|buaQ}k5X>co?%e8nRuBJG-KO+wWbx5OFfGB-)y7VPv1|VYGJk4IMbQ@`b_{z{2 z`}fZoQxe{^$-dg+i{+4y5iPaFmsgx3D&Q7+7FiYU1Pk1#gOJ)x8yav9>4S>u7S0l` 
z-Vx_v@G{J@^6tq(J(ec$wJ^ZJ1WV$gcCds}_O%9%zav3@9T?o3Q5o`OJ&rv|J_0ZQ z^DS}&B2Gsod;)y2sZM-i5~?~@(3Uh_>qFkF@Uyf`Mno%hxhc$nG$xVyFc zUec{06WnI{V8}@7YU~_h+{b3%Vy`4C>QJo1mF31Os_gZFR470hjHws7t??a}R`X^K zQCB$+d#d5+MU)W1k>&uzMkTg(fFbR6B2mi=e?y+TQ!!?|0K{A@=nf1uqr&(YQcYFI{L$hVD3E@DkNU>X@dKXf+j)=7vwc-Pbly&x0||iaQ~(g zT3D#h6~k5`x}cK8E>9(1wK}^|z1}3x@G$ZL#?ZiTq7f?<*$VpXj>$IT2@~(WC*e*n z6uQVCeQ7fJ4;7isICnMpoSba_2oesDirM%K0o?%a;BzmBtz0Qss(z24_d|aG6&O>( z{{v2hi)5iN?FIK?bC{KeEMqie#(8jM^35J0t_|e}S}k%+S_jg`hVZ8j3K~APN>}Uj zahvzpmhU>;KP|7vqkdCWTF?zBbcm%#ytAt2yNt46;WQWWy5*Bc2(a`Iogup=I97#c zXy(B5W*9~tFC;N(1h>!{9^yoZXUVVMAy+-^fa~5|;3|e_VeU*DL<{drWd&wVMZ6(+M0E##EkDskklG00;4I<+wi1#G$v~Chc`qIg5%jF&Wk=wdmUJV-fw6~=9 zzqEZw&CTp+GA&E|5d*KnFkSsIYp&6(DXE4O?x{^Z(^ChNjqJIi*Z2NyrI(Y#?GMl7 zlps#EOUvXErQ-3$Fe!NXOv3QIwbHToa>p0TXj$Bi^UoQjW!``l z@Nrc%U^^a0l5iz*bAd54vC>TRD5k=TJc`vvVW8O0D#<~+Ia}{&W{^c`(3i)HHdKsU zU-@hy$ReEvxLr9X7W@NG!%WQRq}=owID9M1Djm-dYu=KnvqU`hz}q;Mdv;aG{m3IA zRz_PF5e;LJ-s+9zZW0= zq#jU81@MSGvD#o+?)NsE$J^Ih2qyV=2OyOlr{U%*Dw(`&vL45QUz_ASOl~K~gxt+Q zEJlrYO(2=|J>G1+f*t>4As{B%(k+nE-{Nfh`T3Z*`3@a)2KB?!qNMC|;8Api+BrOg z03oHz&!P#4Ur<+#2|Fvi?vQ$4V&0p3vpUe1Lp>h&|;m6HRaL8j>vu*`q4~2}K55!{sMXA)w1c`(ZPN z)0$Xj_cxW@w!4(ndA!@`?Rz6gMUbf7(|W?Cm#w0m z$gP}{QKad(5UrzwZf{k$h&fT?f!r62RB}{;Wy7Xg8pO-B}0irpZ&%kyAqY!^3=|MUtn1FR~~)u09I0)?;5O}LxXc#Aha zQ;Dhl5?>aDy}9ynzvJzs#(3xekyylTY=eJTQ4ijc+|$YA?s=TvLGt?ai~6OrX9BBg z*2GrN8@vK#m{xAO2OE^2;~*L}ed2Oz%_H^agmv_qHtn95$?vAU0als~BXv7P{H7q~ z2U9;6SCO|q)i+@Uaz>Y(2hUZs zq3z>{I7>;5A{7^<>kPvrN_{>av~k@7>fn*p;0UIFP`$*J)wlYxfgj+5A+wjiSa`@j z)aP`e&u7gM@Y;3tm@@S=dJCQ-XftSRp(!Eg1EplGo?K~0ty&~6)sG(BVk}as!^~|5 z)l*wPiKfYxt&Y)yJF%`~1jNVIbam_2$Qz)w$@g5QpWg=Kp&1=P-o%d~&x}+JxO{Fh zscLvTrX}s7qLQf4kb$!=mCw?+FVx1+2_t!r)+$Bb<0i}_wgyh|?OR{bxG5Rk?u7ud`8|o{i$#bL5Y3j1$32rRr)&EnGpTMTtay?y<|&3IfTW@KcU$%3TN2DuT;OA<1VTsn z9f-0y(t({nRKk~KEuZ)n;$=XJBxOeSFcXWWCLL6~_|CQqU!e(Vyj`su)OI=oyhdy= z5ok1#IPois2V`hupRGSYd*PoM zj0!d^VO{X6y+y_!BEoO@#$MBulI2|(`0lbt= 
z#nzsXLKycOl?w#&=?Fo;6~}fBY$8h=(~35@IjB0iDaToC>ONJyv?0=M#8+uBLp+ww z&t_ddbUe@@2s{fXlxh>ov*vBN+Dh$9IE*LBHj-&bUGi!y=?6*+knenfQP+7|Vsb=M zU+U(mr-})lCx8z5sO?;EQeYTN0KoVKsklKQG=QLc6^gM?ArA<$(@MYgK#$H9Pmtdk z<9C0eT~>x@MVEZ&G1U{rn&g>Kwpe#X&^$TB+U&&G*a5keR)BvfqEM;#aApH zVzoyFu=(TvSAJ5|Cy!+h_CGz1`NL7)7E6+o2Zbxo-MoFo<2?|_z9zpnIv%o1SuK7x zaZ>=%cUu!X>59F3#I=Ib-4TL+Z9Y|BbGPMNgP)XA$T5eyR{1 z>QO4j!WobyV7xk0F)J6Ui{L+_E~=Q8w3gLUqfCZbIdPhFd>$87JNxB|Mv5o8Iq4rO z9{uM_RA*6!3I`Jw_kWtMLW>*hhC1-O$QaYTtBy~k{^0kqyjLZJ=_1q2wItMCx} zA3EIUBEw)qU>R4wg)aJ(GDeCshXQr`T%IE;=yrYok~UNT7Y^&+;R)bP`biKpV*weBQk2c(_sD5lif zC2MKzR~%}yh(Grk3JHW^Bk%D1o(^+!=E_3 zn7S^}Y^)$5e_mFPiD=~qdIV{c+dm@#R?|PY@9c&yVusFF)+GyXOdnftGM0VqAdKHU z5=+q3I^puAIyw-fn~`2DB7+5h`c{#<6w!xMi7&_BBDdz%F=EZjamcz!Z0B94s#7y8 z>wTX>=!?n)1WQYVq*-SbF`@#jNv32D)}VP8CLc6XQRI~w>GJb}452Hpgu&<5o8*VT zi<$l^&c`DH03?v~g)O7Wv_2Ow$-or2_zy4@#|jLBaf1sQep142KTO`*v_x z$N=RsAzD5zwawueOGUJ?3jT}z<03j8 iFeF}$w^8qC1SDUCXp$k{m_!>&X-}m2V55dcq+r238RXvp literal 0 HcmV?d00001 diff --git a/tests/compat/git/binary_delta/out/large.bin b/tests/compat/git/binary_delta/out/large.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8a025a84f32c7552841b683361d03d3e171294d GIT binary patch literal 5120 zcmV+b6#whcY*DIPbx_e&SH6(E=m?lE#2}*s*|N=_Pn6rew1;C{e>&C2V_V*#WK--S zBVwa$Qc>CVe~2gp%FIc0GITmkOWRdRQEMmVnHk@)n0}jy)YRZ2E>S4v)yQrWOqy%C zdgG!0|Ns9^f2Us8A}W>x>EtwIF$OTi6@1sv;(4 zSerdbxr+=xZ*CHL1$$Y~T$p4Ql#onXq!jP!-SBRm?3{^{)&p}f(rnh>tgKb z0#OkH>rALj017{~{J53G%CxL`Dy*DE6{&tSlxro?3Fc|vT#w|asfzO_Ul0Z*R^kdE#G>fnm!7AlT4LsQiQA1JQJ$~VH7UW}xT0Ar zR{}HtO#4SxnK5{FQyL3u3R?X$C5`2p`SRH;FOE28UQeuoI4mJxbsBUgASe$R4tsvVn630le?D zMN5q`%O)Rccq~)bw>wPs!fx~VfZcbPP?U9f!pLQO-(_^If@v&kSR@~n4asb zmXL$%jpwQhAcB3){9)QPqT59E_@{H|Pig%{_3x0M6)!E_HFJ|=>M~Gs zQCwuqC7jz*s7kk{zjyQJVA)-#xX>`FoNN*ekORq?UJHH;BJBw3Mf;-h6UTuqy@Eqi zF?|buaQ}k5X>co?%e8nRuBJG-KO+wWbx5OFfGB-)y7VPv1|VYGJk4IMbQ@`b_{z{2 z`}fZoQxe{^$-dg+i{+4y5iPaFmsgx3D&Q7+7FiYU1Pk1#gOJ)x8yav9>4S>u7S0l` 
z-Vx_v@G{J@^6tq(J(ec$wJ^ZJ1WV$gcCds}_O%9%zav3@9T?o3Q5o`OJ&rv|J_0ZQ z^DS}&B2Gsod;)y2sZM-i5~?~@(3Uh_>qFkF@Uyf`Mno%hxhc$nG$xVyFc zUec{06WnI{V8}@7YU~_h+{b3%Vy`4C>QJo1mF31Os_gZFR470hjHws7t??a}R`X^K zQCB$+d#d5+MU)W1k>&uzMkTg(fFbR6B2mi=e?y+TQ!!?|0K{A@=nf1uqr&(YQcYFI{L$hVD3E@DkNU>X@dKXf+j)=7vwc-Pbly&x0||iaQ~(g zT3D#h6~k5`x}cK8E>9(1wK}^|z1}3x@G$ZL#?ZiTq7f?<*$VpXj>$IT2@~(WC*e*n z6uQVCeQ7fJ4;7isICnMpoSba_2oesDirM%K0o?%a;BzmBtz0Qss(z24_d|aG6&O>( z{{v2hi)5iN?FIK?bC{KeEMqie#(8jM^35J0t_|e}S}k%+S_jg`hVZ8j3K~APN>}Uj zahvzpmhU>;KP|7vqkdCWTF?zBbcm%#ytAt2yNt46;WQWWy5*Bc2(a`Iogup=I97#c zXy(B5W*9~tFC;N(1h>!{9^yoZXUVVMAy+-^fa~5|;3|e_VeU*DL<{drWd&wVMZ6(+M0E##EkDskklG00;4I<+wi1#G$v~Chc`qIg5%jF&Wk=wdmUJV-fw6~=9 zzqEZw&CTp+GA&E|5d*KnFkSsIYp&6(DXE4O?x{^Z(^ChNjqJIi*Z2NyrI(Y#?GMl7 zlps#EOUvXErQ-3$Fe!NXOv3QIwbHToa>p0TXj$Bi^UoQjW!``l z@Nrc%U^^a0l5iz*bAd54vC>TRD5k=TJc`vvVW8O0D#<~+Ia}{&W{^c`(3i)HHdKsU zU-@hy$ReEvxLr9X7W@NG!%WQRq}=owID9M1Djm-dYu=KnvqU`hz}q;Mdv;aG{m3IA zRz_PF5e;LJ-s+9zZW0= zq#jU81@MSGvD#o+?)NsE$J^Ih2qyV=2OyOlr{U%*Dw(`&vL45QUz_ASOl~K~gxt+Q zEJlrYO(2=|J>G1+f*t>4As{B%(k+nE-{Nfh`T3Z*`3@a)2KB?!qNMC|;8Api+BrOg z03oHz&!P#4Ur<+#2|Fvi?vQ$4V&0p3vpUe1Lp>h&|;m6HRaL8j>vu*`q4~2}K55!{sMXA)w1c`(ZPN z)0$Xj_cxW@w!4(ndA!@`?Rz6gMUbf7(|W?Cm#w0m z$gP}{QKad(5UrzwZf{k$h&fT?f!r62RB}{;Wy7Xg8pO-B}0irpZ&%kyAqY!^3=|MUtn1FR~~)u09I0)?;5O}LxXc#Aha zQ;Dhl5?>aDy}9ynzvJzs#(3xekyylTY=eJTQ4ijc+|$YA?s=TvLGt?ai~6OrX9BBg z*2GrN8@vK#m{xAO2OE^2;~*L}ed2Oz%_H^agmv_qHtn95$?vAU0als~BXv7P{H7q~ z2U9;6SCO|q)i+@Uaz>Y(2hUZs zq3z>{I7>;5A{7^<>kPvrN_{>av~k@7>fn*p;0UIFP`$*J)wlYxfgj+5A+wjiSa`@j z)aP`e&u7gM@Y;3tm@@S=dJCQ-XftSRp(!Eg1EplGo?K~0ty&~6)sG(BVk}as!^~|5 z)l*wPiKfYxt&Y)yJF%`~1jNVIbam_2$Qz)w$@g5QpWg=Kp&1=P-o%d~&x}+JxO{Fh zscLvTrX}s7qLQf4kb$!=mCw?+FVx1+2_t!r)+$Bb<0i}_wgyh|?OR{bxG5Rk?u7ud`8|o{i$#bL5Y3j1$32rRr)&EnGpTMTtay?y<|&3IfTW@KcU$%3TN2DuT;OA<1VTsn z9f-0y(t({nRKk~KEuZ)n;$=XJBxOeSFcXWWCLL6~_|CQqU!e(Vyj`su)OI=oyhdy= z5ok1#IPois2V`hupRGSYd*PoM zj0!d^VO{X6y+y_!BEoO@#$MBulI2|(`0lbt= 
z#nzsXLKycOl?w#&=?Fo;6~}fBY$8h=(~35@IjB0iDaToC>ONJyv?0=M#8+uBLp+ww z&t_ddbUe@@2s{fXlxh>ov*vBN+Dh$9IE*LBHj-&bUGi!y=?6*+knenfQP+7|Vsb=M zU+U(mr-})lCx8z5sO?;EQeYTN0KoVKsklKQG=QLc6^gM?ArA<$(@MYgK#$H9Pmtdk z<9C0eT~>x@MVEZ&G1U{rn&g>Kwpe#X&^$TB+U&&G*a5keR)BvfqEM;#aApH zVzoyFu=(TvSAJ5|Cy!+h_CGz1`NL7)7E6+o2Zbxo-MoFo<2?|_z9zpnIv%o1SuK7x zaZ>=%cUu!X>59F3#I=Ib-4TL+Z9Y|BbGPMNgP)XA$T5eyR{1 z>QO4j!WobyV7xk0F)J6Ui{L+_E~=Q8w3gLUqfCZbIdPhFd>$87JNxB|Mv5o8Iq4rO z9{uM_RA*6!3I`Jw_kWtMLW>*hhC1-O$QaYTtBy~k{^0kqyjLZJ=_1q2wItMCx} zA3EIUBEw)qU>R4wg)aJ(GDeCshXQr`T%IE;=yrYok~UNT7Y^&+;R)bP`biKpV*weBQk2c(_sD5lif zC2MKzR~%}yh(Grk3JHW^Bk%D1o(^+!=E_3 zn7S^}Y^)$5e_mFPiD=~qdIV{c+dm@#R?|PY@9c&yVusFF)+GyXOdnftGM0VqAdKHU z5=+q3I^puAIyw-fn~`2DB7+5h`c{#<6w!xMi7&_BBDdz%F=EZjamcz!Z0B94s#7y8 z>wTX>=!?n)1WQYVq*-SbF`@#jNv32D)}VP8CLc6XQRI~w>GJb}452Hpgu&<5o8*VT zi<$l^&c`DH03?v~g)O7Wv_2Ow$-or2_zy4@#|jLBaf1sQep142KTO`*v_x z$N=RsAzD5zwawueOGUJ?3jT}z<03j8 iFeF}$w^8qC1SDUCXp$k{m_!>&X-}m2V55dcq+r3GG3TWK literal 0 HcmV?d00001 diff --git a/tests/compat/git/binary_delta_wrong_original/in/foo.patch b/tests/compat/git/binary_delta_wrong_original/in/foo.patch new file mode 100644 index 0000000..9abf334 --- /dev/null +++ b/tests/compat/git/binary_delta_wrong_original/in/foo.patch @@ -0,0 +1,9 @@ +diff --git a/large.bin b/large.bin +index ffba9ca51637158e8f46f0a2a9014372778eeef4..a8a025a84f32c7552841b683361d03d3e171294d 100644 +GIT binary patch +delta 15 +ZcmV+q0Pz2SD1a!CWCZ{J|NpUQm=imI2nhfH + +delta 15 +ZcmV+q0Pz2SD1a!CWCQ_9%OJ66m=h@q1w#M; + diff --git a/tests/compat/git/binary_delta_wrong_original/in/large.bin b/tests/compat/git/binary_delta_wrong_original/in/large.bin new file mode 100644 index 0000000..0d6307b --- /dev/null +++ b/tests/compat/git/binary_delta_wrong_original/in/large.bin @@ -0,0 +1 @@ +WRONG CONTENT \ No newline at end of file diff --git a/tests/compat/git/binary_literal/in/foo.patch b/tests/compat/git/binary_literal/in/foo.patch new file mode 100644 index 0000000..61ea8bf --- /dev/null +++ 
b/tests/compat/git/binary_literal/in/foo.patch @@ -0,0 +1,10 @@ +diff --git a/small.bin b/small.bin +new file mode 100644 +index 0000000000000000000000000000000000000000..df93f5f3f72487244976c34e85525cf445016566 +GIT binary patch +literal 10 +UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k + +literal 0 +KcmV+b0RR6000031 + diff --git a/tests/compat/git/binary_literal/out/small.bin b/tests/compat/git/binary_literal/out/small.bin new file mode 100644 index 0000000000000000000000000000000000000000..df93f5f3f72487244976c34e85525cf445016566 GIT binary patch literal 10 RcmZQzWMXDvWn<^y1ONc904@Lk literal 0 HcmV?d00001 diff --git a/tests/compat/git/binary_literal_wrong_original/in/file.bin b/tests/compat/git/binary_literal_wrong_original/in/file.bin new file mode 100644 index 0000000..605cc40 --- /dev/null +++ b/tests/compat/git/binary_literal_wrong_original/in/file.bin @@ -0,0 +1 @@ +WRONG CONTENT - not matching the hash in patch \ No newline at end of file diff --git a/tests/compat/git/binary_literal_wrong_original/in/foo.patch b/tests/compat/git/binary_literal_wrong_original/in/foo.patch new file mode 100644 index 0000000..d3e39d9 --- /dev/null +++ b/tests/compat/git/binary_literal_wrong_original/in/foo.patch @@ -0,0 +1,8 @@ +diff --git a/file.bin b/file.bin +index df93f5f3f72487244976c34e85525cf445016566..a8a025a84f32c7552841b683361d03d3e171294d 100644 +GIT binary patch +literal 10 +UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k + +literal 10 +UcmV+l0QLU>0RjUA1qKHQ2>`DEE&u=k diff --git a/tests/compat/git/binary_literal_wrong_original/out/file.bin b/tests/compat/git/binary_literal_wrong_original/out/file.bin new file mode 100644 index 0000000000000000000000000000000000000000..df93f5f3f72487244976c34e85525cf445016566 GIT binary patch literal 10 RcmZQzWMXDvWn<^y1ONc904@Lk literal 0 HcmV?d00001 diff --git a/tests/compat/git/binary_mixed_delta_literal/in/favicon.png b/tests/compat/git/binary_mixed_delta_literal/in/favicon.png new file mode 100644 index 
0000000000000000000000000000000000000000..5109c1de8bea744180448b347fb44ddef23b90f4 GIT binary patch literal 2919 zcmai030P8T7skEJu&Hdcv?0>65vtoO=gysTc){dn_8A;mgY3Mwv7J;8=I#3|L3_2=X>9CzO#MjlI-o}Hd}k4HUfc| zO?4;xz;Be|o}mdpp_rIq1VX(S^!1nc(>w`mA>W+E5rzWha=r-05eTA#T*P9B0}@my z5C#fJ=%K5nXcWjHq5Z6AIGV@>;DPQjVt^jw<;#uoG@8VBUyC+h;}WB70)unGigVUEN8lP&?d-=HgyFX$qkP$GnQLeX>ppKUP> zf1wDc@9QdZmgs9_8ts46`TWnqfFzVCI1wL{`k7CEZvgp5ivX+-012gHHb9921d^r7 zOepd~@CM~Tgg+VN0|E%nE(yIF|1U7$8=#9YLMW!gO$Ct97D^pO<_J_iD-7@hITGHq ztxxNofFSJOfWor~siL$D0SL!3wZBrw{}V<++u-4*QWRVX1$-5nt^+~o9ZkrAom;Kc z`UFv3T)f3XE*Jr0kdK=)ib`>@wy?9d#^BAT0)dN0fOSD`G?^uVS$G^Chr!ukEO5RS zHUvvN0cQn^1l*KAN;Z5JI4lY4|M3+ajUvJtG#bGjgd{?7v~p>x&FMholr$wqfXegak(Iz&S(^TbignHAV!(e zSYp&_3zQ<56`$+McmQno+fiaa`iNDWy}z9#V!sH3s3iR3u)=y%Yw!?&rwMj?oWR2L zKmr8t6cWSZCPU1#K_FBTLAXF)6b6AXO_8QFtRka%~Izh4ror z*EZ_|=QE<;c-&c(KbCDA2%Is!@H+g)rZQ&0D#{5Cz@kMoLfJM}sdZNl#oCX>4%v0a znisL7DOS0$j+g^6mUBlNs`}n%hY?$wJRPq86v`{7)93jX?p_lVRnlJ&&afZLZ#&hz zO*L;r_Yz7At=pnIS&x1(ID65shfw2gS^Mv+f$LH_YO+OYq_KaMk=!7=N$s8 zYVb^P@k%r7@*~gE_dVG>kS=f8ngJ&BpPbVtC(o&BusyzO&arf-3>V3FCjyziw}vL} z9Oh8PdKp&CrT}9PWUANt^pi0gLS2UocM;D-O-_!s-5S<7^r8#bN(^WZTqFPeacoP0 zy)NeEyBWEQyN`L_-b6@Dk~P_QYzX;bVSD#L!3h)D<2bbqiwl3s9^aQ+(V3xBlAVY} z((ZcO3=N+xYIsemXzw|b7b4Ro^u(3l)Me_0bv4ljj>4afnIiW92!WV2PjRatG7sv* zpP6bZ*~!;`Yi0U6&-pUA8W*JpZr^s@LEElz(fOFOSxl}IQgcO)DY)q9iK>b%>u&__ zvRF9OMNB(k`N6GOoqZPEZY5um`P|9V%DR}L>-`rGr4p1@kcTSTc%3vbz{xP`*qS+< zRl?zRy}OoBI<{=+M)PZfMxH~$tT?`djmk)EW4fNbO1!E#X-UWeTiuEXf-vFTtUuHS z%c>{aPOG#ca`;4~smG4gTVB7OEBF6ZZRp{SU~?`nB*f81&>>G@IbC>r8?!JFg9}h z32oc#$D||Zkr31zZ$H2E5}cLQ$@}*M)^!a&yv*?R?TI>OXd04|dAgFp%d+PC2!;8l zk|J<;{INS(HnDoo>swFLhh7Tjp3`(h%_%E5>*@Eb%#s#0aLuc*=6Z?Q$bk9d*ai8|Oy~^8zMPyKV`^t-=c~-|`ud_RhdcWd7D=8|%sj<> zA7A=zVZ6Jv>@X*rAYDtLP#zQjJCTOAsM><&`NGa(hEys=@*n4N#$HYUv+miL+Q_QJePvpR)YMcigQ0iSd;4*>-39u=C2}WQ!>F$6s;U`Vok8)> zNF&uFB_#(}ht)T>wN-X>bZBR7R4>U&`(V;9b#d9Esik%3RtbL3Y$H1{`GS946ZEnj z)7IL0;h&yB 
z^p~mghPtj97>Q&~mJ1n?k&&4}bb4yGLqc9k%6GjzJw+4wiJI<(-Hv-%0sGtYHon}` z(C0xfVU=q#n9SUQ0v{5Iw72I~7NR3A6DYM+KY^*`<- zHk2kfhb5Je4BJD-A+*@(Gwzg%9mTv3ayQk{wy-2a6X6N$1FCQN-$_u`h zo18qe^Qvs!8RogQw9RQ1N3ymRCRN~cdV70oENMpeakk-FFJ8RhWafCb46iJ+^&ad^ zbP5f6i)%}2YFfYXikJcbK!CyS>ZjW3s>QKmc`-3D9aXiWUpTLR8`n#Y>~Ar~&P}d! zh#PM#ivK`>$!Te6A(`0*94>bo)K}kh|2~z)S^~G|Br#^#G~juD(&ERuvQ~k5((c6I z$LnlbhQ{cDfgV)q%*x8jil>pTZf*vKhCL51h1b>9$tRdzq4z!*>5mR~sTq8qYG(R0 z^v`Ab#-mp!`BolVW9>Fmp7j>C(^~?|`pkkF0tfqmbA6Ml6`Ne%l@YX&ivPz{iWfQ0 IIpoLx0#^I3DgXcg literal 0 HcmV?d00001 diff --git a/tests/compat/git/binary_mixed_delta_literal/in/foo.patch b/tests/compat/git/binary_mixed_delta_literal/in/foo.patch new file mode 100644 index 0000000..7f52479 --- /dev/null +++ b/tests/compat/git/binary_mixed_delta_literal/in/foo.patch @@ -0,0 +1,86 @@ +diff --git a/favicon.png b/favicon.png +index 5109c1de8bea744180448b347fb44ddef23b90f4..69b8613ce1506e1c92b864f91cc46df06348c62b 100644 +GIT binary patch +delta 1115 +zcmV-h1f=`t7Uc+#8Gi%-007x@vVQ;o00d`2O+f$vv5yP +zfP?@5`Tzg`fam}Kbua(`>RI+y?e7jT@qQ9J+u00Lr5M??VshmXv^00009a7bBm +z000DZ000DZ0m8#+P5=M^2XskIMF-{q4Gj?pmKmEP0008eNq +zS*l<~QA$1JAY?<@4F!`%H{0#bjE9}fZuU?Ayt&Np_x{ZHeczk+=q00aIO{BOMm>NW +zGb^R2YFbUrw129ilv&5_?R*#op%sCs1zv6w<_>da(*43C8z5GS?N}X +z{^V`QxQx)j*!`A8b;E|$ExHA=4hUt88|nlQ7tEk9hz=YqEbOPg@0Wz`s +zmN!~(9dV=u?I~Mvh)g?mp_)jf?D@Nr{DQJb>GW45GWc;+ig_8yt0(H +zchI)mFn`!(GAY|t|C#flvV9n{2WCVfKibO<1Qi=LY`EQ$z;UqGqDW*0&cq!s6My4; +zPXgEE5_G3UA~k24J=gwQb;>^P#8wX*Y*kE)MBeQ{FfT7}!Y4_A1FwN^c9)O_GgkHj~RdK~NC^L5i~IUXh_ +z95o@^6-U;DqjIVuk>ys^VPVp56?GT72DqTE5?dt$r77DZ?T}ZyX3UaF&vYMYG>7U( +z=6{7=Hn2lG;1!X1AGZ(HW&;lyamBia;`62jOIGoOLtacMykp3iKdiS0Xh_|vJ_mY~ +zb4{&%v9!zfYAbcDwo>=i)=iqrbxKXm)PpS7rrhUx{~ymQhCvo@p6LJp03~!qSaf7z +zbY(hYa%Ew3WdJfTGBPbNF)cAOR53F;F@HBYFgPnPFgh?WYdBZ^0000bbVXQnWMOn= +zI&E)cX=Zrz>%8FWQhbW?9;ba!EL +zWdL_~cP?peYja~^aAhuUa%Y?FJQ@H109SfcSaechcOYjeH +zXlY1#a%EF`PE=!hYhyWNB0oL~Ja{^IZE$U6bYUQPZES9HI(R)IVPtP&WjbziI&Eci 
+zVJ{*ecsh7(aCB=uB3MmOAVY6*Wgs;!H7+nBJ_;Z_a%5&YQba}|cx`NMb2@TlW<4Tk +zbaZe!FE4j@cP@7`E^l&YFEKeeIWI6WFETPMa%5&Lb9rubVR$WWb0Z=?3Lqdna%5&Y +zL}hbha%pgMX>V>Ia%5&YVPbD}bUh*>3LqdLAb4$TZgVKLZ*V;#XmoUNIxjD7b1q?IZ(?OG +zV{dIQaA>gzG9n5fARu&UW@b8AQe|^*Y;|;LZ*DyzH!?0T +zA_^cNAarSFW;$6?Wpi(Ab#!TOZapG5GA=M83LqdLaA4UZB6CtlLLf;+LpCuvHa0CXE-@ksARr)k +zZE!kGZ)9m^c|>7!Wj!J?FfuSLFgGnRFjO%&Iy5pmFf}bOH##sdA_^cNAb4$XI!$GC +zVPs)+VMJkcWj!J?FfuSLFgGnRFjO%&Iy5pmFf}bOH##sdB0dTrARs(=ZE#IZI!I}A +zbZ>HbJ_;ZpARs()WM(>3WpO?VARr(hAUtwpW;$$X3LqdLARr)fbVYV_I$>jUX>V>l +zB5-nVWOZX@WFiV6ARr(hAais@c62&(Z)S9NVRB_bXJu}5Jt9G7W@&C|ba@~|Wpim~ +zZe?;HC{1BkLr`;4M?xS;MME|*IW{&eGA=Oy04R}lk;GOVI$ud@Qx+*L$C!pq+mEwKumw3~KnQ4h_;;k4&i4exmIHQaZ +zqL)FLLv#_OTUJ!a@A=K2WJz)rnKf3?StLsilTJ~WrFvRoM)b6dJgav9|Mu0$^aY*j +z-Z0cWS=L)Sc(&ks)3QDfE$jOTsvhB@P|9CAfPr_>H%nSz9~#!-?6RaKci?;jS{}57 +zp7@oj#NC+;yq@B{6@$N$x+0n`AdZ9EPrONx`oPL8d^SdIhl+lpQ;W@unwKsRpO +zc#|V9Z5{cG8F|O!TM*a<%mH87Q8-w?emyNME%f#E;q&>pbLS2d6B8Uec8uxiX~xIL +zQ^j*gMBf4I1foDBQ{-RT#+^tE!_dKl2X*u2P0i2GOOo#2zpwoKe3h1#>dBKQO5*Y3 +z$J)PtzYN20XJZcd;!TDP;60!s^KAyZ*Q-sfow1`GNR3!H+vQ6&eW(=@9S?8tp-3K5WupmOjqW>G);Ew*g<1s +zBYXDj!LlsE;V=~y6+C_VG%5Dd&U7sG>@MJY7O&;n&z@CjX{nlx}S9Ri@dyqq&<&d}A>#oXN73Y%R7yqlE$CFylTLxTnf2c5{gGLQ`& +zIdVj?Sj^FL>((vR)YPQPbR>3Yr1gZLaJO0X!YUx +zE2XA(xfmWEroMjRXwcu^&%nTdThy`bTtRblbH)s`x3?=C4l5iEYiMXl`}Xbg%tR;@ +za`aSJSG)Pn6*Q3;CIo2j-n|(eaCbp88dY&|ahhCHQyXdqOH;gI!vu@%7>Cz=h +zQb$L}QqQgg@YxQ01I)WCWGUFadpE0Bug+F{9%uzf{uF)%UIQ&ji9jGgVPRoLO-Z9` +z)~w<3<;$EteVWP1$ptcFV`D_4(bP{r0>8O+O)YsRi&dyFfd-g1? 
+zuNTAk__(fKy{d_c3EjJQPpej~N=+C +z_VnHZ85ff*DC1)CCKx34yZ>!9ONDzmeMRQwwANG8TfpZmT+dtW=Vh(u{{qBcL;Juw +R3Jw4O002ovPDHLkV1icrt||Zk + diff --git a/tests/compat/git/binary_mixed_delta_literal/out/favicon.png b/tests/compat/git/binary_mixed_delta_literal/out/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..69b8613ce1506e1c92b864f91cc46df06348c62b GIT binary patch literal 1125 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE0wix1Z>k4UEa{HEjtmSN`?>!lvVtU&J%W50 z7^>757#dm_7=8hT8eT9klo~KFyh>nTu$sZZAYL$MSD+10f+@+{-G$+Qd;gjJKptm- zM`SV3z!DH1*13q>1OJmp-&A)3@J_A;A0V7u0bkfJ)-`Krrb zGey1(YybBo_r#8#3kPrfo#tA4xb3R$F4j$a9H~9huUPE$JQDstTM~O>^@Ps(;>UH<3Fx0=LBZUre@8723HjVToWun1;~KVtGY7SF1E7liM5< zHa%KaZ1y+v;MF5PX0dXM#bh1)zI}?P`JCclPp)GktoyD%Uv%1HzQp-VwViVJr}FN4 zBVAi3pdsabJ2zzio=sD>mtWX++u%m3k>>5t|1&=?+*B*EnLW)#$^O=9J{D1Fvz#4w zCmkrSML-}_v8Imc2?OP1;|%KWmLM+u&^dKy+fI{C57UY0UhRg-3U_ zKl;3k)jRBCi*uZh#-8L8Gwj!FXV37syULEeYD%&1+S-jgUC&wB|>?y4oO5hW>!C8<`)MX5lF!N|bKNY}tn*U&h` z(Adh*+{(a0+rYrez#!Wq{4a`z-29Zxv`X9>q*C7l^C^QQ$cEtjw370~qEv?R@^Zb* zyzJuS#DY}4{G#;P?`))iio&ZxB1(c1%M}WW^3yVNQWZ)n3sMy_3rdn17%JvG{=~yk z7^b0d%K!8k&!<5Q%*xz)$=t%q!rqfbn1vNw8cYtSFe`5kQ8<0$%84Uqj>sHgKi%N5 cz)O$emAGKZCnwXXKr0wLUHx3vIVCg!0EmFw6951J literal 0 HcmV?d00001 diff --git a/tests/compat/git/mod.rs b/tests/compat/git/mod.rs index 041edaf..281aab8 100644 --- a/tests/compat/git/mod.rs +++ b/tests/compat/git/mod.rs @@ -203,6 +203,75 @@ fn junk_between_hunks() { .run(); } +// Mixed binary and text patch. +// +// Both git apply and diffy should apply both the binary and text changes. +#[test] +fn binary_and_text_mixed() { + Case::git("binary_and_text_mixed").strip(1).run(); +} + +// Binary patch in literal format (new file creation). +#[test] +#[cfg(feature = "binary")] +fn binary_literal() { + Case::git("binary_literal").strip(1).run(); +} + +// Binary patch in delta format (modify existing file). 
+#[test] +#[cfg(feature = "binary")] +fn binary_delta() { + Case::git("binary_delta").strip(1).run(); +} + +// Binary literal patch applied to wrong original content. +// +// This documents a behavioral difference: +// - diffy: succeeds (skips validation, ignores original for literal format) +// - git: fails (validates original content via index hash before applying) +// +// diffy's behavior is intentional - we don't have access to git's object database +// to verify hashes, and for literal format the original content isn't needed anyway. +#[test] +#[cfg(feature = "binary")] +fn binary_literal_wrong_original() { + Case::git("binary_literal_wrong_original") + .strip(1) + .expect_compat(false) + .run(); +} + +// Binary delta patch applied to wrong original content. +// +// Both diffy and git fail, but for different reasons: +// - diffy: fails because delta instructions reference wrong offsets/sizes +// - git: fails because index hash doesn't match before even trying to apply +// +// This test verifies diffy correctly rejects invalid delta applications. +#[test] +#[cfg(feature = "binary")] +fn binary_delta_wrong_original() { + Case::git("binary_delta_wrong_original") + .strip(1) + .expect_success(false) + .run(); +} + +// Binary patch with mixed delta/literal format. +// +// Git can choose different encodings for forward and reverse transformations +// based on which is more efficient. This patch has: +// - forward (original -> modified): delta +// - reverse (modified -> original): literal +// +// From rust-lang/rust@ad46af24 (favicon-32x32.png update). +#[test] +#[cfg(feature = "binary")] +fn binary_mixed_delta_literal() { + Case::git("binary_mixed_delta_literal").strip(1).run(); +} + // Multi-file patch with junk/preamble text between different files. // // git apply behavior: Ignores content between `diff --git` boundaries. 
diff --git a/tests/compat/main.rs b/tests/compat/main.rs index e35ed07..2c0afe7 100644 --- a/tests/compat/main.rs +++ b/tests/compat/main.rs @@ -20,25 +20,25 @@ //! //! ```sh //! # Run all compat tests -//! cargo test --test compat +//! cargo test --test compat -F binary //! //! # Run with reference tool comparison (CI mode) -//! CI=1 cargo test --test compat +//! CI=1 cargo test --test compat -F binary //! //! # For Nix users, run this to ensure you have GNU patch -//! CI=1 nix shell nixpkgs#gnupatch -c cargo test --test compat +//! CI=1 nix shell nixpkgs#gnupatch -c cargo test --test compat -F binary //! ``` //! //! ## Regenerating snapshots //! //! ```sh -//! SNAPSHOTS=overwrite cargo test --test compat +//! SNAPSHOTS=overwrite cargo test --test compat -F binary //! ``` //! //! ## Adding new test cases //! //! 1. Create `case_name/in/` with input file(s) and `foo.patch` -//! 2. Run `SNAPSHOTS=overwrite cargo test --test compat` to generate `out/` +//! 2. Run `SNAPSHOTS=overwrite cargo test --test compat -F binary` to generate `out/` //! 3. Add `#[test] fn case_name() { Case::{gnu_patch,git}(...).run(); }` in the module //! //! For failure tests, use `.expect_success(false)` and skip step 2. diff --git a/tests/replay.rs b/tests/replay.rs index e6fbab3..48f64e6 100644 --- a/tests/replay.rs +++ b/tests/replay.rs @@ -6,7 +6,7 @@ //! ## Usage //! //! ```console -//! $ cargo test --test replay -- --ignored --nocapture +//! $ cargo test --test replay -F binary -- --ignored --nocapture //! ``` //! //! ## Environment Variables @@ -59,7 +59,10 @@ use std::{ sync::Mutex, }; -use diffy::patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet}; +use diffy::{ + binary::BinaryPatch, + patch_set::{FileOperation, ParseOptions, PatchKind, PatchSet}, +}; use rayon::prelude::*; /// Persistent `git cat-file --batch` process for fast object lookups. @@ -331,13 +334,10 @@ fn process_commit( // UniDiff format cannot express pure renames (no ---/+++ headers). 
// Use `--no-renames` to represent them as delete + create instead. - // GitDiff mode handles renames via extended headers natively. + // GitDiff mode handles renames natively and uses `--binary` to get actual binary patch data. let diff_output = match mode { TestMode::UniDiff => git_bytes(repo, &["diff", "--no-renames", parent, child]), - // TODO: pass `--binary` once binary patch support lands, - // so binary files get actual delta/literal data instead of - // "Binary files differ" markers. - TestMode::GitDiff => git_bytes(repo, &["diff", parent, child]), + TestMode::GitDiff => git_bytes(repo, &["diff", "--binary", parent, child]), }; if diff_output.is_empty() { @@ -387,9 +387,7 @@ fn process_commit( text_files + type_changes } TestMode::GitDiff => { - // Can't use `--numstat` for GitDiff: it shows `-\t-\t` for both - // actual binary diffs AND pure binary renames (100% similarity). - // Use `--raw` for total count instead. + // Count via `--raw`: `--numstat` conflates binary diffs with pure binary renames. let raw = git(repo, &["diff", "--raw", parent, child]); let (mut total, mut type_changes) = (0, 0); for line in raw.lines().filter(|l| !l.is_empty()) { @@ -544,12 +542,51 @@ fn process_commit( ); } } - PatchKind::Binary(_) => { - // Binary patch application not yet wired up in replay tests. - // Will be done once the `binary` Cargo feature is added.
+ PatchKind::Binary(BinaryPatch::Marker) => { + // `Binary files differ` marker - no actual data to apply skipped += 1; continue; } + PatchKind::Binary(patch) => { + // Get content as bytes + let base_content = if let Some(path) = base_path { + let Some(content) = cat.get(parent, path) else { + skipped += 1; + continue; + }; + content + } else { + Vec::new() + }; + + let expected_content = if let Some(path) = target_path { + let Some(content) = cat.get(child, path) else { + skipped += 1; + continue; + }; + content + } else { + Vec::new() + }; + + let result = match patch.apply(&base_content) { + Ok(r) => r, + Err(e) => { + panic!( + "Failed to apply binary patch at {parent_short}..{child_short} for {desc}: {e}" + ); + } + }; + + if result != expected_content { + panic!( + "Binary content mismatch at {parent_short}..{child_short} for {desc}\n\n\ + Expected {} bytes, got {} bytes", + expected_content.len(), + result.len() + ); + } + } } applied += 1;