Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 66 additions & 21 deletions src/patch_set/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ mod parse;
mod tests;

use std::borrow::Cow;
use std::fmt;

use crate::utils::Text;
use crate::Patch;

pub use error::PatchSetParseError;
Expand Down Expand Up @@ -116,7 +118,7 @@ impl<'a, T: ToOwned + ?Sized> PatchKind<'a, T> {
/// (create, delete, modify, or rename).
#[derive(Clone, PartialEq, Eq)]
pub struct FilePatch<'a, T: ToOwned + ?Sized> {
operation: FileOperation<'a>,
operation: FileOperation<'a, T>,
kind: PatchKind<'a, T>,
old_mode: Option<FileMode>,
new_mode: Option<FileMode>,
Expand All @@ -139,7 +141,7 @@ where

impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> {
fn new(
operation: FileOperation<'a>,
operation: FileOperation<'a, T>,
patch: Patch<'a, T>,
old_mode: Option<FileMode>,
new_mode: Option<FileMode>,
Expand All @@ -153,7 +155,7 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> {
}

/// Returns the file operation for this patch.
pub fn operation(&self) -> &FileOperation<'a> {
pub fn operation(&self) -> &FileOperation<'a, T> {
&self.operation
}

Expand Down Expand Up @@ -192,48 +194,91 @@ impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> {
///
/// This is determined by examining the `---` and `+++` header lines
/// of a unified diff patch, and git extended headers when available.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileOperation<'a> {
#[derive(PartialEq, Eq)]
pub enum FileOperation<'a, T: ToOwned + ?Sized> {
/// Delete a file (`+++ /dev/null`).
Delete(Cow<'a, str>),
Delete(Cow<'a, T>),
/// Create a new file (`--- /dev/null`).
Create(Cow<'a, str>),
Create(Cow<'a, T>),
/// Modify a file.
///
/// * If `original == modified`, this is an in-place modification.
/// * If they differ, the caller decides how to handle, e.g., treat as rename or error.
///
/// Usually, the caller needs to strip the prefix from the paths to determine.
Modify {
original: Cow<'a, str>,
modified: Cow<'a, str>,
original: Cow<'a, T>,
modified: Cow<'a, T>,
},
/// Rename a file (move from `from` to `to`, delete `from`).
///
/// Only produced when git extended headers explicitly indicate a rename.
Rename {
from: Cow<'a, str>,
to: Cow<'a, str>,
},
Rename { from: Cow<'a, T>, to: Cow<'a, T> },
/// Copy a file (copy from `from` to `to`, keep `from`).
///
/// Only produced when git extended headers explicitly indicate a copy.
Copy {
from: Cow<'a, str>,
to: Cow<'a, str>,
},
Copy { from: Cow<'a, T>, to: Cow<'a, T> },
}

impl<T: ToOwned + ?Sized> Clone for FileOperation<'_, T> {
fn clone(&self) -> Self {
match self {
Self::Delete(p) => Self::Delete(p.clone()),
Self::Create(p) => Self::Create(p.clone()),
Self::Modify { original, modified } => Self::Modify {
original: original.clone(),
modified: modified.clone(),
},
Self::Rename { from, to } => Self::Rename {
from: from.clone(),
to: to.clone(),
},
Self::Copy { from, to } => Self::Copy {
from: from.clone(),
to: to.clone(),
},
}
}
}

/// Hand-written `Debug` whose bounds are placed on `T` and its owned form
/// rather than on a bare `T: Debug`.
///
/// Each variant is printed with its name and fields in the same shape a
/// derived implementation would use: tuple syntax for `Delete`/`Create`,
/// struct syntax for `Modify`/`Rename`/`Copy`.
impl<T: ?Sized, O> fmt::Debug for FileOperation<'_, T>
where
    T: ToOwned<Owned = O> + fmt::Debug,
    O: std::borrow::Borrow<T> + fmt::Debug,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Delete(path) => {
                let mut out = f.debug_tuple("Delete");
                out.field(path);
                out.finish()
            }
            Self::Create(path) => {
                let mut out = f.debug_tuple("Create");
                out.field(path);
                out.finish()
            }
            Self::Modify { original, modified } => {
                let mut out = f.debug_struct("Modify");
                out.field("original", original).field("modified", modified);
                out.finish()
            }
            Self::Rename { from, to } => {
                let mut out = f.debug_struct("Rename");
                out.field("from", from).field("to", to);
                out.finish()
            }
            Self::Copy { from, to } => {
                let mut out = f.debug_struct("Copy");
                out.field("from", from).field("to", to);
                out.finish()
            }
        }
    }
}

impl FileOperation<'_> {
impl<T: Text + ?Sized> FileOperation<'_, T> {
/// Strip the first `n` path components from the paths in this operation.
///
/// This is similar to the `-p` option in GNU patch. For example,
/// `strip_prefix(1)` on a path `a/src/lib.rs` would return `src/lib.rs`.
pub fn strip_prefix(&self, n: usize) -> FileOperation<'_> {
fn strip(path: &str, n: usize) -> &str {
pub fn strip_prefix(&self, n: usize) -> FileOperation<'_, T> {
fn strip<T: Text + ?Sized>(path: &T, n: usize) -> &T {
let mut remaining = path;
for _ in 0..n {
match remaining.split_once('/') {
match remaining.split_at_exclusive("/") {
Some((_first, rest)) => remaining = rest,
None => return remaining,
}
Expand Down
175 changes: 105 additions & 70 deletions src/patch_set/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use super::{
PatchSetParseError,
};
use crate::patch::parse::parse_one;
use crate::utils::Text;

use std::borrow::Cow;

Expand All @@ -20,7 +21,7 @@ const EMAIL_PREAMBLE_SEPARATOR: &str = "\n---\n";

/// Streaming iterator for parsing patches one at a time.
///
/// Created by [`PatchSet::parse`].
/// Created by [`PatchSet::parse`] or [`PatchSet::parse_bytes`].
///
/// # Example
///
Expand All @@ -45,16 +46,16 @@ const EMAIL_PREAMBLE_SEPARATOR: &str = "\n---\n";
/// println!("{:?}", patch.operation());
/// }
/// ```
pub struct PatchSet<'a> {
input: &'a str,
pub struct PatchSet<'a, T: ?Sized> {
input: &'a T,
offset: usize,
opts: ParseOptions,
finished: bool,
found_any: bool,
}

impl<'a> PatchSet<'a> {
/// Creates a streaming parser for multiple file patches.
impl<'a> PatchSet<'a, str> {
/// Creates a streaming parser for multiple file patches from a string.
pub fn parse(input: &'a str, opts: ParseOptions) -> Self {
// Strip email preamble once at construction
let input = strip_email_preamble(input);
Expand All @@ -66,88 +67,117 @@ impl<'a> PatchSet<'a> {
found_any: false,
}
}
}

/// Creates an error with the current offset as span.
fn error(&self, kind: PatchSetParseErrorKind) -> PatchSetParseError {
PatchSetParseError::new(kind, self.offset..self.offset)
}

fn next_unidiff_patch(&mut self) -> Option<Result<FilePatch<'a, str>, PatchSetParseError>> {
let remaining = &self.input[self.offset..];
if remaining.is_empty() {
return None;
impl<'a> PatchSet<'a, [u8]> {
    /// Builds a streaming parser over raw bytes containing multiple file patches.
    ///
    /// Use this when the diff may carry non-UTF-8 content, for example
    /// output of `git diff --binary` for files that git misdetects as text.
    ///
    /// The input is run through `strip_email_preamble` once, here, and the
    /// parser starts at offset `0` of the result.
    pub fn parse_bytes(input: &'a [u8], opts: ParseOptions) -> Self {
        Self {
            input: strip_email_preamble(input),
            offset: 0,
            opts,
            finished: false,
            found_any: false,
        }
    }
}

let patch_start = find_patch_start(remaining)?;
self.found_any = true;

let patch_input = &remaining[patch_start..];

let opts = crate::patch::parse::ParseOpts::default();
let (result, consumed) = parse_one(patch_input, opts);
// Always advance so the iterator makes progress even on error.
let abs_patch_start = self.offset + patch_start;
self.offset += patch_start + consumed;

let patch = match result {
Ok(patch) => patch,
Err(e) => return Some(Err(e.into())),
};
let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path())
{
Ok(op) => op,
Err(mut e) => {
e.set_span(abs_patch_start..abs_patch_start);
return Some(Err(e));
}
};
impl<'a> Iterator for PatchSet<'a, str> {
type Item = Result<FilePatch<'a, str>, PatchSetParseError>;

Some(Ok(FilePatch::new(operation, patch, None, None)))
fn next(&mut self) -> Option<Self::Item> {
next_patch(self)
}
}

impl<'a> Iterator for PatchSet<'a> {
type Item = Result<FilePatch<'a, str>, PatchSetParseError>;
impl<'a> Iterator for PatchSet<'a, [u8]> {
type Item = Result<FilePatch<'a, [u8]>, PatchSetParseError>;

fn next(&mut self) -> Option<Self::Item> {
if self.finished {
return None;
next_patch(self)
}
}

fn next_patch<'a, T: Text + ?Sized>(
ps: &mut PatchSet<'a, T>,
) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {
if ps.finished {
return None;
}

let result = match ps.opts.format {
Format::UniDiff => next_unidiff_patch(ps),
};

if result.is_none() {
ps.finished = true;
if !ps.found_any {
let err = PatchSetParseError::new(
PatchSetParseErrorKind::NoPatchesFound,
ps.offset..ps.offset,
);
return Some(Err(err));
}
}

let result = match self.opts.format {
Format::UniDiff => {
let result = self.next_unidiff_patch();
if result.is_none() {
self.finished = true;
if !self.found_any {
return Some(Err(self.error(PatchSetParseErrorKind::NoPatchesFound)));
}
}
result
}
};
result
}

result
/// Parses the next unified-diff patch from the unconsumed input of `ps`.
///
/// Returns `None` when the remaining input is empty or contains no further
/// patch header. Otherwise marks `ps.found_any`, parses one patch starting at
/// the header, and advances `ps.offset` past whatever `parse_one` consumed.
///
/// Errors from `parse_one` are converted with `into()`. Errors from
/// `extract_file_op_unidiff` get an empty span at the absolute offset of the
/// patch header before being returned.
fn next_unidiff_patch<'a, T: Text + ?Sized>(
    ps: &mut PatchSet<'a, T>,
) -> Option<Result<FilePatch<'a, T>, PatchSetParseError>> {
    let rest = remaining(ps);
    if rest.is_empty() {
        return None;
    }

    let header_rel = find_patch_start(rest)?;
    ps.found_any = true;

    let patch_input = rest.split_at(header_rel).1;
    let (parsed, consumed) =
        parse_one(patch_input, crate::patch::parse::ParseOpts::default());

    // Advance before inspecting the result, so the iterator makes progress
    // even when this patch turns out to be an error.
    let header_abs = ps.offset + header_rel;
    ps.offset = header_abs + consumed;

    let patch = match parsed {
        Ok(parsed_patch) => parsed_patch,
        Err(parse_err) => return Some(Err(parse_err.into())),
    };

    let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path()) {
        Ok(file_op) => file_op,
        Err(mut op_err) => {
            op_err.set_span(header_abs..header_abs);
            return Some(Err(op_err));
        }
    };

    Some(Ok(FilePatch::new(operation, patch, None, None)))
}

/// Returns the part of the input that has not been consumed yet, i.e. the
/// second half of `ps.input` split at `ps.offset`.
fn remaining<'a, T: Text + ?Sized>(ps: &PatchSet<'a, T>) -> &'a T {
    ps.input.split_at(ps.offset).1
}

/// Finds the byte offset of the first patch header in the input.
///
/// A patch header starts with `--- ` or `+++ ` (the file path lines).
/// Returns `None` if no header is found.
fn find_patch_start(input: &str) -> Option<usize> {
fn find_patch_start<T: Text + ?Sized>(input: &T) -> Option<usize> {
let mut offset = 0;
for line in input.lines() {
if line.starts_with(ORIGINAL_PREFIX) || line.starts_with(MODIFIED_PREFIX) {
return Some(offset);
}
offset += line.len();
// Account for the line ending that `.lines()` strips
if input[offset..].starts_with("\r\n") {
offset += 2;
} else if input[offset..].starts_with('\n') {
offset += 1;
}
}
None
}
Expand All @@ -167,25 +197,30 @@ fn find_patch_start(input: &str) -> Option<usize> {
/// > The log message and the patch are separated by a line with a three-dash line.
///
/// [`git format-patch`]: https://git-scm.com/docs/git-format-patch
fn strip_email_preamble(input: &str) -> &str {
fn strip_email_preamble<T: Text + ?Sized>(input: &T) -> &T {
// only strip preamble for mbox-formatted input
if !input.starts_with("From ") {
return input;
}

match input.find(EMAIL_PREAMBLE_SEPARATOR) {
Some(pos) => &input[pos + EMAIL_PREAMBLE_SEPARATOR.len()..],
Some(pos) => {
let (_, rest) = input.split_at(pos + EMAIL_PREAMBLE_SEPARATOR.len());
rest
}
None => input,
}
}

/// Extracts the file operation from a patch based on its header paths.
pub(crate) fn extract_file_op_unidiff<'a>(
original: Option<&Cow<'a, str>>,
modified: Option<&Cow<'a, str>>,
) -> Result<FileOperation<'a>, PatchSetParseError> {
let is_create = original.map(Cow::as_ref) == Some(DEV_NULL);
let is_delete = modified.map(Cow::as_ref) == Some(DEV_NULL);
fn extract_file_op_unidiff<'a, T: Text + ?Sized>(
original: Option<&Cow<'a, T>>,
modified: Option<&Cow<'a, T>>,
) -> Result<FileOperation<'a, T>, PatchSetParseError> {
let is_dev_null = |cow: &Cow<'_, T>| cow.as_ref().as_bytes() == DEV_NULL.as_bytes();

let is_create = original.is_some_and(is_dev_null);
let is_delete = modified.is_some_and(is_dev_null);

if is_create && is_delete {
return Err(PatchSetParseErrorKind::BothDevNull.into());
Expand Down
Loading