bmwill · bmwill · Apr 16, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 12, 2026
diff --git a/src/patch/error.rs b/src/patch/error.rs
@@ -110,6 +110,9 @@ pub(crate) enum ParsePatchErrorKind {
 
     /// Orphaned hunk header found after trailing content.
     OrphanedHunkHeader,
+
+    /// Quoted filename decodes to bytes that are not valid UTF-8 when parsing as text.
+    InvalidUtf8Path,
 }
 
 impl fmt::Display for ParsePatchErrorKind {
@@ -136,6 +139,7 @@ impl fmt::Display for ParsePatchErrorKind {
             Self::UnexpectedHunkLine => "unexpected line in hunk body",
             Self::MissingNewline => "missing newline",
             Self::OrphanedHunkHeader => "orphaned hunk header after trailing content",
+            Self::InvalidUtf8Path => "filename is not valid UTF-8",
         };
         write!(f, "{msg}")
     }

diff --git a/src/patch/parse.rs b/src/patch/parse.rs
@@ -12,6 +12,43 @@ use std::borrow::Cow;
 
 type Result<T, E = ParsePatchError> = std::result::Result<T, E>;
 
+/// Options that control parsing behavior.
+///
+/// Defaults match the [`parse`]/[`parse_bytes`] behavior.
+#[derive(Clone, Copy)]
+pub(crate) struct ParseOpts {
+    skip_preamble: bool,
+    reject_orphaned_hunks: bool,
+}
+
+impl Default for ParseOpts {
+    fn default() -> Self {
+        Self {
+            skip_preamble: true,
+            reject_orphaned_hunks: false,
+        }
+    }
+}
+
+impl ParseOpts {
+    /// Don't skip preamble lines before `---`/`+++`/`@@`.
+    ///
+    /// Useful when the caller has already positioned the input
+    /// at the start of the patch content.
+    #[allow(dead_code)] // will be used by patch_set parser
+    pub(crate) fn no_skip_preamble(mut self) -> Self {
+        self.skip_preamble = false;
+        self
+    }
+
+    /// Reject orphaned `@@ ` hunk headers after parsed hunks,
+    /// matching `git apply` behavior.
+    pub(crate) fn reject_orphaned_hunks(mut self) -> Self {
+        self.reject_orphaned_hunks = true;
+        self
+    }
+}
+
 struct Parser<'a, T: Text + ?Sized> {
     lines: std::iter::Peekable<LineIter<'a, T>>,
     offset: usize,
@@ -53,78 +90,66 @@ impl<'a, T: Text + ?Sized> Parser<'a, T> {
     }
 }
 
+// TODO: make a better API for lib consumers
+//
+// Too many different variants of `parse*` functions here.
+// These also propagate to `Patch::from_{str,bytes}{,_strict}`.
+
 pub fn parse(input: &str) -> Result<Patch<'_, str>> {
-    let (result, _consumed) = parse_one(input);
+    let (result, _consumed) = parse_one(input, ParseOpts::default());
+    result
+}
+
+pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
+    let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks());
+    result
+}
+
+pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
+    let (result, _consumed) = parse_one(input, ParseOpts::default());
+    result
+}
+
+pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
+    let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks());
     result
 }
 
 /// Parses one patch from input.
 ///
 /// Always returns consumed bytes alongside the result
 /// so callers can advance past the parsed or partially parsed content.
-pub(crate) fn parse_one(input: &str) -> (Result<Patch<'_, str>>, usize) {
+pub(crate) fn parse_one<T: Text + ?Sized>(
+    input: &T,
+    opts: ParseOpts,
+) -> (Result<Patch<'_, T>>, usize) {
     let mut parser = Parser::new(input);
 
-    let header = match patch_header(&mut parser) {
+    let header = match patch_header(&mut parser, &opts) {
         Ok(h) => h,
         Err(e) => return (Err(e), parser.offset()),
     };
     let hunks = match hunks(&mut parser) {
         Ok(h) => h,
         Err(e) => return (Err(e), parser.offset()),
     };
-
-    let patch = Patch::new(
-        header.0.map(convert_cow_to_str),
-        header.1.map(convert_cow_to_str),
-        hunks,
-    );
-    (Ok(patch), parser.offset())
-}
-
-pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
-    let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser)?;
-    let hunks = hunks(&mut parser)?;
-    reject_orphaned_hunk_headers(&mut parser)?;
-
-    Ok(Patch::new(
-        header.0.map(convert_cow_to_str),
-        header.1.map(convert_cow_to_str),
-        hunks,
-    ))
-}
-
-pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
-    let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser)?;
-    let hunks = hunks(&mut parser)?;
-
-    Ok(Patch::new(header.0, header.1, hunks))
-}
-
-pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
-    let mut parser = Parser::new(input);
-    let header = patch_header(&mut parser)?;
-    let hunks = hunks(&mut parser)?;
-    reject_orphaned_hunk_headers(&mut parser)?;
-
-    Ok(Patch::new(header.0, header.1, hunks))
-}
-
-// This is only used when the type originated as a utf8 string
-fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
-    match cow {
-        Cow::Borrowed(b) => std::str::from_utf8(b).unwrap().into(),
-        Cow::Owned(o) => String::from_utf8(o).unwrap().into(),
+    if opts.reject_orphaned_hunks {
+        if let Err(e) = reject_orphaned_hunk_headers(&mut parser) {
+            return (Err(e), parser.offset());
+        }
     }
+
+    (Ok(Patch::new(header.0, header.1, hunks)), parser.offset())
 }
 
 #[allow(clippy::type_complexity)]
-fn patch_header<'a, T: Text + ToOwned + ?Sized>(
+fn patch_header<'a, T: Text + ?Sized>(
     parser: &mut Parser<'a, T>,
-) -> Result<(Option<Cow<'a, [u8]>>, Option<Cow<'a, [u8]>>)> {
-    skip_header_preamble(parser)?;
+    opts: &ParseOpts,
+) -> Result<(Option<Cow<'a, T>>, Option<Cow<'a, T>>)> {
+    if opts.skip_preamble {
+        skip_header_preamble(parser)?;
+    }
 
     let mut filename1 = None;
     let mut filename2 = None;
@@ -161,10 +186,7 @@ fn skip_header_preamble<T: Text + ?Sized>(parser: &mut Parser<'_, T>) -> Result<
     Ok(())
 }
 
-fn parse_filename<'a, T: Text + ToOwned + ?Sized>(
-    prefix: &str,
-    line: &'a T,
-) -> Result<Cow<'a, [u8]>> {
+fn parse_filename<'a, T: Text + ?Sized>(prefix: &str, line: &'a T) -> Result<Cow<'a, T>> {
     let line = line
         .strip_prefix(prefix)
         .ok_or(ParsePatchErrorKind::InvalidFilename)?;

diff --git a/src/patch/tests.rs b/src/patch/tests.rs
@@ -618,6 +618,24 @@ fn plain_filename_roundtrip() {
     assert_eq!(p.modified(), p2.modified());
 }
 
+// Octal escape \377 decodes to 0xFF, which is not valid UTF-8.
+// When parsing into `Patch<'_, str>`, this returns a parse error
+// instead of panicking.
+#[test]
+fn non_utf8_escaped_filename_returns_error_on_str_parse() {
+    let s = r#"\
+--- "a/foo\377"
++++ "b/foo\377"
+@@ -1 +1 @@
+-x
++y
+"#;
+    assert_eq!(
+        parse(s).unwrap_err().kind,
+        ParsePatchErrorKind::InvalidUtf8Path,
+    );
+}
+
 mod error_display {
     use crate::patch::error::ParsePatchErrorKind;
     use crate::Patch;

diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs
@@ -83,7 +83,8 @@ impl<'a> PatchSet<'a> {
 
         let patch_input = &remaining[patch_start..];
 
-        let (result, consumed) = parse_one(patch_input);
+        let opts = crate::patch::parse::ParseOpts::default();
+        let (result, consumed) = parse_one(patch_input, opts);
         // Always advance so the iterator makes progress even on error.
         let abs_patch_start = self.offset + patch_start;
         self.offset += patch_start + consumed;

diff --git a/src/utils.rs b/src/utils.rs
@@ -132,7 +132,7 @@ impl<'a, T: Text + ?Sized> Iterator for LineIter<'a, T> {
 
 /// A helper trait for processing text like `str` and `[u8]`
 /// Useful for abstracting over those types for parsing as well as breaking input into lines
-pub trait Text: Eq + Hash {
+pub trait Text: Eq + Hash + ToOwned {
     fn is_empty(&self) -> bool;
     fn len(&self) -> usize;
     fn starts_with(&self, prefix: &str) -> bool;
@@ -148,6 +148,12 @@ pub trait Text: Eq + Hash {
     #[allow(unused)]
     fn lines(&self) -> LineIter<'_, Self>;
 
+    /// Converts raw bytes into `Self::Owned`.
+    ///
+    /// Returns `None` if the bytes are not valid for this type
+    /// (e.g. non-UTF-8 bytes for `str`).
+    fn owned_from_bytes(bytes: Vec<u8>) -> Option<Self::Owned>;
+
     fn parse<T: std::str::FromStr>(&self) -> Option<T> {
         self.as_str().and_then(|s| s.parse().ok())
     }
@@ -158,6 +164,10 @@ impl Text for str {
         self.is_empty()
     }
 
+    fn owned_from_bytes(bytes: Vec<u8>) -> Option<String> {
+        String::from_utf8(bytes).ok()
+    }
+
     fn len(&self) -> usize {
         self.len()
     }
@@ -209,6 +219,10 @@ impl Text for [u8] {
         self.is_empty()
     }
 
+    fn owned_from_bytes(bytes: Vec<u8>) -> Option<Vec<u8>> {
+        Some(bytes)
+    }
+
     fn len(&self) -> usize {
         self.len()
     }
@@ -292,27 +306,29 @@ fn find_byte(haystack: &[u8], byte: u8) -> Option<usize> {
 ///
 /// See [`byte_needs_quoting`] for the set of characters that
 /// require quoting.
-pub(crate) fn escaped_filename<T: Text + ToOwned + ?Sized>(
+pub(crate) fn escaped_filename<T: Text + ?Sized>(
     filename: &T,
-) -> Result<Cow<'_, [u8]>, ParsePatchError> {
+) -> Result<Cow<'_, T>, ParsePatchError> {
     if let Some(inner) = filename
         .strip_prefix("\"")
         .and_then(|s| s.strip_suffix("\""))
     {
-        decode_escaped(inner)
+        match decode_escaped(inner.as_bytes())? {
+            None => Ok(Cow::Borrowed(inner)),
+            Some(bytes) => T::owned_from_bytes(bytes)
+                .map(Cow::Owned)
+                .ok_or_else(|| ParsePatchErrorKind::InvalidUtf8Path.into()),
+        }
     } else {
         let bytes = filename.as_bytes();
         if bytes.iter().any(|b| byte_needs_quoting(*b)) {
             return Err(ParsePatchErrorKind::InvalidCharInUnquotedFilename.into());
         }
-        Ok(bytes.into())
+        Ok(Cow::Borrowed(filename))
     }
 }
 
-fn decode_escaped<T: Text + ToOwned + ?Sized>(
-    escaped: &T,
-) -> Result<Cow<'_, [u8]>, ParsePatchError> {
-    let bytes = escaped.as_bytes();
+fn decode_escaped(bytes: &[u8]) -> Result<Option<Vec<u8>>, ParsePatchError> {
     let mut result = Vec::new();
     let mut i = 0;
     let mut last_copy = 0;
@@ -365,8 +381,8 @@ fn decode_escaped<T: Text + ToOwned + ?Sized>(
 
     if needs_allocation {
         result.extend_from_slice(&bytes[last_copy..]);
-        Ok(Cow::Owned(result))
+        Ok(Some(result))
     } else {
-        Ok(Cow::Borrowed(bytes))
+        Ok(None)
     }
 }