From debcd91d5eb0eee8fb3acb091dd6a512ffadc336 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Fri, 15 May 2026 16:40:12 +0200
Subject: [PATCH 01/16] add windows and mac github runners

---
 .../install-build-dependencies/action.yml     | 20 ++++++++++++++
 .../actions/install-rust-toolchain/action.yml | 11 ++++++++
 .github/workflows/unit-tests.yml              | 26 +++++++++++--------
 3 files changed, 46 insertions(+), 11 deletions(-)
 create mode 100644 .github/actions/install-build-dependencies/action.yml
 create mode 100644 .github/actions/install-rust-toolchain/action.yml

diff --git a/.github/actions/install-build-dependencies/action.yml b/.github/actions/install-build-dependencies/action.yml
new file mode 100644
index 00000000..dd5bd6d4
--- /dev/null
+++ b/.github/actions/install-build-dependencies/action.yml
@@ -0,0 +1,20 @@
+name: install-build-dependencies
+description: Install OS-specific system packages needed to build llama-cpp-bindings (CMake, libclang, GNU make).
+
+runs:
+  using: composite
+  steps:
+    - name: install linux build dependencies
+      if: runner.os == 'Linux'
+      shell: bash
+      run: sudo apt-get update && sudo apt-get install -y cmake libclang-dev
+
+    - name: install windows build dependencies
+      if: runner.os == 'Windows'
+      shell: bash
+      run: choco install -y make
+
+    - name: set windows libclang path
+      if: runner.os == 'Windows'
+      shell: bash
+      run: echo "LIBCLANG_PATH=C:\\Program Files\\LLVM\\bin" >> "$GITHUB_ENV"
diff --git a/.github/actions/install-rust-toolchain/action.yml b/.github/actions/install-rust-toolchain/action.yml
new file mode 100644
index 00000000..124e13b4
--- /dev/null
+++ b/.github/actions/install-rust-toolchain/action.yml
@@ -0,0 +1,11 @@
+name: install-rust-toolchain
+description: Install the pinned stable Rust toolchain (with rustfmt and clippy) and configure the cargo build cache.
+
+runs:
+  using: composite
+  steps:
+    - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable
+      with:
+        components: rustfmt, clippy
+
+    - uses: Swatinem/rust-cache@v2
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 9d241530..de8b3dfb 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -9,7 +9,7 @@ env:
   CARGO_TERM_COLOR: always
 
 jobs:
-  fmt:
+  formatting:
     name: formatting
     runs-on: ubuntu-latest
     steps:
@@ -17,25 +17,29 @@ jobs:
         with:
           submodules: recursive
 
-      - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable
-
-      - uses: Swatinem/rust-cache@v2
+      - uses: ./.github/actions/install-rust-toolchain
 
       - run: make fmt.check
 
   test:
-    name: tests
-    runs-on: ubuntu-latest
+    name: tests (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+    defaults:
+      run:
+        shell: bash
+    env:
+      LLAMA_DISABLE_CCACHE: '1'
     steps:
       - uses: actions/checkout@v4
         with:
           submodules: recursive
 
-      - name: install system dependencies
-        run: sudo apt-get update && sudo apt-get install -y cmake libclang-dev
-
-      - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable
+      - uses: ./.github/actions/install-build-dependencies
 
-      - uses: Swatinem/rust-cache@v2
+      - uses: ./.github/actions/install-rust-toolchain
 
       - run: make test.unit

From 07b4062ca2ba92f20eb1d5c9a4a45d85fe1c9f67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Fri, 15 May 2026 17:55:17 +0200
Subject: [PATCH 02/16] fix windows compile errors from bindgen type mismatch
 on MSVC

---
 llama-cpp-bindings/src/gguf_type.rs         |  5 ++++-
 llama-cpp-bindings/src/llama_token_attrs.rs | 11 +++++------
 llama-cpp-bindings/src/send_logs_to_log.rs  | 12 +++++++++++-
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/llama-cpp-bindings/src/gguf_type.rs b/llama-cpp-bindings/src/gguf_type.rs
index c1060782..33de25cd 100644
--- a/llama-cpp-bindings/src/gguf_type.rs
+++ b/llama-cpp-bindings/src/gguf_type.rs
@@ -85,7 +85,10 @@ mod tests {
     #[test]
     fn from_raw_returns_none_for_unknown() {
         assert_eq!(GgufType::from_raw(99), None);
-        assert_eq!(GgufType::from_raw(u32::MAX), None);
+        assert_eq!(
+            GgufType::from_raw(llama_cpp_bindings_sys::gguf_type::MAX),
+            None,
+        );
     }
 
     #[test]
diff --git a/llama-cpp-bindings/src/llama_token_attrs.rs b/llama-cpp-bindings/src/llama_token_attrs.rs
index 37d46651..fbc9be85 100644
--- a/llama-cpp-bindings/src/llama_token_attrs.rs
+++ b/llama-cpp-bindings/src/llama_token_attrs.rs
@@ -55,7 +55,7 @@ mod tests {
 
     #[test]
     fn try_from_zero_produces_empty_flags() {
-        let attrs = LlamaTokenAttrs::try_from(0u32);
+        let attrs = LlamaTokenAttrs::try_from(0);
 
         assert!(attrs.is_ok());
         assert!(attrs.expect("valid attribute").is_empty());
@@ -63,14 +63,13 @@ mod tests {
 
     #[test]
     fn try_from_invalid_bits_returns_error() {
-        let invalid_value = 0xFFFF_FFFFu32;
-        let result = LlamaTokenAttrs::try_from(invalid_value);
+        let result = LlamaTokenAttrs::try_from(!0);
 
         assert!(result.is_err());
-        matches!(
+        assert!(matches!(
             result.expect_err("should fail"),
-            LlamaTokenAttrsFromIntError::UnknownValue(_)
-        );
+            LlamaTokenAttrsFromIntError::UnknownValue(_),
+        ));
     }
 
     #[test]
diff --git a/llama-cpp-bindings/src/send_logs_to_log.rs b/llama-cpp-bindings/src/send_logs_to_log.rs
index 6bd8fbb7..4fa50e91 100644
--- a/llama-cpp-bindings/src/send_logs_to_log.rs
+++ b/llama-cpp-bindings/src/send_logs_to_log.rs
@@ -33,6 +33,16 @@ impl LogSource {
 static LLAMA_SOURCE: OnceLock<LogSource> = OnceLock::new();
 static GGML_SOURCE: OnceLock<LogSource> = OnceLock::new();
 
+#[cfg(target_env = "msvc")]
+const fn ggml_level_to_u32(level: llama_cpp_bindings_sys::ggml_log_level) -> u32 {
+    level.cast_unsigned()
+}
+
+#[cfg(not(target_env = "msvc"))]
+const fn ggml_level_to_u32(level: llama_cpp_bindings_sys::ggml_log_level) -> u32 {
+    level
+}
+
 const fn ggml_level_to_incoming(raw: llama_cpp_bindings_sys::ggml_log_level) -> IncomingLogLevel {
     match raw {
         llama_cpp_bindings_sys::GGML_LOG_LEVEL_NONE => IncomingLogLevel::None,
@@ -41,7 +51,7 @@ const fn ggml_level_to_incoming(raw: llama_cpp_bindings_sys::ggml_log_level) ->
         llama_cpp_bindings_sys::GGML_LOG_LEVEL_WARN => IncomingLogLevel::Warn,
         llama_cpp_bindings_sys::GGML_LOG_LEVEL_ERROR => IncomingLogLevel::Error,
         llama_cpp_bindings_sys::GGML_LOG_LEVEL_CONT => IncomingLogLevel::Cont,
-        other => IncomingLogLevel::Unknown(other),
+        other => IncomingLogLevel::Unknown(ggml_level_to_u32(other)),
     }
 }
 

From b22817662a6f429ce2362e6dfe1293ad414e8b07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Fri, 15 May 2026 17:56:28 +0200
Subject: [PATCH 03/16] add 30 minute timeouts to unit tests workflow

---
 .github/workflows/unit-tests.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index de8b3dfb..d04408d6 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -12,6 +12,7 @@ jobs:
   formatting:
     name: formatting
     runs-on: ubuntu-latest
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
         with:
@@ -24,6 +25,7 @@ jobs:
   test:
     name: tests (${{ matrix.os }})
     runs-on: ${{ matrix.os }}
+    timeout-minutes: 30
     strategy:
       fail-fast: false
       matrix:

From 50b39a34500e8e3d0353bcc30fb2192496d8f622 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Fri, 15 May 2026 18:34:26 +0200
Subject: [PATCH 04/16] fix another windows compile error from bindgen type
 mismatch on MSVC

---
 llama-cpp-bindings/src/llama_token_attrs.rs | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/llama-cpp-bindings/src/llama_token_attrs.rs b/llama-cpp-bindings/src/llama_token_attrs.rs
index fbc9be85..688d228f 100644
--- a/llama-cpp-bindings/src/llama_token_attrs.rs
+++ b/llama-cpp-bindings/src/llama_token_attrs.rs
@@ -5,6 +5,20 @@ use enumflags2::BitFlags;
 use crate::llama_token_attr::LlamaTokenAttr;
 use crate::llama_token_attrs_from_int_error::LlamaTokenAttrsFromIntError;
 
+/// Reinterprets a raw token-type value as `u32` on MSVC targets, where the
+/// bindgen-generated integer type is signed.
+#[cfg(target_env = "msvc")]
+const fn llama_token_type_to_u32(value: llama_cpp_bindings_sys::llama_token_type) -> u32 {
+    value.cast_unsigned()
+}
+
+/// Passes a raw token-type value through unchanged on non-MSVC targets, where
+/// the bindgen-generated integer type is already `u32`.
+#[cfg(not(target_env = "msvc"))]
+const fn llama_token_type_to_u32(value: llama_cpp_bindings_sys::llama_token_type) -> u32 {
+    value
+}
+
 /// A set of [`LlamaTokenAttr`] flags.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct LlamaTokenAttrs(pub BitFlags<LlamaTokenAttr>);
@@ -27,11 +41,13 @@ impl TryFrom<llama_cpp_bindings_sys::llama_token_type> for LlamaTokenAttrs {
     type Error = LlamaTokenAttrsFromIntError;
 
-    fn try_from(value: llama_cpp_bindings_sys::llama_vocab_type) -> Result<Self, Self::Error> {
-        Ok(Self(BitFlags::from_bits(value as _).map_err(
-            |bit_flag_error| {
+    /// Builds the flag set from a raw bitmask; bits that map to no known flag are
+    /// reported via [`LlamaTokenAttrsFromIntError::UnknownValue`].
+    fn try_from(value: llama_cpp_bindings_sys::llama_token_type) -> Result<Self, Self::Error> {
+        Ok(Self(
+            BitFlags::from_bits(llama_token_type_to_u32(value)).map_err(|bit_flag_error| {
                 LlamaTokenAttrsFromIntError::UnknownValue(bit_flag_error.invalid_bits())
-            },
-        )?))
+            })?,
+        ))
     }
 }
 

From 256638f6b54c52622e4ea99fc7649219decb38e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Fri, 15 May 2026 18:58:21 +0200
Subject: [PATCH 05/16] enable rust backtraces in CI tests

---
 .github/workflows/unit-tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index d04408d6..95deb1c8 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -35,6 +35,7 @@ jobs:
         shell: bash
     env:
       LLAMA_DISABLE_CCACHE: '1'
+      RUST_BACKTRACE: '1'
     steps:
       - uses: actions/checkout@v4
         with:

From 88e5c52266d96e5f35ba3ef8ddf27e76c324cce2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 12:24:21 +0200
Subject: [PATCH 06/16] fix windows test crashes from missing backend init and
 /EHsc

---
 llama-cpp-bindings-build/src/cpp_wrapper.rs      | 1 +
 llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs | 1 +
 llama-cpp-bindings/src/ggml_time_us.rs           | 5 +++++
 3 files changed, 7 insertions(+)

diff --git a/llama-cpp-bindings-build/src/cpp_wrapper.rs b/llama-cpp-bindings-build/src/cpp_wrapper.rs
index 722c7e41..fdd8ab37 100644
--- a/llama-cpp-bindings-build/src/cpp_wrapper.rs
+++ b/llama-cpp-bindings-build/src/cpp_wrapper.rs
@@ -33,6 +33,7 @@ pub fn compile_cpp_wrappers(llama_src: &Path, target_os: &TargetOs) {
 
     if target_os.is_msvc() {
         build.flag("/std:c++17");
+        build.flag("/EHsc");
     }
 
     if target_os.is_android() && cfg!(feature = "static-stdcxx") {
diff --git a/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs b/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs
index 7af3f01a..92f561e6 100644
--- a/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs
+++ b/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs
@@ -23,6 +23,7 @@ pub fn compile_mtmd(llama_src: &Path, target_os: &TargetOs) {
 
     if target_os.is_msvc() {
         build.flag("/std:c++17");
+        build.flag("/EHsc");
     }
 
     if target_os.is_android() && cfg!(feature = "static-stdcxx") {
diff --git a/llama-cpp-bindings/src/ggml_time_us.rs b/llama-cpp-bindings/src/ggml_time_us.rs
index 06f61dfc..4db4b490 100644
--- a/llama-cpp-bindings/src/ggml_time_us.rs
+++ b/llama-cpp-bindings/src/ggml_time_us.rs
@@ -22,10 +22,15 @@ pub fn ggml_time_us() -> i64 {
 
 #[cfg(test)]
 mod tests {
+    use serial_test::serial;
+
     use super::ggml_time_us;
+    use crate::llama_backend::LlamaBackend;
 
     #[test]
+    #[serial]
     fn returns_positive_value() {
+        let _backend = LlamaBackend::init().unwrap();
         let time_microseconds = ggml_time_us();
 
         assert!(time_microseconds > 0);

From a5b958b303d6508e67d8ce89660bd83fd765291e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 15:31:58 +0200
Subject: [PATCH 07/16] bump vendored llama.cpp to b9174

---
 llama-cpp-bindings-sys/llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama-cpp-bindings-sys/llama.cpp b/llama-cpp-bindings-sys/llama.cpp
index 846262d7..59778f01 160000
--- a/llama-cpp-bindings-sys/llama.cpp
+++ b/llama-cpp-bindings-sys/llama.cpp
@@ -1 +1 @@
-Subproject commit 846262d7875dcabf502a150fa3d7b9c770dde7eb
+Subproject commit 59778f0196a82db32580bb649d5d839355d6d7bf

From ee871026ca87f8cc1ad00e876f7662c5e353ef90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 15:39:05 +0200
Subject: [PATCH 08/16] wrap mtmd and fit ffi entry points to surface c++
 exceptions as typed rust errors

---
 llama-cpp-bindings-sys/wrapper_fit.cpp        |  46 ++-
 llama-cpp-bindings-sys/wrapper_fit.h          |  19 +-
 llama-cpp-bindings-sys/wrapper_mtmd.cpp       | 273 ++++++++++++++++
 llama-cpp-bindings-sys/wrapper_mtmd.h         | 107 +++++++
 llama-cpp-bindings/src/error/fit_error.rs     |  24 +-
 llama-cpp-bindings/src/model/params.rs        |  47 ++-
 llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs    |  52 +++-
 .../src/mtmd/mtmd_bitmap_error.rs             |  25 +-
 llama-cpp-bindings/src/mtmd/mtmd_context.rs   | 294 ++++++++++++++----
 .../src/mtmd/mtmd_encode_error.rs             |  14 +-
 .../src/mtmd/mtmd_eval_error.rs               |  23 +-
 .../src/mtmd/mtmd_init_error.rs               |  23 +-
 .../src/mtmd/mtmd_input_chunk.rs              |  43 ++-
 .../src/mtmd/mtmd_input_chunks.rs             |  13 +-
 .../src/mtmd/mtmd_tokenize_error.rs           |  34 +-
 15 files changed, 879 insertions(+), 158 deletions(-)
 create mode 100644 llama-cpp-bindings-sys/wrapper_mtmd.cpp

diff --git a/llama-cpp-bindings-sys/wrapper_fit.cpp b/llama-cpp-bindings-sys/wrapper_fit.cpp
index 1ec7d169..02eee839 100644
--- a/llama-cpp-bindings-sys/wrapper_fit.cpp
+++ b/llama-cpp-bindings-sys/wrapper_fit.cpp
@@ -1,10 +1,12 @@
 #include "wrapper_fit.h"
+#include "wrapper_utils.h"
 
 #include <exception>
+#include <new>
 
 #include "llama.cpp/common/fit.h"
 
-extern "C" llama_rs_fit_status llama_rs_fit_params(
+extern "C" llama_rs_fit_params_status llama_rs_fit_params(
     const char * path_model,
     struct llama_model_params * mparams,
     struct llama_context_params * cparams,
@@ -12,21 +14,49 @@ extern "C" llama_rs_fit_status llama_rs_fit_params(
     struct llama_model_tensor_buft_override * tensor_buft_overrides,
     size_t * margins,
     uint32_t n_ctx_min,
-    enum ggml_log_level log_level) {
+    enum ggml_log_level log_level,
+    int32_t * out_unrecognized_status_code,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (out_unrecognized_status_code) {
+        *out_unrecognized_status_code = 0;
+    }
+
     try {
         const common_params_fit_status status = common_fit_params(
             path_model, mparams, cparams, tensor_split, tensor_buft_overrides,
             margins, n_ctx_min, log_level);
         switch (status) {
             case COMMON_PARAMS_FIT_STATUS_SUCCESS:
-                return LLAMA_RS_FIT_STATUS_SUCCESS;
+                return LLAMA_RS_FIT_PARAMS_OK;
             case COMMON_PARAMS_FIT_STATUS_FAILURE:
-                return LLAMA_RS_FIT_STATUS_FAILURE;
+                return LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE;
             case COMMON_PARAMS_FIT_STATUS_ERROR:
-                return LLAMA_RS_FIT_STATUS_ERROR;
+                return LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR;
+        }
+        if (out_unrecognized_status_code) {
+            *out_unrecognized_status_code = static_cast<int32_t>(status);
+        }
+        return LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED;
+            }
         }
-        return LLAMA_RS_FIT_STATUS_ERROR;
-    } catch (const std::exception &) {
-        return LLAMA_RS_FIT_STATUS_ERROR;
+        return LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
diff --git a/llama-cpp-bindings-sys/wrapper_fit.h b/llama-cpp-bindings-sys/wrapper_fit.h
index c00a2620..9a6ac6ef 100644
--- a/llama-cpp-bindings-sys/wrapper_fit.h
+++ b/llama-cpp-bindings-sys/wrapper_fit.h
@@ -10,13 +10,16 @@
 extern "C" {
 #endif
 
-typedef enum llama_rs_fit_status {
-    LLAMA_RS_FIT_STATUS_SUCCESS = 0,
-    LLAMA_RS_FIT_STATUS_FAILURE = 1,
-    LLAMA_RS_FIT_STATUS_ERROR   = 2,
-} llama_rs_fit_status;
+typedef enum llama_rs_fit_params_status {
+    LLAMA_RS_FIT_PARAMS_OK = 0,
+    LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE,
+    LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR,
+    LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE,
+    LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_fit_params_status;
 
-llama_rs_fit_status llama_rs_fit_params(
+llama_rs_fit_params_status llama_rs_fit_params(
     const char * path_model,
     struct llama_model_params * mparams,
     struct llama_context_params * cparams,
@@ -24,7 +27,9 @@ llama_rs_fit_status llama_rs_fit_params(
     struct llama_model_tensor_buft_override * tensor_buft_overrides,
     size_t * margins,
     uint32_t n_ctx_min,
-    enum ggml_log_level log_level);
+    enum ggml_log_level log_level,
+    int32_t * out_unrecognized_status_code,
+    char ** out_error);
 
 #ifdef __cplusplus
 }
diff --git a/llama-cpp-bindings-sys/wrapper_mtmd.cpp b/llama-cpp-bindings-sys/wrapper_mtmd.cpp
new file mode 100644
index 00000000..bff5b958
--- /dev/null
+++ b/llama-cpp-bindings-sys/wrapper_mtmd.cpp
@@ -0,0 +1,273 @@
+#include "wrapper_mtmd.h"
+#include "wrapper_utils.h"
+
+#include <exception>
+#include <new>
+#include <string>
+
+extern "C" llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file(
+    const char * mmproj_path,
+    const struct llama_model * text_model,
+    struct mtmd_context_params ctx_params,
+    struct mtmd_context ** out_ctx,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!out_ctx) {
+        return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG;
+    }
+    *out_ctx = nullptr;
+    if (!mmproj_path) {
+        return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG;
+    }
+    if (!text_model) {
+        return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG;
+    }
+
+    try {
+        struct mtmd_context * ctx = mtmd_init_from_file(mmproj_path, text_model, ctx_params);
+        if (!ctx) {
+            return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL;
+        }
+        *out_ctx = ctx;
+        return LLAMA_RS_MTMD_INIT_FROM_FILE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
+
+extern "C" llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_from_file(
+    struct mtmd_context * ctx,
+    const char * fname,
+    struct mtmd_bitmap ** out_bitmap,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!out_bitmap) {
+        return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG;
+    }
+    *out_bitmap = nullptr;
+    if (!ctx) {
+        return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG;
+    }
+    if (!fname) {
+        return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG;
+    }
+
+    try {
+        struct mtmd_bitmap * bitmap = mtmd_helper_bitmap_init_from_file(ctx, fname);
+        if (!bitmap) {
+            return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL;
+        }
+        *out_bitmap = bitmap;
+        return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
+
+extern "C" llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize(
+    struct mtmd_context * ctx,
+    struct mtmd_input_chunks * output,
+    const struct mtmd_input_text * text,
+    const struct mtmd_bitmap ** bitmaps,
+    size_t num_bitmaps,
+    int32_t * out_undocumented_return_code,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (out_undocumented_return_code) {
+        *out_undocumented_return_code = 0;
+    }
+    if (!ctx) {
+        return LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG;
+    }
+    if (!output) {
+        return LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG;
+    }
+    if (!text) {
+        return LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG;
+    }
+    if (num_bitmaps > 0 && !bitmaps) {
+        return LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO;
+    }
+
+    try {
+        int32_t result = mtmd_tokenize(ctx, output, text, bitmaps, num_bitmaps);
+        switch (result) {
+            case 0:
+                return LLAMA_RS_MTMD_TOKENIZE_OK;
+            case 1:
+                return LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT;
+            case 2:
+                return LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR;
+            default:
+                if (out_undocumented_return_code) {
+                    *out_undocumented_return_code = result;
+                }
+                return LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE;
+        }
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
+
+extern "C" llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk(
+    struct mtmd_context * ctx,
+    const struct mtmd_input_chunk * chunk,
+    int32_t * out_vendored_return_code,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (out_vendored_return_code) {
+        *out_vendored_return_code = 0;
+    }
+    if (!ctx) {
+        return LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG;
+    }
+    if (!chunk) {
+        return LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG;
+    }
+
+    try {
+        int32_t result = mtmd_encode_chunk(ctx, chunk);
+        if (result != 0) {
+            if (out_vendored_return_code) {
+                *out_vendored_return_code = result;
+            }
+            return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE;
+        }
+        return LLAMA_RS_MTMD_ENCODE_CHUNK_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
+
+extern "C" llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_single(
+    struct mtmd_context * ctx,
+    struct llama_context * lctx,
+    const struct mtmd_input_chunk * chunk,
+    llama_pos n_past,
+    llama_seq_id seq_id,
+    int32_t n_batch,
+    bool logits_last,
+    llama_pos * out_new_n_past,
+    int32_t * out_vendored_return_code,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (out_vendored_return_code) {
+        *out_vendored_return_code = 0;
+    }
+    if (!ctx) {
+        return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG;
+    }
+    if (!lctx) {
+        return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG;
+    }
+    if (!chunk) {
+        return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG;
+    }
+    if (!out_new_n_past) {
+        return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG;
+    }
+
+    try {
+        int32_t result = mtmd_helper_eval_chunk_single(
+            ctx, lctx, chunk, n_past, seq_id, n_batch, logits_last, out_new_n_past);
+        if (result != 0) {
+            if (out_vendored_return_code) {
+                *out_vendored_return_code = result;
+            }
+            return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE;
+        }
+        return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
diff --git a/llama-cpp-bindings-sys/wrapper_mtmd.h b/llama-cpp-bindings-sys/wrapper_mtmd.h
index 72fb2111..ac2f5314 100644
--- a/llama-cpp-bindings-sys/wrapper_mtmd.h
+++ b/llama-cpp-bindings-sys/wrapper_mtmd.h
@@ -1,2 +1,109 @@
+#pragma once
+
+#include "llama.cpp/include/llama.h"
 #include "llama.cpp/tools/mtmd/mtmd.h"
 #include "llama.cpp/tools/mtmd/mtmd-helper.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum llama_rs_mtmd_init_from_file_status {
+    LLAMA_RS_MTMD_INIT_FROM_FILE_OK = 0,
+    LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG,
+    LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG,
+    LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG,
+    LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL,
+    LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_mtmd_init_from_file_status;
+
+llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file(
+    const char * mmproj_path,
+    const struct llama_model * text_model,
+    struct mtmd_context_params ctx_params,
+    struct mtmd_context ** out_ctx,
+    char ** out_error);
+
+typedef enum llama_rs_mtmd_bitmap_init_from_file_status {
+    LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK = 0,
+    LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG,
+    LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG,
+    LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG,
+    LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL,
+    LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_mtmd_bitmap_init_from_file_status;
+
+llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_from_file(
+    struct mtmd_context * ctx,
+    const char * fname,
+    struct mtmd_bitmap ** out_bitmap,
+    char ** out_error);
+
+typedef enum llama_rs_mtmd_tokenize_status {
+    LLAMA_RS_MTMD_TOKENIZE_OK = 0,
+    LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG,
+    LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG,
+    LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG,
+    LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO,
+    LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT,
+    LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR,
+    LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE,
+    LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_mtmd_tokenize_status;
+
+llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize(
+    struct mtmd_context * ctx,
+    struct mtmd_input_chunks * output,
+    const struct mtmd_input_text * text,
+    const struct mtmd_bitmap ** bitmaps,
+    size_t num_bitmaps,
+    int32_t * out_undocumented_return_code,
+    char ** out_error);
+
+typedef enum llama_rs_mtmd_encode_chunk_status {
+    LLAMA_RS_MTMD_ENCODE_CHUNK_OK = 0,
+    LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG,
+    LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG,
+    LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE,
+    LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_mtmd_encode_chunk_status;
+
+llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk(
+    struct mtmd_context * ctx,
+    const struct mtmd_input_chunk * chunk,
+    int32_t * out_vendored_return_code,
+    char ** out_error);
+
+typedef enum llama_rs_mtmd_eval_chunk_single_status {
+    LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK = 0,
+    LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG,
+    LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG,
+    LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG,
+    LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG,
+    LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE,
+    LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_mtmd_eval_chunk_single_status;
+
+llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_single(
+    struct mtmd_context * ctx,
+    struct llama_context * lctx,
+    const struct mtmd_input_chunk * chunk,
+    llama_pos n_past,
+    llama_seq_id seq_id,
+    int32_t n_batch,
+    bool logits_last,
+    llama_pos * out_new_n_past,
+    int32_t * out_vendored_return_code,
+    char ** out_error);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/llama-cpp-bindings/src/error/fit_error.rs b/llama-cpp-bindings/src/error/fit_error.rs
index 7585530d..9b2fbc5e 100644
--- a/llama-cpp-bindings/src/error/fit_error.rs
+++ b/llama-cpp-bindings/src/error/fit_error.rs
@@ -1,11 +1,19 @@
 /// Returned by [`crate::model::params::LlamaModelParams::fit_params`].
-#[derive(Debug, Clone, Copy, Eq, PartialEq, thiserror::Error)]
+#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
 pub enum FitError {
-    /// Could not find allocations that fit available memory.
-    #[error("could not find allocations that fit available memory")]
-    Failure,
-    /// A hard error occurred during fitting (e.g. model not found at the specified path,
-    /// or the C++ wrapper threw an exception).
-    #[error("hard error during parameter fitting")]
-    Error,
+    /// Vendored `common_fit_params` reported FAILURE: no allocation that fits available memory was found.
+    #[error("common_fit_params reported FAILURE: no allocations that fit available memory")]
+    VendoredReportedFailure,
+    /// Vendored `common_fit_params` reported ERROR: a hard error occurred during fitting (e.g. model file not found).
+    #[error("common_fit_params reported ERROR: hard error during parameter fitting")]
+    VendoredReportedError,
+    /// Vendored `common_fit_params` returned a status code the wrapper does not recognise.
+    #[error("common_fit_params returned an unrecognised status code: {code}")]
+    VendoredReturnedUnrecognizedStatusCode { code: i32 },
+    /// Wrapper failed to duplicate the C++ exception message into a Rust-owned string.
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    /// Vendored `common_fit_params` threw a C++ exception caught at the wrapper boundary.
+    #[error("common_fit_params threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/model/params.rs b/llama-cpp-bindings/src/model/params.rs
index ebd7edd7..446f9334 100644
--- a/llama-cpp-bindings/src/model/params.rs
+++ b/llama-cpp-bindings/src/model/params.rs
@@ -421,9 +421,7 @@ impl LlamaModelParams {
     ///
     /// # Errors
     ///
-    /// Returns [`FitError::Failure`] if no fitting allocation could be found, or
-    /// [`FitError::Error`] on a hard error (e.g. the model file could not be read or the C++
-    /// implementation threw an exception).
+    /// Returns the [`FitError`] variant corresponding to the status code reported by the `llama_rs_fit_params` wrapper.
     pub fn fit_params(
         mut self: Pin<&mut Self>,
         model_path: &CStr,
@@ -450,6 +448,9 @@ impl LlamaModelParams {
         self.params.tensor_split = null::<f32>();
         self.params.tensor_buft_overrides = null();
 
+        let mut out_unrecognized_status_code: i32 = 0;
+        let mut out_error: *mut c_char = std::ptr::null_mut();
+
         let status = unsafe {
             llama_cpp_bindings_sys::llama_rs_fit_params(
                 model_path.as_ptr(),
@@ -460,13 +461,38 @@ impl LlamaModelParams {
                 margins.as_mut_ptr(),
                 n_ctx_min,
                 log_level,
+                &raw mut out_unrecognized_status_code,
+                &raw mut out_error,
             )
         };
 
         match status {
-            llama_cpp_bindings_sys::LLAMA_RS_FIT_STATUS_SUCCESS => {}
-            llama_cpp_bindings_sys::LLAMA_RS_FIT_STATUS_FAILURE => return Err(FitError::Failure),
-            _ => return Err(FitError::Error),
+            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_OK => {}
+            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE => {
+                return Err(FitError::VendoredReportedFailure);
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR => {
+                return Err(FitError::VendoredReportedError);
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE => {
+                return Err(FitError::VendoredReturnedUnrecognizedStatusCode {
+                    code: out_unrecognized_status_code,
+                });
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED => {
+                return Err(FitError::ErrorStringAllocationFailed);
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe {
+                    crate::ffi_error_reader::read_and_free_cpp_error(out_error)
+                };
+                return Err(FitError::VendoredThrewCxxException { message });
+            }
+            other => {
+                unreachable!(
+                    "llama_rs_fit_params returned unrecognized wrapper status: {other}"
+                );
+            }
         }
 
         self.params.tensor_split = self.tensor_split.as_ptr();
@@ -829,6 +855,13 @@ mod tests {
             llama_cpp_bindings_sys::GGML_LOG_LEVEL_NONE,
         );
 
-        assert_eq!(result, Err(FitError::Error));
+        assert!(
+            matches!(
+                result,
+                Err(FitError::VendoredReportedError)
+                    | Err(FitError::VendoredThrewCxxException { .. })
+            ),
+            "expected VendoredReportedError or VendoredThrewCxxException, got {result:?}"
+        );
     }
 }
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs b/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs
index 08e2ce6c..b3c6b59f 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs
@@ -1,7 +1,10 @@
 use std::ffi::{CStr, CString, c_char};
+use std::path::PathBuf;
 use std::ptr::NonNull;
 use std::slice;
 
+use crate::ffi_error_reader::read_and_free_cpp_error;
+
 use super::mtmd_bitmap_error::MtmdBitmapError;
 use super::mtmd_context::MtmdContext;
 
@@ -104,20 +107,55 @@ impl MtmdBitmap {
     ///
     /// # Errors
     ///
-    /// * `CStringError` - Path contains null bytes
-    /// * `NullResult` - File could not be loaded or processed
+    /// Returns [`MtmdBitmapError::CStringError`] if `path` contains a NUL byte; any other failure is reported as the [`MtmdBitmapError`] variant matching the wrapper's status code.
     pub fn from_file(ctx: &MtmdContext, path: &str) -> Result<Self, MtmdBitmapError> {
         let path_cstr = CString::new(path)?;
-        let bitmap = unsafe {
-            llama_cpp_bindings_sys::mtmd_helper_bitmap_init_from_file(
+        let mut out_bitmap: *mut llama_cpp_bindings_sys::mtmd_bitmap = std::ptr::null_mut();
+        let mut out_error: *mut c_char = std::ptr::null_mut();
+
+        let status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_mtmd_bitmap_init_from_file(
                 ctx.context.as_ptr(),
                 path_cstr.as_ptr(),
+                &raw mut out_bitmap,
+                &raw mut out_error,
             )
         };
 
-        let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::NullResult)?;
-
-        Ok(Self { bitmap })
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK => {
+                let bitmap = NonNull::new(out_bitmap).ok_or_else(|| {
+                    MtmdBitmapError::VendoredReturnedNull {
+                        path: PathBuf::from(path),
+                    }
+                })?;
+                Ok(Self { bitmap })
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG => {
+                Err(MtmdBitmapError::NullCtxArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG => {
+                Err(MtmdBitmapError::NullFnameArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG => {
+                Err(MtmdBitmapError::NullOutBitmapArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL => {
+                Err(MtmdBitmapError::VendoredReturnedNull {
+                    path: PathBuf::from(path),
+                })
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(MtmdBitmapError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(out_error) };
+                Err(MtmdBitmapError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!(
+                "llama_rs_mtmd_bitmap_init_from_file returned unrecognized status: {other}"
+            ),
+        }
     }
 
     /// Create a bitmap from a buffer containing file data.
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs
index c0ad849c..b866e787 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs
@@ -1,16 +1,27 @@
-/// Errors that can occur when working with MTMD bitmaps
+use std::path::PathBuf;
+
 #[derive(thiserror::Error, Debug)]
 pub enum MtmdBitmapError {
-    /// Failed to create `CString` from input
-    #[error("Failed to create CString: {0}")]
+    #[error("Failed to create CString from bitmap-source path: {0}")]
     CStringError(#[from] std::ffi::NulError),
-    /// Invalid data size for bitmap
+    #[error("Bitmap-source path is not valid UTF-8: {0:?}")]
+    PathToStrError(PathBuf),
     #[error("Invalid data size for bitmap")]
     InvalidDataSize,
-    /// Image dimensions too small for processing (minimum 2x2)
     #[error("Image dimensions too small: {0}x{1} (minimum 2x2)")]
     ImageDimensionsTooSmall(u32, u32),
-    /// Bitmap creation returned null
-    #[error("Bitmap creation returned null")]
+    #[error("mtmd_bitmap_init / mtmd_bitmap_init_from_audio returned null")]
     NullResult,
+    #[error("Internal wrapper invariant violated: caller did not pass an out-bitmap pointer")]
+    NullOutBitmapArg,
+    #[error("Wrapper received a null mtmd-context argument")]
+    NullCtxArg,
+    #[error("Wrapper received a null bitmap-source-path argument")]
+    NullFnameArg,
+    #[error("mtmd_helper_bitmap_init_from_file returned null without throwing for path: {path:?}")]
+    VendoredReturnedNull { path: PathBuf },
+    #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("mtmd_helper_bitmap_init_from_file threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_context.rs b/llama-cpp-bindings/src/mtmd/mtmd_context.rs
index 4445a6ad..d8952401 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_context.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_context.rs
@@ -1,6 +1,8 @@
 use std::ffi::CString;
+use std::ffi::c_char;
 use std::ptr::NonNull;
 
+use crate::ffi_error_reader::read_and_free_cpp_error;
 use crate::model::LlamaModel;
 
 use super::mtmd_bitmap::MtmdBitmap;
@@ -12,19 +14,73 @@ use super::mtmd_input_chunks::MtmdInputChunks;
 use super::mtmd_input_text::MtmdInputText;
 use super::mtmd_tokenize_error::MtmdTokenizeError;
 
-const fn tokenize_result_to_error(result: i32) -> MtmdTokenizeError {
-    match result {
-        1 => MtmdTokenizeError::BitmapCountMismatch,
-        2 => MtmdTokenizeError::ImagePreprocessingError,
-        _ => MtmdTokenizeError::UnknownError(result),
+fn map_tokenize_status(
+    status: llama_cpp_bindings_sys::llama_rs_mtmd_tokenize_status,
+    undocumented_return_code: i32,
+    out_error: *mut c_char,
+) -> Result<(), MtmdTokenizeError> {
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_OK => Ok(()),
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG => {
+            Err(MtmdTokenizeError::NullCtxArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG => {
+            Err(MtmdTokenizeError::NullOutputArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG => {
+            Err(MtmdTokenizeError::NullTextArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO => {
+            Err(MtmdTokenizeError::NullBitmapsArgWhenNumBitmapsNonzero)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT => {
+            Err(MtmdTokenizeError::BitmapCountDoesNotMatchMarkerCount)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR => {
+            Err(MtmdTokenizeError::ImagePreprocessingError)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE => {
+            Err(MtmdTokenizeError::VendoredReturnedUndocumentedNonzeroCode {
+                code: undocumented_return_code,
+            })
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED => {
+            Err(MtmdTokenizeError::ErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION => {
+            let message = unsafe { read_and_free_cpp_error(out_error) };
+            Err(MtmdTokenizeError::VendoredThrewCxxException { message })
+        }
+        other => unreachable!("llama_rs_mtmd_tokenize returned unrecognized status: {other}"),
     }
 }
 
-const fn check_encode_result(result: i32) -> Result<(), MtmdEncodeError> {
-    if result == 0 {
-        Ok(())
-    } else {
-        Err(MtmdEncodeError::EncodeFailure(result))
+fn map_encode_chunk_status(
+    status: llama_cpp_bindings_sys::llama_rs_mtmd_encode_chunk_status,
+    vendored_return_code: i32,
+    out_error: *mut c_char,
+) -> Result<(), MtmdEncodeError> {
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_OK => Ok(()),
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG => {
+            Err(MtmdEncodeError::NullCtxArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG => {
+            Err(MtmdEncodeError::NullChunkArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE => {
+            Err(MtmdEncodeError::VendoredReturnedNonzeroCode {
+                code: vendored_return_code,
+            })
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED => {
+            Err(MtmdEncodeError::ErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION => {
+            let message = unsafe { read_and_free_cpp_error(out_error) };
+            Err(MtmdEncodeError::VendoredThrewCxxException { message })
+        }
+        other => unreachable!("llama_rs_mtmd_encode_chunk returned unrecognized status: {other}"),
     }
 }
 
@@ -46,9 +102,7 @@ impl MtmdContext {
     ///
     /// # Errors
     ///
-    /// This function will return an error if:
-    /// - The path cannot be converted to a C string
-    /// - The underlying C function returns null (indicating initialization failure)
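+    /// Returns [`MtmdInitError::CStringError`] if `mmproj_path` contains a NUL byte; any other failure is reported as the [`MtmdInitError`] variant matching the wrapper's status code.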
     pub fn init_from_file(
         mmproj_path: &str,
         text_model: &LlamaModel,
@@ -57,17 +111,53 @@ impl MtmdContext {
         let path_cstr = CString::new(mmproj_path)?;
         let ctx_params = llama_cpp_bindings_sys::mtmd_context_params::from(params);
 
-        let context = unsafe {
-            llama_cpp_bindings_sys::mtmd_init_from_file(
+        let mut out_ctx: *mut llama_cpp_bindings_sys::mtmd_context = std::ptr::null_mut();
+        let mut out_error: *mut c_char = std::ptr::null_mut();
+
+        let status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_mtmd_init_from_file(
                 path_cstr.as_ptr(),
                 text_model.model.as_ptr(),
                 ctx_params,
+                &raw mut out_ctx,
+                &raw mut out_error,
             )
         };
 
-        let context = NonNull::new(context).ok_or(MtmdInitError::NullResult)?;
-
-        Ok(Self { context })
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_OK => {
+                let context = NonNull::new(out_ctx).ok_or_else(|| {
+                    MtmdInitError::VendoredReturnedNull {
+                        path: std::path::PathBuf::from(mmproj_path),
+                    }
+                })?;
+                Ok(Self { context })
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG => {
+                Err(MtmdInitError::NullMmprojPathArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG => {
+                Err(MtmdInitError::NullTextModelArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG => {
+                Err(MtmdInitError::NullOutCtxArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL => {
+                Err(MtmdInitError::VendoredReturnedNull {
+                    path: std::path::PathBuf::from(mmproj_path),
+                })
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(MtmdInitError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(out_error) };
+                Err(MtmdInitError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!(
+                "llama_rs_mtmd_init_from_file returned unrecognized status: {other}"
+            ),
+        }
     }
 
     /// Check whether non-causal attention mask is needed before `llama_decode`
@@ -117,24 +207,7 @@ impl MtmdContext {
     ///
     /// # Errors
     ///
-    /// * `BitmapCountMismatch` - Number of bitmaps doesn't match number of markers
-    /// * `ImagePreprocessingError` - Error occurred during image preprocessing
-    /// * `UnknownError` - Other tokenization error occurred
-    ///
-    /// # Example
-    ///
-    /// ```no_run
-    /// # use llama_cpp_bindings::mtmd::*;
-    /// # fn example(ctx: &MtmdContext, bitmap: &MtmdBitmap) -> Result<(), Box<dyn std::error::Error>> {
-    /// let text = MtmdInputText {
-    ///     text: "Here is an image: <__media__>\nDescribe it.".to_string(),
-    ///     add_special: true,
-    ///     parse_special: true,
-    /// };
-    /// let chunks = ctx.tokenize(text, &[bitmap])?;
-    /// # Ok(())
-    /// # }
-    /// ```
+    /// Returns an [`MtmdTokenizeError`] variant matching the wrapper's status code.
     pub fn tokenize(
         &self,
         text: MtmdInputText,
@@ -153,34 +226,44 @@ impl MtmdContext {
             .map(|bitmap| bitmap.bitmap.as_ptr().cast_const())
             .collect();
 
-        let result = unsafe {
-            llama_cpp_bindings_sys::mtmd_tokenize(
+        let mut out_undocumented_return_code: i32 = 0;
+        let mut out_error: *mut c_char = std::ptr::null_mut();
+
+        let status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_mtmd_tokenize(
                 self.context.as_ptr(),
                 chunks.chunks.as_ptr(),
                 &raw const input_text,
                 bitmap_ptrs.as_ptr().cast_mut(),
                 bitmaps.len(),
+                &raw mut out_undocumented_return_code,
+                &raw mut out_error,
             )
         };
 
-        if result == 0 {
-            Ok(chunks)
-        } else {
-            Err(tokenize_result_to_error(result))
-        }
+        map_tokenize_status(status, out_undocumented_return_code, out_error)?;
+        Ok(chunks)
     }
 
     /// Encode a chunk for image/audio processing.
     ///
     /// # Errors
     ///
-    /// Returns `MtmdEncodeError::EncodeFailure` if encoding fails.
+    /// Returns an [`MtmdEncodeError`] variant matching the wrapper's status code.
     pub fn encode_chunk(&self, chunk: &MtmdInputChunk) -> Result<(), MtmdEncodeError> {
-        let result = unsafe {
-            llama_cpp_bindings_sys::mtmd_encode_chunk(self.context.as_ptr(), chunk.chunk.as_ptr())
+        let mut out_vendored_return_code: i32 = 0;
+        let mut out_error: *mut c_char = std::ptr::null_mut();
+
+        let status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_mtmd_encode_chunk(
+                self.context.as_ptr(),
+                chunk.chunk.as_ptr(),
+                &raw mut out_vendored_return_code,
+                &raw mut out_error,
+            )
         };
 
-        check_encode_result(result)
+        map_encode_chunk_status(status, out_vendored_return_code, out_error)
     }
 }
 
@@ -192,44 +275,119 @@ impl Drop for MtmdContext {
 
 #[cfg(test)]
 mod unit_tests {
-    use super::check_encode_result;
-    use super::tokenize_result_to_error;
+    use super::map_encode_chunk_status;
+    use super::map_tokenize_status;
+    use crate::mtmd::mtmd_encode_error::MtmdEncodeError;
+    use crate::mtmd::mtmd_tokenize_error::MtmdTokenizeError;
 
     #[test]
-    fn tokenize_result_bitmap_count_mismatch() {
-        let error = tokenize_result_to_error(1);
+    fn tokenize_status_maps_bitmap_count_mismatch() {
+        let result = map_tokenize_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT,
+            0,
+            std::ptr::null_mut(),
+        );
 
-        assert!(error.to_string().contains("does not match"));
+        assert!(matches!(
+            result,
+            Err(MtmdTokenizeError::BitmapCountDoesNotMatchMarkerCount)
+        ));
     }
 
     #[test]
-    fn tokenize_result_image_preprocessing_error() {
-        let error = tokenize_result_to_error(2);
+    fn tokenize_status_maps_image_preprocessing_error() {
+        let result = map_tokenize_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR,
+            0,
+            std::ptr::null_mut(),
+        );
 
-        assert!(error.to_string().contains("Image preprocessing"));
+        assert!(matches!(
+            result,
+            Err(MtmdTokenizeError::ImagePreprocessingError)
+        ));
     }
 
     #[test]
-    fn tokenize_result_unknown_error() {
-        let error = tokenize_result_to_error(42);
+    fn tokenize_status_maps_undocumented_nonzero_code_with_value() {
+        let result = map_tokenize_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE,
+            42,
+            std::ptr::null_mut(),
+        );
 
-        assert!(error.to_string().contains("Unknown error: 42"));
+        assert!(matches!(
+            result,
+            Err(MtmdTokenizeError::VendoredReturnedUndocumentedNonzeroCode { code: 42 })
+        ));
     }
 
     #[test]
-    fn check_encode_result_ok_for_zero() {
-        assert!(check_encode_result(0).is_ok());
+    fn tokenize_status_maps_ok_to_unit() {
+        let result = map_tokenize_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_OK,
+            0,
+            std::ptr::null_mut(),
+        );
+
+        assert!(matches!(result, Ok(())));
     }
 
     #[test]
-    fn check_encode_result_error_for_nonzero() {
-        let result = check_encode_result(5);
-
-        assert!(
-            result
-                .unwrap_err()
-                .to_string()
-                .contains("Encode failed with code: 5")
+    fn tokenize_status_maps_null_ctx_arg() {
+        let result = map_tokenize_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG,
+            0,
+            std::ptr::null_mut(),
         );
+
+        assert!(matches!(result, Err(MtmdTokenizeError::NullCtxArg)));
+    }
+
+    #[test]
+    fn encode_chunk_status_maps_ok_to_unit() {
+        let result = map_encode_chunk_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_OK,
+            0,
+            std::ptr::null_mut(),
+        );
+
+        assert!(matches!(result, Ok(())));
+    }
+
+    #[test]
+    fn encode_chunk_status_maps_nonzero_code_with_value() {
+        let result = map_encode_chunk_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE,
+            5,
+            std::ptr::null_mut(),
+        );
+
+        assert!(matches!(
+            result,
+            Err(MtmdEncodeError::VendoredReturnedNonzeroCode { code: 5 })
+        ));
+    }
+
+    #[test]
+    fn encode_chunk_status_maps_null_ctx_arg() {
+        let result = map_encode_chunk_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG,
+            0,
+            std::ptr::null_mut(),
+        );
+
+        assert!(matches!(result, Err(MtmdEncodeError::NullCtxArg)));
+    }
+
+    #[test]
+    fn encode_chunk_status_maps_null_chunk_arg() {
+        let result = map_encode_chunk_status(
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG,
+            0,
+            std::ptr::null_mut(),
+        );
+
+        assert!(matches!(result, Err(MtmdEncodeError::NullChunkArg)));
     }
 }
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs
index fabd3311..47106390 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs
@@ -1,7 +1,13 @@
-/// Errors that can occur during encoding
 #[derive(thiserror::Error, Debug)]
 pub enum MtmdEncodeError {
-    /// Encode operation failed
-    #[error("Encode failed with code: {0}")]
-    EncodeFailure(i32),
+    #[error("Wrapper received a null mtmd-context argument")]
+    NullCtxArg,
+    #[error("Wrapper received a null chunk argument")]
+    NullChunkArg,
+    #[error("mtmd_encode_chunk returned nonzero code: {code}")]
+    VendoredReturnedNonzeroCode { code: i32 },
+    #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("mtmd_encode_chunk threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs
index c4efa643..40431fc0 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs
@@ -1,25 +1,30 @@
 use crate::mtmd::image_chunk_batch_size_mismatch::ImageChunkBatchSizeMismatch;
 
-/// Errors that can occur during evaluation
 #[derive(thiserror::Error, Debug)]
 pub enum MtmdEvalError {
-    /// Requested batch size exceeds the context's maximum batch size
     #[error("batch size {requested} exceeds context batch size {context_max}")]
     BatchSizeExceedsContextLimit {
-        /// The batch size requested in `eval_chunks`
         requested: i32,
-        /// The maximum batch size configured on the context
         context_max: u32,
     },
-    /// An image chunk's token count exceeds the per-decode `n_batch` budget,
-    /// so handing it to `llama_decode` would trip the `GGML_ASSERT`.
     #[error(
         "image chunk has {} tokens but n_batch is {}",
         .0.image_tokens,
         .0.n_batch,
     )]
     ImageChunkExceedsBatchSize(ImageChunkBatchSizeMismatch),
-    /// Evaluation operation failed
-    #[error("Eval failed with code: {0}")]
-    EvalFailure(i32),
+    #[error("Wrapper received a null mtmd-context argument")]
+    NullMtmdCtxArg,
+    #[error("Wrapper received a null llama-context argument")]
+    NullLlamaCtxArg,
+    #[error("Wrapper received a null chunk argument")]
+    NullChunkArg,
+    #[error("Internal wrapper invariant violated: caller did not pass an out-new-n-past pointer")]
+    NullOutNewNPastArg,
+    #[error("mtmd_helper_eval_chunk_single returned nonzero code: {code}")]
+    VendoredReturnedNonzeroCode { code: i32 },
+    #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("mtmd_helper_eval_chunk_single threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs
index 755d6a55..ec18fca9 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs
@@ -1,10 +1,21 @@
-/// Errors that can occur when initializing MTMD context
+use std::path::PathBuf;
+
 #[derive(thiserror::Error, Debug)]
 pub enum MtmdInitError {
-    /// Failed to create `CString` from input
-    #[error("Failed to create CString: {0}")]
+    #[error("Failed to create CString from mmproj path: {0}")]
     CStringError(#[from] std::ffi::NulError),
-    /// MTMD context initialization returned null
-    #[error("MTMD context initialization returned null")]
-    NullResult,
+    #[error("Mmproj path is not valid UTF-8: {0:?}")]
+    PathToStrError(PathBuf),
+    #[error("Internal wrapper invariant violated: caller did not pass an out-ctx pointer")]
+    NullOutCtxArg,
+    #[error("Wrapper received a null mmproj-path argument")]
+    NullMmprojPathArg,
+    #[error("Wrapper received a null text-model argument")]
+    NullTextModelArg,
+    #[error("mtmd_init_from_file returned null without throwing for mmproj path: {path:?}")]
+    VendoredReturnedNull { path: PathBuf },
+    #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("mtmd_init_from_file threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs
index 50643547..e4f7a80d 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs
@@ -1,8 +1,10 @@
 use std::ffi::CStr;
+use std::ffi::c_char;
 use std::ptr::NonNull;
 use std::slice;
 
 use crate::context::LlamaContext;
+use crate::ffi_error_reader::read_and_free_cpp_error;
 use crate::token::LlamaToken;
 
 use super::image_chunk_batch_size_mismatch::ImageChunkBatchSizeMismatch;
@@ -162,9 +164,11 @@ impl MtmdInputChunk {
         }
 
         let mut final_position: llama_cpp_bindings_sys::llama_pos = start_position;
+        let mut out_vendored_return_code: i32 = 0;
+        let mut out_error: *mut c_char = std::ptr::null_mut();
 
-        let result = unsafe {
-            llama_cpp_bindings_sys::mtmd_helper_eval_chunk_single(
+        let status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_mtmd_eval_chunk_single(
                 mtmd_ctx.context.as_ptr(),
                 llama_ctx.context.as_ptr(),
                 self.chunk.as_ptr(),
@@ -173,13 +177,40 @@ impl MtmdInputChunk {
                 n_batch,
                 logits_last,
                 &raw mut final_position,
+                &raw mut out_vendored_return_code,
+                &raw mut out_error,
             )
         };
 
-        if result == 0 {
-            Ok(final_position)
-        } else {
-            Err(MtmdEvalError::EvalFailure(result))
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK => Ok(final_position),
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG => {
+                Err(MtmdEvalError::NullMtmdCtxArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG => {
+                Err(MtmdEvalError::NullLlamaCtxArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG => {
+                Err(MtmdEvalError::NullChunkArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG => {
+                Err(MtmdEvalError::NullOutNewNPastArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE => {
+                Err(MtmdEvalError::VendoredReturnedNonzeroCode {
+                    code: out_vendored_return_code,
+                })
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(MtmdEvalError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(out_error) };
+                Err(MtmdEvalError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!(
+                "llama_rs_mtmd_eval_chunk_single returned unrecognized status: {other}"
+            ),
         }
     }
 }
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs
index a74eb296..9b2879a0 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs
@@ -11,7 +11,7 @@ const fn check_eval_result(result: i32) -> Result<(), MtmdEvalError> {
     if result == 0 {
         Ok(())
     } else {
-        Err(MtmdEvalError::EvalFailure(result))
+        Err(MtmdEvalError::VendoredReturnedNonzeroCode { code: result })
     }
 }
 
@@ -174,15 +174,14 @@ mod tests {
 
     #[test]
     fn check_eval_result_error_for_nonzero() {
+        use super::MtmdEvalError;
         use super::check_eval_result;
 
         let result = check_eval_result(7);
 
-        assert!(
-            result
-                .unwrap_err()
-                .to_string()
-                .contains("Eval failed with code: 7")
-        );
+        assert!(matches!(
+            result,
+            Err(MtmdEvalError::VendoredReturnedNonzeroCode { code: 7 })
+        ));
     }
 }
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs
index 8886bc19..da502243 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs
@@ -1,21 +1,27 @@
 use crate::mtmd::mtmd_input_chunks_error::MtmdInputChunksError;
 
-/// Errors that can occur during tokenization
 #[derive(thiserror::Error, Debug)]
 pub enum MtmdTokenizeError {
-    /// Number of bitmaps does not match number of markers in text
-    #[error("Number of bitmaps does not match number of markers")]
-    BitmapCountMismatch,
-    /// Image preprocessing error occurred
-    #[error("Image preprocessing error")]
-    ImagePreprocessingError,
-    /// Failed to create input chunks collection
+    #[error("Failed to create CString from input text: {0}")]
+    CStringError(#[from] std::ffi::NulError),
     #[error("{0}")]
     InputChunksError(#[from] MtmdInputChunksError),
-    /// Text contains characters that cannot be converted to C string
-    #[error("Failed to create CString from text: {0}")]
-    CStringError(#[from] std::ffi::NulError),
-    /// Unknown error occurred during tokenization
-    #[error("Unknown error: {0}")]
-    UnknownError(i32),
+    #[error("Wrapper received a null mtmd-context argument")]
+    NullCtxArg,
+    #[error("Wrapper received a null output-chunks argument")]
+    NullOutputArg,
+    #[error("Wrapper received a null input-text argument")]
+    NullTextArg,
+    #[error("Wrapper received a null bitmaps argument with num_bitmaps > 0")]
+    NullBitmapsArgWhenNumBitmapsNonzero,
+    #[error("mtmd_tokenize reported that the number of bitmaps does not match the number of markers in the text")]
+    BitmapCountDoesNotMatchMarkerCount,
+    #[error("mtmd_tokenize reported an image preprocessing error")]
+    ImagePreprocessingError,
+    #[error("mtmd_tokenize returned an undocumented nonzero code: {code}")]
+    VendoredReturnedUndocumentedNonzeroCode { code: i32 },
+    #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("mtmd_tokenize threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }

From bd7ff5dde0d75f2406c34134c408cba609801559 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 15:55:32 +0200
Subject: [PATCH 09/16] wrap encode and memory seq ffi entry points to surface
 c++ exceptions as typed rust errors

---
 llama-cpp-bindings-sys/wrapper_common.cpp     | 176 +++++++++++++-----
 llama-cpp-bindings-sys/wrapper_common.h       |  44 ++++-
 .../tests/context_kv_cache.rs                 |  19 +-
 llama-cpp-bindings/src/context.rs             |  60 +++---
 llama-cpp-bindings/src/context/kv_cache.rs    |  84 ++++++---
 llama-cpp-bindings/src/error.rs               |   4 +
 llama-cpp-bindings/src/error/encode_error.rs  |  37 ++--
 .../src/error/kv_cache_seq_add_error.rs       |  19 ++
 .../src/error/kv_cache_seq_div_error.rs       |  19 ++
 llama-cpp-bindings/src/lib.rs                 |  11 +-
 llama-cpp-bindings/src/model/params.rs        |   3 +-
 11 files changed, 339 insertions(+), 137 deletions(-)
 create mode 100644 llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs
 create mode 100644 llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs

diff --git a/llama-cpp-bindings-sys/wrapper_common.cpp b/llama-cpp-bindings-sys/wrapper_common.cpp
index e195d5d7..45e47bae 100644
--- a/llama-cpp-bindings-sys/wrapper_common.cpp
+++ b/llama-cpp-bindings-sys/wrapper_common.cpp
@@ -171,80 +171,160 @@ extern "C" llama_pos llama_rs_memory_seq_pos_max(
     if (!ctx) {
         return -1;
     }
-    auto * mem = llama_get_memory(ctx);
-    if (!mem) {
-        return -1;
-    }
-    uint32_t n_seq_max = llama_n_seq_max(ctx);
-    if (seq_id < 0 || (uint32_t) seq_id >= n_seq_max) {
+    try {
+        auto * mem = llama_get_memory(ctx);
+        if (!mem) {
+            return -1;
+        }
+        uint32_t n_seq_max = llama_n_seq_max(ctx);
+        if (seq_id < 0 || (uint32_t) seq_id >= n_seq_max) {
+            return -1;
+        }
+
+        return llama_memory_seq_pos_max(mem, seq_id);
+    } catch (...) {
         return -1;
     }
-
-    return llama_memory_seq_pos_max(mem, seq_id);
 }
 
-extern "C" llama_rs_status llama_rs_encode(
+extern "C" llama_rs_encode_status llama_rs_encode(
     struct llama_context * ctx,
-    struct llama_batch batch) {
-    if (!ctx) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    struct llama_batch batch,
+    int32_t * out_vendored_return_code,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
     }
-    const auto * model = llama_get_model(ctx);
-    if (!llama_model_has_encoder(model)) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (out_vendored_return_code) {
+        *out_vendored_return_code = 0;
     }
-    int32_t result = llama_encode(ctx, batch);
-    if (result != 0) {
-        return LLAMA_RS_STATUS_EXCEPTION;
+    if (!ctx) {
+        return LLAMA_RS_ENCODE_NULL_CTX_ARG;
+    }
+    try {
+        const auto * model = llama_get_model(ctx);
+        if (!llama_model_has_encoder(model)) {
+            return LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER;
+        }
+        int32_t result = llama_encode(ctx, batch);
+        if (result != 0) {
+            if (out_vendored_return_code) {
+                *out_vendored_return_code = result;
+            }
+            return LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE;
+        }
+        return LLAMA_RS_ENCODE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION;
     }
-
-    return LLAMA_RS_STATUS_OK;
 }
 
-extern "C" llama_rs_status llama_rs_memory_seq_add(
+extern "C" llama_rs_memory_seq_add_status llama_rs_memory_seq_add(
     struct llama_context * ctx,
     llama_seq_id seq_id,
     llama_pos p0,
     llama_pos p1,
-    llama_pos shift) {
-    if (!ctx) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    llama_pos shift,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
     }
-    const auto * model = llama_get_model(ctx);
-    const auto rope = llama_model_rope_type(model);
-    if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (!ctx) {
+        return LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG;
     }
-    auto * mem = llama_get_memory(ctx);
-    if (!mem) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    try {
+        const auto * model = llama_get_model(ctx);
+        const auto rope = llama_model_rope_type(model);
+        if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) {
+            return LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE;
+        }
+        auto * mem = llama_get_memory(ctx);
+        if (!mem) {
+            return LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM;
+        }
+        llama_memory_seq_add(mem, seq_id, p0, p1, shift);
+        return LLAMA_RS_MEMORY_SEQ_ADD_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION;
     }
-    llama_memory_seq_add(mem, seq_id, p0, p1, shift);
-
-    return LLAMA_RS_STATUS_OK;
 }
 
-extern "C" llama_rs_status llama_rs_memory_seq_div(
+extern "C" llama_rs_memory_seq_div_status llama_rs_memory_seq_div(
     struct llama_context * ctx,
     llama_seq_id seq_id,
     llama_pos p0,
     llama_pos p1,
-    int d) {
-    if (!ctx) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    int d,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
     }
-    const auto * model = llama_get_model(ctx);
-    const auto rope = llama_model_rope_type(model);
-    if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (!ctx) {
+        return LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG;
     }
-    auto * mem = llama_get_memory(ctx);
-    if (!mem) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    try {
+        const auto * model = llama_get_model(ctx);
+        const auto rope = llama_model_rope_type(model);
+        if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) {
+            return LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE;
+        }
+        auto * mem = llama_get_memory(ctx);
+        if (!mem) {
+            return LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM;
+        }
+        llama_memory_seq_div(mem, seq_id, p0, p1, d);
+        return LLAMA_RS_MEMORY_SEQ_DIV_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION;
     }
-    llama_memory_seq_div(mem, seq_id, p0, p1, d);
-
-    return LLAMA_RS_STATUS_OK;
 }
 
 extern "C" llama_rs_status llama_rs_sampler_sample(
diff --git a/llama-cpp-bindings-sys/wrapper_common.h b/llama-cpp-bindings-sys/wrapper_common.h
index 3d990abf..61ba9d2f 100644
--- a/llama-cpp-bindings-sys/wrapper_common.h
+++ b/llama-cpp-bindings-sys/wrapper_common.h
@@ -4,6 +4,7 @@
 
 #include <stdbool.h>
 #include <stddef.h>
+#include <stdint.h>
 
 struct llama_model;
 struct llama_sampler;
@@ -65,23 +66,54 @@ llama_pos llama_rs_memory_seq_pos_max(
     struct llama_context * ctx,
     llama_seq_id seq_id);
 
-llama_rs_status llama_rs_encode(
+typedef enum llama_rs_encode_status {
+    LLAMA_RS_ENCODE_OK = 0,
+    LLAMA_RS_ENCODE_NULL_CTX_ARG,
+    LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER,
+    LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE,
+    LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_encode_status;
+
+llama_rs_encode_status llama_rs_encode(
     struct llama_context * ctx,
-    struct llama_batch batch);
+    struct llama_batch batch,
+    int32_t * out_vendored_return_code,
+    char ** out_error);
+
+typedef enum llama_rs_memory_seq_add_status {
+    LLAMA_RS_MEMORY_SEQ_ADD_OK = 0,
+    LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG,
+    LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE,
+    LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM,
+    LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_memory_seq_add_status;
 
-llama_rs_status llama_rs_memory_seq_add(
+llama_rs_memory_seq_add_status llama_rs_memory_seq_add(
     struct llama_context * ctx,
     llama_seq_id seq_id,
     llama_pos p0,
     llama_pos p1,
-    llama_pos shift);
+    llama_pos shift,
+    char ** out_error);
+
+typedef enum llama_rs_memory_seq_div_status {
+    LLAMA_RS_MEMORY_SEQ_DIV_OK = 0,
+    LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG,
+    LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE,
+    LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM,
+    LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_memory_seq_div_status;
 
-llama_rs_status llama_rs_memory_seq_div(
+llama_rs_memory_seq_div_status llama_rs_memory_seq_div(
     struct llama_context * ctx,
     llama_seq_id seq_id,
     llama_pos p0,
     llama_pos p1,
-    int d);
+    int d,
+    char ** out_error);
 
 #ifdef __cplusplus
 }
diff --git a/llama-cpp-bindings-tests/tests/context_kv_cache.rs b/llama-cpp-bindings-tests/tests/context_kv_cache.rs
index 0095bff6..e8abb54b 100644
--- a/llama-cpp-bindings-tests/tests/context_kv_cache.rs
+++ b/llama-cpp-bindings-tests/tests/context_kv_cache.rs
@@ -4,6 +4,7 @@ use std::num::NonZeroU32;
 use anyhow::Result;
 use llama_cpp_bindings::context::LlamaContext;
 use llama_cpp_bindings::context::kv_cache::KvCacheConversionError;
+use llama_cpp_bindings::error::{KvCacheSeqAddError, KvCacheSeqDivError};
 use llama_cpp_bindings::context::params::LlamaContextParams;
 use llama_cpp_bindings::llama_batch::LlamaBatch;
 use llama_cpp_bindings::model::AddBos;
@@ -126,7 +127,10 @@ fn kv_cache_seq_add_returns_error_for_mrope_model() -> Result<()> {
 
     let result = context.kv_cache_seq_add(0, Some(0), None, 1);
 
-    assert!(result.is_err());
+    assert!(matches!(
+        result.unwrap_err(),
+        KvCacheSeqAddError::IncompatibleRopeType,
+    ));
 
     Ok(())
 }
@@ -149,7 +153,10 @@ fn kv_cache_seq_div_returns_error_for_mrope_model() -> Result<()> {
     let divisor = NonZeroU8::new(2).ok_or_else(|| anyhow::anyhow!("2 is non-zero"))?;
     let result = context.kv_cache_seq_div(0, Some(0), None, divisor);
 
-    assert!(result.is_err());
+    assert!(matches!(
+        result.unwrap_err(),
+        KvCacheSeqDivError::IncompatibleRopeType,
+    ));
 
     Ok(())
 }
@@ -363,7 +370,7 @@ fn kv_cache_seq_add_rejects_p0_exceeding_i32_max() -> Result<()> {
 
     assert!(matches!(
         result.unwrap_err(),
-        KvCacheConversionError::P0TooLarge(_),
+        KvCacheSeqAddError::P0TooLarge(_),
     ));
 
     Ok(())
@@ -382,7 +389,7 @@ fn kv_cache_seq_add_rejects_p1_exceeding_i32_max() -> Result<()> {
 
     assert!(matches!(
         result.unwrap_err(),
-        KvCacheConversionError::P1TooLarge(_),
+        KvCacheSeqAddError::P1TooLarge(_),
     ));
 
     Ok(())
@@ -402,7 +409,7 @@ fn kv_cache_seq_div_rejects_p0_exceeding_i32_max() -> Result<()> {
 
     assert!(matches!(
         result.unwrap_err(),
-        KvCacheConversionError::P0TooLarge(_),
+        KvCacheSeqDivError::P0TooLarge(_),
     ));
 
     Ok(())
@@ -422,7 +429,7 @@ fn kv_cache_seq_div_rejects_p1_exceeding_i32_max() -> Result<()> {
 
     assert!(matches!(
         result.unwrap_err(),
-        KvCacheConversionError::P1TooLarge(_),
+        KvCacheSeqDivError::P1TooLarge(_),
     ));
 
     Ok(())
diff --git a/llama-cpp-bindings/src/context.rs b/llama-cpp-bindings/src/context.rs
index 410ade82..61246cbc 100644
--- a/llama-cpp-bindings/src/context.rs
+++ b/llama-cpp-bindings/src/context.rs
@@ -226,34 +226,44 @@ impl<'model> LlamaContext<'model> {
     ///
     /// # Errors
     ///
-    /// - `EncodeError` if the decoding failed.
-    ///
-    /// # Panics
-    ///
-    /// - the returned [`std::ffi::c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems)
+    /// - `EncodeError` if the encoding failed.
     pub fn encode(&mut self, batch: &mut LlamaBatch) -> Result<(), EncodeError> {
+        let mut out_vendored_return_code: i32 = 0;
+        let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut();
         let status = unsafe {
-            llama_cpp_bindings_sys::llama_rs_encode(self.context.as_ptr(), batch.llama_batch)
+            llama_cpp_bindings_sys::llama_rs_encode(
+                self.context.as_ptr(),
+                batch.llama_batch,
+                &raw mut out_vendored_return_code,
+                &raw mut out_error,
+            )
         };
-
-        self.handle_encode_result(status, batch)
-    }
-
-    fn handle_encode_result(
-        &mut self,
-        status: llama_cpp_bindings_sys::llama_rs_status,
-        batch: &mut LlamaBatch,
-    ) -> Result<(), EncodeError> {
-        if crate::status_is_ok(status) {
-            self.initialized_logits
-                .clone_from(&batch.initialized_logits);
-
-            Ok(())
-        } else {
-            Err(EncodeError::from(
-                NonZeroI32::new(crate::status_to_i32(status))
-                    .unwrap_or(NonZeroI32::new(1).expect("1 is non-zero")),
-            ))
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_ENCODE_OK => {
+                self.initialized_logits
+                    .clone_from(&batch.initialized_logits);
+                Ok(())
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_ENCODE_NULL_CTX_ARG => Err(EncodeError::NullContextArg),
+            llama_cpp_bindings_sys::LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER => {
+                Err(EncodeError::ModelHasNoEncoder)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE => {
+                let code = NonZeroI32::new(out_vendored_return_code).unwrap_or_else(|| {
+                    unreachable!(
+                        "llama_rs_encode reported a nonzero return code but the value was zero"
+                    )
+                });
+                Err(EncodeError::from(code))
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(EncodeError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+                Err(EncodeError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!("llama_rs_encode returned unrecognized status {other}"),
         }
     }
 
diff --git a/llama-cpp-bindings/src/context/kv_cache.rs b/llama-cpp-bindings/src/context/kv_cache.rs
index 4250fe94..9e4e340c 100644
--- a/llama-cpp-bindings/src/context/kv_cache.rs
+++ b/llama-cpp-bindings/src/context/kv_cache.rs
@@ -1,8 +1,13 @@
 //! utilities for working with the kv cache
 
-use crate::context::LlamaContext;
 use std::ffi::c_int;
 use std::num::{NonZeroU8, TryFromIntError};
+use std::os::raw::c_char;
+use std::ptr;
+
+use crate::context::LlamaContext;
+use crate::error::{KvCacheSeqAddError, KvCacheSeqDivError};
+use crate::ffi_error_reader::read_and_free_cpp_error;
 
 /// Errors that can occur when attempting to prepare values for the kv cache
 #[derive(Debug, Eq, PartialEq, thiserror::Error)]
@@ -16,9 +21,6 @@ pub enum KvCacheConversionError {
     /// Position 1 conversion to i32 failed
     #[error("Provided end position is too large for a i32")]
     P1TooLarge(#[source] TryFromIntError),
-    /// The operation is not supported by the current model/context configuration.
-    #[error("operation not supported by this model: {0}")]
-    UnsupportedOperation(String),
 }
 
 impl LlamaContext<'_> {
@@ -131,20 +133,21 @@ impl LlamaContext<'_> {
     /// * `delta` - The relative position to add to the tokens
     ///
     /// # Errors
-    /// If either position exceeds [`i32::MAX`].
+    /// If either position exceeds [`i32::MAX`], or the underlying memory operation reports a failure.
     pub fn kv_cache_seq_add(
         &mut self,
         seq_id: i32,
         p0: Option<u32>,
         p1: Option<u32>,
         delta: i32,
-    ) -> Result<(), KvCacheConversionError> {
+    ) -> Result<(), KvCacheSeqAddError> {
         let p0 = p0
             .map_or(Ok(-1), i32::try_from)
-            .map_err(KvCacheConversionError::P0TooLarge)?;
+            .map_err(KvCacheSeqAddError::P0TooLarge)?;
         let p1 = p1
             .map_or(Ok(-1), i32::try_from)
-            .map_err(KvCacheConversionError::P1TooLarge)?;
+            .map_err(KvCacheSeqAddError::P1TooLarge)?;
+        let mut out_error: *mut c_char = ptr::null_mut();
         let status = unsafe {
             llama_cpp_bindings_sys::llama_rs_memory_seq_add(
                 self.context.as_ptr(),
@@ -152,16 +155,28 @@ impl LlamaContext<'_> {
                 p0,
                 p1,
                 delta,
+                &raw mut out_error,
             )
         };
-
-        if crate::status_is_ok(status) {
-            Ok(())
-        } else {
-            Err(KvCacheConversionError::UnsupportedOperation(format!(
-                "kv_cache_seq_add failed (status {})",
-                crate::status_to_i32(status)
-            )))
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_OK => Ok(()),
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG => {
+                Err(KvCacheSeqAddError::NullContextArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE => {
+                Err(KvCacheSeqAddError::IncompatibleRopeType)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM => {
+                Err(KvCacheSeqAddError::NullMem)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(KvCacheSeqAddError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(out_error) };
+                Err(KvCacheSeqAddError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!("llama_rs_memory_seq_add returned unrecognized status {other}"),
         }
     }
 
@@ -181,21 +196,22 @@ impl LlamaContext<'_> {
     /// * `d` - The factor to divide the positions by
     ///
     /// # Errors
-    /// If either position exceeds [`i32::MAX`].
+    /// If either position exceeds [`i32::MAX`], or the underlying memory operation reports a failure.
     pub fn kv_cache_seq_div(
         &mut self,
         seq_id: i32,
         p0: Option<u32>,
         p1: Option<u32>,
         d: NonZeroU8,
-    ) -> Result<(), KvCacheConversionError> {
+    ) -> Result<(), KvCacheSeqDivError> {
         let p0 = p0
             .map_or(Ok(-1), i32::try_from)
-            .map_err(KvCacheConversionError::P0TooLarge)?;
+            .map_err(KvCacheSeqDivError::P0TooLarge)?;
         let p1 = p1
             .map_or(Ok(-1), i32::try_from)
-            .map_err(KvCacheConversionError::P1TooLarge)?;
+            .map_err(KvCacheSeqDivError::P1TooLarge)?;
         let d = c_int::from(d.get());
+        let mut out_error: *mut c_char = ptr::null_mut();
         let status = unsafe {
             llama_cpp_bindings_sys::llama_rs_memory_seq_div(
                 self.context.as_ptr(),
@@ -203,16 +219,28 @@ impl LlamaContext<'_> {
                 p0,
                 p1,
                 d,
+                &raw mut out_error,
             )
         };
-
-        if crate::status_is_ok(status) {
-            Ok(())
-        } else {
-            Err(KvCacheConversionError::UnsupportedOperation(format!(
-                "kv_cache_seq_div failed (status {})",
-                crate::status_to_i32(status)
-            )))
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_OK => Ok(()),
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG => {
+                Err(KvCacheSeqDivError::NullContextArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE => {
+                Err(KvCacheSeqDivError::IncompatibleRopeType)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM => {
+                Err(KvCacheSeqDivError::NullMem)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(KvCacheSeqDivError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(out_error) };
+                Err(KvCacheSeqDivError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!("llama_rs_memory_seq_div returned unrecognized status {other}"),
         }
     }
 
diff --git a/llama-cpp-bindings/src/error.rs b/llama-cpp-bindings/src/error.rs
index 2314452f..731a9b13 100644
--- a/llama-cpp-bindings/src/error.rs
+++ b/llama-cpp-bindings/src/error.rs
@@ -9,6 +9,8 @@ pub mod fit_error;
 pub mod grammar_error;
 pub mod json_object_failure;
 pub mod key_value_xml_tags_failure;
+pub mod kv_cache_seq_add_error;
+pub mod kv_cache_seq_div_error;
 pub mod llama_context_load_error;
 pub mod llama_cpp_error;
 pub mod llama_lora_adapter_init_error;
@@ -42,6 +44,8 @@ pub use fit_error::FitError;
 pub use grammar_error::GrammarError;
 pub use json_object_failure::JsonObjectFailure;
 pub use key_value_xml_tags_failure::KeyValueXmlTagsFailure;
+pub use kv_cache_seq_add_error::KvCacheSeqAddError;
+pub use kv_cache_seq_div_error::KvCacheSeqDivError;
 pub use llama_context_load_error::LlamaContextLoadError;
 pub use llama_cpp_error::LlamaCppError;
 pub use llama_lora_adapter_init_error::LlamaLoraAdapterInitError;
diff --git a/llama-cpp-bindings/src/error/encode_error.rs b/llama-cpp-bindings/src/error/encode_error.rs
index 33999d61..c5bc3dad 100644
--- a/llama-cpp-bindings/src/error/encode_error.rs
+++ b/llama-cpp-bindings/src/error/encode_error.rs
@@ -1,27 +1,30 @@
 use std::num::NonZeroI32;
 use std::os::raw::c_int;
 
-/// Failed to decode a batch.
 #[derive(Debug, Eq, PartialEq, thiserror::Error)]
 pub enum EncodeError {
-    /// No kv cache slot was available.
-    #[error("Encode Error 1: NoKvCacheSlot")]
+    #[error("llama_rs_encode called with null context")]
+    NullContextArg,
+    #[error("llama_rs_encode invoked on a model that has no encoder")]
+    ModelHasNoEncoder,
+    #[error("llama_encode returned non-zero code 1: no kv cache slot was available")]
     NoKvCacheSlot,
-    /// The number of tokens in the batch was 0.
-    #[error("Encode Error -1: n_tokens == 0")]
+    #[error("llama_encode returned non-zero code -1: n_tokens == 0")]
     NTokensZero,
-    /// An unknown error occurred.
-    #[error("Encode Error {0}: unknown")]
-    Unknown(c_int),
+    #[error("llama_encode returned unrecognized non-zero code: {code}")]
+    VendoredReturnedUnrecognizedNonzeroCode { code: c_int },
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("llama_encode threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
 
-/// Encode a error from llama.cpp into a [`EncodeError`].
 impl From<NonZeroI32> for EncodeError {
     fn from(value: NonZeroI32) -> Self {
         match value.get() {
             1 => Self::NoKvCacheSlot,
             -1 => Self::NTokensZero,
-            error_code => Self::Unknown(error_code),
+            error_code => Self::VendoredReturnedUnrecognizedNonzeroCode { code: error_code },
         }
     }
 }
@@ -33,26 +36,26 @@ mod tests {
     use super::EncodeError;
 
     #[test]
-    fn encode_error_no_kv_cache_slot() {
+    fn no_kv_cache_slot_maps_from_code_one() { // pins only the code -> variant mapping; the Display text is no longer asserted
         let error = EncodeError::from(NonZeroI32::new(1).expect("1 is non-zero"));
 
         assert_eq!(error, EncodeError::NoKvCacheSlot);
-        assert_eq!(error.to_string(), "Encode Error 1: NoKvCacheSlot");
     }
 
     #[test]
-    fn encode_error_n_tokens_zero() {
+    fn n_tokens_zero_maps_from_code_negative_one() { // pins only the code -> variant mapping; the Display text is no longer asserted
         let error = EncodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero"));
 
         assert_eq!(error, EncodeError::NTokensZero);
-        assert_eq!(error.to_string(), "Encode Error -1: n_tokens == 0");
     }
 
     #[test]
-    fn encode_error_unknown() {
+    fn unrecognized_code_falls_through_to_typed_variant() { // 99 stands in for any code other than 1 and -1
         let error = EncodeError::from(NonZeroI32::new(99).expect("99 is non-zero"));
 
-        assert_eq!(error, EncodeError::Unknown(99));
-        assert_eq!(error.to_string(), "Encode Error 99: unknown");
+        assert_eq!(
+            error,
+            EncodeError::VendoredReturnedUnrecognizedNonzeroCode { code: 99 } // the raw code must be preserved in the variant
+        );
     }
 }
diff --git a/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs b/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs
new file mode 100644
index 00000000..95171d5d
--- /dev/null
+++ b/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs
@@ -0,0 +1,19 @@
+use std::num::TryFromIntError;
+
+#[derive(Debug, thiserror::Error)]
+pub enum KvCacheSeqAddError { // failure modes reported for llama_rs_memory_seq_add (per the messages below); call site not visible here
+    #[error("provided start position is too large for an i32")]
+    P0TooLarge(#[source] TryFromIntError), // the failed integer conversion is kept as the error source
+    #[error("provided end position is too large for an i32")]
+    P1TooLarge(#[source] TryFromIntError), // same as P0TooLarge, for the end position
+    #[error("llama_rs_memory_seq_add called with null context")]
+    NullContextArg,
+    #[error("llama_rs_memory_seq_add invoked on a model with incompatible rope type")]
+    IncompatibleRopeType,
+    #[error("llama_rs_memory_seq_add could not acquire the context memory handle")]
+    NullMem,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed, // an exception was caught but its text could not be copied
+    #[error("llama_rs_memory_seq_add threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String }, // `message` carries the exception text
+}
diff --git a/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs b/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs
new file mode 100644
index 00000000..df073004
--- /dev/null
+++ b/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs
@@ -0,0 +1,19 @@
+use std::num::TryFromIntError;
+
+#[derive(Debug, thiserror::Error)]
+pub enum KvCacheSeqDivError { // failure modes reported for llama_rs_memory_seq_div (per the messages below); call site not visible here
+    #[error("provided start position is too large for an i32")]
+    P0TooLarge(#[source] TryFromIntError), // the failed integer conversion is kept as the error source
+    #[error("provided end position is too large for an i32")]
+    P1TooLarge(#[source] TryFromIntError), // same as P0TooLarge, for the end position
+    #[error("llama_rs_memory_seq_div called with null context")]
+    NullContextArg,
+    #[error("llama_rs_memory_seq_div invoked on a model with incompatible rope type")]
+    IncompatibleRopeType,
+    #[error("llama_rs_memory_seq_div could not acquire the context memory handle")]
+    NullMem,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed, // an exception was caught but its text could not be copied
+    #[error("llama_rs_memory_seq_div threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String }, // `message` carries the exception text
+}
diff --git a/llama-cpp-bindings/src/lib.rs b/llama-cpp-bindings/src/lib.rs
index b77d14a4..261ec02d 100644
--- a/llama-cpp-bindings/src/lib.rs
+++ b/llama-cpp-bindings/src/lib.rs
@@ -64,11 +64,12 @@ pub mod tool_call_template_overrides;
 
 pub use error::{
     ApplyChatTemplateError, ChatTemplateError, DecodeError, EmbeddingsError, EncodeError,
-    EvalMultimodalChunksError, GrammarError, LlamaContextLoadError, LlamaCppError,
-    LlamaLoraAdapterInitError, LlamaLoraAdapterRemoveError, LlamaLoraAdapterSetError,
-    LlamaModelLoadError, LogitsError, MarkerDetectionError, MetaValError, ModelParamsError,
-    NewLlamaChatMessageError, ParseChatMessageError, Result, SampleError, SamplerAcceptError,
-    SamplingError, StringToTokenError, TokenSamplingError, TokenToStringError,
+    EvalMultimodalChunksError, GrammarError, KvCacheSeqAddError, KvCacheSeqDivError,
+    LlamaContextLoadError, LlamaCppError, LlamaLoraAdapterInitError, LlamaLoraAdapterRemoveError,
+    LlamaLoraAdapterSetError, LlamaModelLoadError, LogitsError, MarkerDetectionError,
+    MetaValError, ModelParamsError, NewLlamaChatMessageError, ParseChatMessageError, Result,
+    SampleError, SamplerAcceptError, SamplingError, StringToTokenError, TokenSamplingError,
+    TokenToStringError,
 };
 
 pub use chat_message_parse_outcome::ChatMessageParseOutcome;
diff --git a/llama-cpp-bindings/src/model/params.rs b/llama-cpp-bindings/src/model/params.rs
index 446f9334..4772a129 100644
--- a/llama-cpp-bindings/src/model/params.rs
+++ b/llama-cpp-bindings/src/model/params.rs
@@ -858,8 +858,7 @@ mod tests {
         assert!(
             matches!(
                 result,
-                Err(FitError::VendoredReportedError)
-                    | Err(FitError::VendoredThrewCxxException { .. })
+                Err(FitError::VendoredReportedError | FitError::VendoredThrewCxxException { .. })
             ),
             "expected VendoredReportedError or VendoredThrewCxxException, got {result:?}"
         );

From 53087b3628bad075cac2142a71068fd5b2204ef5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 16:02:41 +0200
Subject: [PATCH 10/16] wrap chat parse ffi accessors to surface c++ exceptions
 as typed rust errors

---
 llama-cpp-bindings-sys/wrapper_chat_parse.cpp | 352 +++++++++++++++--
 llama-cpp-bindings-sys/wrapper_chat_parse.h   | 133 +++++--
 .../src/error/parse_chat_message_error.rs     |  81 +++-
 llama-cpp-bindings/src/model.rs               | 362 ++++++++++++++++--
 4 files changed, 807 insertions(+), 121 deletions(-)

diff --git a/llama-cpp-bindings-sys/wrapper_chat_parse.cpp b/llama-cpp-bindings-sys/wrapper_chat_parse.cpp
index f60cada6..0bf59aee 100644
--- a/llama-cpp-bindings-sys/wrapper_chat_parse.cpp
+++ b/llama-cpp-bindings-sys/wrapper_chat_parse.cpp
@@ -7,6 +7,7 @@
 #include "marker_probes/marker_probe.h"
 
 #include <exception>
+#include <new>
 #include <nlohmann/json.hpp>
 #include <string>
 
@@ -16,7 +17,16 @@ struct llama_rs_parsed_chat {
     common_chat_msg message;
 };
 
-extern "C" llama_rs_status llama_rs_parse_chat_message(
+static char * dup_or_set_alloc_flag(const std::string & source, bool * out_alloc_failed) { // copies `source` via llama_rs_dup_string; a null copy is reported through the flag
+    *out_alloc_failed = false; // written unconditionally: callers must pass a non-null flag pointer
+    char * dup = llama_rs_dup_string(source);
+    if (!dup) {
+        *out_alloc_failed = true; // a null result from llama_rs_dup_string is treated as an allocation failure
+    }
+    return dup; // null exactly when the flag was set
+}
+
+extern "C" llama_rs_parse_chat_message_status llama_rs_parse_chat_message(
     const struct llama_model * model,
     const char * tools_json,
     const char * input,
@@ -29,20 +39,28 @@ extern "C" llama_rs_status llama_rs_parse_chat_message(
     if (out_error) {
         *out_error = nullptr;
     }
-
-    if (!model || !input || !out_handle || !out_error) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (!model) {
+        return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG;
+    }
+    if (!input) {
+        return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG;
+    }
+    if (!out_handle) {
+        return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG;
     }
 
     try {
         const char * tmpl_src = llama_model_chat_template(model, nullptr);
         if (!tmpl_src) {
-            return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+            return LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE;
         }
 
         const llama_vocab * vocab = llama_model_get_vocab(model);
         if (!vocab) {
-            return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+            return LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB;
         }
 
         std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab));
@@ -91,63 +109,323 @@ extern "C" llama_rs_status llama_rs_parse_chat_message(
 
         *out_handle = handle;
 
-        return LLAMA_RS_STATUS_OK;
+        return LLAMA_RS_PARSE_CHAT_MESSAGE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & ex) {
         *out_error = llama_rs_dup_string(std::string(ex.what()));
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
         *out_error = llama_rs_dup_string(std::string("unknown c++ exception"));
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
 
-extern "C" void llama_rs_parsed_chat_free(llama_rs_parsed_chat_handle handle) {
-    delete handle;
+extern "C" llama_rs_parsed_chat_free_status llama_rs_parsed_chat_free( // destroys a parsed-chat handle and reports any C++ exception as a status code
+    llama_rs_parsed_chat_handle handle,
+    char ** out_error) {
+    if (out_error) { // out_error is optional here; it is cleared first so it is null unless a message is stored below
+        *out_error = nullptr;
+    }
+    try {
+        delete handle; // no null check needed: deleting a null pointer is a no-op in C++
+        return LLAMA_RS_PARSED_CHAT_FREE_OK;
+    } catch (const std::bad_alloc &) { // bad_alloc maps to the allocation-failed status and writes no message
+        return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what()); // copy of the message; presumably released with llama_rs_string_free - confirm
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION; // also returned, without a message, when out_error is null
+    } catch (...) { // non-std exceptions get a fixed placeholder message
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION;
+    }
 }
 
-extern "C" size_t llama_rs_parsed_chat_tool_call_count(llama_rs_parsed_chat_handle handle) {
+extern "C" llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool_call_count( // writes the number of parsed tool calls to *out_count
+    llama_rs_parsed_chat_handle handle,
+    size_t * out_count,
+    char ** out_error) {
+    if (out_error) { // optional out-param, cleared before any early return
+        *out_error = nullptr;
+    }
+    if (out_count) { // give the result a defined value (0) on every failure path
+        *out_count = 0;
+    }
     if (!handle) {
-        return 0;
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG; // a null handle is now an explicit error instead of a count of 0
+    }
+    if (!out_count) {
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG;
+    }
+    try {
+        *out_count = handle->message.tool_calls.size();
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK;
+    } catch (const std::bad_alloc &) { // bad_alloc maps to the allocation-failed status and writes no message
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what()); // copy of the message; presumably released with llama_rs_string_free - confirm
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION; // also returned, without a message, when out_error is null
+    } catch (...) { // non-std exceptions get a fixed placeholder message
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION;
     }
-    return handle->message.tool_calls.size();
 }
 
-extern "C" char * llama_rs_parsed_chat_tool_call_id(
-    llama_rs_parsed_chat_handle handle, size_t index) {
-    if (!handle || index >= handle->message.tool_calls.size()) {
-        return nullptr;
+extern "C" llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_call_id( // copies the `id` of tool call `index` into *out_string
+    llama_rs_parsed_chat_handle handle,
+    size_t index,
+    char ** out_string,
+    char ** out_error) {
+    if (out_error) { // optional out-param, cleared before any early return
+        *out_error = nullptr;
+    }
+    if (out_string) { // result stays null on every failure path
+        *out_string = nullptr;
+    }
+    if (!handle) {
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG;
+    }
+    if (!out_string) {
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG;
+    }
+    try {
+        if (index >= handle->message.tool_calls.size()) { // bounds check before the unchecked operator[] below
+            return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS;
+        }
+        bool alloc_failed = false;
+        *out_string = dup_or_set_alloc_flag(handle->message.tool_calls[index].id, &alloc_failed); // copy; presumably released with llama_rs_string_free - confirm
+        if (alloc_failed) {
+            return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK;
+    } catch (const std::bad_alloc &) { // bad_alloc maps to the allocation-failed status and writes no message
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION; // also returned, without a message, when out_error is null
+    } catch (...) { // non-std exceptions get a fixed placeholder message
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION;
     }
-    return llama_rs_dup_string(handle->message.tool_calls[index].id);
 }
 
-extern "C" char * llama_rs_parsed_chat_tool_call_name(
-    llama_rs_parsed_chat_handle handle, size_t index) {
-    if (!handle || index >= handle->message.tool_calls.size()) {
-        return nullptr;
+extern "C" llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_call_name( // copies the `name` of tool call `index` into *out_string
+    llama_rs_parsed_chat_handle handle,
+    size_t index,
+    char ** out_string,
+    char ** out_error) {
+    if (out_error) { // optional out-param, cleared before any early return
+        *out_error = nullptr;
+    }
+    if (out_string) { // result stays null on every failure path
+        *out_string = nullptr;
+    }
+    if (!handle) {
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG;
+    }
+    if (!out_string) {
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG;
+    }
+    try {
+        if (index >= handle->message.tool_calls.size()) { // bounds check before the unchecked operator[] below
+            return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS;
+        }
+        bool alloc_failed = false;
+        *out_string = dup_or_set_alloc_flag(handle->message.tool_calls[index].name, &alloc_failed); // copy; presumably released with llama_rs_string_free - confirm
+        if (alloc_failed) {
+            return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK;
+    } catch (const std::bad_alloc &) { // bad_alloc maps to the allocation-failed status and writes no message
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION; // also returned, without a message, when out_error is null
+    } catch (...) { // non-std exceptions get a fixed placeholder message
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION;
     }
-    return llama_rs_dup_string(handle->message.tool_calls[index].name);
 }
 
-extern "C" char * llama_rs_parsed_chat_tool_call_arguments(
-    llama_rs_parsed_chat_handle handle, size_t index) {
-    if (!handle || index >= handle->message.tool_calls.size()) {
-        return nullptr;
+extern "C" llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_tool_call_arguments( // copies the `arguments` of tool call `index` into *out_string
+    llama_rs_parsed_chat_handle handle,
+    size_t index,
+    char ** out_string,
+    char ** out_error) {
+    if (out_error) { // optional out-param, cleared before any early return
+        *out_error = nullptr;
+    }
+    if (out_string) { // result stays null on every failure path
+        *out_string = nullptr;
+    }
+    if (!handle) {
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG;
+    }
+    if (!out_string) {
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG;
+    }
+    try {
+        if (index >= handle->message.tool_calls.size()) { // bounds check before the unchecked operator[] below
+            return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS;
+        }
+        bool alloc_failed = false;
+        *out_string = dup_or_set_alloc_flag( // copy; presumably released with llama_rs_string_free - confirm
+            handle->message.tool_calls[index].arguments, &alloc_failed);
+        if (alloc_failed) {
+            return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK;
+    } catch (const std::bad_alloc &) { // bad_alloc maps to the allocation-failed status and writes no message
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION; // also returned, without a message, when out_error is null
+    } catch (...) { // non-std exceptions get a fixed placeholder message
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION;
     }
-    return llama_rs_dup_string(handle->message.tool_calls[index].arguments);
 }
 
-extern "C" char * llama_rs_parsed_chat_content(llama_rs_parsed_chat_handle handle) {
+extern "C" llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( // copies the parsed message `content` into *out_string
+    llama_rs_parsed_chat_handle handle,
+    char ** out_string,
+    char ** out_error) {
+    if (out_error) { // optional out-param, cleared before any early return
+        *out_error = nullptr;
+    }
+    if (out_string) { // result stays null on every failure path
+        *out_string = nullptr;
+    }
     if (!handle) {
-        return nullptr;
+        return LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG; // a null handle is now an explicit status instead of a null string
+    }
+    if (!out_string) {
+        return LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG;
+    }
+    try {
+        bool alloc_failed = false;
+        *out_string = dup_or_set_alloc_flag(handle->message.content, &alloc_failed); // copy; presumably released with llama_rs_string_free - confirm
+        if (alloc_failed) {
+            return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_PARSED_CHAT_CONTENT_OK;
+    } catch (const std::bad_alloc &) { // bad_alloc maps to the allocation-failed status and writes no message
+        return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION; // also returned, without a message, when out_error is null
+    } catch (...) { // non-std exceptions get a fixed placeholder message
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION;
     }
-    return llama_rs_dup_string(handle->message.content);
 }
 
-extern "C" char * llama_rs_parsed_chat_reasoning_content(llama_rs_parsed_chat_handle handle) {
+extern "C" llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_reasoning_content( // copies the parsed message `reasoning_content` into *out_string
+    llama_rs_parsed_chat_handle handle,
+    char ** out_string,
+    char ** out_error) {
+    if (out_error) { // optional out-param, cleared before any early return
+        *out_error = nullptr;
+    }
+    if (out_string) { // result stays null on every failure path
+        *out_string = nullptr;
+    }
     if (!handle) {
-        return nullptr;
+        return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG; // a null handle is now an explicit status instead of a null string
+    }
+    if (!out_string) {
+        return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG;
+    }
+    try {
+        bool alloc_failed = false;
+        *out_string = dup_or_set_alloc_flag(handle->message.reasoning_content, &alloc_failed); // copy; presumably released with llama_rs_string_free - confirm
+        if (alloc_failed) {
+            return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK;
+    } catch (const std::bad_alloc &) { // bad_alloc maps to the allocation-failed status and writes no message
+        return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        if (out_error) {
+            *out_error = llama_rs_dup_string(err.what());
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION; // also returned, without a message, when out_error is null
+    } catch (...) { // non-std exceptions get a fixed placeholder message
+        if (out_error) {
+            *out_error = llama_rs_dup_string("unknown c++ exception");
+            if (!*out_error) {
+                return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED;
+            }
+        }
+        return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION;
     }
-    return llama_rs_dup_string(handle->message.reasoning_content);
 }
diff --git a/llama-cpp-bindings-sys/wrapper_chat_parse.h b/llama-cpp-bindings-sys/wrapper_chat_parse.h
index 12fed5d9..e235673c 100644
--- a/llama-cpp-bindings-sys/wrapper_chat_parse.h
+++ b/llama-cpp-bindings-sys/wrapper_chat_parse.h
@@ -12,20 +12,19 @@ extern "C" {
 struct llama_rs_parsed_chat;
 typedef struct llama_rs_parsed_chat * llama_rs_parsed_chat_handle;
 
-/**
- * Parse a chat-completion turn from raw assistant output using llama.cpp's
- * `common_chat_parse`, driven by the model's autoparser-built peg parser.
- *
- * `tools_json` is a serialized JSON array of OpenAI-style tool definitions
- * (or empty / null when the request had no tools). `is_partial` switches
- * between mid-stream parses (partial accepts incomplete payloads) and final
- * parses (rejects malformed input).
- *
- * On success, `*out_handle` owns the parsed message; free via
- * `llama_rs_parsed_chat_free`. On failure, `*out_error` carries an
- * exception message; free via `llama_rs_string_free`.
- */
-llama_rs_status llama_rs_parse_chat_message(
+typedef enum llama_rs_parse_chat_message_status { /* result codes of llama_rs_parse_chat_message */
+    LLAMA_RS_PARSE_CHAT_MESSAGE_OK = 0, /* success; the enumerators below take consecutive values 1, 2, ... */
+    LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG,
+    LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG,
+    LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG,
+    LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG, /* unlike the accessors, out_error is mandatory for this call */
+    LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE, /* llama_model_chat_template returned null */
+    LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB, /* llama_model_get_vocab returned null */
+    LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED, /* std::bad_alloc was caught, or copying the exception text returned null */
+    LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION, /* *out_error holds a copy of the exception message */
+} llama_rs_parse_chat_message_status;
+
+llama_rs_parse_chat_message_status llama_rs_parse_chat_message(
     const struct llama_model * model,
     const char * tools_json,
     const char * input,
@@ -33,25 +32,99 @@ llama_rs_status llama_rs_parse_chat_message(
     llama_rs_parsed_chat_handle * out_handle,
     char ** out_error);
 
-void llama_rs_parsed_chat_free(llama_rs_parsed_chat_handle handle);
+typedef enum llama_rs_parsed_chat_free_status { /* result codes of llama_rs_parsed_chat_free */
+    LLAMA_RS_PARSED_CHAT_FREE_OK = 0,
+    LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED, /* std::bad_alloc was caught, or copying the exception text returned null */
+    LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION, /* an exception escaped deletion of the handle */
+} llama_rs_parsed_chat_free_status;
+
+llama_rs_parsed_chat_free_status llama_rs_parsed_chat_free( /* destroys a handle produced by llama_rs_parse_chat_message */
+    llama_rs_parsed_chat_handle handle,
+    char ** out_error); /* optional; receives a copy of the exception message when one is caught */
+
+typedef enum llama_rs_parsed_chat_tool_call_count_status { /* result codes of llama_rs_parsed_chat_tool_call_count */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK = 0,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED, /* std::bad_alloc was caught, or copying the exception text returned null */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_parsed_chat_tool_call_count_status;
+
+llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool_call_count( /* writes the number of tool calls in the parsed message */
+    llama_rs_parsed_chat_handle handle,
+    size_t * out_count, /* required; set to 0 on every failure path */
+    char ** out_error); /* optional; receives a copy of the exception message when one is caught */
+
+typedef enum llama_rs_parsed_chat_tool_call_id_status { /* result codes of llama_rs_parsed_chat_tool_call_id */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK = 0,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS, /* index >= number of tool calls */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED, /* copying the result or the exception text failed, or std::bad_alloc was caught */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_parsed_chat_tool_call_id_status;
+
+llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_call_id( /* copies the `id` field of tool call `index` */
+    llama_rs_parsed_chat_handle handle,
+    size_t index,
+    char ** out_string, /* required; null on failure, otherwise a copy - presumably freed with llama_rs_string_free (confirm) */
+    char ** out_error); /* optional; receives a copy of the exception message when one is caught */
+
+typedef enum llama_rs_parsed_chat_tool_call_name_status { /* result codes of llama_rs_parsed_chat_tool_call_name */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK = 0,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS, /* index >= number of tool calls */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED, /* copying the result or the exception text failed, or std::bad_alloc was caught */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_parsed_chat_tool_call_name_status;
+
+llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_call_name( /* copies the `name` field of tool call `index` */
+    llama_rs_parsed_chat_handle handle,
+    size_t index,
+    char ** out_string, /* required; null on failure, otherwise a copy - presumably freed with llama_rs_string_free (confirm) */
+    char ** out_error); /* optional; receives a copy of the exception message when one is caught */
 
-size_t llama_rs_parsed_chat_tool_call_count(llama_rs_parsed_chat_handle handle);
+typedef enum llama_rs_parsed_chat_tool_call_arguments_status { /* result codes of llama_rs_parsed_chat_tool_call_arguments */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK = 0,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG,
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS, /* index >= number of tool calls */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED, /* copying the result or the exception text failed, or std::bad_alloc was caught */
+    LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_parsed_chat_tool_call_arguments_status;
+
+llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_tool_call_arguments( /* copies the `arguments` field of tool call `index`; the removed doc described it as the raw JSON string emitted by the parser */
+    llama_rs_parsed_chat_handle handle,
+    size_t index,
+    char ** out_string, /* required; null on failure, otherwise a copy - presumably freed with llama_rs_string_free (confirm) */
+    char ** out_error); /* optional; receives a copy of the exception message when one is caught */
 
-/**
- * Returns a heap-allocated UTF-8 string for the i-th tool call's `id`,
- * `name`, or `arguments` field. Free with `llama_rs_string_free`. Returns
- * nullptr if `handle` is null or `index` is out of bounds.
- *
- * `arguments` is the raw JSON string emitted by the parser — the caller is
- * expected to feed it into a schema validator or hand it back to clients
- * verbatim.
- */
-char * llama_rs_parsed_chat_tool_call_id(llama_rs_parsed_chat_handle handle, size_t index);
-char * llama_rs_parsed_chat_tool_call_name(llama_rs_parsed_chat_handle handle, size_t index);
-char * llama_rs_parsed_chat_tool_call_arguments(llama_rs_parsed_chat_handle handle, size_t index);
+typedef enum llama_rs_parsed_chat_content_status { /* result codes of llama_rs_parsed_chat_content */
+    LLAMA_RS_PARSED_CHAT_CONTENT_OK = 0,
+    LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG,
+    LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG,
+    LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED, /* copying the result or the exception text failed, or std::bad_alloc was caught */
+    LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_parsed_chat_content_status;
 
-char * llama_rs_parsed_chat_content(llama_rs_parsed_chat_handle handle);
-char * llama_rs_parsed_chat_reasoning_content(llama_rs_parsed_chat_handle handle);
+llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( /* copies the parsed message's `content` field */
+    llama_rs_parsed_chat_handle handle,
+    char ** out_string, /* required; null on failure, otherwise a copy - presumably freed with llama_rs_string_free (confirm) */
+    char ** out_error); /* optional; receives a copy of the exception message when one is caught */
+
+typedef enum llama_rs_parsed_chat_reasoning_content_status { /* result codes of llama_rs_parsed_chat_reasoning_content */
+    LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK = 0,
+    LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG,
+    LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG,
+    LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED, /* copying the result or the exception text failed, or std::bad_alloc was caught */
+    LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_parsed_chat_reasoning_content_status;
+
+llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_reasoning_content( /* copies the parsed message's `reasoning_content` field */
+    llama_rs_parsed_chat_handle handle,
+    char ** out_string, /* required; null on failure, otherwise a copy - presumably freed with llama_rs_string_free (confirm) */
+    char ** out_error); /* optional; receives a copy of the exception message when one is caught */
 
 #ifdef __cplusplus
 }
diff --git a/llama-cpp-bindings/src/error/parse_chat_message_error.rs b/llama-cpp-bindings/src/error/parse_chat_message_error.rs
index 75460ed4..cc1ccc06 100644
--- a/llama-cpp-bindings/src/error/parse_chat_message_error.rs
+++ b/llama-cpp-bindings/src/error/parse_chat_message_error.rs
@@ -2,31 +2,82 @@ use std::string::FromUtf8Error;
 
 use crate::error::tool_call_format_failure::ToolCallFormatFailure;
 
-/// Failed to parse a chat message via [`crate::Model::parse_chat_message`].
 #[derive(Debug, thiserror::Error)]
 pub enum ParseChatMessageError {
-    /// llama.cpp returned an error code from the parse FFI call.
-    #[error("ffi error {0}")]
-    FfiError(i32),
-    /// The C++ side threw an exception while parsing.
-    #[error("c++ exception during chat parse: {0}")]
-    ParseException(String),
-    /// An accessor returned bytes that were not valid UTF-8.
+    #[error("llama_rs_parse_chat_message called with null model")]
+    ParseNullModelArg,
+    #[error("llama_rs_parse_chat_message called with null input")]
+    ParseNullInputArg,
+    #[error("llama_rs_parse_chat_message called with null out_handle")]
+    ParseNullOutHandleArg,
+    #[error("llama_rs_parse_chat_message called with null out_error")]
+    ParseNullOutErrorArg,
+    #[error("model has no chat template")]
+    ParseModelHasNoChatTemplate,
+    #[error("model has no vocab")]
+    ParseModelHasNoVocab,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ParseErrorStringAllocationFailed,
+    #[error("c++ exception during chat parse: {message}")]
+    ParseException { message: String },
+    #[error("llama_rs_parsed_chat_free destructor threw a C++ exception: {message}")]
+    FreeDestructorThrewCxxException { message: String },
+    #[error("llama_rs_parsed_chat_free wrapper failed to duplicate the C++ exception string")]
+    FreeErrorStringAllocationFailed,
+    #[error("llama_rs_parsed_chat_tool_call_count called with null handle")]
+    ToolCallCountNullHandleArg,
+    #[error("llama_rs_parsed_chat_tool_call_count threw a C++ exception: {message}")]
+    ToolCallCountThrewCxxException { message: String },
+    #[error("llama_rs_parsed_chat_tool_call_count wrapper failed to duplicate the C++ exception string")]
+    ToolCallCountErrorStringAllocationFailed,
+    #[error("llama_rs_parsed_chat_tool_call_id called with null handle")]
+    ToolCallIdNullHandleArg,
+    #[error("llama_rs_parsed_chat_tool_call_id called with index {index} out of bounds")]
+    ToolCallIdIndexOutOfBounds { index: usize },
+    #[error("llama_rs_parsed_chat_tool_call_id threw a C++ exception: {message}")]
+    ToolCallIdThrewCxxException { message: String },
+    #[error("llama_rs_parsed_chat_tool_call_id wrapper failed to duplicate the C++ exception string")]
+    ToolCallIdErrorStringAllocationFailed,
+    #[error("llama_rs_parsed_chat_tool_call_name called with null handle")]
+    ToolCallNameNullHandleArg,
+    #[error("llama_rs_parsed_chat_tool_call_name called with index {index} out of bounds")]
+    ToolCallNameIndexOutOfBounds { index: usize },
+    #[error("llama_rs_parsed_chat_tool_call_name threw a C++ exception: {message}")]
+    ToolCallNameThrewCxxException { message: String },
+    #[error("llama_rs_parsed_chat_tool_call_name wrapper failed to duplicate the C++ exception string")]
+    ToolCallNameErrorStringAllocationFailed,
+    #[error("llama_rs_parsed_chat_tool_call_arguments called with null handle")]
+    ToolCallArgumentsNullHandleArg,
+    #[error("llama_rs_parsed_chat_tool_call_arguments called with index {index} out of bounds")]
+    ToolCallArgumentsIndexOutOfBounds { index: usize },
+    #[error("llama_rs_parsed_chat_tool_call_arguments threw a C++ exception: {message}")]
+    ToolCallArgumentsThrewCxxException { message: String },
+    #[error(
+        "llama_rs_parsed_chat_tool_call_arguments wrapper failed to duplicate the C++ exception string"
+    )]
+    ToolCallArgumentsErrorStringAllocationFailed,
+    #[error("llama_rs_parsed_chat_content called with null handle")]
+    ContentNullHandleArg,
+    #[error("llama_rs_parsed_chat_content threw a C++ exception: {message}")]
+    ContentThrewCxxException { message: String },
+    #[error("llama_rs_parsed_chat_content wrapper failed to duplicate the C++ exception string")]
+    ContentErrorStringAllocationFailed,
+    #[error("llama_rs_parsed_chat_reasoning_content called with null handle")]
+    ReasoningContentNullHandleArg,
+    #[error("llama_rs_parsed_chat_reasoning_content threw a C++ exception: {message}")]
+    ReasoningContentThrewCxxException { message: String },
+    #[error(
+        "llama_rs_parsed_chat_reasoning_content wrapper failed to duplicate the C++ exception string"
+    )]
+    ReasoningContentErrorStringAllocationFailed,
     #[error("ffi returned non-utf8 string: {0}")]
     StringUtf8Error(#[from] FromUtf8Error),
-    /// The caller passed a `tools_json` argument that is not valid JSON.
     #[error("tools_json is not valid JSON: {0}")]
     ToolsJsonInvalid(#[source] serde_json::Error),
-    /// The caller passed a `tools_json` argument that parses as JSON but is not an array.
     #[error("tools_json must be a JSON array")]
     ToolsJsonNotArray,
-    /// Failed to serialize the tools array for the FFI call.
     #[error("could not serialize tools to JSON: {0}")]
     ToolsSerialization(String),
-    /// The model has no usable chat template, so the parser cannot be built.
-    #[error("model has no chat template")]
-    NoChatTemplate,
-    /// The wrapper-side fallback parser detected a structural issue while parsing the body.
     #[error("template-override fallback parser failed: {0}")]
     TemplateOverrideFailed(#[from] ToolCallFormatFailure),
 }
diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs
index de22549d..789a6ddf 100644
--- a/llama-cpp-bindings/src/model.rs
+++ b/llama-cpp-bindings/src/model.rs
@@ -936,12 +936,12 @@ impl LlamaModel {
                 synthesize_missing_tool_call_ids(&mut parsed.tool_calls);
                 Ok(ChatMessageParseOutcome::Recognized(parsed))
             }
-            Err(ParseChatMessageError::ParseException(ffi_error_message)) => {
+            Err(ParseChatMessageError::ParseException { message }) => {
                 Ok(ChatMessageParseOutcome::Unrecognized(RawChatMessage {
                     tools_json: tools_json.to_owned(),
                     text: input.to_owned(),
                     is_partial,
-                    ffi_error_message,
+                    ffi_error_message: message,
                 }))
             }
             Err(other) => Err(other),
@@ -974,18 +974,70 @@ impl LlamaModel {
         };
 
         let parsed = match status {
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => collect_parsed_chat_message(handle),
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => {
-                let message = read_optional_owned_cstr_lossy(out_error);
-                Err(ParseChatMessageError::ParseException(message))
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_OK => {
+                collect_parsed_chat_message(handle)
             }
-            other => Err(ParseChatMessageError::FfiError(status_to_i32(other))),
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG => {
+                Err(ParseChatMessageError::ParseNullModelArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG => {
+                Err(ParseChatMessageError::ParseNullInputArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG => {
+                Err(ParseChatMessageError::ParseNullOutHandleArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG => {
+                Err(ParseChatMessageError::ParseNullOutErrorArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE => {
+                Err(ParseChatMessageError::ParseModelHasNoChatTemplate)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB => {
+                Err(ParseChatMessageError::ParseModelHasNoVocab)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(ParseChatMessageError::ParseErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION => {
+                let message =
+                    unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+                out_error = ptr::null_mut();
+                Err(ParseChatMessageError::ParseException { message })
+            }
+            other => unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}"),
         };
 
-        unsafe { llama_cpp_bindings_sys::llama_rs_parsed_chat_free(handle) };
-        unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
-
-        parsed
+        let mut free_error: *mut c_char = ptr::null_mut();
+        let free_status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_parsed_chat_free(handle, &raw mut free_error)
+        };
+        match (parsed, free_status) {
+            (Ok(value), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_OK) => {
+                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+                Ok(value)
+            }
+            (Ok(_), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION) => {
+                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+                let message = unsafe {
+                    crate::ffi_error_reader::read_and_free_cpp_error(free_error)
+                };
+                Err(ParseChatMessageError::FreeDestructorThrewCxxException { message })
+            }
+            (Ok(_), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED) => {
+                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+                Err(ParseChatMessageError::FreeErrorStringAllocationFailed)
+            }
+            (Ok(_), other) => {
+                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(free_error) };
+                unreachable!("llama_rs_parsed_chat_free returned unrecognized status {other}")
+            }
+            (Err(parse_err), _) => {
+                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(free_error) };
+                Err(parse_err)
+            }
+        }
     }
 
     /// Render the model's chat template with the autoparser's synthetic
@@ -1071,26 +1123,15 @@ fn collect_parsed_chat_message(
         return Ok(ParsedChatMessage::default());
     }
 
-    let content = read_owned_cstr_for_parse(unsafe {
-        llama_cpp_bindings_sys::llama_rs_parsed_chat_content(handle)
-    })?;
-    let reasoning_content = read_owned_cstr_for_parse(unsafe {
-        llama_cpp_bindings_sys::llama_rs_parsed_chat_reasoning_content(handle)
-    })?;
-
-    let count = unsafe { llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_count(handle) };
+    let content = read_parsed_chat_content(handle)?;
+    let reasoning_content = read_parsed_chat_reasoning_content(handle)?;
+    let count = read_parsed_chat_tool_call_count(handle)?;
 
     let mut tool_calls = Vec::with_capacity(count);
     for index in 0..count {
-        let id = read_owned_cstr_for_parse(unsafe {
-            llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_id(handle, index)
-        })?;
-        let name = read_owned_cstr_for_parse(unsafe {
-            llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_name(handle, index)
-        })?;
-        let arguments_json = read_owned_cstr_for_parse(unsafe {
-            llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_arguments(handle, index)
-        })?;
+        let id = read_parsed_chat_tool_call_id(handle, index)?;
+        let name = read_parsed_chat_tool_call_name(handle, index)?;
+        let arguments_json = read_parsed_chat_tool_call_arguments(handle, index)?;
 
         let arguments = ToolCallArguments::from_string(arguments_json);
         tool_calls.push(ParsedToolCall::new(id, name, arguments));
@@ -1103,6 +1144,260 @@ fn collect_parsed_chat_message(
     ))
 }
 
+fn read_parsed_chat_content(
+    handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
+) -> Result<String, ParseChatMessageError> {
+    let mut out_string: *mut c_char = ptr::null_mut();
+    let mut out_error: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_parsed_chat_content(
+            handle,
+            &raw mut out_string,
+            &raw mut out_error,
+        )
+    };
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_OK => {
+            consume_accessor_string(out_string)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG => {
+            Err(ParseChatMessageError::ContentNullHandleArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG => {
+            unreachable!(
+                "llama_rs_parsed_chat_content reported null out_string while we passed a valid pointer"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED => {
+            unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+            Err(ParseChatMessageError::ContentErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION => {
+            let message =
+                unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(ParseChatMessageError::ContentThrewCxxException { message })
+        }
+        other => unreachable!("llama_rs_parsed_chat_content returned unrecognized status {other}"),
+    }
+}
+
+fn read_parsed_chat_reasoning_content(
+    handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
+) -> Result<String, ParseChatMessageError> {
+    let mut out_string: *mut c_char = ptr::null_mut();
+    let mut out_error: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_parsed_chat_reasoning_content(
+            handle,
+            &raw mut out_string,
+            &raw mut out_error,
+        )
+    };
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK => {
+            consume_accessor_string(out_string)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG => {
+            Err(ParseChatMessageError::ReasoningContentNullHandleArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG => {
+            unreachable!(
+                "llama_rs_parsed_chat_reasoning_content reported null out_string while we passed a valid pointer"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED => {
+            unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+            Err(ParseChatMessageError::ReasoningContentErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION => {
+            let message =
+                unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(ParseChatMessageError::ReasoningContentThrewCxxException { message })
+        }
+        other => unreachable!(
+            "llama_rs_parsed_chat_reasoning_content returned unrecognized status {other}"
+        ),
+    }
+}
+
+fn read_parsed_chat_tool_call_count(
+    handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
+) -> Result<usize, ParseChatMessageError> {
+    let mut out_count: usize = 0;
+    let mut out_error: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_count(
+            handle,
+            &raw mut out_count,
+            &raw mut out_error,
+        )
+    };
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK => Ok(out_count),
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG => {
+            Err(ParseChatMessageError::ToolCallCountNullHandleArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG => {
+            unreachable!(
+                "llama_rs_parsed_chat_tool_call_count reported null out_count while we passed a valid pointer"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED => {
+            unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+            Err(ParseChatMessageError::ToolCallCountErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION => {
+            let message =
+                unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(ParseChatMessageError::ToolCallCountThrewCxxException { message })
+        }
+        other => unreachable!(
+            "llama_rs_parsed_chat_tool_call_count returned unrecognized status {other}"
+        ),
+    }
+}
+
+fn read_parsed_chat_tool_call_id(
+    handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
+    index: usize,
+) -> Result<String, ParseChatMessageError> {
+    let mut out_string: *mut c_char = ptr::null_mut();
+    let mut out_error: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_id(
+            handle,
+            index,
+            &raw mut out_string,
+            &raw mut out_error,
+        )
+    };
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK => {
+            consume_accessor_string(out_string)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG => {
+            Err(ParseChatMessageError::ToolCallIdNullHandleArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG => {
+            unreachable!(
+                "llama_rs_parsed_chat_tool_call_id reported null out_string while we passed a valid pointer"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS => {
+            Err(ParseChatMessageError::ToolCallIdIndexOutOfBounds { index })
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED => {
+            unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+            Err(ParseChatMessageError::ToolCallIdErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION => {
+            let message =
+                unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(ParseChatMessageError::ToolCallIdThrewCxxException { message })
+        }
+        other => unreachable!(
+            "llama_rs_parsed_chat_tool_call_id returned unrecognized status {other}"
+        ),
+    }
+}
+
+fn read_parsed_chat_tool_call_name(
+    handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
+    index: usize,
+) -> Result<String, ParseChatMessageError> {
+    let mut out_string: *mut c_char = ptr::null_mut();
+    let mut out_error: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_name(
+            handle,
+            index,
+            &raw mut out_string,
+            &raw mut out_error,
+        )
+    };
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK => {
+            consume_accessor_string(out_string)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG => {
+            Err(ParseChatMessageError::ToolCallNameNullHandleArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG => {
+            unreachable!(
+                "llama_rs_parsed_chat_tool_call_name reported null out_string while we passed a valid pointer"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS => {
+            Err(ParseChatMessageError::ToolCallNameIndexOutOfBounds { index })
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED => {
+            unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+            Err(ParseChatMessageError::ToolCallNameErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION => {
+            let message =
+                unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(ParseChatMessageError::ToolCallNameThrewCxxException { message })
+        }
+        other => unreachable!(
+            "llama_rs_parsed_chat_tool_call_name returned unrecognized status {other}"
+        ),
+    }
+}
+
+fn read_parsed_chat_tool_call_arguments(
+    handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
+    index: usize,
+) -> Result<String, ParseChatMessageError> {
+    let mut out_string: *mut c_char = ptr::null_mut();
+    let mut out_error: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_arguments(
+            handle,
+            index,
+            &raw mut out_string,
+            &raw mut out_error,
+        )
+    };
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK => {
+            consume_accessor_string(out_string)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG => {
+            Err(ParseChatMessageError::ToolCallArgumentsNullHandleArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG => {
+            unreachable!(
+                "llama_rs_parsed_chat_tool_call_arguments reported null out_string while we passed a valid pointer"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS => {
+            Err(ParseChatMessageError::ToolCallArgumentsIndexOutOfBounds { index })
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED => {
+            unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+            Err(ParseChatMessageError::ToolCallArgumentsErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION => {
+            let message =
+                unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(ParseChatMessageError::ToolCallArgumentsThrewCxxException { message })
+        }
+        other => unreachable!(
+            "llama_rs_parsed_chat_tool_call_arguments returned unrecognized status {other}"
+        ),
+    }
+}
+
+fn consume_accessor_string(ptr: *mut c_char) -> Result<String, ParseChatMessageError> {
+    if ptr.is_null() {
+        return Ok(String::new());
+    }
+    let bytes = unsafe { CStr::from_ptr(ptr) }.to_bytes().to_vec();
+    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(ptr) };
+    Ok(String::from_utf8(bytes)?)
+}
+
 struct ReasoningSplit {
     reasoning: String,
     content: String,
@@ -1230,17 +1525,6 @@ where
     parsed
 }
 
-fn read_owned_cstr_for_parse(ptr: *mut c_char) -> Result<String, ParseChatMessageError> {
-    if ptr.is_null() {
-        return Ok(String::new());
-    }
-
-    let bytes = unsafe { CStr::from_ptr(ptr) }.to_bytes().to_vec();
-    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(ptr) };
-
-    Ok(String::from_utf8(bytes)?)
-}
-
 fn read_optional_owned_cstr(ptr: *const c_char) -> Result<Option<String>, MarkerDetectionError> {
     if ptr.is_null() {
         return Ok(None);

From 3471b41761cceb500a97151257ee95c289b518bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 16:30:11 +0200
Subject: [PATCH 11/16] retrofit existing wrappers to per-wrapper status enums
 and surface c++ exceptions as typed rust errors

---
 llama-cpp-bindings-sys/wrapper_common.cpp     | 248 +++++++----
 llama-cpp-bindings-sys/wrapper_common.h       |  69 ++-
 llama-cpp-bindings-sys/wrapper_reasoning.cpp  |  41 +-
 llama-cpp-bindings-sys/wrapper_reasoning.h    |  26 +-
 llama-cpp-bindings-sys/wrapper_tool_calls.cpp |  76 ++--
 llama-cpp-bindings-sys/wrapper_tool_calls.h   |  51 +--
 llama-cpp-bindings/src/error.rs               |   2 +
 llama-cpp-bindings/src/error/grammar_error.rs |  58 ++-
 .../src/error/json_schema_to_grammar_error.rs |  20 +
 .../src/error/llama_cpp_error.rs              |  33 +-
 .../src/error/marker_detection_error.rs       |  48 ++-
 llama-cpp-bindings/src/error/sample_error.rs  |  22 +-
 .../src/error/sampler_accept_error.rs         |  16 +-
 .../src/json_schema_to_grammar.rs             |  86 ++--
 llama-cpp-bindings/src/lib.rs                 |  12 +-
 llama-cpp-bindings/src/llama_backend.rs       |   6 +-
 llama-cpp-bindings/src/llguidance_sampler.rs  |   4 +-
 llama-cpp-bindings/src/model.rs               | 392 +++++++-----------
 llama-cpp-bindings/src/model/params.rs        |   4 +-
 llama-cpp-bindings/src/sampling.rs            | 169 ++++++--
 20 files changed, 808 insertions(+), 575 deletions(-)
 create mode 100644 llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs

diff --git a/llama-cpp-bindings-sys/wrapper_common.cpp b/llama-cpp-bindings-sys/wrapper_common.cpp
index 45e47bae..4544ecfd 100644
--- a/llama-cpp-bindings-sys/wrapper_common.cpp
+++ b/llama-cpp-bindings-sys/wrapper_common.cpp
@@ -1,9 +1,9 @@
 #include "wrapper_common.h"
 
-#include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <exception>
+#include <new>
 #include <string>
 #include <stdint.h>
 
@@ -14,34 +14,49 @@
 
 #include <nlohmann/json.hpp>
 
-extern "C" llama_rs_status llama_rs_json_schema_to_grammar(
+extern "C" llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_grammar(
     const char * schema_json,
     bool force_gbnf,
     char ** out_grammar,
     char ** out_error) {
-    if (!schema_json || !out_grammar || !out_error) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (out_grammar) {
+        *out_grammar = nullptr;
+    }
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!schema_json) {
+        return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG;
+    }
+    if (!out_grammar) {
+        return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG;
     }
-
-    *out_grammar = nullptr;
-    *out_error = nullptr;
 
     try {
         const auto schema = nlohmann::ordered_json::parse(schema_json);
         const auto grammar = json_schema_to_grammar(schema, force_gbnf);
         *out_grammar = llama_rs_dup_string(grammar);
-
-        return *out_grammar ? LLAMA_RS_STATUS_OK : LLAMA_RS_STATUS_ALLOCATION_FAILED;
+        if (!*out_grammar) {
+            return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & err) {
-        fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what());
         *out_error = llama_rs_dup_string(err.what());
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
-        fprintf(stderr, "%s: unknown C++ exception\n", __func__);
-        *out_error = llama_rs_dup_string("unknown C++ exception");
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
 
@@ -51,33 +66,48 @@ extern "C" void llama_rs_string_free(char * ptr) {
     }
 }
 
-extern "C" struct llama_sampler * llama_rs_sampler_init_grammar(
+extern "C" llama_rs_sampler_init_grammar_status llama_rs_sampler_init_grammar(
     const struct llama_vocab * vocab,
     const char * grammar_str,
     const char * grammar_root,
+    struct llama_sampler ** out_sampler,
     char ** out_error) {
+    if (out_sampler) {
+        *out_sampler = nullptr;
+    }
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!out_sampler) {
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG;
+    }
     if (!out_error) {
-        return nullptr;
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG;
     }
-
-    *out_error = nullptr;
-
     try {
-        return llama_sampler_init_grammar(vocab, grammar_str, grammar_root);
+        *out_sampler = llama_sampler_init_grammar(vocab, grammar_str, grammar_root);
+        if (!*out_sampler) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & err) {
-        fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what());
         *out_error = llama_rs_dup_string(err.what());
-
-        return nullptr;
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
-        fprintf(stderr, "%s: unknown C++ exception\n", __func__);
-        *out_error = llama_rs_dup_string("unknown C++ exception");
-
-        return nullptr;
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
 
-extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy(
+extern "C" llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_grammar_lazy(
     const struct llama_vocab * vocab,
     const char * grammar_str,
     const char * grammar_root,
@@ -85,13 +115,20 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy(
     size_t num_trigger_words,
     const llama_token * trigger_tokens,
     size_t num_trigger_tokens,
+    struct llama_sampler ** out_sampler,
     char ** out_error) {
+    if (out_sampler) {
+        *out_sampler = nullptr;
+    }
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!out_sampler) {
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG;
+    }
     if (!out_error) {
-        return nullptr;
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG;
     }
-
-    *out_error = nullptr;
-
     try {
         std::vector<std::string> trigger_patterns;
         trigger_patterns.reserve(num_trigger_words);
@@ -107,7 +144,7 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy(
             trigger_patterns_c.push_back(pattern.c_str());
         }
 
-        return llama_sampler_init_grammar_lazy_patterns(
+        *out_sampler = llama_sampler_init_grammar_lazy_patterns(
             vocab,
             grammar_str,
             grammar_root,
@@ -115,20 +152,28 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy(
             trigger_patterns_c.size(),
             trigger_tokens,
             num_trigger_tokens);
+        if (!*out_sampler) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & err) {
-        fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what());
         *out_error = llama_rs_dup_string(err.what());
-
-        return nullptr;
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
-        fprintf(stderr, "%s: unknown C++ exception\n", __func__);
-        *out_error = llama_rs_dup_string("unknown C++ exception");
-
-        return nullptr;
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
 
-extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns(
+extern "C" llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_init_grammar_lazy_patterns(
     const struct llama_vocab * vocab,
     const char * grammar_str,
     const char * grammar_root,
@@ -136,15 +181,22 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns(
     size_t num_trigger_patterns,
     const llama_token * trigger_tokens,
     size_t num_trigger_tokens,
+    struct llama_sampler ** out_sampler,
     char ** out_error) {
+    if (out_sampler) {
+        *out_sampler = nullptr;
+    }
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!out_sampler) {
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG;
+    }
     if (!out_error) {
-        return nullptr;
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG;
     }
-
-    *out_error = nullptr;
-
     try {
-        return llama_sampler_init_grammar_lazy_patterns(
+        *out_sampler = llama_sampler_init_grammar_lazy_patterns(
             vocab,
             grammar_str,
             grammar_root,
@@ -152,16 +204,24 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns(
             num_trigger_patterns,
             trigger_tokens,
             num_trigger_tokens);
+        if (!*out_sampler) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & err) {
-        fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what());
         *out_error = llama_rs_dup_string(err.what());
-
-        return nullptr;
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
-        fprintf(stderr, "%s: unknown C++ exception\n", __func__);
-        *out_error = llama_rs_dup_string("unknown C++ exception");
-
-        return nullptr;
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
 
@@ -327,58 +387,76 @@ extern "C" llama_rs_memory_seq_div_status llama_rs_memory_seq_div(
     }
 }
 
-extern "C" llama_rs_status llama_rs_sampler_sample(
+extern "C" llama_rs_sampler_sample_status llama_rs_sampler_sample(
     struct llama_sampler * sampler,
     struct llama_context * ctx,
     int32_t idx,
     llama_token * out_token,
     char ** out_error) {
-    if (!sampler || !ctx || !out_token || !out_error) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!sampler) {
+        return LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG;
+    }
+    if (!ctx) {
+        return LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG;
+    }
+    if (!out_token) {
+        return LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG;
     }
-
-    *out_error = nullptr;
-
     try {
         *out_token = llama_sampler_sample(sampler, ctx, idx);
-
-        return LLAMA_RS_STATUS_OK;
+        return LLAMA_RS_SAMPLER_SAMPLE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & err) {
-        fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what());
         *out_error = llama_rs_dup_string(err.what());
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
-        fprintf(stderr, "%s: unknown C++ exception\n", __func__);
-        *out_error = llama_rs_dup_string("unknown C++ exception");
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
 
-extern "C" llama_rs_status llama_rs_sampler_accept(
+extern "C" llama_rs_sampler_accept_status llama_rs_sampler_accept(
     struct llama_sampler * sampler,
     llama_token token,
     char ** out_error) {
-    if (!sampler || !out_error) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!sampler) {
+        return LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG;
     }
-
-    *out_error = nullptr;
-
     try {
         llama_sampler_accept(sampler, token);
-
-        return LLAMA_RS_STATUS_OK;
+        return LLAMA_RS_SAMPLER_ACCEPT_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & err) {
-        fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what());
         *out_error = llama_rs_dup_string(err.what());
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
-        fprintf(stderr, "%s: unknown C++ exception\n", __func__);
-        *out_error = llama_rs_dup_string("unknown C++ exception");
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
diff --git a/llama-cpp-bindings-sys/wrapper_common.h b/llama-cpp-bindings-sys/wrapper_common.h
index 61ba9d2f..d1770b2a 100644
--- a/llama-cpp-bindings-sys/wrapper_common.h
+++ b/llama-cpp-bindings-sys/wrapper_common.h
@@ -16,19 +16,47 @@ struct llama_vocab;
 extern "C" {
 #endif
 
-llama_rs_status llama_rs_json_schema_to_grammar(
+typedef enum llama_rs_json_schema_to_grammar_status {
+    LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK = 0,
+    LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG,
+    LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG,
+    LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_json_schema_to_grammar_status;
+
+llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_grammar(
     const char * schema_json,
     bool force_gbnf,
     char ** out_grammar,
     char ** out_error);
 
-struct llama_sampler * llama_rs_sampler_init_grammar(
+typedef enum llama_rs_sampler_init_grammar_status {
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK = 0,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_sampler_init_grammar_status;
+
+llama_rs_sampler_init_grammar_status llama_rs_sampler_init_grammar(
     const struct llama_vocab * vocab,
     const char * grammar_str,
     const char * grammar_root,
+    struct llama_sampler ** out_sampler,
     char ** out_error);
 
-struct llama_sampler * llama_rs_sampler_init_grammar_lazy(
+typedef enum llama_rs_sampler_init_grammar_lazy_status {
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK = 0,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_sampler_init_grammar_lazy_status;
+
+llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_grammar_lazy(
     const struct llama_vocab * vocab,
     const char * grammar_str,
     const char * grammar_root,
@@ -36,9 +64,19 @@ struct llama_sampler * llama_rs_sampler_init_grammar_lazy(
     size_t num_trigger_words,
     const llama_token * trigger_tokens,
     size_t num_trigger_tokens,
+    struct llama_sampler ** out_sampler,
     char ** out_error);
 
-struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns(
+typedef enum llama_rs_sampler_init_grammar_lazy_patterns_status {
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK = 0,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_sampler_init_grammar_lazy_patterns_status;
+
+llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_init_grammar_lazy_patterns(
     const struct llama_vocab * vocab,
     const char * grammar_str,
     const char * grammar_root,
@@ -46,14 +84,33 @@ struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns(
     size_t num_trigger_patterns,
     const llama_token * trigger_tokens,
     size_t num_trigger_tokens,
+    struct llama_sampler ** out_sampler,
     char ** out_error);
 
-llama_rs_status llama_rs_sampler_accept(
+typedef enum llama_rs_sampler_accept_status {
+    LLAMA_RS_SAMPLER_ACCEPT_OK = 0,
+    LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG,
+    LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_sampler_accept_status;
+
+llama_rs_sampler_accept_status llama_rs_sampler_accept(
     struct llama_sampler * sampler,
     llama_token token,
     char ** out_error);
 
-llama_rs_status llama_rs_sampler_sample(
+typedef enum llama_rs_sampler_sample_status {
+    LLAMA_RS_SAMPLER_SAMPLE_OK = 0,
+    LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG,
+    LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG,
+    LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG,
+    LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_sampler_sample_status;
+
+llama_rs_sampler_sample_status llama_rs_sampler_sample(
     struct llama_sampler * sampler,
     struct llama_context * ctx,
     int32_t idx,
diff --git a/llama-cpp-bindings-sys/wrapper_reasoning.cpp b/llama-cpp-bindings-sys/wrapper_reasoning.cpp
index 36b0763e..7970b4ee 100644
--- a/llama-cpp-bindings-sys/wrapper_reasoning.cpp
+++ b/llama-cpp-bindings-sys/wrapper_reasoning.cpp
@@ -6,6 +6,7 @@
 #include "marker_probes/marker_probe.h"
 
 #include <exception>
+#include <new>
 #include <nlohmann/json.hpp>
 #include <string>
 
@@ -26,7 +27,7 @@ std::string token_text_or_empty(const llama_vocab * vocab, llama_token token) {
 
 }  // namespace
 
-extern "C" llama_rs_status llama_rs_detect_reasoning_markers(
+extern "C" llama_rs_detect_reasoning_markers_status llama_rs_detect_reasoning_markers(
     const struct llama_model * model,
     char ** out_open,
     char ** out_close,
@@ -40,20 +41,28 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers(
     if (out_error) {
         *out_error = nullptr;
     }
-
-    if (!model || !out_open || !out_close || !out_error) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (!model) {
+        return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG;
+    }
+    if (!out_open) {
+        return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG;
+    }
+    if (!out_close) {
+        return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG;
     }
 
     try {
         const char * tmpl_src = llama_model_chat_template(model, nullptr);
         if (!tmpl_src) {
-            return LLAMA_RS_STATUS_OK;
+            return LLAMA_RS_DETECT_REASONING_MARKERS_OK;
         }
 
         const llama_vocab * vocab = llama_model_get_vocab(model);
         if (!vocab) {
-            return LLAMA_RS_STATUS_OK;
+            return LLAMA_RS_DETECT_REASONING_MARKERS_OK;
         }
 
         std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab));
@@ -112,7 +121,7 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers(
         }
 
         if (!detected) {
-            return LLAMA_RS_STATUS_OK;
+            return LLAMA_RS_DETECT_REASONING_MARKERS_OK;
         }
 
         char * open_dup = llama_rs_dup_string(detected_start);
@@ -122,21 +131,27 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers(
             std::free(open_dup);
             std::free(close_dup);
 
-            return LLAMA_RS_STATUS_ALLOCATION_FAILED;
+            return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED;
         }
 
         *out_open = open_dup;
         *out_close = close_dup;
 
-        return LLAMA_RS_STATUS_OK;
+        return LLAMA_RS_DETECT_REASONING_MARKERS_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & ex) {
         *out_error = llama_rs_dup_string(std::string(ex.what()));
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
         *out_error = llama_rs_dup_string(std::string("unknown c++ exception"));
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
 
diff --git a/llama-cpp-bindings-sys/wrapper_reasoning.h b/llama-cpp-bindings-sys/wrapper_reasoning.h
index eca91ea8..a22f79ba 100644
--- a/llama-cpp-bindings-sys/wrapper_reasoning.h
+++ b/llama-cpp-bindings-sys/wrapper_reasoning.h
@@ -7,21 +7,38 @@
 extern "C" {
 #endif
 
-/**
- * Detect the reasoning open/close marker strings for a model by analyzing its
- * Jinja chat template via llama.cpp's autoparser.
- *
- * On success (LLAMA_RS_STATUS_OK):
- *   - If the model has detected reasoning markers, *out_open and *out_close are
- *     set to heap-allocated null-terminated strings owned by the caller. Free
- *     each via llama_rs_string_free.
- *   - If no reasoning markers were detected, *out_open and *out_close are left
- *     as nullptr.
- *
- * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set to a heap-allocated message;
- * free via llama_rs_string_free.
- */
-llama_rs_status llama_rs_detect_reasoning_markers(
+typedef enum llama_rs_detect_reasoning_markers_status {
+    LLAMA_RS_DETECT_REASONING_MARKERS_OK = 0,
+    LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG,
+    LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG,
+    LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG,
+    LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_detect_reasoning_markers_status;
+
+/**
+ * Detect the reasoning open/close marker strings for a model by analyzing its
+ * Jinja chat template via llama.cpp's autoparser.
+ *
+ * On LLAMA_RS_DETECT_REASONING_MARKERS_OK:
+ *   - If reasoning markers were detected, *out_open and *out_close are set to
+ *     heap-allocated null-terminated strings owned by the caller. Free each
+ *     via llama_rs_string_free.
+ *   - If no reasoning markers were detected (including a model without a chat
+ *     template or vocab), *out_open and *out_close are left as nullptr.
+ *
+ * On LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION, *out_error
+ * is set to a heap-allocated message; free via llama_rs_string_free.
+ *
+ * LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED is returned
+ * when std::bad_alloc is caught or when duplicating a marker or the exception
+ * message fails; no string is handed to the caller in that case.
+ *
+ * The NULL_*_ARG statuses report the first null pointer argument, checked in
+ * the order model, out_open, out_close, out_error.
+ */
+llama_rs_detect_reasoning_markers_status llama_rs_detect_reasoning_markers(
     const struct llama_model * model,
     char ** out_open,
     char ** out_close,
diff --git a/llama-cpp-bindings-sys/wrapper_tool_calls.cpp b/llama-cpp-bindings-sys/wrapper_tool_calls.cpp
index eb869201..54b3a999 100644
--- a/llama-cpp-bindings-sys/wrapper_tool_calls.cpp
+++ b/llama-cpp-bindings-sys/wrapper_tool_calls.cpp
@@ -7,6 +7,7 @@
 #include "llama.cpp/include/llama.h"
 
 #include <exception>
+#include <new>
 #include <nlohmann/json.hpp>
 #include <string>
 
@@ -107,7 +108,7 @@ std::string detect_tool_call_haystack(
 
 }  // namespace
 
-extern "C" llama_rs_status llama_rs_compute_tool_call_haystack(
+extern "C" llama_rs_compute_tool_call_haystack_status llama_rs_compute_tool_call_haystack(
     const struct llama_model * model,
     char ** out_haystack,
     char ** out_error) {
@@ -117,20 +118,25 @@ extern "C" llama_rs_status llama_rs_compute_tool_call_haystack(
     if (out_error) {
         *out_error = nullptr;
     }
-
-    if (!model || !out_haystack || !out_error) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (!model) {
+        return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG;
+    }
+    if (!out_haystack) {
+        return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG;
     }
 
     try {
         const char * tmpl_src = llama_model_chat_template(model, nullptr);
         if (!tmpl_src) {
-            return LLAMA_RS_STATUS_OK;
+            return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK;
         }
 
         const llama_vocab * vocab = llama_model_get_vocab(model);
         if (!vocab) {
-            return LLAMA_RS_STATUS_OK;
+            return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK;
         }
 
         std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab));
@@ -142,29 +148,35 @@ extern "C" llama_rs_status llama_rs_compute_tool_call_haystack(
 
         std::string haystack = detect_tool_call_haystack(tmpl, reasoning);
         if (haystack.empty()) {
-            return LLAMA_RS_STATUS_OK;
+            return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK;
         }
 
         char * haystack_dup = llama_rs_dup_string(haystack);
         if (!haystack_dup) {
-            return LLAMA_RS_STATUS_ALLOCATION_FAILED;
+            return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED;
         }
 
         *out_haystack = haystack_dup;
 
-        return LLAMA_RS_STATUS_OK;
+        return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & ex) {
         *out_error = llama_rs_dup_string(std::string(ex.what()));
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
         *out_error = llama_rs_dup_string(std::string("unknown c++ exception"));
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
 
-extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders(
+extern "C" llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnose_tool_call_synthetic_renders(
     const struct llama_model * model,
     char ** out_no_tools,
     char ** out_with_tools,
@@ -178,20 +190,28 @@ extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders(
     if (out_error) {
         *out_error = nullptr;
     }
-
-    if (!model || !out_no_tools || !out_with_tools || !out_error) {
-        return LLAMA_RS_STATUS_INVALID_ARGUMENT;
+    if (!model) {
+        return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG;
+    }
+    if (!out_no_tools) {
+        return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG;
+    }
+    if (!out_with_tools) {
+        return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG;
     }
 
     try {
         const char * tmpl_src = llama_model_chat_template(model, nullptr);
         if (!tmpl_src) {
-            return LLAMA_RS_STATUS_OK;
+            return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK;
         }
 
         const llama_vocab * vocab = llama_model_get_vocab(model);
         if (!vocab) {
-            return LLAMA_RS_STATUS_OK;
+            return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK;
         }
 
         std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab));
@@ -259,20 +279,26 @@ extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders(
             std::free(a_dup);
             std::free(b_dup);
 
-            return LLAMA_RS_STATUS_ALLOCATION_FAILED;
+            return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED;
         }
 
         *out_no_tools = a_dup;
         *out_with_tools = b_dup;
 
-        return LLAMA_RS_STATUS_OK;
+        return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED;
     } catch (const std::exception & ex) {
         *out_error = llama_rs_dup_string(std::string(ex.what()));
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION;
     } catch (...) {
         *out_error = llama_rs_dup_string(std::string("unknown c++ exception"));
-
-        return LLAMA_RS_STATUS_EXCEPTION;
+        if (!*out_error) {
+            return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
diff --git a/llama-cpp-bindings-sys/wrapper_tool_calls.h b/llama-cpp-bindings-sys/wrapper_tool_calls.h
index e6a59e20..7c96c20b 100644
--- a/llama-cpp-bindings-sys/wrapper_tool_calls.h
+++ b/llama-cpp-bindings-sys/wrapper_tool_calls.h
@@ -7,40 +7,78 @@
 extern "C" {
 #endif
 
-/**
- * Render the model's chat template with the autoparser's standard tool-call
- * vs. plain-assistant synthetic turns and return the diff slice that surrounds
- * the tool-call payload. The returned haystack is the text that lives between
- * the model's tool-call open/close markers (with any reasoning prelude
- * stripped). Marker extraction from the haystack is performed in Rust.
- *
- * On success (LLAMA_RS_STATUS_OK):
- *   - If the model declares no tool-call markers (or an empty haystack),
- *     *out_haystack is left as nullptr.
- *   - Otherwise *out_haystack is a heap-allocated null-terminated string owned
- *     by the caller. Free via llama_rs_string_free.
- *
- * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set to a heap-allocated message;
- * free via llama_rs_string_free.
- */
-llama_rs_status llama_rs_compute_tool_call_haystack(
+typedef enum llama_rs_compute_tool_call_haystack_status {
+    LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK = 0,
+    LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG,
+    LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG,
+    LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_compute_tool_call_haystack_status;
+
+/**
+ * Render the model's chat template with the autoparser's standard tool-call
+ * vs. plain-assistant synthetic turns and return the diff slice that surrounds
+ * the tool-call payload. The returned haystack is the text that lives between
+ * the model's tool-call open/close markers (with any reasoning prelude
+ * stripped). Marker extraction from the haystack is performed in Rust.
+ *
+ * On LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK:
+ *   - If the model declares no tool-call markers (or an empty haystack),
+ *     *out_haystack is left as nullptr.
+ *   - Otherwise *out_haystack is a heap-allocated null-terminated string owned
+ *     by the caller. Free via llama_rs_string_free.
+ *
+ * On LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION,
+ * *out_error is set to a heap-allocated message; free via
+ * llama_rs_string_free.
+ *
+ * LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED is
+ * returned when std::bad_alloc is caught or when duplicating the haystack or
+ * the exception message fails; no string is handed to the caller in that case.
+ *
+ * The NULL_*_ARG statuses report the first null pointer argument, checked in
+ * the order model, out_haystack, out_error.
+ */
+llama_rs_compute_tool_call_haystack_status llama_rs_compute_tool_call_haystack(
     const struct llama_model * model,
     char ** out_haystack,
     char ** out_error);
 
-/**
- * Render the model's chat template with the autoparser's standard synthetic
- * inputs (assistant_no_tools vs assistant_with_tools). Useful for diagnosing
- * why marker detection fails.
- *
- * On success (LLAMA_RS_STATUS_OK):
- *   - *out_no_tools and *out_with_tools point to heap-allocated rendered
- *     outputs (free via llama_rs_string_free). Either can be empty when the
- *     template throws during rendering.
- *
- * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set.
- */
-llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders(
+typedef enum llama_rs_diagnose_tool_call_synthetic_renders_status {
+    LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK = 0,
+    LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG,
+    LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG,
+    LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG,
+    LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_diagnose_tool_call_synthetic_renders_status;
+
+/**
+ * Render the model's chat template with the autoparser's standard synthetic
+ * inputs (assistant_no_tools vs assistant_with_tools). Useful for diagnosing
+ * why marker detection fails.
+ *
+ * On LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK:
+ *   - *out_no_tools and *out_with_tools point to heap-allocated rendered
+ *     outputs (free via llama_rs_string_free). Either can be empty when the
+ *     template throws during rendering.
+ *   - If the model has no chat template or vocab, the function returns before
+ *     rendering, so neither output receives a string.
+ *
+ * On LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION,
+ * *out_error is set to a heap-allocated message; free via
+ * llama_rs_string_free.
+ *
+ * LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED
+ * is returned when std::bad_alloc is caught or when duplicating a render or the
+ * exception message fails; no string is handed to the caller in that case.
+ *
+ * The NULL_*_ARG statuses report the first null pointer argument, checked in
+ * the order model, out_no_tools, out_with_tools, out_error.
+ */
+llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnose_tool_call_synthetic_renders(
     const struct llama_model * model,
     char ** out_no_tools,
     char ** out_with_tools,
diff --git a/llama-cpp-bindings/src/error.rs b/llama-cpp-bindings/src/error.rs
index 731a9b13..ba684109 100644
--- a/llama-cpp-bindings/src/error.rs
+++ b/llama-cpp-bindings/src/error.rs
@@ -8,6 +8,7 @@ pub mod eval_multimodal_chunks_error;
 pub mod fit_error;
 pub mod grammar_error;
 pub mod json_object_failure;
+pub mod json_schema_to_grammar_error;
 pub mod key_value_xml_tags_failure;
 pub mod kv_cache_seq_add_error;
 pub mod kv_cache_seq_div_error;
@@ -43,6 +44,7 @@ pub use eval_multimodal_chunks_error::EvalMultimodalChunksError;
 pub use fit_error::FitError;
 pub use grammar_error::GrammarError;
 pub use json_object_failure::JsonObjectFailure;
+pub use json_schema_to_grammar_error::JsonSchemaToGrammarError;
 pub use key_value_xml_tags_failure::KeyValueXmlTagsFailure;
 pub use kv_cache_seq_add_error::KvCacheSeqAddError;
 pub use kv_cache_seq_div_error::KvCacheSeqDivError;
diff --git a/llama-cpp-bindings/src/error/grammar_error.rs b/llama-cpp-bindings/src/error/grammar_error.rs
index 58216b8c..f9adb0a8 100644
--- a/llama-cpp-bindings/src/error/grammar_error.rs
+++ b/llama-cpp-bindings/src/error/grammar_error.rs
@@ -1,27 +1,53 @@
 use std::ffi::NulError;
 
-/// Errors that can occur when initializing a grammar sampler
-#[derive(Debug, Eq, PartialEq, thiserror::Error)]
+#[derive(Debug, thiserror::Error)]
 pub enum GrammarError {
-    /// The grammar root was not found in the grammar string
-    #[error("Grammar root not found in grammar string")]
+    #[error("grammar root not found in grammar string")]
     RootNotFound,
-    /// The trigger word contains null bytes
-    #[error("Trigger word contains null bytes: {0}")]
+    #[error("trigger word contains null bytes: {0}")]
     TriggerWordNullBytes(NulError),
-    /// The grammar string or root contains null bytes
-    #[error("Grammar string or root contains null bytes: {0}")]
+    #[error("grammar string or root contains null bytes: {0}")]
     GrammarNullBytes(NulError),
-    /// A string contains null bytes
-    #[error("String contains null bytes: {0}")]
+    #[error("string contains null bytes: {0}")]
     NulError(#[from] NulError),
-    /// The grammar call returned null
-    #[error("Grammar initialization failed: {0}")]
-    NullGrammar(String),
-    /// An integer value exceeded the allowed range
-    #[error("Integer overflow: {0}")]
+    #[error("integer overflow: {0}")]
     IntegerOverflow(String),
-    /// An error from the llguidance library
     #[error("llguidance error: {0}")]
     LlguidanceError(String),
+    #[error("llama_rs_sampler_init_grammar called with null out_sampler")]
+    GrammarInitNullOutSamplerArg,
+    #[error("llama_rs_sampler_init_grammar called with null out_error")]
+    GrammarInitNullOutErrorArg,
+    #[error("llama_rs_sampler_init_grammar returned null")]
+    GrammarInitVendoredReturnedNull,
+    #[error("llama_rs_sampler_init_grammar wrapper failed to duplicate the C++ exception string")]
+    GrammarInitErrorStringAllocationFailed,
+    #[error("llama_rs_sampler_init_grammar threw a C++ exception: {message}")]
+    GrammarInitVendoredThrewCxxException { message: String },
+    #[error("llama_rs_sampler_init_grammar_lazy called with null out_sampler")]
+    GrammarLazyInitNullOutSamplerArg,
+    #[error("llama_rs_sampler_init_grammar_lazy called with null out_error")]
+    GrammarLazyInitNullOutErrorArg,
+    #[error("llama_rs_sampler_init_grammar_lazy returned null")]
+    GrammarLazyInitVendoredReturnedNull,
+    #[error(
+        "llama_rs_sampler_init_grammar_lazy wrapper failed to duplicate the C++ exception string"
+    )]
+    GrammarLazyInitErrorStringAllocationFailed,
+    #[error("llama_rs_sampler_init_grammar_lazy threw a C++ exception: {message}")]
+    GrammarLazyInitVendoredThrewCxxException { message: String },
+    #[error("llama_rs_sampler_init_grammar_lazy_patterns called with null out_sampler")]
+    GrammarLazyPatternsInitNullOutSamplerArg,
+    #[error("llama_rs_sampler_init_grammar_lazy_patterns called with null out_error")]
+    GrammarLazyPatternsInitNullOutErrorArg,
+    #[error("llama_rs_sampler_init_grammar_lazy_patterns returned null")]
+    GrammarLazyPatternsInitVendoredReturnedNull,
+    #[error(
+        "llama_rs_sampler_init_grammar_lazy_patterns wrapper failed to duplicate the C++ exception string"
+    )]
+    GrammarLazyPatternsInitErrorStringAllocationFailed,
+    #[error("llama_rs_sampler_init_grammar_lazy_patterns threw a C++ exception: {message}")]
+    GrammarLazyPatternsInitVendoredThrewCxxException { message: String },
+    #[error("vendored llama_sampler_init for llguidance returned null")]
+    LlguidanceSamplerInitVendoredReturnedNull,
 }
diff --git a/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs b/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs
new file mode 100644
index 00000000..e5943464
--- /dev/null
+++ b/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs
@@ -0,0 +1,20 @@
+use std::ffi::NulError;
+use std::string::FromUtf8Error;
+
+#[derive(Debug, thiserror::Error)]
+pub enum JsonSchemaToGrammarError {
+    #[error("schema string contains an interior NUL byte: {0}")]
+    SchemaContainsNulByte(#[from] NulError),
+    #[error("llama_rs_json_schema_to_grammar called with null schema_json")]
+    NullSchemaJsonArg,
+    #[error("llama_rs_json_schema_to_grammar called with null out_grammar")]
+    NullOutGrammarArg,
+    #[error("llama_rs_json_schema_to_grammar called with null out_error")]
+    NullOutErrorArg,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("llama_rs_json_schema_to_grammar threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
+    #[error("grammar string returned by llama_rs_json_schema_to_grammar is not valid UTF-8")]
+    GrammarNotUtf8(#[from] FromUtf8Error),
+}
diff --git a/llama-cpp-bindings/src/error/llama_cpp_error.rs b/llama-cpp-bindings/src/error/llama_cpp_error.rs
index b99fefdd..e40664a8 100644
--- a/llama-cpp-bindings/src/error/llama_cpp_error.rs
+++ b/llama-cpp-bindings/src/error/llama_cpp_error.rs
@@ -4,47 +4,34 @@ use crate::error::decode_error::DecodeError;
 use crate::error::embeddings_error::EmbeddingsError;
 use crate::error::encode_error::EncodeError;
 use crate::error::fit_error::FitError;
+use crate::error::json_schema_to_grammar_error::JsonSchemaToGrammarError;
 use crate::error::llama_context_load_error::LlamaContextLoadError;
 use crate::error::llama_model_load_error::LlamaModelLoadError;
 
-/// All errors that can occur in the llama-cpp crate.
-#[derive(Debug, Eq, PartialEq, thiserror::Error)]
+#[derive(Debug, thiserror::Error)]
 pub enum LlamaCppError {
-    /// The backend was already initialized. This can generally be ignored as initializing the backend
-    /// is idempotent.
     #[error("BackendAlreadyInitialized")]
     BackendAlreadyInitialized,
-    /// There was an error while get the chat template from model.
-    #[error("{0}")]
+    #[error(transparent)]
     ChatTemplateError(#[from] ChatTemplateError),
-    /// There was an error while decoding a batch.
-    #[error("{0}")]
+    #[error(transparent)]
     DecodeError(#[from] DecodeError),
-    /// There was an error while encoding a batch.
-    #[error("{0}")]
+    #[error(transparent)]
     EncodeError(#[from] EncodeError),
-    /// There was an error loading a model.
-    #[error("{0}")]
+    #[error(transparent)]
     LlamaModelLoadError(#[from] LlamaModelLoadError),
-    /// There was an error creating a new model context.
-    #[error("{0}")]
+    #[error(transparent)]
     LlamaContextLoadError(#[from] LlamaContextLoadError),
-    /// There was an error adding a token to a batch.
-    #[error["{0}"]]
+    #[error(transparent)]
     BatchAddError(#[from] BatchAddError),
-    /// see [`EmbeddingsError`]
     #[error(transparent)]
     EmbeddingError(#[from] EmbeddingsError),
-    /// Backend device not found
     #[error("Backend device {0} not found")]
     BackendDeviceNotFound(usize),
-    /// Max devices exceeded
     #[error("Max devices exceeded. Max devices is {0}")]
     MaxDevicesExceeded(usize),
-    /// Failed to convert JSON schema to grammar.
-    #[error("JsonSchemaToGrammarError: {0}")]
-    JsonSchemaToGrammarError(String),
-    /// see [`FitError`]
+    #[error(transparent)]
+    JsonSchemaToGrammarError(#[from] JsonSchemaToGrammarError),
     #[error(transparent)]
     FitError(#[from] FitError),
 }
diff --git a/llama-cpp-bindings/src/error/marker_detection_error.rs b/llama-cpp-bindings/src/error/marker_detection_error.rs
index aa755878..92e9939c 100644
--- a/llama-cpp-bindings/src/error/marker_detection_error.rs
+++ b/llama-cpp-bindings/src/error/marker_detection_error.rs
@@ -1,15 +1,47 @@
 use std::string::FromUtf8Error;
 
-/// Failed to detect tool-call diagnostic markers for a model.
 #[derive(Debug, thiserror::Error)]
 pub enum MarkerDetectionError {
-    /// llama.cpp returned an error code from the marker detection FFI call.
-    #[error("ffi error {0}")]
-    FfiError(i32),
-    /// The C++ side threw an exception during template analysis.
-    #[error("c++ exception during template analysis: {0}")]
-    AnalyzeException(String),
-    /// llama.cpp returned a marker string but its bytes were not valid UTF-8.
     #[error("ffi returned non-utf8 marker bytes: {0}")]
     MarkerUtf8Error(#[from] FromUtf8Error),
+    #[error("llama_rs_detect_reasoning_markers called with null model")]
+    DetectReasoningMarkersNullModelArg,
+    #[error("llama_rs_detect_reasoning_markers called with null out_open")]
+    DetectReasoningMarkersNullOutOpenArg,
+    #[error("llama_rs_detect_reasoning_markers called with null out_close")]
+    DetectReasoningMarkersNullOutCloseArg,
+    #[error("llama_rs_detect_reasoning_markers called with null out_error")]
+    DetectReasoningMarkersNullOutErrorArg,
+    #[error(
+        "llama_rs_detect_reasoning_markers wrapper failed to duplicate the C++ exception string"
+    )]
+    DetectReasoningMarkersErrorStringAllocationFailed,
+    #[error("llama_rs_detect_reasoning_markers threw a C++ exception: {message}")]
+    DetectReasoningMarkersVendoredThrewCxxException { message: String },
+    #[error("llama_rs_compute_tool_call_haystack called with null model")]
+    ComputeToolCallHaystackNullModelArg,
+    #[error("llama_rs_compute_tool_call_haystack called with null out_haystack")]
+    ComputeToolCallHaystackNullOutHaystackArg,
+    #[error("llama_rs_compute_tool_call_haystack called with null out_error")]
+    ComputeToolCallHaystackNullOutErrorArg,
+    #[error(
+        "llama_rs_compute_tool_call_haystack wrapper failed to duplicate the C++ exception string"
+    )]
+    ComputeToolCallHaystackErrorStringAllocationFailed,
+    #[error("llama_rs_compute_tool_call_haystack threw a C++ exception: {message}")]
+    ComputeToolCallHaystackVendoredThrewCxxException { message: String },
+    #[error("llama_rs_diagnose_tool_call_synthetic_renders called with null model")]
+    DiagnoseToolCallSyntheticRendersNullModelArg,
+    #[error("llama_rs_diagnose_tool_call_synthetic_renders called with null out_no_tools")]
+    DiagnoseToolCallSyntheticRendersNullOutNoToolsArg,
+    #[error("llama_rs_diagnose_tool_call_synthetic_renders called with null out_with_tools")]
+    DiagnoseToolCallSyntheticRendersNullOutWithToolsArg,
+    #[error("llama_rs_diagnose_tool_call_synthetic_renders called with null out_error")]
+    DiagnoseToolCallSyntheticRendersNullOutErrorArg,
+    #[error(
+        "llama_rs_diagnose_tool_call_synthetic_renders wrapper failed to duplicate the C++ exception string"
+    )]
+    DiagnoseToolCallSyntheticRendersErrorStringAllocationFailed,
+    #[error("llama_rs_diagnose_tool_call_synthetic_renders threw a C++ exception: {message}")]
+    DiagnoseToolCallSyntheticRendersVendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/error/sample_error.rs b/llama-cpp-bindings/src/error/sample_error.rs
index a7bbf4e8..8f5e1aa9 100644
--- a/llama-cpp-bindings/src/error/sample_error.rs
+++ b/llama-cpp-bindings/src/error/sample_error.rs
@@ -1,11 +1,15 @@
-/// Errors that can occur when sampling a token.
-#[derive(Debug, Eq, PartialEq, thiserror::Error)]
+#[derive(Debug, thiserror::Error)]
 pub enum SampleError {
-    /// A C++ exception was thrown during sampling
-    #[error("C++ exception during sampling: {0}")]
-    CppException(String),
-
-    /// An invalid argument was passed to the sampler
-    #[error("Invalid argument passed to sampler")]
-    InvalidArgument,
+    #[error("llama_rs_sampler_sample called with null sampler")]
+    NullSamplerArg,
+    #[error("llama_rs_sampler_sample called with null context")]
+    NullCtxArg,
+    #[error("llama_rs_sampler_sample called with null out_token")]
+    NullOutTokenArg,
+    #[error("llama_rs_sampler_sample called with null out_error")]
+    NullOutErrorArg,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("llama_rs_sampler_sample threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/error/sampler_accept_error.rs b/llama-cpp-bindings/src/error/sampler_accept_error.rs
index afa32a61..dc5e40c1 100644
--- a/llama-cpp-bindings/src/error/sampler_accept_error.rs
+++ b/llama-cpp-bindings/src/error/sampler_accept_error.rs
@@ -1,11 +1,11 @@
-/// Failed to accept a token in a sampler.
 #[derive(Debug, thiserror::Error)]
 pub enum SamplerAcceptError {
-    /// A C++ exception was thrown during accept
-    #[error("C++ exception during sampler accept: {0}")]
-    CppException(String),
-
-    /// An invalid argument was passed (null sampler or null error pointer)
-    #[error("Invalid argument passed to sampler accept")]
-    InvalidArgument,
+    #[error("llama_rs_sampler_accept called with null sampler")]
+    NullSamplerArg,
+    #[error("llama_rs_sampler_accept called with null out_error")]
+    NullOutErrorArg,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("llama_rs_sampler_accept threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/json_schema_to_grammar.rs b/llama-cpp-bindings/src/json_schema_to_grammar.rs
index 34590a82..d9b29cf8 100644
--- a/llama-cpp-bindings/src/json_schema_to_grammar.rs
+++ b/llama-cpp-bindings/src/json_schema_to_grammar.rs
@@ -1,15 +1,14 @@
 use std::ffi::{CStr, CString, c_char};
 
-use crate::error::{LlamaCppError, Result};
-use crate::ffi_status_is_ok::status_is_ok;
+use crate::error::JsonSchemaToGrammarError;
+use crate::ffi_error_reader::read_and_free_cpp_error;
 
-/// Convert a JSON schema string into a llama.cpp grammar string.
-///
 /// # Errors
-/// Returns an error if the schema contains null bytes or the conversion fails.
-pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
-    let schema_cstr = CString::new(schema_json)
-        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
+///
+/// Returns [`JsonSchemaToGrammarError`] if the schema string contains a NUL byte, the wrapper
+/// reports an allocation failure or a C++ exception, or the returned grammar is not valid UTF-8.
+pub fn json_schema_to_grammar(schema_json: &str) -> Result<String, JsonSchemaToGrammarError> {
+    let schema_cstr = CString::new(schema_json)?;
     let mut out: *mut c_char = std::ptr::null_mut();
     let mut error_ptr: *mut c_char = std::ptr::null_mut();
 
@@ -22,63 +21,80 @@ pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
         )
     };
 
-    if !status_is_ok(status) || out.is_null() {
-        let message = if error_ptr.is_null() {
-            "unknown error".to_owned()
-        } else {
-            let message = unsafe { CStr::from_ptr(error_ptr) }
-                .to_string_lossy()
-                .into_owned();
-
-            unsafe { llama_cpp_bindings_sys::llama_rs_string_free(error_ptr) };
-
-            message
-        };
-
-        return Err(LlamaCppError::JsonSchemaToGrammarError(message));
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK => {
+            let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
+            unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out) };
+            Ok(String::from_utf8(grammar_bytes)?)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG => {
+            unreachable!(
+                "llama_rs_json_schema_to_grammar received null schema_json despite valid Rust CString"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG => {
+            unreachable!(
+                "llama_rs_json_schema_to_grammar reported null out_grammar despite valid Rust pointer"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG => {
+            unreachable!(
+                "llama_rs_json_schema_to_grammar reported null out_error despite valid Rust pointer"
+            )
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED => {
+            Err(JsonSchemaToGrammarError::ErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION => {
+            let message = unsafe { read_and_free_cpp_error(error_ptr) };
+            Err(JsonSchemaToGrammarError::VendoredThrewCxxException { message })
+        }
+        other => unreachable!(
+            "llama_rs_json_schema_to_grammar returned unrecognized status {other}"
+        ),
     }
-
-    let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
-
-    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out) };
-
-    String::from_utf8(grammar_bytes)
-        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))
 }
 
 #[cfg(test)]
 mod tests {
     use super::json_schema_to_grammar;
+    use crate::error::JsonSchemaToGrammarError;
 
     #[test]
     fn simple_object() {
         let schema = r#"{"type": "object", "properties": {"name": {"type": "string"}}}"#;
-        let grammar = json_schema_to_grammar(schema).unwrap();
+        let grammar = json_schema_to_grammar(schema).expect("schema converts to grammar");
 
         assert!(!grammar.is_empty());
     }
 
     #[test]
-    fn null_byte_returns_error() {
+    fn null_byte_returns_schema_contains_nul_byte_error() {
         let schema = "{\x00}";
         let result = json_schema_to_grammar(schema);
 
-        assert!(result.is_err());
+        assert!(matches!(
+            result,
+            Err(JsonSchemaToGrammarError::SchemaContainsNulByte(_)),
+        ));
     }
 
     #[test]
     fn simple_string() {
         let schema = r#"{"type": "string"}"#;
-        let grammar = json_schema_to_grammar(schema).unwrap();
+        let grammar = json_schema_to_grammar(schema).expect("schema converts to grammar");
 
         assert!(!grammar.is_empty());
     }
 
     #[test]
-    fn invalid_json_returns_ffi_error() {
+    fn invalid_json_returns_vendored_threw_cxx_exception() {
         let schema = "not valid json at all";
         let result = json_schema_to_grammar(schema);
 
-        assert!(result.is_err());
+        assert!(matches!(
+            result,
+            Err(JsonSchemaToGrammarError::VendoredThrewCxxException { .. }),
+        ));
     }
 }
diff --git a/llama-cpp-bindings/src/lib.rs b/llama-cpp-bindings/src/lib.rs
index 261ec02d..9bed927b 100644
--- a/llama-cpp-bindings/src/lib.rs
+++ b/llama-cpp-bindings/src/lib.rs
@@ -64,12 +64,12 @@ pub mod tool_call_template_overrides;
 
 pub use error::{
     ApplyChatTemplateError, ChatTemplateError, DecodeError, EmbeddingsError, EncodeError,
-    EvalMultimodalChunksError, GrammarError, KvCacheSeqAddError, KvCacheSeqDivError,
-    LlamaContextLoadError, LlamaCppError, LlamaLoraAdapterInitError, LlamaLoraAdapterRemoveError,
-    LlamaLoraAdapterSetError, LlamaModelLoadError, LogitsError, MarkerDetectionError,
-    MetaValError, ModelParamsError, NewLlamaChatMessageError, ParseChatMessageError, Result,
-    SampleError, SamplerAcceptError, SamplingError, StringToTokenError, TokenSamplingError,
-    TokenToStringError,
+    EvalMultimodalChunksError, GrammarError, JsonSchemaToGrammarError, KvCacheSeqAddError,
+    KvCacheSeqDivError, LlamaContextLoadError, LlamaCppError, LlamaLoraAdapterInitError,
+    LlamaLoraAdapterRemoveError, LlamaLoraAdapterSetError, LlamaModelLoadError, LogitsError,
+    MarkerDetectionError, MetaValError, ModelParamsError, NewLlamaChatMessageError,
+    ParseChatMessageError, Result, SampleError, SamplerAcceptError, SamplingError,
+    StringToTokenError, TokenSamplingError, TokenToStringError,
 };
 
 pub use chat_message_parse_outcome::ChatMessageParseOutcome;
diff --git a/llama-cpp-bindings/src/llama_backend.rs b/llama-cpp-bindings/src/llama_backend.rs
index 20ad3ac3..ff6b09f9 100644
--- a/llama-cpp-bindings/src/llama_backend.rs
+++ b/llama-cpp-bindings/src/llama_backend.rs
@@ -38,7 +38,7 @@ impl LlamaBackend {
     ///
     /// let backend = LlamaBackend::init()?;
     /// // the llama backend can only be initialized once
-    /// assert_eq!(Err(LlamaCppError::BackendAlreadyInitialized), LlamaBackend::init());
+    /// assert!(matches!(LlamaBackend::init(), Err(LlamaCppError::BackendAlreadyInitialized)));
     ///
     ///# Ok(())
     ///# }
@@ -161,10 +161,10 @@ mod tests {
     fn double_init_returns_error() {
         let _backend = LlamaBackend::init().unwrap();
         let second = LlamaBackend::init();
-        assert_eq!(
+        assert!(matches!(
             second.unwrap_err(),
             LlamaCppError::BackendAlreadyInitialized
-        );
+        ));
     }
 
     #[test]
diff --git a/llama-cpp-bindings/src/llguidance_sampler.rs b/llama-cpp-bindings/src/llguidance_sampler.rs
index ffd51d75..4130ee33 100644
--- a/llama-cpp-bindings/src/llguidance_sampler.rs
+++ b/llama-cpp-bindings/src/llguidance_sampler.rs
@@ -153,9 +153,7 @@ pub fn create_llg_sampler(
     };
 
     if sampler.is_null() {
-        Err(GrammarError::NullGrammar(
-            "llguidance sampler returned null".to_owned(),
-        ))
+        Err(GrammarError::LlguidanceSamplerInitVendoredReturnedNull)
     } else {
         Ok(LlamaSampler { sampler })
     }
diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs
index 789a6ddf..91011890 100644
--- a/llama-cpp-bindings/src/model.rs
+++ b/llama-cpp-bindings/src/model.rs
@@ -31,7 +31,6 @@ use llama_cpp_bindings_types::ToolCallArguments;
 use llama_cpp_bindings_types::ToolCallMarkers;
 
 use crate::chat_message_parse_outcome::ChatMessageParseOutcome;
-use crate::ffi_status_to_i32::status_to_i32;
 use crate::llama_backend::LlamaBackend;
 use crate::llama_token_attrs::LlamaTokenAttrs;
 use crate::llama_token_attrs_from_int_error::LlamaTokenAttrsFromIntError;
@@ -741,22 +740,9 @@ impl LlamaModel {
     /// Returns [`MarkerDetectionError`] when any underlying FFI call fails.
     pub fn streaming_markers(&self) -> Result<StreamingMarkers, MarkerDetectionError> {
         let (reasoning_open_str, reasoning_close_str) =
-            invoke_ffi_string_pair_detector(|first, second, error| unsafe {
-                llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers(
-                    self.model.as_ptr(),
-                    first,
-                    second,
-                    error,
-                )
-            })?;
+            invoke_detect_reasoning_markers(self.model.as_ptr())?;
 
-        let tool_call_haystack = invoke_ffi_single_string_detector(|haystack, error| unsafe {
-            llama_cpp_bindings_sys::llama_rs_compute_tool_call_haystack(
-                self.model.as_ptr(),
-                haystack,
-                error,
-            )
-        })?;
+        let tool_call_haystack = invoke_compute_tool_call_haystack(self.model.as_ptr())?;
 
         let autoparser_pair = tool_call_haystack.as_deref().and_then(
             crate::extract_tool_call_markers_from_haystack::extract_tool_call_markers_from_haystack,
@@ -817,14 +803,7 @@ impl LlamaModel {
     /// # Errors
     /// Returns [`MarkerDetectionError`] when the underlying FFI call fails.
     pub fn reasoning_markers(&self) -> Result<Option<ReasoningMarkers>, MarkerDetectionError> {
-        let (open, close) = invoke_ffi_string_pair_detector(|first, second, error| unsafe {
-            llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers(
-                self.model.as_ptr(),
-                first,
-                second,
-                error,
-            )
-        })?;
+        let (open, close) = invoke_detect_reasoning_markers(self.model.as_ptr())?;
 
         match (open, close) {
             (Some(open), Some(close)) if !open.is_empty() && !close.is_empty() => {
@@ -1052,15 +1031,7 @@ impl LlamaModel {
     pub fn diagnose_tool_call_synthetic_renders(
         &self,
     ) -> Result<(String, String), MarkerDetectionError> {
-        let (no_tools, with_tools) =
-            invoke_ffi_string_pair_detector(|first, second, error| unsafe {
-                llama_cpp_bindings_sys::llama_rs_diagnose_tool_call_synthetic_renders(
-                    self.model.as_ptr(),
-                    first,
-                    second,
-                    error,
-                )
-            })?;
+        let (no_tools, with_tools) = invoke_diagnose_tool_call_synthetic_renders(self.model.as_ptr())?;
 
         Ok((no_tools.unwrap_or_default(), with_tools.unwrap_or_default()))
     }
@@ -1447,80 +1418,166 @@ fn synthesize_missing_tool_call_ids(tool_calls: &mut [ParsedToolCall]) {
     }
 }
 
-fn parse_single_string_status(
-    status: llama_cpp_bindings_sys::llama_rs_status,
-    out_value: *mut c_char,
-    out_error: *mut c_char,
-) -> Result<Option<String>, MarkerDetectionError> {
-    match status {
-        llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => read_optional_owned_cstr(out_value),
-        llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => {
-            let message = read_optional_owned_cstr_lossy(out_error);
+fn invoke_detect_reasoning_markers(
+    model: *const llama_cpp_bindings_sys::llama_model,
+) -> Result<(Option<String>, Option<String>), MarkerDetectionError> {
+    let mut out_open: *mut c_char = ptr::null_mut();
+    let mut out_close: *mut c_char = ptr::null_mut();
+    let mut out_error: *mut c_char = ptr::null_mut();
+
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers(
+            model,
+            &raw mut out_open,
+            &raw mut out_close,
+            &raw mut out_error,
+        )
+    };
 
-            Err(MarkerDetectionError::AnalyzeException(message))
+    let parsed = match status {
+        llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_OK => {
+            collect_optional_cstr_pair(out_open, out_close)
         }
-        other => Err(MarkerDetectionError::FfiError(status_to_i32(other))),
+        llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG => {
+            Err(MarkerDetectionError::DetectReasoningMarkersNullModelArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG => {
+            Err(MarkerDetectionError::DetectReasoningMarkersNullOutOpenArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG => {
+            Err(MarkerDetectionError::DetectReasoningMarkersNullOutCloseArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG => {
+            Err(MarkerDetectionError::DetectReasoningMarkersNullOutErrorArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED => {
+            Err(MarkerDetectionError::DetectReasoningMarkersErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION => {
+            let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(MarkerDetectionError::DetectReasoningMarkersVendoredThrewCxxException { message })
+        }
+        other => unreachable!(
+            "llama_rs_detect_reasoning_markers returned unrecognized status {other}"
+        ),
+    };
+
+    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_open) };
+    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_close) };
+    if !matches!(
+        parsed,
+        Err(MarkerDetectionError::DetectReasoningMarkersVendoredThrewCxxException { .. })
+    ) {
+        unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
     }
+
+    parsed
 }
 
-fn invoke_ffi_single_string_detector<TInvoke>(
-    invoke: TInvoke,
-) -> Result<Option<String>, MarkerDetectionError>
-where
-    TInvoke: FnOnce(*mut *mut c_char, *mut *mut c_char) -> llama_cpp_bindings_sys::llama_rs_status,
-{
-    let mut out_value: *mut c_char = ptr::null_mut();
+fn invoke_compute_tool_call_haystack(
+    model: *const llama_cpp_bindings_sys::llama_model,
+) -> Result<Option<String>, MarkerDetectionError> {
+    let mut out_haystack: *mut c_char = ptr::null_mut();
     let mut out_error: *mut c_char = ptr::null_mut();
 
-    let status = invoke(&raw mut out_value, &raw mut out_error);
-    let parsed = parse_single_string_status(status, out_value, out_error);
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_compute_tool_call_haystack(
+            model,
+            &raw mut out_haystack,
+            &raw mut out_error,
+        )
+    };
 
-    unsafe {
-        if !out_value.is_null() {
-            llama_cpp_bindings_sys::llama_rs_string_free(out_value);
+    let parsed = match status {
+        llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK => {
+            read_optional_owned_cstr(out_haystack)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG => {
+            Err(MarkerDetectionError::ComputeToolCallHaystackNullModelArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG => {
+            Err(MarkerDetectionError::ComputeToolCallHaystackNullOutHaystackArg)
         }
-        if !out_error.is_null() {
-            llama_cpp_bindings_sys::llama_rs_string_free(out_error);
+        llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG => {
+            Err(MarkerDetectionError::ComputeToolCallHaystackNullOutErrorArg)
         }
+        llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED => {
+            Err(MarkerDetectionError::ComputeToolCallHaystackErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION => {
+            let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(MarkerDetectionError::ComputeToolCallHaystackVendoredThrewCxxException { message })
+        }
+        other => unreachable!(
+            "llama_rs_compute_tool_call_haystack returned unrecognized status {other}"
+        ),
+    };
+
+    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_haystack) };
+    if !matches!(
+        parsed,
+        Err(MarkerDetectionError::ComputeToolCallHaystackVendoredThrewCxxException { .. })
+    ) {
+        unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
     }
 
     parsed
 }
 
-fn invoke_ffi_string_pair_detector<TInvoke>(
-    invoke: TInvoke,
-) -> Result<(Option<String>, Option<String>), MarkerDetectionError>
-where
-    TInvoke: FnOnce(
-        *mut *mut c_char,
-        *mut *mut c_char,
-        *mut *mut c_char,
-    ) -> llama_cpp_bindings_sys::llama_rs_status,
-{
-    let mut out_first: *mut c_char = ptr::null_mut();
-    let mut out_second: *mut c_char = ptr::null_mut();
+fn invoke_diagnose_tool_call_synthetic_renders(
+    model: *const llama_cpp_bindings_sys::llama_model,
+) -> Result<(Option<String>, Option<String>), MarkerDetectionError> {
+    let mut out_no_tools: *mut c_char = ptr::null_mut();
+    let mut out_with_tools: *mut c_char = ptr::null_mut();
     let mut out_error: *mut c_char = ptr::null_mut();
 
-    let status = invoke(&raw mut out_first, &raw mut out_second, &raw mut out_error);
-
-    let parsed = (|| match status {
-        llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => {
-            let first = read_optional_owned_cstr(out_first)?;
-            let second = read_optional_owned_cstr(out_second)?;
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_diagnose_tool_call_synthetic_renders(
+            model,
+            &raw mut out_no_tools,
+            &raw mut out_with_tools,
+            &raw mut out_error,
+        )
+    };
 
-            Ok((first, second))
+    let parsed = match status {
+        llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK => {
+            collect_optional_cstr_pair(out_no_tools, out_with_tools)
         }
-        llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => {
-            let message = read_optional_owned_cstr_lossy(out_error);
-
-            Err(MarkerDetectionError::AnalyzeException(message))
+        llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG => {
+            Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersNullModelArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG => {
+            Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersNullOutNoToolsArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG => {
+            Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersNullOutWithToolsArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG => {
+            Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersNullOutErrorArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED => {
+            Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION => {
+            let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersVendoredThrewCxxException {
+                message,
+            })
         }
-        other => Err(MarkerDetectionError::FfiError(status_to_i32(other))),
-    })();
+        other => unreachable!(
+            "llama_rs_diagnose_tool_call_synthetic_renders returned unrecognized status {other}"
+        ),
+    };
 
-    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_first) };
-    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_second) };
-    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_no_tools) };
+    unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_with_tools) };
+    if !matches!(
+        parsed,
+        Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersVendoredThrewCxxException { .. })
+    ) {
+        unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
+    }
 
     parsed
 }
@@ -1535,14 +1592,13 @@ fn read_optional_owned_cstr(ptr: *const c_char) -> Result<Option<String>, Marker
     Ok(Some(String::from_utf8(bytes)?))
 }
 
-fn read_optional_owned_cstr_lossy(ptr: *const c_char) -> String {
-    if ptr.is_null() {
-        return String::new();
-    }
-
-    unsafe { CStr::from_ptr(ptr) }
-        .to_string_lossy()
-        .into_owned()
+fn collect_optional_cstr_pair(
+    first_ptr: *const c_char,
+    second_ptr: *const c_char,
+) -> Result<(Option<String>, Option<String>), MarkerDetectionError> {
+    let first = read_optional_owned_cstr(first_ptr)?;
+    let second = read_optional_owned_cstr(second_ptr)?;
+    Ok((first, second))
 }
 
 fn extract_meta_string<TCFunction>(
@@ -1677,151 +1733,3 @@ mod extract_meta_string_tests {
     }
 }
 
-#[cfg(test)]
-mod ffi_helper_tests {
-    use std::ffi::CString;
-    use std::ptr;
-
-    use super::invoke_ffi_single_string_detector;
-    use super::invoke_ffi_string_pair_detector;
-    use super::parse_single_string_status;
-    use super::read_optional_owned_cstr_lossy;
-    use crate::MarkerDetectionError;
-
-    #[test]
-    fn read_optional_owned_cstr_lossy_returns_empty_for_null() {
-        let result = read_optional_owned_cstr_lossy(ptr::null());
-
-        assert!(result.is_empty());
-    }
-
-    #[test]
-    fn read_optional_owned_cstr_lossy_returns_string_for_valid_pointer() {
-        let owned = CString::new("hello").expect("static literal has no nuls");
-        let result = read_optional_owned_cstr_lossy(owned.as_ptr());
-
-        assert_eq!(result, "hello");
-    }
-
-    #[test]
-    fn read_optional_owned_cstr_lossy_handles_invalid_utf8_via_replacement() {
-        let owned = CString::new(vec![b'a', 0xFF, b'b']).expect("no interior nul");
-        let result = read_optional_owned_cstr_lossy(owned.as_ptr());
-
-        assert!(result.starts_with('a'));
-        assert!(result.ends_with('b'));
-    }
-
-    #[test]
-    fn parse_single_string_status_returns_none_for_ok_with_null() {
-        let result = parse_single_string_status(
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK,
-            ptr::null_mut(),
-            ptr::null_mut(),
-        );
-
-        assert_eq!(result.expect("OK + null returns Ok(None)"), None);
-    }
-
-    #[test]
-    fn parse_single_string_status_returns_some_for_ok_with_value() {
-        let owned = CString::new("present").expect("no nul");
-        let result = parse_single_string_status(
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK,
-            owned.as_ptr().cast_mut(),
-            ptr::null_mut(),
-        );
-
-        assert_eq!(
-            result.expect("OK + value returns Ok(Some)"),
-            Some("present".to_owned())
-        );
-    }
-
-    #[test]
-    fn parse_single_string_status_returns_analyze_exception() {
-        let owned = CString::new("boom").expect("no nul");
-        let result = parse_single_string_status(
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION,
-            ptr::null_mut(),
-            owned.as_ptr().cast_mut(),
-        );
-
-        match result.expect_err("EXCEPTION must yield Err") {
-            MarkerDetectionError::AnalyzeException(message) => assert_eq!(message, "boom"),
-            other => panic!("expected AnalyzeException, got {other:?}"),
-        }
-    }
-
-    #[test]
-    fn parse_single_string_status_returns_ffi_error_for_other_status() {
-        let result = parse_single_string_status(
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT,
-            ptr::null_mut(),
-            ptr::null_mut(),
-        );
-
-        match result.expect_err("invalid status must yield Err") {
-            MarkerDetectionError::FfiError(_) => {}
-            other => panic!("expected FfiError, got {other:?}"),
-        }
-    }
-
-    #[test]
-    fn invoke_ffi_single_string_detector_propagates_invalid_argument_status() {
-        let result = invoke_ffi_single_string_detector(|_value, _error| {
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT
-        });
-
-        assert!(matches!(result, Err(MarkerDetectionError::FfiError(_))));
-    }
-
-    #[test]
-    fn invoke_ffi_single_string_detector_returns_none_for_ok_with_null() {
-        let result = invoke_ffi_single_string_detector(|value, _error| {
-            unsafe {
-                *value = ptr::null_mut();
-            }
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK
-        });
-
-        assert_eq!(result.expect("OK + null returns Ok(None)"), None);
-    }
-
-    #[test]
-    fn invoke_ffi_string_pair_detector_propagates_invalid_argument_status() {
-        let result = invoke_ffi_string_pair_detector(|_first, _second, _error| {
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT
-        });
-
-        assert!(matches!(result, Err(MarkerDetectionError::FfiError(_))));
-    }
-
-    #[test]
-    fn invoke_ffi_string_pair_detector_returns_pair_of_none_for_ok_with_nulls() {
-        let result = invoke_ffi_string_pair_detector(|first, second, _error| {
-            unsafe {
-                *first = ptr::null_mut();
-                *second = ptr::null_mut();
-            }
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK
-        });
-
-        assert_eq!(
-            result.expect("OK with both null returns Ok((None, None))"),
-            (None, None)
-        );
-    }
-
-    #[test]
-    fn invoke_ffi_string_pair_detector_propagates_invalid_status_codes() {
-        let result = invoke_ffi_string_pair_detector(|_first, _second, _error| {
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_ALLOCATION_FAILED
-        });
-
-        match result.expect_err("non-OK status yields Err") {
-            MarkerDetectionError::FfiError(code) => assert!(code != 0),
-            other => panic!("expected FfiError, got {other:?}"),
-        }
-    }
-}
diff --git a/llama-cpp-bindings/src/model/params.rs b/llama-cpp-bindings/src/model/params.rs
index 4772a129..d246137e 100644
--- a/llama-cpp-bindings/src/model/params.rs
+++ b/llama-cpp-bindings/src/model/params.rs
@@ -707,10 +707,10 @@ mod tests {
     fn with_devices_invalid_index_returns_error() {
         let result = LlamaModelParams::default().with_devices(&[999_999]);
 
-        assert_eq!(
+        assert!(matches!(
             result.unwrap_err(),
             crate::LlamaCppError::BackendDeviceNotFound(999_999)
-        );
+        ));
     }
 
     #[test]
diff --git a/llama-cpp-bindings/src/sampling.rs b/llama-cpp-bindings/src/sampling.rs
index e9aadb21..4c63980c 100644
--- a/llama-cpp-bindings/src/sampling.rs
+++ b/llama-cpp-bindings/src/sampling.rs
@@ -13,30 +13,25 @@ use crate::token::logit_bias::LlamaLogitBias;
 use crate::{GrammarError, SampleError, SamplerAcceptError, SamplingError};
 
 fn check_sampler_accept_status(
-    status: llama_cpp_bindings_sys::llama_rs_status,
+    status: llama_cpp_bindings_sys::llama_rs_sampler_accept_status,
     error_ptr: *mut c_char,
 ) -> Result<(), SamplerAcceptError> {
     match status {
-        llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => Ok(()),
-        llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT => {
-            Err(SamplerAcceptError::InvalidArgument)
+        llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_OK => Ok(()),
+        llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG => {
+            Err(SamplerAcceptError::NullSamplerArg)
         }
-        _ => Err(SamplerAcceptError::CppException(unsafe {
-            read_and_free_cpp_error(error_ptr)
-        })),
-    }
-}
-
-fn check_sampler_not_null(
-    sampler: *mut llama_cpp_bindings_sys::llama_sampler,
-    error_ptr: *mut c_char,
-) -> Result<LlamaSampler, GrammarError> {
-    if sampler.is_null() {
-        Err(GrammarError::NullGrammar(unsafe {
-            read_and_free_cpp_error(error_ptr)
-        }))
-    } else {
-        Ok(LlamaSampler { sampler })
+        llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG => {
+            Err(SamplerAcceptError::NullOutErrorArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED => {
+            Err(SamplerAcceptError::ErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION => {
+            let message = unsafe { read_and_free_cpp_error(error_ptr) };
+            Err(SamplerAcceptError::VendoredThrewCxxException { message })
+        }
+        other => unreachable!("llama_rs_sampler_accept returned unrecognized status {other}"),
     }
 }
 
@@ -85,13 +80,27 @@ impl LlamaSampler {
         };
 
         match status {
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => Ok(LlamaToken(token)),
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT => {
-                Err(SampleError::InvalidArgument)
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_OK => Ok(LlamaToken(token)),
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG => {
+                Err(SampleError::NullSamplerArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG => {
+                Err(SampleError::NullCtxArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG => {
+                Err(SampleError::NullOutTokenArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG => {
+                Err(SampleError::NullOutErrorArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(SampleError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(error_ptr) };
+                Err(SampleError::VendoredThrewCxxException { message })
             }
-            _ => Err(SampleError::CppException(unsafe {
-                read_and_free_cpp_error(error_ptr)
-            })),
+            other => unreachable!("llama_rs_sampler_sample returned unrecognized status {other}"),
         }
     }
 
@@ -377,18 +386,43 @@ impl LlamaSampler {
     ) -> Result<Self, GrammarError> {
         let (grammar_str, grammar_root) =
             Self::sanitize_grammar_strings(grammar_str, grammar_root)?;
+        let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut();
         let mut error_ptr: *mut c_char = std::ptr::null_mut();
 
-        let sampler = unsafe {
+        let status = unsafe {
             llama_cpp_bindings_sys::llama_rs_sampler_init_grammar(
                 model.vocab_ptr(),
                 grammar_str.as_ptr(),
                 grammar_root.as_ptr(),
+                &raw mut sampler,
                 &raw mut error_ptr,
             )
         };
 
-        check_sampler_not_null(sampler, error_ptr)
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK => {
+                Ok(Self { sampler })
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG => {
+                Err(GrammarError::GrammarInitNullOutSamplerArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG => {
+                Err(GrammarError::GrammarInitNullOutErrorArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL => {
+                Err(GrammarError::GrammarInitVendoredReturnedNull)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(GrammarError::GrammarInitErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(error_ptr) };
+                Err(GrammarError::GrammarInitVendoredThrewCxxException { message })
+            }
+            other => unreachable!(
+                "llama_rs_sampler_init_grammar returned unrecognized status {other}"
+            ),
+        }
     }
 
     /// Lazy grammar sampler, introduced in <https://github.com/ggerganov/llama.cpp/pull/9639>
@@ -407,12 +441,13 @@ impl LlamaSampler {
         let (grammar_str, grammar_root) =
             Self::sanitize_grammar_strings(grammar_str, grammar_root)?;
         let trigger_words = Self::sanitize_trigger_words(trigger_words)?;
+        let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut();
         let mut error_ptr: *mut c_char = std::ptr::null_mut();
 
         let mut trigger_word_ptrs: Vec<*const c_char> =
             trigger_words.iter().map(|cs| cs.as_ptr()).collect();
 
-        let sampler = unsafe {
+        let status = unsafe {
             llama_cpp_bindings_sys::llama_rs_sampler_init_grammar_lazy(
                 model.vocab_ptr(),
                 grammar_str.as_ptr(),
@@ -421,11 +456,35 @@ impl LlamaSampler {
                 trigger_word_ptrs.len(),
                 trigger_tokens.as_ptr().cast(),
                 trigger_tokens.len(),
+                &raw mut sampler,
                 &raw mut error_ptr,
             )
         };
 
-        check_sampler_not_null(sampler, error_ptr)
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK => {
+                Ok(Self { sampler })
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG => {
+                Err(GrammarError::GrammarLazyInitNullOutSamplerArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG => {
+                Err(GrammarError::GrammarLazyInitNullOutErrorArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL => {
+                Err(GrammarError::GrammarLazyInitVendoredReturnedNull)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(GrammarError::GrammarLazyInitErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(error_ptr) };
+                Err(GrammarError::GrammarLazyInitVendoredThrewCxxException { message })
+            }
+            other => unreachable!(
+                "llama_rs_sampler_init_grammar_lazy returned unrecognized status {other}"
+            ),
+        }
     }
 
     /// Lazy grammar sampler using regex trigger patterns.
@@ -446,12 +505,13 @@ impl LlamaSampler {
         let (grammar_str, grammar_root) =
             Self::sanitize_grammar_strings(grammar_str, grammar_root)?;
         let trigger_patterns = Self::sanitize_trigger_patterns(trigger_patterns)?;
+        let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut();
         let mut error_ptr: *mut c_char = std::ptr::null_mut();
 
         let mut trigger_pattern_ptrs: Vec<*const c_char> =
             trigger_patterns.iter().map(|cs| cs.as_ptr()).collect();
 
-        let sampler = unsafe {
+        let status = unsafe {
             llama_cpp_bindings_sys::llama_rs_sampler_init_grammar_lazy_patterns(
                 model.vocab_ptr(),
                 grammar_str.as_ptr(),
@@ -460,11 +520,35 @@ impl LlamaSampler {
                 trigger_pattern_ptrs.len(),
                 trigger_tokens.as_ptr().cast(),
                 trigger_tokens.len(),
+                &raw mut sampler,
                 &raw mut error_ptr,
             )
         };
 
-        check_sampler_not_null(sampler, error_ptr)
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK => {
+                Ok(Self { sampler })
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG => {
+                Err(GrammarError::GrammarLazyPatternsInitNullOutSamplerArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG => {
+                Err(GrammarError::GrammarLazyPatternsInitNullOutErrorArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL => {
+                Err(GrammarError::GrammarLazyPatternsInitVendoredReturnedNull)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(GrammarError::GrammarLazyPatternsInitErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { read_and_free_cpp_error(error_ptr) };
+                Err(GrammarError::GrammarLazyPatternsInitVendoredThrewCxxException { message })
+            }
+            other => unreachable!(
+                "llama_rs_sampler_init_grammar_lazy_patterns returned unrecognized status {other}"
+            ),
+        }
     }
 
     /// `LLGuidance` sampler for constrained decoding.
@@ -719,7 +803,7 @@ mod tests {
     fn sanitize_grammar_strings_root_not_found() {
         let result = LlamaSampler::sanitize_grammar_strings("expr ::= \"hello\"", "root");
 
-        assert_eq!(result.err(), Some(GrammarError::RootNotFound));
+        assert!(matches!(result.err(), Some(GrammarError::RootNotFound)));
     }
 
     #[test]
@@ -928,35 +1012,28 @@ mod tests {
     }
 
     #[test]
-    fn check_sampler_accept_status_invalid_argument() {
+    fn check_sampler_accept_status_null_sampler() {
         let result = super::check_sampler_accept_status(
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT,
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG,
             std::ptr::null_mut(),
         );
 
         assert!(matches!(
             result,
-            Err(crate::SamplerAcceptError::InvalidArgument)
+            Err(crate::SamplerAcceptError::NullSamplerArg)
         ));
     }
 
     #[test]
-    fn check_sampler_accept_status_exception() {
+    fn check_sampler_accept_status_exception_maps_to_typed_variant() {
         let result = super::check_sampler_accept_status(
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION,
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION,
             std::ptr::null_mut(),
         );
 
         assert!(matches!(
             result,
-            Err(crate::SamplerAcceptError::CppException(_))
+            Err(crate::SamplerAcceptError::VendoredThrewCxxException { .. })
         ));
     }
-
-    #[test]
-    fn check_sampler_not_null_returns_error() {
-        let result = super::check_sampler_not_null(std::ptr::null_mut(), std::ptr::null_mut());
-
-        assert!(result.is_err());
-    }
 }

From 002b61bfbef2679017db72984bcf307eb35cf68a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 16:33:32 +0200
Subject: [PATCH 12/16] pass /EHsc to vendored llama.cpp msvc build so c++
 exceptions can unwind into wrapper try/catch

---
 llama-cpp-bindings-build/src/cmake_config.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama-cpp-bindings-build/src/cmake_config.rs b/llama-cpp-bindings-build/src/cmake_config.rs
index a52521e3..90b608d4 100644
--- a/llama-cpp-bindings-build/src/cmake_config.rs
+++ b/llama-cpp-bindings-build/src/cmake_config.rs
@@ -205,6 +205,7 @@ fn configure_platform_specific(
         TargetOs::Windows(WindowsVariant::Msvc) => {
             config.cflag("/w");
             config.cxxflag("/w");
+            config.cxxflag("/EHsc");
             configure_msvc_release_workaround(config, profile);
         }
         TargetOs::Android => {

From fa39d5a267a60d073a15ed098da907413500d752 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 16:42:10 +0200
Subject: [PATCH 13/16] wrap remaining raw vendored ffi calls to surface c++
 exceptions as typed rust errors

---
 llama-cpp-bindings-sys/wrapper_common.cpp     | 215 ++++++++++++++++++
 llama-cpp-bindings-sys/wrapper_common.h       |  82 +++++++
 llama-cpp-bindings-tests/tests/model.rs       |  15 +-
 llama-cpp-bindings/src/context.rs             |  83 +++++--
 llama-cpp-bindings/src/error/decode_error.rs  |  51 +++--
 .../src/error/llama_context_load_error.rs     |  18 +-
 .../src/error/llama_model_load_error.rs       |  21 +-
 .../src/error/string_to_token_error.rs        |  15 +-
 llama-cpp-bindings/src/model.rs               | 144 +++++++++---
 llama-cpp-bindings/src/token/data_array.rs    |  39 +++-
 10 files changed, 583 insertions(+), 100 deletions(-)

diff --git a/llama-cpp-bindings-sys/wrapper_common.cpp b/llama-cpp-bindings-sys/wrapper_common.cpp
index 4544ecfd..935af272 100644
--- a/llama-cpp-bindings-sys/wrapper_common.cpp
+++ b/llama-cpp-bindings-sys/wrapper_common.cpp
@@ -460,3 +460,218 @@ extern "C" llama_rs_sampler_accept_status llama_rs_sampler_accept(
         return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION;
     }
 }
+
+extern "C" llama_rs_load_model_from_file_status llama_rs_load_model_from_file(
+    const char * path,
+    struct llama_model_params params,
+    struct llama_model ** out_model,
+    char ** out_error) {
+    if (out_model) {
+        *out_model = nullptr;
+    }
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!path) {
+        return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG;
+    }
+    if (!out_model) {
+        return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG;
+    }
+    try {
+        *out_model = llama_load_model_from_file(path, params);
+        if (!*out_model) {
+            return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL;
+        }
+        return LLAMA_RS_LOAD_MODEL_FROM_FILE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        *out_error = llama_rs_dup_string(err.what());
+        if (!*out_error) {
+            return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
+
+extern "C" llama_rs_new_context_with_model_status llama_rs_new_context_with_model(
+    struct llama_model * model,
+    struct llama_context_params params,
+    struct llama_context ** out_ctx,
+    char ** out_error) {
+    if (out_ctx) {
+        *out_ctx = nullptr;
+    }
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!model) {
+        return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG;
+    }
+    if (!out_ctx) {
+        return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG;
+    }
+    try {
+        *out_ctx = llama_new_context_with_model(model, params);
+        if (!*out_ctx) {
+            return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL;
+        }
+        return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        *out_error = llama_rs_dup_string(err.what());
+        if (!*out_error) {
+            return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
+
+extern "C" llama_rs_decode_status llama_rs_decode(
+    struct llama_context * ctx,
+    struct llama_batch batch,
+    int32_t * out_vendored_return_code,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (out_vendored_return_code) {
+        *out_vendored_return_code = 0;
+    }
+    if (!ctx) {
+        return LLAMA_RS_DECODE_NULL_CTX_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG;
+    }
+    try {
+        int32_t result = llama_decode(ctx, batch);
+        if (result != 0) {
+            if (out_vendored_return_code) {
+                *out_vendored_return_code = result;
+            }
+            return LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE;
+        }
+        return LLAMA_RS_DECODE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        *out_error = llama_rs_dup_string(err.what());
+        if (!*out_error) {
+            return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
+
+extern "C" llama_rs_tokenize_status llama_rs_tokenize(
+    const struct llama_vocab * vocab,
+    const char * text,
+    int32_t text_len,
+    llama_token * tokens,
+    int32_t n_tokens_max,
+    bool add_special,
+    bool parse_special,
+    int32_t * out_returned_count,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (out_returned_count) {
+        *out_returned_count = 0;
+    }
+    if (!vocab) {
+        return LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG;
+    }
+    if (!text) {
+        return LLAMA_RS_TOKENIZE_NULL_TEXT_ARG;
+    }
+    if (!out_returned_count) {
+        return LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG;
+    }
+    try {
+        int32_t count = llama_tokenize(
+            vocab, text, text_len, tokens, n_tokens_max, add_special, parse_special);
+        *out_returned_count = count;
+        return LLAMA_RS_TOKENIZE_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        *out_error = llama_rs_dup_string(err.what());
+        if (!*out_error) {
+            return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
+
+extern "C" llama_rs_sampler_apply_status llama_rs_sampler_apply(
+    struct llama_sampler * sampler,
+    struct llama_token_data_array * data_array,
+    char ** out_error) {
+    if (out_error) {
+        *out_error = nullptr;
+    }
+    if (!sampler) {
+        return LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG;
+    }
+    if (!data_array) {
+        return LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG;
+    }
+    if (!out_error) {
+        return LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG;
+    }
+    try {
+        llama_sampler_apply(sampler, data_array);
+        return LLAMA_RS_SAMPLER_APPLY_OK;
+    } catch (const std::bad_alloc &) {
+        return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED;
+    } catch (const std::exception & err) {
+        *out_error = llama_rs_dup_string(err.what());
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION;
+    } catch (...) {
+        *out_error = llama_rs_dup_string("unknown c++ exception");
+        if (!*out_error) {
+            return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED;
+        }
+        return LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION;
+    }
+}
diff --git a/llama-cpp-bindings-sys/wrapper_common.h b/llama-cpp-bindings-sys/wrapper_common.h
index d1770b2a..20aaee09 100644
--- a/llama-cpp-bindings-sys/wrapper_common.h
+++ b/llama-cpp-bindings-sys/wrapper_common.h
@@ -172,6 +172,88 @@ llama_rs_memory_seq_div_status llama_rs_memory_seq_div(
     int d,
     char ** out_error);
 
+typedef enum llama_rs_load_model_from_file_status {
+    LLAMA_RS_LOAD_MODEL_FROM_FILE_OK = 0,
+    LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG,
+    LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG,
+    LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL,
+    LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_load_model_from_file_status;
+
+llama_rs_load_model_from_file_status llama_rs_load_model_from_file(
+    const char * path,
+    struct llama_model_params params,
+    struct llama_model ** out_model,
+    char ** out_error);
+
+typedef enum llama_rs_new_context_with_model_status {
+    LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK = 0,
+    LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG,
+    LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG,
+    LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL,
+    LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_new_context_with_model_status;
+
+llama_rs_new_context_with_model_status llama_rs_new_context_with_model(
+    struct llama_model * model,
+    struct llama_context_params params,
+    struct llama_context ** out_ctx,
+    char ** out_error);
+
+typedef enum llama_rs_decode_status {
+    LLAMA_RS_DECODE_OK = 0,
+    LLAMA_RS_DECODE_NULL_CTX_ARG,
+    LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE,
+    LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_decode_status;
+
+llama_rs_decode_status llama_rs_decode(
+    struct llama_context * ctx,
+    struct llama_batch batch,
+    int32_t * out_vendored_return_code,
+    char ** out_error);
+
+typedef enum llama_rs_tokenize_status {
+    LLAMA_RS_TOKENIZE_OK = 0,
+    LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG,
+    LLAMA_RS_TOKENIZE_NULL_TEXT_ARG,
+    LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG,
+    LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_tokenize_status;
+
+llama_rs_tokenize_status llama_rs_tokenize(
+    const struct llama_vocab * vocab,
+    const char * text,
+    int32_t text_len,
+    llama_token * tokens,
+    int32_t n_tokens_max,
+    bool add_special,
+    bool parse_special,
+    int32_t * out_returned_count,
+    char ** out_error);
+
+typedef enum llama_rs_sampler_apply_status {
+    LLAMA_RS_SAMPLER_APPLY_OK = 0,
+    LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG,
+    LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG,
+    LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG,
+    LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED,
+    LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION,
+} llama_rs_sampler_apply_status;
+
+llama_rs_sampler_apply_status llama_rs_sampler_apply(
+    struct llama_sampler * sampler,
+    struct llama_token_data_array * data_array,
+    char ** out_error);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/llama-cpp-bindings-tests/tests/model.rs b/llama-cpp-bindings-tests/tests/model.rs
index b69f0bd9..52270d69 100644
--- a/llama-cpp-bindings-tests/tests/model.rs
+++ b/llama-cpp-bindings-tests/tests/model.rs
@@ -1,6 +1,6 @@
 use std::num::NonZeroU16;
 use std::num::NonZeroU32;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 
 use anyhow::Result;
 use llama_cpp_bindings::ChatTemplateError;
@@ -261,15 +261,15 @@ fn load_model_with_invalid_path_returns_error() {
     let model_params = LlamaModelParams::default();
     let result = LlamaModel::load_from_file(backend, "/nonexistent/model.gguf", &model_params);
 
-    assert_eq!(
+    assert!(matches!(
         result.unwrap_err(),
-        LlamaModelLoadError::FileNotFound(PathBuf::from("/nonexistent/model.gguf"))
-    );
+        LlamaModelLoadError::FileNotFound(path) if path == Path::new("/nonexistent/model.gguf"),
+    ));
 }
 
 #[test]
 #[serial]
-fn load_model_with_invalid_file_content_returns_null_result() -> Result<()> {
+fn load_model_with_invalid_file_content_returns_vendored_returned_null() -> Result<()> {
     let fixture = FixtureSession::open()?;
     let backend = fixture.backend();
     let model_params = LlamaModelParams::default();
@@ -278,7 +278,10 @@ fn load_model_with_invalid_file_content_returns_null_result() -> Result<()> {
 
     let result = LlamaModel::load_from_file(backend, &dummy_path, &model_params);
 
-    assert_eq!(result.unwrap_err(), LlamaModelLoadError::NullResult);
+    assert!(matches!(
+        result.unwrap_err(),
+        LlamaModelLoadError::VendoredReturnedNull,
+    ));
     let _ = std::fs::remove_file(&dummy_path);
 
     Ok(())
diff --git a/llama-cpp-bindings/src/context.rs b/llama-cpp-bindings/src/context.rs
index 61246cbc..a01aa6df 100644
--- a/llama-cpp-bindings/src/context.rs
+++ b/llama-cpp-bindings/src/context.rs
@@ -111,15 +111,45 @@ impl<'model> LlamaContext<'model> {
         params: LlamaContextParams,
     ) -> Result<Self, LlamaContextLoadError> {
         let context_params = params.context_params;
-        let context = unsafe {
-            llama_cpp_bindings_sys::llama_new_context_with_model(
+        let mut out_ctx: *mut llama_cpp_bindings_sys::llama_context = std::ptr::null_mut();
+        let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut();
+        let status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_new_context_with_model(
                 model.model.as_ptr(),
                 context_params,
+                &raw mut out_ctx,
+                &raw mut out_error,
             )
         };
-        let context = NonNull::new(context).ok_or(LlamaContextLoadError::NullReturn)?;
-
-        Ok(Self::new(model, context, params.embeddings()))
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK => {
+                let context = NonNull::new(out_ctx)
+                    .ok_or(LlamaContextLoadError::VendoredReturnedNull)?;
+                Ok(Self::new(model, context, params.embeddings()))
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG => {
+                Err(LlamaContextLoadError::NullModelArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG => {
+                Err(LlamaContextLoadError::NullOutCtxArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG => {
+                Err(LlamaContextLoadError::NullOutErrorArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL => {
+                Err(LlamaContextLoadError::VendoredReturnedNull)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(LlamaContextLoadError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+                Err(LlamaContextLoadError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!(
+                "llama_rs_new_context_with_model returned unrecognized status {other}"
+            ),
+        }
     }
 
     /// Gets the max number of logical tokens that can be submitted to decode. Must be greater than or equal to [`Self::n_ubatch`].
@@ -203,22 +233,45 @@ impl<'model> LlamaContext<'model> {
     /// # Errors
     ///
     /// - `DecodeError` if the decoding failed.
-    ///
-    /// # Panics
-    ///
-    /// - the returned [`std::ffi::c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems)
     pub fn decode(&mut self, batch: &mut LlamaBatch) -> Result<(), DecodeError> {
-        let result = unsafe {
-            llama_cpp_bindings_sys::llama_decode(self.context.as_ptr(), batch.llama_batch)
+        let mut out_vendored_return_code: i32 = 0;
+        let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut();
+        let status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_decode(
+                self.context.as_ptr(),
+                batch.llama_batch,
+                &raw mut out_vendored_return_code,
+                &raw mut out_error,
+            )
         };
-
-        match NonZeroI32::new(result) {
-            None => {
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_DECODE_OK => {
                 self.initialized_logits
                     .clone_from(&batch.initialized_logits);
                 Ok(())
             }
-            Some(error) => Err(DecodeError::from(error)),
+            llama_cpp_bindings_sys::LLAMA_RS_DECODE_NULL_CTX_ARG => {
+                Err(DecodeError::NullContextArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG => {
+                Err(DecodeError::NullOutErrorArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE => {
+                let code = NonZeroI32::new(out_vendored_return_code).unwrap_or_else(|| {
+                    unreachable!(
+                        "llama_rs_decode reported a nonzero return code but the value was zero"
+                    )
+                });
+                Err(DecodeError::from(code))
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(DecodeError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+                Err(DecodeError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!("llama_rs_decode returned unrecognized status {other}"),
         }
     }
 
diff --git a/llama-cpp-bindings/src/error/decode_error.rs b/llama-cpp-bindings/src/error/decode_error.rs
index 1a404605..105d19e9 100644
--- a/llama-cpp-bindings/src/error/decode_error.rs
+++ b/llama-cpp-bindings/src/error/decode_error.rs
@@ -1,31 +1,33 @@
 use std::num::NonZeroI32;
 use std::os::raw::c_int;
 
-/// Failed to decode a batch.
 #[derive(Debug, Eq, PartialEq, thiserror::Error)]
 pub enum DecodeError {
-    /// No kv cache slot was available.
-    #[error("Decode Error 1: NoKvCacheSlot")]
+    #[error("llama_rs_decode called with null context")]
+    NullContextArg,
+    #[error("llama_rs_decode called with null out_error")]
+    NullOutErrorArg,
+    #[error("llama_decode returned non-zero code 1: no kv cache slot was available")]
     NoKvCacheSlot,
-    /// The computation was aborted by the abort callback.
-    #[error("Decode Error 2: Aborted")]
+    #[error("llama_decode returned non-zero code 2: aborted by abort callback")]
     Aborted,
-    /// The number of tokens in the batch was 0.
-    #[error("Decode Error -1: n_tokens == 0")]
+    #[error("llama_decode returned non-zero code -1: n_tokens == 0")]
     NTokensZero,
-    /// An unknown error occurred.
-    #[error("Decode Error {0}: unknown")]
-    Unknown(c_int),
+    #[error("llama_decode returned unrecognized non-zero code: {code}")]
+    VendoredReturnedUnrecognizedNonzeroCode { code: c_int },
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("llama_decode threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
 
-/// Decode a error from llama.cpp into a [`DecodeError`].
 impl From<NonZeroI32> for DecodeError {
     fn from(value: NonZeroI32) -> Self {
         match value.get() {
             1 => Self::NoKvCacheSlot,
             2 => Self::Aborted,
             -1 => Self::NTokensZero,
-            error_code => Self::Unknown(error_code),
+            error_code => Self::VendoredReturnedUnrecognizedNonzeroCode { code: error_code },
         }
     }
 }
@@ -37,34 +39,33 @@ mod tests {
     use super::DecodeError;
 
     #[test]
-    fn decode_error_no_kv_cache_slot() {
+    fn no_kv_cache_slot_maps_from_code_one() {
         let error = DecodeError::from(NonZeroI32::new(1).expect("1 is non-zero"));
 
         assert_eq!(error, DecodeError::NoKvCacheSlot);
-        assert_eq!(error.to_string(), "Decode Error 1: NoKvCacheSlot");
     }
 
     #[test]
-    fn decode_error_n_tokens_zero() {
-        let error = DecodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero"));
+    fn aborted_maps_from_code_two() {
+        let error = DecodeError::from(NonZeroI32::new(2).expect("2 is non-zero"));
 
-        assert_eq!(error, DecodeError::NTokensZero);
-        assert_eq!(error.to_string(), "Decode Error -1: n_tokens == 0");
+        assert_eq!(error, DecodeError::Aborted);
     }
 
     #[test]
-    fn decode_error_aborted() {
-        let error = DecodeError::from(NonZeroI32::new(2).expect("2 is non-zero"));
+    fn n_tokens_zero_maps_from_code_negative_one() {
+        let error = DecodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero"));
 
-        assert_eq!(error, DecodeError::Aborted);
-        assert_eq!(error.to_string(), "Decode Error 2: Aborted");
+        assert_eq!(error, DecodeError::NTokensZero);
     }
 
     #[test]
-    fn decode_error_unknown() {
+    fn unrecognized_code_falls_through_to_typed_variant() {
         let error = DecodeError::from(NonZeroI32::new(42).expect("42 is non-zero"));
 
-        assert_eq!(error, DecodeError::Unknown(42));
-        assert_eq!(error.to_string(), "Decode Error 42: unknown");
+        assert_eq!(
+            error,
+            DecodeError::VendoredReturnedUnrecognizedNonzeroCode { code: 42 }
+        );
     }
 }
diff --git a/llama-cpp-bindings/src/error/llama_context_load_error.rs b/llama-cpp-bindings/src/error/llama_context_load_error.rs
index 752c88af..d2911c5e 100644
--- a/llama-cpp-bindings/src/error/llama_context_load_error.rs
+++ b/llama-cpp-bindings/src/error/llama_context_load_error.rs
@@ -1,7 +1,15 @@
-/// Failed to Load context
-#[derive(Debug, Eq, PartialEq, thiserror::Error)]
+#[derive(Debug, thiserror::Error)]
 pub enum LlamaContextLoadError {
-    /// llama.cpp returned null
-    #[error("null reference from llama.cpp")]
-    NullReturn,
+    #[error("llama_rs_new_context_with_model called with null model")]
+    NullModelArg,
+    #[error("llama_rs_new_context_with_model called with null out_ctx")]
+    NullOutCtxArg,
+    #[error("llama_rs_new_context_with_model called with null out_error")]
+    NullOutErrorArg,
+    #[error("llama_rs_new_context_with_model returned null")]
+    VendoredReturnedNull,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("llama_rs_new_context_with_model threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/error/llama_model_load_error.rs b/llama-cpp-bindings/src/error/llama_model_load_error.rs
index a7b24012..96416b88 100644
--- a/llama-cpp-bindings/src/error/llama_model_load_error.rs
+++ b/llama-cpp-bindings/src/error/llama_model_load_error.rs
@@ -1,19 +1,24 @@
 use std::ffi::NulError;
 use std::path::PathBuf;
 
-/// An error that can occur when loading a model.
-#[derive(Debug, Eq, PartialEq, thiserror::Error)]
+#[derive(Debug, thiserror::Error)]
 pub enum LlamaModelLoadError {
-    /// There was a null byte in a provided string and thus it could not be converted to a C string.
     #[error("null byte in string {0}")]
     NullError(#[from] NulError),
-    /// llama.cpp returned a nullptr - this could be many different causes.
-    #[error("null result from llama cpp")]
-    NullResult,
-    /// Failed to convert the path to a rust str. This means the path was not valid unicode
     #[error("failed to convert path {0} to str")]
     PathToStrError(PathBuf),
-    /// The model file does not exist at the given path.
     #[error("model file not found: {0}")]
     FileNotFound(PathBuf),
+    #[error("llama_rs_load_model_from_file called with null path")]
+    NullPathArg,
+    #[error("llama_rs_load_model_from_file called with null out_model")]
+    NullOutModelArg,
+    #[error("llama_rs_load_model_from_file called with null out_error")]
+    NullOutErrorArg,
+    #[error("llama_rs_load_model_from_file returned null (model failed to load)")]
+    VendoredReturnedNull,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("llama_rs_load_model_from_file threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/error/string_to_token_error.rs b/llama-cpp-bindings/src/error/string_to_token_error.rs
index dc00b484..68045f2f 100644
--- a/llama-cpp-bindings/src/error/string_to_token_error.rs
+++ b/llama-cpp-bindings/src/error/string_to_token_error.rs
@@ -1,12 +1,21 @@
 use std::ffi::NulError;
 
-/// Failed to convert a string to a token sequence.
 #[derive(Debug, thiserror::Error)]
 pub enum StringToTokenError {
-    /// the string contained a null byte and thus could not be converted to a c string.
     #[error("{0}")]
     NulError(#[from] NulError),
     #[error("{0}")]
-    /// Failed to convert a provided integer to a [`c_int`].
     CIntConversionError(#[from] std::num::TryFromIntError),
+    #[error("llama_rs_tokenize called with null vocab")]
+    NullVocabArg,
+    #[error("llama_rs_tokenize called with null text")]
+    NullTextArg,
+    #[error("llama_rs_tokenize called with null out_returned_count")]
+    NullOutReturnedCountArg,
+    #[error("llama_rs_tokenize called with null out_error")]
+    NullOutErrorArg,
+    #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")]
+    ErrorStringAllocationFailed,
+    #[error("llama_rs_tokenize threw a C++ exception: {message}")]
+    VendoredThrewCxxException { message: String },
 }
diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs
index 91011890..71acb711 100644
--- a/llama-cpp-bindings/src/model.rs
+++ b/llama-cpp-bindings/src/model.rs
@@ -218,35 +218,29 @@ impl LlamaModel {
         let (c_string, c_string_len) = cstring_with_validated_len(str)?;
         let buffer_capacity = c_int::try_from(buffer.capacity())?;
 
-        let size = unsafe {
-            llama_cpp_bindings_sys::llama_tokenize(
+        let size = invoke_rs_tokenize(
+            self.vocab_ptr(),
+            c_string.as_ptr(),
+            c_string_len,
+            buffer
+                .as_mut_ptr()
+                .cast::<llama_cpp_bindings_sys::llama_token>(),
+            buffer_capacity,
+            add_bos,
+        )?;
+
+        let size = if size.is_negative() {
+            buffer.reserve_exact(usize::try_from(-size)?);
+            invoke_rs_tokenize(
                 self.vocab_ptr(),
                 c_string.as_ptr(),
                 c_string_len,
                 buffer
                     .as_mut_ptr()
                     .cast::<llama_cpp_bindings_sys::llama_token>(),
-                buffer_capacity,
+                -size,
                 add_bos,
-                true,
-            )
-        };
-
-        let size = if size.is_negative() {
-            buffer.reserve_exact(usize::try_from(-size)?);
-            unsafe {
-                llama_cpp_bindings_sys::llama_tokenize(
-                    self.vocab_ptr(),
-                    c_string.as_ptr(),
-                    c_string_len,
-                    buffer
-                        .as_mut_ptr()
-                        .cast::<llama_cpp_bindings_sys::llama_token>(),
-                    -size,
-                    add_bos,
-                    true,
-                )
-            }
+            )?
         } else {
             size
         };
@@ -577,22 +571,52 @@ impl LlamaModel {
         }
 
         let cstr = CString::new(path_str)?;
-        let llama_model = unsafe {
-            llama_cpp_bindings_sys::llama_load_model_from_file(cstr.as_ptr(), params.params)
+        let mut out_model: *mut llama_cpp_bindings_sys::llama_model = ptr::null_mut();
+        let mut out_error: *mut c_char = ptr::null_mut();
+        let status = unsafe {
+            llama_cpp_bindings_sys::llama_rs_load_model_from_file(
+                cstr.as_ptr(),
+                params.params,
+                &raw mut out_model,
+                &raw mut out_error,
+            )
         };
-
-        let model = match NonNull::new(llama_model) {
-            Some(ptr) => ptr,
-            None if !path.exists() => {
-                return Err(LlamaModelLoadError::FileNotFound(path.to_path_buf()));
+        match status {
+            llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_OK => {
+                let model = NonNull::new(out_model)
+                    .ok_or(LlamaModelLoadError::VendoredReturnedNull)?;
+                Ok(Self {
+                    model,
+                    tok_env: OnceLock::new(),
+                })
             }
-            None => return Err(LlamaModelLoadError::NullResult),
-        };
-
-        Ok(Self {
-            model,
-            tok_env: OnceLock::new(),
-        })
+            llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG => {
+                Err(LlamaModelLoadError::NullPathArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG => {
+                Err(LlamaModelLoadError::NullOutModelArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG => {
+                Err(LlamaModelLoadError::NullOutErrorArg)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL => {
+                if path.exists() {
+                    Err(LlamaModelLoadError::VendoredReturnedNull)
+                } else {
+                    Err(LlamaModelLoadError::FileNotFound(path.to_path_buf()))
+                }
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => {
+                Err(LlamaModelLoadError::ErrorStringAllocationFailed)
+            }
+            llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => {
+                let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+                Err(LlamaModelLoadError::VendoredThrewCxxException { message })
+            }
+            other => unreachable!(
+                "llama_rs_load_model_from_file returned unrecognized status {other}"
+            ),
+        }
     }
 
     /// Initializes a lora adapter from a file.
@@ -1592,6 +1616,54 @@ fn read_optional_owned_cstr(ptr: *const c_char) -> Result<Option<String>, Marker
     Ok(Some(String::from_utf8(bytes)?))
 }
 
+fn invoke_rs_tokenize(
+    vocab: *const llama_cpp_bindings_sys::llama_vocab,
+    text: *const c_char,
+    text_len: c_int,
+    tokens: *mut llama_cpp_bindings_sys::llama_token,
+    n_tokens_max: c_int,
+    add_bos: bool,
+) -> Result<c_int, StringToTokenError> {
+    let mut out_count: i32 = 0;
+    let mut out_error: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        llama_cpp_bindings_sys::llama_rs_tokenize(
+            vocab,
+            text,
+            text_len,
+            tokens,
+            n_tokens_max,
+            add_bos,
+            true,
+            &raw mut out_count,
+            &raw mut out_error,
+        )
+    };
+    match status {
+        llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_OK => Ok(out_count),
+        llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG => {
+            Err(StringToTokenError::NullVocabArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_NULL_TEXT_ARG => {
+            Err(StringToTokenError::NullTextArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG => {
+            Err(StringToTokenError::NullOutReturnedCountArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG => {
+            Err(StringToTokenError::NullOutErrorArg)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED => {
+            Err(StringToTokenError::ErrorStringAllocationFailed)
+        }
+        llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION => {
+            let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            Err(StringToTokenError::VendoredThrewCxxException { message })
+        }
+        other => unreachable!("llama_rs_tokenize returned unrecognized status {other}"),
+    }
+}
+
 fn collect_optional_cstr_pair(
     first_ptr: *const c_char,
     second_ptr: *const c_char,
diff --git a/llama-cpp-bindings/src/token/data_array.rs b/llama-cpp-bindings/src/token/data_array.rs
index af2134df..ec3afbf2 100644
--- a/llama-cpp-bindings/src/token/data_array.rs
+++ b/llama-cpp-bindings/src/token/data_array.rs
@@ -125,14 +125,49 @@ impl LlamaTokenDataArray {
         result
     }
 
-    /// Modifies the data array by applying a sampler to it
+    /// Modifies the data array by applying a sampler to it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `llama_rs_sampler_apply` reports any non-OK status: a null argument, a failed
+    /// error-string copy, or a C++ exception (`llama_sampler_apply` is documented as a pure logit
+    /// transform and is not expected to throw); failures panic per the crash-fast invariant.
     pub fn apply_sampler(&mut self, sampler: &LlamaSampler) {
         unsafe {
             self.modify_as_c_llama_token_data_array(|c_llama_token_data_array| {
-                llama_cpp_bindings_sys::llama_sampler_apply(
+                let mut out_error: *mut std::os::raw::c_char = ptr::null_mut();
+                let status = llama_cpp_bindings_sys::llama_rs_sampler_apply(
                     sampler.sampler,
                     c_llama_token_data_array,
+                    &raw mut out_error,
                 );
+                match status {
+                    llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_OK => {}
+                    llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG => {
+                        panic!("llama_rs_sampler_apply received null sampler pointer")
+                    }
+                    llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG => {
+                        panic!("llama_rs_sampler_apply received null data array pointer")
+                    }
+                    llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG => {
+                        panic!(
+                            "llama_rs_sampler_apply reported null out_error despite valid Rust pointer"
+                        )
+                    }
+                    llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED => {
+                        panic!(
+                            "llama_rs_sampler_apply could not allocate a Rust-owned copy of the C++ exception message"
+                        )
+                    }
+                    llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION => {
+                        let message =
+                            crate::ffi_error_reader::read_and_free_cpp_error(out_error);
+                        panic!("llama_rs_sampler_apply threw a C++ exception: {message}");
+                    }
+                    other => unreachable!(
+                        "llama_rs_sampler_apply returned unrecognized status {other}"
+                    ),
+                }
             });
         }
     }

From 38cfdf75f51f057ce001938d7b14ab9962acabc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 16:58:01 +0200
Subject: [PATCH 14/16] apply cargo fmt to phase 1 wrapper refactor

---
 .../tests/context_kv_cache.rs                 |  2 +-
 llama-cpp-bindings/src/context.rs             | 10 ++++---
 .../src/error/parse_chat_message_error.rs     | 12 ++++++---
 .../src/json_schema_to_grammar.rs             |  6 ++---
 llama-cpp-bindings/src/model.rs               | 26 ++++++++++++-------
 llama-cpp-bindings/src/mtmd/mtmd_context.rs   | 13 +++++-----
 .../src/mtmd/mtmd_eval_error.rs               |  5 +---
 .../src/mtmd/mtmd_tokenize_error.rs           |  4 ++-
 8 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/llama-cpp-bindings-tests/tests/context_kv_cache.rs b/llama-cpp-bindings-tests/tests/context_kv_cache.rs
index e8abb54b..39ee2714 100644
--- a/llama-cpp-bindings-tests/tests/context_kv_cache.rs
+++ b/llama-cpp-bindings-tests/tests/context_kv_cache.rs
@@ -4,8 +4,8 @@ use std::num::NonZeroU32;
 use anyhow::Result;
 use llama_cpp_bindings::context::LlamaContext;
 use llama_cpp_bindings::context::kv_cache::KvCacheConversionError;
-use llama_cpp_bindings::error::{KvCacheSeqAddError, KvCacheSeqDivError};
 use llama_cpp_bindings::context::params::LlamaContextParams;
+use llama_cpp_bindings::error::{KvCacheSeqAddError, KvCacheSeqDivError};
 use llama_cpp_bindings::llama_batch::LlamaBatch;
 use llama_cpp_bindings::model::AddBos;
 use llama_cpp_bindings_tests::FixtureSession;
diff --git a/llama-cpp-bindings/src/context.rs b/llama-cpp-bindings/src/context.rs
index a01aa6df..a58e1039 100644
--- a/llama-cpp-bindings/src/context.rs
+++ b/llama-cpp-bindings/src/context.rs
@@ -268,7 +268,8 @@ impl<'model> LlamaContext<'model> {
                 Err(DecodeError::ErrorStringAllocationFailed)
             }
             llama_cpp_bindings_sys::LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION => {
-                let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+                let message =
+                    unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
                 Err(DecodeError::VendoredThrewCxxException { message })
             }
             other => unreachable!("llama_rs_decode returned unrecognized status {other}"),
@@ -297,7 +298,9 @@ impl<'model> LlamaContext<'model> {
                     .clone_from(&batch.initialized_logits);
                 Ok(())
             }
-            llama_cpp_bindings_sys::LLAMA_RS_ENCODE_NULL_CTX_ARG => Err(EncodeError::NullContextArg),
+            llama_cpp_bindings_sys::LLAMA_RS_ENCODE_NULL_CTX_ARG => {
+                Err(EncodeError::NullContextArg)
+            }
             llama_cpp_bindings_sys::LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER => {
                 Err(EncodeError::ModelHasNoEncoder)
             }
@@ -313,7 +316,8 @@ impl<'model> LlamaContext<'model> {
                 Err(EncodeError::ErrorStringAllocationFailed)
             }
             llama_cpp_bindings_sys::LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION => {
-                let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+                let message =
+                    unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
                 Err(EncodeError::VendoredThrewCxxException { message })
             }
             other => unreachable!("llama_rs_encode returned unrecognized status {other}"),
diff --git a/llama-cpp-bindings/src/error/parse_chat_message_error.rs b/llama-cpp-bindings/src/error/parse_chat_message_error.rs
index cc1ccc06..9359c133 100644
--- a/llama-cpp-bindings/src/error/parse_chat_message_error.rs
+++ b/llama-cpp-bindings/src/error/parse_chat_message_error.rs
@@ -28,7 +28,9 @@ pub enum ParseChatMessageError {
     ToolCallCountNullHandleArg,
     #[error("llama_rs_parsed_chat_tool_call_count threw a C++ exception: {message}")]
     ToolCallCountThrewCxxException { message: String },
-    #[error("llama_rs_parsed_chat_tool_call_count wrapper failed to duplicate the C++ exception string")]
+    #[error(
+        "llama_rs_parsed_chat_tool_call_count wrapper failed to duplicate the C++ exception string"
+    )]
     ToolCallCountErrorStringAllocationFailed,
     #[error("llama_rs_parsed_chat_tool_call_id called with null handle")]
     ToolCallIdNullHandleArg,
@@ -36,7 +38,9 @@ pub enum ParseChatMessageError {
     ToolCallIdIndexOutOfBounds { index: usize },
     #[error("llama_rs_parsed_chat_tool_call_id threw a C++ exception: {message}")]
     ToolCallIdThrewCxxException { message: String },
-    #[error("llama_rs_parsed_chat_tool_call_id wrapper failed to duplicate the C++ exception string")]
+    #[error(
+        "llama_rs_parsed_chat_tool_call_id wrapper failed to duplicate the C++ exception string"
+    )]
     ToolCallIdErrorStringAllocationFailed,
     #[error("llama_rs_parsed_chat_tool_call_name called with null handle")]
     ToolCallNameNullHandleArg,
@@ -44,7 +48,9 @@ pub enum ParseChatMessageError {
     ToolCallNameIndexOutOfBounds { index: usize },
     #[error("llama_rs_parsed_chat_tool_call_name threw a C++ exception: {message}")]
     ToolCallNameThrewCxxException { message: String },
-    #[error("llama_rs_parsed_chat_tool_call_name wrapper failed to duplicate the C++ exception string")]
+    #[error(
+        "llama_rs_parsed_chat_tool_call_name wrapper failed to duplicate the C++ exception string"
+    )]
     ToolCallNameErrorStringAllocationFailed,
     #[error("llama_rs_parsed_chat_tool_call_arguments called with null handle")]
     ToolCallArgumentsNullHandleArg,
diff --git a/llama-cpp-bindings/src/json_schema_to_grammar.rs b/llama-cpp-bindings/src/json_schema_to_grammar.rs
index d9b29cf8..eda70c49 100644
--- a/llama-cpp-bindings/src/json_schema_to_grammar.rs
+++ b/llama-cpp-bindings/src/json_schema_to_grammar.rs
@@ -49,9 +49,9 @@ pub fn json_schema_to_grammar(schema_json: &str) -> Result<String, JsonSchemaToG
             let message = unsafe { read_and_free_cpp_error(error_ptr) };
             Err(JsonSchemaToGrammarError::VendoredThrewCxxException { message })
         }
-        other => unreachable!(
-            "llama_rs_json_schema_to_grammar returned unrecognized status {other}"
-        ),
+        other => {
+            unreachable!("llama_rs_json_schema_to_grammar returned unrecognized status {other}")
+        }
     }
 }
 
diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs
index 71acb711..9b6736d1 100644
--- a/llama-cpp-bindings/src/model.rs
+++ b/llama-cpp-bindings/src/model.rs
@@ -1007,7 +1007,9 @@ impl LlamaModel {
                 out_error = ptr::null_mut();
                 Err(ParseChatMessageError::ParseException { message })
             }
-            other => unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}"),
+            other => {
+                unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}")
+            }
         };
 
         let mut free_error: *mut c_char = ptr::null_mut();
@@ -1019,14 +1021,19 @@ impl LlamaModel {
                 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
                 Ok(value)
             }
-            (Ok(_), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION) => {
+            (
+                Ok(_),
+                llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION,
+            ) => {
                 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
-                let message = unsafe {
-                    crate::ffi_error_reader::read_and_free_cpp_error(free_error)
-                };
+                let message =
+                    unsafe { crate::ffi_error_reader::read_and_free_cpp_error(free_error) };
                 Err(ParseChatMessageError::FreeDestructorThrewCxxException { message })
             }
-            (Ok(_), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED) => {
+            (
+                Ok(_),
+                llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED,
+            ) => {
                 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
                 Err(ParseChatMessageError::FreeErrorStringAllocationFailed)
             }
@@ -1055,7 +1062,8 @@ impl LlamaModel {
     pub fn diagnose_tool_call_synthetic_renders(
         &self,
     ) -> Result<(String, String), MarkerDetectionError> {
-        let (no_tools, with_tools) = invoke_diagnose_tool_call_synthetic_renders(self.model.as_ptr())?;
+        let (no_tools, with_tools) =
+            invoke_diagnose_tool_call_synthetic_renders(self.model.as_ptr())?;
 
         Ok((no_tools.unwrap_or_default(), with_tools.unwrap_or_default()))
     }
@@ -1168,8 +1176,7 @@ fn read_parsed_chat_content(
             Err(ParseChatMessageError::ContentErrorStringAllocationFailed)
         }
         llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION => {
-            let message =
-                unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
+            let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
             Err(ParseChatMessageError::ContentThrewCxxException { message })
         }
         other => unreachable!("llama_rs_parsed_chat_content returned unrecognized status {other}"),
@@ -1804,4 +1811,3 @@ mod extract_meta_string_tests {
         assert!(result.is_err());
     }
 }
-
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_context.rs b/llama-cpp-bindings/src/mtmd/mtmd_context.rs
index d8952401..9ce51c4c 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_context.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_context.rs
@@ -126,11 +126,10 @@ impl MtmdContext {
 
         match status {
             llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_OK => {
-                let context = NonNull::new(out_ctx).ok_or_else(|| {
-                    MtmdInitError::VendoredReturnedNull {
+                let context =
+                    NonNull::new(out_ctx).ok_or_else(|| MtmdInitError::VendoredReturnedNull {
                         path: std::path::PathBuf::from(mmproj_path),
-                    }
-                })?;
+                    })?;
                 Ok(Self { context })
             }
             llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG => {
@@ -154,9 +153,9 @@ impl MtmdContext {
                 let message = unsafe { read_and_free_cpp_error(out_error) };
                 Err(MtmdInitError::VendoredThrewCxxException { message })
             }
-            other => unreachable!(
-                "llama_rs_mtmd_init_from_file returned unrecognized status: {other}"
-            ),
+            other => {
+                unreachable!("llama_rs_mtmd_init_from_file returned unrecognized status: {other}")
+            }
         }
     }
 
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs
index 40431fc0..fdf46896 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs
@@ -3,10 +3,7 @@ use crate::mtmd::image_chunk_batch_size_mismatch::ImageChunkBatchSizeMismatch;
 #[derive(thiserror::Error, Debug)]
 pub enum MtmdEvalError {
     #[error("batch size {requested} exceeds context batch size {context_max}")]
-    BatchSizeExceedsContextLimit {
-        requested: i32,
-        context_max: u32,
-    },
+    BatchSizeExceedsContextLimit { requested: i32, context_max: u32 },
     #[error(
         "image chunk has {} tokens but n_batch is {}",
         .0.image_tokens,
diff --git a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs
index da502243..4f9f8c4c 100644
--- a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs
+++ b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs
@@ -14,7 +14,9 @@ pub enum MtmdTokenizeError {
     NullTextArg,
     #[error("Wrapper received a null bitmaps argument with num_bitmaps > 0")]
     NullBitmapsArgWhenNumBitmapsNonzero,
-    #[error("mtmd_tokenize reported that the number of bitmaps does not match the number of markers in the text")]
+    #[error(
+        "mtmd_tokenize reported that the number of bitmaps does not match the number of markers in the text"
+    )]
     BitmapCountDoesNotMatchMarkerCount,
     #[error("mtmd_tokenize reported an image preprocessing error")]
     ImagePreprocessingError,

From 5e4b7c5edc6fb4ad1d1cc2948acb387bee6eb5bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 17:12:34 +0200
Subject: [PATCH 15/16] use per-wrapper LLAMA_RS_SAMPLER_ACCEPT_OK constant in
 sampler accept test

---
 llama-cpp-bindings/src/sampling.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama-cpp-bindings/src/sampling.rs b/llama-cpp-bindings/src/sampling.rs
index 4c63980c..596d1137 100644
--- a/llama-cpp-bindings/src/sampling.rs
+++ b/llama-cpp-bindings/src/sampling.rs
@@ -1004,7 +1004,7 @@ mod tests {
     #[test]
     fn check_sampler_accept_status_ok() {
         let result = super::check_sampler_accept_status(
-            llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK,
+            llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_OK,
             std::ptr::null_mut(),
         );
 

From f5d0272627e0f669c91614b7f42aa41ad291f12e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= <m.zagajewska@gmail.com>
Date: Sat, 16 May 2026 17:39:06 +0200
Subject: [PATCH 16/16] build llm tests in release mode to avoid windows
 debug-crt _osfile assertions

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index f6830ed4..f92be572 100644
--- a/Makefile
+++ b/Makefile
@@ -5,8 +5,8 @@ DEVICE_FEATURE = $(if $(TEST_DEVICE),--features $(TEST_DEVICE),)
 LLM_BASE_FEATURE_FLAGS = $(DEVICE_FEATURE)
 LLM_QWEN_CAPABLE_FEATURE_FLAGS = $(DEVICE_FEATURE) --features $(QWEN_CAPABLE_FEATURES)
 
-CARGO_TEST_LLM_FLAGS = --no-fail-fast -p llama-cpp-bindings-tests $(LLM_BASE_FEATURE_FLAGS) -- --test-threads=1
-CARGO_TEST_LLM_FLAGS_QWEN_CAPABLE = --no-fail-fast -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- --test-threads=1
+CARGO_TEST_LLM_FLAGS = --release --no-fail-fast -p llama-cpp-bindings-tests $(LLM_BASE_FEATURE_FLAGS) -- --test-threads=1
+CARGO_TEST_LLM_FLAGS_QWEN_CAPABLE = --release --no-fail-fast -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- --test-threads=1
 
 QWEN3_5_0_8B_ENV = \
 	LLAMA_TEST_HF_REPO=unsloth/Qwen3.5-0.8B-GGUF \