From debcd91d5eb0eee8fb3acb091dd6a512ffadc336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Fri, 15 May 2026 16:40:12 +0200 Subject: [PATCH 01/16] add windows and mac github runners --- .../install-build-dependencies/action.yml | 20 ++++++++++++++ .../actions/install-rust-toolchain/action.yml | 11 ++++++++ .github/workflows/unit-tests.yml | 26 +++++++++++-------- 3 files changed, 46 insertions(+), 11 deletions(-) create mode 100644 .github/actions/install-build-dependencies/action.yml create mode 100644 .github/actions/install-rust-toolchain/action.yml diff --git a/.github/actions/install-build-dependencies/action.yml b/.github/actions/install-build-dependencies/action.yml new file mode 100644 index 00000000..dd5bd6d4 --- /dev/null +++ b/.github/actions/install-build-dependencies/action.yml @@ -0,0 +1,20 @@ +name: install-build-dependencies +description: Install OS-specific system packages needed to build llama-cpp-bindings (CMake, libclang, GNU make). + +runs: + using: composite + steps: + - name: install linux build dependencies + if: runner.os == 'Linux' + shell: bash + run: sudo apt-get update && sudo apt-get install -y cmake libclang-dev + + - name: install windows build dependencies + if: runner.os == 'Windows' + shell: bash + run: choco install -y make + + - name: set windows libclang path + if: runner.os == 'Windows' + shell: bash + run: echo "LIBCLANG_PATH=C:\\Program Files\\LLVM\\bin" >> $GITHUB_ENV diff --git a/.github/actions/install-rust-toolchain/action.yml b/.github/actions/install-rust-toolchain/action.yml new file mode 100644 index 00000000..124e13b4 --- /dev/null +++ b/.github/actions/install-rust-toolchain/action.yml @@ -0,0 +1,11 @@ +name: install-rust-toolchain +description: Install the pinned stable Rust toolchain (with rustfmt and clippy) and configure the cargo build cache. + +runs: + using: composite + steps: + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + with: + components: rustfmt, clippy + + - uses: Swatinem/rust-cache@v2 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 9d241530..de8b3dfb 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -9,7 +9,7 @@ env: CARGO_TERM_COLOR: always jobs: - fmt: + formatting: name: formatting runs-on: ubuntu-latest steps: @@ -17,25 +17,29 @@ jobs: with: submodules: recursive - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable - - - uses: Swatinem/rust-cache@v2 + - uses: ./.github/actions/install-rust-toolchain - run: make fmt.check test: - name: tests - runs-on: ubuntu-latest + name: tests (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + defaults: + run: + shell: bash + env: + LLAMA_DISABLE_CCACHE: '1' steps: - uses: actions/checkout@v4 with: submodules: recursive - - name: install system dependencies - run: sudo apt-get update && sudo apt-get install -y cmake libclang-dev - - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + - uses: ./.github/actions/install-build-dependencies - - uses: Swatinem/rust-cache@v2 + - uses: ./.github/actions/install-rust-toolchain - run: make test.unit From 07b4062ca2ba92f20eb1d5c9a4a45d85fe1c9f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Fri, 15 May 2026 17:55:17 +0200 Subject: [PATCH 02/16] fix windows compile errors from bindgen type mismatch on MSVC --- llama-cpp-bindings/src/gguf_type.rs | 5 ++++- llama-cpp-bindings/src/llama_token_attrs.rs | 11 +++++------ llama-cpp-bindings/src/send_logs_to_log.rs | 12 +++++++++++- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/llama-cpp-bindings/src/gguf_type.rs b/llama-cpp-bindings/src/gguf_type.rs index c1060782..33de25cd 100644 --- a/llama-cpp-bindings/src/gguf_type.rs +++ b/llama-cpp-bindings/src/gguf_type.rs @@ -85,7 +85,10 @@ mod tests { #[test] fn from_raw_returns_none_for_unknown() { assert_eq!(GgufType::from_raw(99), None); - assert_eq!(GgufType::from_raw(u32::MAX), None); + assert_eq!( + GgufType::from_raw(llama_cpp_bindings_sys::gguf_type::MAX), + None, + ); } #[test] diff --git a/llama-cpp-bindings/src/llama_token_attrs.rs b/llama-cpp-bindings/src/llama_token_attrs.rs index 37d46651..fbc9be85 100644 --- a/llama-cpp-bindings/src/llama_token_attrs.rs +++ b/llama-cpp-bindings/src/llama_token_attrs.rs @@ -55,7 +55,7 @@ mod tests { #[test] fn try_from_zero_produces_empty_flags() { - let attrs = LlamaTokenAttrs::try_from(0u32); + let attrs = LlamaTokenAttrs::try_from(0); assert!(attrs.is_ok()); assert!(attrs.expect("valid attribute").is_empty()); @@ -63,14 +63,13 @@ mod tests { #[test] fn try_from_invalid_bits_returns_error() { - let invalid_value = 0xFFFF_FFFFu32; - let result = LlamaTokenAttrs::try_from(invalid_value); + let result = LlamaTokenAttrs::try_from(!0); assert!(result.is_err()); - matches!( + assert!(matches!( result.expect_err("should fail"), - LlamaTokenAttrsFromIntError::UnknownValue(_) - ); + LlamaTokenAttrsFromIntError::UnknownValue(_), + )); } #[test] diff --git a/llama-cpp-bindings/src/send_logs_to_log.rs b/llama-cpp-bindings/src/send_logs_to_log.rs index 6bd8fbb7..4fa50e91 100644 --- a/llama-cpp-bindings/src/send_logs_to_log.rs +++ b/llama-cpp-bindings/src/send_logs_to_log.rs @@ -33,6 +33,16 @@ impl LogSource { static LLAMA_SOURCE: OnceLock = OnceLock::new(); static GGML_SOURCE: OnceLock = OnceLock::new(); +#[cfg(target_env = "msvc")] +const fn ggml_level_to_u32(level: llama_cpp_bindings_sys::ggml_log_level) -> u32 { + level.cast_unsigned() +} + +#[cfg(not(target_env = "msvc"))] +const fn ggml_level_to_u32(level: llama_cpp_bindings_sys::ggml_log_level) -> u32 { + level +} + const fn ggml_level_to_incoming(raw: llama_cpp_bindings_sys::ggml_log_level) -> IncomingLogLevel { match raw { llama_cpp_bindings_sys::GGML_LOG_LEVEL_NONE => IncomingLogLevel::None, @@ -41,7 +51,7 @@ const fn ggml_level_to_incoming(raw: llama_cpp_bindings_sys::ggml_log_level) -> llama_cpp_bindings_sys::GGML_LOG_LEVEL_WARN => IncomingLogLevel::Warn, llama_cpp_bindings_sys::GGML_LOG_LEVEL_ERROR => IncomingLogLevel::Error, llama_cpp_bindings_sys::GGML_LOG_LEVEL_CONT => IncomingLogLevel::Cont, - other => IncomingLogLevel::Unknown(other), + other => IncomingLogLevel::Unknown(ggml_level_to_u32(other)), } } From b22817662a6f429ce2362e6dfe1293ad414e8b07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Fri, 15 May 2026 17:56:28 +0200 Subject: [PATCH 03/16] add 30 minute timeouts to unit tests workflow --- .github/workflows/unit-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index de8b3dfb..d04408d6 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -12,6 +12,7 @@ jobs: formatting: name: formatting runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v4 with: @@ -24,6 +25,7 @@ jobs: test: name: tests (${{ matrix.os }}) runs-on: ${{ matrix.os }} + timeout-minutes: 30 strategy: fail-fast: false matrix: From 50b39a34500e8e3d0353bcc30fb2192496d8f622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Fri, 15 May 2026 18:34:26 +0200 Subject: [PATCH 04/16] fix another windows compile error from bindgen type mismatch on MSVC --- llama-cpp-bindings/src/llama_token_attrs.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/llama-cpp-bindings/src/llama_token_attrs.rs b/llama-cpp-bindings/src/llama_token_attrs.rs index fbc9be85..688d228f 100644 --- a/llama-cpp-bindings/src/llama_token_attrs.rs +++ b/llama-cpp-bindings/src/llama_token_attrs.rs @@ -5,6 +5,16 @@ use enumflags2::BitFlags; use crate::llama_token_attr::LlamaTokenAttr; use crate::llama_token_attrs_from_int_error::LlamaTokenAttrsFromIntError; +#[cfg(target_env = "msvc")] +const fn llama_token_type_to_u32(value: llama_cpp_bindings_sys::llama_token_type) -> u32 { + value.cast_unsigned() +} + +#[cfg(not(target_env = "msvc"))] +const fn llama_token_type_to_u32(value: llama_cpp_bindings_sys::llama_token_type) -> u32 { + value +} + /// A set of [`LlamaTokenAttr`] flags. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct LlamaTokenAttrs(pub BitFlags); @@ -27,11 +37,11 @@ impl TryFrom for LlamaTokenAttrs { type Error = LlamaTokenAttrsFromIntError; fn try_from(value: llama_cpp_bindings_sys::llama_vocab_type) -> Result { - Ok(Self(BitFlags::from_bits(value as _).map_err( - |bit_flag_error| { + Ok(Self( + BitFlags::from_bits(llama_token_type_to_u32(value)).map_err(|bit_flag_error| { LlamaTokenAttrsFromIntError::UnknownValue(bit_flag_error.invalid_bits()) - }, - )?)) + })?, + )) } } From 256638f6b54c52622e4ea99fc7649219decb38e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Fri, 15 May 2026 18:58:21 +0200 Subject: [PATCH 05/16] enable rust backtraces in CI tests --- .github/workflows/unit-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index d04408d6..95deb1c8 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -35,6 +35,7 @@ jobs: shell: bash env: LLAMA_DISABLE_CCACHE: '1' + RUST_BACKTRACE: '1' steps: - uses: actions/checkout@v4 with: From 88e5c52266d96e5f35ba3ef8ddf27e76c324cce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 12:24:21 +0200 Subject: [PATCH 06/16] fix windows test crashes from missing backend init and /EHsc --- llama-cpp-bindings-build/src/cpp_wrapper.rs | 1 + llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs | 1 + llama-cpp-bindings/src/ggml_time_us.rs | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/llama-cpp-bindings-build/src/cpp_wrapper.rs b/llama-cpp-bindings-build/src/cpp_wrapper.rs index 722c7e41..fdd8ab37 100644 --- a/llama-cpp-bindings-build/src/cpp_wrapper.rs +++ b/llama-cpp-bindings-build/src/cpp_wrapper.rs @@ -33,6 +33,7 @@ pub fn compile_cpp_wrappers(llama_src: &Path, target_os: &TargetOs) { if target_os.is_msvc() { build.flag("/std:c++17"); + build.flag("/EHsc"); } if target_os.is_android() && cfg!(feature = "static-stdcxx") { diff --git a/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs b/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs index 7af3f01a..92f561e6 100644 --- a/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs +++ b/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs @@ -23,6 +23,7 @@ pub fn compile_mtmd(llama_src: &Path, target_os: &TargetOs) { if target_os.is_msvc() { build.flag("/std:c++17"); + build.flag("/EHsc"); } if target_os.is_android() && cfg!(feature = "static-stdcxx") { diff --git a/llama-cpp-bindings/src/ggml_time_us.rs b/llama-cpp-bindings/src/ggml_time_us.rs index 06f61dfc..4db4b490 100644 --- a/llama-cpp-bindings/src/ggml_time_us.rs +++ b/llama-cpp-bindings/src/ggml_time_us.rs @@ -22,10 +22,15 @@ pub fn ggml_time_us() -> i64 { #[cfg(test)] mod tests { + use serial_test::serial; + use super::ggml_time_us; + use crate::llama_backend::LlamaBackend; #[test] + #[serial] fn returns_positive_value() { + let _backend = LlamaBackend::init().unwrap(); let time_microseconds = ggml_time_us(); assert!(time_microseconds > 0); From a5b958b303d6508e67d8ce89660bd83fd765291e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 15:31:58 +0200 Subject: [PATCH 07/16] bump vendored llama.cpp to b9174 --- llama-cpp-bindings-sys/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-cpp-bindings-sys/llama.cpp b/llama-cpp-bindings-sys/llama.cpp index 846262d7..59778f01 160000 --- a/llama-cpp-bindings-sys/llama.cpp +++ b/llama-cpp-bindings-sys/llama.cpp @@ -1 +1 @@ -Subproject commit 846262d7875dcabf502a150fa3d7b9c770dde7eb +Subproject commit 59778f0196a82db32580bb649d5d839355d6d7bf From ee871026ca87f8cc1ad00e876f7662c5e353ef90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 15:39:05 +0200 Subject: [PATCH 08/16] wrap mtmd and fit ffi entry points to surface c++ exceptions as typed rust errors --- llama-cpp-bindings-sys/wrapper_fit.cpp | 46 ++- llama-cpp-bindings-sys/wrapper_fit.h | 19 +- llama-cpp-bindings-sys/wrapper_mtmd.cpp | 273 ++++++++++++++++ llama-cpp-bindings-sys/wrapper_mtmd.h | 107 +++++++ llama-cpp-bindings/src/error/fit_error.rs | 24 +- llama-cpp-bindings/src/model/params.rs | 47 ++- llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs | 52 +++- .../src/mtmd/mtmd_bitmap_error.rs | 25 +- llama-cpp-bindings/src/mtmd/mtmd_context.rs | 294 ++++++++++++++---- .../src/mtmd/mtmd_encode_error.rs | 14 +- .../src/mtmd/mtmd_eval_error.rs | 23 +- .../src/mtmd/mtmd_init_error.rs | 23 +- .../src/mtmd/mtmd_input_chunk.rs | 43 ++- .../src/mtmd/mtmd_input_chunks.rs | 13 +- .../src/mtmd/mtmd_tokenize_error.rs | 34 +- 15 files changed, 879 insertions(+), 158 deletions(-) create mode 100644 llama-cpp-bindings-sys/wrapper_mtmd.cpp diff --git a/llama-cpp-bindings-sys/wrapper_fit.cpp b/llama-cpp-bindings-sys/wrapper_fit.cpp index 1ec7d169..02eee839 100644 --- a/llama-cpp-bindings-sys/wrapper_fit.cpp +++ b/llama-cpp-bindings-sys/wrapper_fit.cpp @@ -1,10 +1,12 @@ #include "wrapper_fit.h" +#include "wrapper_utils.h" #include +#include #include "llama.cpp/common/fit.h" -extern "C" llama_rs_fit_status llama_rs_fit_params( +extern "C" llama_rs_fit_params_status llama_rs_fit_params( const char * path_model, struct llama_model_params * mparams, struct llama_context_params * cparams, @@ -12,21 +14,49 @@ extern "C" llama_rs_fit_status llama_rs_fit_params( struct llama_model_tensor_buft_override * tensor_buft_overrides, size_t * margins, uint32_t n_ctx_min, - enum ggml_log_level log_level) { + enum ggml_log_level log_level, + int32_t * out_unrecognized_status_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_unrecognized_status_code) { + *out_unrecognized_status_code = 0; + } + try { const common_params_fit_status status = common_fit_params( path_model, mparams, cparams, tensor_split, tensor_buft_overrides, margins, n_ctx_min, log_level); switch (status) { case COMMON_PARAMS_FIT_STATUS_SUCCESS: - return LLAMA_RS_FIT_STATUS_SUCCESS; + return LLAMA_RS_FIT_PARAMS_OK; case COMMON_PARAMS_FIT_STATUS_FAILURE: - return LLAMA_RS_FIT_STATUS_FAILURE; + return LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE; case COMMON_PARAMS_FIT_STATUS_ERROR: - return LLAMA_RS_FIT_STATUS_ERROR; + return LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR; + } + if (out_unrecognized_status_code) { + *out_unrecognized_status_code = static_cast(status); + } + return LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE; + } catch (const std::bad_alloc &) { + return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; + } } - return LLAMA_RS_FIT_STATUS_ERROR; - } catch (const std::exception &) { - return LLAMA_RS_FIT_STATUS_ERROR; + return LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION; } } diff --git a/llama-cpp-bindings-sys/wrapper_fit.h b/llama-cpp-bindings-sys/wrapper_fit.h index c00a2620..9a6ac6ef 100644 --- a/llama-cpp-bindings-sys/wrapper_fit.h +++ b/llama-cpp-bindings-sys/wrapper_fit.h @@ -10,13 +10,16 @@ extern "C" { #endif -typedef enum llama_rs_fit_status { - LLAMA_RS_FIT_STATUS_SUCCESS = 0, - LLAMA_RS_FIT_STATUS_FAILURE = 1, - LLAMA_RS_FIT_STATUS_ERROR = 2, -} llama_rs_fit_status; +typedef enum llama_rs_fit_params_status { + LLAMA_RS_FIT_PARAMS_OK = 0, + LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE, + LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR, + LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE, + LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_fit_params_status; -llama_rs_fit_status llama_rs_fit_params( +llama_rs_fit_params_status llama_rs_fit_params( const char * path_model, struct llama_model_params * mparams, struct llama_context_params * cparams, @@ -24,7 +27,9 @@ llama_rs_fit_status llama_rs_fit_params( struct llama_model_tensor_buft_override * tensor_buft_overrides, size_t * margins, uint32_t n_ctx_min, - enum ggml_log_level log_level); + enum ggml_log_level log_level, + int32_t * out_unrecognized_status_code, + char ** out_error); #ifdef __cplusplus } diff --git a/llama-cpp-bindings-sys/wrapper_mtmd.cpp b/llama-cpp-bindings-sys/wrapper_mtmd.cpp new file mode 100644 index 00000000..bff5b958 --- /dev/null +++ b/llama-cpp-bindings-sys/wrapper_mtmd.cpp @@ -0,0 +1,273 @@ +#include "wrapper_mtmd.h" +#include "wrapper_utils.h" + +#include +#include +#include + +extern "C" llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file( + const char * mmproj_path, + const struct llama_model * text_model, + struct mtmd_context_params ctx_params, + struct mtmd_context ** out_ctx, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (!out_ctx) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG; + } + *out_ctx = nullptr; + if (!mmproj_path) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG; + } + if (!text_model) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG; + } + + try { + struct mtmd_context * ctx = mtmd_init_from_file(mmproj_path, text_model, ctx_params); + if (!ctx) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL; + } + *out_ctx = ctx; + return LLAMA_RS_MTMD_INIT_FROM_FILE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_from_file( + struct mtmd_context * ctx, + const char * fname, + struct mtmd_bitmap ** out_bitmap, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (!out_bitmap) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG; + } + *out_bitmap = nullptr; + if (!ctx) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG; + } + if (!fname) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG; + } + + try { + struct mtmd_bitmap * bitmap = mtmd_helper_bitmap_init_from_file(ctx, fname); + if (!bitmap) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL; + } + *out_bitmap = bitmap; + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize( + struct mtmd_context * ctx, + struct mtmd_input_chunks * output, + const struct mtmd_input_text * text, + const struct mtmd_bitmap ** bitmaps, + size_t num_bitmaps, + int32_t * out_undocumented_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_undocumented_return_code) { + *out_undocumented_return_code = 0; + } + if (!ctx) { + return LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG; + } + if (!output) { + return LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG; + } + if (!text) { + return LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG; + } + if (num_bitmaps > 0 && !bitmaps) { + return LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO; + } + + try { + int32_t result = mtmd_tokenize(ctx, output, text, bitmaps, num_bitmaps); + switch (result) { + case 0: + return LLAMA_RS_MTMD_TOKENIZE_OK; + case 1: + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT; + case 2: + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR; + default: + if (out_undocumented_return_code) { + *out_undocumented_return_code = result; + } + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE; + } + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk( + struct mtmd_context * ctx, + const struct mtmd_input_chunk * chunk, + int32_t * out_vendored_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_vendored_return_code) { + *out_vendored_return_code = 0; + } + if (!ctx) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG; + } + if (!chunk) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG; + } + + try { + int32_t result = mtmd_encode_chunk(ctx, chunk); + if (result != 0) { + if (out_vendored_return_code) { + *out_vendored_return_code = result; + } + return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE; + } + return LLAMA_RS_MTMD_ENCODE_CHUNK_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_single( + struct mtmd_context * ctx, + struct llama_context * lctx, + const struct mtmd_input_chunk * chunk, + llama_pos n_past, + llama_seq_id seq_id, + int32_t n_batch, + bool logits_last, + llama_pos * out_new_n_past, + int32_t * out_vendored_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_vendored_return_code) { + *out_vendored_return_code = 0; + } + if (!ctx) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG; + } + if (!lctx) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG; + } + if (!chunk) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG; + } + if (!out_new_n_past) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG; + } + + try { + int32_t result = mtmd_helper_eval_chunk_single( + ctx, lctx, chunk, n_past, seq_id, n_batch, logits_last, out_new_n_past); + if (result != 0) { + if (out_vendored_return_code) { + *out_vendored_return_code = result; + } + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE; + } + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION; + } +} diff --git a/llama-cpp-bindings-sys/wrapper_mtmd.h b/llama-cpp-bindings-sys/wrapper_mtmd.h index 72fb2111..ac2f5314 100644 --- a/llama-cpp-bindings-sys/wrapper_mtmd.h +++ b/llama-cpp-bindings-sys/wrapper_mtmd.h @@ -1,2 +1,109 @@ +#pragma once + +#include "llama.cpp/include/llama.h" #include "llama.cpp/tools/mtmd/mtmd.h" #include "llama.cpp/tools/mtmd/mtmd-helper.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum llama_rs_mtmd_init_from_file_status { + LLAMA_RS_MTMD_INIT_FROM_FILE_OK = 0, + LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG, + LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG, + LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG, + LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL, + LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_init_from_file_status; + +llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file( + const char * mmproj_path, + const struct llama_model * text_model, + struct mtmd_context_params ctx_params, + struct mtmd_context ** out_ctx, + char ** out_error); + +typedef enum llama_rs_mtmd_bitmap_init_from_file_status { + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK = 0, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_bitmap_init_from_file_status; + +llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_from_file( + struct mtmd_context * ctx, + const char * fname, + struct mtmd_bitmap ** out_bitmap, + char ** out_error); + +typedef enum llama_rs_mtmd_tokenize_status { + LLAMA_RS_MTMD_TOKENIZE_OK = 0, + LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG, + LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG, + LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG, + LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO, + LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT, + LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR, + LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE, + LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_tokenize_status; + +llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize( + struct mtmd_context * ctx, + struct mtmd_input_chunks * output, + const struct mtmd_input_text * text, + const struct mtmd_bitmap ** bitmaps, + size_t num_bitmaps, + int32_t * out_undocumented_return_code, + char ** out_error); + +typedef enum llama_rs_mtmd_encode_chunk_status { + LLAMA_RS_MTMD_ENCODE_CHUNK_OK = 0, + LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG, + LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG, + LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE, + LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_encode_chunk_status; + +llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk( + struct mtmd_context * ctx, + const struct mtmd_input_chunk * chunk, + int32_t * out_vendored_return_code, + char ** out_error); + +typedef enum llama_rs_mtmd_eval_chunk_single_status { + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK = 0, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_eval_chunk_single_status; + +llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_single( + struct mtmd_context * ctx, + struct llama_context * lctx, + const struct mtmd_input_chunk * chunk, + llama_pos n_past, + llama_seq_id seq_id, + int32_t n_batch, + bool logits_last, + llama_pos * out_new_n_past, + int32_t * out_vendored_return_code, + char ** out_error); + +#ifdef __cplusplus +} +#endif diff --git a/llama-cpp-bindings/src/error/fit_error.rs b/llama-cpp-bindings/src/error/fit_error.rs index 7585530d..9b2fbc5e 100644 --- a/llama-cpp-bindings/src/error/fit_error.rs +++ b/llama-cpp-bindings/src/error/fit_error.rs @@ -1,11 +1,19 @@ /// Returned by [`crate::model::params::LlamaModelParams::fit_params`]. -#[derive(Debug, Clone, Copy, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)] pub enum FitError { - /// Could not find allocations that fit available memory. - #[error("could not find allocations that fit available memory")] - Failure, - /// A hard error occurred during fitting (e.g. model not found at the specified path, - /// or the C++ wrapper threw an exception). - #[error("hard error during parameter fitting")] - Error, + /// Vendored `common_fit_params` reported FAILURE: no allocation that fits available memory was found. + #[error("common_fit_params reported FAILURE: no allocations that fit available memory")] + VendoredReportedFailure, + /// Vendored `common_fit_params` reported ERROR: a hard error occurred during fitting (e.g. model file not found). + #[error("common_fit_params reported ERROR: hard error during parameter fitting")] + VendoredReportedError, + /// Vendored `common_fit_params` returned a status code the wrapper does not recognise. + #[error("common_fit_params returned an unrecognised status code: {code}")] + VendoredReturnedUnrecognizedStatusCode { code: i32 }, + /// Wrapper failed to duplicate the C++ exception message into a Rust-owned string. + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + /// Vendored `common_fit_params` threw a C++ exception caught at the wrapper boundary. + #[error("common_fit_params threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/model/params.rs b/llama-cpp-bindings/src/model/params.rs index ebd7edd7..446f9334 100644 --- a/llama-cpp-bindings/src/model/params.rs +++ b/llama-cpp-bindings/src/model/params.rs @@ -421,9 +421,7 @@ impl LlamaModelParams { /// /// # Errors /// - /// Returns [`FitError::Failure`] if no fitting allocation could be found, or - /// [`FitError::Error`] on a hard error (e.g. the model file could not be read or the C++ - /// implementation threw an exception). + /// Returns one of the [`FitError`] variants matching the vendored wrapper's status code. pub fn fit_params( mut self: Pin<&mut Self>, model_path: &CStr, @@ -450,6 +448,9 @@ impl LlamaModelParams { self.params.tensor_split = null::(); self.params.tensor_buft_overrides = null(); + let mut out_unrecognized_status_code: i32 = 0; + let mut out_error: *mut c_char = std::ptr::null_mut(); + let status = unsafe { llama_cpp_bindings_sys::llama_rs_fit_params( model_path.as_ptr(), @@ -460,13 +461,38 @@ impl LlamaModelParams { margins.as_mut_ptr(), n_ctx_min, log_level, + &raw mut out_unrecognized_status_code, + &raw mut out_error, ) }; match status { - llama_cpp_bindings_sys::LLAMA_RS_FIT_STATUS_SUCCESS => {} - llama_cpp_bindings_sys::LLAMA_RS_FIT_STATUS_FAILURE => return Err(FitError::Failure), - _ => return Err(FitError::Error), + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_OK => {} + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE => { + return Err(FitError::VendoredReportedFailure); + } + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR => { + return Err(FitError::VendoredReportedError); + } + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE => { + return Err(FitError::VendoredReturnedUnrecognizedStatusCode { + code: out_unrecognized_status_code, + }); + } + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED => { + return Err(FitError::ErrorStringAllocationFailed); + } + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { + crate::ffi_error_reader::read_and_free_cpp_error(out_error) + }; + return Err(FitError::VendoredThrewCxxException { message }); + } + other => { + unreachable!( + "llama_rs_fit_params returned unrecognized wrapper status: {other}" + ); + } } self.params.tensor_split = self.tensor_split.as_ptr(); @@ -829,6 +855,13 @@ mod tests { llama_cpp_bindings_sys::GGML_LOG_LEVEL_NONE, ); - assert_eq!(result, Err(FitError::Error)); + assert!( + matches!( + result, + Err(FitError::VendoredReportedError) + | Err(FitError::VendoredThrewCxxException { .. }) + ), + "expected VendoredReportedError or VendoredThrewCxxException, got {result:?}" + ); } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs b/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs index 08e2ce6c..b3c6b59f 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs @@ -1,7 +1,10 @@ use std::ffi::{CStr, CString, c_char}; +use std::path::PathBuf; use std::ptr::NonNull; use std::slice; +use crate::ffi_error_reader::read_and_free_cpp_error; + use super::mtmd_bitmap_error::MtmdBitmapError; use super::mtmd_context::MtmdContext; @@ -104,20 +107,55 @@ impl MtmdBitmap { /// /// # Errors /// - /// * `CStringError` - Path contains null bytes - /// * `NullResult` - File could not be loaded or processed + /// Returns an [`MtmdBitmapError`] variant matching the wrapper's status code. pub fn from_file(ctx: &MtmdContext, path: &str) -> Result { let path_cstr = CString::new(path)?; - let bitmap = unsafe { - llama_cpp_bindings_sys::mtmd_helper_bitmap_init_from_file( + let mut out_bitmap: *mut llama_cpp_bindings_sys::mtmd_bitmap = std::ptr::null_mut(); + let mut out_error: *mut c_char = std::ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_bitmap_init_from_file( ctx.context.as_ptr(), path_cstr.as_ptr(), + &raw mut out_bitmap, + &raw mut out_error, ) }; - let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::NullResult)?; - - Ok(Self { bitmap }) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK => { + let bitmap = NonNull::new(out_bitmap).ok_or_else(|| { + MtmdBitmapError::VendoredReturnedNull { + path: PathBuf::from(path), + } + })?; + Ok(Self { bitmap }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG => { + Err(MtmdBitmapError::NullCtxArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG => { + Err(MtmdBitmapError::NullFnameArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG => { + Err(MtmdBitmapError::NullOutBitmapArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL => { + Err(MtmdBitmapError::VendoredReturnedNull { + path: PathBuf::from(path), + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdBitmapError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdBitmapError::VendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_mtmd_bitmap_init_from_file returned unrecognized status: {other}" + ), + } } /// Create a bitmap from a buffer containing file data. diff --git a/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs index c0ad849c..b866e787 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs @@ -1,16 +1,27 @@ -/// Errors that can occur when working with MTMD bitmaps +use std::path::PathBuf; + #[derive(thiserror::Error, Debug)] pub enum MtmdBitmapError { - /// Failed to create `CString` from input - #[error("Failed to create CString: {0}")] + #[error("Failed to create CString from bitmap-source path: {0}")] CStringError(#[from] std::ffi::NulError), - /// Invalid data size for bitmap + #[error("Bitmap-source path is not valid UTF-8: {0:?}")] + PathToStrError(PathBuf), #[error("Invalid data size for bitmap")] InvalidDataSize, - /// Image dimensions too small for processing (minimum 2x2) #[error("Image dimensions too small: {0}x{1} (minimum 2x2)")] ImageDimensionsTooSmall(u32, u32), - /// Bitmap creation returned null - #[error("Bitmap creation returned null")] + #[error("mtmd_bitmap_init / mtmd_bitmap_init_from_audio returned null")] NullResult, + #[error("Internal wrapper invariant violated: caller did not pass an out-bitmap pointer")] + NullOutBitmapArg, + #[error("Wrapper received a null mtmd-context argument")] + NullCtxArg, + #[error("Wrapper received a null bitmap-source-path argument")] + NullFnameArg, + #[error("mtmd_helper_bitmap_init_from_file returned null without throwing for path: {path:?}")] + VendoredReturnedNull { path: PathBuf }, + #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("mtmd_helper_bitmap_init_from_file threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_context.rs b/llama-cpp-bindings/src/mtmd/mtmd_context.rs index 4445a6ad..d8952401 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_context.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_context.rs @@ -1,6 +1,8 @@ use std::ffi::CString; +use std::ffi::c_char; use std::ptr::NonNull; +use crate::ffi_error_reader::read_and_free_cpp_error; use crate::model::LlamaModel; use super::mtmd_bitmap::MtmdBitmap; @@ -12,19 +14,73 @@ use super::mtmd_input_chunks::MtmdInputChunks; use super::mtmd_input_text::MtmdInputText; use super::mtmd_tokenize_error::MtmdTokenizeError; -const fn tokenize_result_to_error(result: i32) -> MtmdTokenizeError { - match result { - 1 => MtmdTokenizeError::BitmapCountMismatch, - 2 => MtmdTokenizeError::ImagePreprocessingError, - _ => MtmdTokenizeError::UnknownError(result), +fn map_tokenize_status( + status: llama_cpp_bindings_sys::llama_rs_mtmd_tokenize_status, + undocumented_return_code: i32, + out_error: *mut c_char, +) -> Result<(), MtmdTokenizeError> { + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG => { + Err(MtmdTokenizeError::NullCtxArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG => { + Err(MtmdTokenizeError::NullOutputArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG => { + Err(MtmdTokenizeError::NullTextArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO => { + Err(MtmdTokenizeError::NullBitmapsArgWhenNumBitmapsNonzero) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT => { + Err(MtmdTokenizeError::BitmapCountDoesNotMatchMarkerCount) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR => { + Err(MtmdTokenizeError::ImagePreprocessingError) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE => { + Err(MtmdTokenizeError::VendoredReturnedUndocumentedNonzeroCode { + code: undocumented_return_code, + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdTokenizeError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdTokenizeError::VendoredThrewCxxException { message }) + } + other => unreachable!("llama_rs_mtmd_tokenize returned unrecognized status: {other}"), } } -const fn check_encode_result(result: i32) -> Result<(), MtmdEncodeError> { - if result == 0 { - Ok(()) - } else { - Err(MtmdEncodeError::EncodeFailure(result)) +fn map_encode_chunk_status( + status: llama_cpp_bindings_sys::llama_rs_mtmd_encode_chunk_status, + vendored_return_code: i32, + out_error: *mut c_char, +) -> Result<(), MtmdEncodeError> { + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG => { + Err(MtmdEncodeError::NullCtxArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG => { + Err(MtmdEncodeError::NullChunkArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE => { + Err(MtmdEncodeError::VendoredReturnedNonzeroCode { + code: vendored_return_code, + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdEncodeError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdEncodeError::VendoredThrewCxxException { message }) + } + other => unreachable!("llama_rs_mtmd_encode_chunk returned unrecognized status: {other}"), } } @@ -46,9 +102,7 @@ impl MtmdContext { /// /// # Errors /// - /// This function will return an error if: - /// - The path cannot be converted to a C string - /// - The underlying C function returns null (indicating initialization failure) + /// Returns an [`MtmdInitError`] variant matching the wrapper's status code. pub fn init_from_file( mmproj_path: &str, text_model: &LlamaModel, @@ -57,17 +111,53 @@ impl MtmdContext { let path_cstr = CString::new(mmproj_path)?; let ctx_params = llama_cpp_bindings_sys::mtmd_context_params::from(params); - let context = unsafe { - llama_cpp_bindings_sys::mtmd_init_from_file( + let mut out_ctx: *mut llama_cpp_bindings_sys::mtmd_context = std::ptr::null_mut(); + let mut out_error: *mut c_char = std::ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_init_from_file( path_cstr.as_ptr(), text_model.model.as_ptr(), ctx_params, + &raw mut out_ctx, + &raw mut out_error, ) }; - let context = NonNull::new(context).ok_or(MtmdInitError::NullResult)?; - - Ok(Self { context }) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_OK => { + let context = NonNull::new(out_ctx).ok_or_else(|| { + MtmdInitError::VendoredReturnedNull { + path: std::path::PathBuf::from(mmproj_path), + } + })?; + Ok(Self { context }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG => { + Err(MtmdInitError::NullMmprojPathArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG => { + Err(MtmdInitError::NullTextModelArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG => { + Err(MtmdInitError::NullOutCtxArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL => { + Err(MtmdInitError::VendoredReturnedNull { + path: std::path::PathBuf::from(mmproj_path), + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdInitError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdInitError::VendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_mtmd_init_from_file returned unrecognized status: {other}" + ), + } } /// Check whether non-causal attention mask is needed before `llama_decode` @@ -117,24 +207,7 @@ impl MtmdContext { /// /// # Errors /// - /// * `BitmapCountMismatch` - Number of bitmaps doesn't match number of markers - /// * `ImagePreprocessingError` - Error occurred during image preprocessing - /// * `UnknownError` - Other tokenization error occurred - /// - /// # Example - /// - /// ```no_run - /// # use llama_cpp_bindings::mtmd::*; - /// # fn example(ctx: &MtmdContext, bitmap: &MtmdBitmap) -> Result<(), Box> { - /// let text = MtmdInputText { - /// text: "Here is an image: <__media__>\nDescribe it.".to_string(), - /// add_special: true, - /// parse_special: true, - /// }; - /// let chunks = ctx.tokenize(text, &[bitmap])?; - /// # Ok(()) - /// # } - /// ``` + /// Returns an [`MtmdTokenizeError`] variant matching the wrapper's status code. pub fn tokenize( &self, text: MtmdInputText, @@ -153,34 +226,44 @@ impl MtmdContext { .map(|bitmap| bitmap.bitmap.as_ptr().cast_const()) .collect(); - let result = unsafe { - llama_cpp_bindings_sys::mtmd_tokenize( + let mut out_undocumented_return_code: i32 = 0; + let mut out_error: *mut c_char = std::ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_tokenize( self.context.as_ptr(), chunks.chunks.as_ptr(), &raw const input_text, bitmap_ptrs.as_ptr().cast_mut(), bitmaps.len(), + &raw mut out_undocumented_return_code, + &raw mut out_error, ) }; - if result == 0 { - Ok(chunks) - } else { - Err(tokenize_result_to_error(result)) - } + map_tokenize_status(status, out_undocumented_return_code, out_error)?; + Ok(chunks) } /// Encode a chunk for image/audio processing. /// /// # Errors /// - /// Returns `MtmdEncodeError::EncodeFailure` if encoding fails. + /// Returns an [`MtmdEncodeError`] variant matching the wrapper's status code. pub fn encode_chunk(&self, chunk: &MtmdInputChunk) -> Result<(), MtmdEncodeError> { - let result = unsafe { - llama_cpp_bindings_sys::mtmd_encode_chunk(self.context.as_ptr(), chunk.chunk.as_ptr()) + let mut out_vendored_return_code: i32 = 0; + let mut out_error: *mut c_char = std::ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_encode_chunk( + self.context.as_ptr(), + chunk.chunk.as_ptr(), + &raw mut out_vendored_return_code, + &raw mut out_error, + ) }; - check_encode_result(result) + map_encode_chunk_status(status, out_vendored_return_code, out_error) } } @@ -192,44 +275,119 @@ impl Drop for MtmdContext { #[cfg(test)] mod unit_tests { - use super::check_encode_result; - use super::tokenize_result_to_error; + use super::map_encode_chunk_status; + use super::map_tokenize_status; + use crate::mtmd::mtmd_encode_error::MtmdEncodeError; + use crate::mtmd::mtmd_tokenize_error::MtmdTokenizeError; #[test] - fn tokenize_result_bitmap_count_mismatch() { - let error = tokenize_result_to_error(1); + fn tokenize_status_maps_bitmap_count_mismatch() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT, + 0, + std::ptr::null_mut(), + ); - assert!(error.to_string().contains("does not match")); + assert!(matches!( + result, + Err(MtmdTokenizeError::BitmapCountDoesNotMatchMarkerCount) + )); } #[test] - fn tokenize_result_image_preprocessing_error() { - let error = tokenize_result_to_error(2); + fn tokenize_status_maps_image_preprocessing_error() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR, + 0, + std::ptr::null_mut(), + ); - assert!(error.to_string().contains("Image preprocessing")); + assert!(matches!( + result, + Err(MtmdTokenizeError::ImagePreprocessingError) + )); } #[test] - fn tokenize_result_unknown_error() { - let error = tokenize_result_to_error(42); + fn tokenize_status_maps_undocumented_nonzero_code_with_value() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE, + 42, + std::ptr::null_mut(), + ); - assert!(error.to_string().contains("Unknown error: 42")); + assert!(matches!( + result, + Err(MtmdTokenizeError::VendoredReturnedUndocumentedNonzeroCode { code: 42 }) + )); } #[test] - fn check_encode_result_ok_for_zero() { - assert!(check_encode_result(0).is_ok()); + fn tokenize_status_maps_ok_to_unit() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_OK, + 0, + std::ptr::null_mut(), + ); + + assert!(matches!(result, Ok(()))); } #[test] - fn check_encode_result_error_for_nonzero() { - let result = check_encode_result(5); - - assert!( - result - .unwrap_err() - .to_string() - .contains("Encode failed with code: 5") + fn tokenize_status_maps_null_ctx_arg() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG, + 0, + std::ptr::null_mut(), ); + + assert!(matches!(result, Err(MtmdTokenizeError::NullCtxArg))); + } + + #[test] + fn encode_chunk_status_maps_ok_to_unit() { + let result = map_encode_chunk_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_OK, + 0, + std::ptr::null_mut(), + ); + + assert!(matches!(result, Ok(()))); + } + + #[test] + fn encode_chunk_status_maps_nonzero_code_with_value() { + let result = map_encode_chunk_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE, + 5, + std::ptr::null_mut(), + ); + + assert!(matches!( + result, + Err(MtmdEncodeError::VendoredReturnedNonzeroCode { code: 5 }) + )); + } + + #[test] + fn encode_chunk_status_maps_null_ctx_arg() { + let result = map_encode_chunk_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG, + 0, + std::ptr::null_mut(), + ); + + assert!(matches!(result, Err(MtmdEncodeError::NullCtxArg))); + } + + #[test] + fn encode_chunk_status_maps_null_chunk_arg() { + let result = map_encode_chunk_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG, + 0, + std::ptr::null_mut(), + ); + + assert!(matches!(result, Err(MtmdEncodeError::NullChunkArg))); } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs index fabd3311..47106390 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs @@ -1,7 +1,13 @@ -/// Errors that can occur during encoding #[derive(thiserror::Error, Debug)] pub enum MtmdEncodeError { - /// Encode operation failed - #[error("Encode failed with code: {0}")] - EncodeFailure(i32), + #[error("Wrapper received a null mtmd-context argument")] + NullCtxArg, + #[error("Wrapper received a null chunk argument")] + NullChunkArg, + #[error("mtmd_encode_chunk returned nonzero code: {code}")] + VendoredReturnedNonzeroCode { code: i32 }, + #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("mtmd_encode_chunk threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs index c4efa643..40431fc0 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs @@ -1,25 +1,30 @@ use crate::mtmd::image_chunk_batch_size_mismatch::ImageChunkBatchSizeMismatch; -/// Errors that can occur during evaluation #[derive(thiserror::Error, Debug)] pub enum MtmdEvalError { - /// Requested batch size exceeds the context's maximum batch size #[error("batch size {requested} exceeds context batch size {context_max}")] BatchSizeExceedsContextLimit { - /// The batch size requested in `eval_chunks` requested: i32, - /// The maximum batch size configured on the context context_max: u32, }, - /// An image chunk's token count exceeds the per-decode `n_batch` budget, - /// so handing it to `llama_decode` would trip the `GGML_ASSERT`. #[error( "image chunk has {} tokens but n_batch is {}", .0.image_tokens, .0.n_batch, )] ImageChunkExceedsBatchSize(ImageChunkBatchSizeMismatch), - /// Evaluation operation failed - #[error("Eval failed with code: {0}")] - EvalFailure(i32), + #[error("Wrapper received a null mtmd-context argument")] + NullMtmdCtxArg, + #[error("Wrapper received a null llama-context argument")] + NullLlamaCtxArg, + #[error("Wrapper received a null chunk argument")] + NullChunkArg, + #[error("Internal wrapper invariant violated: caller did not pass an out-new-n-past pointer")] + NullOutNewNPastArg, + #[error("mtmd_helper_eval_chunk_single returned nonzero code: {code}")] + VendoredReturnedNonzeroCode { code: i32 }, + #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("mtmd_helper_eval_chunk_single threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs index 755d6a55..ec18fca9 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs @@ -1,10 +1,21 @@ -/// Errors that can occur when initializing MTMD context +use std::path::PathBuf; + #[derive(thiserror::Error, Debug)] pub enum MtmdInitError { - /// Failed to create `CString` from input - #[error("Failed to create CString: {0}")] + #[error("Failed to create CString from mmproj path: {0}")] CStringError(#[from] std::ffi::NulError), - /// MTMD context initialization returned null - #[error("MTMD context initialization returned null")] - NullResult, + #[error("Mmproj path is not valid UTF-8: {0:?}")] + PathToStrError(PathBuf), + #[error("Internal wrapper invariant violated: caller did not pass an out-ctx pointer")] + NullOutCtxArg, + #[error("Wrapper received a null mmproj-path argument")] + NullMmprojPathArg, + #[error("Wrapper received a null text-model argument")] + NullTextModelArg, + #[error("mtmd_init_from_file returned null without throwing for mmproj path: {path:?}")] + VendoredReturnedNull { path: PathBuf }, + #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("mtmd_init_from_file threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs index 50643547..e4f7a80d 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs @@ -1,8 +1,10 @@ use std::ffi::CStr; +use std::ffi::c_char; use std::ptr::NonNull; use std::slice; use crate::context::LlamaContext; +use crate::ffi_error_reader::read_and_free_cpp_error; use crate::token::LlamaToken; use super::image_chunk_batch_size_mismatch::ImageChunkBatchSizeMismatch; @@ -162,9 +164,11 @@ impl MtmdInputChunk { } let mut final_position: llama_cpp_bindings_sys::llama_pos = start_position; + let mut out_vendored_return_code: i32 = 0; + let mut out_error: *mut c_char = std::ptr::null_mut(); - let result = unsafe { - llama_cpp_bindings_sys::mtmd_helper_eval_chunk_single( + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_eval_chunk_single( mtmd_ctx.context.as_ptr(), llama_ctx.context.as_ptr(), self.chunk.as_ptr(), @@ -173,13 +177,40 @@ impl MtmdInputChunk { n_batch, logits_last, &raw mut final_position, + &raw mut out_vendored_return_code, + &raw mut out_error, ) }; - if result == 0 { - Ok(final_position) - } else { - Err(MtmdEvalError::EvalFailure(result)) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK => Ok(final_position), + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG => { + Err(MtmdEvalError::NullMtmdCtxArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG => { + Err(MtmdEvalError::NullLlamaCtxArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG => { + Err(MtmdEvalError::NullChunkArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG => { + Err(MtmdEvalError::NullOutNewNPastArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE => { + Err(MtmdEvalError::VendoredReturnedNonzeroCode { + code: out_vendored_return_code, + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdEvalError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdEvalError::VendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_mtmd_eval_chunk_single returned unrecognized status: {other}" + ), } } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs index a74eb296..9b2879a0 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs @@ -11,7 +11,7 @@ const fn check_eval_result(result: i32) -> Result<(), MtmdEvalError> { if result == 0 { Ok(()) } else { - Err(MtmdEvalError::EvalFailure(result)) + Err(MtmdEvalError::VendoredReturnedNonzeroCode { code: result }) } } @@ -174,15 +174,14 @@ mod tests { #[test] fn check_eval_result_error_for_nonzero() { + use super::MtmdEvalError; use super::check_eval_result; let result = check_eval_result(7); - assert!( - result - .unwrap_err() - .to_string() - .contains("Eval failed with code: 7") - ); + assert!(matches!( + result, + Err(MtmdEvalError::VendoredReturnedNonzeroCode { code: 7 }) + )); } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs index 8886bc19..da502243 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs @@ -1,21 +1,27 @@ use crate::mtmd::mtmd_input_chunks_error::MtmdInputChunksError; -/// Errors that can occur during tokenization #[derive(thiserror::Error, Debug)] pub enum MtmdTokenizeError { - /// Number of bitmaps does not match number of markers in text - #[error("Number of bitmaps does not match number of markers")] - BitmapCountMismatch, - /// Image preprocessing error occurred - #[error("Image preprocessing error")] - ImagePreprocessingError, - /// Failed to create input chunks collection + #[error("Failed to create CString from input text: {0}")] + CStringError(#[from] std::ffi::NulError), #[error("{0}")] InputChunksError(#[from] MtmdInputChunksError), - /// Text contains characters that cannot be converted to C string - #[error("Failed to create CString from text: {0}")] - CStringError(#[from] std::ffi::NulError), - /// Unknown error occurred during tokenization - #[error("Unknown error: {0}")] - UnknownError(i32), + #[error("Wrapper received a null mtmd-context argument")] + NullCtxArg, + #[error("Wrapper received a null output-chunks argument")] + NullOutputArg, + #[error("Wrapper received a null input-text argument")] + NullTextArg, + #[error("Wrapper received a null bitmaps argument with num_bitmaps > 0")] + NullBitmapsArgWhenNumBitmapsNonzero, + #[error("mtmd_tokenize reported that the number of bitmaps does not match the number of markers in the text")] + BitmapCountDoesNotMatchMarkerCount, + #[error("mtmd_tokenize reported an image preprocessing error")] + ImagePreprocessingError, + #[error("mtmd_tokenize returned an undocumented nonzero code: {code}")] + VendoredReturnedUndocumentedNonzeroCode { code: i32 }, + #[error("Wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("mtmd_tokenize threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } From bd7ff5dde0d75f2406c34134c408cba609801559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 15:55:32 +0200 Subject: [PATCH 09/16] wrap encode and memory seq ffi entry points to surface c++ exceptions as typed rust errors --- llama-cpp-bindings-sys/wrapper_common.cpp | 176 +++++++++++++----- llama-cpp-bindings-sys/wrapper_common.h | 44 ++++- .../tests/context_kv_cache.rs | 19 +- llama-cpp-bindings/src/context.rs | 60 +++--- llama-cpp-bindings/src/context/kv_cache.rs | 84 ++++++--- llama-cpp-bindings/src/error.rs | 4 + llama-cpp-bindings/src/error/encode_error.rs | 37 ++-- .../src/error/kv_cache_seq_add_error.rs | 19 ++ .../src/error/kv_cache_seq_div_error.rs | 19 ++ llama-cpp-bindings/src/lib.rs | 11 +- llama-cpp-bindings/src/model/params.rs | 3 +- 11 files changed, 339 insertions(+), 137 deletions(-) create mode 100644 llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs create mode 100644 llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs diff --git a/llama-cpp-bindings-sys/wrapper_common.cpp b/llama-cpp-bindings-sys/wrapper_common.cpp index e195d5d7..45e47bae 100644 --- a/llama-cpp-bindings-sys/wrapper_common.cpp +++ b/llama-cpp-bindings-sys/wrapper_common.cpp @@ -171,80 +171,160 @@ extern "C" llama_pos llama_rs_memory_seq_pos_max( if (!ctx) { return -1; } - auto * mem = llama_get_memory(ctx); - if (!mem) { - return -1; - } - uint32_t n_seq_max = llama_n_seq_max(ctx); - if (seq_id < 0 || (uint32_t) seq_id >= n_seq_max) { + try { + auto * mem = llama_get_memory(ctx); + if (!mem) { + return -1; + } + uint32_t n_seq_max = llama_n_seq_max(ctx); + if (seq_id < 0 || (uint32_t) seq_id >= n_seq_max) { + return -1; + } + + return llama_memory_seq_pos_max(mem, seq_id); + } catch (...) { return -1; } - - return llama_memory_seq_pos_max(mem, seq_id); } -extern "C" llama_rs_status llama_rs_encode( +extern "C" llama_rs_encode_status llama_rs_encode( struct llama_context * ctx, - struct llama_batch batch) { - if (!ctx) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + struct llama_batch batch, + int32_t * out_vendored_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; } - const auto * model = llama_get_model(ctx); - if (!llama_model_has_encoder(model)) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (out_vendored_return_code) { + *out_vendored_return_code = 0; } - int32_t result = llama_encode(ctx, batch); - if (result != 0) { - return LLAMA_RS_STATUS_EXCEPTION; + if (!ctx) { + return LLAMA_RS_ENCODE_NULL_CTX_ARG; + } + try { + const auto * model = llama_get_model(ctx); + if (!llama_model_has_encoder(model)) { + return LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER; + } + int32_t result = llama_encode(ctx, batch); + if (result != 0) { + if (out_vendored_return_code) { + *out_vendored_return_code = result; + } + return LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE; + } + return LLAMA_RS_ENCODE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION; } - - return LLAMA_RS_STATUS_OK; } -extern "C" llama_rs_status llama_rs_memory_seq_add( +extern "C" llama_rs_memory_seq_add_status llama_rs_memory_seq_add( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, - llama_pos shift) { - if (!ctx) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + llama_pos shift, + char ** out_error) { + if (out_error) { + *out_error = nullptr; } - const auto * model = llama_get_model(ctx); - const auto rope = llama_model_rope_type(model); - if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!ctx) { + return LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG; } - auto * mem = llama_get_memory(ctx); - if (!mem) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + try { + const auto * model = llama_get_model(ctx); + const auto rope = llama_model_rope_type(model); + if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) { + return LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE; + } + auto * mem = llama_get_memory(ctx); + if (!mem) { + return LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM; + } + llama_memory_seq_add(mem, seq_id, p0, p1, shift); + return LLAMA_RS_MEMORY_SEQ_ADD_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION; } - llama_memory_seq_add(mem, seq_id, p0, p1, shift); - - return LLAMA_RS_STATUS_OK; } -extern "C" llama_rs_status llama_rs_memory_seq_div( +extern "C" llama_rs_memory_seq_div_status llama_rs_memory_seq_div( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, - int d) { - if (!ctx) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + int d, + char ** out_error) { + if (out_error) { + *out_error = nullptr; } - const auto * model = llama_get_model(ctx); - const auto rope = llama_model_rope_type(model); - if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!ctx) { + return LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG; } - auto * mem = llama_get_memory(ctx); - if (!mem) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + try { + const auto * model = llama_get_model(ctx); + const auto rope = llama_model_rope_type(model); + if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) { + return LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE; + } + auto * mem = llama_get_memory(ctx); + if (!mem) { + return LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM; + } + llama_memory_seq_div(mem, seq_id, p0, p1, d); + return LLAMA_RS_MEMORY_SEQ_DIV_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION; } - llama_memory_seq_div(mem, seq_id, p0, p1, d); - - return LLAMA_RS_STATUS_OK; } extern "C" llama_rs_status llama_rs_sampler_sample( diff --git a/llama-cpp-bindings-sys/wrapper_common.h b/llama-cpp-bindings-sys/wrapper_common.h index 3d990abf..61ba9d2f 100644 --- a/llama-cpp-bindings-sys/wrapper_common.h +++ b/llama-cpp-bindings-sys/wrapper_common.h @@ -4,6 +4,7 @@ #include #include +#include struct llama_model; struct llama_sampler; @@ -65,23 +66,54 @@ llama_pos llama_rs_memory_seq_pos_max( struct llama_context * ctx, llama_seq_id seq_id); -llama_rs_status llama_rs_encode( +typedef enum llama_rs_encode_status { + LLAMA_RS_ENCODE_OK = 0, + LLAMA_RS_ENCODE_NULL_CTX_ARG, + LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER, + LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE, + LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_encode_status; + +llama_rs_encode_status llama_rs_encode( struct llama_context * ctx, - struct llama_batch batch); + struct llama_batch batch, + int32_t * out_vendored_return_code, + char ** out_error); + +typedef enum llama_rs_memory_seq_add_status { + LLAMA_RS_MEMORY_SEQ_ADD_OK = 0, + LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG, + LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE, + LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM, + LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_memory_seq_add_status; -llama_rs_status llama_rs_memory_seq_add( +llama_rs_memory_seq_add_status llama_rs_memory_seq_add( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, - llama_pos shift); + llama_pos shift, + char ** out_error); + +typedef enum llama_rs_memory_seq_div_status { + LLAMA_RS_MEMORY_SEQ_DIV_OK = 0, + LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG, + LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE, + LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM, + LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_memory_seq_div_status; -llama_rs_status llama_rs_memory_seq_div( +llama_rs_memory_seq_div_status llama_rs_memory_seq_div( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, - int d); + int d, + char ** out_error); #ifdef __cplusplus } diff --git a/llama-cpp-bindings-tests/tests/context_kv_cache.rs b/llama-cpp-bindings-tests/tests/context_kv_cache.rs index 0095bff6..e8abb54b 100644 --- a/llama-cpp-bindings-tests/tests/context_kv_cache.rs +++ b/llama-cpp-bindings-tests/tests/context_kv_cache.rs @@ -4,6 +4,7 @@ use std::num::NonZeroU32; use anyhow::Result; use llama_cpp_bindings::context::LlamaContext; use llama_cpp_bindings::context::kv_cache::KvCacheConversionError; +use llama_cpp_bindings::error::{KvCacheSeqAddError, KvCacheSeqDivError}; use llama_cpp_bindings::context::params::LlamaContextParams; use llama_cpp_bindings::llama_batch::LlamaBatch; use llama_cpp_bindings::model::AddBos; @@ -126,7 +127,10 @@ fn kv_cache_seq_add_returns_error_for_mrope_model() -> Result<()> { let result = context.kv_cache_seq_add(0, Some(0), None, 1); - assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + KvCacheSeqAddError::IncompatibleRopeType, + )); Ok(()) } @@ -149,7 +153,10 @@ fn kv_cache_seq_div_returns_error_for_mrope_model() -> Result<()> { let divisor = NonZeroU8::new(2).ok_or_else(|| anyhow::anyhow!("2 is non-zero"))?; let result = context.kv_cache_seq_div(0, Some(0), None, divisor); - assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + KvCacheSeqDivError::IncompatibleRopeType, + )); Ok(()) } @@ -363,7 +370,7 @@ fn kv_cache_seq_add_rejects_p0_exceeding_i32_max() -> Result<()> { assert!(matches!( result.unwrap_err(), - KvCacheConversionError::P0TooLarge(_), + KvCacheSeqAddError::P0TooLarge(_), )); Ok(()) @@ -382,7 +389,7 @@ fn kv_cache_seq_add_rejects_p1_exceeding_i32_max() -> Result<()> { assert!(matches!( result.unwrap_err(), - KvCacheConversionError::P1TooLarge(_), + KvCacheSeqAddError::P1TooLarge(_), )); Ok(()) @@ -402,7 +409,7 @@ fn kv_cache_seq_div_rejects_p0_exceeding_i32_max() -> Result<()> { assert!(matches!( result.unwrap_err(), - KvCacheConversionError::P0TooLarge(_), + KvCacheSeqDivError::P0TooLarge(_), )); Ok(()) @@ -422,7 +429,7 @@ fn kv_cache_seq_div_rejects_p1_exceeding_i32_max() -> Result<()> { assert!(matches!( result.unwrap_err(), - KvCacheConversionError::P1TooLarge(_), + KvCacheSeqDivError::P1TooLarge(_), )); Ok(()) diff --git a/llama-cpp-bindings/src/context.rs b/llama-cpp-bindings/src/context.rs index 410ade82..61246cbc 100644 --- a/llama-cpp-bindings/src/context.rs +++ b/llama-cpp-bindings/src/context.rs @@ -226,34 +226,44 @@ impl<'model> LlamaContext<'model> { /// /// # Errors /// - /// - `EncodeError` if the decoding failed. - /// - /// # Panics - /// - /// - the returned [`std::ffi::c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems) + /// - `EncodeError` if the encoding failed. pub fn encode(&mut self, batch: &mut LlamaBatch) -> Result<(), EncodeError> { + let mut out_vendored_return_code: i32 = 0; + let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut(); let status = unsafe { - llama_cpp_bindings_sys::llama_rs_encode(self.context.as_ptr(), batch.llama_batch) + llama_cpp_bindings_sys::llama_rs_encode( + self.context.as_ptr(), + batch.llama_batch, + &raw mut out_vendored_return_code, + &raw mut out_error, + ) }; - - self.handle_encode_result(status, batch) - } - - fn handle_encode_result( - &mut self, - status: llama_cpp_bindings_sys::llama_rs_status, - batch: &mut LlamaBatch, - ) -> Result<(), EncodeError> { - if crate::status_is_ok(status) { - self.initialized_logits - .clone_from(&batch.initialized_logits); - - Ok(()) - } else { - Err(EncodeError::from( - NonZeroI32::new(crate::status_to_i32(status)) - .unwrap_or(NonZeroI32::new(1).expect("1 is non-zero")), - )) + match status { + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_OK => { + self.initialized_logits + .clone_from(&batch.initialized_logits); + Ok(()) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_NULL_CTX_ARG => Err(EncodeError::NullContextArg), + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER => { + Err(EncodeError::ModelHasNoEncoder) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE => { + let code = NonZeroI32::new(out_vendored_return_code).unwrap_or_else(|| { + unreachable!( + "llama_rs_encode reported a nonzero return code but the value was zero" + ) + }); + Err(EncodeError::from(code)) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED => { + Err(EncodeError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(EncodeError::VendoredThrewCxxException { message }) + } + other => unreachable!("llama_rs_encode returned unrecognized status {other}"), } } diff --git a/llama-cpp-bindings/src/context/kv_cache.rs b/llama-cpp-bindings/src/context/kv_cache.rs index 4250fe94..9e4e340c 100644 --- a/llama-cpp-bindings/src/context/kv_cache.rs +++ b/llama-cpp-bindings/src/context/kv_cache.rs @@ -1,8 +1,13 @@ //! utilities for working with the kv cache -use crate::context::LlamaContext; use std::ffi::c_int; use std::num::{NonZeroU8, TryFromIntError}; +use std::os::raw::c_char; +use std::ptr; + +use crate::context::LlamaContext; +use crate::error::{KvCacheSeqAddError, KvCacheSeqDivError}; +use crate::ffi_error_reader::read_and_free_cpp_error; /// Errors that can occur when attempting to prepare values for the kv cache #[derive(Debug, Eq, PartialEq, thiserror::Error)] @@ -16,9 +21,6 @@ pub enum KvCacheConversionError { /// Position 1 conversion to i32 failed #[error("Provided end position is too large for a i32")] P1TooLarge(#[source] TryFromIntError), - /// The operation is not supported by the current model/context configuration. - #[error("operation not supported by this model: {0}")] - UnsupportedOperation(String), } impl LlamaContext<'_> { @@ -131,20 +133,21 @@ impl LlamaContext<'_> { /// * `delta` - The relative position to add to the tokens /// /// # Errors - /// If either position exceeds [`i32::MAX`]. + /// If either position exceeds [`i32::MAX`], or the underlying memory operation reports a failure. pub fn kv_cache_seq_add( &mut self, seq_id: i32, p0: Option, p1: Option, delta: i32, - ) -> Result<(), KvCacheConversionError> { + ) -> Result<(), KvCacheSeqAddError> { let p0 = p0 .map_or(Ok(-1), i32::try_from) - .map_err(KvCacheConversionError::P0TooLarge)?; + .map_err(KvCacheSeqAddError::P0TooLarge)?; let p1 = p1 .map_or(Ok(-1), i32::try_from) - .map_err(KvCacheConversionError::P1TooLarge)?; + .map_err(KvCacheSeqAddError::P1TooLarge)?; + let mut out_error: *mut c_char = ptr::null_mut(); let status = unsafe { llama_cpp_bindings_sys::llama_rs_memory_seq_add( self.context.as_ptr(), @@ -152,16 +155,28 @@ impl LlamaContext<'_> { p0, p1, delta, + &raw mut out_error, ) }; - - if crate::status_is_ok(status) { - Ok(()) - } else { - Err(KvCacheConversionError::UnsupportedOperation(format!( - "kv_cache_seq_add failed (status {})", - crate::status_to_i32(status) - ))) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG => { + Err(KvCacheSeqAddError::NullContextArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE => { + Err(KvCacheSeqAddError::IncompatibleRopeType) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM => { + Err(KvCacheSeqAddError::NullMem) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED => { + Err(KvCacheSeqAddError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(KvCacheSeqAddError::VendoredThrewCxxException { message }) + } + other => unreachable!("llama_rs_memory_seq_add returned unrecognized status {other}"), } } @@ -181,21 +196,22 @@ impl LlamaContext<'_> { /// * `d` - The factor to divide the positions by /// /// # Errors - /// If either position exceeds [`i32::MAX`]. + /// If either position exceeds [`i32::MAX`], or the underlying memory operation reports a failure. pub fn kv_cache_seq_div( &mut self, seq_id: i32, p0: Option, p1: Option, d: NonZeroU8, - ) -> Result<(), KvCacheConversionError> { + ) -> Result<(), KvCacheSeqDivError> { let p0 = p0 .map_or(Ok(-1), i32::try_from) - .map_err(KvCacheConversionError::P0TooLarge)?; + .map_err(KvCacheSeqDivError::P0TooLarge)?; let p1 = p1 .map_or(Ok(-1), i32::try_from) - .map_err(KvCacheConversionError::P1TooLarge)?; + .map_err(KvCacheSeqDivError::P1TooLarge)?; let d = c_int::from(d.get()); + let mut out_error: *mut c_char = ptr::null_mut(); let status = unsafe { llama_cpp_bindings_sys::llama_rs_memory_seq_div( self.context.as_ptr(), @@ -203,16 +219,28 @@ impl LlamaContext<'_> { p0, p1, d, + &raw mut out_error, ) }; - - if crate::status_is_ok(status) { - Ok(()) - } else { - Err(KvCacheConversionError::UnsupportedOperation(format!( - "kv_cache_seq_div failed (status {})", - crate::status_to_i32(status) - ))) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG => { + Err(KvCacheSeqDivError::NullContextArg) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE => { + Err(KvCacheSeqDivError::IncompatibleRopeType) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM => { + Err(KvCacheSeqDivError::NullMem) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED => { + Err(KvCacheSeqDivError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(KvCacheSeqDivError::VendoredThrewCxxException { message }) + } + other => unreachable!("llama_rs_memory_seq_div returned unrecognized status {other}"), } } diff --git a/llama-cpp-bindings/src/error.rs b/llama-cpp-bindings/src/error.rs index 2314452f..731a9b13 100644 --- a/llama-cpp-bindings/src/error.rs +++ b/llama-cpp-bindings/src/error.rs @@ -9,6 +9,8 @@ pub mod fit_error; pub mod grammar_error; pub mod json_object_failure; pub mod key_value_xml_tags_failure; +pub mod kv_cache_seq_add_error; +pub mod kv_cache_seq_div_error; pub mod llama_context_load_error; pub mod llama_cpp_error; pub mod llama_lora_adapter_init_error; @@ -42,6 +44,8 @@ pub use fit_error::FitError; pub use grammar_error::GrammarError; pub use json_object_failure::JsonObjectFailure; pub use key_value_xml_tags_failure::KeyValueXmlTagsFailure; +pub use kv_cache_seq_add_error::KvCacheSeqAddError; +pub use kv_cache_seq_div_error::KvCacheSeqDivError; pub use llama_context_load_error::LlamaContextLoadError; pub use llama_cpp_error::LlamaCppError; pub use llama_lora_adapter_init_error::LlamaLoraAdapterInitError; diff --git a/llama-cpp-bindings/src/error/encode_error.rs b/llama-cpp-bindings/src/error/encode_error.rs index 33999d61..c5bc3dad 100644 --- a/llama-cpp-bindings/src/error/encode_error.rs +++ b/llama-cpp-bindings/src/error/encode_error.rs @@ -1,27 +1,30 @@ use std::num::NonZeroI32; use std::os::raw::c_int; -/// Failed to decode a batch. #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum EncodeError { - /// No kv cache slot was available. - #[error("Encode Error 1: NoKvCacheSlot")] + #[error("llama_rs_encode called with null context")] + NullContextArg, + #[error("llama_rs_encode invoked on a model that has no encoder")] + ModelHasNoEncoder, + #[error("llama_encode returned non-zero code 1: no kv cache slot was available")] NoKvCacheSlot, - /// The number of tokens in the batch was 0. - #[error("Encode Error -1: n_tokens == 0")] + #[error("llama_encode returned non-zero code -1: n_tokens == 0")] NTokensZero, - /// An unknown error occurred. - #[error("Encode Error {0}: unknown")] - Unknown(c_int), + #[error("llama_encode returned unrecognized non-zero code: {code}")] + VendoredReturnedUnrecognizedNonzeroCode { code: c_int }, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_encode threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } -/// Encode a error from llama.cpp into a [`EncodeError`]. impl From for EncodeError { fn from(value: NonZeroI32) -> Self { match value.get() { 1 => Self::NoKvCacheSlot, -1 => Self::NTokensZero, - error_code => Self::Unknown(error_code), + error_code => Self::VendoredReturnedUnrecognizedNonzeroCode { code: error_code }, } } } @@ -33,26 +36,26 @@ mod tests { use super::EncodeError; #[test] - fn encode_error_no_kv_cache_slot() { + fn no_kv_cache_slot_maps_from_code_one() { let error = EncodeError::from(NonZeroI32::new(1).expect("1 is non-zero")); assert_eq!(error, EncodeError::NoKvCacheSlot); - assert_eq!(error.to_string(), "Encode Error 1: NoKvCacheSlot"); } #[test] - fn encode_error_n_tokens_zero() { + fn n_tokens_zero_maps_from_code_negative_one() { let error = EncodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero")); assert_eq!(error, EncodeError::NTokensZero); - assert_eq!(error.to_string(), "Encode Error -1: n_tokens == 0"); } #[test] - fn encode_error_unknown() { + fn unrecognized_code_falls_through_to_typed_variant() { let error = EncodeError::from(NonZeroI32::new(99).expect("99 is non-zero")); - assert_eq!(error, EncodeError::Unknown(99)); - assert_eq!(error.to_string(), "Encode Error 99: unknown"); + assert_eq!( + error, + EncodeError::VendoredReturnedUnrecognizedNonzeroCode { code: 99 } + ); } } diff --git a/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs b/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs new file mode 100644 index 00000000..95171d5d --- /dev/null +++ b/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs @@ -0,0 +1,19 @@ +use std::num::TryFromIntError; + +#[derive(Debug, thiserror::Error)] +pub enum KvCacheSeqAddError { + #[error("provided start position is too large for an i32")] + P0TooLarge(#[source] TryFromIntError), + #[error("provided end position is too large for an i32")] + P1TooLarge(#[source] TryFromIntError), + #[error("llama_rs_memory_seq_add called with null context")] + NullContextArg, + #[error("llama_rs_memory_seq_add invoked on a model with incompatible rope type")] + IncompatibleRopeType, + #[error("llama_rs_memory_seq_add could not acquire the context memory handle")] + NullMem, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_rs_memory_seq_add threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, +} diff --git a/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs b/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs new file mode 100644 index 00000000..df073004 --- /dev/null +++ b/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs @@ -0,0 +1,19 @@ +use std::num::TryFromIntError; + +#[derive(Debug, thiserror::Error)] +pub enum KvCacheSeqDivError { + #[error("provided start position is too large for an i32")] + P0TooLarge(#[source] TryFromIntError), + #[error("provided end position is too large for an i32")] + P1TooLarge(#[source] TryFromIntError), + #[error("llama_rs_memory_seq_div called with null context")] + NullContextArg, + #[error("llama_rs_memory_seq_div invoked on a model with incompatible rope type")] + IncompatibleRopeType, + #[error("llama_rs_memory_seq_div could not acquire the context memory handle")] + NullMem, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_rs_memory_seq_div threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, +} diff --git a/llama-cpp-bindings/src/lib.rs b/llama-cpp-bindings/src/lib.rs index b77d14a4..261ec02d 100644 --- a/llama-cpp-bindings/src/lib.rs +++ b/llama-cpp-bindings/src/lib.rs @@ -64,11 +64,12 @@ pub mod tool_call_template_overrides; pub use error::{ ApplyChatTemplateError, ChatTemplateError, DecodeError, EmbeddingsError, EncodeError, - EvalMultimodalChunksError, GrammarError, LlamaContextLoadError, LlamaCppError, - LlamaLoraAdapterInitError, LlamaLoraAdapterRemoveError, LlamaLoraAdapterSetError, - LlamaModelLoadError, LogitsError, MarkerDetectionError, MetaValError, ModelParamsError, - NewLlamaChatMessageError, ParseChatMessageError, Result, SampleError, SamplerAcceptError, - SamplingError, StringToTokenError, TokenSamplingError, TokenToStringError, + EvalMultimodalChunksError, GrammarError, KvCacheSeqAddError, KvCacheSeqDivError, + LlamaContextLoadError, LlamaCppError, LlamaLoraAdapterInitError, LlamaLoraAdapterRemoveError, + LlamaLoraAdapterSetError, LlamaModelLoadError, LogitsError, MarkerDetectionError, + MetaValError, ModelParamsError, NewLlamaChatMessageError, ParseChatMessageError, Result, + SampleError, SamplerAcceptError, SamplingError, StringToTokenError, TokenSamplingError, + TokenToStringError, }; pub use chat_message_parse_outcome::ChatMessageParseOutcome; diff --git a/llama-cpp-bindings/src/model/params.rs b/llama-cpp-bindings/src/model/params.rs index 446f9334..4772a129 100644 --- a/llama-cpp-bindings/src/model/params.rs +++ b/llama-cpp-bindings/src/model/params.rs @@ -858,8 +858,7 @@ mod tests { assert!( matches!( result, - Err(FitError::VendoredReportedError) - | Err(FitError::VendoredThrewCxxException { .. }) + Err(FitError::VendoredReportedError | FitError::VendoredThrewCxxException { .. }) ), "expected VendoredReportedError or VendoredThrewCxxException, got {result:?}" ); From 53087b3628bad075cac2142a71068fd5b2204ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 16:02:41 +0200 Subject: [PATCH 10/16] wrap chat parse ffi accessors to surface c++ exceptions as typed rust errors --- llama-cpp-bindings-sys/wrapper_chat_parse.cpp | 352 +++++++++++++++-- llama-cpp-bindings-sys/wrapper_chat_parse.h | 133 +++++-- .../src/error/parse_chat_message_error.rs | 81 +++- llama-cpp-bindings/src/model.rs | 362 ++++++++++++++++-- 4 files changed, 807 insertions(+), 121 deletions(-) diff --git a/llama-cpp-bindings-sys/wrapper_chat_parse.cpp b/llama-cpp-bindings-sys/wrapper_chat_parse.cpp index f60cada6..0bf59aee 100644 --- a/llama-cpp-bindings-sys/wrapper_chat_parse.cpp +++ b/llama-cpp-bindings-sys/wrapper_chat_parse.cpp @@ -7,6 +7,7 @@ #include "marker_probes/marker_probe.h" #include +#include #include #include @@ -16,7 +17,16 @@ struct llama_rs_parsed_chat { common_chat_msg message; }; -extern "C" llama_rs_status llama_rs_parse_chat_message( +static char * dup_or_set_alloc_flag(const std::string & source, bool * out_alloc_failed) { + *out_alloc_failed = false; + char * dup = llama_rs_dup_string(source); + if (!dup) { + *out_alloc_failed = true; + } + return dup; +} + +extern "C" llama_rs_parse_chat_message_status llama_rs_parse_chat_message( const struct llama_model * model, const char * tools_json, const char * input, @@ -29,20 +39,28 @@ extern "C" llama_rs_status llama_rs_parse_chat_message( if (out_error) { *out_error = nullptr; } - - if (!model || !input || !out_handle || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!model) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG; + } + if (!input) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG; + } + if (!out_handle) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG; + } + if (!out_error) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); if (!tmpl_src) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + return LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE; } const llama_vocab * vocab = llama_model_get_vocab(model); if (!vocab) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + return LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB; } std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); @@ -91,63 +109,323 @@ extern "C" llama_rs_status llama_rs_parse_chat_message( *out_handle = handle; - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_PARSE_CHAT_MESSAGE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" void llama_rs_parsed_chat_free(llama_rs_parsed_chat_handle handle) { - delete handle; +extern "C" llama_rs_parsed_chat_free_status llama_rs_parsed_chat_free( + llama_rs_parsed_chat_handle handle, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + try { + delete handle; + return LLAMA_RS_PARSED_CHAT_FREE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION; + } } -extern "C" size_t llama_rs_parsed_chat_tool_call_count(llama_rs_parsed_chat_handle handle) { +extern "C" llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool_call_count( + llama_rs_parsed_chat_handle handle, + size_t * out_count, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_count) { + *out_count = 0; + } if (!handle) { - return 0; + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG; + } + if (!out_count) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG; + } + try { + *out_count = handle->message.tool_calls.size(); + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION; } - return handle->message.tool_calls.size(); } -extern "C" char * llama_rs_parsed_chat_tool_call_id( - llama_rs_parsed_chat_handle handle, size_t index) { - if (!handle || index >= handle->message.tool_calls.size()) { - return nullptr; +extern "C" llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_call_id( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } + if (!handle) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG; + } + try { + if (index >= handle->message.tool_calls.size()) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS; + } + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag(handle->message.tool_calls[index].id, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.tool_calls[index].id); } -extern "C" char * llama_rs_parsed_chat_tool_call_name( - llama_rs_parsed_chat_handle handle, size_t index) { - if (!handle || index >= handle->message.tool_calls.size()) { - return nullptr; +extern "C" llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_call_name( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } + if (!handle) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG; + } + try { + if (index >= handle->message.tool_calls.size()) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS; + } + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag(handle->message.tool_calls[index].name, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.tool_calls[index].name); } -extern "C" char * llama_rs_parsed_chat_tool_call_arguments( - llama_rs_parsed_chat_handle handle, size_t index) { - if (!handle || index >= handle->message.tool_calls.size()) { - return nullptr; +extern "C" llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_tool_call_arguments( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } + if (!handle) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG; + } + try { + if (index >= handle->message.tool_calls.size()) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS; + } + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag( + handle->message.tool_calls[index].arguments, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.tool_calls[index].arguments); } -extern "C" char * llama_rs_parsed_chat_content(llama_rs_parsed_chat_handle handle) { +extern "C" llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( + llama_rs_parsed_chat_handle handle, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } if (!handle) { - return nullptr; + return LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG; + } + try { + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag(handle->message.content, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_CONTENT_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.content); } -extern "C" char * llama_rs_parsed_chat_reasoning_content(llama_rs_parsed_chat_handle handle) { +extern "C" llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_reasoning_content( + llama_rs_parsed_chat_handle handle, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } if (!handle) { - return nullptr; + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG; + } + try { + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag(handle->message.reasoning_content, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.reasoning_content); } diff --git a/llama-cpp-bindings-sys/wrapper_chat_parse.h b/llama-cpp-bindings-sys/wrapper_chat_parse.h index 12fed5d9..e235673c 100644 --- a/llama-cpp-bindings-sys/wrapper_chat_parse.h +++ b/llama-cpp-bindings-sys/wrapper_chat_parse.h @@ -12,20 +12,19 @@ extern "C" { struct llama_rs_parsed_chat; typedef struct llama_rs_parsed_chat * llama_rs_parsed_chat_handle; -/** - * Parse a chat-completion turn from raw assistant output using llama.cpp's - * `common_chat_parse`, driven by the model's autoparser-built peg parser. - * - * `tools_json` is a serialized JSON array of OpenAI-style tool definitions - * (or empty / null when the request had no tools). `is_partial` switches - * between mid-stream parses (partial accepts incomplete payloads) and final - * parses (rejects malformed input). - * - * On success, `*out_handle` owns the parsed message; free via - * `llama_rs_parsed_chat_free`. On failure, `*out_error` carries an - * exception message; free via `llama_rs_string_free`. - */ -llama_rs_status llama_rs_parse_chat_message( +typedef enum llama_rs_parse_chat_message_status { + LLAMA_RS_PARSE_CHAT_MESSAGE_OK = 0, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE, + LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB, + LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parse_chat_message_status; + +llama_rs_parse_chat_message_status llama_rs_parse_chat_message( const struct llama_model * model, const char * tools_json, const char * input, @@ -33,25 +32,99 @@ llama_rs_status llama_rs_parse_chat_message( llama_rs_parsed_chat_handle * out_handle, char ** out_error); -void llama_rs_parsed_chat_free(llama_rs_parsed_chat_handle handle); +typedef enum llama_rs_parsed_chat_free_status { + LLAMA_RS_PARSED_CHAT_FREE_OK = 0, + LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_free_status; + +llama_rs_parsed_chat_free_status llama_rs_parsed_chat_free( + llama_rs_parsed_chat_handle handle, + char ** out_error); + +typedef enum llama_rs_parsed_chat_tool_call_count_status { + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK = 0, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_tool_call_count_status; + +llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool_call_count( + llama_rs_parsed_chat_handle handle, + size_t * out_count, + char ** out_error); + +typedef enum llama_rs_parsed_chat_tool_call_id_status { + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK = 0, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_tool_call_id_status; + +llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_call_id( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error); + +typedef enum llama_rs_parsed_chat_tool_call_name_status { + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK = 0, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_tool_call_name_status; + +llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_call_name( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error); -size_t llama_rs_parsed_chat_tool_call_count(llama_rs_parsed_chat_handle handle); +typedef enum llama_rs_parsed_chat_tool_call_arguments_status { + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK = 0, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_tool_call_arguments_status; + +llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_tool_call_arguments( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error); -/** - * Returns a heap-allocated UTF-8 string for the i-th tool call's `id`, - * `name`, or `arguments` field. Free with `llama_rs_string_free`. Returns - * nullptr if `handle` is null or `index` is out of bounds. - * - * `arguments` is the raw JSON string emitted by the parser — the caller is - * expected to feed it into a schema validator or hand it back to clients - * verbatim. - */ -char * llama_rs_parsed_chat_tool_call_id(llama_rs_parsed_chat_handle handle, size_t index); -char * llama_rs_parsed_chat_tool_call_name(llama_rs_parsed_chat_handle handle, size_t index); -char * llama_rs_parsed_chat_tool_call_arguments(llama_rs_parsed_chat_handle handle, size_t index); +typedef enum llama_rs_parsed_chat_content_status { + LLAMA_RS_PARSED_CHAT_CONTENT_OK = 0, + LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_content_status; -char * llama_rs_parsed_chat_content(llama_rs_parsed_chat_handle handle); -char * llama_rs_parsed_chat_reasoning_content(llama_rs_parsed_chat_handle handle); +llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( + llama_rs_parsed_chat_handle handle, + char ** out_string, + char ** out_error); + +typedef enum llama_rs_parsed_chat_reasoning_content_status { + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK = 0, + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_reasoning_content_status; + +llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_reasoning_content( + llama_rs_parsed_chat_handle handle, + char ** out_string, + char ** out_error); #ifdef __cplusplus } diff --git a/llama-cpp-bindings/src/error/parse_chat_message_error.rs b/llama-cpp-bindings/src/error/parse_chat_message_error.rs index 75460ed4..cc1ccc06 100644 --- a/llama-cpp-bindings/src/error/parse_chat_message_error.rs +++ b/llama-cpp-bindings/src/error/parse_chat_message_error.rs @@ -2,31 +2,82 @@ use std::string::FromUtf8Error; use crate::error::tool_call_format_failure::ToolCallFormatFailure; -/// Failed to parse a chat message via [`crate::Model::parse_chat_message`]. #[derive(Debug, thiserror::Error)] pub enum ParseChatMessageError { - /// llama.cpp returned an error code from the parse FFI call. - #[error("ffi error {0}")] - FfiError(i32), - /// The C++ side threw an exception while parsing. - #[error("c++ exception during chat parse: {0}")] - ParseException(String), - /// An accessor returned bytes that were not valid UTF-8. + #[error("llama_rs_parse_chat_message called with null model")] + ParseNullModelArg, + #[error("llama_rs_parse_chat_message called with null input")] + ParseNullInputArg, + #[error("llama_rs_parse_chat_message called with null out_handle")] + ParseNullOutHandleArg, + #[error("llama_rs_parse_chat_message called with null out_error")] + ParseNullOutErrorArg, + #[error("model has no chat template")] + ParseModelHasNoChatTemplate, + #[error("model has no vocab")] + ParseModelHasNoVocab, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ParseErrorStringAllocationFailed, + #[error("c++ exception during chat parse: {message}")] + ParseException { message: String }, + #[error("llama_rs_parsed_chat_free destructor threw a C++ exception: {message}")] + FreeDestructorThrewCxxException { message: String }, + #[error("llama_rs_parsed_chat_free wrapper failed to duplicate the C++ exception string")] + FreeErrorStringAllocationFailed, + #[error("llama_rs_parsed_chat_tool_call_count called with null handle")] + ToolCallCountNullHandleArg, + #[error("llama_rs_parsed_chat_tool_call_count threw a C++ exception: {message}")] + ToolCallCountThrewCxxException { message: String }, + #[error("llama_rs_parsed_chat_tool_call_count wrapper failed to duplicate the C++ exception string")] + ToolCallCountErrorStringAllocationFailed, + #[error("llama_rs_parsed_chat_tool_call_id called with null handle")] + ToolCallIdNullHandleArg, + #[error("llama_rs_parsed_chat_tool_call_id called with index {index} out of bounds")] + ToolCallIdIndexOutOfBounds { index: usize }, + #[error("llama_rs_parsed_chat_tool_call_id threw a C++ exception: {message}")] + ToolCallIdThrewCxxException { message: String }, + #[error("llama_rs_parsed_chat_tool_call_id wrapper failed to duplicate the C++ exception string")] + ToolCallIdErrorStringAllocationFailed, + #[error("llama_rs_parsed_chat_tool_call_name called with null handle")] + ToolCallNameNullHandleArg, + #[error("llama_rs_parsed_chat_tool_call_name called with index {index} out of bounds")] + ToolCallNameIndexOutOfBounds { index: usize }, + #[error("llama_rs_parsed_chat_tool_call_name threw a C++ exception: {message}")] + ToolCallNameThrewCxxException { message: String }, + #[error("llama_rs_parsed_chat_tool_call_name wrapper failed to duplicate the C++ exception string")] + ToolCallNameErrorStringAllocationFailed, + #[error("llama_rs_parsed_chat_tool_call_arguments called with null handle")] + ToolCallArgumentsNullHandleArg, + #[error("llama_rs_parsed_chat_tool_call_arguments called with index {index} out of bounds")] + ToolCallArgumentsIndexOutOfBounds { index: usize }, + #[error("llama_rs_parsed_chat_tool_call_arguments threw a C++ exception: {message}")] + ToolCallArgumentsThrewCxxException { message: String }, + #[error( + "llama_rs_parsed_chat_tool_call_arguments wrapper failed to duplicate the C++ exception string" + )] + ToolCallArgumentsErrorStringAllocationFailed, + #[error("llama_rs_parsed_chat_content called with null handle")] + ContentNullHandleArg, + #[error("llama_rs_parsed_chat_content threw a C++ exception: {message}")] + ContentThrewCxxException { message: String }, + #[error("llama_rs_parsed_chat_content wrapper failed to duplicate the C++ exception string")] + ContentErrorStringAllocationFailed, + #[error("llama_rs_parsed_chat_reasoning_content called with null handle")] + ReasoningContentNullHandleArg, + #[error("llama_rs_parsed_chat_reasoning_content threw a C++ exception: {message}")] + ReasoningContentThrewCxxException { message: String }, + #[error( + "llama_rs_parsed_chat_reasoning_content wrapper failed to duplicate the C++ exception string" + )] + ReasoningContentErrorStringAllocationFailed, #[error("ffi returned non-utf8 string: {0}")] StringUtf8Error(#[from] FromUtf8Error), - /// The caller passed a `tools_json` argument that is not valid JSON. #[error("tools_json is not valid JSON: {0}")] ToolsJsonInvalid(#[source] serde_json::Error), - /// The caller passed a `tools_json` argument that parses as JSON but is not an array. #[error("tools_json must be a JSON array")] ToolsJsonNotArray, - /// Failed to serialize the tools array for the FFI call. #[error("could not serialize tools to JSON: {0}")] ToolsSerialization(String), - /// The model has no usable chat template, so the parser cannot be built. - #[error("model has no chat template")] - NoChatTemplate, - /// The wrapper-side fallback parser detected a structural issue while parsing the body. #[error("template-override fallback parser failed: {0}")] TemplateOverrideFailed(#[from] ToolCallFormatFailure), } diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs index de22549d..789a6ddf 100644 --- a/llama-cpp-bindings/src/model.rs +++ b/llama-cpp-bindings/src/model.rs @@ -936,12 +936,12 @@ impl LlamaModel { synthesize_missing_tool_call_ids(&mut parsed.tool_calls); Ok(ChatMessageParseOutcome::Recognized(parsed)) } - Err(ParseChatMessageError::ParseException(ffi_error_message)) => { + Err(ParseChatMessageError::ParseException { message }) => { Ok(ChatMessageParseOutcome::Unrecognized(RawChatMessage { tools_json: tools_json.to_owned(), text: input.to_owned(), is_partial, - ffi_error_message, + ffi_error_message: message, })) } Err(other) => Err(other), @@ -974,18 +974,70 @@ impl LlamaModel { }; let parsed = match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => collect_parsed_chat_message(handle), - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => { - let message = read_optional_owned_cstr_lossy(out_error); - Err(ParseChatMessageError::ParseException(message)) + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_OK => { + collect_parsed_chat_message(handle) } - other => Err(ParseChatMessageError::FfiError(status_to_i32(other))), + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG => { + Err(ParseChatMessageError::ParseNullModelArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG => { + Err(ParseChatMessageError::ParseNullInputArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG => { + Err(ParseChatMessageError::ParseNullOutHandleArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG => { + Err(ParseChatMessageError::ParseNullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE => { + Err(ParseChatMessageError::ParseModelHasNoChatTemplate) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB => { + Err(ParseChatMessageError::ParseModelHasNoVocab) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED => { + Err(ParseChatMessageError::ParseErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + out_error = ptr::null_mut(); + Err(ParseChatMessageError::ParseException { message }) + } + other => unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}"), }; - unsafe { llama_cpp_bindings_sys::llama_rs_parsed_chat_free(handle) }; - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; - - parsed + let mut free_error: *mut c_char = ptr::null_mut(); + let free_status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_free(handle, &raw mut free_error) + }; + match (parsed, free_status) { + (Ok(value), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_OK) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Ok(value) + } + (Ok(_), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + let message = unsafe { + crate::ffi_error_reader::read_and_free_cpp_error(free_error) + }; + Err(ParseChatMessageError::FreeDestructorThrewCxxException { message }) + } + (Ok(_), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::FreeErrorStringAllocationFailed) + } + (Ok(_), other) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(free_error) }; + unreachable!("llama_rs_parsed_chat_free returned unrecognized status {other}") + } + (Err(parse_err), _) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(free_error) }; + Err(parse_err) + } + } } /// Render the model's chat template with the autoparser's synthetic @@ -1071,26 +1123,15 @@ fn collect_parsed_chat_message( return Ok(ParsedChatMessage::default()); } - let content = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_content(handle) - })?; - let reasoning_content = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_reasoning_content(handle) - })?; - - let count = unsafe { llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_count(handle) }; + let content = read_parsed_chat_content(handle)?; + let reasoning_content = read_parsed_chat_reasoning_content(handle)?; + let count = read_parsed_chat_tool_call_count(handle)?; let mut tool_calls = Vec::with_capacity(count); for index in 0..count { - let id = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_id(handle, index) - })?; - let name = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_name(handle, index) - })?; - let arguments_json = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_arguments(handle, index) - })?; + let id = read_parsed_chat_tool_call_id(handle, index)?; + let name = read_parsed_chat_tool_call_name(handle, index)?; + let arguments_json = read_parsed_chat_tool_call_arguments(handle, index)?; let arguments = ToolCallArguments::from_string(arguments_json); tool_calls.push(ParsedToolCall::new(id, name, arguments)); @@ -1103,6 +1144,260 @@ fn collect_parsed_chat_message( )) } +fn read_parsed_chat_content( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_content( + handle, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG => { + Err(ParseChatMessageError::ContentNullHandleArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG => { + unreachable!( + "llama_rs_parsed_chat_content reported null out_string while we passed a valid pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::ContentErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::ContentThrewCxxException { message }) + } + other => unreachable!("llama_rs_parsed_chat_content returned unrecognized status {other}"), + } +} + +fn read_parsed_chat_reasoning_content( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_reasoning_content( + handle, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG => { + Err(ParseChatMessageError::ReasoningContentNullHandleArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG => { + unreachable!( + "llama_rs_parsed_chat_reasoning_content reported null out_string while we passed a valid pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::ReasoningContentErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::ReasoningContentThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_reasoning_content returned unrecognized status {other}" + ), + } +} + +fn read_parsed_chat_tool_call_count( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, +) -> Result { + let mut out_count: usize = 0; + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_count( + handle, + &raw mut out_count, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK => Ok(out_count), + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG => { + Err(ParseChatMessageError::ToolCallCountNullHandleArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG => { + unreachable!( + "llama_rs_parsed_chat_tool_call_count reported null out_count while we passed a valid pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::ToolCallCountErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::ToolCallCountThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_tool_call_count returned unrecognized status {other}" + ), + } +} + +fn read_parsed_chat_tool_call_id( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, + index: usize, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_id( + handle, + index, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG => { + Err(ParseChatMessageError::ToolCallIdNullHandleArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG => { + unreachable!( + "llama_rs_parsed_chat_tool_call_id reported null out_string while we passed a valid pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS => { + Err(ParseChatMessageError::ToolCallIdIndexOutOfBounds { index }) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::ToolCallIdErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::ToolCallIdThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_tool_call_id returned unrecognized status {other}" + ), + } +} + +fn read_parsed_chat_tool_call_name( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, + index: usize, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_name( + handle, + index, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG => { + Err(ParseChatMessageError::ToolCallNameNullHandleArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG => { + unreachable!( + "llama_rs_parsed_chat_tool_call_name reported null out_string while we passed a valid pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS => { + Err(ParseChatMessageError::ToolCallNameIndexOutOfBounds { index }) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::ToolCallNameErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::ToolCallNameThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_tool_call_name returned unrecognized status {other}" + ), + } +} + +fn read_parsed_chat_tool_call_arguments( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, + index: usize, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_arguments( + handle, + index, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG => { + Err(ParseChatMessageError::ToolCallArgumentsNullHandleArg) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG => { + unreachable!( + "llama_rs_parsed_chat_tool_call_arguments reported null out_string while we passed a valid pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS => { + Err(ParseChatMessageError::ToolCallArgumentsIndexOutOfBounds { index }) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::ToolCallArgumentsErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::ToolCallArgumentsThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_tool_call_arguments returned unrecognized status {other}" + ), + } +} + +fn consume_accessor_string(ptr: *mut c_char) -> Result { + if ptr.is_null() { + return Ok(String::new()); + } + let bytes = unsafe { CStr::from_ptr(ptr) }.to_bytes().to_vec(); + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(ptr) }; + Ok(String::from_utf8(bytes)?) +} + struct ReasoningSplit { reasoning: String, content: String, @@ -1230,17 +1525,6 @@ where parsed } -fn read_owned_cstr_for_parse(ptr: *mut c_char) -> Result { - if ptr.is_null() { - return Ok(String::new()); - } - - let bytes = unsafe { CStr::from_ptr(ptr) }.to_bytes().to_vec(); - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(ptr) }; - - Ok(String::from_utf8(bytes)?) -} - fn read_optional_owned_cstr(ptr: *const c_char) -> Result, MarkerDetectionError> { if ptr.is_null() { return Ok(None); From 3471b41761cceb500a97151257ee95c289b518bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 16:30:11 +0200 Subject: [PATCH 11/16] retrofit existing wrappers to per-wrapper status enums and surface c++ exceptions as typed rust errors --- llama-cpp-bindings-sys/wrapper_common.cpp | 248 +++++++---- llama-cpp-bindings-sys/wrapper_common.h | 69 ++- llama-cpp-bindings-sys/wrapper_reasoning.cpp | 41 +- llama-cpp-bindings-sys/wrapper_reasoning.h | 26 +- llama-cpp-bindings-sys/wrapper_tool_calls.cpp | 76 ++-- llama-cpp-bindings-sys/wrapper_tool_calls.h | 51 +-- llama-cpp-bindings/src/error.rs | 2 + llama-cpp-bindings/src/error/grammar_error.rs | 58 ++- .../src/error/json_schema_to_grammar_error.rs | 20 + .../src/error/llama_cpp_error.rs | 33 +- .../src/error/marker_detection_error.rs | 48 ++- llama-cpp-bindings/src/error/sample_error.rs | 22 +- .../src/error/sampler_accept_error.rs | 16 +- .../src/json_schema_to_grammar.rs | 86 ++-- llama-cpp-bindings/src/lib.rs | 12 +- llama-cpp-bindings/src/llama_backend.rs | 6 +- llama-cpp-bindings/src/llguidance_sampler.rs | 4 +- llama-cpp-bindings/src/model.rs | 392 +++++++----------- llama-cpp-bindings/src/model/params.rs | 4 +- llama-cpp-bindings/src/sampling.rs | 169 ++++++-- 20 files changed, 808 insertions(+), 575 deletions(-) create mode 100644 llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs diff --git a/llama-cpp-bindings-sys/wrapper_common.cpp b/llama-cpp-bindings-sys/wrapper_common.cpp index 45e47bae..4544ecfd 100644 --- a/llama-cpp-bindings-sys/wrapper_common.cpp +++ b/llama-cpp-bindings-sys/wrapper_common.cpp @@ -1,9 +1,9 @@ #include "wrapper_common.h" -#include #include #include #include +#include #include #include @@ -14,34 +14,49 @@ #include -extern "C" llama_rs_status llama_rs_json_schema_to_grammar( +extern "C" llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_grammar( const char * schema_json, bool force_gbnf, char ** out_grammar, char ** out_error) { - if (!schema_json || !out_grammar || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (out_grammar) { + *out_grammar = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!schema_json) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG; + } + if (!out_grammar) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG; + } + if (!out_error) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG; } - - *out_grammar = nullptr; - *out_error = nullptr; try { const auto schema = nlohmann::ordered_json::parse(schema_json); const auto grammar = json_schema_to_grammar(schema, force_gbnf); *out_grammar = llama_rs_dup_string(grammar); - - return *out_grammar ? LLAMA_RS_STATUS_OK : LLAMA_RS_STATUS_ALLOCATION_FAILED; + if (!*out_grammar) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return LLAMA_RS_STATUS_EXCEPTION; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } } @@ -51,33 +66,48 @@ extern "C" void llama_rs_string_free(char * ptr) { } } -extern "C" struct llama_sampler * llama_rs_sampler_init_grammar( +extern "C" llama_rs_sampler_init_grammar_status llama_rs_sampler_init_grammar( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, + struct llama_sampler ** out_sampler, char ** out_error) { + if (out_sampler) { + *out_sampler = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG; + } if (!out_error) { - return nullptr; + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { - return llama_sampler_init_grammar(vocab, grammar_str, grammar_root); + *out_sampler = llama_sampler_init_grammar(vocab, grammar_str, grammar_root); + if (!*out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return nullptr; + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return nullptr; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy( +extern "C" llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_grammar_lazy( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -85,13 +115,20 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy( size_t num_trigger_words, const llama_token * trigger_tokens, size_t num_trigger_tokens, + struct llama_sampler ** out_sampler, char ** out_error) { + if (out_sampler) { + *out_sampler = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG; + } if (!out_error) { - return nullptr; + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { std::vector trigger_patterns; trigger_patterns.reserve(num_trigger_words); @@ -107,7 +144,7 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy( trigger_patterns_c.push_back(pattern.c_str()); } - return llama_sampler_init_grammar_lazy_patterns( + *out_sampler = llama_sampler_init_grammar_lazy_patterns( vocab, grammar_str, grammar_root, @@ -115,20 +152,28 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy( trigger_patterns_c.size(), trigger_tokens, num_trigger_tokens); + if (!*out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return nullptr; + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return nullptr; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( +extern "C" llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_init_grammar_lazy_patterns( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -136,15 +181,22 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( size_t num_trigger_patterns, const llama_token * trigger_tokens, size_t num_trigger_tokens, + struct llama_sampler ** out_sampler, char ** out_error) { + if (out_sampler) { + *out_sampler = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG; + } if (!out_error) { - return nullptr; + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { - return llama_sampler_init_grammar_lazy_patterns( + *out_sampler = llama_sampler_init_grammar_lazy_patterns( vocab, grammar_str, grammar_root, @@ -152,16 +204,24 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( num_trigger_patterns, trigger_tokens, num_trigger_tokens); + if (!*out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return nullptr; + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return nullptr; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION; } } @@ -327,58 +387,76 @@ extern "C" llama_rs_memory_seq_div_status llama_rs_memory_seq_div( } } -extern "C" llama_rs_status llama_rs_sampler_sample( +extern "C" llama_rs_sampler_sample_status llama_rs_sampler_sample( struct llama_sampler * sampler, struct llama_context * ctx, int32_t idx, llama_token * out_token, char ** out_error) { - if (!sampler || !ctx || !out_token || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (out_error) { + *out_error = nullptr; + } + if (!sampler) { + return LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG; + } + if (!ctx) { + return LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG; + } + if (!out_token) { + return LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG; + } + if (!out_error) { + return LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { *out_token = llama_sampler_sample(sampler, ctx, idx); - - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_SAMPLER_SAMPLE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return LLAMA_RS_STATUS_EXCEPTION; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_status llama_rs_sampler_accept( +extern "C" llama_rs_sampler_accept_status llama_rs_sampler_accept( struct llama_sampler * sampler, llama_token token, char ** out_error) { - if (!sampler || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (out_error) { + *out_error = nullptr; + } + if (!sampler) { + return LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG; + } + if (!out_error) { + return LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { llama_sampler_accept(sampler, token); - - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_SAMPLER_ACCEPT_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return LLAMA_RS_STATUS_EXCEPTION; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION; } } diff --git a/llama-cpp-bindings-sys/wrapper_common.h b/llama-cpp-bindings-sys/wrapper_common.h index 61ba9d2f..d1770b2a 100644 --- a/llama-cpp-bindings-sys/wrapper_common.h +++ b/llama-cpp-bindings-sys/wrapper_common.h @@ -16,19 +16,47 @@ struct llama_vocab; extern "C" { #endif -llama_rs_status llama_rs_json_schema_to_grammar( +typedef enum llama_rs_json_schema_to_grammar_status { + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK = 0, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_json_schema_to_grammar_status; + +llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_grammar( const char * schema_json, bool force_gbnf, char ** out_grammar, char ** out_error); -struct llama_sampler * llama_rs_sampler_init_grammar( +typedef enum llama_rs_sampler_init_grammar_status { + LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK = 0, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_init_grammar_status; + +llama_rs_sampler_init_grammar_status llama_rs_sampler_init_grammar( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, + struct llama_sampler ** out_sampler, char ** out_error); -struct llama_sampler * llama_rs_sampler_init_grammar_lazy( +typedef enum llama_rs_sampler_init_grammar_lazy_status { + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK = 0, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_init_grammar_lazy_status; + +llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_grammar_lazy( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -36,9 +64,19 @@ struct llama_sampler * llama_rs_sampler_init_grammar_lazy( size_t num_trigger_words, const llama_token * trigger_tokens, size_t num_trigger_tokens, + struct llama_sampler ** out_sampler, char ** out_error); -struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( +typedef enum llama_rs_sampler_init_grammar_lazy_patterns_status { + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK = 0, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_init_grammar_lazy_patterns_status; + +llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_init_grammar_lazy_patterns( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -46,14 +84,33 @@ struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( size_t num_trigger_patterns, const llama_token * trigger_tokens, size_t num_trigger_tokens, + struct llama_sampler ** out_sampler, char ** out_error); -llama_rs_status llama_rs_sampler_accept( +typedef enum llama_rs_sampler_accept_status { + LLAMA_RS_SAMPLER_ACCEPT_OK = 0, + LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG, + LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_accept_status; + +llama_rs_sampler_accept_status llama_rs_sampler_accept( struct llama_sampler * sampler, llama_token token, char ** out_error); -llama_rs_status llama_rs_sampler_sample( +typedef enum llama_rs_sampler_sample_status { + LLAMA_RS_SAMPLER_SAMPLE_OK = 0, + LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG, + LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG, + LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG, + LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_sample_status; + +llama_rs_sampler_sample_status llama_rs_sampler_sample( struct llama_sampler * sampler, struct llama_context * ctx, int32_t idx, diff --git a/llama-cpp-bindings-sys/wrapper_reasoning.cpp b/llama-cpp-bindings-sys/wrapper_reasoning.cpp index 36b0763e..7970b4ee 100644 --- a/llama-cpp-bindings-sys/wrapper_reasoning.cpp +++ b/llama-cpp-bindings-sys/wrapper_reasoning.cpp @@ -6,6 +6,7 @@ #include "marker_probes/marker_probe.h" #include +#include #include #include @@ -26,7 +27,7 @@ std::string token_text_or_empty(const llama_vocab * vocab, llama_token token) { } // namespace -extern "C" llama_rs_status llama_rs_detect_reasoning_markers( +extern "C" llama_rs_detect_reasoning_markers_status llama_rs_detect_reasoning_markers( const struct llama_model * model, char ** out_open, char ** out_close, @@ -40,20 +41,28 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers( if (out_error) { *out_error = nullptr; } - - if (!model || !out_open || !out_close || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!model) { + return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG; + } + if (!out_open) { + return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG; + } + if (!out_close) { + return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG; + } + if (!out_error) { + return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); if (!tmpl_src) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); if (!vocab) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); @@ -112,7 +121,7 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers( } if (!detected) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } char * open_dup = llama_rs_dup_string(detected_start); @@ -122,21 +131,27 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers( std::free(open_dup); std::free(close_dup); - return LLAMA_RS_STATUS_ALLOCATION_FAILED; + return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; } *out_open = open_dup; *out_close = close_dup; - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DETECT_REASONING_MARKERS_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION; } } diff --git a/llama-cpp-bindings-sys/wrapper_reasoning.h b/llama-cpp-bindings-sys/wrapper_reasoning.h index eca91ea8..a22f79ba 100644 --- a/llama-cpp-bindings-sys/wrapper_reasoning.h +++ b/llama-cpp-bindings-sys/wrapper_reasoning.h @@ -7,21 +7,17 @@ extern "C" { #endif -/** - * Detect the reasoning open/close marker strings for a model by analyzing its - * Jinja chat template via llama.cpp's autoparser. - * - * On success (LLAMA_RS_STATUS_OK): - * - If the model has detected reasoning markers, *out_open and *out_close are - * set to heap-allocated null-terminated strings owned by the caller. Free - * each via llama_rs_string_free. - * - If no reasoning markers were detected, *out_open and *out_close are left - * as nullptr. - * - * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set to a heap-allocated message; - * free via llama_rs_string_free. - */ -llama_rs_status llama_rs_detect_reasoning_markers( +typedef enum llama_rs_detect_reasoning_markers_status { + LLAMA_RS_DETECT_REASONING_MARKERS_OK = 0, + LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG, + LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG, + LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG, + LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG, + LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_detect_reasoning_markers_status; + +llama_rs_detect_reasoning_markers_status llama_rs_detect_reasoning_markers( const struct llama_model * model, char ** out_open, char ** out_close, diff --git a/llama-cpp-bindings-sys/wrapper_tool_calls.cpp b/llama-cpp-bindings-sys/wrapper_tool_calls.cpp index eb869201..54b3a999 100644 --- a/llama-cpp-bindings-sys/wrapper_tool_calls.cpp +++ b/llama-cpp-bindings-sys/wrapper_tool_calls.cpp @@ -7,6 +7,7 @@ #include "llama.cpp/include/llama.h" #include +#include #include #include @@ -107,7 +108,7 @@ std::string detect_tool_call_haystack( } // namespace -extern "C" llama_rs_status llama_rs_compute_tool_call_haystack( +extern "C" llama_rs_compute_tool_call_haystack_status llama_rs_compute_tool_call_haystack( const struct llama_model * model, char ** out_haystack, char ** out_error) { @@ -117,20 +118,25 @@ extern "C" llama_rs_status llama_rs_compute_tool_call_haystack( if (out_error) { *out_error = nullptr; } - - if (!model || !out_haystack || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!model) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG; + } + if (!out_haystack) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG; + } + if (!out_error) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); if (!tmpl_src) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); if (!vocab) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); @@ -142,29 +148,35 @@ extern "C" llama_rs_status llama_rs_compute_tool_call_haystack( std::string haystack = detect_tool_call_haystack(tmpl, reasoning); if (haystack.empty()) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } char * haystack_dup = llama_rs_dup_string(haystack); if (!haystack_dup) { - return LLAMA_RS_STATUS_ALLOCATION_FAILED; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; } *out_haystack = haystack_dup; - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders( +extern "C" llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnose_tool_call_synthetic_renders( const struct llama_model * model, char ** out_no_tools, char ** out_with_tools, @@ -178,20 +190,28 @@ extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders( if (out_error) { *out_error = nullptr; } - - if (!model || !out_no_tools || !out_with_tools || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!model) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG; + } + if (!out_no_tools) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG; + } + if (!out_with_tools) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG; + } + if (!out_error) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); if (!tmpl_src) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); if (!vocab) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; } std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); @@ -259,20 +279,26 @@ extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders( std::free(a_dup); std::free(b_dup); - return LLAMA_RS_STATUS_ALLOCATION_FAILED; + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; } *out_no_tools = a_dup; *out_with_tools = b_dup; - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION; } } diff --git a/llama-cpp-bindings-sys/wrapper_tool_calls.h b/llama-cpp-bindings-sys/wrapper_tool_calls.h index e6a59e20..7c96c20b 100644 --- a/llama-cpp-bindings-sys/wrapper_tool_calls.h +++ b/llama-cpp-bindings-sys/wrapper_tool_calls.h @@ -7,40 +7,31 @@ extern "C" { #endif -/** - * Render the model's chat template with the autoparser's standard tool-call - * vs. plain-assistant synthetic turns and return the diff slice that surrounds - * the tool-call payload. The returned haystack is the text that lives between - * the model's tool-call open/close markers (with any reasoning prelude - * stripped). Marker extraction from the haystack is performed in Rust. - * - * On success (LLAMA_RS_STATUS_OK): - * - If the model declares no tool-call markers (or an empty haystack), - * *out_haystack is left as nullptr. - * - Otherwise *out_haystack is a heap-allocated null-terminated string owned - * by the caller. Free via llama_rs_string_free. - * - * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set to a heap-allocated message; - * free via llama_rs_string_free. - */ -llama_rs_status llama_rs_compute_tool_call_haystack( +typedef enum llama_rs_compute_tool_call_haystack_status { + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK = 0, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_compute_tool_call_haystack_status; + +llama_rs_compute_tool_call_haystack_status llama_rs_compute_tool_call_haystack( const struct llama_model * model, char ** out_haystack, char ** out_error); -/** - * Render the model's chat template with the autoparser's standard synthetic - * inputs (assistant_no_tools vs assistant_with_tools). Useful for diagnosing - * why marker detection fails. - * - * On success (LLAMA_RS_STATUS_OK): - * - *out_no_tools and *out_with_tools point to heap-allocated rendered - * outputs (free via llama_rs_string_free). Either can be empty when the - * template throws during rendering. - * - * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set. - */ -llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders( +typedef enum llama_rs_diagnose_tool_call_synthetic_renders_status { + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK = 0, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_diagnose_tool_call_synthetic_renders_status; + +llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnose_tool_call_synthetic_renders( const struct llama_model * model, char ** out_no_tools, char ** out_with_tools, diff --git a/llama-cpp-bindings/src/error.rs b/llama-cpp-bindings/src/error.rs index 731a9b13..ba684109 100644 --- a/llama-cpp-bindings/src/error.rs +++ b/llama-cpp-bindings/src/error.rs @@ -8,6 +8,7 @@ pub mod eval_multimodal_chunks_error; pub mod fit_error; pub mod grammar_error; pub mod json_object_failure; +pub mod json_schema_to_grammar_error; pub mod key_value_xml_tags_failure; pub mod kv_cache_seq_add_error; pub mod kv_cache_seq_div_error; @@ -43,6 +44,7 @@ pub use eval_multimodal_chunks_error::EvalMultimodalChunksError; pub use fit_error::FitError; pub use grammar_error::GrammarError; pub use json_object_failure::JsonObjectFailure; +pub use json_schema_to_grammar_error::JsonSchemaToGrammarError; pub use key_value_xml_tags_failure::KeyValueXmlTagsFailure; pub use kv_cache_seq_add_error::KvCacheSeqAddError; pub use kv_cache_seq_div_error::KvCacheSeqDivError; diff --git a/llama-cpp-bindings/src/error/grammar_error.rs b/llama-cpp-bindings/src/error/grammar_error.rs index 58216b8c..f9adb0a8 100644 --- a/llama-cpp-bindings/src/error/grammar_error.rs +++ b/llama-cpp-bindings/src/error/grammar_error.rs @@ -1,27 +1,53 @@ use std::ffi::NulError; -/// Errors that can occur when initializing a grammar sampler -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum GrammarError { - /// The grammar root was not found in the grammar string - #[error("Grammar root not found in grammar string")] + #[error("grammar root not found in grammar string")] RootNotFound, - /// The trigger word contains null bytes - #[error("Trigger word contains null bytes: {0}")] + #[error("trigger word contains null bytes: {0}")] TriggerWordNullBytes(NulError), - /// The grammar string or root contains null bytes - #[error("Grammar string or root contains null bytes: {0}")] + #[error("grammar string or root contains null bytes: {0}")] GrammarNullBytes(NulError), - /// A string contains null bytes - #[error("String contains null bytes: {0}")] + #[error("string contains null bytes: {0}")] NulError(#[from] NulError), - /// The grammar call returned null - #[error("Grammar initialization failed: {0}")] - NullGrammar(String), - /// An integer value exceeded the allowed range - #[error("Integer overflow: {0}")] + #[error("integer overflow: {0}")] IntegerOverflow(String), - /// An error from the llguidance library #[error("llguidance error: {0}")] LlguidanceError(String), + #[error("llama_rs_sampler_init_grammar called with null out_sampler")] + GrammarInitNullOutSamplerArg, + #[error("llama_rs_sampler_init_grammar called with null out_error")] + GrammarInitNullOutErrorArg, + #[error("llama_rs_sampler_init_grammar returned null")] + GrammarInitVendoredReturnedNull, + #[error("llama_rs_sampler_init_grammar wrapper failed to duplicate the C++ exception string")] + GrammarInitErrorStringAllocationFailed, + #[error("llama_rs_sampler_init_grammar threw a C++ exception: {message}")] + GrammarInitVendoredThrewCxxException { message: String }, + #[error("llama_rs_sampler_init_grammar_lazy called with null out_sampler")] + GrammarLazyInitNullOutSamplerArg, + #[error("llama_rs_sampler_init_grammar_lazy called with null out_error")] + GrammarLazyInitNullOutErrorArg, + #[error("llama_rs_sampler_init_grammar_lazy returned null")] + GrammarLazyInitVendoredReturnedNull, + #[error( + "llama_rs_sampler_init_grammar_lazy wrapper failed to duplicate the C++ exception string" + )] + GrammarLazyInitErrorStringAllocationFailed, + #[error("llama_rs_sampler_init_grammar_lazy threw a C++ exception: {message}")] + GrammarLazyInitVendoredThrewCxxException { message: String }, + #[error("llama_rs_sampler_init_grammar_lazy_patterns called with null out_sampler")] + GrammarLazyPatternsInitNullOutSamplerArg, + #[error("llama_rs_sampler_init_grammar_lazy_patterns called with null out_error")] + GrammarLazyPatternsInitNullOutErrorArg, + #[error("llama_rs_sampler_init_grammar_lazy_patterns returned null")] + GrammarLazyPatternsInitVendoredReturnedNull, + #[error( + "llama_rs_sampler_init_grammar_lazy_patterns wrapper failed to duplicate the C++ exception string" + )] + GrammarLazyPatternsInitErrorStringAllocationFailed, + #[error("llama_rs_sampler_init_grammar_lazy_patterns threw a C++ exception: {message}")] + GrammarLazyPatternsInitVendoredThrewCxxException { message: String }, + #[error("vendored llama_sampler_init for llguidance returned null")] + LlguidanceSamplerInitVendoredReturnedNull, } diff --git a/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs b/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs new file mode 100644 index 00000000..e5943464 --- /dev/null +++ b/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs @@ -0,0 +1,20 @@ +use std::ffi::NulError; +use std::string::FromUtf8Error; + +#[derive(Debug, thiserror::Error)] +pub enum JsonSchemaToGrammarError { + #[error("schema string contains an interior NUL byte: {0}")] + SchemaContainsNulByte(#[from] NulError), + #[error("llama_rs_json_schema_to_grammar called with null schema_json")] + NullSchemaJsonArg, + #[error("llama_rs_json_schema_to_grammar called with null out_grammar")] + NullOutGrammarArg, + #[error("llama_rs_json_schema_to_grammar called with null out_error")] + NullOutErrorArg, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_rs_json_schema_to_grammar threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, + #[error("grammar string returned by llama_rs_json_schema_to_grammar is not valid UTF-8")] + GrammarNotUtf8(#[from] FromUtf8Error), +} diff --git a/llama-cpp-bindings/src/error/llama_cpp_error.rs b/llama-cpp-bindings/src/error/llama_cpp_error.rs index b99fefdd..e40664a8 100644 --- a/llama-cpp-bindings/src/error/llama_cpp_error.rs +++ b/llama-cpp-bindings/src/error/llama_cpp_error.rs @@ -4,47 +4,34 @@ use crate::error::decode_error::DecodeError; use crate::error::embeddings_error::EmbeddingsError; use crate::error::encode_error::EncodeError; use crate::error::fit_error::FitError; +use crate::error::json_schema_to_grammar_error::JsonSchemaToGrammarError; use crate::error::llama_context_load_error::LlamaContextLoadError; use crate::error::llama_model_load_error::LlamaModelLoadError; -/// All errors that can occur in the llama-cpp crate. -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum LlamaCppError { - /// The backend was already initialized. This can generally be ignored as initializing the backend - /// is idempotent. #[error("BackendAlreadyInitialized")] BackendAlreadyInitialized, - /// There was an error while get the chat template from model. - #[error("{0}")] + #[error(transparent)] ChatTemplateError(#[from] ChatTemplateError), - /// There was an error while decoding a batch. - #[error("{0}")] + #[error(transparent)] DecodeError(#[from] DecodeError), - /// There was an error while encoding a batch. - #[error("{0}")] + #[error(transparent)] EncodeError(#[from] EncodeError), - /// There was an error loading a model. - #[error("{0}")] + #[error(transparent)] LlamaModelLoadError(#[from] LlamaModelLoadError), - /// There was an error creating a new model context. - #[error("{0}")] + #[error(transparent)] LlamaContextLoadError(#[from] LlamaContextLoadError), - /// There was an error adding a token to a batch. - #[error["{0}"]] + #[error(transparent)] BatchAddError(#[from] BatchAddError), - /// see [`EmbeddingsError`] #[error(transparent)] EmbeddingError(#[from] EmbeddingsError), - /// Backend device not found #[error("Backend device {0} not found")] BackendDeviceNotFound(usize), - /// Max devices exceeded #[error("Max devices exceeded. Max devices is {0}")] MaxDevicesExceeded(usize), - /// Failed to convert JSON schema to grammar. - #[error("JsonSchemaToGrammarError: {0}")] - JsonSchemaToGrammarError(String), - /// see [`FitError`] + #[error(transparent)] + JsonSchemaToGrammarError(#[from] JsonSchemaToGrammarError), #[error(transparent)] FitError(#[from] FitError), } diff --git a/llama-cpp-bindings/src/error/marker_detection_error.rs b/llama-cpp-bindings/src/error/marker_detection_error.rs index aa755878..92e9939c 100644 --- a/llama-cpp-bindings/src/error/marker_detection_error.rs +++ b/llama-cpp-bindings/src/error/marker_detection_error.rs @@ -1,15 +1,47 @@ use std::string::FromUtf8Error; -/// Failed to detect tool-call diagnostic markers for a model. #[derive(Debug, thiserror::Error)] pub enum MarkerDetectionError { - /// llama.cpp returned an error code from the marker detection FFI call. - #[error("ffi error {0}")] - FfiError(i32), - /// The C++ side threw an exception during template analysis. - #[error("c++ exception during template analysis: {0}")] - AnalyzeException(String), - /// llama.cpp returned a marker string but its bytes were not valid UTF-8. #[error("ffi returned non-utf8 marker bytes: {0}")] MarkerUtf8Error(#[from] FromUtf8Error), + #[error("llama_rs_detect_reasoning_markers called with null model")] + DetectReasoningMarkersNullModelArg, + #[error("llama_rs_detect_reasoning_markers called with null out_open")] + DetectReasoningMarkersNullOutOpenArg, + #[error("llama_rs_detect_reasoning_markers called with null out_close")] + DetectReasoningMarkersNullOutCloseArg, + #[error("llama_rs_detect_reasoning_markers called with null out_error")] + DetectReasoningMarkersNullOutErrorArg, + #[error( + "llama_rs_detect_reasoning_markers wrapper failed to duplicate the C++ exception string" + )] + DetectReasoningMarkersErrorStringAllocationFailed, + #[error("llama_rs_detect_reasoning_markers threw a C++ exception: {message}")] + DetectReasoningMarkersVendoredThrewCxxException { message: String }, + #[error("llama_rs_compute_tool_call_haystack called with null model")] + ComputeToolCallHaystackNullModelArg, + #[error("llama_rs_compute_tool_call_haystack called with null out_haystack")] + ComputeToolCallHaystackNullOutHaystackArg, + #[error("llama_rs_compute_tool_call_haystack called with null out_error")] + ComputeToolCallHaystackNullOutErrorArg, + #[error( + "llama_rs_compute_tool_call_haystack wrapper failed to duplicate the C++ exception string" + )] + ComputeToolCallHaystackErrorStringAllocationFailed, + #[error("llama_rs_compute_tool_call_haystack threw a C++ exception: {message}")] + ComputeToolCallHaystackVendoredThrewCxxException { message: String }, + #[error("llama_rs_diagnose_tool_call_synthetic_renders called with null model")] + DiagnoseToolCallSyntheticRendersNullModelArg, + #[error("llama_rs_diagnose_tool_call_synthetic_renders called with null out_no_tools")] + DiagnoseToolCallSyntheticRendersNullOutNoToolsArg, + #[error("llama_rs_diagnose_tool_call_synthetic_renders called with null out_with_tools")] + DiagnoseToolCallSyntheticRendersNullOutWithToolsArg, + #[error("llama_rs_diagnose_tool_call_synthetic_renders called with null out_error")] + DiagnoseToolCallSyntheticRendersNullOutErrorArg, + #[error( + "llama_rs_diagnose_tool_call_synthetic_renders wrapper failed to duplicate the C++ exception string" + )] + DiagnoseToolCallSyntheticRendersErrorStringAllocationFailed, + #[error("llama_rs_diagnose_tool_call_synthetic_renders threw a C++ exception: {message}")] + DiagnoseToolCallSyntheticRendersVendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/error/sample_error.rs b/llama-cpp-bindings/src/error/sample_error.rs index a7bbf4e8..8f5e1aa9 100644 --- a/llama-cpp-bindings/src/error/sample_error.rs +++ b/llama-cpp-bindings/src/error/sample_error.rs @@ -1,11 +1,15 @@ -/// Errors that can occur when sampling a token. -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum SampleError { - /// A C++ exception was thrown during sampling - #[error("C++ exception during sampling: {0}")] - CppException(String), - - /// An invalid argument was passed to the sampler - #[error("Invalid argument passed to sampler")] - InvalidArgument, + #[error("llama_rs_sampler_sample called with null sampler")] + NullSamplerArg, + #[error("llama_rs_sampler_sample called with null context")] + NullCtxArg, + #[error("llama_rs_sampler_sample called with null out_token")] + NullOutTokenArg, + #[error("llama_rs_sampler_sample called with null out_error")] + NullOutErrorArg, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_rs_sampler_sample threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/error/sampler_accept_error.rs b/llama-cpp-bindings/src/error/sampler_accept_error.rs index afa32a61..dc5e40c1 100644 --- a/llama-cpp-bindings/src/error/sampler_accept_error.rs +++ b/llama-cpp-bindings/src/error/sampler_accept_error.rs @@ -1,11 +1,11 @@ -/// Failed to accept a token in a sampler. #[derive(Debug, thiserror::Error)] pub enum SamplerAcceptError { - /// A C++ exception was thrown during accept - #[error("C++ exception during sampler accept: {0}")] - CppException(String), - - /// An invalid argument was passed (null sampler or null error pointer) - #[error("Invalid argument passed to sampler accept")] - InvalidArgument, + #[error("llama_rs_sampler_accept called with null sampler")] + NullSamplerArg, + #[error("llama_rs_sampler_accept called with null out_error")] + NullOutErrorArg, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_rs_sampler_accept threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/json_schema_to_grammar.rs b/llama-cpp-bindings/src/json_schema_to_grammar.rs index 34590a82..d9b29cf8 100644 --- a/llama-cpp-bindings/src/json_schema_to_grammar.rs +++ b/llama-cpp-bindings/src/json_schema_to_grammar.rs @@ -1,15 +1,14 @@ use std::ffi::{CStr, CString, c_char}; -use crate::error::{LlamaCppError, Result}; -use crate::ffi_status_is_ok::status_is_ok; +use crate::error::JsonSchemaToGrammarError; +use crate::ffi_error_reader::read_and_free_cpp_error; -/// Convert a JSON schema string into a llama.cpp grammar string. -/// /// # Errors -/// Returns an error if the schema contains null bytes or the conversion fails. -pub fn json_schema_to_grammar(schema_json: &str) -> Result { - let schema_cstr = CString::new(schema_json) - .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?; +/// +/// Returns [`JsonSchemaToGrammarError`] if the schema string contains a NUL byte, +/// the wrapper reports any non-OK status, or the returned grammar is not valid UTF-8. +pub fn json_schema_to_grammar(schema_json: &str) -> Result { + let schema_cstr = CString::new(schema_json)?; let mut out: *mut c_char = std::ptr::null_mut(); let mut error_ptr: *mut c_char = std::ptr::null_mut(); @@ -22,63 +21,80 @@ pub fn json_schema_to_grammar(schema_json: &str) -> Result { ) }; - if !status_is_ok(status) || out.is_null() { - let message = if error_ptr.is_null() { - "unknown error".to_owned() - } else { - let message = unsafe { CStr::from_ptr(error_ptr) } - .to_string_lossy() - .into_owned(); - - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(error_ptr) }; - - message - }; - - return Err(LlamaCppError::JsonSchemaToGrammarError(message)); + match status { + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK => { + let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec(); + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out) }; + Ok(String::from_utf8(grammar_bytes)?) + } + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG => { + unreachable!( + "llama_rs_json_schema_to_grammar received null schema_json despite valid Rust CString" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG => { + unreachable!( + "llama_rs_json_schema_to_grammar reported null out_grammar despite valid Rust pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG => { + unreachable!( + "llama_rs_json_schema_to_grammar reported null out_error despite valid Rust pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED => { + Err(JsonSchemaToGrammarError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(JsonSchemaToGrammarError::VendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_json_schema_to_grammar returned unrecognized status {other}" + ), } - - let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec(); - - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out) }; - - String::from_utf8(grammar_bytes) - .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string())) } #[cfg(test)] mod tests { use super::json_schema_to_grammar; + use crate::error::JsonSchemaToGrammarError; #[test] fn simple_object() { let schema = r#"{"type": "object", "properties": {"name": {"type": "string"}}}"#; - let grammar = json_schema_to_grammar(schema).unwrap(); + let grammar = json_schema_to_grammar(schema).expect("schema converts to grammar"); assert!(!grammar.is_empty()); } #[test] - fn null_byte_returns_error() { + fn null_byte_returns_schema_contains_nul_byte_error() { let schema = "{\x00}"; let result = json_schema_to_grammar(schema); - assert!(result.is_err()); + assert!(matches!( + result, + Err(JsonSchemaToGrammarError::SchemaContainsNulByte(_)), + )); } #[test] fn simple_string() { let schema = r#"{"type": "string"}"#; - let grammar = json_schema_to_grammar(schema).unwrap(); + let grammar = json_schema_to_grammar(schema).expect("schema converts to grammar"); assert!(!grammar.is_empty()); } #[test] - fn invalid_json_returns_ffi_error() { + fn invalid_json_returns_vendored_threw_cxx_exception() { let schema = "not valid json at all"; let result = json_schema_to_grammar(schema); - assert!(result.is_err()); + assert!(matches!( + result, + Err(JsonSchemaToGrammarError::VendoredThrewCxxException { .. }), + )); } } diff --git a/llama-cpp-bindings/src/lib.rs b/llama-cpp-bindings/src/lib.rs index 261ec02d..9bed927b 100644 --- a/llama-cpp-bindings/src/lib.rs +++ b/llama-cpp-bindings/src/lib.rs @@ -64,12 +64,12 @@ pub mod tool_call_template_overrides; pub use error::{ ApplyChatTemplateError, ChatTemplateError, DecodeError, EmbeddingsError, EncodeError, - EvalMultimodalChunksError, GrammarError, KvCacheSeqAddError, KvCacheSeqDivError, - LlamaContextLoadError, LlamaCppError, LlamaLoraAdapterInitError, LlamaLoraAdapterRemoveError, - LlamaLoraAdapterSetError, LlamaModelLoadError, LogitsError, MarkerDetectionError, - MetaValError, ModelParamsError, NewLlamaChatMessageError, ParseChatMessageError, Result, - SampleError, SamplerAcceptError, SamplingError, StringToTokenError, TokenSamplingError, - TokenToStringError, + EvalMultimodalChunksError, GrammarError, JsonSchemaToGrammarError, KvCacheSeqAddError, + KvCacheSeqDivError, LlamaContextLoadError, LlamaCppError, LlamaLoraAdapterInitError, + LlamaLoraAdapterRemoveError, LlamaLoraAdapterSetError, LlamaModelLoadError, LogitsError, + MarkerDetectionError, MetaValError, ModelParamsError, NewLlamaChatMessageError, + ParseChatMessageError, Result, SampleError, SamplerAcceptError, SamplingError, + StringToTokenError, TokenSamplingError, TokenToStringError, }; pub use chat_message_parse_outcome::ChatMessageParseOutcome; diff --git a/llama-cpp-bindings/src/llama_backend.rs b/llama-cpp-bindings/src/llama_backend.rs index 20ad3ac3..ff6b09f9 100644 --- a/llama-cpp-bindings/src/llama_backend.rs +++ b/llama-cpp-bindings/src/llama_backend.rs @@ -38,7 +38,7 @@ impl LlamaBackend { /// /// let backend = LlamaBackend::init()?; /// // the llama backend can only be initialized once - /// assert_eq!(Err(LlamaCppError::BackendAlreadyInitialized), LlamaBackend::init()); + /// assert!(matches!(LlamaBackend::init(), Err(LlamaCppError::BackendAlreadyInitialized))); /// ///# Ok(()) ///# } @@ -161,10 +161,10 @@ mod tests { fn double_init_returns_error() { let _backend = LlamaBackend::init().unwrap(); let second = LlamaBackend::init(); - assert_eq!( + assert!(matches!( second.unwrap_err(), LlamaCppError::BackendAlreadyInitialized - ); + )); } #[test] diff --git a/llama-cpp-bindings/src/llguidance_sampler.rs b/llama-cpp-bindings/src/llguidance_sampler.rs index ffd51d75..4130ee33 100644 --- a/llama-cpp-bindings/src/llguidance_sampler.rs +++ b/llama-cpp-bindings/src/llguidance_sampler.rs @@ -153,9 +153,7 @@ pub fn create_llg_sampler( }; if sampler.is_null() { - Err(GrammarError::NullGrammar( - "llguidance sampler returned null".to_owned(), - )) + Err(GrammarError::LlguidanceSamplerInitVendoredReturnedNull) } else { Ok(LlamaSampler { sampler }) } diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs index 789a6ddf..91011890 100644 --- a/llama-cpp-bindings/src/model.rs +++ b/llama-cpp-bindings/src/model.rs @@ -31,7 +31,6 @@ use llama_cpp_bindings_types::ToolCallArguments; use llama_cpp_bindings_types::ToolCallMarkers; use crate::chat_message_parse_outcome::ChatMessageParseOutcome; -use crate::ffi_status_to_i32::status_to_i32; use crate::llama_backend::LlamaBackend; use crate::llama_token_attrs::LlamaTokenAttrs; use crate::llama_token_attrs_from_int_error::LlamaTokenAttrsFromIntError; @@ -741,22 +740,9 @@ impl LlamaModel { /// Returns [`MarkerDetectionError`] when any underlying FFI call fails. pub fn streaming_markers(&self) -> Result { let (reasoning_open_str, reasoning_close_str) = - invoke_ffi_string_pair_detector(|first, second, error| unsafe { - llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers( - self.model.as_ptr(), - first, - second, - error, - ) - })?; + invoke_detect_reasoning_markers(self.model.as_ptr())?; - let tool_call_haystack = invoke_ffi_single_string_detector(|haystack, error| unsafe { - llama_cpp_bindings_sys::llama_rs_compute_tool_call_haystack( - self.model.as_ptr(), - haystack, - error, - ) - })?; + let tool_call_haystack = invoke_compute_tool_call_haystack(self.model.as_ptr())?; let autoparser_pair = tool_call_haystack.as_deref().and_then( crate::extract_tool_call_markers_from_haystack::extract_tool_call_markers_from_haystack, @@ -817,14 +803,7 @@ impl LlamaModel { /// # Errors /// Returns [`MarkerDetectionError`] when the underlying FFI call fails. pub fn reasoning_markers(&self) -> Result, MarkerDetectionError> { - let (open, close) = invoke_ffi_string_pair_detector(|first, second, error| unsafe { - llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers( - self.model.as_ptr(), - first, - second, - error, - ) - })?; + let (open, close) = invoke_detect_reasoning_markers(self.model.as_ptr())?; match (open, close) { (Some(open), Some(close)) if !open.is_empty() && !close.is_empty() => { @@ -1052,15 +1031,7 @@ impl LlamaModel { pub fn diagnose_tool_call_synthetic_renders( &self, ) -> Result<(String, String), MarkerDetectionError> { - let (no_tools, with_tools) = - invoke_ffi_string_pair_detector(|first, second, error| unsafe { - llama_cpp_bindings_sys::llama_rs_diagnose_tool_call_synthetic_renders( - self.model.as_ptr(), - first, - second, - error, - ) - })?; + let (no_tools, with_tools) = invoke_diagnose_tool_call_synthetic_renders(self.model.as_ptr())?; Ok((no_tools.unwrap_or_default(), with_tools.unwrap_or_default())) } @@ -1447,80 +1418,166 @@ fn synthesize_missing_tool_call_ids(tool_calls: &mut [ParsedToolCall]) { } } -fn parse_single_string_status( - status: llama_cpp_bindings_sys::llama_rs_status, - out_value: *mut c_char, - out_error: *mut c_char, -) -> Result, MarkerDetectionError> { - match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => read_optional_owned_cstr(out_value), - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => { - let message = read_optional_owned_cstr_lossy(out_error); +fn invoke_detect_reasoning_markers( + model: *const llama_cpp_bindings_sys::llama_model, +) -> Result<(Option, Option), MarkerDetectionError> { + let mut out_open: *mut c_char = ptr::null_mut(); + let mut out_close: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers( + model, + &raw mut out_open, + &raw mut out_close, + &raw mut out_error, + ) + }; - Err(MarkerDetectionError::AnalyzeException(message)) + let parsed = match status { + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_OK => { + collect_optional_cstr_pair(out_open, out_close) } - other => Err(MarkerDetectionError::FfiError(status_to_i32(other))), + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG => { + Err(MarkerDetectionError::DetectReasoningMarkersNullModelArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG => { + Err(MarkerDetectionError::DetectReasoningMarkersNullOutOpenArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG => { + Err(MarkerDetectionError::DetectReasoningMarkersNullOutCloseArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG => { + Err(MarkerDetectionError::DetectReasoningMarkersNullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED => { + Err(MarkerDetectionError::DetectReasoningMarkersErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(MarkerDetectionError::DetectReasoningMarkersVendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_detect_reasoning_markers returned unrecognized status {other}" + ), + }; + + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_open) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_close) }; + if !matches!( + parsed, + Err(MarkerDetectionError::DetectReasoningMarkersVendoredThrewCxxException { .. }) + ) { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; } + + parsed } -fn invoke_ffi_single_string_detector( - invoke: TInvoke, -) -> Result, MarkerDetectionError> -where - TInvoke: FnOnce(*mut *mut c_char, *mut *mut c_char) -> llama_cpp_bindings_sys::llama_rs_status, -{ - let mut out_value: *mut c_char = ptr::null_mut(); +fn invoke_compute_tool_call_haystack( + model: *const llama_cpp_bindings_sys::llama_model, +) -> Result, MarkerDetectionError> { + let mut out_haystack: *mut c_char = ptr::null_mut(); let mut out_error: *mut c_char = ptr::null_mut(); - let status = invoke(&raw mut out_value, &raw mut out_error); - let parsed = parse_single_string_status(status, out_value, out_error); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_compute_tool_call_haystack( + model, + &raw mut out_haystack, + &raw mut out_error, + ) + }; - unsafe { - if !out_value.is_null() { - llama_cpp_bindings_sys::llama_rs_string_free(out_value); + let parsed = match status { + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK => { + read_optional_owned_cstr(out_haystack) + } + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG => { + Err(MarkerDetectionError::ComputeToolCallHaystackNullModelArg) + } + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG => { + Err(MarkerDetectionError::ComputeToolCallHaystackNullOutHaystackArg) } - if !out_error.is_null() { - llama_cpp_bindings_sys::llama_rs_string_free(out_error); + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG => { + Err(MarkerDetectionError::ComputeToolCallHaystackNullOutErrorArg) } + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED => { + Err(MarkerDetectionError::ComputeToolCallHaystackErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(MarkerDetectionError::ComputeToolCallHaystackVendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_compute_tool_call_haystack returned unrecognized status {other}" + ), + }; + + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_haystack) }; + if !matches!( + parsed, + Err(MarkerDetectionError::ComputeToolCallHaystackVendoredThrewCxxException { .. }) + ) { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; } parsed } -fn invoke_ffi_string_pair_detector( - invoke: TInvoke, -) -> Result<(Option, Option), MarkerDetectionError> -where - TInvoke: FnOnce( - *mut *mut c_char, - *mut *mut c_char, - *mut *mut c_char, - ) -> llama_cpp_bindings_sys::llama_rs_status, -{ - let mut out_first: *mut c_char = ptr::null_mut(); - let mut out_second: *mut c_char = ptr::null_mut(); +fn invoke_diagnose_tool_call_synthetic_renders( + model: *const llama_cpp_bindings_sys::llama_model, +) -> Result<(Option, Option), MarkerDetectionError> { + let mut out_no_tools: *mut c_char = ptr::null_mut(); + let mut out_with_tools: *mut c_char = ptr::null_mut(); let mut out_error: *mut c_char = ptr::null_mut(); - let status = invoke(&raw mut out_first, &raw mut out_second, &raw mut out_error); - - let parsed = (|| match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => { - let first = read_optional_owned_cstr(out_first)?; - let second = read_optional_owned_cstr(out_second)?; + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_diagnose_tool_call_synthetic_renders( + model, + &raw mut out_no_tools, + &raw mut out_with_tools, + &raw mut out_error, + ) + }; - Ok((first, second)) + let parsed = match status { + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK => { + collect_optional_cstr_pair(out_no_tools, out_with_tools) } - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => { - let message = read_optional_owned_cstr_lossy(out_error); - - Err(MarkerDetectionError::AnalyzeException(message)) + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG => { + Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersNullModelArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG => { + Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersNullOutNoToolsArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG => { + Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersNullOutWithToolsArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG => { + Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersNullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED => { + Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersVendoredThrewCxxException { + message, + }) } - other => Err(MarkerDetectionError::FfiError(status_to_i32(other))), - })(); + other => unreachable!( + "llama_rs_diagnose_tool_call_synthetic_renders returned unrecognized status {other}" + ), + }; - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_first) }; - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_second) }; - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_no_tools) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_with_tools) }; + if !matches!( + parsed, + Err(MarkerDetectionError::DiagnoseToolCallSyntheticRendersVendoredThrewCxxException { .. }) + ) { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + } parsed } @@ -1535,14 +1592,13 @@ fn read_optional_owned_cstr(ptr: *const c_char) -> Result, Marker Ok(Some(String::from_utf8(bytes)?)) } -fn read_optional_owned_cstr_lossy(ptr: *const c_char) -> String { - if ptr.is_null() { - return String::new(); - } - - unsafe { CStr::from_ptr(ptr) } - .to_string_lossy() - .into_owned() +fn collect_optional_cstr_pair( + first_ptr: *const c_char, + second_ptr: *const c_char, +) -> Result<(Option, Option), MarkerDetectionError> { + let first = read_optional_owned_cstr(first_ptr)?; + let second = read_optional_owned_cstr(second_ptr)?; + Ok((first, second)) } fn extract_meta_string( @@ -1677,151 +1733,3 @@ mod extract_meta_string_tests { } } -#[cfg(test)] -mod ffi_helper_tests { - use std::ffi::CString; - use std::ptr; - - use super::invoke_ffi_single_string_detector; - use super::invoke_ffi_string_pair_detector; - use super::parse_single_string_status; - use super::read_optional_owned_cstr_lossy; - use crate::MarkerDetectionError; - - #[test] - fn read_optional_owned_cstr_lossy_returns_empty_for_null() { - let result = read_optional_owned_cstr_lossy(ptr::null()); - - assert!(result.is_empty()); - } - - #[test] - fn read_optional_owned_cstr_lossy_returns_string_for_valid_pointer() { - let owned = CString::new("hello").expect("static literal has no nuls"); - let result = read_optional_owned_cstr_lossy(owned.as_ptr()); - - assert_eq!(result, "hello"); - } - - #[test] - fn read_optional_owned_cstr_lossy_handles_invalid_utf8_via_replacement() { - let owned = CString::new(vec![b'a', 0xFF, b'b']).expect("no interior nul"); - let result = read_optional_owned_cstr_lossy(owned.as_ptr()); - - assert!(result.starts_with('a')); - assert!(result.ends_with('b')); - } - - #[test] - fn parse_single_string_status_returns_none_for_ok_with_null() { - let result = parse_single_string_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK, - ptr::null_mut(), - ptr::null_mut(), - ); - - assert_eq!(result.expect("OK + null returns Ok(None)"), None); - } - - #[test] - fn parse_single_string_status_returns_some_for_ok_with_value() { - let owned = CString::new("present").expect("no nul"); - let result = parse_single_string_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK, - owned.as_ptr().cast_mut(), - ptr::null_mut(), - ); - - assert_eq!( - result.expect("OK + value returns Ok(Some)"), - Some("present".to_owned()) - ); - } - - #[test] - fn parse_single_string_status_returns_analyze_exception() { - let owned = CString::new("boom").expect("no nul"); - let result = parse_single_string_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION, - ptr::null_mut(), - owned.as_ptr().cast_mut(), - ); - - match result.expect_err("EXCEPTION must yield Err") { - MarkerDetectionError::AnalyzeException(message) => assert_eq!(message, "boom"), - other => panic!("expected AnalyzeException, got {other:?}"), - } - } - - #[test] - fn parse_single_string_status_returns_ffi_error_for_other_status() { - let result = parse_single_string_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT, - ptr::null_mut(), - ptr::null_mut(), - ); - - match result.expect_err("invalid status must yield Err") { - MarkerDetectionError::FfiError(_) => {} - other => panic!("expected FfiError, got {other:?}"), - } - } - - #[test] - fn invoke_ffi_single_string_detector_propagates_invalid_argument_status() { - let result = invoke_ffi_single_string_detector(|_value, _error| { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT - }); - - assert!(matches!(result, Err(MarkerDetectionError::FfiError(_)))); - } - - #[test] - fn invoke_ffi_single_string_detector_returns_none_for_ok_with_null() { - let result = invoke_ffi_single_string_detector(|value, _error| { - unsafe { - *value = ptr::null_mut(); - } - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK - }); - - assert_eq!(result.expect("OK + null returns Ok(None)"), None); - } - - #[test] - fn invoke_ffi_string_pair_detector_propagates_invalid_argument_status() { - let result = invoke_ffi_string_pair_detector(|_first, _second, _error| { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT - }); - - assert!(matches!(result, Err(MarkerDetectionError::FfiError(_)))); - } - - #[test] - fn invoke_ffi_string_pair_detector_returns_pair_of_none_for_ok_with_nulls() { - let result = invoke_ffi_string_pair_detector(|first, second, _error| { - unsafe { - *first = ptr::null_mut(); - *second = ptr::null_mut(); - } - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK - }); - - assert_eq!( - result.expect("OK with both null returns Ok((None, None))"), - (None, None) - ); - } - - #[test] - fn invoke_ffi_string_pair_detector_propagates_invalid_status_codes() { - let result = invoke_ffi_string_pair_detector(|_first, _second, _error| { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_ALLOCATION_FAILED - }); - - match result.expect_err("non-OK status yields Err") { - MarkerDetectionError::FfiError(code) => assert!(code != 0), - other => panic!("expected FfiError, got {other:?}"), - } - } -} diff --git a/llama-cpp-bindings/src/model/params.rs b/llama-cpp-bindings/src/model/params.rs index 4772a129..d246137e 100644 --- a/llama-cpp-bindings/src/model/params.rs +++ b/llama-cpp-bindings/src/model/params.rs @@ -707,10 +707,10 @@ mod tests { fn with_devices_invalid_index_returns_error() { let result = LlamaModelParams::default().with_devices(&[999_999]); - assert_eq!( + assert!(matches!( result.unwrap_err(), crate::LlamaCppError::BackendDeviceNotFound(999_999) - ); + )); } #[test] diff --git a/llama-cpp-bindings/src/sampling.rs b/llama-cpp-bindings/src/sampling.rs index e9aadb21..4c63980c 100644 --- a/llama-cpp-bindings/src/sampling.rs +++ b/llama-cpp-bindings/src/sampling.rs @@ -13,30 +13,25 @@ use crate::token::logit_bias::LlamaLogitBias; use crate::{GrammarError, SampleError, SamplerAcceptError, SamplingError}; fn check_sampler_accept_status( - status: llama_cpp_bindings_sys::llama_rs_status, + status: llama_cpp_bindings_sys::llama_rs_sampler_accept_status, error_ptr: *mut c_char, ) -> Result<(), SamplerAcceptError> { match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => Ok(()), - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT => { - Err(SamplerAcceptError::InvalidArgument) + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG => { + Err(SamplerAcceptError::NullSamplerArg) } - _ => Err(SamplerAcceptError::CppException(unsafe { - read_and_free_cpp_error(error_ptr) - })), - } -} - -fn check_sampler_not_null( - sampler: *mut llama_cpp_bindings_sys::llama_sampler, - error_ptr: *mut c_char, -) -> Result { - if sampler.is_null() { - Err(GrammarError::NullGrammar(unsafe { - read_and_free_cpp_error(error_ptr) - })) - } else { - Ok(LlamaSampler { sampler }) + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG => { + Err(SamplerAcceptError::NullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED => { + Err(SamplerAcceptError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(SamplerAcceptError::VendoredThrewCxxException { message }) + } + other => unreachable!("llama_rs_sampler_accept returned unrecognized status {other}"), } } @@ -85,13 +80,27 @@ impl LlamaSampler { }; match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => Ok(LlamaToken(token)), - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT => { - Err(SampleError::InvalidArgument) + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_OK => Ok(LlamaToken(token)), + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG => { + Err(SampleError::NullSamplerArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG => { + Err(SampleError::NullCtxArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG => { + Err(SampleError::NullOutTokenArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG => { + Err(SampleError::NullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED => { + Err(SampleError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(SampleError::VendoredThrewCxxException { message }) } - _ => Err(SampleError::CppException(unsafe { - read_and_free_cpp_error(error_ptr) - })), + other => unreachable!("llama_rs_sampler_sample returned unrecognized status {other}"), } } @@ -377,18 +386,43 @@ impl LlamaSampler { ) -> Result { let (grammar_str, grammar_root) = Self::sanitize_grammar_strings(grammar_str, grammar_root)?; + let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut(); let mut error_ptr: *mut c_char = std::ptr::null_mut(); - let sampler = unsafe { + let status = unsafe { llama_cpp_bindings_sys::llama_rs_sampler_init_grammar( model.vocab_ptr(), grammar_str.as_ptr(), grammar_root.as_ptr(), + &raw mut sampler, &raw mut error_ptr, ) }; - check_sampler_not_null(sampler, error_ptr) + match status { + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK => { + Ok(Self { sampler }) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG => { + Err(GrammarError::GrammarInitNullOutSamplerArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG => { + Err(GrammarError::GrammarInitNullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL => { + Err(GrammarError::GrammarInitVendoredReturnedNull) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED => { + Err(GrammarError::GrammarInitErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(GrammarError::GrammarInitVendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_sampler_init_grammar returned unrecognized status {other}" + ), + } } /// Lazy grammar sampler, introduced in @@ -407,12 +441,13 @@ impl LlamaSampler { let (grammar_str, grammar_root) = Self::sanitize_grammar_strings(grammar_str, grammar_root)?; let trigger_words = Self::sanitize_trigger_words(trigger_words)?; + let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut(); let mut error_ptr: *mut c_char = std::ptr::null_mut(); let mut trigger_word_ptrs: Vec<*const c_char> = trigger_words.iter().map(|cs| cs.as_ptr()).collect(); - let sampler = unsafe { + let status = unsafe { llama_cpp_bindings_sys::llama_rs_sampler_init_grammar_lazy( model.vocab_ptr(), grammar_str.as_ptr(), @@ -421,11 +456,35 @@ impl LlamaSampler { trigger_word_ptrs.len(), trigger_tokens.as_ptr().cast(), trigger_tokens.len(), + &raw mut sampler, &raw mut error_ptr, ) }; - check_sampler_not_null(sampler, error_ptr) + match status { + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK => { + Ok(Self { sampler }) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG => { + Err(GrammarError::GrammarLazyInitNullOutSamplerArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG => { + Err(GrammarError::GrammarLazyInitNullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL => { + Err(GrammarError::GrammarLazyInitVendoredReturnedNull) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED => { + Err(GrammarError::GrammarLazyInitErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(GrammarError::GrammarLazyInitVendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_sampler_init_grammar_lazy returned unrecognized status {other}" + ), + } } /// Lazy grammar sampler using regex trigger patterns. @@ -446,12 +505,13 @@ impl LlamaSampler { let (grammar_str, grammar_root) = Self::sanitize_grammar_strings(grammar_str, grammar_root)?; let trigger_patterns = Self::sanitize_trigger_patterns(trigger_patterns)?; + let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut(); let mut error_ptr: *mut c_char = std::ptr::null_mut(); let mut trigger_pattern_ptrs: Vec<*const c_char> = trigger_patterns.iter().map(|cs| cs.as_ptr()).collect(); - let sampler = unsafe { + let status = unsafe { llama_cpp_bindings_sys::llama_rs_sampler_init_grammar_lazy_patterns( model.vocab_ptr(), grammar_str.as_ptr(), @@ -460,11 +520,35 @@ impl LlamaSampler { trigger_pattern_ptrs.len(), trigger_tokens.as_ptr().cast(), trigger_tokens.len(), + &raw mut sampler, &raw mut error_ptr, ) }; - check_sampler_not_null(sampler, error_ptr) + match status { + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK => { + Ok(Self { sampler }) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG => { + Err(GrammarError::GrammarLazyPatternsInitNullOutSamplerArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG => { + Err(GrammarError::GrammarLazyPatternsInitNullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL => { + Err(GrammarError::GrammarLazyPatternsInitVendoredReturnedNull) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED => { + Err(GrammarError::GrammarLazyPatternsInitErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(GrammarError::GrammarLazyPatternsInitVendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_sampler_init_grammar_lazy_patterns returned unrecognized status {other}" + ), + } } /// `LLGuidance` sampler for constrained decoding. @@ -719,7 +803,7 @@ mod tests { fn sanitize_grammar_strings_root_not_found() { let result = LlamaSampler::sanitize_grammar_strings("expr ::= \"hello\"", "root"); - assert_eq!(result.err(), Some(GrammarError::RootNotFound)); + assert!(matches!(result.err(), Some(GrammarError::RootNotFound))); } #[test] @@ -928,35 +1012,28 @@ mod tests { } #[test] - fn check_sampler_accept_status_invalid_argument() { + fn check_sampler_accept_status_null_sampler() { let result = super::check_sampler_accept_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT, + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG, std::ptr::null_mut(), ); assert!(matches!( result, - Err(crate::SamplerAcceptError::InvalidArgument) + Err(crate::SamplerAcceptError::NullSamplerArg) )); } #[test] - fn check_sampler_accept_status_exception() { + fn check_sampler_accept_status_exception_maps_to_typed_variant() { let result = super::check_sampler_accept_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION, + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION, std::ptr::null_mut(), ); assert!(matches!( result, - Err(crate::SamplerAcceptError::CppException(_)) + Err(crate::SamplerAcceptError::VendoredThrewCxxException { .. }) )); } - - #[test] - fn check_sampler_not_null_returns_error() { - let result = super::check_sampler_not_null(std::ptr::null_mut(), std::ptr::null_mut()); - - assert!(result.is_err()); - } } From 002b61bfbef2679017db72984bcf307eb35cf68a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 16:33:32 +0200 Subject: [PATCH 12/16] pass /EHsc to vendored llama.cpp msvc build so c++ exceptions can unwind into wrapper try/catch --- llama-cpp-bindings-build/src/cmake_config.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/llama-cpp-bindings-build/src/cmake_config.rs b/llama-cpp-bindings-build/src/cmake_config.rs index a52521e3..90b608d4 100644 --- a/llama-cpp-bindings-build/src/cmake_config.rs +++ b/llama-cpp-bindings-build/src/cmake_config.rs @@ -205,6 +205,7 @@ fn configure_platform_specific( TargetOs::Windows(WindowsVariant::Msvc) => { config.cflag("/w"); config.cxxflag("/w"); + config.cxxflag("/EHsc"); configure_msvc_release_workaround(config, profile); } TargetOs::Android => { From fa39d5a267a60d073a15ed098da907413500d752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 16:42:10 +0200 Subject: [PATCH 13/16] wrap remaining raw vendored ffi calls to surface c++ exceptions as typed rust errors --- llama-cpp-bindings-sys/wrapper_common.cpp | 215 ++++++++++++++++++ llama-cpp-bindings-sys/wrapper_common.h | 82 +++++++ llama-cpp-bindings-tests/tests/model.rs | 15 +- llama-cpp-bindings/src/context.rs | 83 +++++-- llama-cpp-bindings/src/error/decode_error.rs | 51 +++-- .../src/error/llama_context_load_error.rs | 18 +- .../src/error/llama_model_load_error.rs | 21 +- .../src/error/string_to_token_error.rs | 15 +- llama-cpp-bindings/src/model.rs | 144 +++++++++--- llama-cpp-bindings/src/token/data_array.rs | 39 +++- 10 files changed, 583 insertions(+), 100 deletions(-) diff --git a/llama-cpp-bindings-sys/wrapper_common.cpp b/llama-cpp-bindings-sys/wrapper_common.cpp index 4544ecfd..935af272 100644 --- a/llama-cpp-bindings-sys/wrapper_common.cpp +++ b/llama-cpp-bindings-sys/wrapper_common.cpp @@ -460,3 +460,218 @@ extern "C" llama_rs_sampler_accept_status llama_rs_sampler_accept( return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION; } } + +extern "C" llama_rs_load_model_from_file_status llama_rs_load_model_from_file( + const char * path, + struct llama_model_params params, + struct llama_model ** out_model, + char ** out_error) { + if (out_model) { + *out_model = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!path) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG; + } + if (!out_model) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG; + } + if (!out_error) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG; + } + try { + *out_model = llama_load_model_from_file(path, params); + if (!*out_model) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_LOAD_MODEL_FROM_FILE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_new_context_with_model_status llama_rs_new_context_with_model( + struct llama_model * model, + struct llama_context_params params, + struct llama_context ** out_ctx, + char ** out_error) { + if (out_ctx) { + *out_ctx = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!model) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG; + } + if (!out_ctx) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG; + } + if (!out_error) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG; + } + try { + *out_ctx = llama_new_context_with_model(model, params); + if (!*out_ctx) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_decode_status llama_rs_decode( + struct llama_context * ctx, + struct llama_batch batch, + int32_t * out_vendored_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_vendored_return_code) { + *out_vendored_return_code = 0; + } + if (!ctx) { + return LLAMA_RS_DECODE_NULL_CTX_ARG; + } + if (!out_error) { + return LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG; + } + try { + int32_t result = llama_decode(ctx, batch); + if (result != 0) { + if (out_vendored_return_code) { + *out_vendored_return_code = result; + } + return LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE; + } + return LLAMA_RS_DECODE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_tokenize_status llama_rs_tokenize( + const struct llama_vocab * vocab, + const char * text, + int32_t text_len, + llama_token * tokens, + int32_t n_tokens_max, + bool add_special, + bool parse_special, + int32_t * out_returned_count, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_returned_count) { + *out_returned_count = 0; + } + if (!vocab) { + return LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG; + } + if (!text) { + return LLAMA_RS_TOKENIZE_NULL_TEXT_ARG; + } + if (!out_returned_count) { + return LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG; + } + if (!out_error) { + return LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG; + } + try { + int32_t count = llama_tokenize( + vocab, text, text_len, tokens, n_tokens_max, add_special, parse_special); + *out_returned_count = count; + return LLAMA_RS_TOKENIZE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_sampler_apply_status llama_rs_sampler_apply( + struct llama_sampler * sampler, + struct llama_token_data_array * data_array, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (!sampler) { + return LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG; + } + if (!data_array) { + return LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG; + } + if (!out_error) { + return LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG; + } + try { + llama_sampler_apply(sampler, data_array); + return LLAMA_RS_SAMPLER_APPLY_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION; + } +} diff --git a/llama-cpp-bindings-sys/wrapper_common.h b/llama-cpp-bindings-sys/wrapper_common.h index d1770b2a..20aaee09 100644 --- a/llama-cpp-bindings-sys/wrapper_common.h +++ b/llama-cpp-bindings-sys/wrapper_common.h @@ -172,6 +172,88 @@ llama_rs_memory_seq_div_status llama_rs_memory_seq_div( int d, char ** out_error); +typedef enum llama_rs_load_model_from_file_status { + LLAMA_RS_LOAD_MODEL_FROM_FILE_OK = 0, + LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG, + LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG, + LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG, + LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL, + LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_load_model_from_file_status; + +llama_rs_load_model_from_file_status llama_rs_load_model_from_file( + const char * path, + struct llama_model_params params, + struct llama_model ** out_model, + char ** out_error); + +typedef enum llama_rs_new_context_with_model_status { + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK = 0, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_new_context_with_model_status; + +llama_rs_new_context_with_model_status llama_rs_new_context_with_model( + struct llama_model * model, + struct llama_context_params params, + struct llama_context ** out_ctx, + char ** out_error); + +typedef enum llama_rs_decode_status { + LLAMA_RS_DECODE_OK = 0, + LLAMA_RS_DECODE_NULL_CTX_ARG, + LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG, + LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE, + LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_decode_status; + +llama_rs_decode_status llama_rs_decode( + struct llama_context * ctx, + struct llama_batch batch, + int32_t * out_vendored_return_code, + char ** out_error); + +typedef enum llama_rs_tokenize_status { + LLAMA_RS_TOKENIZE_OK = 0, + LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG, + LLAMA_RS_TOKENIZE_NULL_TEXT_ARG, + LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG, + LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG, + LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_tokenize_status; + +llama_rs_tokenize_status llama_rs_tokenize( + const struct llama_vocab * vocab, + const char * text, + int32_t text_len, + llama_token * tokens, + int32_t n_tokens_max, + bool add_special, + bool parse_special, + int32_t * out_returned_count, + char ** out_error); + +typedef enum llama_rs_sampler_apply_status { + LLAMA_RS_SAMPLER_APPLY_OK = 0, + LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG, + LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG, + LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_apply_status; + +llama_rs_sampler_apply_status llama_rs_sampler_apply( + struct llama_sampler * sampler, + struct llama_token_data_array * data_array, + char ** out_error); + #ifdef __cplusplus } #endif diff --git a/llama-cpp-bindings-tests/tests/model.rs b/llama-cpp-bindings-tests/tests/model.rs index b69f0bd9..52270d69 100644 --- a/llama-cpp-bindings-tests/tests/model.rs +++ b/llama-cpp-bindings-tests/tests/model.rs @@ -1,6 +1,6 @@ use std::num::NonZeroU16; use std::num::NonZeroU32; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use anyhow::Result; use llama_cpp_bindings::ChatTemplateError; @@ -261,15 +261,15 @@ fn load_model_with_invalid_path_returns_error() { let model_params = LlamaModelParams::default(); let result = LlamaModel::load_from_file(backend, "/nonexistent/model.gguf", &model_params); - assert_eq!( + assert!(matches!( result.unwrap_err(), - LlamaModelLoadError::FileNotFound(PathBuf::from("/nonexistent/model.gguf")) - ); + LlamaModelLoadError::FileNotFound(path) if path == Path::new("/nonexistent/model.gguf"), + )); } #[test] #[serial] -fn load_model_with_invalid_file_content_returns_null_result() -> Result<()> { +fn load_model_with_invalid_file_content_returns_vendored_returned_null() -> Result<()> { let fixture = FixtureSession::open()?; let backend = fixture.backend(); let model_params = LlamaModelParams::default(); @@ -278,7 +278,10 @@ fn load_model_with_invalid_file_content_returns_null_result() -> Result<()> { let result = LlamaModel::load_from_file(backend, &dummy_path, &model_params); - assert_eq!(result.unwrap_err(), LlamaModelLoadError::NullResult); + assert!(matches!( + result.unwrap_err(), + LlamaModelLoadError::VendoredReturnedNull, + )); let _ = std::fs::remove_file(&dummy_path); Ok(()) diff --git a/llama-cpp-bindings/src/context.rs b/llama-cpp-bindings/src/context.rs index 61246cbc..a01aa6df 100644 --- a/llama-cpp-bindings/src/context.rs +++ b/llama-cpp-bindings/src/context.rs @@ -111,15 +111,45 @@ impl<'model> LlamaContext<'model> { params: LlamaContextParams, ) -> Result { let context_params = params.context_params; - let context = unsafe { - llama_cpp_bindings_sys::llama_new_context_with_model( + let mut out_ctx: *mut llama_cpp_bindings_sys::llama_context = std::ptr::null_mut(); + let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_new_context_with_model( model.model.as_ptr(), context_params, + &raw mut out_ctx, + &raw mut out_error, ) }; - let context = NonNull::new(context).ok_or(LlamaContextLoadError::NullReturn)?; - - Ok(Self::new(model, context, params.embeddings())) + match status { + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK => { + let context = NonNull::new(out_ctx) + .ok_or(LlamaContextLoadError::VendoredReturnedNull)?; + Ok(Self::new(model, context, params.embeddings())) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG => { + Err(LlamaContextLoadError::NullModelArg) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG => { + Err(LlamaContextLoadError::NullOutCtxArg) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG => { + Err(LlamaContextLoadError::NullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL => { + Err(LlamaContextLoadError::VendoredReturnedNull) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED => { + Err(LlamaContextLoadError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(LlamaContextLoadError::VendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_new_context_with_model returned unrecognized status {other}" + ), + } } /// Gets the max number of logical tokens that can be submitted to decode. Must be greater than or equal to [`Self::n_ubatch`]. @@ -203,22 +233,45 @@ impl<'model> LlamaContext<'model> { /// # Errors /// /// - `DecodeError` if the decoding failed. - /// - /// # Panics - /// - /// - the returned [`std::ffi::c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems) pub fn decode(&mut self, batch: &mut LlamaBatch) -> Result<(), DecodeError> { - let result = unsafe { - llama_cpp_bindings_sys::llama_decode(self.context.as_ptr(), batch.llama_batch) + let mut out_vendored_return_code: i32 = 0; + let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_decode( + self.context.as_ptr(), + batch.llama_batch, + &raw mut out_vendored_return_code, + &raw mut out_error, + ) }; - - match NonZeroI32::new(result) { - None => { + match status { + llama_cpp_bindings_sys::LLAMA_RS_DECODE_OK => { self.initialized_logits .clone_from(&batch.initialized_logits); Ok(()) } - Some(error) => Err(DecodeError::from(error)), + llama_cpp_bindings_sys::LLAMA_RS_DECODE_NULL_CTX_ARG => { + Err(DecodeError::NullContextArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG => { + Err(DecodeError::NullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE => { + let code = NonZeroI32::new(out_vendored_return_code).unwrap_or_else(|| { + unreachable!( + "llama_rs_decode reported a nonzero return code but the value was zero" + ) + }); + Err(DecodeError::from(code)) + } + llama_cpp_bindings_sys::LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED => { + Err(DecodeError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(DecodeError::VendoredThrewCxxException { message }) + } + other => unreachable!("llama_rs_decode returned unrecognized status {other}"), } } diff --git a/llama-cpp-bindings/src/error/decode_error.rs b/llama-cpp-bindings/src/error/decode_error.rs index 1a404605..105d19e9 100644 --- a/llama-cpp-bindings/src/error/decode_error.rs +++ b/llama-cpp-bindings/src/error/decode_error.rs @@ -1,31 +1,33 @@ use std::num::NonZeroI32; use std::os::raw::c_int; -/// Failed to decode a batch. #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum DecodeError { - /// No kv cache slot was available. - #[error("Decode Error 1: NoKvCacheSlot")] + #[error("llama_rs_decode called with null context")] + NullContextArg, + #[error("llama_rs_decode called with null out_error")] + NullOutErrorArg, + #[error("llama_decode returned non-zero code 1: no kv cache slot was available")] NoKvCacheSlot, - /// The computation was aborted by the abort callback. - #[error("Decode Error 2: Aborted")] + #[error("llama_decode returned non-zero code 2: aborted by abort callback")] Aborted, - /// The number of tokens in the batch was 0. - #[error("Decode Error -1: n_tokens == 0")] + #[error("llama_decode returned non-zero code -1: n_tokens == 0")] NTokensZero, - /// An unknown error occurred. - #[error("Decode Error {0}: unknown")] - Unknown(c_int), + #[error("llama_decode returned unrecognized non-zero code: {code}")] + VendoredReturnedUnrecognizedNonzeroCode { code: c_int }, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_decode threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } -/// Decode a error from llama.cpp into a [`DecodeError`]. impl From for DecodeError { fn from(value: NonZeroI32) -> Self { match value.get() { 1 => Self::NoKvCacheSlot, 2 => Self::Aborted, -1 => Self::NTokensZero, - error_code => Self::Unknown(error_code), + error_code => Self::VendoredReturnedUnrecognizedNonzeroCode { code: error_code }, } } } @@ -37,34 +39,33 @@ mod tests { use super::DecodeError; #[test] - fn decode_error_no_kv_cache_slot() { + fn no_kv_cache_slot_maps_from_code_one() { let error = DecodeError::from(NonZeroI32::new(1).expect("1 is non-zero")); assert_eq!(error, DecodeError::NoKvCacheSlot); - assert_eq!(error.to_string(), "Decode Error 1: NoKvCacheSlot"); } #[test] - fn decode_error_n_tokens_zero() { - let error = DecodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero")); + fn aborted_maps_from_code_two() { + let error = DecodeError::from(NonZeroI32::new(2).expect("2 is non-zero")); - assert_eq!(error, DecodeError::NTokensZero); - assert_eq!(error.to_string(), "Decode Error -1: n_tokens == 0"); + assert_eq!(error, DecodeError::Aborted); } #[test] - fn decode_error_aborted() { - let error = DecodeError::from(NonZeroI32::new(2).expect("2 is non-zero")); + fn n_tokens_zero_maps_from_code_negative_one() { + let error = DecodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero")); - assert_eq!(error, DecodeError::Aborted); - assert_eq!(error.to_string(), "Decode Error 2: Aborted"); + assert_eq!(error, DecodeError::NTokensZero); } #[test] - fn decode_error_unknown() { + fn unrecognized_code_falls_through_to_typed_variant() { let error = DecodeError::from(NonZeroI32::new(42).expect("42 is non-zero")); - assert_eq!(error, DecodeError::Unknown(42)); - assert_eq!(error.to_string(), "Decode Error 42: unknown"); + assert_eq!( + error, + DecodeError::VendoredReturnedUnrecognizedNonzeroCode { code: 42 } + ); } } diff --git a/llama-cpp-bindings/src/error/llama_context_load_error.rs b/llama-cpp-bindings/src/error/llama_context_load_error.rs index 752c88af..d2911c5e 100644 --- a/llama-cpp-bindings/src/error/llama_context_load_error.rs +++ b/llama-cpp-bindings/src/error/llama_context_load_error.rs @@ -1,7 +1,15 @@ -/// Failed to Load context -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum LlamaContextLoadError { - /// llama.cpp returned null - #[error("null reference from llama.cpp")] - NullReturn, + #[error("llama_rs_new_context_with_model called with null model")] + NullModelArg, + #[error("llama_rs_new_context_with_model called with null out_ctx")] + NullOutCtxArg, + #[error("llama_rs_new_context_with_model called with null out_error")] + NullOutErrorArg, + #[error("llama_rs_new_context_with_model returned null")] + VendoredReturnedNull, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_rs_new_context_with_model threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/error/llama_model_load_error.rs b/llama-cpp-bindings/src/error/llama_model_load_error.rs index a7b24012..96416b88 100644 --- a/llama-cpp-bindings/src/error/llama_model_load_error.rs +++ b/llama-cpp-bindings/src/error/llama_model_load_error.rs @@ -1,19 +1,24 @@ use std::ffi::NulError; use std::path::PathBuf; -/// An error that can occur when loading a model. -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum LlamaModelLoadError { - /// There was a null byte in a provided string and thus it could not be converted to a C string. #[error("null byte in string {0}")] NullError(#[from] NulError), - /// llama.cpp returned a nullptr - this could be many different causes. - #[error("null result from llama cpp")] - NullResult, - /// Failed to convert the path to a rust str. This means the path was not valid unicode #[error("failed to convert path {0} to str")] PathToStrError(PathBuf), - /// The model file does not exist at the given path. #[error("model file not found: {0}")] FileNotFound(PathBuf), + #[error("llama_rs_load_model_from_file called with null path")] + NullPathArg, + #[error("llama_rs_load_model_from_file called with null out_model")] + NullOutModelArg, + #[error("llama_rs_load_model_from_file called with null out_error")] + NullOutErrorArg, + #[error("llama_rs_load_model_from_file returned null (model failed to load)")] + VendoredReturnedNull, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_rs_load_model_from_file threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/error/string_to_token_error.rs b/llama-cpp-bindings/src/error/string_to_token_error.rs index dc00b484..68045f2f 100644 --- a/llama-cpp-bindings/src/error/string_to_token_error.rs +++ b/llama-cpp-bindings/src/error/string_to_token_error.rs @@ -1,12 +1,21 @@ use std::ffi::NulError; -/// Failed to convert a string to a token sequence. #[derive(Debug, thiserror::Error)] pub enum StringToTokenError { - /// the string contained a null byte and thus could not be converted to a c string. #[error("{0}")] NulError(#[from] NulError), #[error("{0}")] - /// Failed to convert a provided integer to a [`c_int`]. CIntConversionError(#[from] std::num::TryFromIntError), + #[error("llama_rs_tokenize called with null vocab")] + NullVocabArg, + #[error("llama_rs_tokenize called with null text")] + NullTextArg, + #[error("llama_rs_tokenize called with null out_returned_count")] + NullOutReturnedCountArg, + #[error("llama_rs_tokenize called with null out_error")] + NullOutErrorArg, + #[error("wrapper failed to duplicate the C++ exception message into a Rust-owned string")] + ErrorStringAllocationFailed, + #[error("llama_rs_tokenize threw a C++ exception: {message}")] + VendoredThrewCxxException { message: String }, } diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs index 91011890..71acb711 100644 --- a/llama-cpp-bindings/src/model.rs +++ b/llama-cpp-bindings/src/model.rs @@ -218,35 +218,29 @@ impl LlamaModel { let (c_string, c_string_len) = cstring_with_validated_len(str)?; let buffer_capacity = c_int::try_from(buffer.capacity())?; - let size = unsafe { - llama_cpp_bindings_sys::llama_tokenize( + let size = invoke_rs_tokenize( + self.vocab_ptr(), + c_string.as_ptr(), + c_string_len, + buffer + .as_mut_ptr() + .cast::(), + buffer_capacity, + add_bos, + )?; + + let size = if size.is_negative() { + buffer.reserve_exact(usize::try_from(-size)?); + invoke_rs_tokenize( self.vocab_ptr(), c_string.as_ptr(), c_string_len, buffer .as_mut_ptr() .cast::(), - buffer_capacity, + -size, add_bos, - true, - ) - }; - - let size = if size.is_negative() { - buffer.reserve_exact(usize::try_from(-size)?); - unsafe { - llama_cpp_bindings_sys::llama_tokenize( - self.vocab_ptr(), - c_string.as_ptr(), - c_string_len, - buffer - .as_mut_ptr() - .cast::(), - -size, - add_bos, - true, - ) - } + )? } else { size }; @@ -577,22 +571,52 @@ impl LlamaModel { } let cstr = CString::new(path_str)?; - let llama_model = unsafe { - llama_cpp_bindings_sys::llama_load_model_from_file(cstr.as_ptr(), params.params) + let mut out_model: *mut llama_cpp_bindings_sys::llama_model = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_load_model_from_file( + cstr.as_ptr(), + params.params, + &raw mut out_model, + &raw mut out_error, + ) }; - - let model = match NonNull::new(llama_model) { - Some(ptr) => ptr, - None if !path.exists() => { - return Err(LlamaModelLoadError::FileNotFound(path.to_path_buf())); + match status { + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_OK => { + let model = NonNull::new(out_model) + .ok_or(LlamaModelLoadError::VendoredReturnedNull)?; + Ok(Self { + model, + tok_env: OnceLock::new(), + }) } - None => return Err(LlamaModelLoadError::NullResult), - }; - - Ok(Self { - model, - tok_env: OnceLock::new(), - }) + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG => { + Err(LlamaModelLoadError::NullPathArg) + } + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG => { + Err(LlamaModelLoadError::NullOutModelArg) + } + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG => { + Err(LlamaModelLoadError::NullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL => { + if path.exists() { + Err(LlamaModelLoadError::VendoredReturnedNull) + } else { + Err(LlamaModelLoadError::FileNotFound(path.to_path_buf())) + } + } + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => { + Err(LlamaModelLoadError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(LlamaModelLoadError::VendoredThrewCxxException { message }) + } + other => unreachable!( + "llama_rs_load_model_from_file returned unrecognized status {other}" + ), + } } /// Initializes a lora adapter from a file. @@ -1592,6 +1616,54 @@ fn read_optional_owned_cstr(ptr: *const c_char) -> Result, Marker Ok(Some(String::from_utf8(bytes)?)) } +fn invoke_rs_tokenize( + vocab: *const llama_cpp_bindings_sys::llama_vocab, + text: *const c_char, + text_len: c_int, + tokens: *mut llama_cpp_bindings_sys::llama_token, + n_tokens_max: c_int, + add_bos: bool, +) -> Result { + let mut out_count: i32 = 0; + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_tokenize( + vocab, + text, + text_len, + tokens, + n_tokens_max, + add_bos, + true, + &raw mut out_count, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_OK => Ok(out_count), + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG => { + Err(StringToTokenError::NullVocabArg) + } + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_NULL_TEXT_ARG => { + Err(StringToTokenError::NullTextArg) + } + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG => { + Err(StringToTokenError::NullOutReturnedCountArg) + } + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG => { + Err(StringToTokenError::NullOutErrorArg) + } + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED => { + Err(StringToTokenError::ErrorStringAllocationFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(StringToTokenError::VendoredThrewCxxException { message }) + } + other => unreachable!("llama_rs_tokenize returned unrecognized status {other}"), + } +} + fn collect_optional_cstr_pair( first_ptr: *const c_char, second_ptr: *const c_char, diff --git a/llama-cpp-bindings/src/token/data_array.rs b/llama-cpp-bindings/src/token/data_array.rs index af2134df..ec3afbf2 100644 --- a/llama-cpp-bindings/src/token/data_array.rs +++ b/llama-cpp-bindings/src/token/data_array.rs @@ -125,14 +125,49 @@ impl LlamaTokenDataArray { result } - /// Modifies the data array by applying a sampler to it + /// Modifies the data array by applying a sampler to it. + /// + /// # Panics + /// + /// Panics if the vendored sampler throws a C++ exception. `llama_sampler_apply` is + /// documented to be a pure logit transform and is not expected to throw; if it does + /// the failure is propagated as a panic per the crash-fast invariant. pub fn apply_sampler(&mut self, sampler: &LlamaSampler) { unsafe { self.modify_as_c_llama_token_data_array(|c_llama_token_data_array| { - llama_cpp_bindings_sys::llama_sampler_apply( + let mut out_error: *mut std::os::raw::c_char = ptr::null_mut(); + let status = llama_cpp_bindings_sys::llama_rs_sampler_apply( sampler.sampler, c_llama_token_data_array, + &raw mut out_error, ); + match status { + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_OK => {} + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG => { + panic!("llama_rs_sampler_apply received null sampler pointer") + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG => { + panic!("llama_rs_sampler_apply received null data array pointer") + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG => { + panic!( + "llama_rs_sampler_apply reported null out_error despite valid Rust pointer" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED => { + panic!( + "llama_rs_sampler_apply could not allocate a Rust-owned copy of the C++ exception message" + ) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION => { + let message = + crate::ffi_error_reader::read_and_free_cpp_error(out_error); + panic!("llama_rs_sampler_apply threw a C++ exception: {message}"); + } + other => unreachable!( + "llama_rs_sampler_apply returned unrecognized status {other}" + ), + } }); } } From 38cfdf75f51f057ce001938d7b14ab9962acabc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 16:58:01 +0200 Subject: [PATCH 14/16] apply cargo fmt to phase 1 wrapper refactor --- .../tests/context_kv_cache.rs | 2 +- llama-cpp-bindings/src/context.rs | 10 ++++--- .../src/error/parse_chat_message_error.rs | 12 ++++++--- .../src/json_schema_to_grammar.rs | 6 ++--- llama-cpp-bindings/src/model.rs | 26 ++++++++++++------- llama-cpp-bindings/src/mtmd/mtmd_context.rs | 13 +++++----- .../src/mtmd/mtmd_eval_error.rs | 5 +--- .../src/mtmd/mtmd_tokenize_error.rs | 4 ++- 8 files changed, 46 insertions(+), 32 deletions(-) diff --git a/llama-cpp-bindings-tests/tests/context_kv_cache.rs b/llama-cpp-bindings-tests/tests/context_kv_cache.rs index e8abb54b..39ee2714 100644 --- a/llama-cpp-bindings-tests/tests/context_kv_cache.rs +++ b/llama-cpp-bindings-tests/tests/context_kv_cache.rs @@ -4,8 +4,8 @@ use std::num::NonZeroU32; use anyhow::Result; use llama_cpp_bindings::context::LlamaContext; use llama_cpp_bindings::context::kv_cache::KvCacheConversionError; -use llama_cpp_bindings::error::{KvCacheSeqAddError, KvCacheSeqDivError}; use llama_cpp_bindings::context::params::LlamaContextParams; +use llama_cpp_bindings::error::{KvCacheSeqAddError, KvCacheSeqDivError}; use llama_cpp_bindings::llama_batch::LlamaBatch; use llama_cpp_bindings::model::AddBos; use llama_cpp_bindings_tests::FixtureSession; diff --git a/llama-cpp-bindings/src/context.rs b/llama-cpp-bindings/src/context.rs index a01aa6df..a58e1039 100644 --- a/llama-cpp-bindings/src/context.rs +++ b/llama-cpp-bindings/src/context.rs @@ -268,7 +268,8 @@ impl<'model> LlamaContext<'model> { Err(DecodeError::ErrorStringAllocationFailed) } llama_cpp_bindings_sys::LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION => { - let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; Err(DecodeError::VendoredThrewCxxException { message }) } other => unreachable!("llama_rs_decode returned unrecognized status {other}"), @@ -297,7 +298,9 @@ impl<'model> LlamaContext<'model> { .clone_from(&batch.initialized_logits); Ok(()) } - llama_cpp_bindings_sys::LLAMA_RS_ENCODE_NULL_CTX_ARG => Err(EncodeError::NullContextArg), + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_NULL_CTX_ARG => { + Err(EncodeError::NullContextArg) + } llama_cpp_bindings_sys::LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER => { Err(EncodeError::ModelHasNoEncoder) } @@ -313,7 +316,8 @@ impl<'model> LlamaContext<'model> { Err(EncodeError::ErrorStringAllocationFailed) } llama_cpp_bindings_sys::LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION => { - let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; Err(EncodeError::VendoredThrewCxxException { message }) } other => unreachable!("llama_rs_encode returned unrecognized status {other}"), diff --git a/llama-cpp-bindings/src/error/parse_chat_message_error.rs b/llama-cpp-bindings/src/error/parse_chat_message_error.rs index cc1ccc06..9359c133 100644 --- a/llama-cpp-bindings/src/error/parse_chat_message_error.rs +++ b/llama-cpp-bindings/src/error/parse_chat_message_error.rs @@ -28,7 +28,9 @@ pub enum ParseChatMessageError { ToolCallCountNullHandleArg, #[error("llama_rs_parsed_chat_tool_call_count threw a C++ exception: {message}")] ToolCallCountThrewCxxException { message: String }, - #[error("llama_rs_parsed_chat_tool_call_count wrapper failed to duplicate the C++ exception string")] + #[error( + "llama_rs_parsed_chat_tool_call_count wrapper failed to duplicate the C++ exception string" + )] ToolCallCountErrorStringAllocationFailed, #[error("llama_rs_parsed_chat_tool_call_id called with null handle")] ToolCallIdNullHandleArg, @@ -36,7 +38,9 @@ pub enum ParseChatMessageError { ToolCallIdIndexOutOfBounds { index: usize }, #[error("llama_rs_parsed_chat_tool_call_id threw a C++ exception: {message}")] ToolCallIdThrewCxxException { message: String }, - #[error("llama_rs_parsed_chat_tool_call_id wrapper failed to duplicate the C++ exception string")] + #[error( + "llama_rs_parsed_chat_tool_call_id wrapper failed to duplicate the C++ exception string" + )] ToolCallIdErrorStringAllocationFailed, #[error("llama_rs_parsed_chat_tool_call_name called with null handle")] ToolCallNameNullHandleArg, @@ -44,7 +48,9 @@ pub enum ParseChatMessageError { ToolCallNameIndexOutOfBounds { index: usize }, #[error("llama_rs_parsed_chat_tool_call_name threw a C++ exception: {message}")] ToolCallNameThrewCxxException { message: String }, - #[error("llama_rs_parsed_chat_tool_call_name wrapper failed to duplicate the C++ exception string")] + #[error( + "llama_rs_parsed_chat_tool_call_name wrapper failed to duplicate the C++ exception string" + )] ToolCallNameErrorStringAllocationFailed, #[error("llama_rs_parsed_chat_tool_call_arguments called with null handle")] ToolCallArgumentsNullHandleArg, diff --git a/llama-cpp-bindings/src/json_schema_to_grammar.rs b/llama-cpp-bindings/src/json_schema_to_grammar.rs index d9b29cf8..eda70c49 100644 --- a/llama-cpp-bindings/src/json_schema_to_grammar.rs +++ b/llama-cpp-bindings/src/json_schema_to_grammar.rs @@ -49,9 +49,9 @@ pub fn json_schema_to_grammar(schema_json: &str) -> Result unreachable!( - "llama_rs_json_schema_to_grammar returned unrecognized status {other}" - ), + other => { + unreachable!("llama_rs_json_schema_to_grammar returned unrecognized status {other}") + } } } diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs index 71acb711..9b6736d1 100644 --- a/llama-cpp-bindings/src/model.rs +++ b/llama-cpp-bindings/src/model.rs @@ -1007,7 +1007,9 @@ impl LlamaModel { out_error = ptr::null_mut(); Err(ParseChatMessageError::ParseException { message }) } - other => unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}"), + other => { + unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}") + } }; let mut free_error: *mut c_char = ptr::null_mut(); @@ -1019,14 +1021,19 @@ impl LlamaModel { unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; Ok(value) } - (Ok(_), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION) => { + ( + Ok(_), + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION, + ) => { unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; - let message = unsafe { - crate::ffi_error_reader::read_and_free_cpp_error(free_error) - }; + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(free_error) }; Err(ParseChatMessageError::FreeDestructorThrewCxxException { message }) } - (Ok(_), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED) => { + ( + Ok(_), + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED, + ) => { unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; Err(ParseChatMessageError::FreeErrorStringAllocationFailed) } @@ -1055,7 +1062,8 @@ impl LlamaModel { pub fn diagnose_tool_call_synthetic_renders( &self, ) -> Result<(String, String), MarkerDetectionError> { - let (no_tools, with_tools) = invoke_diagnose_tool_call_synthetic_renders(self.model.as_ptr())?; + let (no_tools, with_tools) = + invoke_diagnose_tool_call_synthetic_renders(self.model.as_ptr())?; Ok((no_tools.unwrap_or_default(), with_tools.unwrap_or_default())) } @@ -1168,8 +1176,7 @@ fn read_parsed_chat_content( Err(ParseChatMessageError::ContentErrorStringAllocationFailed) } llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION => { - let message = - unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; Err(ParseChatMessageError::ContentThrewCxxException { message }) } other => unreachable!("llama_rs_parsed_chat_content returned unrecognized status {other}"), @@ -1804,4 +1811,3 @@ mod extract_meta_string_tests { assert!(result.is_err()); } } - diff --git a/llama-cpp-bindings/src/mtmd/mtmd_context.rs b/llama-cpp-bindings/src/mtmd/mtmd_context.rs index d8952401..9ce51c4c 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_context.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_context.rs @@ -126,11 +126,10 @@ impl MtmdContext { match status { llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_OK => { - let context = NonNull::new(out_ctx).ok_or_else(|| { - MtmdInitError::VendoredReturnedNull { + let context = + NonNull::new(out_ctx).ok_or_else(|| MtmdInitError::VendoredReturnedNull { path: std::path::PathBuf::from(mmproj_path), - } - })?; + })?; Ok(Self { context }) } llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG => { @@ -154,9 +153,9 @@ impl MtmdContext { let message = unsafe { read_and_free_cpp_error(out_error) }; Err(MtmdInitError::VendoredThrewCxxException { message }) } - other => unreachable!( - "llama_rs_mtmd_init_from_file returned unrecognized status: {other}" - ), + other => { + unreachable!("llama_rs_mtmd_init_from_file returned unrecognized status: {other}") + } } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs index 40431fc0..fdf46896 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs @@ -3,10 +3,7 @@ use crate::mtmd::image_chunk_batch_size_mismatch::ImageChunkBatchSizeMismatch; #[derive(thiserror::Error, Debug)] pub enum MtmdEvalError { #[error("batch size {requested} exceeds context batch size {context_max}")] - BatchSizeExceedsContextLimit { - requested: i32, - context_max: u32, - }, + BatchSizeExceedsContextLimit { requested: i32, context_max: u32 }, #[error( "image chunk has {} tokens but n_batch is {}", .0.image_tokens, diff --git a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs index da502243..4f9f8c4c 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs @@ -14,7 +14,9 @@ pub enum MtmdTokenizeError { NullTextArg, #[error("Wrapper received a null bitmaps argument with num_bitmaps > 0")] NullBitmapsArgWhenNumBitmapsNonzero, - #[error("mtmd_tokenize reported that the number of bitmaps does not match the number of markers in the text")] + #[error( + "mtmd_tokenize reported that the number of bitmaps does not match the number of markers in the text" + )] BitmapCountDoesNotMatchMarkerCount, #[error("mtmd_tokenize reported an image preprocessing error")] ImagePreprocessingError, From 5e4b7c5edc6fb4ad1d1cc2948acb387bee6eb5bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 17:12:34 +0200 Subject: [PATCH 15/16] use per-wrapper LLAMA_RS_SAMPLER_ACCEPT_OK constant in sampler accept test --- llama-cpp-bindings/src/sampling.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-cpp-bindings/src/sampling.rs b/llama-cpp-bindings/src/sampling.rs index 4c63980c..596d1137 100644 --- a/llama-cpp-bindings/src/sampling.rs +++ b/llama-cpp-bindings/src/sampling.rs @@ -1004,7 +1004,7 @@ mod tests { #[test] fn check_sampler_accept_status_ok() { let result = super::check_sampler_accept_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK, + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_OK, std::ptr::null_mut(), ); From f5d0272627e0f669c91614b7f42aa41ad291f12e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Zagajewska?= Date: Sat, 16 May 2026 17:39:06 +0200 Subject: [PATCH 16/16] build llm tests in release mode to avoid windows debug-crt _osfile assertions --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f6830ed4..f92be572 100644 --- a/Makefile +++ b/Makefile @@ -5,8 +5,8 @@ DEVICE_FEATURE = $(if $(TEST_DEVICE),--features $(TEST_DEVICE),) LLM_BASE_FEATURE_FLAGS = $(DEVICE_FEATURE) LLM_QWEN_CAPABLE_FEATURE_FLAGS = $(DEVICE_FEATURE) --features $(QWEN_CAPABLE_FEATURES) -CARGO_TEST_LLM_FLAGS = --no-fail-fast -p llama-cpp-bindings-tests $(LLM_BASE_FEATURE_FLAGS) -- --test-threads=1 -CARGO_TEST_LLM_FLAGS_QWEN_CAPABLE = --no-fail-fast -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- --test-threads=1 +CARGO_TEST_LLM_FLAGS = --release --no-fail-fast -p llama-cpp-bindings-tests $(LLM_BASE_FEATURE_FLAGS) -- --test-threads=1 +CARGO_TEST_LLM_FLAGS_QWEN_CAPABLE = --release --no-fail-fast -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- --test-threads=1 QWEN3_5_0_8B_ENV = \ LLAMA_TEST_HF_REPO=unsloth/Qwen3.5-0.8B-GGUF \