diff --git a/ggml/src/ggml-openvino/CMakeLists.txt b/ggml/src/ggml-openvino/CMakeLists.txt index 175b585661d..3753b1b969c 100644 --- a/ggml/src/ggml-openvino/CMakeLists.txt +++ b/ggml/src/ggml-openvino/CMakeLists.txt @@ -11,7 +11,10 @@ ggml_add_backend_library(ggml-openvino ${GGML_HEADERS_OPENVINO} ) -target_link_libraries(ggml-openvino PRIVATE openvino::runtime TBB::tbb OpenCL::OpenCL) +target_link_libraries(ggml-openvino + PRIVATE openvino::runtime TBB::tbb + PUBLIC OpenCL::OpenCL +) if (GGML_OPENVINO) if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index 0938d2273e9..131ae723ae6 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -843,15 +843,30 @@ ov::element::Type GgmlOvDecoder::get_ov_type(const ggml_tensor * tensor) { } ov::PartialShape GgmlOvDecoder::get_input_shape(int node_idx, const std::string & name) const { - return ov::PartialShape(get_shape(m_node_info_list[node_idx].node_inputs.at(name))); + auto& inputs = m_node_info_list[node_idx].node_inputs; + if (inputs.find(name) == inputs.end()) { + std::cerr << "\nCRASH CAUGHT: get_input_shape missing key: '" << name << "' on node " << node_idx << "\n"; + return ov::PartialShape::dynamic(); + } + return ov::PartialShape(get_shape(inputs.at(name))); } std::vector GgmlOvDecoder::get_input_stride(int node_idx, const std::string & name) const { - return get_stride(m_node_info_list[node_idx].node_inputs.at(name)); + auto& inputs = m_node_info_list[node_idx].node_inputs; + if (inputs.find(name) == inputs.end()) { + std::cerr << "\nCRASH CAUGHT: get_input_stride missing key: '" << name << "' on node " << node_idx << "\n"; + return {}; + } + return get_stride(inputs.at(name)); } ov::element::Type GgmlOvDecoder::get_input_type(int node_idx, const std::string & name) const { - return get_ov_type(m_node_info_list[node_idx].node_inputs.at(name)); + auto& inputs = m_node_info_list[node_idx].node_inputs; + if (inputs.find(name) == inputs.end()) { + std::cerr << "\nCRASH CAUGHT: get_input_type missing key: '" << name << "' on node " << node_idx << "\n"; + return ov::element::dynamic; + } + return get_ov_type(inputs.at(name)); } size_t GgmlOvDecoder::get_input_size() const { @@ -879,6 +894,28 @@ std::vector GgmlOvDecoder::get_output_names(int node_idx) const { return {m_node_info_list[node_idx].node_output_name}; } +// OUR NEW IMPLEMENTATIONS +std::vector GgmlOvDecoder::get_input_tensors(int node_idx) const { + const auto& info = m_node_info_list[node_idx]; + std::vector input_tensors; + input_tensors.reserve(info.node_inputs_names.size()); + + for (const auto& name : info.node_inputs_names) { + // Safely get the pointer mapped by OpenVINO's strict port names + if (info.node_inputs.find(name) != info.node_inputs.end()) { + input_tensors.push_back(info.node_inputs.at(name)); + } else { + input_tensors.push_back(nullptr); // Fallback flag + } + } + return input_tensors; +} + +std::vector GgmlOvDecoder::get_output_tensors(int node_idx) const { + // The output is simple: just return the raw node_output pointer wrapped in a vector. + return {m_node_info_list[node_idx].node_output}; +} + const std::string & GgmlOvDecoder::get_op_name() const { static const std::string unknown_name = "UNKNOWN_OP_NAME"; return unknown_name; @@ -889,7 +926,12 @@ const std::string & GgmlOvDecoder::get_op_name(int node_idx) const { } int32_t * GgmlOvDecoder::get_input_op_params(int node_idx, const std::string & name) const { - return m_node_info_list[node_idx].node_inputs.at(name)->op_params; + auto& inputs = m_node_info_list[node_idx].node_inputs; + if (inputs.find(name) == inputs.end()) { + std::cerr << "\nCRASH CAUGHT: get_input_op_params missing key: '" << name << "' on node " << node_idx << "\n"; + return nullptr; + } + return inputs.at(name)->op_params; } int32_t * GgmlOvDecoder::get_output_op_params(int node_idx) const { @@ -954,13 +996,30 @@ std::string GgmlOvDecoder::compute_op_type(const ggml_tensor * node) { }; switch (node->op) { - case GGML_OP_UNARY: - return unary_ops.at(ggml_get_unary_op(node)); - case GGML_OP_GLU: - return glu_ops.at(ggml_get_glu_op(node)); - default: + case GGML_OP_UNARY: { + auto uop = ggml_get_unary_op(node); + if (unary_ops.find(uop) == unary_ops.end()) { + std::cerr << "\n[GgmlOvDecoder] MISSING UNARY OP: " << uop << " (Node: " << node->name << ")\n"; + return "UNKNOWN_GGML_OP"; + } + return unary_ops.at(uop); + } + case GGML_OP_GLU: { + auto gop = ggml_get_glu_op(node); + if (glu_ops.find(gop) == glu_ops.end()) { + std::cerr << "\n[GgmlOvDecoder] MISSING GLU OP: " << gop << " (Node: " << node->name << ")\n"; + return "UNKNOWN_GGML_OP"; + } + return glu_ops.at(gop); + } + default: { + if (ops.find(node->op) == ops.end()) { + std::cerr << "\n[GgmlOvDecoder] CRASH PREVENTED: Missing GGML OP Code: " << node->op << " (Node: " << node->name << ")\n"; + return "UNKNOWN_GGML_OP"; + } return ops.at(node->op); } + } static const std::string unknown_op = "UNKNOWN_GGML_OP"; return unknown_op; } diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index 3ae25ddda32..7c999e22040 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -102,6 +102,8 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { virtual std::vector get_input_names(int node_idx) const override; + virtual std::vector get_input_tensors(int node_idx) const override; + virtual ov::PartialShape get_output_shape(int node_idx) const override; virtual ov::element::Type get_output_type(int node_idx) const override; @@ -112,6 +114,8 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { virtual std::vector get_output_names(int node_idx) const override; + virtual std::vector get_output_tensors(int node_idx) const override; + virtual const std::string & get_op_type() const override; virtual const std::string & get_op_type(int node_idx) const override; diff --git a/ggml/src/ggml-openvino/ggml-openvino-extra.h b/ggml/src/ggml-openvino/ggml-openvino-extra.h index cd0baf4a681..64991f0a950 100644 --- a/ggml/src/ggml-openvino/ggml-openvino-extra.h +++ b/ggml/src/ggml-openvino/ggml-openvino-extra.h @@ -66,6 +66,9 @@ struct ggml_openvino_device_config { ov::AnyMap compile_config; cl_command_queue cl_queue = nullptr; + bool is_capturing = false; + struct ggml_cgraph * captured_graph = nullptr; + void init(); ~ggml_openvino_device_config(); }; @@ -178,5 +181,22 @@ struct ggml_backend_openvino_context { std::shared_ptr runtime_context = nullptr; + bool is_capturing = false; + struct ggml_cgraph * captured_graph = nullptr; + ggml_backend_openvino_context() = default; }; + +#ifdef __cplusplus +extern "C" { +#endif + +void ggml_backend_ov_set_capture_mode(bool enable); +struct ggml_cgraph * ggml_backend_ov_get_captured_graph(); + +// maths bypass (temporary) +void ggml_backend_ov_set_bypass(bool bypass); + +#ifdef __cplusplus +} +#endif diff --git a/ggml/src/ggml-openvino/ggml-openvino.cpp b/ggml/src/ggml-openvino/ggml-openvino.cpp index 0031cb7369f..ad22c41f4b6 100644 --- a/ggml/src/ggml-openvino/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino/ggml-openvino.cpp @@ -606,8 +606,18 @@ static const char * ggml_backend_openvino_get_name(ggml_backend_t backend) { } static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { + // 1. Grab our internal context + auto& config = ggml_openvino_get_device_config(); + + // 2. --- OPENVINO GENAI TRACER INTERCEPTION --- + if (config.is_capturing) { + config.captured_graph = cgraph; + return GGML_STATUS_SUCCESS; + } + // --------------------------------------------- + + // 3. Normal execution path (if we are NOT capturing) return ov_graph_compute(cgraph, backend); - GGML_UNUSED(backend); } static const ggml_backend_i ggml_backend_openvino_interface = { @@ -732,6 +742,11 @@ static void ggml_backend_openvino_device_get_props(ggml_backend_dev_t dev, ggml_ /* .buffer_from_host_ptr = */ false, /* .events = */ false, }; + + if (ggml_openvino_get_device_config().is_capturing) { + props->caps.host_buffer = true; + props->caps.buffer_from_host_ptr = true; + } } static ggml_backend_t ggml_backend_openvino_device_init(ggml_backend_dev_t dev, const char * params) { @@ -922,9 +937,19 @@ static bool is_op_unsupported_case(const ggml_tensor * op) { return false; } +extern bool g_ov_bypass_mode; + static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { GGML_ASSERT(dev->reg != nullptr); + if (g_ov_bypass_mode) { + return false; + } + + if (ggml_openvino_get_device_config().is_capturing) { + return true; + } + static std::set supported_types{GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_I64, GGML_TYPE_I32, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_K, GGML_TYPE_Q5_K, GGML_TYPE_Q8_0, GGML_TYPE_Q6_K}; @@ -1017,6 +1042,10 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con } static bool ggml_backend_openvino_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { + + if (ggml_openvino_get_device_config().is_capturing) { + return true; + } return ggml_backend_buft_is_openvino(buft) || ggml_backend_buft_is_host(buft); GGML_UNUSED(dev); } @@ -1108,3 +1137,23 @@ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_openvino_reg(void) { return ® } + +void ggml_backend_ov_set_capture_mode(bool enable) { + auto& config = ggml_openvino_get_device_config(); + config.is_capturing = enable; + if (enable) { + config.captured_graph = nullptr; + } +} + +struct ggml_cgraph * ggml_backend_ov_get_captured_graph() { + return ggml_openvino_get_device_config().captured_graph; +} + +// phase-1 temporary bypass system for ov::model verification + +bool g_ov_bypass_mode = false; // The global kill switch + +void ggml_backend_ov_set_bypass(bool bypass) { + g_ov_bypass_mode = bypass; +} diff --git a/ggml/src/ggml-openvino/openvino/decoder.h b/ggml/src/ggml-openvino/openvino/decoder.h index 3b8da2be5d2..204028a2638 100644 --- a/ggml/src/ggml-openvino/openvino/decoder.h +++ b/ggml/src/ggml-openvino/openvino/decoder.h @@ -6,6 +6,8 @@ #include #include +struct ggml_tensor; + namespace ov { namespace frontend { namespace ggml { @@ -33,6 +35,8 @@ class GgmlDecoder : public DecoderBase { virtual PartialShape get_output_shape(int node_idx) const = 0; + virtual std::vector get_input_tensors(int node_idx) const = 0; + virtual element::Type get_output_type(const int node_idx) const = 0; virtual int32_t* get_input_op_params(int node_idx, const std::string& name) const = 0; @@ -41,6 +45,8 @@ class GgmlDecoder : public DecoderBase { virtual std::vector get_output_names(int node_idx) const = 0; + virtual std::vector get_output_tensors(int node_idx) const = 0; + virtual const std::string& get_op_type() const = 0; virtual const std::string& get_op_type(int node_idx) const = 0; diff --git a/ggml/src/ggml-openvino/openvino/node_context.h b/ggml/src/ggml-openvino/openvino/node_context.h index aa484128a95..58c67c427ad 100644 --- a/ggml/src/ggml-openvino/openvino/node_context.h +++ b/ggml/src/ggml-openvino/openvino/node_context.h @@ -3,8 +3,11 @@ #include #include #include - +#include #include "decoder.h" +#include "ggml.h" + +struct ggml_tensor; namespace ov { namespace frontend { @@ -13,20 +16,26 @@ namespace ggml { class TranslateSession; typedef std::map> TensorMap; +typedef std::map> TensorPtrMap; class NodeContext : public frontend::NodeContext { public: NodeContext(const std::shared_ptr& decoder, std::shared_ptr& tensor_map, + std::shared_ptr& tensor_ptr_map, int node_idx, TranslateSession* translate_session = nullptr) : ov::frontend::NodeContext(decoder->get_op_type(node_idx)), m_decoder(decoder), m_tensor_map(tensor_map), + m_tensor_ptr_map(tensor_ptr_map), m_node_idx(node_idx), m_translate_session(translate_session) { m_input_names = decoder->get_input_names(m_node_idx); m_output_names = decoder->get_output_names(m_node_idx); + + m_input_tensors = decoder->get_input_tensors(m_node_idx); + m_output_tensors = decoder->get_output_tensors(m_node_idx); } TranslateSession* get_translate_session() const { @@ -66,7 +75,50 @@ class NodeContext : public frontend::NodeContext { } Output get_input(int idx) const override { - return m_tensor_map->at(m_input_names[idx]); + // 1. Safely check the pointer map first (Physical Memory Address) + if (idx < m_input_tensors.size() && m_input_tensors[idx] != nullptr) { + auto it = m_tensor_ptr_map->find(m_input_tensors[idx]); + if (it != m_tensor_ptr_map->end()) { + // PROOF IT WORKS: + // std::cout << "[DEBUG] Tensor found perfectly via Pointer Map!\n"; + return it->second; // Found it via exact pointer! + } + } + + // 2. Fallback to the string map (For OpenVINO synthetic tensors & static weights) + if (idx < m_input_names.size()) { + std::string target_name = m_input_names[idx]; + + auto it = m_tensor_map->find(target_name); + if (it != m_tensor_map->end()) { + return it->second; // Found it via string name! + } + + // Temporary fallback: Brute-Force Pointer Search + // If the pointer mutated due to in-place optimization, scan all translated physical nodes! + for (const auto& pair : *m_tensor_ptr_map) { + if (pair.first != nullptr) { + std::string actual_name = ggml_get_name(pair.first); + + // IF WE ARE LOOKING FOR NORM-21, PRINT EVERYTHING WE HAVE! + if (target_name == "norm-21" || target_name == "ffn_inp-21") { + std::cout << "[DEBUG TRAP] In memory pointer name: '" << actual_name << "'\n"; + } + + if (actual_name == target_name) { + std::cerr << "[GGUFReaderV2] Recovered shifted tensor via brute-force: '" << target_name << "'\n"; + return pair.second; + } + } + } + + // 🚨 THE GSOC FIX: NO MORE DUMMY NODES! 🚨 + // If we get here, the node is TRULY missing. We throw a hard error + // so we know if our Scheduler Capture Override worked or failed. + throw std::runtime_error("[GGUFReaderV2] FATAL: Tensor completely lost during extraction: '" + target_name + "'"); + } + + throw std::runtime_error("CRITICAL: Input index out of bounds!"); } Output get_input(const std::string& name) const override { @@ -99,10 +151,13 @@ class NodeContext : public frontend::NodeContext { private: std::shared_ptr m_decoder; std::shared_ptr& m_tensor_map; + std::shared_ptr& m_tensor_ptr_map; int m_node_idx; TranslateSession* m_translate_session; std::vector m_input_names; std::vector m_output_names; + std::vector m_input_tensors; + std::vector m_output_tensors; }; using CreatorFunction = std::function; diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp index 23a1dea2496..2d2253d896c 100644 --- a/ggml/src/ggml-openvino/openvino/translate_session.cpp +++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp @@ -168,6 +168,7 @@ std::shared_ptr TranslateSession::translate_graph(const frontend::InputMo ov::ParameterVector params; ov::ResultVector results; auto tensor_map = std::make_shared(); + auto tensor_ptr_map = std::make_shared(); std::shared_ptr resulting_model; const auto & ggml_model = std::dynamic_pointer_cast(input_model); @@ -191,7 +192,15 @@ std::shared_ptr TranslateSession::translate_graph(const frontend::InputMo auto node_visitor = [&](std::shared_ptr decoder, int node_idx) { auto operation_type = decoder->get_op_type(node_idx); + + // TRAP 1: Is OpenVINO throwing our tensor in the trash? if (operation_type == "GGML_OP_NONE") { + const auto & skipped_names = decoder->get_output_names(node_idx); + for (const auto& name : skipped_names) { + if (name == "norm-21" || name == "ffn_inp-21") { + std::cerr << "\nCAUGHT THE BUG! '" << name << "' is a GGML_OP_NONE and OpenVINO skipped it!\n"; + } + } return; } @@ -199,18 +208,27 @@ std::shared_ptr TranslateSession::translate_graph(const frontend::InputMo auto it = m_translator_map.find(operation_type); FRONT_END_OP_CONVERSION_CHECK(it != m_translator_map.end(), "Translation for operation type ", operation_type, " is not implemented."); - NodeContext node_context(decoder, tensor_map, node_idx, this); + NodeContext node_context(decoder, tensor_map, tensor_ptr_map, node_idx, this); converted_outputs = it->second(node_context); const auto & node_output_names = decoder->get_output_names(node_idx); + const auto & node_output_tensors = decoder->get_output_tensors(node_idx); FRONT_END_OP_CONVERSION_CHECK(node_output_names.size() == converted_outputs.size(), "Number of ", operation_type, " outputs greater than number of converted outputs, which are ", node_output_names.size(), " and ", converted_outputs.size(), " respectively."); for (size_t i = 0; i < node_output_names.size(); ++i) { auto output_name = node_output_names[i]; + auto output_tensor = node_output_tensors[i]; // Match the pointer + if (i < converted_outputs.size() && converted_outputs[i].get_node_shared_ptr() != nullptr) { - (*tensor_map)[output_name] = converted_outputs[i]; + // Save to the original string map (keeps OpenVINO synthetic nodes satisfied) + (*tensor_map)[output_name] = converted_outputs[i]; + + // Save to our new pointer map (makes it physically impossible to overwrite unnamed tensors) + if (output_tensor != nullptr) { + (*tensor_ptr_map)[output_tensor] = converted_outputs[i]; + } } } };