Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2339edc
Thread safety per request only
cavusmustafa Mar 17, 2026
94df04c
Fix ROPE yarn case
wine99 Mar 24, 2026
2dcbe2f
Fix sticky stateful config
wine99 Mar 19, 2026
c21ed57
Use i4/i8 directly for symmetric quant
wine99 Mar 19, 2026
e804594
Use weightless caching
wine99 Mar 19, 2026
7b8c6f5
Add WeightlessCacheAttribute to reduce NPU memory usage
wine99 Mar 19, 2026
77bd354
Gelu tanh support (#125)
cavusmustafa Apr 9, 2026
5957f77
Imrope support (#126)
cavusmustafa Apr 9, 2026
185bfcc
Add interface is_model_splitted() to check whether the c-graph is split or not
zhaixuejun1993 Mar 6, 2026
1f25490
Infer and propagate dynamic-dimension indices for all tensors in the …
zhaixuejun1993 Mar 17, 2026
6ae864f
Only do this for fallback sub graph
zhaixuejun1993 Mar 19, 2026
3562f7c
Move dynamic dims compute in graph mismatch
zhaixuejun1993 Mar 23, 2026
fe01725
ggml-openvino: fix tensor data handling for PERMUTE/VIEW ops in split…
zhaixuejun1993 Mar 19, 2026
ce0e4e9
ggml-openvino: add comments
zhaixuejun1993 Mar 19, 2026
91a0eda
ggml-openvino: override VIEW op_case to 0 for split model inputs
zhaixuejun1993 Mar 19, 2026
c670633
openvino backend: Handle unsupported VIEW shape-mismatch in OpenVINO …
zhaixuejun1993 Mar 19, 2026
8e17919
Enable additional mul_mat tests and add tensor data saving function (…
zhaixuejun1993 Mar 23, 2026
8c5ca60
ggml-openvino: fix CONT/TRANSPOSE mapping and improve dynamic-dimensi…
zhaixuejun1993 Mar 26, 2026
b0d66ec
OpenVINO: add NORM/TANH support and rework SOFT_MAX translation
zhaixuejun1993 Mar 28, 2026
4119258
ggml-openvino: extend VIEW handling
zhaixuejun1993 Mar 30, 2026
1f1d900
Enable -fa off (#118)
wine99 Apr 2, 2026
8c3ff16
Enable --context-shift
wine99 Apr 10, 2026
3e67742
Fix llm param compute error for normal softmax not the softmax in att…
zhaixuejun1993 Apr 13, 2026
d69be5f
Merge pull request #129 from zhaixuejun1993/xuejun/fix-softmax-llm-pa…
zhaixuejun1993 Apr 13, 2026
ea4d4b6
openvino backend: enable OpenVINO backend fallback to CPU backend
zhaixuejun1993 Mar 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,9 @@ extern "C" {

void * extra; // extra things e.g. for ggml-cuda.cu

char padding[8];
char padding[16];
// org_src: pointer to the original source tensor (expected to be initialized to NULL), used for keeping track of original source tensors in case of in-place operations
struct ggml_tensor * org_src;
};

static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
Expand Down
20 changes: 17 additions & 3 deletions ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1124,8 +1124,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
struct ggml_tensor * node = graph->nodes[i];
int * cur_backend_id = &tensor_backend_id(node);
if (node->view_src != NULL && *cur_backend_id == -1) {
*cur_backend_id = tensor_backend_id(node->view_src);
SET_CAUSE(node, "4.vsrc");
auto view_src_backend = tensor_backend_id(node->view_src);
if (view_src_backend != -1 && ggml_backend_supports_op(sched->backends[view_src_backend], node)) {
*cur_backend_id = tensor_backend_id(node->view_src);
SET_CAUSE(node, "4.vsrc");
}
}
for (int j = 0; j < GGML_MAX_SRC; j++) {
struct ggml_tensor * src = node->src[j];
Expand All @@ -1151,6 +1154,14 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
GGML_ASSERT(*cur_backend_id != -1);
}

// add the node id to the name for easier debugging
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
char new_name[128];
snprintf(new_name, sizeof(new_name), "%s#%d", node->name, i);
ggml_format_name(node, "%s", new_name);
}

// pass 5: split graph, find tensors that need to be copied
{
int i_split = 0;
Expand All @@ -1171,7 +1182,9 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
struct ggml_tensor * node = graph->nodes[i];

if (ggml_is_view_op(node->op)) {
continue;
if ((tensor_backend_id(node) != cur_backend_id) && (ggml_backend_supports_op(sched->backends[cur_backend_id], node))) {
tensor_backend_id(node) = cur_backend_id;
}
}

const int node_backend_id = tensor_backend_id(node);
Expand Down Expand Up @@ -1269,6 +1282,7 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
}
tensor_copy->org_src = src;
tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy;
SET_CAUSE(tensor_copy, "4.cpy");
}
Expand Down
Loading