CUDA: also store node->src ne/nb for graph equality (#21736)
This commit is contained in:
@@ -1186,6 +1186,8 @@ struct ggml_cuda_graph {
|
|||||||
struct node_properties {
|
struct node_properties {
|
||||||
ggml_tensor node;
|
ggml_tensor node;
|
||||||
void * node_src_data_ptrs[GGML_MAX_SRC];
|
void * node_src_data_ptrs[GGML_MAX_SRC];
|
||||||
|
int64_t node_src_ne[GGML_MAX_SRC][GGML_MAX_DIMS];
|
||||||
|
size_t node_src_nb[GGML_MAX_SRC][GGML_MAX_DIMS];
|
||||||
};
|
};
|
||||||
std::vector<node_properties> node_props;
|
std::vector<node_properties> node_props;
|
||||||
|
|
||||||
|
|||||||
@@ -3070,16 +3070,18 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
|
|||||||
ggml_cuda_graph::node_properties prop = {};
|
ggml_cuda_graph::node_properties prop = {};
|
||||||
memcpy(&prop.node, cgraph->nodes[i], sizeof(ggml_tensor));
|
memcpy(&prop.node, cgraph->nodes[i], sizeof(ggml_tensor));
|
||||||
|
|
||||||
// if the backend scheduler is making copies of CPU tensors, the src pointers can be the same but with different data, see:
|
|
||||||
// https://github.com/ggml-org/llama.cpp/pull/21472#discussion_r3052235188
|
|
||||||
for (int j = 0; j < GGML_MAX_SRC; ++j) {
|
for (int j = 0; j < GGML_MAX_SRC; ++j) {
|
||||||
prop.node_src_data_ptrs[j] = cgraph->nodes[i]->src[j] ? cgraph->nodes[i]->src[j]->data : nullptr;
|
if (cgraph->nodes[i]->src[j]) {
|
||||||
|
prop.node_src_data_ptrs[j] = cgraph->nodes[i]->src[j]->data;
|
||||||
|
memcpy(prop.node_src_ne[j], cgraph->nodes[i]->src[j]->ne, sizeof(prop.node_src_ne[j]));
|
||||||
|
memcpy(prop.node_src_nb[j], cgraph->nodes[i]->src[j]->nb, sizeof(prop.node_src_nb[j]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!res && memcmp(&graph->node_props[i], &prop, sizeof(prop)) != 0) {
|
if (res || memcmp(&graph->node_props[i], &prop, sizeof(prop)) != 0) {
|
||||||
|
graph->node_props[i] = prop;
|
||||||
res = true;
|
res = true;
|
||||||
}
|
}
|
||||||
graph->node_props[i] = prop;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
|||||||
Reference in New Issue
Block a user