ggml-cuda: refactor cuda graph usage (#18637)

* ggml-cuda: refactor cuda graph usage

* use is_enabled() instead of enabled
This commit is contained in:
Aman Gupta
2026-01-06 23:48:45 +08:00
committed by GitHub
parent 968929528c
commit 090b137e56
3 changed files with 72 additions and 96 deletions
+19 -5
View File
@@ -1036,7 +1036,7 @@ struct ggml_tensor_extra_gpu {
#define USE_CUDA_GRAPH
#endif
struct ggml_graph_node_properties {
struct ggml_cuda_graph_node_properties {
void * node_address;
ggml_op node_op;
int64_t ne[GGML_MAX_DIMS];
@@ -1061,11 +1061,25 @@ struct ggml_cuda_graph {
std::vector<cudaGraphNode_t> nodes;
bool disable_due_to_gpu_arch = false;
bool disable_due_to_too_many_updates = false;
bool disable_due_to_failed_graph_capture = false;
int number_consecutive_updates = 0;
bool cuda_graphs_enabled = false;
std::vector<ggml_graph_node_properties> ggml_graph_properties;
std::vector<ggml_graph_node_properties> extraneous_srcs_properties;
std::vector<ggml_cuda_graph_node_properties> props;
void record_update(bool use_graph, bool update_required) {
if (use_graph && update_required) {
number_consecutive_updates++;
} else {
number_consecutive_updates = 0;
}
if (number_consecutive_updates >= 4) {
GGML_LOG_DEBUG("%s: disabling CUDA graphs due to too many consecutive updates\n", __func__);
disable_due_to_too_many_updates = true;
}
}
bool is_enabled() const {
static const bool disable_cuda_graphs_due_to_env = (getenv("GGML_CUDA_DISABLE_GRAPHS") != nullptr);
return !(disable_due_to_gpu_arch || disable_cuda_graphs_due_to_env || disable_due_to_too_many_updates);
}
#endif
};