cuda : fix nkvo, offload and cuda graph node properties matching (#19165)
* cuda : fix nkvo * cont : more robust cuda graph node property matching * cont : restore pre-leafs implementation * cont : comments + static_assert
This commit is contained in:
@@ -1122,15 +1122,17 @@ struct ggml_tensor_extra_gpu {
|
||||
#endif
|
||||
|
||||
struct ggml_cuda_graph_node_properties {
|
||||
void * node_address;
|
||||
void * node_data;
|
||||
ggml_op node_op;
|
||||
int32_t flags;
|
||||
int64_t ne[GGML_MAX_DIMS];
|
||||
size_t nb[GGML_MAX_DIMS];
|
||||
void * src_address[GGML_MAX_SRC];
|
||||
void * src_data[GGML_MAX_SRC];
|
||||
int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
|
||||
};
|
||||
|
||||
static_assert(std::is_trivial<ggml_cuda_graph_node_properties>::value, "ggml_cuda_graph_node_properties must be trivial");
|
||||
|
||||
struct ggml_cuda_graph {
|
||||
#ifdef USE_CUDA_GRAPH
|
||||
~ggml_cuda_graph() {
|
||||
@@ -1150,6 +1152,12 @@ struct ggml_cuda_graph {
|
||||
int number_consecutive_updates = 0;
|
||||
std::vector<ggml_cuda_graph_node_properties> props;
|
||||
|
||||
// these are extra tensors (inputs) that participate in the ggml graph but are not nodes
|
||||
// they properties also have to match in order to be able to safely reuse a CUDA graph
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/18583
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/19165
|
||||
std::vector<ggml_cuda_graph_node_properties> extra;
|
||||
|
||||
void record_update(bool use_graph, bool update_required) {
|
||||
if (use_graph && update_required) {
|
||||
number_consecutive_updates++;
|
||||
|
||||
Reference in New Issue
Block a user