CUDA: manage NCCL communicators in context (#21891)

* CUDA: manage NCCL communicators in context

* add check that all backends are CUDA

* remove unused vector, limit init to > 1 GPUs

* fix warnings

* fix cuda device, cache allreduce
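As context for the second bullet above ("check that all backends are CUDA"): NCCL communicators can only be created when every backend participating in the split is a CUDA backend. The following is a minimal sketch of such a guard, not the literal patch; `ggml_backend_is_cuda` is a public ggml predicate, while the helper name and its call site are assumptions.

// Sketch of the "all backends are CUDA" guard; illustrative, not the actual patch.
#include <vector>
#include "ggml-backend.h"
#include "ggml-cuda.h"

// Returns true only if every participating backend is a CUDA backend,
// i.e. it is safe to create NCCL communicators for the group.
static bool all_backends_are_cuda(const std::vector<ggml_backend_t> & backends) {
    for (ggml_backend_t backend : backends) {
        if (!ggml_backend_is_cuda(backend)) {
            return false;
        }
    }
    return true;
}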
Author: Johannes Gäßler
Date: 2026-04-15 15:58:40 +02:00
Committed by: GitHub
Parent: adb541a6ad
Commit: 014dca49d6
4 changed files with 148 additions and 76 deletions
@@ -1092,10 +1092,6 @@ struct ggml_cuda_device_info {
     cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
 
     std::array<float, GGML_CUDA_MAX_DEVICES> default_tensor_split = {};
-
-#ifdef GGML_USE_NCCL
-    ncclComm_t comms[GGML_CUDA_MAX_DEVICES];
-#endif // GGML_USE_NCCL
 };
 
 const ggml_cuda_device_info & ggml_cuda_info();
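The hunk above shows only the removal side: the `ncclComm_t` array leaves the process-wide `ggml_cuda_device_info`. The addition side is not reproduced here, but the pattern named in the title (communicators owned by the backend context, created lazily and only for multi-GPU runs) might look like the sketch below. `ggml_backend_cuda_context` matches a real ggml type name, but the member layout and the `ensure_nccl_comms` helper are assumptions for illustration, not the code added by this commit.

// Sketch only: per-context NCCL communicator management with lazy
// initialization. Members and ensure_nccl_comms() are assumptions,
// not the actual patch.
#include <nccl.h>

#ifndef GGML_CUDA_MAX_DEVICES
#define GGML_CUDA_MAX_DEVICES 16 // assumption; defined elsewhere in ggml
#endif

struct ggml_backend_cuda_context {
    // One communicator per device, owned by the context instead of the
    // global ggml_cuda_device_info removed in the hunk above.
    ncclComm_t comms[GGML_CUDA_MAX_DEVICES] = {};
    bool       comms_initialized = false;

    // "limit init to > 1 GPUs": communicators are only created once,
    // and only when more than one device actually participates.
    void ensure_nccl_comms(int n_devices) {
        if (comms_initialized || n_devices <= 1) {
            return;
        }
        int dev_ids[GGML_CUDA_MAX_DEVICES];
        for (int i = 0; i < n_devices; ++i) {
            dev_ids[i] = i;
        }
        // Real code should check the ncclResult_t return value.
        ncclCommInitAll(comms, n_devices, dev_ids);
        comms_initialized = true;
    }

    ~ggml_backend_cuda_context() {
        // Tear down the communicators together with the context.
        for (ncclComm_t comm : comms) {
            if (comm != nullptr) {
                ncclCommDestroy(comm);
            }
        }
    }
};

Tying communicator lifetime to the context rather than to global device info also gives a natural home for the cached allreduce state mentioned in the last bullet of the commit message.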