CUDA: manage NCCL communicators in context (#21891)

* CUDA: manage NCCL communicators in context

* add check that all backends are CUDA

* remove unused vector, limit init to > 1 GPUs

* fix warnings

* fix cuda device, cache allreduce
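As context for the second bullet above ("check that all backends are CUDA"): NCCL communicators can only be created when every backend participating in the split is a CUDA backend. The following is a minimal sketch of such a guard, not the literal patch; `ggml_backend_is_cuda` is a public ggml predicate, while the helper name and its call site are assumptions.

// Sketch of the "all backends are CUDA" guard; illustrative, not the actual patch.
#include <vector>
#include "ggml-backend.h"
#include "ggml-cuda.h"

// Returns true only if every participating backend is a CUDA backend,
// i.e. it is safe to create NCCL communicators for the group.
static bool all_backends_are_cuda(const std::vector<ggml_backend_t> & backends) {
    for (ggml_backend_t backend : backends) {
        if (!ggml_backend_is_cuda(backend)) {
            return false;
        }
    }
    return true;
}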
Author: Johannes Gäßler
Date: 2026-04-15 15:58:40 +02:00
Committed by: GitHub
Parent: adb541a6ad
Commit: 014dca49d6
4 changed files with 148 additions and 76 deletions
@@ -1092,10 +1092,6 @@ struct ggml_cuda_device_info {
     cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
 
     std::array<float, GGML_CUDA_MAX_DEVICES> default_tensor_split = {};
-
-#ifdef GGML_USE_NCCL
-    ncclComm_t comms[GGML_CUDA_MAX_DEVICES];
-#endif // GGML_USE_NCCL
 };
 
 const ggml_cuda_device_info & ggml_cuda_info();
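The hunk above shows only the removal side: the `ncclComm_t` array leaves the process-wide `ggml_cuda_device_info`. The addition side is not reproduced here, but the pattern named in the title (communicators owned by the backend context, created lazily and only for multi-GPU runs) might look like the sketch below. `ggml_backend_cuda_context` matches a real ggml type name, but the member layout and the `ensure_nccl_comms` helper are assumptions for illustration, not the code added by this commit.

// Sketch only: per-context NCCL communicator management with lazy
// initialization. Members and ensure_nccl_comms() are assumptions,
// not the actual patch.
#include <nccl.h>

#ifndef GGML_CUDA_MAX_DEVICES
#define GGML_CUDA_MAX_DEVICES 16 // assumption; defined elsewhere in ggml
#endif

struct ggml_backend_cuda_context {
    // One communicator per device, owned by the context instead of the
    // global ggml_cuda_device_info removed in the hunk above.
    ncclComm_t comms[GGML_CUDA_MAX_DEVICES] = {};
    bool       comms_initialized = false;

    // "limit init to > 1 GPUs": communicators are only created once,
    // and only when more than one device actually participates.
    void ensure_nccl_comms(int n_devices) {
        if (comms_initialized || n_devices <= 1) {
            return;
        }
        int dev_ids[GGML_CUDA_MAX_DEVICES];
        for (int i = 0; i < n_devices; ++i) {
            dev_ids[i] = i;
        }
        // Real code should check the ncclResult_t return value.
        ncclCommInitAll(comms, n_devices, dev_ids);
        comms_initialized = true;
    }

    ~ggml_backend_cuda_context() {
        // Tear down the communicators together with the context.
        for (ncclComm_t comm : comms) {
            if (comm != nullptr) {
                ncclCommDestroy(comm);
            }
        }
    }
};

Tying communicator lifetime to the context rather than to global device info also gives a natural home for the cached allreduce state mentioned in the last bullet of the commit message.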