CUDA: manage NCCL communicators in context (#21891)
* CUDA: manage NCCL communicators in context
* add check that all backends are CUDA
* remove unused vector, limit init to > 1 GPUs
* fix warnings
* fix cuda device, cache allreduce
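The commit moves the per-device ncclComm_t handles out of the process-global ggml_cuda_device_info and into backend-owned state. A minimal sketch of what context-owned communicator management can look like, assuming a hypothetical holder struct (in the actual commit the handles live in the CUDA backend context) and lazy initialization only when more than one GPU participates:

#include <nccl.h>
#include <vector>

// Hypothetical holder for illustration; not the commit's actual code.
struct cuda_nccl_state {
    std::vector<ncclComm_t> comms;

    // Lazily create one communicator per participating device. Skipped for a
    // single GPU ("limit init to > 1 GPUs"): NCCL is only needed across devices.
    bool init(const std::vector<int> & devices) {
        if (!comms.empty() || devices.size() < 2) {
            return false;
        }
        comms.resize(devices.size());
        if (ncclCommInitAll(comms.data(), (int) devices.size(), devices.data()) != ncclSuccess) {
            comms.clear();
            return false;
        }
        return true;
    }

    // Tying teardown to the owning object's lifetime is the point of keeping
    // the communicators in the context rather than in a process-global struct.
    ~cuda_nccl_state() {
        for (ncclComm_t comm : comms) {
            ncclCommDestroy(comm);
        }
    }
};

Keeping the handles next to the backend state also gives a natural place for the "all backends are CUDA" check to run before any communicator is created.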
@@ -1092,10 +1092,6 @@ struct ggml_cuda_device_info {
     cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
 
     std::array<float, GGML_CUDA_MAX_DEVICES> default_tensor_split = {};
-
-#ifdef GGML_USE_NCCL
-    ncclComm_t comms[GGML_CUDA_MAX_DEVICES];
-#endif // GGML_USE_NCCL
 };
 
 const ggml_cuda_device_info & ggml_cuda_info();
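For the "fix cuda device, cache allreduce" part, a hedged usage sketch: each NCCL communicator is bound to one device, so the current CUDA device must be set before issuing its call, and the per-device calls are batched inside an NCCL group. All names and parameters below are assumptions for illustration, not the commit's actual code:

#include <cuda_runtime.h>
#include <nccl.h>
#include <cstddef>

// Sum-allreduce one float buffer per GPU. comms[i] and streams[i] belong to
// devices[i]; selecting the device before each call matches the "fix cuda
// device" bullet. Illustrative only.
static ncclResult_t allreduce_f32(float ** bufs, size_t count,
                                  ncclComm_t * comms, cudaStream_t * streams,
                                  const int * devices, int n) {
    ncclResult_t err = ncclGroupStart();
    for (int i = 0; i < n && err == ncclSuccess; ++i) {
        cudaSetDevice(devices[i]); // comm i is bound to devices[i]
        // in-place allreduce: NCCL allows send and receive buffers to alias
        err = ncclAllReduce(bufs[i], bufs[i], count, ncclFloat, ncclSum,
                            comms[i], streams[i]);
    }
    ncclResult_t end_err = ncclGroupEnd();
    return err != ncclSuccess ? err : end_err;
}

Caching then amounts to reusing the already-initialized communicators across calls instead of re-creating them for every allreduce.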