CUDA: require explicit opt-in for P2P access (#21910)
This commit is contained in:
@@ -281,6 +281,12 @@ Use `GGML_CUDA_FORCE_CUBLAS_COMPUTE_16F` environment variable to force use FP16
|
|||||||
|
|
||||||
The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. In Windows this setting is available in the NVIDIA control panel as `System Memory Fallback`.
|
The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. In Windows this setting is available in the NVIDIA control panel as `System Memory Fallback`.
|
||||||
|
|
||||||
|
### Peer Access
|
||||||
|
|
||||||
|
The environment variable `GGML_CUDA_P2P` can be set to enable peer-to-peer access between multiple GPUs, allowing them to transfer data directly rather than to go through system memory.
|
||||||
|
Requires driver support (usually restricted to workstation/datacenter GPUs).
|
||||||
|
May cause crashes or corrupted outputs for some motherboards and BIOS settings (e.g. IOMMU).
|
||||||
|
|
||||||
### Performance Tuning
|
### Performance Tuning
|
||||||
|
|
||||||
The following compilation options are also available to tweak performance:
|
The following compilation options are also available to tweak performance:
|
||||||
|
|||||||
@@ -324,16 +324,18 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||||||
// configure logging to stdout
|
// configure logging to stdout
|
||||||
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
|
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
|
||||||
|
|
||||||
for (int id = 0; id < info.device_count; ++id) {
|
if (getenv("GGML_CUDA_P2P") != nullptr) {
|
||||||
ggml_cuda_set_device(id);
|
for (int id = 0; id < info.device_count; ++id) {
|
||||||
for (int id_other = 0; id_other < info.device_count; ++id_other) {
|
ggml_cuda_set_device(id);
|
||||||
if (id == id_other) {
|
for (int id_other = 0; id_other < info.device_count; ++id_other) {
|
||||||
continue;
|
if (id == id_other) {
|
||||||
}
|
continue;
|
||||||
int can_access_peer;
|
}
|
||||||
CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
|
int can_access_peer;
|
||||||
if (can_access_peer) {
|
CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
|
||||||
CUDA_CHECK(cudaDeviceEnablePeerAccess(id_other, 0));
|
if (can_access_peer) {
|
||||||
|
CUDA_CHECK(cudaDeviceEnablePeerAccess(id_other, 0));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user