CUDA: require explicit opt-in for P2P access (#21910)

2026-04-15 16:01:46 +02:00
parent 014dca49d6
commit a6206958d2
2 changed files with 18 additions and 10 deletions
@@ -281,6 +281,12 @@ Use `GGML_CUDA_FORCE_CUBLAS_COMPUTE_16F` environment variable to force use FP16
 The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. In Windows this setting is available in the NVIDIA control panel as `System Memory Fallback`.
 ### Peer Access
 The environment variable `GGML_CUDA_P2P` can be set to enable peer-to-peer access between multiple GPUs, allowing them to transfer data directly rather than to go through system memory.
 Requires driver support (usually restricted to workstation/datacenter GPUs).
 May cause crashes or corrupted outputs for some motherboards and BIOS settings (e.g. IOMMU).
 ### Performance Tuning
 The following compilation options are also available to tweak performance:
@@ -324,16 +324,18 @@ static ggml_cuda_device_info ggml_cuda_init() {
    // configure logging to stdout
    // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
-    for (int id = 0; id < info.device_count; ++id) {
+    if (getenv("GGML_CUDA_P2P") != nullptr) {
-        ggml_cuda_set_device(id);
+        for (int id = 0; id < info.device_count; ++id) {
-        for (int id_other = 0; id_other < info.device_count; ++id_other) {
+            ggml_cuda_set_device(id);
-            if (id == id_other) {
+            for (int id_other = 0; id_other < info.device_count; ++id_other) {
-                continue;
+                if (id == id_other) {
-            }
+                    continue;
-            int can_access_peer;
+                }
-            CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
+                int can_access_peer;
-            if (can_access_peer) {
+                CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
-                CUDA_CHECK(cudaDeviceEnablePeerAccess(id_other, 0));
+                if (can_access_peer) {
                    CUDA_CHECK(cudaDeviceEnablePeerAccess(id_other, 0));
                }
            }
        }
    }