Patch for multi device CUDA (#3100)

Awni Hannun · web-flow · commit 185b06d9efc1 · 2026-02-05T17:33:51.000-08:00
diff --git a/mlx/backend/cuda/allocator.cpp b/mlx/backend/cuda/allocator.cpp
@@ -168,6 +168,9 @@ CudaAllocator::CudaAllocator()
   free_limit_ = total_memory_ - memory_limit_;
   max_pool_size_ = memory_limit_;
 
+  int curr;
+  CHECK_CUDA_ERROR(cudaGetDevice(&curr));
+
   int device_count = gpu::device_count();
   free_streams_.resize(device_count);
   mem_pools_.resize(device_count);
@@ -178,6 +181,7 @@ CudaAllocator::CudaAllocator()
       CHECK_CUDA_ERROR(cudaDeviceGetDefaultMemPool(&mem_pools_[i], i));
     }
   }
+  CHECK_CUDA_ERROR(cudaSetDevice(curr));
 }
 
 Buffer

Original file line number	Diff line number	Diff line change
`@@ -168,6 +168,9 @@ CudaAllocator::CudaAllocator()`
`168`	`168`	`free_limit_ = total_memory_ - memory_limit_;`
`169`	`169`	`max_pool_size_ = memory_limit_;`
`170`	`170`
	`171`	`+ int curr;`
	`172`	`+ CHECK_CUDA_ERROR(cudaGetDevice(&curr));`
	`173`	`+`
`171`	`174`	`int device_count = gpu::device_count();`
`172`	`175`	`free_streams_.resize(device_count);`
`173`	`176`	`mem_pools_.resize(device_count);`
`@@ -178,6 +181,7 @@ CudaAllocator::CudaAllocator()`
`178`	`181`	`CHECK_CUDA_ERROR(cudaDeviceGetDefaultMemPool(&mem_pools_[i], i));`
`179`	`182`	`}`
`180`	`183`	`}`
	`184`	`+ CHECK_CUDA_ERROR(cudaSetDevice(curr));`
`181`	`185`	`}`
`182`	`186`
`183`	`187`	`Buffer`