1 | #include <c10/cuda/CUDAFunctions.h> |
2 | #include <c10/macros/Macros.h> |
3 | |
4 | #include <limits> |
5 | |
6 | namespace c10 { |
7 | namespace cuda { |
8 | |
9 | namespace { |
10 | // returns -1 on failure |
11 | int32_t driver_version() { |
12 | int driver_version = -1; |
13 | C10_CUDA_IGNORE_ERROR(cudaDriverGetVersion(&driver_version)); |
14 | return driver_version; |
15 | } |
16 | |
// Queries the number of available CUDA devices via cudaGetDeviceCount,
// translating well-known failure modes into either a benign zero count or a
// descriptive TORCH_CHECK error.
//
// fail_if_no_driver: when true, a completely missing driver raises; when
//   false, "no driver" is reported as zero devices so CPU-only runs proceed.
int device_count_impl(bool fail_if_no_driver) {
  int count;
  auto err = C10_CUDA_ERROR_HANDLED(cudaGetDeviceCount(&count));
  if (err == cudaSuccess) {
    return count;
  }
  // Clear out the error state, so we don't spuriously trigger someone else.
  // (This shouldn't really matter, since we won't be running very much CUDA
  // code in this regime.)
  cudaError_t last_err C10_UNUSED = cudaGetLastError();
  switch (err) {
    case cudaErrorNoDevice:
      // Zero devices is ok here
      count = 0;
      break;
    case cudaErrorInsufficientDriver: {
      // Distinguish "no driver at all" from "driver too old for this runtime".
      auto version = driver_version();
      if (version <= 0) {
        if (!fail_if_no_driver) {
          // No CUDA driver means no devices
          count = 0;
          break;
        }
        TORCH_CHECK(
            false,
            "Found no NVIDIA driver on your system. Please check that you "
            "have an NVIDIA GPU and installed a driver from "
            "http://www.nvidia.com/Download/index.aspx" );
      } else {
        TORCH_CHECK(
            false,
            "The NVIDIA driver on your system is too old (found version " ,
            version,
            "). Please update your GPU driver by downloading and installing "
            "a new version from the URL: "
            "http://www.nvidia.com/Download/index.aspx Alternatively, go to: "
            "https://pytorch.org to install a PyTorch version that has been "
            "compiled with your version of the CUDA driver." );
      }
    } break;
    case cudaErrorInitializationError:
      TORCH_CHECK(
          false,
          "CUDA driver initialization failed, you might not "
          "have a CUDA gpu." );
      break;
    case cudaErrorUnknown:
      TORCH_CHECK(
          false,
          "CUDA unknown error - this may be due to an "
          "incorrectly set up environment, e.g. changing env "
          "variable CUDA_VISIBLE_DEVICES after program start. "
          "Setting the available devices to be zero." );
      break;
#if C10_ASAN_ENABLED
    case cudaErrorMemoryAllocation:
      // In ASAN mode, we know that a cudaErrorMemoryAllocation error will
      // pop up if compiled with NVCC (clang-cuda is fine)
      TORCH_CHECK(
          false,
          "Got 'out of memory' error while trying to initialize CUDA. "
          "CUDA with nvcc does not work well with ASAN and it's probably "
          "the reason. We will simply shut down CUDA support. If you "
          "would like to use GPUs, turn off ASAN." );
      break;
#endif // C10_ASAN_ENABLED
    default:
      // Any other error is unexpected; report it verbatim with its string.
      TORCH_CHECK(
          false,
          "Unexpected error from cudaGetDeviceCount(). Did you run "
          "some cuda functions before calling NumCudaDevices() "
          "that might have already set an error? Error " ,
          err,
          ": " ,
          cudaGetErrorString(err));
  }
  return count;
}
95 | } // namespace |
96 | |
97 | DeviceIndex device_count() noexcept { |
98 | // initialize number of devices only once |
99 | static int count = []() { |
100 | try { |
101 | auto result = device_count_impl(/*fail_if_no_driver=*/false); |
102 | TORCH_INTERNAL_ASSERT( |
103 | result <= std::numeric_limits<DeviceIndex>::max(), |
104 | "Too many CUDA devices, DeviceIndex overflowed" ); |
105 | return result; |
106 | } catch (const c10::Error& ex) { |
107 | // We don't want to fail, but still log the warning |
108 | // msg() returns the message without the stack trace |
109 | TORCH_WARN("CUDA initialization: " , ex.msg()); |
110 | return 0; |
111 | } |
112 | }(); |
113 | return static_cast<DeviceIndex>(count); |
114 | } |
115 | |
116 | DeviceIndex device_count_ensure_non_zero() { |
117 | // Call the implementation every time to throw the exception |
118 | int count = device_count_impl(/*fail_if_no_driver=*/true); |
119 | // Zero gpus doesn't produce a warning in `device_count` but we fail here |
120 | TORCH_CHECK(count, "No CUDA GPUs are available" ); |
121 | return static_cast<DeviceIndex>(count); |
122 | } |
123 | |
124 | DeviceIndex current_device() { |
125 | int cur_device; |
126 | C10_CUDA_CHECK(cudaGetDevice(&cur_device)); |
127 | return static_cast<DeviceIndex>(cur_device); |
128 | } |
129 | |
130 | void set_device(DeviceIndex device) { |
131 | C10_CUDA_CHECK(cudaSetDevice(static_cast<int>(device))); |
132 | } |
133 | |
134 | void device_synchronize() { |
135 | const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); |
136 | if (C10_UNLIKELY(interp)) { |
137 | (*interp)->trace_gpu_device_synchronization(); |
138 | } |
139 | C10_CUDA_CHECK(cudaDeviceSynchronize()); |
140 | } |
141 | |
142 | // this function has to be called from callers performing cuda synchronizing |
143 | // operations, to raise proper error or warning |
144 | void warn_or_error_on_sync() { |
145 | if (warning_state().get_sync_debug_mode() == SyncDebugMode::L_ERROR) { |
146 | TORCH_CHECK(false, "called a synchronizing CUDA operation" ); |
147 | } else if (warning_state().get_sync_debug_mode() == SyncDebugMode::L_WARN) { |
148 | TORCH_WARN("called a synchronizing CUDA operation" ); |
149 | } |
150 | } |
151 | |
152 | } // namespace cuda |
153 | } // namespace c10 |
154 | |