1#include <c10/cuda/CUDAFunctions.h>
2#include <c10/macros/Macros.h>
3
4#include <limits>
5
6namespace c10 {
7namespace cuda {
8
9namespace {
10// returns -1 on failure
11int32_t driver_version() {
12 int driver_version = -1;
13 C10_CUDA_IGNORE_ERROR(cudaDriverGetVersion(&driver_version));
14 return driver_version;
15}
16
17int device_count_impl(bool fail_if_no_driver) {
18 int count;
19 auto err = C10_CUDA_ERROR_HANDLED(cudaGetDeviceCount(&count));
20 if (err == cudaSuccess) {
21 return count;
22 }
23 // Clear out the error state, so we don't spuriously trigger someone else.
24 // (This shouldn't really matter, since we won't be running very much CUDA
25 // code in this regime.)
26 cudaError_t last_err C10_UNUSED = cudaGetLastError();
27 switch (err) {
28 case cudaErrorNoDevice:
29 // Zero devices is ok here
30 count = 0;
31 break;
32 case cudaErrorInsufficientDriver: {
33 auto version = driver_version();
34 if (version <= 0) {
35 if (!fail_if_no_driver) {
36 // No CUDA driver means no devices
37 count = 0;
38 break;
39 }
40 TORCH_CHECK(
41 false,
42 "Found no NVIDIA driver on your system. Please check that you "
43 "have an NVIDIA GPU and installed a driver from "
44 "http://www.nvidia.com/Download/index.aspx");
45 } else {
46 TORCH_CHECK(
47 false,
48 "The NVIDIA driver on your system is too old (found version ",
49 version,
50 "). Please update your GPU driver by downloading and installing "
51 "a new version from the URL: "
52 "http://www.nvidia.com/Download/index.aspx Alternatively, go to: "
53 "https://pytorch.org to install a PyTorch version that has been "
54 "compiled with your version of the CUDA driver.");
55 }
56 } break;
57 case cudaErrorInitializationError:
58 TORCH_CHECK(
59 false,
60 "CUDA driver initialization failed, you might not "
61 "have a CUDA gpu.");
62 break;
63 case cudaErrorUnknown:
64 TORCH_CHECK(
65 false,
66 "CUDA unknown error - this may be due to an "
67 "incorrectly set up environment, e.g. changing env "
68 "variable CUDA_VISIBLE_DEVICES after program start. "
69 "Setting the available devices to be zero.");
70 break;
71#if C10_ASAN_ENABLED
72 case cudaErrorMemoryAllocation:
73 // In ASAN mode, we know that a cudaErrorMemoryAllocation error will
74 // pop up if compiled with NVCC (clang-cuda is fine)
75 TORCH_CHECK(
76 false,
77 "Got 'out of memory' error while trying to initialize CUDA. "
78 "CUDA with nvcc does not work well with ASAN and it's probably "
79 "the reason. We will simply shut down CUDA support. If you "
80 "would like to use GPUs, turn off ASAN.");
81 break;
82#endif // C10_ASAN_ENABLED
83 default:
84 TORCH_CHECK(
85 false,
86 "Unexpected error from cudaGetDeviceCount(). Did you run "
87 "some cuda functions before calling NumCudaDevices() "
88 "that might have already set an error? Error ",
89 err,
90 ": ",
91 cudaGetErrorString(err));
92 }
93 return count;
94}
95} // namespace
96
97DeviceIndex device_count() noexcept {
98 // initialize number of devices only once
99 static int count = []() {
100 try {
101 auto result = device_count_impl(/*fail_if_no_driver=*/false);
102 TORCH_INTERNAL_ASSERT(
103 result <= std::numeric_limits<DeviceIndex>::max(),
104 "Too many CUDA devices, DeviceIndex overflowed");
105 return result;
106 } catch (const c10::Error& ex) {
107 // We don't want to fail, but still log the warning
108 // msg() returns the message without the stack trace
109 TORCH_WARN("CUDA initialization: ", ex.msg());
110 return 0;
111 }
112 }();
113 return static_cast<DeviceIndex>(count);
114}
115
116DeviceIndex device_count_ensure_non_zero() {
117 // Call the implementation every time to throw the exception
118 int count = device_count_impl(/*fail_if_no_driver=*/true);
119 // Zero gpus doesn't produce a warning in `device_count` but we fail here
120 TORCH_CHECK(count, "No CUDA GPUs are available");
121 return static_cast<DeviceIndex>(count);
122}
123
124DeviceIndex current_device() {
125 int cur_device;
126 C10_CUDA_CHECK(cudaGetDevice(&cur_device));
127 return static_cast<DeviceIndex>(cur_device);
128}
129
130void set_device(DeviceIndex device) {
131 C10_CUDA_CHECK(cudaSetDevice(static_cast<int>(device)));
132}
133
134void device_synchronize() {
135 const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
136 if (C10_UNLIKELY(interp)) {
137 (*interp)->trace_gpu_device_synchronization();
138 }
139 C10_CUDA_CHECK(cudaDeviceSynchronize());
140}
141
142// this function has to be called from callers performing cuda synchronizing
143// operations, to raise proper error or warning
144void warn_or_error_on_sync() {
145 if (warning_state().get_sync_debug_mode() == SyncDebugMode::L_ERROR) {
146 TORCH_CHECK(false, "called a synchronizing CUDA operation");
147 } else if (warning_state().get_sync_debug_mode() == SyncDebugMode::L_WARN) {
148 TORCH_WARN("called a synchronizing CUDA operation");
149 }
150}
151
152} // namespace cuda
153} // namespace c10
154