1 | #include <c10/cuda/CUDAFunctions.h> |
2 | #include <c10/macros/Macros.h> |
3 | |
4 | #include <limits> |
5 | |
6 | namespace c10 { |
7 | namespace cuda { |
8 | |
9 | namespace { |
10 | // returns -1 on failure |
11 | int32_t driver_version() { |
12 | int driver_version = -1; |
13 | C10_CUDA_IGNORE_ERROR(cudaDriverGetVersion(&driver_version)); |
14 | return driver_version; |
15 | } |
16 | |
// Queries the number of available CUDA devices via cudaGetDeviceCount,
// translating well-known failure modes into either a benign zero count or a
// descriptive TORCH_CHECK error.
//
// fail_if_no_driver: when true, a completely missing driver raises; when
//   false, "no driver" is reported as zero devices so CPU-only runs proceed.
int device_count_impl(bool fail_if_no_driver) {
  int count;
  auto err = C10_CUDA_ERROR_HANDLED(cudaGetDeviceCount(&count));
  if (err == cudaSuccess) {
    return count;
  }
  // Clear out the error state, so we don't spuriously trigger someone else.
  // (This shouldn't really matter, since we won't be running very much CUDA
  // code in this regime.)
  cudaError_t last_err C10_UNUSED = cudaGetLastError();
  switch (err) {
    case cudaErrorNoDevice:
      // Zero devices is ok here
      count = 0;
      break;
    case cudaErrorInsufficientDriver: {
      // Distinguish "no driver at all" from "driver too old for this runtime".
      auto version = driver_version();
      if (version <= 0) {
        if (!fail_if_no_driver) {
          // No CUDA driver means no devices
          count = 0;
          break;
        }
        TORCH_CHECK(
            false,
            "Found no NVIDIA driver on your system. Please check that you "
            "have an NVIDIA GPU and installed a driver from "
            "http://www.nvidia.com/Download/index.aspx" );
      } else {
        TORCH_CHECK(
            false,
            "The NVIDIA driver on your system is too old (found version " ,
            version,
            "). Please update your GPU driver by downloading and installing "
            "a new version from the URL: "
            "http://www.nvidia.com/Download/index.aspx Alternatively, go to: "
            "https://pytorch.org to install a PyTorch version that has been "
            "compiled with your version of the CUDA driver." );
      }
    } break;
    case cudaErrorInitializationError:
      TORCH_CHECK(
          false,
          "CUDA driver initialization failed, you might not "
          "have a CUDA gpu." );
      break;
    case cudaErrorUnknown:
      TORCH_CHECK(
          false,
          "CUDA unknown error - this may be due to an "
          "incorrectly set up environment, e.g. changing env "
          "variable CUDA_VISIBLE_DEVICES after program start. "
          "Setting the available devices to be zero." );
      break;
#if C10_ASAN_ENABLED
    case cudaErrorMemoryAllocation:
      // In ASAN mode, we know that a cudaErrorMemoryAllocation error will
      // pop up if compiled with NVCC (clang-cuda is fine)
      TORCH_CHECK(
          false,
          "Got 'out of memory' error while trying to initialize CUDA. "
          "CUDA with nvcc does not work well with ASAN and it's probably "
          "the reason. We will simply shut down CUDA support. If you "
          "would like to use GPUs, turn off ASAN." );
      break;
#endif // C10_ASAN_ENABLED
    default:
      // Any other error is unexpected; report it verbatim with its string.
      TORCH_CHECK(
          false,
          "Unexpected error from cudaGetDeviceCount(). Did you run "
          "some cuda functions before calling NumCudaDevices() "
          "that might have already set an error? Error " ,
          err,
          ": " ,
          cudaGetErrorString(err));
  }
  return count;
}
95 | } // namespace |
96 | |
97 | DeviceIndex device_count() noexcept { |
98 | // initialize number of devices only once |
99 | static int count = []() { |
100 | try { |
101 | auto result = device_count_impl(/*fail_if_no_driver=*/false); |
102 | TORCH_INTERNAL_ASSERT( |
103 | result <= std::numeric_limits<DeviceIndex>::max(), |
104 | "Too many CUDA devices, DeviceIndex overflowed" ); |
105 | return result; |
106 | } catch (const c10::Error& ex) { |
107 | // We don't want to fail, but still log the warning |
108 | // msg() returns the message without the stack trace |
109 | TORCH_WARN("CUDA initialization: " , ex.msg()); |
110 | return 0; |
111 | } |
112 | }(); |
113 | return static_cast<DeviceIndex>(count); |
114 | } |
115 | |
116 | DeviceIndex device_count_ensure_non_zero() { |
117 | // Call the implementation every time to throw the exception |
118 | int count = device_count_impl(/*fail_if_no_driver=*/true); |
119 | // Zero gpus doesn't produce a warning in `device_count` but we fail here |
120 | TORCH_CHECK(count, "No CUDA GPUs are available" ); |
121 | return static_cast<DeviceIndex>(count); |
122 | } |
123 | |
124 | DeviceIndex current_device() { |
125 | int cur_device; |
126 | C10_CUDA_CHECK(cudaGetDevice(&cur_device)); |
127 | return static_cast<DeviceIndex>(cur_device); |
128 | } |
129 | |
130 | void set_device(DeviceIndex device) { |
131 | C10_CUDA_CHECK(cudaSetDevice(static_cast<int>(device))); |
132 | } |
133 | |
134 | void device_synchronize() { |
135 | const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); |
136 | if (C10_UNLIKELY(interp)) { |
137 | (*interp)->trace_gpu_device_synchronization(); |
138 | } |
139 | C10_CUDA_CHECK(cudaDeviceSynchronize()); |
140 | } |
141 | |
142 | // this function has to be called from callers performing cuda synchronizing |
143 | // operations, to raise proper error or warning |
144 | void warn_or_error_on_sync() { |
145 | if (warning_state().get_sync_debug_mode() == SyncDebugMode::L_ERROR) { |
146 | TORCH_CHECK(false, "called a synchronizing CUDA operation" ); |
147 | } else if (warning_state().get_sync_debug_mode() == SyncDebugMode::L_WARN) { |
148 | TORCH_WARN("called a synchronizing CUDA operation" ); |
149 | } |
150 | } |
151 | |
152 | } // namespace cuda |
153 | } // namespace c10 |
154 | |