1 | #include <cuda.h> |
2 | #include <cuda_runtime.h> |
3 | #include <torch/csrc/utils/pybind.h> |
4 | #if !defined(USE_ROCM) |
5 | #include <cuda_profiler_api.h> |
6 | #else |
7 | #include <hip/hip_runtime_api.h> |
8 | #endif |
9 | |
10 | #include <c10/cuda/CUDAException.h> |
11 | #include <c10/cuda/CUDAGuard.h> |
12 | |
13 | namespace torch { |
14 | namespace cuda { |
15 | namespace shared { |
16 | |
#ifdef USE_ROCM
namespace {
// No-op stand-in bound in place of cudaProfilerStart/cudaProfilerStop when
// building with ROCm (see initCudartBindings below): it does nothing and
// unconditionally reports success.
hipError_t hipReturnSuccess() { return hipSuccess; }
} // namespace
#endif
24 | |
// Registers a `_cudart` submodule on the given Python module, exposing a thin
// subset of the CUDA runtime (libcudart) API to Python: error strings,
// profiler start/stop, host memory (un)registration, stream create/destroy,
// and per-device memory info.
//
// NOTE: every Python-visible name below is spelled as two adjacent string
// literals ("cuda" "Xxx"). The HIP rewrite rules operate textually, and the
// split prevents them from renaming these strings when building with ROCm.
void initCudartBindings(PyObject* module) {
  auto m = py::handle(module).cast<py::module>();

  auto cudart = m.def_submodule("_cudart" , "libcudart.so bindings" );

  // By splitting the names of these objects into two literals we prevent the
  // HIP rewrite rules from changing these names when building with HIP.

#if !defined(USE_ROCM) && defined(CUDA_VERSION) && CUDA_VERSION < 12000
  // cudaOutputMode_t is used in cudaProfilerInitialize only. The latter is gone
  // in CUDA 12, so neither is exposed for CUDA >= 12 or ROCm builds.
  py::enum_<cudaOutputMode_t>(
      cudart,
      "cuda"
      "OutputMode" )
      .value("KeyValuePair" , cudaKeyValuePair)
      .value("CSV" , cudaCSV);
#endif

  // Minimal error enum: only `success` is exposed, so Python callers can
  // compare a returned cudaError_t against it.
  py::enum_<cudaError_t>(
      cudart,
      "cuda"
      "Error" )
      .value("success" , cudaSuccess);

  cudart.def(
      "cuda"
      "GetErrorString" ,
      cudaGetErrorString);
  // Profiler start/stop: on ROCm these bind a no-op stub that always reports
  // success (hipReturnSuccess) instead of the CUDA profiler-control calls.
  cudart.def(
      "cuda"
      "ProfilerStart" ,
#ifdef USE_ROCM
      hipReturnSuccess
#else
      cudaProfilerStart
#endif
  );
  cudart.def(
      "cuda"
      "ProfilerStop" ,
#ifdef USE_ROCM
      hipReturnSuccess
#else
      cudaProfilerStop
#endif
  );
  // The pointer-taking wrappers below accept raw addresses as integers
  // (uintptr_t) from Python and cast them to the appropriate pointer type;
  // callers are responsible for passing valid addresses.
  cudart.def(
      "cuda"
      "HostRegister" ,
      [](uintptr_t ptr, size_t size, unsigned int flags) -> cudaError_t {
        return C10_CUDA_ERROR_HANDLED(
            cudaHostRegister((void*)ptr, size, flags));
      });
  cudart.def(
      "cuda"
      "HostUnregister" ,
      [](uintptr_t ptr) -> cudaError_t {
        return C10_CUDA_ERROR_HANDLED(cudaHostUnregister((void*)ptr));
      });
  cudart.def(
      "cuda"
      "StreamCreate" ,
      // `ptr` is the address of a caller-owned cudaStream_t that receives the
      // newly created stream handle.
      [](uintptr_t ptr) -> cudaError_t {
        return C10_CUDA_ERROR_HANDLED(cudaStreamCreate((cudaStream_t*)ptr));
      });
  cudart.def(
      "cuda"
      "StreamDestroy" ,
      // Here `ptr` is the stream handle itself (not an address of one).
      [](uintptr_t ptr) -> cudaError_t {
        return C10_CUDA_ERROR_HANDLED(cudaStreamDestroy((cudaStream_t)ptr));
      });
#if !defined(USE_ROCM) && defined(CUDA_VERSION) && CUDA_VERSION < 12000
  // cudaProfilerInitialize is no longer needed after CUDA 12:
  // https://forums.developer.nvidia.com/t/cudaprofilerinitialize-is-deprecated-alternative/200776/3
  cudart.def(
      "cuda"
      "ProfilerInitialize" ,
      cudaProfilerInitialize);
#endif
  // Returns (free, total) memory for `device` as reported by cudaMemGetInfo;
  // CUDAGuard temporarily switches the current device for the query.
  cudart.def(
      "cuda"
      "MemGetInfo" ,
      [](int device) -> std::pair<size_t, size_t> {
        c10::cuda::CUDAGuard guard(device);
        size_t device_free = 0;
        size_t device_total = 0;
        C10_CUDA_CHECK(cudaMemGetInfo(&device_free, &device_total));
        return {device_free, device_total};
      });
}
116 | |
117 | } // namespace shared |
118 | } // namespace cuda |
119 | } // namespace torch |
120 | |