1 | #include <pybind11/pybind11.h> |
2 | #include <torch/csrc/Device.h> |
3 | #include <torch/csrc/THP.h> |
4 | #include <torch/csrc/cuda/Event.h> |
5 | #include <torch/csrc/cuda/Module.h> |
6 | #include <torch/csrc/cuda/Stream.h> |
7 | #include <torch/csrc/utils/pybind.h> |
8 | #include <torch/csrc/utils/pycfunction_helpers.h> |
9 | #include <torch/csrc/utils/python_arg_parser.h> |
10 | |
11 | #include <c10/cuda/CUDAGuard.h> |
12 | |
13 | #include <cuda_runtime_api.h> |
14 | #include <structmember.h> |
15 | |
// Global handle to the Python class object for CUDA events. It is null until
// THCPEvent_init() runs, which points it at THCPEventType.
PyObject* THCPEventClass = nullptr;
17 | |
18 | static PyObject* THCPEvent_pynew( |
19 | PyTypeObject* type, |
20 | PyObject* args, |
21 | PyObject* kwargs) { |
22 | HANDLE_TH_ERRORS |
23 | unsigned char enable_timing = 0; |
24 | unsigned char blocking = 0; |
25 | unsigned char interprocess = 0; |
26 | |
27 | // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) |
28 | constexpr char* kwlist[] = { |
29 | "enable_timing" , "blocking" , "interprocess" , nullptr}; |
30 | if (!PyArg_ParseTupleAndKeywords( |
31 | args, |
32 | kwargs, |
33 | "|bbb" , |
34 | const_cast<char**>(kwlist), |
35 | &enable_timing, |
36 | &blocking, |
37 | &interprocess)) { |
38 | return nullptr; |
39 | } |
40 | |
41 | THPObjectPtr ptr(type->tp_alloc(type, 0)); |
42 | if (!ptr) { |
43 | return nullptr; |
44 | } |
45 | |
46 | THCPEvent* self = (THCPEvent*)ptr.get(); |
47 | unsigned int flags = (blocking ? cudaEventBlockingSync : cudaEventDefault) | |
48 | (enable_timing ? cudaEventDefault : cudaEventDisableTiming) | |
49 | (interprocess ? cudaEventInterprocess : cudaEventDefault); |
50 | |
51 | new (&self->cuda_event) at::cuda::CUDAEvent(flags); |
52 | |
53 | return (PyObject*)ptr.release(); |
54 | END_HANDLE_TH_ERRORS |
55 | } |
56 | |
57 | static PyObject* THCPEvent_from_ipc_handle( |
58 | PyObject* _type, |
59 | PyObject* args, |
60 | PyObject* kwargs) { |
61 | HANDLE_TH_ERRORS |
62 | auto type = (PyTypeObject*)_type; |
63 | |
64 | static torch::PythonArgParser parser({ |
65 | "from_ipc_handle(Device device, std::string ipc_handle)" , |
66 | }); |
67 | torch::ParsedArgs<2> parsed_args; |
68 | auto r = parser.parse(args, kwargs, parsed_args); |
69 | |
70 | at::Device device = r.device(0); |
71 | std::string handle_string = r.string(1); |
72 | |
73 | TORCH_CHECK( |
74 | handle_string.size() == sizeof(cudaIpcEventHandle_t), |
75 | "cudaIpcEventHandle_t expects byte-like object of size " , |
76 | sizeof(cudaIpcEventHandle_t), |
77 | ", but got " , |
78 | handle_string.size()); |
79 | TORCH_CHECK( |
80 | device.type() == at::kCUDA, |
81 | "Event can only be created on " |
82 | "CUDA devices, but got device type " , |
83 | device.type()) |
84 | |
85 | THPObjectPtr ptr(type->tp_alloc(type, 0)); |
86 | if (!ptr) { |
87 | return nullptr; |
88 | } |
89 | THCPEvent* self = (THCPEvent*)ptr.get(); |
90 | |
91 | // NOLINTNEXTLINE(cppcoreguidelines-init-variables) |
92 | cudaIpcEventHandle_t handle; |
93 | std::memcpy(&handle, handle_string.c_str(), handle_string.size()); |
94 | new (&self->cuda_event) at::cuda::CUDAEvent(device.index(), &handle); |
95 | |
96 | return (PyObject*)ptr.release(); |
97 | END_HANDLE_TH_ERRORS |
98 | } |
99 | |
100 | static void THCPEvent_dealloc(THCPEvent* self) { |
101 | self->cuda_event.~CUDAEvent(); |
102 | Py_TYPE(self)->tp_free((PyObject*)self); |
103 | } |
104 | |
105 | static PyObject* THCPEvent_get_cuda_event(THCPEvent* self, void* unused) { |
106 | HANDLE_TH_ERRORS |
107 | return PyLong_FromVoidPtr(self->cuda_event.event()); |
108 | END_HANDLE_TH_ERRORS |
109 | } |
110 | |
111 | static PyObject* THCPEvent_get_device(THCPEvent* self, void* unused) { |
112 | HANDLE_TH_ERRORS |
113 | at::optional<at::Device> device = self->cuda_event.device(); |
114 | if (!device) { |
115 | Py_RETURN_NONE; |
116 | } |
117 | return THPDevice_New(device.value()); |
118 | END_HANDLE_TH_ERRORS |
119 | } |
120 | |
121 | static PyObject* THCPEvent_record(PyObject* _self, PyObject* _stream) { |
122 | HANDLE_TH_ERRORS |
123 | auto self = (THCPEvent*)_self; |
124 | auto stream = (THCPStream*)_stream; |
125 | self->cuda_event.record(stream->cuda_stream); |
126 | Py_RETURN_NONE; |
127 | END_HANDLE_TH_ERRORS |
128 | } |
129 | |
130 | static PyObject* THCPEvent_wait(PyObject* _self, PyObject* _stream) { |
131 | HANDLE_TH_ERRORS { |
132 | auto self = (THCPEvent*)_self; |
133 | auto stream = (THCPStream*)_stream; |
134 | pybind11::gil_scoped_release no_gil{}; |
135 | self->cuda_event.block(stream->cuda_stream); |
136 | } |
137 | Py_RETURN_NONE; |
138 | END_HANDLE_TH_ERRORS |
139 | } |
140 | |
141 | static PyObject* THCPEvent_query(PyObject* _self, PyObject* noargs) { |
142 | HANDLE_TH_ERRORS |
143 | auto self = (THCPEvent*)_self; |
144 | return PyBool_FromLong(self->cuda_event.query()); |
145 | END_HANDLE_TH_ERRORS |
146 | } |
147 | |
148 | static PyObject* THCPEvent_elapsed_time(PyObject* _self, PyObject* _other) { |
149 | HANDLE_TH_ERRORS |
150 | auto self = (THCPEvent*)_self; |
151 | auto other = (THCPEvent*)_other; |
152 | return PyFloat_FromDouble(self->cuda_event.elapsed_time(other->cuda_event)); |
153 | END_HANDLE_TH_ERRORS |
154 | } |
155 | |
156 | static PyObject* THCPEvent_synchronize(PyObject* _self, PyObject* noargs) { |
157 | HANDLE_TH_ERRORS { |
158 | auto self = (THCPEvent*)_self; |
159 | pybind11::gil_scoped_release no_gil{}; |
160 | self->cuda_event.synchronize(); |
161 | } |
162 | Py_RETURN_NONE; |
163 | END_HANDLE_TH_ERRORS |
164 | } |
165 | |
166 | static PyObject* THCPEvent_ipc_handle(PyObject* _self, PyObject* noargs) { |
167 | HANDLE_TH_ERRORS |
168 | auto self = (THCPEvent*)_self; |
169 | // NOLINTNEXTLINE(cppcoreguidelines-init-variables) |
170 | cudaIpcEventHandle_t handle; |
171 | self->cuda_event.ipc_handle(&handle); |
172 | return PyBytes_FromStringAndSize((const char*)&handle, sizeof(handle)); |
173 | END_HANDLE_TH_ERRORS |
174 | } |
175 | |
176 | // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays, |
177 | // cppcoreguidelines-avoid-non-const-global-variables, modernize-avoid-c-arrays) |
178 | static struct PyGetSetDef THCPEvent_properties[] = { |
179 | {"device" , (getter)THCPEvent_get_device, nullptr, nullptr, nullptr}, |
180 | {"cuda_event" , (getter)THCPEvent_get_cuda_event, nullptr, nullptr, nullptr}, |
181 | {nullptr}}; |
182 | |
183 | // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays, |
184 | // cppcoreguidelines-avoid-non-const-global-variables, modernize-avoid-c-arrays) |
185 | static PyMethodDef THCPEvent_methods[] = { |
186 | {(char*)"from_ipc_handle" , |
187 | castPyCFunctionWithKeywords(THCPEvent_from_ipc_handle), |
188 | METH_CLASS | METH_VARARGS | METH_KEYWORDS, |
189 | nullptr}, |
190 | {(char*)"record" , THCPEvent_record, METH_O, nullptr}, |
191 | {(char*)"wait" , THCPEvent_wait, METH_O, nullptr}, |
192 | {(char*)"query" , THCPEvent_query, METH_NOARGS, nullptr}, |
193 | {(char*)"elapsed_time" , THCPEvent_elapsed_time, METH_O, nullptr}, |
194 | {(char*)"synchronize" , THCPEvent_synchronize, METH_NOARGS, nullptr}, |
195 | {(char*)"ipc_handle" , THCPEvent_ipc_handle, METH_NOARGS, nullptr}, |
196 | {nullptr}}; |
197 | |
198 | PyTypeObject THCPEventType = { |
199 | PyVarObject_HEAD_INIT(nullptr, 0) "torch._C._CudaEventBase" , /* tp_name */ |
200 | sizeof(THCPEvent), /* tp_basicsize */ |
201 | 0, /* tp_itemsize */ |
202 | (destructor)THCPEvent_dealloc, /* tp_dealloc */ |
203 | 0, /* tp_vectorcall_offset */ |
204 | nullptr, /* tp_getattr */ |
205 | nullptr, /* tp_setattr */ |
206 | nullptr, /* tp_reserved */ |
207 | nullptr, /* tp_repr */ |
208 | nullptr, /* tp_as_number */ |
209 | nullptr, /* tp_as_sequence */ |
210 | nullptr, /* tp_as_mapping */ |
211 | nullptr, /* tp_hash */ |
212 | nullptr, /* tp_call */ |
213 | nullptr, /* tp_str */ |
214 | nullptr, /* tp_getattro */ |
215 | nullptr, /* tp_setattro */ |
216 | nullptr, /* tp_as_buffer */ |
217 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
218 | nullptr, /* tp_doc */ |
219 | nullptr, /* tp_traverse */ |
220 | nullptr, /* tp_clear */ |
221 | nullptr, /* tp_richcompare */ |
222 | 0, /* tp_weaklistoffset */ |
223 | nullptr, /* tp_iter */ |
224 | nullptr, /* tp_iternext */ |
225 | THCPEvent_methods, /* tp_methods */ |
226 | nullptr, /* tp_members */ |
227 | THCPEvent_properties, /* tp_getset */ |
228 | nullptr, /* tp_base */ |
229 | nullptr, /* tp_dict */ |
230 | nullptr, /* tp_descr_get */ |
231 | nullptr, /* tp_descr_set */ |
232 | 0, /* tp_dictoffset */ |
233 | nullptr, /* tp_init */ |
234 | nullptr, /* tp_alloc */ |
235 | THCPEvent_pynew, /* tp_new */ |
236 | }; |
237 | |
238 | void THCPEvent_init(PyObject* module) { |
239 | THCPEventClass = (PyObject*)&THCPEventType; |
240 | if (PyType_Ready(&THCPEventType) < 0) { |
241 | throw python_error(); |
242 | } |
243 | Py_INCREF(&THCPEventType); |
244 | if (PyModule_AddObject(module, "_CudaEventBase" , (PyObject*)&THCPEventType) < |
245 | 0) { |
246 | throw python_error(); |
247 | } |
248 | } |
249 | |