1#include <pybind11/pybind11.h>
2#include <torch/csrc/Device.h>
3#include <torch/csrc/THP.h>
4#include <torch/csrc/cuda/Event.h>
5#include <torch/csrc/cuda/Module.h>
6#include <torch/csrc/cuda/Stream.h>
7#include <torch/csrc/utils/pybind.h>
8#include <torch/csrc/utils/pycfunction_helpers.h>
9#include <torch/csrc/utils/python_arg_parser.h>
10
11#include <c10/cuda/CUDAGuard.h>
12
13#include <cuda_runtime_api.h>
14#include <structmember.h>
15
// Type object for the CUDA event Python class, exposed as a generic
// PyObject*. It is null until THCPEvent_init points it at THCPEventType.
PyObject* THCPEventClass = nullptr;
17
18static PyObject* THCPEvent_pynew(
19 PyTypeObject* type,
20 PyObject* args,
21 PyObject* kwargs) {
22 HANDLE_TH_ERRORS
23 unsigned char enable_timing = 0;
24 unsigned char blocking = 0;
25 unsigned char interprocess = 0;
26
27 // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
28 constexpr char* kwlist[] = {
29 "enable_timing", "blocking", "interprocess", nullptr};
30 if (!PyArg_ParseTupleAndKeywords(
31 args,
32 kwargs,
33 "|bbb",
34 const_cast<char**>(kwlist),
35 &enable_timing,
36 &blocking,
37 &interprocess)) {
38 return nullptr;
39 }
40
41 THPObjectPtr ptr(type->tp_alloc(type, 0));
42 if (!ptr) {
43 return nullptr;
44 }
45
46 THCPEvent* self = (THCPEvent*)ptr.get();
47 unsigned int flags = (blocking ? cudaEventBlockingSync : cudaEventDefault) |
48 (enable_timing ? cudaEventDefault : cudaEventDisableTiming) |
49 (interprocess ? cudaEventInterprocess : cudaEventDefault);
50
51 new (&self->cuda_event) at::cuda::CUDAEvent(flags);
52
53 return (PyObject*)ptr.release();
54 END_HANDLE_TH_ERRORS
55}
56
57static PyObject* THCPEvent_from_ipc_handle(
58 PyObject* _type,
59 PyObject* args,
60 PyObject* kwargs) {
61 HANDLE_TH_ERRORS
62 auto type = (PyTypeObject*)_type;
63
64 static torch::PythonArgParser parser({
65 "from_ipc_handle(Device device, std::string ipc_handle)",
66 });
67 torch::ParsedArgs<2> parsed_args;
68 auto r = parser.parse(args, kwargs, parsed_args);
69
70 at::Device device = r.device(0);
71 std::string handle_string = r.string(1);
72
73 TORCH_CHECK(
74 handle_string.size() == sizeof(cudaIpcEventHandle_t),
75 "cudaIpcEventHandle_t expects byte-like object of size ",
76 sizeof(cudaIpcEventHandle_t),
77 ", but got ",
78 handle_string.size());
79 TORCH_CHECK(
80 device.type() == at::kCUDA,
81 "Event can only be created on "
82 "CUDA devices, but got device type ",
83 device.type())
84
85 THPObjectPtr ptr(type->tp_alloc(type, 0));
86 if (!ptr) {
87 return nullptr;
88 }
89 THCPEvent* self = (THCPEvent*)ptr.get();
90
91 // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
92 cudaIpcEventHandle_t handle;
93 std::memcpy(&handle, handle_string.c_str(), handle_string.size());
94 new (&self->cuda_event) at::cuda::CUDAEvent(device.index(), &handle);
95
96 return (PyObject*)ptr.release();
97 END_HANDLE_TH_ERRORS
98}
99
100static void THCPEvent_dealloc(THCPEvent* self) {
101 self->cuda_event.~CUDAEvent();
102 Py_TYPE(self)->tp_free((PyObject*)self);
103}
104
105static PyObject* THCPEvent_get_cuda_event(THCPEvent* self, void* unused) {
106 HANDLE_TH_ERRORS
107 return PyLong_FromVoidPtr(self->cuda_event.event());
108 END_HANDLE_TH_ERRORS
109}
110
111static PyObject* THCPEvent_get_device(THCPEvent* self, void* unused) {
112 HANDLE_TH_ERRORS
113 at::optional<at::Device> device = self->cuda_event.device();
114 if (!device) {
115 Py_RETURN_NONE;
116 }
117 return THPDevice_New(device.value());
118 END_HANDLE_TH_ERRORS
119}
120
121static PyObject* THCPEvent_record(PyObject* _self, PyObject* _stream) {
122 HANDLE_TH_ERRORS
123 auto self = (THCPEvent*)_self;
124 auto stream = (THCPStream*)_stream;
125 self->cuda_event.record(stream->cuda_stream);
126 Py_RETURN_NONE;
127 END_HANDLE_TH_ERRORS
128}
129
130static PyObject* THCPEvent_wait(PyObject* _self, PyObject* _stream) {
131 HANDLE_TH_ERRORS {
132 auto self = (THCPEvent*)_self;
133 auto stream = (THCPStream*)_stream;
134 pybind11::gil_scoped_release no_gil{};
135 self->cuda_event.block(stream->cuda_stream);
136 }
137 Py_RETURN_NONE;
138 END_HANDLE_TH_ERRORS
139}
140
141static PyObject* THCPEvent_query(PyObject* _self, PyObject* noargs) {
142 HANDLE_TH_ERRORS
143 auto self = (THCPEvent*)_self;
144 return PyBool_FromLong(self->cuda_event.query());
145 END_HANDLE_TH_ERRORS
146}
147
148static PyObject* THCPEvent_elapsed_time(PyObject* _self, PyObject* _other) {
149 HANDLE_TH_ERRORS
150 auto self = (THCPEvent*)_self;
151 auto other = (THCPEvent*)_other;
152 return PyFloat_FromDouble(self->cuda_event.elapsed_time(other->cuda_event));
153 END_HANDLE_TH_ERRORS
154}
155
156static PyObject* THCPEvent_synchronize(PyObject* _self, PyObject* noargs) {
157 HANDLE_TH_ERRORS {
158 auto self = (THCPEvent*)_self;
159 pybind11::gil_scoped_release no_gil{};
160 self->cuda_event.synchronize();
161 }
162 Py_RETURN_NONE;
163 END_HANDLE_TH_ERRORS
164}
165
166static PyObject* THCPEvent_ipc_handle(PyObject* _self, PyObject* noargs) {
167 HANDLE_TH_ERRORS
168 auto self = (THCPEvent*)_self;
169 // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
170 cudaIpcEventHandle_t handle;
171 self->cuda_event.ipc_handle(&handle);
172 return PyBytes_FromStringAndSize((const char*)&handle, sizeof(handle));
173 END_HANDLE_TH_ERRORS
174}
175
176// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,
177// cppcoreguidelines-avoid-non-const-global-variables, modernize-avoid-c-arrays)
178static struct PyGetSetDef THCPEvent_properties[] = {
179 {"device", (getter)THCPEvent_get_device, nullptr, nullptr, nullptr},
180 {"cuda_event", (getter)THCPEvent_get_cuda_event, nullptr, nullptr, nullptr},
181 {nullptr}};
182
183// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,
184// cppcoreguidelines-avoid-non-const-global-variables, modernize-avoid-c-arrays)
185static PyMethodDef THCPEvent_methods[] = {
186 {(char*)"from_ipc_handle",
187 castPyCFunctionWithKeywords(THCPEvent_from_ipc_handle),
188 METH_CLASS | METH_VARARGS | METH_KEYWORDS,
189 nullptr},
190 {(char*)"record", THCPEvent_record, METH_O, nullptr},
191 {(char*)"wait", THCPEvent_wait, METH_O, nullptr},
192 {(char*)"query", THCPEvent_query, METH_NOARGS, nullptr},
193 {(char*)"elapsed_time", THCPEvent_elapsed_time, METH_O, nullptr},
194 {(char*)"synchronize", THCPEvent_synchronize, METH_NOARGS, nullptr},
195 {(char*)"ipc_handle", THCPEvent_ipc_handle, METH_NOARGS, nullptr},
196 {nullptr}};
197
// Static type object for the Python class "torch._C._CudaEventBase".
//
// The initializer is positional, so every slot must stay in PyTypeObject's
// declaration order. Slots that are actually populated:
//   tp_basicsize - sizeof(THCPEvent)
//   tp_dealloc   - THCPEvent_dealloc
//   tp_flags     - default flags plus Py_TPFLAGS_BASETYPE
//   tp_methods   - THCPEvent_methods
//   tp_getset    - THCPEvent_properties
//   tp_new       - THCPEvent_pynew
// All other slots are left null/zero here.
PyTypeObject THCPEventType = {
    PyVarObject_HEAD_INIT(nullptr, 0) "torch._C._CudaEventBase", /* tp_name */
    sizeof(THCPEvent), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)THCPEvent_dealloc, /* tp_dealloc */
    0, /* tp_vectorcall_offset */
    nullptr, /* tp_getattr */
    nullptr, /* tp_setattr */
    nullptr, /* tp_reserved */
    nullptr, /* tp_repr */
    nullptr, /* tp_as_number */
    nullptr, /* tp_as_sequence */
    nullptr, /* tp_as_mapping */
    nullptr, /* tp_hash */
    nullptr, /* tp_call */
    nullptr, /* tp_str */
    nullptr, /* tp_getattro */
    nullptr, /* tp_setattro */
    nullptr, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    nullptr, /* tp_doc */
    nullptr, /* tp_traverse */
    nullptr, /* tp_clear */
    nullptr, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    nullptr, /* tp_iter */
    nullptr, /* tp_iternext */
    THCPEvent_methods, /* tp_methods */
    nullptr, /* tp_members */
    THCPEvent_properties, /* tp_getset */
    nullptr, /* tp_base */
    nullptr, /* tp_dict */
    nullptr, /* tp_descr_get */
    nullptr, /* tp_descr_set */
    0, /* tp_dictoffset */
    nullptr, /* tp_init */
    nullptr, /* tp_alloc */
    THCPEvent_pynew, /* tp_new */
};
237
238void THCPEvent_init(PyObject* module) {
239 THCPEventClass = (PyObject*)&THCPEventType;
240 if (PyType_Ready(&THCPEventType) < 0) {
241 throw python_error();
242 }
243 Py_INCREF(&THCPEventType);
244 if (PyModule_AddObject(module, "_CudaEventBase", (PyObject*)&THCPEventType) <
245 0) {
246 throw python_error();
247 }
248}
249