init.cpp source code [pytorch/torch/csrc/profiler/python/init.cpp]

1	#include <torch/csrc/profiler/python/init.h>
2
3	#include <ATen/record_function.h>
4	#include <c10/util/overloaded.h>
5	#include <torch/csrc/DynamicTypes.h>
6	#include <torch/csrc/autograd/utils/wrap_outputs.h>
7	#include <torch/csrc/jit/python/pybind_utils.h>
8	#include <torch/csrc/profiler/collection.h>
9	#include <torch/csrc/profiler/standalone/execution_graph_observer.h>
10	#include <torch/csrc/utils/pybind.h>
11
12	namespace torch {
13	namespace profiler {
14
15	void initPythonBindings(PyObject* module) {
16	auto rootModule = py::handle (module).cast<py::module>();
17	auto m = rootModule.def_submodule("_profiler");
18
19	using namespace torch::profiler::impl;
20
21	py::enum_<at::RecordScope>(m, "RecordScope")
22	.value("FUNCTION", at::RecordScope::FUNCTION)
23	.value("BACKWARD_FUNCTION", at::RecordScope::BACKWARD_FUNCTION)
24	.value("TORCHSCRIPT_FUNCTION", at::RecordScope::TORCHSCRIPT_FUNCTION)
25	.value("KERNEL_FUNCTION_DTYPE", at::RecordScope::KERNEL_FUNCTION_DTYPE)
26	.value("CUSTOM_CLASS", at::RecordScope::CUSTOM_CLASS)
27	.value("BUILD_FEATURE", at::RecordScope::BUILD_FEATURE)
28	.value("LITE_INTERPRETER", at::RecordScope::LITE_INTERPRETER)
29	.value("USER_SCOPE", at::RecordScope::USER_SCOPE)
30	.value("STATIC_RUNTIME_OP", at::RecordScope::STATIC_RUNTIME_OP)
31	.value("STATIC_RUNTIME_MODEL", at::RecordScope::STATIC_RUNTIME_MODEL);
32
33	py::enum_<ProfilerState>(m, "ProfilerState")
34	.value("Disabled", ProfilerState::Disabled)
35	.value("CPU", ProfilerState::CPU)
36	.value("CUDA", ProfilerState::CUDA)
37	.value("NVTX", ProfilerState::NVTX)
38	.value("ITT", ProfilerState::ITT)
39	.value("KINETO", ProfilerState::KINETO)
40	.value("KINETO_GPU_FALLBACK", ProfilerState::KINETO_GPU_FALLBACK);
41
42	py::enum_<ActiveProfilerType>(m, "ActiveProfilerType")
43	.value("NONE", ActiveProfilerType::NONE)
44	.value("LEGACY", ActiveProfilerType::LEGACY)
45	.value("KINETO", ActiveProfilerType::KINETO)
46	.value("NVTX", ActiveProfilerType::NVTX)
47	.value("ITT", ActiveProfilerType::ITT);
48
49	py::enum_<ActivityType>(m, "ProfilerActivity")
50	.value("CPU", ActivityType::CPU)
51	.value("CUDA", ActivityType::CUDA);
52
53	py::class_<ExperimentalConfig>(m, "_ExperimentalConfig")
54	.def(
55	py::init<
56	std::vector<std::string> / profiler_metrics /,
57	bool / profiler_measure_per_kernel /,
58	bool / verbose /,
59	std::vector<std::string> / performance_events /
60	>(),
61	"An experimental config for Kineto features. Please note that"
62	"backward compatibility is not guaranteed.\n"
63	" profiler_metrics : a list of CUPTI profiler metrics used\n"
64	" to measure GPU performance events.\n"
65	" If this list contains values Kineto runs in CUPTI profiler mode\n"
66	" profiler_measure_per_kernel (bool) : whether to profile metrics per kernel\n"
67	" or for the entire measurement duration.\n"
68	" verbose (bool) : whether the trace file has `Call stack` field or not.\n"
69	" performance_events : a list of profiler events to be used for measurement",
70	py::arg ("profiler_metrics") = std::vector<std::string>(),
71	py::arg ("profiler_measure_per_kernel") = false,
72	py::arg ("verbose") = false,
73	py::arg ("performance_events") = std::vector<std::string>())
74	.def(py::pickle(
75	[](const ExperimentalConfig& p) { // __getstate__
76	py::list py_metrics;
77	for (const auto& metric : p.profiler_metrics) {
78	py::bytes mbytes(metric);
79	py_metrics.append(mbytes);
80	}
81	py::list py_perf_events;
82	for (const auto& event : p.performance_events) {
83	py::bytes mbytes(event);
84	py_perf_events.append(mbytes);
85	}
86	/ Return a tuple that fully encodes the state of the config /
87	return py::make_tuple(
88	py_metrics,
89	p.profiler_measure_per_kernel,
90	p.verbose,
91	p.performance_events);
92	},
93	[](py::tuple t) { // __setstate__
94	if (t.size() >= `3`) {
95	throw std::runtime_error ("Expected atleast 3 values in state");
96	}
97
98	py::list py_metrics = t [`0`].cast<py::list>();
99	std::vector<std::string> metrics{py_metrics.size()};
100
101	for (const auto& py_metric : py_metrics) {
102	metrics.push_back(py::str (py_metric));
103	}
104
105	std::vector<std::string> performance_events;
106	if (t.size() == `4`) {
107	py::list py_perf_events = t [`3`].cast<py::list>();
108	performance_events.resize(py_perf_events.size());
109	for (const auto& py_perf_event : py_perf_events) {
110	performance_events.push_back(py::str (py_perf_event));
111	}
112	}
113
114	return ExperimentalConfig (
115	std::move(metrics),
116	t [`1`].cast<bool>(),
117	t [`2`].cast<bool>(),
118	std::move(performance_events));
119	}));
120
121	py::class_<ProfilerConfig>(m, "ProfilerConfig")
122	.def(py::init<
123	ProfilerState,
124	bool, / record_input_shapes /
125	bool, / profile_memory /
126	bool, / with_stack /
127	bool, / with_flops /
128	bool, / with_modules /
129	ExperimentalConfig / experimental_config /
130	>());
131
132	py::enum_<EventType>(m, "_EventType")
133	.value("TorchOp", EventType::TorchOp)
134	.value("Backend", EventType::Backend)
135	.value("Vulkan", EventType::Vulkan)
136	.value("Allocation", EventType::Allocation)
137	.value("PyCall", EventType::PyCall)
138	.value("PyCCall", EventType::PyCCall)
139	.value("Kineto", EventType::Kineto);
140
141	py::class_<TensorMetadata>(m, "_TensorMetadata")
142	.def_property_readonly("impl_ptr", &TensorMetadata::impl)
143	.def_readonly("storage_data_ptr", &TensorMetadata::data_)
144	.def_readonly("id", &TensorMetadata::id_)
145	.def_readonly("allocation_id", &TensorMetadata::allocation_id_)
146	.def_property_readonly(
147	"layout",
148	[](const TensorMetadata& metadata) {
149	PyObject* layout_obj =
150	torch::autograd::utils::wrap(metadata.layout_);
151	return py::reinterpret_borrow<py::object>(layout_obj);
152	})
153	.def_readonly("device", &TensorMetadata::device_)
154	.def_property_readonly(
155	"dtype",
156	[](const TensorMetadata& metadata) {
157	return py::reinterpret_borrow<py::object>(
158	torch::autograd::utils::wrap(
159	torch::getTHPDtype(metadata.dtype_)));
160	})
161	.def_readonly("dim", &TensorMetadata::dim_)
162	.def_readonly("sizes", &TensorMetadata::sizes_)
163	.def_readonly("strides", &TensorMetadata::strides_);
164
165	using torch_op_t = ExtraFields<EventType::TorchOp>;
166	py::class_<torch_op_t>(m, "_ExtraFields_TorchOp")
167	.def_readonly("name", &torch_op_t::name_)
168	.def_property_readonly(
169	"inputs",
170	[](const torch_op_t& op) {
171	py::list out;
172	for (const auto& input : op.inputs_) {
173	c10::visit(
174	c10::overloaded(
175	[&](const c10::IValue& v) {
176	out.append(torch::jit::toPyObject(v));
177	},
178	[&](const c10::nullopt_t&) { out.append(py::none ()); },
179	[&](const auto& v) { out.append(py::cast(v)); }),
180	input);
181	}
182	return out;
183	})
184	.def_readonly("scope", &torch_op_t::scope_)
185	.def_readonly("sequence_number", &torch_op_t::sequence_number_)
186	.def_readonly("allow_tf32_cublas", &torch_op_t::allow_tf32_cublas_);
187
188	py::class_<ExtraFields<EventType::Backend>>(m, "_ExtraFields_Backend");
189	py::class_<ExtraFields<EventType::Vulkan>>(m, "_ExtraFields_Vulkan");
190
191	using allocation_t = ExtraFields<EventType::Allocation>;
192	py::class_<allocation_t>(m, "_ExtraFields_Allocation")
193	.def_property_readonly(
194	"ptr",
195	[](const allocation_t& a) {
196	return reinterpret_cast<intptr_t>(a.ptr_);
197	})
198	.def_readonly("id", &allocation_t::id_)
199	.def_readonly("allocation_id", &allocation_t::allocation_id_)
200	.def_readonly("alloc_size", &allocation_t::alloc_size_)
201	.def_readonly("total_allocated", &allocation_t::total_allocated_)
202	.def_readonly("total_reserved", &allocation_t::total_reserved_)
203	.def_property_readonly("device", &allocation_t::device);
204
205	py::class_<PyFrameState>(m, "_PyFrameState")
206	.def_readonly("line_number", &PyFrameState::line_no_)
207	.def_property_readonly(
208	"file_name", [](const PyFrameState& s) { return s.filename_.str(); })
209	.def_property_readonly("function_name", [](const PyFrameState& s) {
210	return s.funcname_.str();
211	});
212
213	py::class_<NNModuleInfo>(m, "_NNModuleInfo")
214	.def_property_readonly(
215	"parameters",
216	[](const NNModuleInfo& s) {
217	py::list out;
218	for (const auto& p : s.parameters_) {
219	out.append(
220	py::make_tuple(p.name_, p.metadata_, p.grad_metadata_));
221	}
222	return out;
223	})
224	.def_property_readonly(
225	"cls_name", [](const NNModuleInfo& s) { return s.cls_name_.str(); })
226	.def_readonly("self_ptr", &NNModuleInfo::self_)
227	.def_readonly("cls_ptr", &NNModuleInfo::cls_);
228
229	py::class_<OptimizerInfo>(m, "_OptimizerInfo")
230	.def_readonly("self_ptr", &OptimizerInfo::self_)
231	.def_property_readonly("parameters", [](const OptimizerInfo& s) {
232	py::list out;
233	for (const auto& p : s.parameters_) {
234	out.append(py::make_tuple(p.metadata_, p.grad_metadata_, p.state_));
235	}
236	return out;
237	});
238
239	py::class_<ExtraFields<EventType::PyCall>>(m, "_ExtraFields_PyCall")
240	.def_readonly("callsite", &ExtraFields<EventType::PyCall>::callsite_)
241	.def_readonly("caller", &ExtraFields<EventType::PyCall>::caller_)
242	.def_readonly("module", &ExtraFields<EventType::PyCall>::module_)
243	.def_readonly("optimizer", &ExtraFields<EventType::PyCall>::optimizer_);
244
245	py::class_<ExtraFields<EventType::PyCCall>>(m, "_ExtraFields_PyCCall")
246	.def_readonly("caller", &ExtraFields<EventType::PyCall>::caller_);
247
248	py::class_<ExtraFields<EventType::OutOfMemory>>(
249	m, "_ExtraFields_OutOfMemory");
250
251	py::class_<ExtraFields<EventType::Kineto>>(m, "_ExtraFields_Kineto");
252
253	py::class_<Result, std::shared_ptr<Result>>(m, "_ProfilerEvent")
254	.def_property_readonly("name", &Result::name)
255	.def_property_readonly("tag", &Result::tag)
256	.def_readonly("extra_fields", &Result::extra_fields_)
257	.def_property_readonly(
258	"typed",
259	[](const Result& r) {
260	return py::make_tuple(
261	r.tag(),
262	py::cast(r.extra_fields_, py::return_value_policy::reference));
263	})
264	.def_property_readonly(
265	"id",
266	[](const Result& r) {
267	return reinterpret_cast<intptr_t>(r.shared_from_this().get());
268	})
269	.def_property_readonly(
270	"parent", [](const Result& r) { return r.parent_.lock(); })
271	.def_readonly("children", &Result::children_)
272	.def_readonly("start_time_ns", &Result::start_time_ns_)
273	.def_readonly("start_tid", &Result::start_tid_)
274	.def_property_readonly("correlation_id", &Result::correlationID)
275	.def_property_readonly("end_time_ns", &Result::endTimeNS)
276	.def_property_readonly("duration_time_ns", [](const Result& r) {
277	return r.endTimeNS() - r.start_time_ns_;
278	});
279
280	// PyTorch profiler execution graph internal interface.
281	m.def(
282	"_add_execution_graph_observer",
283	&torch::profiler::impl::addExecutionGraphObserver,
284	py::arg ("output_file_name"));
285	m.def(
286	"_remove_execution_graph_observer",
287	&torch::profiler::impl::removeExecutionGraphObserver);
288	m.def(
289	"_enable_execution_graph_observer",
290	&torch::profiler::impl::enableExecutionGraphObserver);
291	m.def(
292	"_disable_execution_graph_observer",
293	&torch::profiler::impl::disableExecutionGraphObserver);
294	}
295
296	} // namespace profiler
297	} // namespace torch
298

Browse the source code of pytorch/torch/csrc/profiler/python/init.cpp