#pragma once

#include <cstdint>
#include <functional>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include <torch/csrc/profiler/api.h>
#include <torch/csrc/profiler/events.h>
#include <torch/csrc/profiler/stubs/base.h>
#include <torch/csrc/profiler/util.h>
10 | |
11 | namespace torch { |
12 | namespace profiler { |
13 | namespace impl { |
14 | struct Result; |
15 | namespace kineto { |
16 | struct ActivityTraceWrapper; |
17 | } // namespace kineto |
18 | } // namespace impl |
19 | } // namespace profiler |
20 | namespace autograd { |
21 | namespace profiler { |
22 | using experimental_event_t = std::shared_ptr<torch::profiler::impl::Result>; |
23 | |
// A single profiler event exposed to downstream consumers.
// Thin wrapper over a shared `torch::profiler::impl::Result`; most accessors
// below delegate to it (implementations live in the corresponding .cpp).
struct TORCH_API KinetoEvent {
  // `verbose` presumably controls whether the Python stack is materialized
  // into `python_stack_` — TODO confirm against the .cpp.
  KinetoEvent(
      std::shared_ptr<const torch::profiler::impl::Result>,
      const bool verbose);

  // --- Identity / threading ---
  uint64_t startThreadId() const;
  uint64_t endThreadId() const;
  uint8_t activityType() const;
  uint64_t fwdThreadId() const;

  // --- Input metadata (valid only when the matching has*() returns true) ---
  bool hasShapes() const;
  // Returned ArrayRef views `shapes_` below, so it stays valid for the
  // lifetime of this KinetoEvent.
  const c10::ArrayRef<std::vector<int64_t>> shapes() const;
  bool hasTypes() const;
  // Returned ArrayRef views `dtypes_` below.
  const c10::ArrayRef<std::string> dtypes() const;
  uint64_t flops() const;
  int64_t sequenceNr() const;

  // --- Stack / module hierarchy (check has*() before use) ---
  bool hasStack() const;
  const c10::ArrayRef<std::string> stack() const;
  uint8_t scope() const;
  bool hasModuleHierarchy() const;
  const c10::ArrayRef<std::string> moduleHierarchy() const;
  // Correlates this event with module/source info recorded by a backend
  // (see reportBackendEventToActiveKinetoProfiler below).
  int64_t debugHandle() const;

  // --- General event info ---
  std::string name() const;
  c10::DeviceType deviceType() const;
  uint8_t deviceIndex() const;
  int64_t nBytes() const;
  uint64_t startUs() const;
  uint64_t durationUs() const;
  bool isAsync() const;
  uint64_t correlationId() const;
  uint64_t linkedCorrelationId() const;
  int64_t deviceResourceId() const;
  std::string backend() const;
  bool isPythonFunction() const;
  int64_t cudaElapsedUs() const;
  // Fills the supplied buffer with hardware perf-counter values for this
  // event (see torch/csrc/profiler/events.h for perf_counters_t).
  void getPerfEventCounters(torch::profiler::perf_counters_t&) const;

 private:
  torch::profiler::impl::ProfilerEventStub fallbackStart() const;
  torch::profiler::impl::ProfilerEventStub fallbackEnd() const;

  // Shared, immutable source of truth for this event's data.
  std::shared_ptr<const torch::profiler::impl::Result> result_;
  std::vector<std::string> python_stack_;

  // Copy fields from result so we can return ArrayRefs.
  std::vector<std::vector<int64_t>> shapes_;
  std::vector<std::string> dtypes_;
};
71 | |
72 | // Consolidating events returned directly from Kineto |
73 | // with events manually created by us (e.g. start/stop marks, |
74 | // memory allocation events) |
// Consolidating events returned directly from Kineto
// with events manually created by us (e.g. start/stop marks,
// memory allocation events)
struct TORCH_API ProfilerResult {
  ProfilerResult();
  // @param start_time: trace start timestamp in microseconds
  // @param events: flat list of consolidated events
  // @param trace: owned Kineto activity trace (opaque wrapper; destructor
  //               is out-of-line because the type is only forward-declared)
  // @param event_tree: roots of the experimental event tree
  ProfilerResult(
      uint64_t start_time,
      std::vector<KinetoEvent> events,
      std::unique_ptr<torch::profiler::impl::kineto::ActivityTraceWrapper>&&
          trace,
      std::vector<experimental_event_t>&& event_tree);
  ~ProfilerResult();

  // Trace start time in microseconds.
  uint64_t trace_start_us() const {
    return trace_start_us_;
  }

  const std::vector<KinetoEvent>& events() const {
    return events_;
  }

  const std::vector<experimental_event_t>& event_tree() const {
    return event_tree_;
  }

  // Persists the collected trace to `path` — presumably via the wrapped
  // Kineto trace; implementation not visible in this header.
  void save(const std::string& path);

 private:
  uint64_t trace_start_us_ = 0;
  std::vector<KinetoEvent> events_;
  std::unique_ptr<torch::profiler::impl::kineto::ActivityTraceWrapper> trace_;
  std::vector<experimental_event_t> event_tree_;
};
105 | |
/*
 * This API is used by backends to record latency of events that
 * happened in the backend but were not visible to pytorch runtime.
 * For example, if part of the model is lowered to a dsp backend, then
 * the execution of that part of the model is delegated to the backend.
 * When backend finishes execution it has an option to provide profiling
 * information (latency only at the moment) corresponding to different
 * operators that were executed in the backend.
 * When such events are recorded by backend using this API, the event
 * records will be collected by active kineto profiler. If no kineto profiler
 * is active then the event is ignored.
 * This provides us with a way to generate all the profiling information
 * for a model regardless of where model (or part of it) executed.
 * @param start_time_us: start time in us of the event
 * @param end_time_us: end time in us of the event
 * @param debug_handle: debug handle to correlate this event/op with
 * model level module/source information
 * @param scope: scope of the event, e.g. LITE_INTERPRETER, RECORD_FN etc.
 * @param event_name: name of the event, e.g. op name
 * @param backend_name: name of the backend where the event took place.
 */
TORCH_API void reportBackendEventToActiveKinetoProfiler(
    const int64_t start_time_us,
    const int64_t end_time_us,
    const int64_t debug_handle,
    const at::RecordScope scope,
    const std::string& event_name,
    const std::string& backend_name);
134 | |
// Starts the Kineto-backed profiler capturing the given activity types.
// @param config: profiler configuration (state, flags, etc.)
// @param activities: which activity types (e.g. CPU/CUDA) to capture
// @param scopes: if non-empty, restrict collection to these RecordFunction
//                scopes — presumably all scopes when empty; TODO confirm.
TORCH_API void enableProfiler(
    const torch::profiler::impl::ProfilerConfig& config,
    const std::set<torch::profiler::impl::ActivityType>& activities,
    const std::unordered_set<at::RecordScope>& scopes = {});
139 | |
/*
 * Same as enableProfiler but with callback to do post-processing of
 * KinetoEvents.
 * enableProfilerWithEventPostProcess enables profiler to capture
 * specified activities, with specified RecordFunction scope, if any.
 * Additionally, it takes a functor that does in-place post processing of
 * events, e.g. populate stack trace or module hierarchy information lazily
 * using debug_handle.
 * Example usage is with lite interpreter that has recording scope of
 * LITE_INTERPRETER. In this case lite interpreter runtime, records debug
 * handles in RecordFunction, along with other information. Debug handles are
 * eventually passed down to KinetoEvent and recorded as part of the event.
 * KinetoEdgeCPUProfiler, in torch/csrc/jit/mobile/profiler_edge.cpp, enables
 * profiler using post-processing callback, via
 * enableProfilerWithEventPostProcess, that takes these debug handles and
 * generates stack trace and module hierarchy information, once profiling is
 * done.
 */
// Post-processing callback: receives an event's debug handle plus mutable
// references to its stack and module-hierarchy lists, to be filled in place.
using post_process_t = std::function<void(
    /*debug_handle */ int64_t,
    /*jit_stack */ std::vector<std::string>&,
    /*jit_modules */ std::vector<std::string>&)>;
TORCH_API void enableProfilerWithEventPostProcess(
    const torch::profiler::impl::ProfilerConfig& config,
    const std::set<torch::profiler::impl::ActivityType>& activities,
    post_process_t&& cb,
    const std::unordered_set<at::RecordScope>& scopes = {});
167 | |
// Stops the active profiler and returns ownership of the collected
// events/trace as a ProfilerResult.
TORCH_API std::unique_ptr<ProfilerResult> disableProfiler();
169 | |
// One-time setup before enableProfiler — presumably warms up / configures
// Kineto for the requested activities; implementation not visible here.
TORCH_API void prepareProfiler(
    const torch::profiler::impl::ProfilerConfig& config,
    const std::set<torch::profiler::impl::ActivityType>& activities);
173 | |
174 | } // namespace profiler |
175 | } // namespace autograd |
176 | |
177 | namespace profiler { |
178 | namespace impl { |
179 | |
// Experimental. Forwards a Vulkan backend event (identified by `id`) to the
// profiler; subject to change without notice.
TORCH_API void _reportVulkanEventToProfiler(vulkan_id_t id);
182 | |
183 | } // namespace impl |
184 | } // namespace profiler |
185 | |
186 | } // namespace torch |
187 | |