1#pragma once
2
#include <array>
#include <cstdint>
#include <cstring>
#include <vector>
6
7namespace torch {
8namespace profiler {
9
/*
 * A vector type to hold a list of performance counters. Each counter is
 * stored as an unsigned 64-bit integer (std::uint64_t, from <cstdint>).
 */
using perf_counters_t = std::vector<std::uint64_t>;
12
/*
 * Standard list of performance events, independent of hardware or backend.
 * The array holds the event names as C strings; it has exactly two entries.
 */
constexpr std::array<const char*, 2> ProfilerPerfEvents = {{
    /*
     * "cycles": number of Processing Element (PE) cycles between two points
     * of interest in time. This should correlate positively with wall-time.
     * Measured in uint64_t. The PE can be non-CPU. Reporting behavior when
     * multiple PEs participate (i.e. threadpool) is TBD.
     */
    "cycles",

    /*
     * "instructions": number of PE instructions between two points of
     * interest in time. This should correlate positively with wall time and
     * with the amount of computation (i.e. work). Across repeat executions
     * the number of instructions should be more or less invariant.
     * Measured in uint64_t. The PE can be non-CPU.
     */
    "instructions",
}};
29} // namespace profiler
30} // namespace torch
31