1 | #pragma once |
2 | |
#include <array>
#include <cstdint>
#include <cstring>
#include <vector>
6 | |
7 | namespace torch { |
8 | namespace profiler { |
9 | |
/*
 * A vector type to hold a list of performance counters. Each element is one
 * counter value stored as an unsigned 64-bit integer.
 */
using perf_counters_t = std::vector<std::uint64_t>;
12 | |
/*
 * Standard list of performance events independent of hardware or backend.
 *
 * Entries, in array order:
 *
 *   [0] "cycles"
 *       Number of Processing Element (PE) cycles between two points of
 *       interest in time. This should correlate positively with wall-time.
 *       Measured in uint64_t. PE can be non cpu. TBD reporting behavior for
 *       multiple PEs participating (i.e. threadpool).
 *
 *   [1] "instructions"
 *       Number of PE instructions between two points of interest in time.
 *       This should correlate positively with wall time and the amount of
 *       computation (i.e. work). Across repeat executions, the number of
 *       instructions should be more or less invariant. Measured in uint64_t.
 *       PE can be non cpu.
 */
constexpr std::array<const char*, 2> ProfilerPerfEvents = {{
    "cycles",
    "instructions",
}};
29 | } // namespace profiler |
30 | } // namespace torch |
31 | |