1 | #pragma once |
2 | |
3 | #include <array> |
4 | #include <cstdint> |
5 | #include <memory> |
6 | #include <stack> |
7 | #include <string> |
8 | #include <unordered_map> |
9 | #include <utility> |
10 | #include <vector> |
11 | |
12 | #include <torch/csrc/profiler/events.h> |
13 | |
14 | #include <c10/util/Exception.h> |
15 | |
16 | namespace torch { |
17 | namespace profiler { |
18 | namespace impl { |
19 | namespace linux_perf { |
20 | |
/*
 * Upper bound on the number of events that can be tracked at once.
 * The bound comes from the limited number of hardware performance counters
 * available on a CPU, combined with the fact that time multiplexing is not
 * supported yet. Time multiplexing would mean either scaling the counter
 * values in proportion to the enabled and running time, or running the
 * workload multiple times.
 */
constexpr uint8_t MAX_EVENTS{4};
29 | |
/*
 * Plain record holding one reading of an event: the count itself plus the
 * two time fields that accompany it.
 *
 * Every field is zero-initialized so that a default-constructed PerfCounter
 * never exposes indeterminate values (e.g. when a read into it fails or is
 * short). The struct remains an aggregate, trivially copyable and
 * standard-layout, so it can still be brace-initialized and filled byte-wise.
 *
 * NOTE(review): the time fields presumably correspond to the
 * TOTAL_TIME_ENABLED / TOTAL_TIME_RUNNING read formats of perf_event_open(2)
 * -- confirm against the implementation file.
 */
struct PerfCounter {
  uint64_t value = 0; /* The value of the event */
  uint64_t time_enabled = 0; /* for TIME_ENABLED */
  uint64_t time_running = 0; /* for TIME_RUNNING */
};
35 | |
/*
 * Basic perf event handler for Android and Linux.
 *
 * Holds the name of one event together with the descriptor (fd_) used to
 * talk to the kernel about it; fd_ is -1 while no descriptor is held.
 * The type is move-only: a descriptor must have exactly one owner.
 *
 * NOTE(review): the destructor is defined out of line; it is assumed to
 * release fd_ when it is not -1 -- confirm in the implementation file.
 */
class PerfEvent {
 public:
  /* Creates a handler for the event called `name` (the string is copied).
   * No descriptor is held until Init() is called. The name is taken by const
   * reference so that const strings and temporaries are accepted too. */
  explicit PerfEvent(const std::string& name) : name_(name) {}

  /* Copying is not allowed; these were already implicitly deleted because
   * the move operations are user-declared, spelled out here for clarity. */
  PerfEvent(const PerfEvent&) = delete;
  PerfEvent& operator=(const PerfEvent&) = delete;

  /* Takes over the name and descriptor of `other`, leaving `other` without
   * a descriptor (fd_ == -1). */
  PerfEvent(PerfEvent&& other) noexcept
      : name_(std::move(other.name_)), fd_(std::exchange(other.fd_, -1)) {}

  /* Exchanges state with `other` instead of overwriting it: the descriptor
   * previously held by *this ends up in `other` and is released when `other`
   * is destroyed, rather than being silently dropped (leaked). */
  PerfEvent& operator=(PerfEvent&& other) noexcept {
    if (this != &other) {
      std::swap(fd_, other.fd_);
      name_.swap(other.name_);
    }
    return *this;
  }

  ~PerfEvent();

  /* Setup perf events with the Linux Kernel, attaches perf to this process
   * using perf_event_open(2) */
  void Init();

  /* Stop incrementing hardware counters for this event */
  void Disable() const;

  /* Start counting hardware event from this point on */
  void Enable() const;

  /* Zero out the counts for this event */
  void Reset() const;

  /* Returns PerfCounter values for this event from kernel, on non supported
   * platforms this always returns zero */
  uint64_t ReadCounter() const;

 private:
  /* Name of the event */
  std::string name_;

  /* Descriptor for this event; -1 means "none held" */
  int fd_ = -1;
};
81 | |
82 | class PerfProfiler { |
83 | public: |
84 | /* Configure all the events and track them as individual PerfEvent */ |
85 | void Configure(std::vector<std::string>& event_names); |
86 | |
87 | /* Enable events counting from here */ |
88 | void Enable(); |
89 | |
90 | /* Disable counting and fill in the caller supplied container with delta |
91 | * calculated from the start count values since last Enable() */ |
92 | void Disable(perf_counters_t&); |
93 | |
94 | private: |
95 | uint64_t CalcDelta(uint64_t start, uint64_t end) const; |
96 | void StartCounting() const; |
97 | void StopCounting() const; |
98 | |
99 | std::vector<PerfEvent> events_; |
100 | std::stack<perf_counters_t> start_values_; |
101 | }; |
102 | } // namespace linux_perf |
103 | } // namespace impl |
104 | } // namespace profiler |
105 | } // namespace torch |
106 | |