1#pragma once
2
3#include <array>
4#include <cstdint>
5#include <memory>
6#include <stack>
7#include <string>
8#include <unordered_map>
9#include <utility>
10#include <vector>
11
12#include <torch/csrc/profiler/events.h>
13
14#include <c10/util/Exception.h>
15
16namespace torch {
17namespace profiler {
18namespace impl {
19namespace linux_perf {
20
/*
 * Upper bound on the number of events that can be tracked at once.
 * The limit comes from the hardware restriction on CPU performance counters,
 * together with the fact that time multiplexing is not supported yet.
 * Time multiplexing would mean either scaling each counter value in
 * proportion to its enabled and running time, or running the workload
 * multiple times.
 */
constexpr uint8_t MAX_EVENTS{4};
29
/*
 * Raw reading for one event: the event count plus the enabled and running
 * times that accompany it.
 *
 * Every field starts at zero, so a default-constructed PerfCounter never
 * exposes indeterminate values. The struct is still an aggregate, so
 * PerfCounter{} and PerfCounter{v, e, r} keep working.
 *
 * NOTE(review): the field order looks like the perf_event_open(2) read layout
 * for PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; confirm
 * against the code that fills this struct before reordering or resizing
 * any field.
 */
struct PerfCounter {
  uint64_t value = 0; /* The value of the event */
  uint64_t time_enabled = 0; /* for TIME_ENABLED */
  uint64_t time_running = 0; /* for TIME_RUNNING */
};
35
/*
 * Basic perf event handler for Android and Linux.
 *
 * Each object carries the name of one event and the file descriptor used for
 * it (-1 while no descriptor is held). The type is move-only so that a
 * descriptor is never referred to by two PerfEvent objects at once.
 */
class PerfEvent {
 public:
  /* Remember the event name; no descriptor is held yet (fd_ == -1).
   * The name is taken by const reference so that const strings and
   * temporaries are accepted as well as mutable lvalues. */
  explicit PerfEvent(const std::string& name) : name_(name) {}

  /* Copying is not allowed: two objects must never share one descriptor. */
  PerfEvent(const PerfEvent&) = delete;
  PerfEvent& operator=(const PerfEvent&) = delete;

  /* Take over the name and descriptor of `other`, leaving it with fd -1. */
  PerfEvent(PerfEvent&& other) noexcept
      : name_(std::move(other.name_)), fd_(std::exchange(other.fd_, -1)) {}

  /* Take over the name and descriptor of `other`, leaving it with fd -1.
   * The state previously held by *this is handed to a temporary rather than
   * overwritten, so its descriptor reaches ~PerfEvent() instead of being
   * dropped. NOTE(review): this assumes the out-of-line destructor releases
   * fd_ — confirm in the implementation file. */
  PerfEvent& operator=(PerfEvent&& other) noexcept {
    if (this != &other) {
      PerfEvent previous(std::move(other));
      std::swap(fd_, previous.fd_);
      name_.swap(previous.name_);
    }
    return *this;
  }

  ~PerfEvent();

  /* Set up the perf event with the Linux kernel, attaching perf to this
   * process using perf_event_open(2) */
  void Init();

  /* Stop incrementing hardware counters for this event */
  void Disable() const;

  /* Start counting the hardware event from this point on */
  void Enable() const;

  /* Zero out the counts for this event */
  void Reset() const;

  /* Returns the counter value for this event as read from the kernel; on
   * unsupported platforms this always returns zero */
  uint64_t ReadCounter() const;

 private:
  /* Name of the event */
  std::string name_;

  /* Descriptor for the event; -1 means none is held */
  int fd_ = -1;
};
81
82class PerfProfiler {
83 public:
84 /* Configure all the events and track them as individual PerfEvent */
85 void Configure(std::vector<std::string>& event_names);
86
87 /* Enable events counting from here */
88 void Enable();
89
90 /* Disable counting and fill in the caller supplied container with delta
91 * calculated from the start count values since last Enable() */
92 void Disable(perf_counters_t&);
93
94 private:
95 uint64_t CalcDelta(uint64_t start, uint64_t end) const;
96 void StartCounting() const;
97 void StopCounting() const;
98
99 std::vector<PerfEvent> events_;
100 std::stack<perf_counters_t> start_values_;
101};
102} // namespace linux_perf
103} // namespace impl
104} // namespace profiler
105} // namespace torch
106