1 | #pragma once |
2 | |
3 | #include <utils.h> |
4 | |
5 | #include <nvToolsExt.h> |
6 | |
7 | // NOLINTNEXTLINE(modernize-deprecated-headers) |
8 | #include <stdio.h> |
9 | #include <chrono> |
10 | #include <cstdio> |
11 | |
12 | namespace torch { |
13 | namespace jit { |
14 | namespace fuser { |
15 | namespace cuda { |
16 | namespace inst { |
17 | |
18 | //! An optional record of selected timestamped operations, events and counters |
19 | //! |
20 | //! This class is not intended to be used directly. Instead, the operations |
21 | //! to be traced are marked (for example using the FUSER_PERF_SCOPE macro) |
22 | //! |
23 | //! In order to enable tracing, the `PYTORCH_NVFUSER_TRACE` environment |
24 | //! variable is set to point to a trace file (ex `test.trace`). The file name |
25 | //! may be a relative or an absolute path. |
26 | //! |
27 | //! The trace uses the Chrome Tracing (Catapult) format, which is a well |
28 | //! documented JSON based format supported by multiple tools: |
29 | //! https://chromium.googlesource.com/catapult/+/HEAD/tracing/README.md |
30 | //! |
31 | //! An easy way to view traces is to type `about://tracing` in Chrome or |
32 | //! Chromium. |
33 | //! |
34 | class TORCH_CUDA_CU_API Trace : public NonCopyable { |
35 | public: |
36 | using Clock = std::chrono::steady_clock; |
37 | |
38 | public: |
39 | static Trace* instance() { |
40 | static Trace trace; |
41 | return &trace; |
42 | } |
43 | |
44 | void beginEvent(const char* name) { |
45 | if (log_file_ != nullptr) { |
46 | logEvent('B', name); |
47 | } |
48 | if (record_nvtx_range_) { |
49 | nvtxRangePushA(name); |
50 | } |
51 | } |
52 | |
53 | void endEvent(const char* name) { |
54 | if (record_nvtx_range_) { |
55 | nvtxRangePop(); |
56 | } |
57 | if (log_file_ != nullptr) { |
58 | logEvent('E', name); |
59 | } |
60 | } |
61 | |
62 | private: |
63 | Trace(); |
64 | ~Trace(); |
65 | |
66 | void logEvent(char ph, const char* name, char sep = ','); |
67 | |
68 | private: |
69 | FILE* log_file_ = nullptr; |
70 | Clock::time_point start_timestamp_; |
71 | bool record_nvtx_range_ = true; |
72 | }; |
73 | |
74 | //! \internal Automatic scope for a perf marker |
75 | //! (normally used through the FUSER_PERF_SCOPE macro) |
76 | class TORCH_CUDA_CU_API TraceScope : public NonCopyable { |
77 | public: |
78 | explicit TraceScope(const char* event_name) : event_name_(event_name) { |
79 | Trace::instance()->beginEvent(event_name_); |
80 | } |
81 | |
82 | ~TraceScope() { |
83 | Trace::instance()->endEvent(event_name_); |
84 | } |
85 | |
86 | private: |
87 | const char* event_name_ = nullptr; |
88 | }; |
89 | |
//! Token-pastes `lhs` and `rhs` exactly as given (no macro expansion of the
//! arguments happens before the paste)
#define FUSER_MACRO_CONCAT2(lhs, rhs) lhs##rhs

//! Token-pastes `lhs` and `rhs` after both have been macro-expanded; the
//! extra level of indirection is what allows arguments such as
//! `__COUNTER__` to be replaced by their value first
#define FUSER_MACRO_CONCAT(lhs, rhs) FUSER_MACRO_CONCAT2(lhs, rhs)

//! Builds an identifier made of `stem` followed by the current value of
//! `__COUNTER__`, so that every expansion yields a different name
#define FUSER_ANONYMOUS(stem) FUSER_MACRO_CONCAT(stem, __COUNTER__)
93 | |
//! Marks a scope that should be measured and recorded in a perf trace
//!
//! Expands to the declaration of a TraceScope variable whose name is
//! `_perf_scope_` followed by a `__COUNTER__` value (see FUSER_ANONYMOUS),
//! so the event spans from this statement to the end of the enclosing scope.
//!
//! \param scope_name The name of the scope, normally a simple string literal
//!
#define FUSER_PERF_SCOPE(scope_name)        \
  torch::jit::fuser::cuda::inst::TraceScope \
  FUSER_ANONYMOUS(_perf_scope_)(scope_name)
100 | |
101 | } // namespace inst |
102 | } // namespace cuda |
103 | } // namespace fuser |
104 | } // namespace jit |
105 | } // namespace torch |
106 | |