1#pragma once
2
3#include <utils.h>
4
5#include <nvToolsExt.h>
6
7// NOLINTNEXTLINE(modernize-deprecated-headers)
8#include <stdio.h>
9#include <chrono>
10#include <cstdio>
11
12namespace torch {
13namespace jit {
14namespace fuser {
15namespace cuda {
16namespace inst {
17
18//! An optional record of selected timestamped operations, events and counters
19//!
20//! This class is not intended to be used directly. Instead, the operations
21//! to be traced are marked (for example using the FUSER_PERF_SCOPE macro)
22//!
23//! In order to enable tracing, the `PYTORCH_NVFUSER_TRACE` environment
24//! variable is set to point to a trace file (ex `test.trace`). The file name
25//! may be a relative or an absolute path.
26//!
27//! The trace uses the Chrome Tracing (Catapult) format, which is a well
28//! documented JSON based format supported by multiple tools:
29//! https://chromium.googlesource.com/catapult/+/HEAD/tracing/README.md
30//!
31//! An easy way to view traces is to type `about://tracing` in Chrome or
32//! Chromium.
33//!
34class TORCH_CUDA_CU_API Trace : public NonCopyable {
35 public:
36 using Clock = std::chrono::steady_clock;
37
38 public:
39 static Trace* instance() {
40 static Trace trace;
41 return &trace;
42 }
43
44 void beginEvent(const char* name) {
45 if (log_file_ != nullptr) {
46 logEvent('B', name);
47 }
48 if (record_nvtx_range_) {
49 nvtxRangePushA(name);
50 }
51 }
52
53 void endEvent(const char* name) {
54 if (record_nvtx_range_) {
55 nvtxRangePop();
56 }
57 if (log_file_ != nullptr) {
58 logEvent('E', name);
59 }
60 }
61
62 private:
63 Trace();
64 ~Trace();
65
66 void logEvent(char ph, const char* name, char sep = ',');
67
68 private:
69 FILE* log_file_ = nullptr;
70 Clock::time_point start_timestamp_;
71 bool record_nvtx_range_ = true;
72};
73
74//! \internal Automatic scope for a perf marker
75//! (normally used through the FUSER_PERF_SCOPE macro)
76class TORCH_CUDA_CU_API TraceScope : public NonCopyable {
77 public:
78 explicit TraceScope(const char* event_name) : event_name_(event_name) {
79 Trace::instance()->beginEvent(event_name_);
80 }
81
82 ~TraceScope() {
83 Trace::instance()->endEvent(event_name_);
84 }
85
86 private:
87 const char* event_name_ = nullptr;
88};
89
// Token pasting is done in two steps: FUSER_MACRO_CONCAT lets its arguments
// be macro-expanded first (so that __COUNTER__ turns into a number), then
// FUSER_MACRO_CONCAT2 applies `##` to the expanded tokens.
#define FUSER_MACRO_CONCAT2(lhs, rhs) lhs##rhs
#define FUSER_MACRO_CONCAT(lhs, rhs) FUSER_MACRO_CONCAT2(lhs, rhs)

// Builds an identifier made of `stem` followed by the current __COUNTER__
// value, so that each expansion yields a different name.
#define FUSER_ANONYMOUS(stem) FUSER_MACRO_CONCAT(stem, __COUNTER__)

//! Marks the enclosing scope as one to measure and record in a perf trace
//!
//! Expands to the declaration of a uniquely named TraceScope local.
//!
//! \param name The name of the scope, normally a simple string literal
//!
#define FUSER_PERF_SCOPE(name)              \
  torch::jit::fuser::cuda::inst::TraceScope \
  FUSER_ANONYMOUS(_perf_scope_)(name)
100
101} // namespace inst
102} // namespace cuda
103} // namespace fuser
104} // namespace jit
105} // namespace torch
106