1#pragma once
2
3#include <ir_all_nodes.h>
4
5namespace torch {
6namespace jit {
7namespace fuser {
8namespace cuda {
9
10//! Set up KernelPerformanceProfile of GpuLower when enabled, which
11//! keeps track of expressions to profile. A new TensorView is added
12//! for storing profiling results. The expression list is prepended
13//! with a kir::Allocate node to allocate the TensorView profile
14//! buffer. Note that any expression added after this pass will not be
15//! profiled, so this pass should be called after all expressions are
16//! lowered. KernelPerformanceProfile is copied to Kernel after
17//! lowering.
18std::vector<Expr*> instrumentKernel(const std::vector<Expr*>& exprs);
19
20} // namespace cuda
21} // namespace fuser
22} // namespace jit
23} // namespace torch
24