lower_instrument.h source code [pytorch/third_party/nvfuser/csrc/lower_instrument.h]

1	#pragma once
2
3	#include <ir_all_nodes.h>
4
5	namespace torch {
6	namespace jit {
7	namespace fuser {
8	namespace cuda {
9
10	//! Set up KernelPerformanceProfile of GpuLower when enabled, which
11	//! keeps track of expressions to profile. A new TensorView is added
12	//! for storing profiling results. The expression list is prepended
13	//! with a kir::Allocate node to allocate the TensorView profile
14	//! buffer. Note that any expression added after this pass will not be
15	//! profiled, so this pass should be called after all expressions are
16	//! lowered. KernelPerformanceProfile is copied to Kernel after
17	//! lowering. Expressions are handled as Expr* and never by value.
18	std::vector<Expr*> instrumentKernel(const std::vector<Expr*>& exprs);
19
20	} // namespace cuda
21	} // namespace fuser
22	} // namespace jit
23	} // namespace torch
24

Browse the source code of pytorch/third_party/nvfuser/csrc/lower_instrument.h