1#pragma once
2
3#include <c10/macros/Export.h>
4
5#include <dispatch.h>
6#include <ir_all_nodes.h>
7
8#include <vector>
9
10namespace torch {
11namespace jit {
12namespace fuser {
13namespace cuda {
14
15//! Reuse Allocate nodes via pointer aliasing.
16//!
17//! Pass 1 collects candidate TensorViews: anything in shared memory, OR in
18//! local memory with a static size larger than register_size_threshold.
19//! Pass 2 finds an appropriate input Allocate node among those candidates.
20//!
21//! Alias criteria -- the output Allocate is aliased to the input Allocate when:
22//!  - the input is a candidate TensorView,
23//!  - the input allocation has the same size as the output allocation,
24//!  - the thread bindings match, and
25//!  - the input is not used after this op.
26//!
27//! \param exprs Expressions to process; taken by const reference.
28//! \return A vector of Expr* returned by value. NOTE(review): presumably the
29//! expression list with aliasing applied -- confirm against the definition,
30//! which is not visible in this header.
31std::vector<Expr*> reuseMemoryAllocations(const std::vector<Expr*>& exprs);
32
33} // namespace cuda
34} // namespace fuser
35} // namespace jit
36} // namespace torch
37