1#if defined(USE_CUDA)
2#include <gmock/gmock-matchers.h>
3#include <gtest/gtest.h>
4
5#include <codegen.h>
6#include <executor.h>
7#include <fusion.h>
8#include <ir_all_nodes.h>
9#include <ir_iostream.h>
10#include <kernel_cache.h>
11#include <ops/all_ops.h>
12#include <test/test_gpu_validator.h>
13#include <test/test_utils.h>
14
15// Tests go in torch::jit
16namespace torch {
17namespace jit {
18
19using namespace torch::jit::fuser::cuda;
20
21TEST_F(NVFuserTest, FusionStandaloneFull_CUDA) {
22 auto sizes = {0, 1, 10, 17, 1024};
23 auto dtypes = {
24 kBool,
25 kFloat,
26 kLong,
27 kDouble,
28 kHalf,
29 kBFloat16,
30 kInt,
31 kComplexFloat,
32 kComplexDouble};
33
34 auto fusion = std::make_unique<Fusion>();
35 FusionGuard fg(fusion.get());
36
37 Val* size = IrBuilder::create<Int>();
38 Val* fill_val1 = IrBuilder::create<Int>();
39 Val* fill_val2 = IrBuilder::create<Int>();
40 Val* fill_val3 = IrBuilder::create<Int>();
41 fusion->addInput(size);
42 fusion->addInput(fill_val1);
43 fusion->addInput(fill_val2);
44 fusion->addInput(fill_val3);
45 for (auto dtype : dtypes) {
46 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
47 continue;
48 }
49 auto out_tv = full({size}, fill_val1, aten_to_data_type(dtype));
50 fusion->addOutput(out_tv);
51 out_tv = full({size, size}, fill_val2, aten_to_data_type(dtype));
52 fusion->addOutput(out_tv);
53 out_tv = full_like(out_tv, fill_val3);
54 fusion->addOutput(out_tv);
55 }
56
57 FusionExecutorCache executor_cache(std::move(fusion));
58
59 for (auto size : sizes) {
60 std::vector<at::Tensor> expect;
61 expect.reserve(dtypes.size());
62 for (auto dtype : dtypes) {
63 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
64 continue;
65 }
66 const auto options =
67 at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
68 expect.emplace_back(at::full({size}, 11, options));
69 expect.emplace_back(at::full({size, size}, 12, options));
70 expect.emplace_back(at::full({size, size}, 13, options));
71 }
72 auto cg_outputs = executor_cache.runFusionWithInputs({size, 11, 12, 13});
73
74 testValidate(
75 executor_cache.fusion(),
76 cg_outputs,
77 {size, 11, 12, 13},
78 expect,
79 __LINE__,
80 __FILE__);
81 }
82}
83
84TEST_F(NVFuserTest, FusionStandaloneZeros_CUDA) {
85 auto sizes = {0, 1, 10, 17, 1024};
86 auto dtypes = {
87 kBool,
88 kFloat,
89 kLong,
90 kDouble,
91 kHalf,
92 kBFloat16,
93 kInt,
94 kComplexFloat,
95 kComplexDouble};
96
97 auto fusion = std::make_unique<Fusion>();
98 FusionGuard fg(fusion.get());
99
100 Val* size = IrBuilder::create<Int>();
101 fusion->addInput(size);
102 for (auto dtype : dtypes) {
103 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
104 continue;
105 }
106 auto out_tv = zeros({size}, aten_to_data_type(dtype));
107 fusion->addOutput(out_tv);
108 out_tv = zeros({size, size}, aten_to_data_type(dtype));
109 fusion->addOutput(out_tv);
110 out_tv = zeros_like(out_tv);
111 fusion->addOutput(out_tv);
112 }
113
114 FusionExecutorCache executor_cache(std::move(fusion));
115
116 for (auto size : sizes) {
117 std::vector<at::Tensor> expect;
118 expect.reserve(dtypes.size());
119 for (auto dtype : dtypes) {
120 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
121 continue;
122 }
123 const auto options =
124 at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
125 expect.emplace_back(at::zeros({size}, options));
126 expect.emplace_back(at::zeros({size, size}, options));
127 expect.emplace_back(at::zeros({size, size}, options));
128 }
129 auto cg_outputs = executor_cache.runFusionWithInputs({size});
130
131 testValidate(
132 executor_cache.fusion(),
133 cg_outputs,
134 {size},
135 expect,
136 __LINE__,
137 __FILE__);
138 }
139}
140
141TEST_F(NVFuserTest, FusionStandaloneOnes_CUDA) {
142 auto sizes = {0, 1, 10, 17, 1024};
143 auto dtypes = {
144 kBool,
145 kFloat,
146 kLong,
147 kDouble,
148 kHalf,
149 kBFloat16,
150 kInt,
151 kComplexFloat,
152 kComplexDouble};
153
154 auto fusion = std::make_unique<Fusion>();
155 FusionGuard fg(fusion.get());
156
157 Val* size = IrBuilder::create<Int>();
158 fusion->addInput(size);
159 for (auto dtype : dtypes) {
160 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
161 continue;
162 }
163 auto out_tv = ones({size}, aten_to_data_type(dtype));
164 fusion->addOutput(out_tv);
165 out_tv = ones({size, size}, aten_to_data_type(dtype));
166 fusion->addOutput(out_tv);
167 out_tv = ones_like(out_tv);
168 fusion->addOutput(out_tv);
169 }
170
171 FusionExecutorCache executor_cache(std::move(fusion));
172
173 for (auto size : sizes) {
174 std::vector<at::Tensor> expect;
175 expect.reserve(dtypes.size());
176 for (auto dtype : dtypes) {
177 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
178 continue;
179 }
180 const auto options =
181 at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
182 expect.emplace_back(at::ones({size}, options));
183 expect.emplace_back(at::ones({size, size}, options));
184 expect.emplace_back(at::ones({size, size}, options));
185 }
186 auto cg_outputs = executor_cache.runFusionWithInputs({size});
187
188 testValidate(
189 executor_cache.fusion(),
190 cg_outputs,
191 {size},
192 expect,
193 __LINE__,
194 __FILE__);
195 }
196}
197
198TEST_F(NVFuserTest, FusionStandaloneARange_CUDA) {
199 auto starts_ends = {-1., 0., 10.3, 1024. * 256};
200 auto steps = {-1.5, 1., 2.};
201 auto dtypes = {kFloat, kLong, kDouble};
202
203 for (auto dtype : dtypes) {
204 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
205 continue;
206 }
207
208 auto fusion = std::make_unique<Fusion>();
209 FusionGuard fg(fusion.get());
210
211 Val* start_int = IrBuilder::create<Int>();
212 Val* end_int = IrBuilder::create<Int>();
213 Val* step_int = IrBuilder::create<Int>();
214 Val* start_double = IrBuilder::create<Double>();
215 Val* end_double = IrBuilder::create<Double>();
216 Val* step_double = IrBuilder::create<Double>();
217 fusion->addInput(start_int);
218 fusion->addInput(end_int);
219 fusion->addInput(step_int);
220 fusion->addInput(start_double);
221 fusion->addInput(end_double);
222 fusion->addInput(step_double);
223 auto tv0 = arange(start_int, end_int, step_int, aten_to_data_type(dtype));
224 auto tv1 =
225 arange(start_double, end_double, step_double, aten_to_data_type(dtype));
226 auto tv2 =
227 arange(start_int, end_double, step_double, aten_to_data_type(dtype));
228 auto tv3 =
229 arange(start_double, end_double, step_int, aten_to_data_type(dtype));
230 fusion->addOutput(tv0);
231 fusion->addOutput(tv1);
232 fusion->addOutput(tv2);
233 fusion->addOutput(tv3);
234
235 FusionExecutorCache executor_cache(std::move(fusion));
236
237 const auto options = at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
238
239 for (auto start : starts_ends) {
240 for (auto end : starts_ends) {
241 for (auto step : steps) {
242 if (std::signbit(end - start) != std::signbit(step)) {
243 continue;
244 }
245
246 at::Tensor a =
247 at::arange((int64_t)start, (int64_t)end, (int64_t)step, options);
248 at::Tensor b =
249 at::arange((double)start, (double)end, (double)step, options);
250 at::Tensor c =
251 at::arange((int64_t)start, (double)end, (double)step, options);
252 at::Tensor d =
253 at::arange((double)start, (double)end, (int64_t)step, options);
254
255 auto cg_outputs = executor_cache.runFusionWithInputs(
256 {(int64_t)start,
257 (int64_t)end,
258 (int64_t)step,
259 (double)start,
260 (double)end,
261 (double)step});
262
263 testValidate(
264 executor_cache.fusion(),
265 cg_outputs,
266 {(int64_t)start,
267 (int64_t)end,
268 (int64_t)step,
269 (double)start,
270 (double)end,
271 (double)step},
272 {a, b, c, d},
273 __LINE__,
274 __FILE__);
275 }
276 }
277 }
278 }
279}
280
281TEST_F(NVFuserTest, FusionStandaloneEye_CUDA) {
282 auto sizes = {0, 1, 10, 17, 1024};
283 auto dtypes = {
284 kBool,
285 kFloat,
286 kLong,
287 kDouble,
288 kHalf,
289 kBFloat16,
290 kInt,
291 kComplexFloat,
292 kComplexDouble};
293
294 auto fusion = std::make_unique<Fusion>();
295 FusionGuard fg(fusion.get());
296
297 Val* size = IrBuilder::create<Int>();
298 Val* maybe_m = IrBuilder::create<Int>();
299 fusion->addInput(size);
300 fusion->addInput(maybe_m);
301 for (auto dtype : dtypes) {
302 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
303 continue;
304 }
305 auto out_tv1 = eye(size, aten_to_data_type(dtype));
306 fusion->addOutput(out_tv1);
307 auto out_tv2 = eye(size, maybe_m, aten_to_data_type(dtype));
308 fusion->addOutput(out_tv2);
309 }
310
311 FusionExecutorCache executor_cache(std::move(fusion));
312
313 for (auto size : sizes) {
314 std::vector<at::Tensor> expect;
315 expect.reserve(dtypes.size());
316 for (auto dtype : dtypes) {
317 if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
318 continue;
319 }
320 const auto options =
321 at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
322 expect.emplace_back(at::eye(size, options));
323 expect.emplace_back(at::eye(size, 15, options));
324 }
325 auto cg_outputs = executor_cache.runFusionWithInputs({size, 15});
326
327 testValidate(
328 executor_cache.fusion(),
329 cg_outputs,
330 {size, 15},
331 expect,
332 __LINE__,
333 __FILE__);
334 }
335}
336
337} // namespace jit
338} // namespace torch
339#endif // #if defined(USE_CUDA)
340