test_gpu_tensor_factories.cpp source code [pytorch/third_party/nvfuser/test/test_gpu_tensor_factories.cpp]

1	#if defined(USE_CUDA)
2	#include <gmock/gmock-matchers.h>
3	#include <gtest/gtest.h>
4
5	#include <codegen.h>
6	#include <executor.h>
7	#include <fusion.h>
8	#include <ir_all_nodes.h>
9	#include <ir_iostream.h>
10	#include <kernel_cache.h>
11	#include <ops/all_ops.h>
12	#include <test/test_gpu_validator.h>
13	#include <test/test_utils.h>
14
15	// Tests go in torch::jit
16	namespace torch {
17	namespace jit {
18
19	using namespace torch::jit::fuser::cuda;
20
21	TEST_F(NVFuserTest, FusionStandaloneFull_CUDA) {
22	auto sizes = {`0`, `1`, `10`, `17`, `1024`};
23	auto dtypes = {
24	kBool,
25	kFloat,
26	kLong,
27	kDouble,
28	kHalf,
29	kBFloat16,
30	kInt,
31	kComplexFloat,
32	kComplexDouble};
33
34	auto fusion = std::make_unique<Fusion>();
35	FusionGuard fg(fusion.get());
36
37	Val* size = IrBuilder::create<Int>();
38	Val* fill_val1 = IrBuilder::create<Int>();
39	Val* fill_val2 = IrBuilder::create<Int>();
40	Val* fill_val3 = IrBuilder::create<Int>();
41	fusion ->addInput(size);
42	fusion ->addInput(fill_val1);
43	fusion ->addInput(fill_val2);
44	fusion ->addInput(fill_val3);
45	for (auto dtype : dtypes) {
46	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
47	continue;
48	}
49	auto out_tv = full({size}, fill_val1, aten_to_data_type(dtype));
50	fusion ->addOutput(out_tv);
51	out_tv = full({size, size}, fill_val2, aten_to_data_type(dtype));
52	fusion ->addOutput(out_tv);
53	out_tv = full_like(out_tv, fill_val3);
54	fusion ->addOutput(out_tv);
55	}
56
57	FusionExecutorCache executor_cache(std::move(fusion));
58
59	for (auto size : sizes) {
60	std::vector<at::Tensor> expect;
61	expect.reserve(dtypes.size());
62	for (auto dtype : dtypes) {
63	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
64	continue;
65	}
66	const auto options =
67	at::TensorOptions().dtype(dtype).device(at::kCUDA, `0`);
68	expect.emplace_back(at::full({size}, `11`, options));
69	expect.emplace_back(at::full({size, size}, `12`, options));
70	expect.emplace_back(at::full({size, size}, `13`, options));
71	}
72	auto cg_outputs = executor_cache.runFusionWithInputs({size, `11`, `12`, `13`});
73
74	testValidate(
75	executor_cache.fusion(),
76	cg_outputs,
77	{size, `11`, `12`, `13`},
78	expect,
79	__LINE__,
80	__FILE__);
81	}
82	}
83
84	TEST_F(NVFuserTest, FusionStandaloneZeros_CUDA) {
85	auto sizes = {`0`, `1`, `10`, `17`, `1024`};
86	auto dtypes = {
87	kBool,
88	kFloat,
89	kLong,
90	kDouble,
91	kHalf,
92	kBFloat16,
93	kInt,
94	kComplexFloat,
95	kComplexDouble};
96
97	auto fusion = std::make_unique<Fusion>();
98	FusionGuard fg(fusion.get());
99
100	Val* size = IrBuilder::create<Int>();
101	fusion ->addInput(size);
102	for (auto dtype : dtypes) {
103	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
104	continue;
105	}
106	auto out_tv = zeros({size}, aten_to_data_type(dtype));
107	fusion ->addOutput(out_tv);
108	out_tv = zeros({size, size}, aten_to_data_type(dtype));
109	fusion ->addOutput(out_tv);
110	out_tv = zeros_like(out_tv);
111	fusion ->addOutput(out_tv);
112	}
113
114	FusionExecutorCache executor_cache(std::move(fusion));
115
116	for (auto size : sizes) {
117	std::vector<at::Tensor> expect;
118	expect.reserve(dtypes.size());
119	for (auto dtype : dtypes) {
120	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
121	continue;
122	}
123	const auto options =
124	at::TensorOptions().dtype(dtype).device(at::kCUDA, `0`);
125	expect.emplace_back(at::zeros({size}, options));
126	expect.emplace_back(at::zeros({size, size}, options));
127	expect.emplace_back(at::zeros({size, size}, options));
128	}
129	auto cg_outputs = executor_cache.runFusionWithInputs({size});
130
131	testValidate(
132	executor_cache.fusion(),
133	cg_outputs,
134	{size},
135	expect,
136	__LINE__,
137	__FILE__);
138	}
139	}
140
141	TEST_F(NVFuserTest, FusionStandaloneOnes_CUDA) {
142	auto sizes = {`0`, `1`, `10`, `17`, `1024`};
143	auto dtypes = {
144	kBool,
145	kFloat,
146	kLong,
147	kDouble,
148	kHalf,
149	kBFloat16,
150	kInt,
151	kComplexFloat,
152	kComplexDouble};
153
154	auto fusion = std::make_unique<Fusion>();
155	FusionGuard fg(fusion.get());
156
157	Val* size = IrBuilder::create<Int>();
158	fusion ->addInput(size);
159	for (auto dtype : dtypes) {
160	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
161	continue;
162	}
163	auto out_tv = ones({size}, aten_to_data_type(dtype));
164	fusion ->addOutput(out_tv);
165	out_tv = ones({size, size}, aten_to_data_type(dtype));
166	fusion ->addOutput(out_tv);
167	out_tv = ones_like(out_tv);
168	fusion ->addOutput(out_tv);
169	}
170
171	FusionExecutorCache executor_cache(std::move(fusion));
172
173	for (auto size : sizes) {
174	std::vector<at::Tensor> expect;
175	expect.reserve(dtypes.size());
176	for (auto dtype : dtypes) {
177	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
178	continue;
179	}
180	const auto options =
181	at::TensorOptions().dtype(dtype).device(at::kCUDA, `0`);
182	expect.emplace_back(at::ones({size}, options));
183	expect.emplace_back(at::ones({size, size}, options));
184	expect.emplace_back(at::ones({size, size}, options));
185	}
186	auto cg_outputs = executor_cache.runFusionWithInputs({size});
187
188	testValidate(
189	executor_cache.fusion(),
190	cg_outputs,
191	{size},
192	expect,
193	__LINE__,
194	__FILE__);
195	}
196	}
197
198	TEST_F(NVFuserTest, FusionStandaloneARange_CUDA) {
199	auto starts_ends = {-`1.`, `0.`, `10.3`, `1024.` * `256`};
200	auto steps = {-`1.5`, `1.`, `2.`};
201	auto dtypes = {kFloat, kLong, kDouble};
202
203	for (auto dtype : dtypes) {
204	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
205	continue;
206	}
207
208	auto fusion = std::make_unique<Fusion>();
209	FusionGuard fg(fusion.get());
210
211	Val* start_int = IrBuilder::create<Int>();
212	Val* end_int = IrBuilder::create<Int>();
213	Val* step_int = IrBuilder::create<Int>();
214	Val* start_double = IrBuilder::create<Double>();
215	Val* end_double = IrBuilder::create<Double>();
216	Val* step_double = IrBuilder::create<Double>();
217	fusion ->addInput(start_int);
218	fusion ->addInput(end_int);
219	fusion ->addInput(step_int);
220	fusion ->addInput(start_double);
221	fusion ->addInput(end_double);
222	fusion ->addInput(step_double);
223	auto tv0 = arange(start_int, end_int, step_int, aten_to_data_type(dtype));
224	auto tv1 =
225	arange(start_double, end_double, step_double, aten_to_data_type(dtype));
226	auto tv2 =
227	arange(start_int, end_double, step_double, aten_to_data_type(dtype));
228	auto tv3 =
229	arange(start_double, end_double, step_int, aten_to_data_type(dtype));
230	fusion ->addOutput(tv0);
231	fusion ->addOutput(tv1);
232	fusion ->addOutput(tv2);
233	fusion ->addOutput(tv3);
234
235	FusionExecutorCache executor_cache(std::move(fusion));
236
237	const auto options = at::TensorOptions().dtype(dtype).device(at::kCUDA, `0`);
238
239	for (auto start : starts_ends) {
240	for (auto end : starts_ends) {
241	for (auto step : steps) {
242	if (std::signbit(end - start) != std::signbit(step)) {
243	continue;
244	}
245
246	at::Tensor a =
247	at::arange((int64_t)start, (int64_t)end, (int64_t)step, options);
248	at::Tensor b =
249	at::arange((double)start, (double)end, (double)step, options);
250	at::Tensor c =
251	at::arange((int64_t)start, (double)end, (double)step, options);
252	at::Tensor d =
253	at::arange((double)start, (double)end, (int64_t)step, options);
254
255	auto cg_outputs = executor_cache.runFusionWithInputs(
256	{(int64_t)start,
257	(int64_t)end,
258	(int64_t)step,
259	(double)start,
260	(double)end,
261	(double)step});
262
263	testValidate(
264	executor_cache.fusion(),
265	cg_outputs,
266	{(int64_t)start,
267	(int64_t)end,
268	(int64_t)step,
269	(double)start,
270	(double)end,
271	(double)step},
272	{a, b, c, d},
273	__LINE__,
274	__FILE__);
275	}
276	}
277	}
278	}
279	}
280
281	TEST_F(NVFuserTest, FusionStandaloneEye_CUDA) {
282	auto sizes = {`0`, `1`, `10`, `17`, `1024`};
283	auto dtypes = {
284	kBool,
285	kFloat,
286	kLong,
287	kDouble,
288	kHalf,
289	kBFloat16,
290	kInt,
291	kComplexFloat,
292	kComplexDouble};
293
294	auto fusion = std::make_unique<Fusion>();
295	FusionGuard fg(fusion.get());
296
297	Val* size = IrBuilder::create<Int>();
298	Val* maybe_m = IrBuilder::create<Int>();
299	fusion ->addInput(size);
300	fusion ->addInput(maybe_m);
301	for (auto dtype : dtypes) {
302	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
303	continue;
304	}
305	auto out_tv1 = eye(size, aten_to_data_type(dtype));
306	fusion ->addOutput(out_tv1);
307	auto out_tv2 = eye(size, maybe_m, aten_to_data_type(dtype));
308	fusion ->addOutput(out_tv2);
309	}
310
311	FusionExecutorCache executor_cache(std::move(fusion));
312
313	for (auto size : sizes) {
314	std::vector<at::Tensor> expect;
315	expect.reserve(dtypes.size());
316	for (auto dtype : dtypes) {
317	if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
318	continue;
319	}
320	const auto options =
321	at::TensorOptions().dtype(dtype).device(at::kCUDA, `0`);
322	expect.emplace_back(at::eye(size, options));
323	expect.emplace_back(at::eye(size, `15`, options));
324	}
325	auto cg_outputs = executor_cache.runFusionWithInputs({size, `15`});
326
327	testValidate(
328	executor_cache.fusion(),
329	cg_outputs,
330	{size, `15`},
331	expect,
332	__LINE__,
333	__FILE__);
334	}
335	}
336
337	} // namespace jit
338	} // namespace torch
339	#endif // #if defined(USE_CUDA)
340

Browse the source code of pytorch/third_party/nvfuser/test/test_gpu_tensor_factories.cpp